580.95.05

Maneet Singh
2025-09-30 12:52:14 -07:00
parent 87c0b12473
commit 2b436058a6
147 changed files with 56986 additions and 55176 deletions

View File

@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"580.82.09\"
ccflags-y += -DNV_VERSION_STRING=\"580.95.05\"
# Include and link Tegra out-of-tree modules.
ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),)

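For context, the -DNV_VERSION_STRING flag above reaches C code as an ordinary string literal. A minimal sketch, assuming only that the macro is injected on the compiler command line as in the Kbuild fragment (the variable name is invented):

/* Sketch only -- not part of the driver sources. */
#ifndef NV_VERSION_STRING
#define NV_VERSION_STRING "580.95.05" /* normally supplied via ccflags-y */
#endif

static const char nv_version_example[] = NV_VERSION_STRING;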
View File

@@ -32,6 +32,8 @@ typedef struct {
NvU8 bus, slot, function;
} pci_info;
NvBool needs_numa_setup;
/*
* opaque OS-specific pointer; on Linux, this is a pointer to the
* 'struct device' for the GPU.

View File

@@ -1711,12 +1711,12 @@ static inline void nv_mutex_destroy(struct mutex *lock)
mutex_destroy(lock);
}
static inline NvBool nv_platform_supports_numa(nv_linux_state_t *nvl)
static inline NvBool nv_platform_supports_numa(const nv_linux_state_t *nvl)
{
return nvl->numa_info.node_id != NUMA_NO_NODE;
}
static inline int nv_get_numa_status(nv_linux_state_t *nvl)
static inline int nv_get_numa_status(const nv_linux_state_t *nvl)
{
if (!nv_platform_supports_numa(nvl))
{

View File

@@ -588,8 +588,8 @@ typedef struct nv_state_t
/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
/* Bool to check if power management is supported */
NvBool is_pm_supported;
/* Bool to check if power management is unsupported */
NvBool is_pm_unsupported;
} nv_state_t;
#define NVFP_TYPE_NONE 0x0

View File

@@ -685,6 +685,9 @@ typedef struct UvmGpuInfo_tag
// GPU supports Non-PASID ATS capability
NvBool nonPasidAtsSupport;
// GPU setup in CDMM mode
NvBool cdmmEnabled;
} UvmGpuInfo;
typedef struct UvmGpuFbInfo_tag

View File

@@ -667,7 +667,9 @@ enum NvKmsInputTf {
enum NvKmsOutputColorimetry {
NVKMS_OUTPUT_COLORIMETRY_DEFAULT = 0,
NVKMS_OUTPUT_COLORIMETRY_BT2100 = 1,
NVKMS_OUTPUT_COLORIMETRY_BT601 = 1,
NVKMS_OUTPUT_COLORIMETRY_BT709 = 2,
NVKMS_OUTPUT_COLORIMETRY_BT2100 = 3,
};
enum NvKmsOutputTf {

View File

@@ -4856,6 +4856,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HELPER_FUNCS_MODE_VALID_HAS_CONST_MODE_ARG" "" "types"
;;
memory_device_coherent_present)
#
# Determine if MEMORY_DEVICE_COHERENT support is present or not
#
# Added by commit f25cbb7a95a2 ("mm: add zone device coherent type
# memory support") in v6.0.
#
CODE="
#include <linux/memremap.h>
int memory_device_coherent = MEMORY_DEVICE_COHERENT;
"
compile_check_conftest "$CODE" "NV_MEMORY_DEVICE_COHERENT_PRESENT" "" "types"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit. Please
# avoid specifying -rc kernels, and only use SHAs that actually exist
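When this compile test succeeds, conftest.sh defines NV_MEMORY_DEVICE_COHERENT_PRESENT in the generated headers. A minimal consumer-side sketch (the helper name is hypothetical; only the macro name comes from this change):

#include <linux/types.h>

/* Hypothetical helper: true when the kernel provides MEMORY_DEVICE_COHERENT
 * (added in v6.0). On older kernels the conftest fails, the macro is never
 * defined, and this compiles to 'return false'. */
static inline bool nv_has_memory_device_coherent(void)
{
#if defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
    return true;
#else
    return false;
#endif
}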

View File

@@ -445,6 +445,14 @@ __nv_drm_connector_atomic_check(struct drm_connector *connector,
req_config->modeSetConfig.colorimetry =
NVKMS_OUTPUT_COLORIMETRY_DEFAULT;
break;
case DRM_MODE_COLORIMETRY_BT601_YCC:
req_config->modeSetConfig.colorimetry =
NVKMS_OUTPUT_COLORIMETRY_BT601;
break;
case DRM_MODE_COLORIMETRY_BT709_YCC:
req_config->modeSetConfig.colorimetry =
NVKMS_OUTPUT_COLORIMETRY_BT709;
break;
case DRM_MODE_COLORIMETRY_BT2020_RGB:
case DRM_MODE_COLORIMETRY_BT2020_YCC:
// Ignore RGB/YCC

View File

@@ -691,7 +691,16 @@ static int nv_drm_dev_load(struct drm_device *dev)
pDevice = nvKms->allocateDevice(&allocateDeviceParams);
if (pDevice == NULL) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to allocate NvKmsKapiDevice");
if (nv_dev->gpu_info.needs_numa_setup) {
/*
* RM init from a kernel-mode driver may fail on GPUs that require
* NUMA setup. Just notify about that specifically rather than
* producing a scary-looking error.
*/
NV_DRM_DEV_LOG_INFO(nv_dev, "NUMA was not set up yet; ignoring this device");
} else {
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to allocate NvKmsKapiDevice");
}
return -ENODEV;
}
@@ -787,6 +796,7 @@ static int nv_drm_dev_load(struct drm_device *dev)
}
#endif
nvKms->freeDevice(nv_dev->pDevice);
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to create DRM properties");
return -ENODEV;
}
@@ -1994,7 +2004,6 @@ void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)
/* Load DRM device before registering it */
if (nv_drm_dev_load(dev) != 0) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to load device");
goto failed_drm_load;
}

View File

@@ -114,6 +114,10 @@ MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core cha
static int fail_alloc_core_channel_method = -1;
module_param_named(fail_alloc_core_channel, fail_alloc_core_channel_method, int, 0400);
MODULE_PARM_DESC(debug, "Enable debug logging");
static int debug = 0;
module_param_named(debug, debug, int, 0600);
#if NVKMS_CONFIG_FILE_SUPPORTED
/* This parameter is used to find the dpy override conf file */
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)
@@ -190,6 +194,11 @@ NvBool nvkms_enable_overlay_layers(void)
return enable_overlay_layers;
}
NvBool nvkms_debug_logging(void)
{
return debug != 0;
}
NvBool nvkms_kernel_supports_syncpts(void)
{
/*

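The new parameter is surfaced through nvkms_debug_logging() (declared in the next file). A hedged sketch of one way a caller might gate verbose output on it; the NVKMS_DEBUG_PRINT macro and the NvBool stand-in typedef are invented for illustration, and plain printk is used rather than any NVKMS-internal logging helper:

#include <linux/printk.h>

typedef unsigned char NvBool;      /* stand-in for the nvtypes.h definition */
NvBool nvkms_debug_logging(void);  /* prototype declared in the next file */

/* Hypothetical wrapper: emit the message only when the nvidia-modeset
 * 'debug' module parameter is non-zero. */
#define NVKMS_DEBUG_PRINT(fmt, ...)                              \
    do {                                                         \
        if (nvkms_debug_logging())                               \
            printk(KERN_DEBUG "nvkms: " fmt, ##__VA_ARGS__);     \
    } while (0)

Because the parameter is registered with 0600 permissions, it can presumably be toggled at runtime through /sys/module/nvidia_modeset/parameters/debug as well as set at load time (e.g. modprobe nvidia-modeset debug=1).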
View File

@@ -119,6 +119,7 @@ NvBool nvkms_vblank_sem_control(void);
NvBool nvkms_opportunistic_display_sync(void);
enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void);
NvBool nvkms_enable_overlay_layers(void);
NvBool nvkms_debug_logging(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

View File

@@ -67,6 +67,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_TYPE_COMPILE_TESTS += sg_dma_page_iter
NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_device_coherent_present
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup

View File

@@ -143,7 +143,7 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
{
uvm_gpu_t *gpu = gpu_va_space->gpu;
int residency;
bool cdmm_enabled = gpu->mem_info.cdmm_enabled;
bool cdmm_enabled = gpu->parent->cdmm_enabled;
if (gpu->parent->is_integrated_gpu || cdmm_enabled) {
residency = gpu->parent->closest_cpu_numa_node;

View File

@@ -150,13 +150,8 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
gpu->mem_info.numa.enabled = true;
gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
gpu->mem_info.cdmm_enabled = false;
}
else {
// TODO: Bug 5273146: Use RM control call to detect CDMM mode.
if (uvm_parent_gpu_is_coherent(gpu->parent))
gpu->mem_info.cdmm_enabled = true;
gpu->mem_info.numa.node_id = NUMA_NO_NODE;
}
@@ -248,16 +243,15 @@ static NV_STATUS alloc_and_init_address_space(uvm_gpu_t *gpu)
return NV_OK;
}
int uvm_device_p2p_static_bar(uvm_gpu_t *gpu)
int uvm_device_p2p_static_bar(uvm_parent_gpu_t *parent_gpu)
{
return nv_bar_index_to_os_bar_index(gpu->parent->pci_dev, NV_GPU_BAR_INDEX_FB);
return nv_bar_index_to_os_bar_index(parent_gpu->pci_dev, NV_GPU_BAR_INDEX_FB);
}
static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
{
NV_STATUS status;
UvmGpuFbInfo fb_info = {0};
unsigned long pci_bar1_addr = pci_resource_start(gpu->parent->pci_dev, uvm_device_p2p_static_bar(gpu));
status = uvm_rm_locked_call(nvUvmInterfaceGetFbInfo(uvm_gpu_device_handle(gpu), &fb_info));
if (status != NV_OK)
@@ -270,9 +264,6 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
}
gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
gpu->mem_info.static_bar1_start = pci_bar1_addr + fb_info.staticBar1StartOffset;
gpu->mem_info.static_bar1_size = fb_info.staticBar1Size;
gpu->mem_info.static_bar1_write_combined = fb_info.bStaticBar1WriteCombined;
return NV_OK;
}
@@ -1443,8 +1434,16 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
if (status != NV_OK)
return status;
if (!fb_info.bZeroFb)
if (!fb_info.bZeroFb) {
unsigned long pci_bar1_addr = pci_resource_start(parent_gpu->pci_dev, uvm_device_p2p_static_bar(parent_gpu));
parent_gpu->max_allocatable_address = fb_info.maxAllocatableAddress;
parent_gpu->static_bar1_start = pci_bar1_addr + fb_info.staticBar1StartOffset;
parent_gpu->static_bar1_size = fb_info.staticBar1Size;
parent_gpu->static_bar1_write_combined = fb_info.bStaticBar1WriteCombined;
}
parent_gpu->cdmm_enabled = gpu_info->cdmmEnabled;
parent_gpu->virt_mode = gpu_info->virtMode;
if (parent_gpu->virt_mode == UVM_VIRT_MODE_LEGACY) {
@@ -1493,6 +1492,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
return status;
}
uvm_pmm_gpu_device_p2p_init(parent_gpu);
status = uvm_ats_add_gpu(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_ats_add_gpu failed: %s, GPU %s\n",
@@ -1597,7 +1598,7 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}
uvm_pmm_gpu_device_p2p_init(gpu);
uvm_mutex_init(&gpu->device_p2p_lock, UVM_LOCK_ORDER_GLOBAL);
status = init_semaphore_pools(gpu);
if (status != NV_OK) {
@@ -1731,6 +1732,8 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
// Return ownership to RM
uvm_parent_gpu_deinit_isr(parent_gpu);
uvm_pmm_gpu_device_p2p_deinit(parent_gpu);
uvm_pmm_devmem_deinit(parent_gpu);
uvm_ats_remove_gpu(parent_gpu);
@@ -1786,8 +1789,6 @@ static void deinit_gpu(uvm_gpu_t *gpu)
deinit_semaphore_pools(gpu);
uvm_pmm_gpu_device_p2p_deinit(gpu);
uvm_pmm_gpu_deinit(&gpu->pmm);
if (gpu->rm_address_space != 0)

View File

@@ -711,21 +711,6 @@ struct uvm_gpu_struct
int node_id;
} numa;
// Coherent Driver-based Memory Management (CDMM) is a mode that allows
// coherent GPU memory to be managed by the driver and not the OS. This
// is done by the driver not onlining the memory as NUMA nodes. Having
// the field provides the most flexibility and is in sync with the NUMA
// properties above. CDMM as a property applies to the entire system.
bool cdmm_enabled;
// Physical address of the start of statically mapped fb memory in BAR1
NvU64 static_bar1_start;
// Size of statically mapped fb memory in BAR1.
NvU64 static_bar1_size;
// Whether or not RM has iomapped the region write combined.
NvBool static_bar1_write_combined;
} mem_info;
struct
@@ -941,9 +926,6 @@ struct uvm_gpu_struct
// Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
bool uvm_test_force_upper_pushbuffer_segment;
// Have we initialised device p2p pages.
bool device_p2p_initialised;
// Used to protect allocation of p2p_mem and assignment of the page
// zone_device_data fields.
uvm_mutex_t device_p2p_lock;
@@ -1014,10 +996,28 @@ struct uvm_parent_gpu_struct
// Total amount of physical memory available on the parent GPU.
NvU64 max_allocatable_address;
#if UVM_IS_CONFIG_HMM()
#if UVM_IS_CONFIG_HMM() || defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
uvm_pmm_gpu_devmem_t *devmem;
#endif
// Physical address of the start of statically mapped fb memory in BAR1
NvU64 static_bar1_start;
// Size of statically mapped fb memory in BAR1.
NvU64 static_bar1_size;
// Whether or not RM has iomapped the region write combined.
NvBool static_bar1_write_combined;
// Have we initialised device p2p pages.
bool device_p2p_initialised;
// Coherent Driver-based Memory Management (CDMM) is a mode that allows
// coherent GPU memory to be managed by the driver and not the OS. This
// is done by the driver not onlining the memory as NUMA nodes. CDMM as a
// property applies to the entire system.
bool cdmm_enabled;
// The physical address range addressable by the GPU
//
// The GPU has its NV_PFB_XV_UPPER_ADDR register set by RM to
@@ -1867,6 +1867,6 @@ typedef enum
} uvm_gpu_buffer_flush_mode_t;
// PCIe BAR containing static framebuffer memory mappings for PCIe P2P
int uvm_device_p2p_static_bar(uvm_gpu_t *gpu);
int uvm_device_p2p_static_bar(uvm_parent_gpu_t *gpu);
#endif // __UVM_GPU_H__

View File

@@ -1576,7 +1576,7 @@ static NV_STATUS service_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
}
}
else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
if (!gpu_va_space->gpu->mem_info.cdmm_enabled) {
if (!gpu_va_space->gpu->parent->cdmm_enabled) {
status = service_notification_ats(gpu_va_space, mm, access_counters, index, out_index);
}
else {

View File

@@ -85,6 +85,12 @@
#define UVM_THREAD_AFFINITY_SUPPORTED() 1
#if defined(CONFIG_ZONE_DEVICE) && defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
#define UVM_CDMM_PAGES_SUPPORTED() 1
#else
#define UVM_CDMM_PAGES_SUPPORTED() 0
#endif
#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_DEVICE_RANGE_PRESENT)
#define UVM_IS_CONFIG_HMM() 1
#else

View File

@@ -1020,7 +1020,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
.fail_on_unresolved_sto_errors = false,
};
if (dest_gpu && dest_gpu->mem_info.cdmm_enabled) {
if (dest_gpu && dest_gpu->parent->cdmm_enabled) {
uvm_migrate_args.dst_id = UVM_ID_CPU;
uvm_migrate_args.dst_node_id = dest_gpu->parent->closest_cpu_numa_node;
uvm_migrate_args.populate_on_cpu_alloc_failures = true;

View File

@@ -295,7 +295,7 @@ static void *uvm_mmu_page_table_cpu_map(uvm_gpu_t *gpu, uvm_mmu_page_table_alloc
{
// CDMM implies there are no struct pages corresponding to the
// GPU memory physical address.
if (gpu->mem_info.cdmm_enabled) {
if (gpu->parent->cdmm_enabled) {
NvU64 addr = uvm_gpu_chunk_to_sys_addr(&gpu->pmm, phys_alloc->handle.chunk);
// Use cached access on coherent systems; there should be no conflicts
// for the vidmem region.
@@ -312,7 +312,7 @@ static void *uvm_mmu_page_table_cpu_map(uvm_gpu_t *gpu, uvm_mmu_page_table_alloc
static void uvm_mmu_page_table_cpu_unmap(uvm_gpu_t *gpu, uvm_mmu_page_table_alloc_t *phys_alloc, void *ptr)
{
if (gpu->mem_info.cdmm_enabled)
if (gpu->parent->cdmm_enabled)
nv_iounmap(ptr, PAGE_SIZE);
else
kunmap(uvm_mmu_page_table_page(gpu, phys_alloc));

View File

@@ -3246,11 +3246,117 @@ err:
return NULL;
}
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
unsigned long devmem_start = gpu->parent->devmem->pagemap.range.start;
return (devmem_start + chunk->address) >> PAGE_SHIFT;
}
#else // UVM_IS_CONFIG_HMM()
static void *devmem_alloc_pagemap(unsigned long size) { return NULL; }
static void *devmem_reuse_pagemap(unsigned long size) { return NULL; }
#endif // UVM_IS_CONFIG_HMM()
#if (UVM_CDMM_PAGES_SUPPORTED() || defined(CONFIG_PCI_P2PDMA)) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA)
static void device_p2p_page_free_wake(struct nv_kref *ref)
{
uvm_device_p2p_mem_t *p2p_mem = container_of(ref, uvm_device_p2p_mem_t, refcount);
wake_up(&p2p_mem->waitq);
}
static void device_p2p_page_free(struct page *page)
{
uvm_device_p2p_mem_t *p2p_mem = page->zone_device_data;
page->zone_device_data = NULL;
nv_kref_put(&p2p_mem->refcount, device_p2p_page_free_wake);
}
#endif
#if UVM_CDMM_PAGES_SUPPORTED()
static void device_coherent_page_free(struct page *page)
{
device_p2p_page_free(page);
}
static const struct dev_pagemap_ops uvm_device_coherent_pgmap_ops =
{
.page_free = device_coherent_page_free,
};
static NV_STATUS uvm_pmm_cdmm_init(uvm_parent_gpu_t *parent_gpu)
{
uvm_pmm_gpu_devmem_t *devmem;
void *ptr;
NV_STATUS status;
UVM_ASSERT(!uvm_hmm_is_enabled_system_wide());
list_for_each_entry(devmem, &g_uvm_global.devmem_ranges.list, list_node) {
if (devmem->pagemap.range.start == parent_gpu->system_bus.memory_window_start) {
UVM_ASSERT(devmem->pagemap.type == MEMORY_DEVICE_COHERENT);
UVM_ASSERT(devmem->pagemap.range.end ==
SUBSECTION_ALIGN_UP(parent_gpu->system_bus.memory_window_end >> PAGE_SHIFT) << PAGE_SHIFT);
list_del(&devmem->list_node);
parent_gpu->devmem = devmem;
parent_gpu->device_p2p_initialised = true;
return NV_OK;
}
}
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
if (!devmem)
goto err;
devmem->size = parent_gpu->system_bus.memory_window_end - parent_gpu->system_bus.memory_window_start;
devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
devmem->pagemap.range.start = parent_gpu->system_bus.memory_window_start;
devmem->pagemap.range.end = SUBSECTION_ALIGN_UP(parent_gpu->system_bus.memory_window_end >> PAGE_SHIFT) << PAGE_SHIFT;
devmem->pagemap.nr_range = 1;
devmem->pagemap.ops = &uvm_device_coherent_pgmap_ops;
devmem->pagemap.owner = &g_uvm_global;
// NUMA node ID doesn't matter for ZONE_DEVICE coherent pages.
ptr = memremap_pages(&devmem->pagemap, NUMA_NO_NODE);
if (IS_ERR(ptr)) {
UVM_ERR_PRINT("memremap_pages() err %ld\n", PTR_ERR(ptr));
status = errno_to_nv_status(PTR_ERR(ptr));
goto err_free;
}
parent_gpu->devmem = devmem;
parent_gpu->device_p2p_initialised = true;
return NV_OK;
err_free:
kfree(devmem);
err:
return NV_ERR_NOT_SUPPORTED;
}
static void uvm_pmm_cdmm_deinit(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->device_p2p_initialised = false;
list_add_tail(&parent_gpu->devmem->list_node, &g_uvm_global.devmem_ranges.list);
parent_gpu->devmem = NULL;
}
#else // UVM_CDMM_PAGES_SUPPORTED
static NV_STATUS uvm_pmm_cdmm_init(uvm_parent_gpu_t *parent_gpu) { return NV_OK; }
static void uvm_pmm_cdmm_deinit(uvm_parent_gpu_t *parent_gpu) {}
#endif // UVM_CDMM_PAGES_SUPPORTED
#if UVM_IS_CONFIG_HMM() || UVM_CDMM_PAGES_SUPPORTED()
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
{
// Create a DEVICE_PRIVATE page for every GPU page available on the parent.
unsigned long size = gpu->max_allocatable_address;
if (gpu->cdmm_enabled)
return uvm_pmm_cdmm_init(gpu);
if (!uvm_hmm_is_enabled_system_wide()) {
gpu->devmem = NULL;
return NV_OK;
@@ -3268,6 +3374,11 @@ NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu)
{
if (gpu->cdmm_enabled && gpu->devmem) {
uvm_pmm_cdmm_deinit(gpu);
return;
}
if (!gpu->devmem)
return;
@@ -3282,31 +3393,18 @@ void uvm_pmm_devmem_exit(void)
list_for_each_entry_safe(devmem, devmem_next, &g_uvm_global.devmem_ranges.list, list_node) {
list_del(&devmem->list_node);
memunmap_pages(&devmem->pagemap);
release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
kfree(devmem);
}
}
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
unsigned long devmem_start = gpu->parent->devmem->pagemap.range.start;
return (devmem_start + chunk->address) >> PAGE_SHIFT;
}
#endif // UVM_IS_CONFIG_HMM()
#else
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu) { return NV_OK; }
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu) {}
void uvm_pmm_devmem_exit(void) {}
#endif
#if !UVM_IS_CONFIG_HMM()
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu)
{
return NV_OK;
}
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *gpu)
{
}
static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
{
return true;
@@ -3318,41 +3416,32 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
// TODO: Bug 5303506: ARM64: P2PDMA pages cannot be accessed from the CPU on
// ARM
#if defined(CONFIG_PCI_P2PDMA) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA) && !defined(NVCPU_AARCH64)
static void device_p2p_page_free_wake(struct nv_kref *ref)
{
uvm_device_p2p_mem_t *p2p_mem = container_of(ref, uvm_device_p2p_mem_t, refcount);
wake_up(&p2p_mem->waitq);
}
static void device_p2p_page_free(struct page *page)
{
uvm_device_p2p_mem_t *p2p_mem = page->zone_device_data;
page->zone_device_data = NULL;
nv_kref_put(&p2p_mem->refcount, device_p2p_page_free_wake);
}
static const struct dev_pagemap_ops uvm_device_p2p_pgmap_ops =
{
.page_free = device_p2p_page_free,
};
void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *parent_gpu)
{
unsigned long pci_start_pfn = pci_resource_start(gpu->parent->pci_dev,
uvm_device_p2p_static_bar(gpu)) >> PAGE_SHIFT;
unsigned long pci_end_pfn = pci_start_pfn + (gpu->mem_info.static_bar1_size >> PAGE_SHIFT);
unsigned long pci_start_pfn = pci_resource_start(parent_gpu->pci_dev,
uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT;
unsigned long pci_end_pfn = pci_start_pfn + (parent_gpu->static_bar1_size >> PAGE_SHIFT);
struct page *p2p_page;
gpu->device_p2p_initialised = false;
uvm_mutex_init(&gpu->device_p2p_lock, UVM_LOCK_ORDER_GLOBAL);
if (uvm_parent_gpu_is_coherent(parent_gpu)) {
// P2PDMA support with CDMM enabled requires special
// MEMORY_DEVICE_COHERENT pages to have been allocated, which will also
// have set the p2p initialised state if successful.
if (parent_gpu->cdmm_enabled)
return;
if (uvm_parent_gpu_is_coherent(gpu->parent)) {
// A coherent system uses normal struct pages.
gpu->device_p2p_initialised = true;
parent_gpu->device_p2p_initialised = true;
return;
}
parent_gpu->device_p2p_initialised = false;
// RM sets static_bar1_size when it has created a contiguous BAR mapping
// large enough to cover all of GPU memory that will be allocated to
// userspace buffers. This is required to support the P2PDMA feature to
@@ -3364,10 +3453,10 @@ void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
// can be disabled by setting the RmForceDisableIomapWC regkey which allows
// creation of the P2PDMA pages.
// TODO: Bug 5044562: P2PDMA pages require the PCIe BAR to be mapped UC
if (!gpu->mem_info.static_bar1_size || gpu->mem_info.static_bar1_write_combined)
if (!parent_gpu->static_bar1_size || parent_gpu->static_bar1_write_combined)
return;
if (pci_p2pdma_add_resource(gpu->parent->pci_dev, uvm_device_p2p_static_bar(gpu), 0, 0)) {
if (pci_p2pdma_add_resource(parent_gpu->pci_dev, uvm_device_p2p_static_bar(parent_gpu), 0, 0)) {
UVM_ERR_PRINT("Unable to initialse PCI P2PDMA pages\n");
return;
}
@@ -3383,46 +3472,40 @@ void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
for (; page_to_pfn(p2p_page) < pci_end_pfn; p2p_page++)
p2p_page->zone_device_data = NULL;
gpu->device_p2p_initialised = true;
parent_gpu->device_p2p_initialised = true;
}
void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu)
void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *parent_gpu)
{
unsigned long pci_start_pfn = pci_resource_start(gpu->parent->pci_dev,
uvm_device_p2p_static_bar(gpu)) >> PAGE_SHIFT;
unsigned long pci_start_pfn = pci_resource_start(parent_gpu->pci_dev,
uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT;
struct page *p2p_page;
if (gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(gpu->parent)) {
if (parent_gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(parent_gpu)) {
p2p_page = pfn_to_page(pci_start_pfn);
devm_memunmap_pages(&gpu->parent->pci_dev->dev, page_pgmap(p2p_page));
devm_memunmap_pages(&parent_gpu->pci_dev->dev, page_pgmap(p2p_page));
}
gpu->device_p2p_initialised = false;
parent_gpu->device_p2p_initialised = false;
}
#else // CONFIG_PCI_P2PDMA
// Coherent platforms can do P2PDMA without CONFIG_PCI_P2PDMA
void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *parent_gpu)
{
gpu->device_p2p_initialised = false;
uvm_mutex_init(&gpu->device_p2p_lock, UVM_LOCK_ORDER_GLOBAL);
if (uvm_parent_gpu_is_coherent(gpu->parent)) {
// CDMM implies that there are no struct pages corresponding to
// the GPU memory. P2PDMA struct pages which are required for
// device P2P mappings are not currently supported on ARM.
if (gpu->mem_info.cdmm_enabled)
if (uvm_parent_gpu_is_coherent(parent_gpu)) {
if (parent_gpu->cdmm_enabled)
return;
// A coherent system uses normal struct pages.
gpu->device_p2p_initialised = true;
parent_gpu->device_p2p_initialised = true;
return;
}
}
void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu)
void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *parent_gpu)
{
gpu->device_p2p_initialised = false;
parent_gpu->device_p2p_initialised = false;
}
#endif // CONFIG_PCI_P2PDMA
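As a worked illustration of the PFN arithmetic in uvm_pmm_gpu_devmem_get_pfn() above, here is a small sketch with invented addresses; only the (range.start + chunk->address) >> PAGE_SHIFT formula comes from the code:

#include <linux/mm.h> /* PAGE_SHIFT */

/* Sketch only: assume a coherent memory window (pagemap.range.start) of
 * 0x4_0000_0000 and a GPU chunk at offset 0x20_0000. On a 4 KiB-page kernel
 * the ZONE_DEVICE PFN is (0x400000000 + 0x200000) >> 12 == 0x400200. */
static unsigned long example_devmem_pfn(void)
{
    unsigned long devmem_start  = 0x400000000UL; /* assumed range start */
    unsigned long chunk_address = 0x200000UL;    /* assumed chunk->address */

    return (devmem_start + chunk_address) >> PAGE_SHIFT;
}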

View File

@@ -190,8 +190,7 @@ typedef uvm_chunk_size_t uvm_chunk_sizes_mask_t;
typedef struct uvm_pmm_gpu_chunk_suballoc_struct uvm_pmm_gpu_chunk_suballoc_t;
#if UVM_IS_CONFIG_HMM()
#if UVM_IS_CONFIG_HMM() || defined(NV_MEMORY_DEVICE_COHERENT_PRESENT)
typedef struct
{
// For g_uvm_global.devmem_ranges
@@ -205,7 +204,9 @@ typedef struct
struct dev_pagemap pagemap;
} uvm_pmm_gpu_devmem_t;
#endif
#if UVM_IS_CONFIG_HMM()
typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;
// Return the GPU chunk for a given device private struct page.
@@ -219,19 +220,18 @@ uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page);
// Return the PFN of the device private struct page for the given GPU chunk.
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
#endif
// Allocate and initialise struct page data in the kernel to support HMM.
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu);
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *parent_gpu);
void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *gpu);
void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *gpu);
// Free unused ZONE_DEVICE pages.
void uvm_pmm_devmem_exit(void);
#else
static inline void uvm_pmm_devmem_exit(void)
{
}
#endif
void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu);
void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu);
struct uvm_gpu_chunk_struct
{
// Physical address of GPU chunk. This may be removed to save memory
@@ -627,10 +627,6 @@ static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_si
// retained, and it's up to the caller to release them.
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings);
// Allocate and initialise struct page data in the kernel to support HMM.
NV_STATUS uvm_pmm_devmem_init(uvm_parent_gpu_t *gpu);
void uvm_pmm_devmem_deinit(uvm_parent_gpu_t *parent_gpu);
// Iterates over every size in the input mask from smallest to largest
#define for_each_chunk_size(__size, __chunk_sizes) \
for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) : \

View File

@@ -47,6 +47,11 @@ void uvm_va_range_device_p2p_exit(void)
kmem_cache_destroy_safe(&g_uvm_va_range_device_p2p_cache);
}
static bool device_p2p_uses_zone_device(uvm_parent_gpu_t *gpu)
{
return !uvm_parent_gpu_is_coherent(gpu) || gpu->cdmm_enabled;
}
static NvU64 p2p_mem_page_count(uvm_device_p2p_mem_t *p2p_mem)
{
return (p2p_mem->pfn_count * p2p_mem->page_size) >> PAGE_SHIFT;
@@ -74,9 +79,11 @@ void uvm_va_range_free_device_p2p_mem(uvm_device_p2p_mem_t *p2p_mem)
NvU64 i;
uvm_gpu_t *gpu = p2p_mem->gpu;
// In the coherent case we don't hold references on the page because RM does
// via the duplicated handle.
if (!uvm_parent_gpu_is_coherent(gpu->parent)) {
// If normal, non-zone-device pages are used to provide device p2p
// functionality, RM will already hold a reference on the page via the
// duplicated handle. In that case UVM did not take a reference on the page,
// so it does not need to drop one here.
if (device_p2p_uses_zone_device(gpu->parent)) {
uvm_mutex_lock(&gpu->device_p2p_lock);
// It's possible that another range has been setup for the handle since
@@ -128,13 +135,13 @@ static void deinit_device_p2p_mem(uvm_device_p2p_mem_t *p2p_mem, struct list_hea
// scheduling work which may not happen holding the va_space lock. Coherent
// systems don't need to take the lock because the p2p_mem objects are not
// shared between multiple va_ranges.
if (!uvm_parent_gpu_is_coherent(p2p_mem->gpu->parent))
if (device_p2p_uses_zone_device(p2p_mem->gpu->parent))
uvm_mutex_lock_nested(&p2p_mem->gpu->device_p2p_lock);
p2p_mem->deferred_free_list = deferred_free_list;
nv_kref_put(&p2p_mem->va_range_count, put_device_p2p_mem);
if (!uvm_parent_gpu_is_coherent(p2p_mem->gpu->parent))
if (device_p2p_uses_zone_device(p2p_mem->gpu->parent))
uvm_mutex_unlock_nested(&p2p_mem->gpu->device_p2p_lock);
}
@@ -225,6 +232,8 @@ static NV_STATUS get_gpu_pfns(uvm_gpu_t *gpu,
// start address or system memory start address and right shifting by
// PAGE_SHIFT.
for (i = 0; i < ext_mapping_info.numWrittenPhysAddrs; i++)
// MEMORY_DEVICE_COHERENT pages are in the system memory window so are
// the same as normal struct pages for the purposes of calculating pfn.
if (uvm_parent_gpu_is_coherent(gpu->parent)) {
NvU64 last_pfn = gpu->parent->system_bus.memory_window_end >> PAGE_SHIFT;
@@ -234,9 +243,9 @@ static NV_STATUS get_gpu_pfns(uvm_gpu_t *gpu,
return NV_ERR_INVALID_ADDRESS;
}
else {
NvU64 last_pfn = ((gpu->mem_info.static_bar1_start + gpu->mem_info.static_bar1_size) >> PAGE_SHIFT) - 1;
NvU64 last_pfn = ((gpu->parent->static_bar1_start + gpu->parent->static_bar1_size) >> PAGE_SHIFT) - 1;
pfns[i] = (gpu->mem_info.static_bar1_start + pfns[i]) >> PAGE_SHIFT;
pfns[i] = (gpu->parent->static_bar1_start + pfns[i]) >> PAGE_SHIFT;
UVM_ASSERT(pfns[i] <= last_pfn);
if (pfns[i] > last_pfn)
return NV_ERR_INVALID_ADDRESS;
@@ -253,27 +262,26 @@ static bool pci_p2pdma_page_free(struct page *page) {
// page->zone_device_data does not exist in kernel versions older than v5.3
// which don't support CONFIG_PCI_P2PDMA. Therefore we need these accessor
// functions to ensure compilation succeeds on older kernels.
static void pci_p2pdma_page_set_zone_device_data(struct page *page, void *zone_device_data)
static void page_set_zone_device_data(struct page *page, void *zone_device_data)
{
page->zone_device_data = zone_device_data;
}
static void *pci_p2pdma_page_get_zone_device_data(struct page *page)
static void *page_get_zone_device_data(struct page *page)
{
return page->zone_device_data;
}
#else
static bool pci_p2pdma_page_free(struct page *page) {
UVM_ASSERT(0);
return false;
}
static void pci_p2pdma_page_set_zone_device_data(struct page *page, void *zone_device_data)
static void page_set_zone_device_data(struct page *page, void *zone_device_data)
{
UVM_ASSERT(0);
}
static void *pci_p2pdma_page_get_zone_device_data(struct page *page)
static void *page_get_zone_device_data(struct page *page)
{
UVM_ASSERT(0);
return NULL;
@@ -335,7 +343,7 @@ static NV_STATUS alloc_device_p2p_mem(uvm_gpu_t *gpu,
for (i = 0; i < p2p_mem_page_count(p2p_mem); i++) {
struct page *page = p2p_mem_get_page(p2p_mem, i);
if (!pci_p2pdma_page_free(page)) {
if (!gpu->parent->cdmm_enabled && !pci_p2pdma_page_free(page)) {
UVM_ASSERT(0);
// This will leak the RM handle because we don't release it.
@@ -345,7 +353,23 @@ static NV_STATUS alloc_device_p2p_mem(uvm_gpu_t *gpu,
return NV_ERR_INVALID_ARGUMENT;
}
pci_p2pdma_page_set_zone_device_data(page, p2p_mem);
page_set_zone_device_data(page, p2p_mem);
#if UVM_CDMM_PAGES_SUPPORTED()
// RM doesn't use DEVICE_COHERENT pages and therefore won't already hold
// a reference to them, so take one now if using DEVICE_COHERENT pages.
if (gpu->parent->cdmm_enabled) {
get_page(page);
get_dev_pagemap(page_to_pfn(page), NULL);
}
#else
// CDMM P2PDMA will never be enabled for this case
if (gpu->parent->cdmm_enabled) {
UVM_ASSERT(0);
break;
}
#endif
nv_kref_get(&p2p_mem->refcount);
}
@@ -400,7 +424,7 @@ static NV_STATUS alloc_pci_device_p2p(uvm_gpu_t *gpu,
// also ensures if we don't find a p2p_mem object that we don't race with
// some other thread assigning or clearing zone_device_data.
uvm_mutex_lock(&gpu->device_p2p_lock);
p2p_mem = pci_p2pdma_page_get_zone_device_data(pfn_to_page(pfn));
p2p_mem = page_get_zone_device_data(pfn_to_page(pfn));
if (!p2p_mem) {
// We have not previously allocated p2pdma pages for this RM handle so do
// so now.
@@ -513,12 +537,12 @@ NV_STATUS uvm_api_alloc_device_p2p(UVM_ALLOC_DEVICE_P2P_PARAMS *params, struct f
if (!gpu)
return NV_ERR_INVALID_DEVICE;
if (!gpu->device_p2p_initialised) {
if (!gpu->parent->device_p2p_initialised) {
status = NV_ERR_NOT_SUPPORTED;
goto out_release;
}
if (uvm_parent_gpu_is_coherent(gpu->parent)) {
if (uvm_parent_gpu_is_coherent(gpu->parent) && !device_p2p_uses_zone_device(gpu->parent)) {
status = alloc_coherent_device_p2p(gpu, params->hClient, params->hMemory, &p2p_mem);
if (status != NV_OK)
goto out_release;

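Similarly, for the non-coherent branch of get_gpu_pfns() above, a short sketch of the static BAR1 rebasing with invented addresses; only the (static_bar1_start + offset) >> PAGE_SHIFT formula comes from the code:

#include <linux/mm.h> /* PAGE_SHIFT */

/* Sketch only: RM reports addresses relative to the static BAR1 mapping and
 * UVM rebases them onto the BAR1 CPU physical address before shifting to a
 * PFN. Assuming a BAR1 start of 0x38_0000_0000 and an offset of 0x10000:
 * (0x3800000000 + 0x10000) >> 12 == 0x3800010. */
static unsigned long example_static_bar1_pfn(void)
{
    unsigned long static_bar1_start = 0x3800000000UL; /* assumed BAR1 CPU PA */
    unsigned long rm_offset         = 0x10000UL;      /* assumed RM offset   */

    return (static_bar1_start + rm_offset) >> PAGE_SHIFT;
}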
View File

@@ -940,7 +940,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
*numa_enabled = NV_TRUE;
*numa_node_id = (NvS32)uvm_gpu_numa_node(gpu);
}
else if (gpu->parent->is_integrated_gpu || gpu->mem_info.cdmm_enabled) {
else if (gpu->parent->is_integrated_gpu || gpu->parent->cdmm_enabled) {
*numa_enabled = NV_FALSE;
*numa_node_id = (NvS32)gpu->parent->closest_cpu_numa_node;
}

View File

@@ -83,6 +83,7 @@ static NvU32 nvidia_modeset_enumerate_gpus(nv_gpu_info_t *gpu_info)
for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
{
nv_state_t *nv = NV_STATE_PTR(nvl);
int numa_status = nv_get_numa_status(nvl);
/*
* The gpu_info[] array has NV_MAX_GPUS elements. Fail if there
@@ -102,6 +103,10 @@ static NvU32 nvidia_modeset_enumerate_gpus(nv_gpu_info_t *gpu_info)
gpu_info[count].pci_info.slot = nv->pci_info.slot;
gpu_info[count].pci_info.function = nv->pci_info.function;
gpu_info[count].needs_numa_setup =
numa_status != NV_IOCTL_NUMA_STATUS_DISABLED &&
numa_status != NV_IOCTL_NUMA_STATUS_ONLINE;
gpu_info[count].os_device_ptr = nvl->dev;
count++;

View File

@@ -2458,6 +2458,12 @@ nvidia_ioctl(
{
nv_ioctl_wait_open_complete_t *params = arg_copy;
if (arg_size != sizeof(nv_ioctl_wait_open_complete_t))
{
status = -EINVAL;
goto done_early;
}
params->rc = nvlfp->open_rc;
params->adapterStatus = nvlfp->adapter_status;
goto done_early;
@@ -2538,8 +2544,12 @@ nvidia_ioctl(
goto done;
}
/* atomically check and alloc attached_gpus */
down(&nvl->ldata_lock);
if (nvlfp->num_attached_gpus != 0)
{
up(&nvl->ldata_lock);
status = -EINVAL;
goto done;
}
@@ -2547,12 +2557,15 @@ nvidia_ioctl(
NV_KMALLOC(nvlfp->attached_gpus, arg_size);
if (nvlfp->attached_gpus == NULL)
{
up(&nvl->ldata_lock);
status = -ENOMEM;
goto done;
}
memcpy(nvlfp->attached_gpus, arg_copy, arg_size);
nvlfp->num_attached_gpus = num_arg_gpus;
up(&nvl->ldata_lock);
for (i = 0; i < nvlfp->num_attached_gpus; i++)
{
if (nvlfp->attached_gpus[i] == 0)
@@ -2568,9 +2581,14 @@ nvidia_ioctl(
nvidia_dev_put(nvlfp->attached_gpus[i], sp);
}
/* atomically free attached_gpus */
down(&nvl->ldata_lock);
NV_KFREE(nvlfp->attached_gpus, arg_size);
nvlfp->num_attached_gpus = 0;
up(&nvl->ldata_lock);
status = -EINVAL;
break;
}
@@ -4504,18 +4522,18 @@ nvidia_suspend(
down(&nvl->ldata_lock);
if (!nv->is_pm_supported)
{
status = NV_ERR_NOT_SUPPORTED;
goto done;
}
if (((nv->flags & NV_FLAG_OPEN) == 0) &&
((nv->flags & NV_FLAG_PERSISTENT_SW_STATE) == 0))
{
goto done;
}
if (nv->is_pm_unsupported)
{
status = NV_ERR_NOT_SUPPORTED;
goto done;
}
if ((nv->flags & NV_FLAG_SUSPENDED) != 0)
{
nvl->suspend_count++;