580.105.08

2026-02-02 22:47:25 +00:00 · 2025-11-04 12:45:59 -08:00
parent 2b436058a6
commit 2af9f1f0f7
112 changed files with 56415 additions and 54596 deletions
--- a/kernel-open/Kbuild
+++ b/kernel-open/Kbuild
@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
 ccflags-y += -I$(src)
 ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
-ccflags-y += -DNV_VERSION_STRING=\"580.95.05\"
+ccflags-y += -DNV_VERSION_STRING=\"580.105.08\"

 # Include and link Tegra out-of-tree modules.
 ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),)
--- a/kernel-open/common/inc/nv-modeset-interface.h
+++ b/kernel-open/common/inc/nv-modeset-interface.h
@@ -58,6 +58,12 @@ typedef struct {
     */
    void (*suspend)(NvU32 gpu_id);
    void (*resume)(NvU32 gpu_id);
+
+    /* Remove callback, called when a device is going away completely. */
+    void (*remove)(NvU32 gpu_id);
+
+    /* Probe callback, called when a device is being hotplugged. */
+    void (*probe)(const nv_gpu_info_t *gpu_info);
 } nvidia_modeset_callbacks_t;

 /*
--- a/kernel-open/common/inc/nv-proto.h
+++ b/kernel-open/common/inc/nv-proto.h
@@ -78,6 +78,8 @@ NV_STATUS     nv_set_system_power_state (nv_power_state_t, nv_pm_action_depth_t)

 void          nvidia_modeset_suspend           (NvU32 gpuId);
 void          nvidia_modeset_resume            (NvU32 gpuId);
+void          nvidia_modeset_remove            (NvU32 gpuId);
+void          nvidia_modeset_probe             (const nv_linux_state_t *nvl);
 NvBool        nv_is_uuid_in_gpu_exclusion_list (const char *);

 NV_STATUS     nv_parse_per_device_option_string(nvidia_stack_t *sp);
--- a/kernel-open/common/inc/nv_uvm_types.h
+++ b/kernel-open/common/inc/nv_uvm_types.h
@@ -688,6 +688,13 @@ typedef struct UvmGpuInfo_tag

    // GPU setup in CDMM mode
    NvBool cdmmEnabled;
+
+    // If nvlDirectConnect is NV_TRUE,
+    // nvlDirectConnectMemoryWindowStart is the GPA base address for the GPU's vidmem over
+    // direct-connect NVLink. It is used when creating PTEs of GPU memory mappings
+    // to direct NVLink peers.
+    NvBool nvlDirectConnect;
+    NvU64 nvlDirectConnectMemoryWindowStart;
 } UvmGpuInfo;

 typedef struct UvmGpuFbInfo_tag
--- a/kernel-open/common/inc/nvkms-kapi.h
+++ b/kernel-open/common/inc/nvkms-kapi.h
@@ -598,13 +598,17 @@ typedef enum NvKmsKapiRegisterWaiterResultRec {
    NVKMS_KAPI_REG_WAITER_ALREADY_SIGNALLED,
 } NvKmsKapiRegisterWaiterResult;

-typedef void NvKmsKapiSuspendResumeCallbackFunc(NvBool suspend);
-
 struct NvKmsKapiGpuInfo {
    nv_gpu_info_t gpuInfo;
    MIGDeviceId   migDevice;
 };

+struct NvKmsKapiCallbacks {
+    void (*suspendResume)(NvBool suspend);
+    void (*remove)(NvU32 gpuId);
+    void (*probe)(const struct NvKmsKapiGpuInfo *gpu_info);
+};
+
 struct NvKmsKapiFunctionsTable {

    /*!
@@ -1473,12 +1477,12 @@ struct NvKmsKapiFunctionsTable {
    );

    /*!
-     * Set the callback function for suspending and resuming the display system.
+     * Set the pointer to the callback function table.
     */
    void
-    (*setSuspendResumeCallback)
+    (*setCallbacks)
    (
-        NvKmsKapiSuspendResumeCallbackFunc *function
+        const struct NvKmsKapiCallbacks *callbacks
    );

    /*!
--- a/kernel-open/common/inc/nvstatuscodes.h
+++ b/kernel-open/common/inc/nvstatuscodes.h
@@ -165,6 +165,7 @@ NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC,        0x00000087, "NVLink fabri
 NV_STATUS_CODE(NV_ERR_BUFFER_FULL,                     0x00000088, "Buffer is full")
 NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY,                    0x00000089, "Buffer is empty")
 NV_STATUS_CODE(NV_ERR_MC_FLA_OFFSET_TABLE_FULL,        0x0000008A, "Multicast FLA offset table has no available slots")
+NV_STATUS_CODE(NV_ERR_DMA_XFER_FAILED,                 0x0000008B, "DMA transfer failed")

 // Warnings:
 NV_STATUS_CODE(NV_WARN_HOT_SWITCH,                     0x00010001, "WARNING Hot switch")
--- a/kernel-open/common/inc/os-interface.h
+++ b/kernel-open/common/inc/os-interface.h
@@ -62,6 +62,11 @@ struct os_work_queue;
 /* Each OS defines its own version of this opaque type */
 typedef struct os_wait_queue os_wait_queue;

+/* Flags returned by os_get_current_process_flags() */
+#define OS_CURRENT_PROCESS_FLAG_NONE            0x0
+#define OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD   0x1
+#define OS_CURRENT_PROCESS_FLAG_EXITING         0x2
+
 /*
 * ---------------------------------------------------------------------------
 *
@@ -194,6 +199,7 @@ NV_STATUS   NV_API_CALL  os_open_readonly_file            (const char *, void **
 NV_STATUS   NV_API_CALL  os_open_and_read_file            (const char *, NvU8 *, NvU64);
 NvBool      NV_API_CALL  os_is_nvswitch_present           (void);
 NV_STATUS   NV_API_CALL  os_get_random_bytes              (NvU8 *, NvU16);
+NvU32       NV_API_CALL  os_get_current_process_flags     (void);
 NV_STATUS   NV_API_CALL  os_alloc_wait_queue              (os_wait_queue **);
 void        NV_API_CALL  os_free_wait_queue               (os_wait_queue *);
 void        NV_API_CALL  os_wait_uninterruptible          (os_wait_queue *);
--- a/kernel-open/nvidia-drm/nvidia-drm-drv.c
+++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c
@@ -101,6 +101,7 @@ static int nv_drm_revoke_modeset_permission(struct drm_device *dev,
                                            NvU32 dpyId);
 static int nv_drm_revoke_sub_ownership(struct drm_device *dev);

+static DEFINE_MUTEX(dev_list_mutex);
 static struct nv_drm_device *dev_list = NULL;

 static const char* nv_get_input_colorspace_name(
@@ -2067,8 +2068,10 @@ void nv_drm_register_drm_device(const struct NvKmsKapiGpuInfo *gpu_info)

    /* Add NVIDIA-DRM device into list */

+    mutex_lock(&dev_list_mutex);
    nv_dev->next = dev_list;
    dev_list = nv_dev;
+    mutex_unlock(&dev_list_mutex);

    return; /* Success */

@@ -2106,22 +2109,81 @@ int nv_drm_probe_devices(void)
 }
 #endif

+static struct nv_drm_device*
+nv_drm_pop_device(void)
+{
+    struct nv_drm_device *nv_dev;
+
+    mutex_lock(&dev_list_mutex);
+
+    nv_dev = dev_list;
+    if (nv_dev) {
+        dev_list = nv_dev->next;
+        nv_dev->next = NULL;
+    }
+
+    mutex_unlock(&dev_list_mutex);
+    return nv_dev;
+}
+
+static struct nv_drm_device*
+nv_drm_find_and_remove_device(NvU32 gpuId)
+{
+    struct nv_drm_device **pPrev = &dev_list;
+    struct nv_drm_device *nv_dev;
+
+    mutex_lock(&dev_list_mutex);
+    nv_dev = *pPrev;
+
+    while (nv_dev) {
+        if (nv_dev->gpu_info.gpu_id == gpuId) {
+            /* Remove it from the linked list */
+            *pPrev = nv_dev->next;
+            nv_dev->next = NULL;
+            break;
+        }
+
+        pPrev = &nv_dev->next;
+        nv_dev = *pPrev;
+    }
+
+    mutex_unlock(&dev_list_mutex);
+    return nv_dev;
+}
+
+static void nv_drm_dev_destroy(struct nv_drm_device *nv_dev)
+{
+    struct drm_device *dev = nv_dev->dev;
+
+    nv_drm_dev_unload(dev);
+    drm_dev_put(dev);
+    nv_drm_free(nv_dev);
+}
+
+/*
+ * Unregister a single NVIDIA DRM device.
+ */
+void nv_drm_remove(NvU32 gpuId)
+{
+    struct nv_drm_device *nv_dev = nv_drm_find_and_remove_device(gpuId);
+
+    if (nv_dev) {
+        NV_DRM_DEV_LOG_INFO(nv_dev, "Removing device");
+        drm_dev_unplug(nv_dev->dev);
+        nv_drm_dev_destroy(nv_dev);
+    }
+}
+
 /*
 * Unregister all NVIDIA DRM devices.
 */
 void nv_drm_remove_devices(void)
 {
-    while (dev_list != NULL) {
-        struct nv_drm_device *next = dev_list->next;
-        struct drm_device *dev = dev_list->dev;
+    struct nv_drm_device *nv_dev;

-        drm_dev_unregister(dev);
-        nv_drm_dev_unload(dev);
-        drm_dev_put(dev);
-
-        nv_drm_free(dev_list);
-
-        dev_list = next;
+    while ((nv_dev = nv_drm_pop_device())) {
+        drm_dev_unregister(nv_dev->dev);
+        nv_drm_dev_destroy(nv_dev);
    }
 }

@@ -2143,11 +2205,10 @@ void nv_drm_remove_devices(void)
 */
 void nv_drm_suspend_resume(NvBool suspend)
 {
-    static DEFINE_MUTEX(nv_drm_suspend_mutex);
    static NvU32 nv_drm_suspend_count = 0;
    struct nv_drm_device *nv_dev;

-    mutex_lock(&nv_drm_suspend_mutex);
+    mutex_lock(&dev_list_mutex);

    /*
     * Count the number of times the driver is asked to suspend. Suspend all DRM
@@ -2195,7 +2256,7 @@ void nv_drm_suspend_resume(NvBool suspend)
    }

 done:
-    mutex_unlock(&nv_drm_suspend_mutex);
+    mutex_unlock(&dev_list_mutex);
 }

 #endif /* NV_DRM_AVAILABLE */
--- a/kernel-open/nvidia-drm/nvidia-drm-drv.h
+++ b/kernel-open/nvidia-drm/nvidia-drm-drv.h
@@ -31,6 +31,7 @@ struct NvKmsKapiGpuInfo;

 int nv_drm_probe_devices(void);

+void nv_drm_remove(NvU32 gpuId);
 void nv_drm_remove_devices(void);

 void nv_drm_suspend_resume(NvBool suspend);
--- a/kernel-open/nvidia-drm/nvidia-drm.c
+++ b/kernel-open/nvidia-drm/nvidia-drm.c
@@ -33,6 +33,12 @@ static struct NvKmsKapiFunctionsTable nvKmsFuncsTable = {

 const struct NvKmsKapiFunctionsTable* const nvKms = &nvKmsFuncsTable;

+const struct NvKmsKapiCallbacks nv_drm_kapi_callbacks = {
+    .suspendResume = nv_drm_suspend_resume,
+    .remove = nv_drm_remove,
+    .probe = nv_drm_register_drm_device,
+};
+
 #endif

 int nv_drm_init(void)
@@ -45,7 +51,7 @@ int nv_drm_init(void)
        return -EINVAL;
    }

-    nvKms->setSuspendResumeCallback(nv_drm_suspend_resume);
+    nvKms->setCallbacks(&nv_drm_kapi_callbacks);
    return nv_drm_probe_devices();
 #else
    return 0;
@@ -55,7 +61,7 @@ int nv_drm_init(void)
 void nv_drm_exit(void)
 {
 #if defined(NV_DRM_AVAILABLE)
-    nvKms->setSuspendResumeCallback(NULL);
+    nvKms->setCallbacks(NULL);
    nv_drm_remove_devices();
 #endif
 }
--- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
+++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c
@@ -820,6 +820,20 @@ static void nvkms_resume(NvU32 gpuId)
    nvKmsKapiSuspendResume(NV_FALSE /* suspend */);
 }

+static void nvkms_remove(NvU32 gpuId)
+{
+    nvKmsKapiRemove(gpuId);
+
+    // Eventually, this function should also terminate all NVKMS clients and
+    // free the NVDevEvoRec. Until that is implemented, all NVKMS clients must
+    // be closed before a device is removed.
+}
+
+static void nvkms_probe(const nv_gpu_info_t *gpu_info)
+{
+    nvKmsKapiProbe(gpu_info);
+}
+

 /*************************************************************************
 * Interface with resman.
@@ -828,7 +842,9 @@ static void nvkms_resume(NvU32 gpuId)
 static nvidia_modeset_rm_ops_t __rm_ops = { 0 };
 static nvidia_modeset_callbacks_t nvkms_rm_callbacks = {
    .suspend = nvkms_suspend,
-    .resume  = nvkms_resume
+    .resume  = nvkms_resume,
+    .remove  = nvkms_remove,
+    .probe   = nvkms_probe,
 };

 static int nvkms_alloc_rm(void)
--- a/kernel-open/nvidia-modeset/nvkms.h
+++ b/kernel-open/nvidia-modeset/nvkms.h
@@ -104,6 +104,8 @@ NvBool nvKmsKapiGetFunctionsTableInternal
 );

 void nvKmsKapiSuspendResume(NvBool suspend);
+void nvKmsKapiRemove(NvU32 gpuId);
+void nvKmsKapiProbe(const nv_gpu_info_t *gpu_info);

 NvBool nvKmsGetBacklight(NvU32 display_id, void *drv_priv, NvU32 *brightness);
 NvBool nvKmsSetBacklight(NvU32 display_id, void *drv_priv, NvU32 brightness);
--- a/kernel-open/nvidia-uvm/uvm_ampere_host.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere_host.c
@@ -461,3 +461,29 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
    if (params->membar == UvmInvalidateTlbMemBarLocal)
        uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
 }
+
+void uvm_hal_ampere_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture)
+{
+    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+    NvU32 aperture_value;
+
+    if (aperture == UVM_APERTURE_SYS) {
+        aperture_value = HWCONST(C56F, MEM_OP_D, OPERATION, L2_SYSMEM_INVALIDATE);
+    }
+    else if (uvm_aperture_is_peer(aperture)) {
+        aperture_value = HWCONST(C56F, MEM_OP_D, OPERATION, L2_PEERMEM_INVALIDATE);
+    }
+    else {
+        UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture);
+        return;
+    }
+
+    uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
+
+    NV_PUSH_4U(C56F, MEM_OP_A, 0,
+               MEM_OP_B, 0,
+               MEM_OP_C, 0,
+               MEM_OP_D, aperture_value);
+
+    uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
+}
--- a/kernel-open/nvidia-uvm/uvm_ats_sva.h
+++ b/kernel-open/nvidia-uvm/uvm_ats_sva.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2018-2023 NVIDIA Corporation
+    Copyright (c) 2018-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -43,8 +43,11 @@
 // commit cb4789b0d19ff231ce9f73376a023341300aed96 (11/23/2020). Commit
 // 701fac40384f07197b106136012804c3cae0b3de (02/15/2022) removed ioasid_get()
 // and added mm_pasid_drop().
+//
+// CONFIG_IOMMU_SVA_LIB was renamed to CONFIG_IOMMU_SVA with commit 7ba5647
+// (02/07/2022).
    #if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT))
-        #if defined(CONFIG_IOMMU_SVA)
+        #if defined(CONFIG_IOMMU_SVA) || defined(CONFIG_IOMMU_SVA_LIB)
            #define UVM_ATS_SVA_SUPPORTED() 1
        #else
            #define UVM_ATS_SVA_SUPPORTED() 0
--- a/kernel-open/nvidia-uvm/uvm_blackwell_host.c
+++ b/kernel-open/nvidia-uvm/uvm_blackwell_host.c
@@ -347,10 +347,23 @@ uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_g
    return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
 }

-// Host-specific L2 cache invalidate for non-coherent sysmem
-void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push)
+void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture)
 {
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
+    NvU32 aperture_value;
+
+    if (!gpu->parent->is_integrated_gpu) {
+        return uvm_hal_ampere_host_l2_invalidate(push, aperture);
+    }
+
+    switch (aperture) {
+        case UVM_APERTURE_SYS:
+            aperture_value = HWCONST(C96F, MEM_OP_D, OPERATION, L2_SYSMEM_NCOH_INVALIDATE);
+            break;
+        default:
+            UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture);
+            return;
+    }

    // First sysmembar
    uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
@@ -363,7 +376,7 @@ void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push)
    NV_PUSH_4U(C96F, MEM_OP_A, 0,
               MEM_OP_B, 0,
               MEM_OP_C, 0,
-               MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_SYSMEM_NCOH_INVALIDATE));
+               MEM_OP_D, aperture_value);
    // Final sysmembar
    uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
 }
--- a/kernel-open/nvidia-uvm/uvm_get_rm_ptes_test.c
+++ b/kernel-open/nvidia-uvm/uvm_get_rm_ptes_test.c
@@ -144,10 +144,13 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,

    phys_offset = mapping_offset;

-    // Add the physical offset for nvswitch connected peer mappings
-    if (uvm_aperture_is_peer(aperture) &&
-        uvm_parent_gpus_are_nvswitch_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
-        phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
+    // Add the physical offset for peer mappings
+    if (uvm_aperture_is_peer(aperture)) {
+        if (uvm_parent_gpus_are_direct_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
+            phys_offset += memory_owning_gpu->parent->peer_address_info.peer_gpa_memory_window_start;
+        else if (uvm_parent_gpus_are_nvswitch_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
+            phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
+    }

    for (index = 0; index < ext_mapping_info->numWrittenPtes; index++) {

--- a/kernel-open/nvidia-uvm/uvm_gpu.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu.c
@@ -107,6 +107,7 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
    }

    parent_gpu->nvswitch_info.is_nvswitch_connected = gpu_info->connectedToSwitch;
+    parent_gpu->peer_address_info.is_direct_connected = gpu_info->nvlDirectConnect;

    // nvswitch is routed via physical pages, where the upper 13-bits of the
    // 47-bit address space holds the routing information for each peer.
@@ -115,6 +116,9 @@ static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo
        parent_gpu->nvswitch_info.fabric_memory_window_start = gpu_info->nvswitchMemoryWindowStart;
        parent_gpu->nvswitch_info.egm_fabric_memory_window_start = gpu_info->nvswitchEgmMemoryWindowStart;
    }
+    else if (parent_gpu->peer_address_info.is_direct_connected) {
+        parent_gpu->peer_address_info.peer_gpa_memory_window_start = gpu_info->nvlDirectConnectMemoryWindowStart;
+    }

    parent_gpu->ats.non_pasid_ats_enabled = gpu_info->nonPasidAtsSupport;

@@ -2110,6 +2114,16 @@ bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0,
    return false;
 }

+bool uvm_parent_gpus_are_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1)
+{
+    if (parent_gpu0 != parent_gpu1 &&
+        parent_gpu0->peer_address_info.is_direct_connected &&
+        parent_gpu1->peer_address_info.is_direct_connected)
+        return true;
+
+    return false;
+}
+
 NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu)
 {
    // We may need to call service_interrupts() which cannot be done in the top
@@ -3068,7 +3082,9 @@ uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 ad
 {
    uvm_aperture_t aperture = uvm_gpu_peer_aperture(accessing_gpu, owning_gpu);

-    if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, owning_gpu->parent))
+    if (uvm_parent_gpus_are_direct_connected(accessing_gpu->parent, owning_gpu->parent))
+        address += owning_gpu->parent->peer_address_info.peer_gpa_memory_window_start;
+    else if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, owning_gpu->parent))
        address += owning_gpu->parent->nvswitch_info.fabric_memory_window_start;

    return uvm_gpu_phys_address(aperture, address);
--- a/kernel-open/nvidia-uvm/uvm_gpu.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu.h
@@ -1365,6 +1365,20 @@ struct uvm_parent_gpu_struct
        NvU64 base_address;
    } egm;

+    // Peer VIDMEM base offset used when creating GPA PTEs for
+    // peer mappings. RM will set this offset on systems where
+    // peer accesses are not zero-based (NVLINK 5+).
+    struct
+    {
+        // Is the GPU directly connected to peer GPUs.
+        bool is_direct_connected;
+
+        // 48-bit fabric memory physical offset that peer GPUs need in order
+        // for accesses to be routed to the correct peer.
+        // Each memory window is 4TB, so the upper 6 bits are used for routing.
+        NvU64 peer_gpa_memory_window_start;
+    } peer_address_info;
+
    uvm_test_parent_gpu_inject_error_t test;

    // PASID ATS
@@ -1619,6 +1633,8 @@ uvm_aperture_t uvm_gpu_egm_peer_aperture(uvm_parent_gpu_t *local_gpu, uvm_parent

 bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);

+bool uvm_parent_gpus_are_direct_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
+
 static bool uvm_gpus_are_smc_peers(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
 {
    UVM_ASSERT(gpu0 != gpu1);
--- a/kernel-open/nvidia-uvm/uvm_hal.c
+++ b/kernel-open/nvidia-uvm/uvm_hal.c
@@ -221,7 +221,7 @@ static uvm_hal_class_ops_t host_table[] =
            .access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
            .access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
            .access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported,
-            .l2_invalidate_noncoh_sysmem = uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported,
+            .l2_invalidate = uvm_hal_host_l2_invalidate_unsupported,
            .get_time = uvm_hal_maxwell_get_time,
        }
    },
@@ -287,6 +287,7 @@ static uvm_hal_class_ops_t host_table[] =
            .tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all,
            .tlb_invalidate_va = uvm_hal_ampere_host_tlb_invalidate_va,
            .tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test,
+            .l2_invalidate = uvm_hal_ampere_host_l2_invalidate,
        }
    },
    {
@@ -315,8 +316,8 @@ static uvm_hal_class_ops_t host_table[] =
            .tlb_invalidate_phys = uvm_hal_blackwell_host_tlb_invalidate_phys,
            .tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
            .tlb_flush_prefetch = uvm_hal_blackwell_host_tlb_flush_prefetch,
-            .l2_invalidate_noncoh_sysmem = uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem,
            .access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100,
+            .l2_invalidate = uvm_hal_blackwell_host_l2_invalidate,
        }
    },
    {
@@ -1162,10 +1163,11 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
 {
 }

-void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push)
+void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture)
 {
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    UVM_ERR_PRINT("L2 cache invalidation: Called on unsupported GPU %s (arch: 0x%x, impl: 0x%x)\n", 
                   uvm_gpu_name(gpu), gpu->parent->rm_info.gpuArch, gpu->parent->rm_info.gpuImplementation);
-    UVM_ASSERT_MSG(false, "host l2_invalidate_noncoh_sysmem called on unsupported GPU\n");
+    UVM_ASSERT_MSG(false, "L2 invalidate is not supported on %s",
+                   uvm_parent_gpu_name(gpu->parent));
 }
--- a/kernel-open/nvidia-uvm/uvm_hal.h
+++ b/kernel-open/nvidia-uvm/uvm_hal.h
@@ -248,11 +248,12 @@ typedef void (*uvm_hal_host_tlb_flush_prefetch_t)(uvm_push_t *push);
 void uvm_hal_maxwell_host_tlb_flush_prefetch_unsupported(uvm_push_t *push);
 void uvm_hal_blackwell_host_tlb_flush_prefetch(uvm_push_t *push);

-// L2 cache invalidate for non-coherent sysmem for systems with write back cache.
-// These are iGPUs as of now.
-typedef void (*uvm_hal_host_l2_invalidate_noncoh_sysmem_t)(uvm_push_t *push);
-void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push);
-void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push);
+// Performs L2 cache invalidation for peer or system memory.
+typedef void (*uvm_hal_host_l2_invalidate_t)(uvm_push_t *push, uvm_aperture_t aperture);
+void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture);
+
+void uvm_hal_ampere_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture);
+void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture);

 // By default all semaphore release operations include a membar sys before the
 // operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
@@ -822,7 +823,7 @@ struct uvm_host_hal_struct
    uvm_hal_host_tlb_invalidate_phys_t tlb_invalidate_phys;
    uvm_hal_host_tlb_invalidate_test_t tlb_invalidate_test;
    uvm_hal_host_tlb_flush_prefetch_t tlb_flush_prefetch;
-    uvm_hal_host_l2_invalidate_noncoh_sysmem_t l2_invalidate_noncoh_sysmem;
+    uvm_hal_host_l2_invalidate_t l2_invalidate;
    uvm_hal_fault_buffer_replay_t replay_faults;
    uvm_hal_fault_cancel_global_t cancel_faults_global;
    uvm_hal_fault_cancel_targeted_t cancel_faults_targeted;
--- a/kernel-open/nvidia-uvm/uvm_map_external.c
+++ b/kernel-open/nvidia-uvm/uvm_map_external.c
@@ -1276,11 +1276,20 @@ void uvm_ext_gpu_map_destroy(uvm_va_range_external_t *external_range,

    range_tree = uvm_ext_gpu_range_tree(external_range, mapped_gpu);

-    // Perform L2 cache invalidation for noncoherent sysmem mappings. 
-    // This is done only on systems with write-back cache which is iGPUs as of now.
+    // Perform L2 cache invalidation for cached peer and sysmem mappings.
    if (ext_gpu_map->need_l2_invalidate_at_unmap) {
-        UVM_ASSERT(ext_gpu_map->gpu->parent->is_integrated_gpu);
-        status = uvm_mmu_l2_invalidate_noncoh_sysmem(mapped_gpu);
+        uvm_aperture_t aperture;
+
+        // Peer cache invalidation is not targeted to a specific peer, so we
+        // just use UVM_APERTURE_PEER(0).
+        if (ext_gpu_map->is_egm)
+            aperture = UVM_APERTURE_PEER(0);
+        else if (ext_gpu_map->is_sysmem)
+            aperture = UVM_APERTURE_SYS;
+        else
+            aperture = UVM_APERTURE_PEER(0);
+
+        status = uvm_mmu_l2_invalidate(mapped_gpu, aperture);
        UVM_ASSERT(status == NV_OK);
    }

--- a/kernel-open/nvidia-uvm/uvm_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_mmu.c
@@ -2974,25 +2974,21 @@ NV_STATUS uvm_mmu_tlb_invalidate_phys(uvm_gpu_t *gpu)
    return uvm_push_end_and_wait(&push);
 }

-NV_STATUS uvm_mmu_l2_invalidate_noncoh_sysmem(uvm_gpu_t *gpu)
+NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture)
 {
    uvm_push_t push;
    NV_STATUS status;

-    // L2 cache invalidation is only done for systems with write-back 
-    // cache which is iGPUs as of now.
-    UVM_ASSERT(gpu->parent->is_integrated_gpu);
-
    status = uvm_push_begin(gpu->channel_manager,
                            UVM_CHANNEL_TYPE_MEMOPS,
                            &push,
-                            "L2 cache invalidate for sysmem");
+                            "L2 cache invalidate");
    if (status != NV_OK) {
        UVM_ERR_PRINT("L2 cache invalidation: Failed to begin push, status: %s\n", nvstatusToString(status));
        return status;
    }

-    gpu->parent->host_hal->l2_invalidate_noncoh_sysmem(&push);
+    gpu->parent->host_hal->l2_invalidate(&push, aperture);

    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK) 
--- a/kernel-open/nvidia-uvm/uvm_mmu.h
+++ b/kernel-open/nvidia-uvm/uvm_mmu.h
@@ -722,9 +722,8 @@ uvm_gpu_address_t uvm_mmu_gpu_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phy
 // dma addresses, IOVAs, and GPAs). See uvm_dma_map_invalidation_t.
 NV_STATUS uvm_mmu_tlb_invalidate_phys(uvm_gpu_t *gpu);

-// Invalidate L2 cache when noncoherent sysmem mappings are unmapped.
-// This is done for systems with write-back cache i.e. iGPUs as of now.
-NV_STATUS uvm_mmu_l2_invalidate_noncoh_sysmem(uvm_gpu_t *gpu);
+// Invalidate L2 cache for peer or system memory.
+NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture);

 NV_STATUS uvm_test_invalidate_tlb(UVM_TEST_INVALIDATE_TLB_PARAMS *params, struct file *filp);

--- a/kernel-open/nvidia-uvm/uvm_va_range.h
+++ b/kernel-open/nvidia-uvm/uvm_va_range.h
@@ -204,8 +204,12 @@ typedef struct
    uvm_deferred_free_object_t deferred_free;

    // Flag indicating whether L2 cache invalidation is needed at unmap time.
-    // This is set by RM during mapping and used during unmap to determine
-    // if L2 cache invalidation should be performed for non coherent sysmem.
+    // This is set by RM during mapping and used during unmap to determine if L2
+    // cache invalidation should be performed. For GPU cached system memory
+    // allocations on systems with a write-back cache this is required for
+    // correctness. For GPU cached peer and system memory on systems with a
+    // write-through cache the invalidation could be done by RM at map time;
+    // however, this introduces overhead during performance sensitive sections.
    bool need_l2_invalidate_at_unmap;
 } uvm_ext_gpu_map_t;

--- a/kernel-open/nvidia/detect-self-hosted.h
+++ b/kernel-open/nvidia/detect-self-hosted.h
@@ -32,7 +32,8 @@ static inline int pci_devid_is_self_hosted_hopper(unsigned short devid)
 static inline int pci_devid_is_self_hosted_blackwell(unsigned short devid)
 {
    return (devid >= 0x2940 && devid <= 0x297f)      // GB100 Self-Hosted
-           || (devid >= 0x31c0 && devid <= 0x31ff);  // GB110 Self-Hosted
+           || (devid >= 0x31c0 && devid <= 0x31ff)   // GB110 Self-Hosted
+           || (devid == 0x31a1);                     //
 }

 static inline int pci_devid_is_self_hosted(unsigned short devid)
--- a/kernel-open/nvidia/nv-acpi.c
+++ b/kernel-open/nvidia/nv-acpi.c
@@ -737,8 +737,6 @@ static NV_STATUS nv_acpi_evaluate_dsm_method(

        rmStatus = nv_acpi_extract_object(dsm, pOutData, *pSize, &data_size);
        *pSize = data_size;
-
-        kfree(output.pointer);
    }
    else
    {
@@ -751,6 +749,7 @@ static NV_STATUS nv_acpi_evaluate_dsm_method(
                  "NVRM: %s: DSM data invalid!\n", __FUNCTION__);
    }

+    kfree(output.pointer);
    return rmStatus;
 }

@@ -1183,6 +1182,7 @@ NvBool nv_acpi_power_resource_method_present(
        (object_package->package.count != 0x1))
    {
        nv_printf(NV_DBG_ERRORS,"NVRM: _PR3 object is not a type 'package'\n");
+        kfree(buf.pointer);
        return NV_FALSE;
    }

@@ -1194,8 +1194,10 @@ NvBool nv_acpi_power_resource_method_present(
    {
        nv_printf(NV_DBG_ERRORS,
                     "NVRM: _PR3 object does not contain POWER Reference\n");
+        kfree(buf.pointer);
        return NV_FALSE;
    }
+    kfree(buf.pointer);
    return NV_TRUE;
 }

@@ -1325,6 +1327,7 @@ static acpi_status nv_acpi_find_battery_info(
    if (object_package->type != ACPI_TYPE_PACKAGE)
    {
        nv_printf(NV_DBG_INFO, "NVRM: Battery method output is not package\n");
+        kfree(buf.pointer);
        return AE_OK;
    }

@@ -1350,11 +1353,13 @@ static acpi_status nv_acpi_find_battery_info(
    if ((object_package->package.elements[battery_technology_offset].type != ACPI_TYPE_INTEGER) ||
        (object_package->package.elements[battery_technology_offset].integer.value != BATTERY_RECHARGABLE))
    {
+        kfree(buf.pointer);
        return AE_OK;
    }

    battery_present = NV_TRUE;

+    kfree(buf.pointer);
    /* Stop traversing acpi tree. */
    return AE_CTRL_TERMINATE;
 }
--- a/kernel-open/nvidia/nv-modeset-interface.c
+++ b/kernel-open/nvidia/nv-modeset-interface.c
@@ -71,6 +71,45 @@ void nvidia_modeset_resume(NvU32 gpuId)
    }
 }

+void nvidia_modeset_remove(NvU32 gpuId)
+{
+    if (nv_modeset_callbacks && nv_modeset_callbacks->remove)
+    {
+        nv_modeset_callbacks->remove(gpuId);
+    }
+}
+
+static void nvidia_modeset_get_gpu_info(nv_gpu_info_t *gpu_info,
+                                        const nv_linux_state_t *nvl)
+{
+    nv_state_t *nv = NV_STATE_PTR(nvl);
+    int numa_status = nv_get_numa_status(nvl);
+
+    gpu_info->gpu_id = nv->gpu_id;
+
+    gpu_info->pci_info.domain   = nv->pci_info.domain;
+    gpu_info->pci_info.bus      = nv->pci_info.bus;
+    gpu_info->pci_info.slot     = nv->pci_info.slot;
+    gpu_info->pci_info.function = nv->pci_info.function;
+
+    gpu_info->needs_numa_setup =
+        numa_status != NV_IOCTL_NUMA_STATUS_DISABLED &&
+        numa_status != NV_IOCTL_NUMA_STATUS_ONLINE;
+
+    gpu_info->os_device_ptr = nvl->dev;
+}
+
+void nvidia_modeset_probe(const nv_linux_state_t *nvl)
+{
+    if (nv_modeset_callbacks && nv_modeset_callbacks->probe)
+    {
+        nv_gpu_info_t gpu_info;
+
+        nvidia_modeset_get_gpu_info(&gpu_info, nvl);
+        nv_modeset_callbacks->probe(&gpu_info);
+    }
+}
+
 static NvU32 nvidia_modeset_enumerate_gpus(nv_gpu_info_t *gpu_info)
 {
    nv_linux_state_t *nvl;
@@ -82,9 +121,6 @@ static NvU32 nvidia_modeset_enumerate_gpus(nv_gpu_info_t *gpu_info)

    for (nvl = nv_linux_devices; nvl != NULL; nvl = nvl->next)
    {
-        nv_state_t *nv = NV_STATE_PTR(nvl);
-        int numa_status = nv_get_numa_status(nvl);
-
        /*
         * The gpu_info[] array has NV_MAX_GPUS elements.  Fail if there
         * are more GPUs than that.
@@ -96,19 +132,7 @@ static NvU32 nvidia_modeset_enumerate_gpus(nv_gpu_info_t *gpu_info)
            break;
        }

-        gpu_info[count].gpu_id = nv->gpu_id;
-
-        gpu_info[count].pci_info.domain   = nv->pci_info.domain;
-        gpu_info[count].pci_info.bus      = nv->pci_info.bus;
-        gpu_info[count].pci_info.slot     = nv->pci_info.slot;
-        gpu_info[count].pci_info.function = nv->pci_info.function;
-
-        gpu_info->needs_numa_setup =
-            numa_status != NV_IOCTL_NUMA_STATUS_DISABLED &&
-            numa_status != NV_IOCTL_NUMA_STATUS_ONLINE;
-
-        gpu_info[count].os_device_ptr = nvl->dev;
-
+        nvidia_modeset_get_gpu_info(&gpu_info[count], nvl);
        count++;
    }

--- a/kernel-open/nvidia/nv-pci.c
+++ b/kernel-open/nvidia/nv-pci.c
@@ -1661,6 +1661,8 @@ nv_pci_probe

    nv_kmem_cache_free_stack(sp);

+    nvidia_modeset_probe(nvl);
+
    return 0;

 goto err_free_all;
@@ -1760,6 +1762,8 @@ nv_pci_remove(struct pci_dev *pci_dev)
     */
    nv_linux_stop_open_q(nvl);

+    nvidia_modeset_remove(nv->gpu_id);
+
    LOCK_NV_LINUX_DEVICES();
    down(&nvl->ldata_lock);
    nv->flags |= NV_FLAG_PCI_REMOVE_IN_PROGRESS;
@@ -1899,14 +1903,18 @@ nv_pci_shutdown(struct pci_dev *pci_dev)
 {
    nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);

-    if ((nvl != NULL) && nvl->is_forced_shutdown)
-    {
-        nvl->is_forced_shutdown = NV_FALSE;
-        return;
-    }
-
    if (nvl != NULL)
    {
+        nv_state_t *nv = NV_STATE_PTR(nvl);
+
+        if (nvl->is_forced_shutdown)
+        {
+            nvl->is_forced_shutdown = NV_FALSE;
+            return;
+        }
+
+        nvidia_modeset_remove(nv->gpu_id);
+
        nvl->nv_state.is_shutdown = NV_TRUE;
    }

--- a/kernel-open/nvidia/nv-platform.c
+++ b/kernel-open/nvidia/nv-platform.c
@@ -1335,6 +1335,18 @@ static int nv_platform_device_remove_wrapper(struct platform_device *pdev)
 }
 #endif

+static void nv_platform_device_shutdown(struct platform_device *pdev)
+{
+    nv_linux_state_t *nvl = platform_get_drvdata(pdev);
+
+    if (nvl != NULL && !nvl->is_forced_shutdown)
+    {
+        nv_state_t *nv = NV_STATE_PTR(nvl);
+
+        nvidia_modeset_remove(nv->gpu_id);
+    }
+}
+
 const struct of_device_id nv_platform_device_table[] =
 {
    { .compatible = "nvidia,tegra234-display",},
@@ -1358,6 +1370,7 @@ struct platform_driver nv_platform_driver = {
    },
    .probe     = nv_platform_device_probe,
    .remove    = nv_platform_device_remove_wrapper,
+    .shutdown  = nv_platform_device_shutdown,
 };

 int nv_platform_count_devices(void)
--- a/kernel-open/nvidia/os-interface.c
+++ b/kernel-open/nvidia/os-interface.c
@@ -2061,6 +2061,22 @@ NV_STATUS NV_API_CALL os_get_random_bytes
    return NV_OK;
 }

+NvU32 NV_API_CALL os_get_current_process_flags
+(
+    void
+)
+{
+    NvU32 flags = OS_CURRENT_PROCESS_FLAG_NONE;
+
+    if (current->flags & PF_EXITING)
+        flags |= OS_CURRENT_PROCESS_FLAG_EXITING;
+
+    if (current->flags & PF_KTHREAD)
+        flags |= OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD;
+
+    return flags;
+}
+
 NV_STATUS NV_API_CALL os_alloc_wait_queue
 (
    os_wait_queue **wq