535.183.01

Bernhard Stoeckner
2024-06-04 10:45:14 +02:00
parent f4bdce9a0a
commit 4459285b60
43 changed files with 2447 additions and 1544 deletions

View File

@@ -340,6 +340,7 @@
#define NV_MSGBOX_CMD_ARG1_ECC_V6_ERROR_TYPE 15:8
#define NV_MSGBOX_CMD_ARG1_ECC_V6_ERROR_TYPE_CORRECTABLE_ERROR 0
#define NV_MSGBOX_CMD_ARG1_ECC_V6_ERROR_TYPE_UNCORRECTABLE_ERROR 1
#define NV_MSGBOX_CMD_ARG1_ECC_V6_ERROR_TYPE_ECC_STATE_FLAGS 2
#define NV_MSGBOX_CMD_ARG1_ENERGY_COUNTER_GPU 0x00000000
#define NV_MSGBOX_CMD_ARG1_ENERGY_COUNTER_MODULE 0x00000003
@@ -633,7 +634,7 @@
/* MSGBOX data, capability dword structure */
#define NV_MSGBOX_DATA_REG 31:0
-#define NV_MSGBOX_DATA_CAP_COUNT 5
+#define NV_MSGBOX_DATA_CAP_COUNT 6
#define NV_MSGBOX_DATA_CAP_0 0
#define NV_MSGBOX_DATA_CAP_0_TEMP_GPU_0 0:0
@@ -950,6 +951,10 @@
#define NV_MSGBOX_DATA_CAP_4_CONFIGURE_PROGRAMMABLE_EDPP_NOT_AVAILABLE 0x00000000
#define NV_MSGBOX_DATA_CAP_4_CONFIGURE_PROGRAMMABLE_EDPP_AVAILABLE 0x00000001
#define NV_MSGBOX_DATA_CAP_5_SRAM_ERROR_THRESHOLD_EXCEEDED 9:9
#define NV_MSGBOX_DATA_CAP_5_SRAM_ERROR_THRESHOLD_EXCEEDED_NOT_AVAILABLE 0x00000000
#define NV_MSGBOX_DATA_CAP_5_SRAM_ERROR_THRESHOLD_EXCEEDED_AVAILABLE 0x00000001
/* ECC counters */
#define NV_MSGBOX_DATA_ECC_CNT_16BIT_DBE 31:16
#define NV_MSGBOX_DATA_ECC_CNT_16BIT_SBE 15:0
@@ -984,6 +989,13 @@
#define NV_MSGBOX_DATA_ECC_V5_METADATA_LOCATION_ID 26:22
#define NV_MSGBOX_DATA_ECC_V5_METADATA_SUBLOCATION_ID 31:27
/* ECC state flags */
#define NV_MSGBOX_DATA_ECC_V6_STATE_FLAGS 31:0
#define NV_MSGBOX_DATA_ECC_V6_STATE_FLAGS_SRAM_ERROR_THRESHOLD_EXCEEDED 0:0
#define NV_MSGBOX_DATA_ECC_V6_STATE_FLAGS_SRAM_ERROR_THRESHOLD_EXCEEDED_FALSE 0
#define NV_MSGBOX_DATA_ECC_V6_STATE_FLAGS_SRAM_ERROR_THRESHOLD_EXCEEDED_TRUE 1
/* NV_MSGBOX_CMD_OPCODE_SCRATCH_COPY src offset argument */
#define NV_MSGBOX_DATA_COPY_SRC_OFFSET 7:0
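Editor's note: the colon values in these SMBPBI defines (15:8, 9:9, 0:0, and so on) are high:low bit ranges within a 32-bit message-box register, which the driver reads and writes through its DRF macro family. Below is a rough standalone sketch of the convention, with one range hard-coded rather than token-pasted from the define; all names in it are illustrative, not the driver's.

#include <stdint.h>
#include <stdio.h>

#define ECC_CNT_16BIT_DBE_HI 31
#define ECC_CNT_16BIT_DBE_LO 16

/* Extract bits hi..lo from a 32-bit register value. */
static uint32_t field_val(uint32_t reg, unsigned hi, unsigned lo)
{
    uint32_t width = hi - lo + 1u;
    uint32_t mask = (width == 32u) ? 0xFFFFFFFFu : ((1u << width) - 1u);
    return (reg >> lo) & mask;
}

int main(void)
{
    uint32_t msgboxData = 0x0003000Au; /* hypothetical value: 3 DBEs, 10 SBEs */
    printf("DBE count: %u\n",
           field_val(msgboxData, ECC_CNT_16BIT_DBE_HI, ECC_CNT_16BIT_DBE_LO));
    return 0;
}

The new CAP_5 SRAM_ERROR_THRESHOLD_EXCEEDED bit at 9:9 and the ECC_V6 state flag at 0:0 follow the same pattern: one-bit fields whose _AVAILABLE or _TRUE values are compared against the extracted result.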

View File

@@ -124,6 +124,16 @@ static void __nvoc_init_funcTable_CrashCatReport_1(CrashCatReport *pThis) {
PORT_UNREFERENCED_VARIABLE(reportHal);
PORT_UNREFERENCED_VARIABLE(reportHal_HalVarIdx);
// Hal function -- crashcatReportSourceContainment
if (( ((reportHal_HalVarIdx >> 5) == 0UL) && ((1UL << (reportHal_HalVarIdx & 0x1f)) & 0x00000004UL) )) /* CrashCatReportHal: V1_LIBOS3 */
{
pThis->__crashcatReportSourceContainment__ = &crashcatReportSourceContainment_V1_LIBOS3;
}
else
{
pThis->__crashcatReportSourceContainment__ = &crashcatReportSourceContainment_3e9f29;
}
// Hal function -- crashcatReportLogReporter
if (( ((reportHal_HalVarIdx >> 5) == 0UL) && ((1UL << (reportHal_HalVarIdx & 0x1f)) & 0x00000002UL) )) /* CrashCatReportHal: V1_LIBOS2 */
{
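Editor's note: the generated predicate above encodes HAL variant membership as a bitmask. The variant index selects a 32-bit word (idx >> 5) and a bit within it (idx & 0x1f), and each implementation carries the mask of variants it serves, so 0x00000004 matches only variant index 2 (V1_LIBOS3 per the comment). A minimal sketch of that check, with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

/* varIdx selects a word (varIdx >> 5) and a bit (varIdx & 0x1f); mask lists
 * the variants a given implementation serves. */
static bool hal_variant_matches(unsigned varIdx, unsigned word, unsigned mask)
{
    return ((varIdx >> 5) == word) && (((1u << (varIdx & 0x1f)) & mask) != 0u);
}

int main(void)
{
    for (unsigned idx = 0; idx < 4; idx++)
        printf("variant %u matches 0x4: %d\n", idx,
               hal_variant_matches(idx, 0u, 0x00000004u));
    /* Only variant 2 prints 1. */
    return 0;
}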

View File

@@ -82,6 +82,7 @@ struct CrashCatReport {
struct Object __nvoc_base_Object;
struct Object *__nvoc_pbase_Object;
struct CrashCatReport *__nvoc_pbase_CrashCatReport;
NV_CRASHCAT_CONTAINMENT (*__crashcatReportSourceContainment__)(struct CrashCatReport *);
void (*__crashcatReportLogReporter__)(struct CrashCatReport *);
void (*__crashcatReportLogSource__)(struct CrashCatReport *);
struct CrashCatReportHal reportHal;
@@ -119,6 +120,8 @@ NV_STATUS __nvoc_objCreate_CrashCatReport(CrashCatReport**, Dynamic*, NvU32,
#define __objCreate_CrashCatReport(ppNewObj, pParent, createFlags, CrashCatReportHal_version, CrashCatReportHal_implementer, arg_ppReportBytes, arg_bytesRemaining) \
__nvoc_objCreate_CrashCatReport((ppNewObj), staticCast((pParent), Dynamic), (createFlags), CrashCatReportHal_version, CrashCatReportHal_implementer, arg_ppReportBytes, arg_bytesRemaining)
#define crashcatReportSourceContainment(arg0) crashcatReportSourceContainment_DISPATCH(arg0)
#define crashcatReportSourceContainment_HAL(arg0) crashcatReportSourceContainment_DISPATCH(arg0)
#define crashcatReportLogReporter(arg0) crashcatReportLogReporter_DISPATCH(arg0)
#define crashcatReportLogReporter_HAL(arg0) crashcatReportLogReporter_DISPATCH(arg0)
#define crashcatReportLogSource(arg0) crashcatReportLogSource_DISPATCH(arg0)
@@ -263,6 +266,16 @@ static inline void crashcatReportLogIo32State(struct CrashCatReport *arg0) {
#define crashcatReportLogIo32State_HAL(arg0) crashcatReportLogIo32State(arg0)
static inline NV_CRASHCAT_CONTAINMENT crashcatReportSourceContainment_3e9f29(struct CrashCatReport *arg0) {
return NV_CRASHCAT_CONTAINMENT_UNSPECIFIED;
}
NV_CRASHCAT_CONTAINMENT crashcatReportSourceContainment_V1_LIBOS3(struct CrashCatReport *arg0);
static inline NV_CRASHCAT_CONTAINMENT crashcatReportSourceContainment_DISPATCH(struct CrashCatReport *arg0) {
return arg0->__crashcatReportSourceContainment__(arg0);
}
void crashcatReportLogReporter_V1_GENERIC(struct CrashCatReport *arg0);
void crashcatReportLogReporter_V1_LIBOS2(struct CrashCatReport *arg0);
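Editor's note: these headers show NVOC's dispatch pattern. Each object holds per-method function pointers that the generated init code fills in (hash-suffixed names such as _3e9f29 are generated default stubs), and the _DISPATCH inline simply calls through the pointer. A toy model of the same mechanism, with all names invented for illustration:

#include <stdio.h>

typedef enum { CONTAINMENT_UNSPECIFIED, CONTAINMENT_RISCV_HART } Containment;

typedef struct Report Report;
struct Report {
    /* Filled in by init_funcTable-style setup code. */
    Containment (*sourceContainment)(Report *);
};

static Containment containment_default(Report *r) { (void)r; return CONTAINMENT_UNSPECIFIED; }
static Containment containment_libos3(Report *r) { (void)r; return CONTAINMENT_RISCV_HART; }

/* The _DISPATCH step: just chase the pointer. */
static Containment reportSourceContainment(Report *r)
{
    return r->sourceContainment(r);
}

int main(void)
{
    Report r = { containment_libos3 }; /* as if the LIBOS3 HAL branch ran */
    printf("containment = %d\n", reportSourceContainment(&r));
    return 0;
}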

View File

@@ -973,6 +973,7 @@ struct OBJGPU {
NvBool PDB_PROP_GPU_SKIP_CE_MAPPINGS_NO_NVLINK;
NvBool PDB_PROP_GPU_C2C_SYSMEM;
NvBool PDB_PROP_GPU_IN_TCC_MODE;
NvBool PDB_PROP_GPU_SUPPORTS_TDR_EVENT;
NvBool PDB_PROP_GPU_MSHYBRID_GC6_ACTIVE;
NvBool PDB_PROP_GPU_VGPU_BIG_PAGE_SIZE_64K;
NvBool PDB_PROP_GPU_OPTIMIZE_SPARSE_TEXTURE_BY_DEFAULT;
@@ -1306,6 +1307,8 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_OBJGPU;
#define PDB_PROP_GPU_IN_PM_CODEPATH_BASE_NAME PDB_PROP_GPU_IN_PM_CODEPATH
#define PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT_BASE_CAST
#define PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT_BASE_NAME PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT
#define PDB_PROP_GPU_SUPPORTS_TDR_EVENT_BASE_CAST
#define PDB_PROP_GPU_SUPPORTS_TDR_EVENT_BASE_NAME PDB_PROP_GPU_SUPPORTS_TDR_EVENT
#define PDB_PROP_GPU_UPSTREAM_PORT_L1_UNSUPPORTED_BASE_CAST
#define PDB_PROP_GPU_UPSTREAM_PORT_L1_UNSUPPORTED_BASE_NAME PDB_PROP_GPU_UPSTREAM_PORT_L1_UNSUPPORTED
#define PDB_PROP_GPU_BEHIND_BR03_BASE_CAST

View File

@@ -349,6 +349,28 @@ static void __nvoc_init_funcTable_KernelFifo_1(KernelFifo *pThis, RmHalspecOwner
pThis->__kfifoGetMaxCeChannelGroups__ = &kfifoGetMaxCeChannelGroups_GA100;
}
// Hal function -- kfifoStartChannelHalt
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x11f0fc00UL) )) /* ChipHal: GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 */
{
pThis->__kfifoStartChannelHalt__ = &kfifoStartChannelHalt_GA100;
}
// default
else
{
pThis->__kfifoStartChannelHalt__ = &kfifoStartChannelHalt_b3696a;
}
// Hal function -- kfifoCompleteChannelHalt
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x11f0fc00UL) )) /* ChipHal: GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 */
{
pThis->__kfifoCompleteChannelHalt__ = &kfifoCompleteChannelHalt_GA100;
}
// default
else
{
pThis->__kfifoCompleteChannelHalt__ = &kfifoCompleteChannelHalt_b3696a;
}
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelFifo_engstateConstructEngine;
pThis->__nvoc_base_OBJENGSTATE.__engstateStateInitLocked__ = &__nvoc_thunk_KernelFifo_engstateStateInitLocked;

View File

@@ -41,6 +41,7 @@ extern "C" {
\***************************************************************************/
#include "kernel/gpu/eng_state.h"
#include "kernel/gpu/gpu_timeout.h"
#include "kernel/gpu/gpu_halspec.h"
#include "kernel/gpu/fifo/channel_descendant.h"
#include "kernel/gpu/gpu_engine_type.h"
@@ -493,6 +494,8 @@ struct KernelFifo {
NV_STATUS (*__kfifoUpdateUsermodeDoorbell__)(struct OBJGPU *, struct KernelFifo *, NvU32, NvU32);
NvU32 (*__kfifoRunlistGetBaseShift__)(struct KernelFifo *);
NvU32 (*__kfifoGetMaxCeChannelGroups__)(struct OBJGPU *, struct KernelFifo *);
void (*__kfifoStartChannelHalt__)(struct OBJGPU *, struct KernelFifo *, struct KernelChannel *);
void (*__kfifoCompleteChannelHalt__)(struct OBJGPU *, struct KernelFifo *, struct KernelChannel *, RMTIMEOUT *);
NV_STATUS (*__kfifoStateLoad__)(POBJGPU, struct KernelFifo *, NvU32);
NV_STATUS (*__kfifoStateUnload__)(POBJGPU, struct KernelFifo *, NvU32);
NV_STATUS (*__kfifoStatePreLoad__)(POBJGPU, struct KernelFifo *, NvU32);
@@ -595,6 +598,10 @@ NV_STATUS __nvoc_objCreate_KernelFifo(KernelFifo**, Dynamic*, NvU32);
#define kfifoRunlistGetBaseShift_HAL(pKernelFifo) kfifoRunlistGetBaseShift_DISPATCH(pKernelFifo)
#define kfifoGetMaxCeChannelGroups(pGpu, pKernelFifo) kfifoGetMaxCeChannelGroups_DISPATCH(pGpu, pKernelFifo)
#define kfifoGetMaxCeChannelGroups_HAL(pGpu, pKernelFifo) kfifoGetMaxCeChannelGroups_DISPATCH(pGpu, pKernelFifo)
#define kfifoStartChannelHalt(pGpu, pKernelFifo, pKernelChannel) kfifoStartChannelHalt_DISPATCH(pGpu, pKernelFifo, pKernelChannel)
#define kfifoStartChannelHalt_HAL(pGpu, pKernelFifo, pKernelChannel) kfifoStartChannelHalt_DISPATCH(pGpu, pKernelFifo, pKernelChannel)
#define kfifoCompleteChannelHalt(pGpu, pKernelFifo, pKernelChannel, pTimeout) kfifoCompleteChannelHalt_DISPATCH(pGpu, pKernelFifo, pKernelChannel, pTimeout)
#define kfifoCompleteChannelHalt_HAL(pGpu, pKernelFifo, pKernelChannel, pTimeout) kfifoCompleteChannelHalt_DISPATCH(pGpu, pKernelFifo, pKernelChannel, pTimeout)
#define kfifoStateLoad(pGpu, pEngstate, arg0) kfifoStateLoad_DISPATCH(pGpu, pEngstate, arg0)
#define kfifoStateUnload(pGpu, pEngstate, arg0) kfifoStateUnload_DISPATCH(pGpu, pEngstate, arg0)
#define kfifoStatePreLoad(pGpu, pEngstate, arg0) kfifoStatePreLoad_DISPATCH(pGpu, pEngstate, arg0)
@@ -1474,6 +1481,26 @@ static inline NvU32 kfifoGetMaxCeChannelGroups_DISPATCH(struct OBJGPU *pGpu, str
return pKernelFifo->__kfifoGetMaxCeChannelGroups__(pGpu, pKernelFifo);
}
void kfifoStartChannelHalt_GA100(struct OBJGPU *pGpu, struct KernelFifo *pKernelFifo, struct KernelChannel *pKernelChannel);
static inline void kfifoStartChannelHalt_b3696a(struct OBJGPU *pGpu, struct KernelFifo *pKernelFifo, struct KernelChannel *pKernelChannel) {
return;
}
static inline void kfifoStartChannelHalt_DISPATCH(struct OBJGPU *pGpu, struct KernelFifo *pKernelFifo, struct KernelChannel *pKernelChannel) {
pKernelFifo->__kfifoStartChannelHalt__(pGpu, pKernelFifo, pKernelChannel);
}
void kfifoCompleteChannelHalt_GA100(struct OBJGPU *pGpu, struct KernelFifo *pKernelFifo, struct KernelChannel *pKernelChannel, RMTIMEOUT *pTimeout);
static inline void kfifoCompleteChannelHalt_b3696a(struct OBJGPU *pGpu, struct KernelFifo *pKernelFifo, struct KernelChannel *pKernelChannel, RMTIMEOUT *pTimeout) {
return;
}
static inline void kfifoCompleteChannelHalt_DISPATCH(struct OBJGPU *pGpu, struct KernelFifo *pKernelFifo, struct KernelChannel *pKernelChannel, RMTIMEOUT *pTimeout) {
pKernelFifo->__kfifoCompleteChannelHalt__(pGpu, pKernelFifo, pKernelChannel, pTimeout);
}
static inline NV_STATUS kfifoStateLoad_DISPATCH(POBJGPU pGpu, struct KernelFifo *pEngstate, NvU32 arg0) {
return pEngstate->__kfifoStateLoad__(pGpu, pEngstate, arg0);
}

View File

@@ -7,7 +7,7 @@ extern "C" {
#endif
/*
-* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -343,7 +343,7 @@ static inline void kgrctxDecObjectCount(struct OBJGPU *arg0, struct KernelGraphi
#define kgrctxDecObjectCount_HAL(arg0, arg1, classNum) kgrctxDecObjectCount(arg0, arg1, classNum)
-GR_GLOBALCTX_BUFFER kgrctxGetRegisterAccessMapId_PF(struct OBJGPU *arg0, struct KernelGraphicsContext *arg1, struct KernelChannel *arg2);
+GR_GLOBALCTX_BUFFER kgrctxGetRegisterAccessMapId_IMPL(struct OBJGPU *arg0, struct KernelGraphicsContext *arg1, struct KernelChannel *arg2);
#ifdef __nvoc_kernel_graphics_context_h_disabled
@@ -354,7 +354,7 @@ static inline GR_GLOBALCTX_BUFFER kgrctxGetRegisterAccessMapId(struct OBJGPU *ar
return ret;
}
#else //__nvoc_kernel_graphics_context_h_disabled
-#define kgrctxGetRegisterAccessMapId(arg0, arg1, arg2) kgrctxGetRegisterAccessMapId_PF(arg0, arg1, arg2)
+#define kgrctxGetRegisterAccessMapId(arg0, arg1, arg2) kgrctxGetRegisterAccessMapId_IMPL(arg0, arg1, arg2)
#endif //__nvoc_kernel_graphics_context_h_disabled
#define kgrctxGetRegisterAccessMapId_HAL(arg0, arg1, arg2) kgrctxGetRegisterAccessMapId(arg0, arg1, arg2)

View File

@@ -223,6 +223,20 @@ void __nvoc_init_dataField_KernelGraphics(KernelGraphics *pThis, RmHalspecOwner
pThis->bFecsRecordUcodeSeqnoSupported = ((NvBool)(0 != 0));
}
}
// Hal field -- bBug4208224WAREnabled
if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000002UL) )) /* RmVariantHal: PF_KERNEL_ONLY */
{
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000000e0UL) )) /* ChipHal: TU102 | TU104 | TU106 */
{
pThis->bBug4208224WAREnabled = ((NvBool)(0 == 0));
}
// default
else
{
pThis->bBug4208224WAREnabled = ((NvBool)(0 != 0));
}
}
}
NV_STATUS __nvoc_ctor_OBJENGSTATE(OBJENGSTATE* );
@@ -276,6 +290,39 @@ static void __nvoc_init_funcTable_KernelGraphics_1(KernelGraphics *pThis, RmHals
pThis->__kgraphicsServiceNotificationInterrupt__ = &kgraphicsServiceNotificationInterrupt_IMPL;
// Hal function -- kgraphicsCreateBug4208224Channel
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000000e0UL) )) /* ChipHal: TU102 | TU104 | TU106 */
{
pThis->__kgraphicsCreateBug4208224Channel__ = &kgraphicsCreateBug4208224Channel_TU102;
}
// default
else
{
pThis->__kgraphicsCreateBug4208224Channel__ = &kgraphicsCreateBug4208224Channel_56cd7a;
}
// Hal function -- kgraphicsInitializeBug4208224WAR
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000000e0UL) )) /* ChipHal: TU102 | TU104 | TU106 */
{
pThis->__kgraphicsInitializeBug4208224WAR__ = &kgraphicsInitializeBug4208224WAR_TU102;
}
// default
else
{
pThis->__kgraphicsInitializeBug4208224WAR__ = &kgraphicsInitializeBug4208224WAR_56cd7a;
}
// Hal function -- kgraphicsIsBug4208224WARNeeded
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000000e0UL) )) /* ChipHal: TU102 | TU104 | TU106 */
{
pThis->__kgraphicsIsBug4208224WARNeeded__ = &kgraphicsIsBug4208224WARNeeded_TU102;
}
// default
else
{
pThis->__kgraphicsIsBug4208224WARNeeded__ = &kgraphicsIsBug4208224WARNeeded_491d52;
}
// Hal function -- kgraphicsClearInterrupt
pThis->__kgraphicsClearInterrupt__ = &kgraphicsClearInterrupt_GP100;

View File

@@ -53,6 +53,7 @@ struct KGRAPHICS_STATIC_INFO;
typedef struct KGRAPHICS_STATIC_INFO KGRAPHICS_STATIC_INFO;
typedef struct KGRAPHICS_FECS_TRACE_INFO KGRAPHICS_FECS_TRACE_INFO;
typedef struct KGRAPHICS_GLOBAL_CTX_BUFFERS_INFO KGRAPHICS_GLOBAL_CTX_BUFFERS_INFO;
typedef struct KGRAPHICS_BUG4208224_CONTEXT_INFO KGRAPHICS_BUG4208224_CONTEXT_INFO;
/*!
* Static info retrieved from Physical RM detailing the configuration of the
@@ -135,6 +136,24 @@ struct KGRAPHICS_GLOBAL_CTX_BUFFERS_INFO
GR_BUFFER_ATTR vfGlobalCtxAttr[GR_GLOBALCTX_BUFFER_COUNT];
};
#define KGRAPHICS_SCRUBBER_HANDLE_VAS 0xdada0042
#define KGRAPHICS_SCRUBBER_HANDLE_PBVIRT (KGRAPHICS_SCRUBBER_HANDLE_VAS + 1)
#define KGRAPHICS_SCRUBBER_HANDLE_PBPHYS (KGRAPHICS_SCRUBBER_HANDLE_VAS + 2)
#define KGRAPHICS_SCRUBBER_HANDLE_CHANNEL (KGRAPHICS_SCRUBBER_HANDLE_VAS + 3)
#define KGRAPHICS_SCRUBBER_HANDLE_3DOBJ (KGRAPHICS_SCRUBBER_HANDLE_VAS + 4)
#define KGRAPHICS_SCRUBBER_HANDLE_USERD (KGRAPHICS_SCRUBBER_HANDLE_VAS + 5)
struct KGRAPHICS_BUG4208224_CONTEXT_INFO
{
/* Dynamically allocated client handles */
NvHandle hClient;
NvHandle hDeviceId;
NvHandle hSubdeviceId;
// Have resources been set up
NvBool bConstructed;
};
// Opaque forward declarations
typedef struct KGRAPHICS_PRIVATE_DATA KGRAPHICS_PRIVATE_DATA;
typedef struct KGRAPHICS_FECS_TRACE_INFO KGRAPHICS_FECS_TRACE_INFO;
@@ -168,6 +187,9 @@ struct KernelGraphics {
NV_STATUS (*__kgraphicsStatePostLoad__)(OBJGPU *, struct KernelGraphics *, NvU32);
void (*__kgraphicsRegisterIntrService__)(OBJGPU *, struct KernelGraphics *, IntrServiceRecord *);
NV_STATUS (*__kgraphicsServiceNotificationInterrupt__)(OBJGPU *, struct KernelGraphics *, IntrServiceServiceNotificationInterruptArguments *);
NV_STATUS (*__kgraphicsCreateBug4208224Channel__)(OBJGPU *, struct KernelGraphics *);
NV_STATUS (*__kgraphicsInitializeBug4208224WAR__)(OBJGPU *, struct KernelGraphics *);
NvBool (*__kgraphicsIsBug4208224WARNeeded__)(OBJGPU *, struct KernelGraphics *);
NvBool (*__kgraphicsClearInterrupt__)(OBJGPU *, struct KernelGraphics *, IntrServiceClearInterruptArguments *);
NvU32 (*__kgraphicsServiceInterrupt__)(OBJGPU *, struct KernelGraphics *, IntrServiceServiceInterruptArguments *);
NV_STATUS (*__kgraphicsStatePreLoad__)(POBJGPU, struct KernelGraphics *, NvU32);
@@ -185,6 +207,7 @@ struct KernelGraphics {
NvBool PRIVATE_FIELD(bUcodeSupportsPrivAccessMap);
NvBool PRIVATE_FIELD(bRtvCbSupported);
NvBool PRIVATE_FIELD(bFecsRecordUcodeSeqnoSupported);
NvBool PRIVATE_FIELD(bBug4208224WAREnabled);
NvU32 PRIVATE_FIELD(instance);
KGRAPHICS_PRIVATE_DATA *PRIVATE_FIELD(pPrivate);
NvBool PRIVATE_FIELD(bCollectingDeferredStaticData);
@@ -193,6 +216,7 @@ struct KernelGraphics {
struct CTX_BUF_POOL_INFO *PRIVATE_FIELD(pCtxBufPool);
CTX_BUF_INFO PRIVATE_FIELD(maxCtxBufSize)[9];
GR_BUFFER_ATTR PRIVATE_FIELD(ctxAttr)[9];
struct KGRAPHICS_BUG4208224_CONTEXT_INFO PRIVATE_FIELD(bug4208224Info);
};
struct KernelGraphics_PRIVATE {
const struct NVOC_RTTI *__nvoc_rtti;
@@ -212,6 +236,9 @@ struct KernelGraphics_PRIVATE {
NV_STATUS (*__kgraphicsStatePostLoad__)(OBJGPU *, struct KernelGraphics *, NvU32);
void (*__kgraphicsRegisterIntrService__)(OBJGPU *, struct KernelGraphics *, IntrServiceRecord *);
NV_STATUS (*__kgraphicsServiceNotificationInterrupt__)(OBJGPU *, struct KernelGraphics *, IntrServiceServiceNotificationInterruptArguments *);
NV_STATUS (*__kgraphicsCreateBug4208224Channel__)(OBJGPU *, struct KernelGraphics *);
NV_STATUS (*__kgraphicsInitializeBug4208224WAR__)(OBJGPU *, struct KernelGraphics *);
NvBool (*__kgraphicsIsBug4208224WARNeeded__)(OBJGPU *, struct KernelGraphics *);
NvBool (*__kgraphicsClearInterrupt__)(OBJGPU *, struct KernelGraphics *, IntrServiceClearInterruptArguments *);
NvU32 (*__kgraphicsServiceInterrupt__)(OBJGPU *, struct KernelGraphics *, IntrServiceServiceInterruptArguments *);
NV_STATUS (*__kgraphicsStatePreLoad__)(POBJGPU, struct KernelGraphics *, NvU32);
@@ -229,6 +256,7 @@ struct KernelGraphics_PRIVATE {
NvBool bUcodeSupportsPrivAccessMap;
NvBool bRtvCbSupported;
NvBool bFecsRecordUcodeSeqnoSupported;
NvBool bBug4208224WAREnabled;
NvU32 instance;
KGRAPHICS_PRIVATE_DATA *pPrivate;
NvBool bCollectingDeferredStaticData;
@@ -237,6 +265,7 @@ struct KernelGraphics_PRIVATE {
struct CTX_BUF_POOL_INFO *pCtxBufPool;
CTX_BUF_INFO maxCtxBufSize[9];
GR_BUFFER_ATTR ctxAttr[9];
struct KGRAPHICS_BUG4208224_CONTEXT_INFO bug4208224Info;
};
#ifndef __NVOC_CLASS_KernelGraphics_TYPEDEF__
@@ -279,6 +308,12 @@ NV_STATUS __nvoc_objCreate_KernelGraphics(KernelGraphics**, Dynamic*, NvU32);
#define kgraphicsStatePostLoad(arg0, arg1, flags) kgraphicsStatePostLoad_DISPATCH(arg0, arg1, flags)
#define kgraphicsRegisterIntrService(arg0, arg1, arg2) kgraphicsRegisterIntrService_DISPATCH(arg0, arg1, arg2)
#define kgraphicsServiceNotificationInterrupt(arg0, arg1, arg2) kgraphicsServiceNotificationInterrupt_DISPATCH(arg0, arg1, arg2)
#define kgraphicsCreateBug4208224Channel(arg0, arg1) kgraphicsCreateBug4208224Channel_DISPATCH(arg0, arg1)
#define kgraphicsCreateBug4208224Channel_HAL(arg0, arg1) kgraphicsCreateBug4208224Channel_DISPATCH(arg0, arg1)
#define kgraphicsInitializeBug4208224WAR(arg0, arg1) kgraphicsInitializeBug4208224WAR_DISPATCH(arg0, arg1)
#define kgraphicsInitializeBug4208224WAR_HAL(arg0, arg1) kgraphicsInitializeBug4208224WAR_DISPATCH(arg0, arg1)
#define kgraphicsIsBug4208224WARNeeded(arg0, arg1) kgraphicsIsBug4208224WARNeeded_DISPATCH(arg0, arg1)
#define kgraphicsIsBug4208224WARNeeded_HAL(arg0, arg1) kgraphicsIsBug4208224WARNeeded_DISPATCH(arg0, arg1)
#define kgraphicsClearInterrupt(arg0, arg1, arg2) kgraphicsClearInterrupt_DISPATCH(arg0, arg1, arg2)
#define kgraphicsClearInterrupt_HAL(arg0, arg1, arg2) kgraphicsClearInterrupt_DISPATCH(arg0, arg1, arg2)
#define kgraphicsServiceInterrupt(arg0, arg1, arg2) kgraphicsServiceInterrupt_DISPATCH(arg0, arg1, arg2)
@@ -463,6 +498,36 @@ static inline NV_STATUS kgraphicsServiceNotificationInterrupt_DISPATCH(OBJGPU *a
return arg1->__kgraphicsServiceNotificationInterrupt__(arg0, arg1, arg2);
}
NV_STATUS kgraphicsCreateBug4208224Channel_TU102(OBJGPU *arg0, struct KernelGraphics *arg1);
static inline NV_STATUS kgraphicsCreateBug4208224Channel_56cd7a(OBJGPU *arg0, struct KernelGraphics *arg1) {
return NV_OK;
}
static inline NV_STATUS kgraphicsCreateBug4208224Channel_DISPATCH(OBJGPU *arg0, struct KernelGraphics *arg1) {
return arg1->__kgraphicsCreateBug4208224Channel__(arg0, arg1);
}
static inline NV_STATUS kgraphicsInitializeBug4208224WAR_56cd7a(OBJGPU *arg0, struct KernelGraphics *arg1) {
return NV_OK;
}
NV_STATUS kgraphicsInitializeBug4208224WAR_TU102(OBJGPU *arg0, struct KernelGraphics *arg1);
static inline NV_STATUS kgraphicsInitializeBug4208224WAR_DISPATCH(OBJGPU *arg0, struct KernelGraphics *arg1) {
return arg1->__kgraphicsInitializeBug4208224WAR__(arg0, arg1);
}
static inline NvBool kgraphicsIsBug4208224WARNeeded_491d52(OBJGPU *arg0, struct KernelGraphics *arg1) {
return ((NvBool)(0 != 0));
}
NvBool kgraphicsIsBug4208224WARNeeded_TU102(OBJGPU *arg0, struct KernelGraphics *arg1);
static inline NvBool kgraphicsIsBug4208224WARNeeded_DISPATCH(OBJGPU *arg0, struct KernelGraphics *arg1) {
return arg1->__kgraphicsIsBug4208224WARNeeded__(arg0, arg1);
}
NvBool kgraphicsClearInterrupt_GP100(OBJGPU *arg0, struct KernelGraphics *arg1, IntrServiceClearInterruptArguments *arg2);
static inline NvBool kgraphicsClearInterrupt_DISPATCH(OBJGPU *arg0, struct KernelGraphics *arg1, IntrServiceClearInterruptArguments *arg2) {
@@ -574,6 +639,16 @@ static inline NvBool kgraphicsIsFecsRecordUcodeSeqnoSupported(OBJGPU *pGpu, stru
return pKernelGraphics_PRIVATE->bFecsRecordUcodeSeqnoSupported;
}
static inline NvBool kgraphicsGetBug4208224WAREnabled(OBJGPU *pGpu, struct KernelGraphics *pKernelGraphics) {
struct KernelGraphics_PRIVATE *pKernelGraphics_PRIVATE = (struct KernelGraphics_PRIVATE *)pKernelGraphics;
return pKernelGraphics_PRIVATE->bBug4208224WAREnabled;
}
static inline void kgraphicsSetBug4208224WAREnabled(OBJGPU *pGpu, struct KernelGraphics *pKernelGraphics, NvBool bProp) {
struct KernelGraphics_PRIVATE *pKernelGraphics_PRIVATE = (struct KernelGraphics_PRIVATE *)pKernelGraphics;
pKernelGraphics_PRIVATE->bBug4208224WAREnabled = bProp;
}
void kgraphicsDestruct_IMPL(struct KernelGraphics *arg0);
#define __nvoc_kgraphicsDestruct(arg0) kgraphicsDestruct_IMPL(arg0)

View File

@@ -1339,6 +1339,16 @@ static inline NV_STATUS kgspAllocateBooterUnloadUcodeImage(struct OBJGPU *pGpu,
#define kgspAllocateBooterUnloadUcodeImage(pGpu, pKernelGsp, ppBooterUnloadUcode) kgspAllocateBooterUnloadUcodeImage_IMPL(pGpu, pKernelGsp, ppBooterUnloadUcode)
#endif //__nvoc_kernel_gsp_h_disabled
void kgspRcAndNotifyAllUserChannels_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 exceptType);
#ifdef __nvoc_kernel_gsp_h_disabled
static inline void kgspRcAndNotifyAllUserChannels(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, NvU32 exceptType) {
NV_ASSERT_FAILED_PRECOMP("KernelGsp was disabled!");
}
#else //__nvoc_kernel_gsp_h_disabled
#define kgspRcAndNotifyAllUserChannels(pGpu, pKernelGsp, exceptType) kgspRcAndNotifyAllUserChannels_IMPL(pGpu, pKernelGsp, exceptType)
#endif //__nvoc_kernel_gsp_h_disabled
#undef PRIVATE_FIELD

View File

@@ -1044,6 +1044,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x28A1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
{ 0x28B8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Laptop GPU" },
{ 0x28B9, 0x0000, 0x0000, "NVIDIA RTX 1000 Ada Generation Laptop GPU" },
{ 0x28BA, 0x0000, 0x0000, "NVIDIA RTX 500 Ada Generation Laptop GPU" },
{ 0x28BB, 0x0000, 0x0000, "NVIDIA RTX 500 Ada Generation Laptop GPU" },
{ 0x28E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
{ 0x28E1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },

File diff suppressed because it is too large

View File

@@ -346,6 +346,7 @@ struct Subdevice {
NV_STATUS (*__subdeviceCtrlCmdKGrInternalStaticGetFecsRecordSize__)(struct Subdevice *, NV2080_CTRL_INTERNAL_STATIC_GR_GET_FECS_RECORD_SIZE_PARAMS *);
NV_STATUS (*__subdeviceCtrlCmdKGrInternalStaticGetFecsTraceDefines__)(struct Subdevice *, NV2080_CTRL_INTERNAL_STATIC_GR_GET_FECS_TRACE_DEFINES_PARAMS *);
NV_STATUS (*__subdeviceCtrlCmdKGrInternalStaticGetPdbProperties__)(struct Subdevice *, NV2080_CTRL_INTERNAL_STATIC_GR_GET_PDB_PROPERTIES_PARAMS *);
NV_STATUS (*__subdeviceCtrlCmdKGrInternalInitBug4208224War__)(struct Subdevice *, NV2080_CTRL_INTERNAL_KGR_INIT_BUG4208224_WAR_PARAMS *);
NV_STATUS (*__subdeviceCtrlCmdGpuGetCachedInfo__)(struct Subdevice *, NV2080_CTRL_GPU_GET_INFO_V2_PARAMS *);
NV_STATUS (*__subdeviceCtrlCmdGpuGetInfoV2__)(struct Subdevice *, NV2080_CTRL_GPU_GET_INFO_V2_PARAMS *);
NV_STATUS (*__subdeviceCtrlCmdGpuGetIpVersion__)(struct Subdevice *, NV2080_CTRL_GPU_GET_IP_VERSION_PARAMS *);
@@ -944,6 +945,7 @@ NV_STATUS __nvoc_objCreate_Subdevice(Subdevice**, Dynamic*, NvU32, struct CALL_C
#define subdeviceCtrlCmdKGrInternalStaticGetFecsRecordSize(pSubdevice, pParams) subdeviceCtrlCmdKGrInternalStaticGetFecsRecordSize_DISPATCH(pSubdevice, pParams)
#define subdeviceCtrlCmdKGrInternalStaticGetFecsTraceDefines(pSubdevice, pParams) subdeviceCtrlCmdKGrInternalStaticGetFecsTraceDefines_DISPATCH(pSubdevice, pParams)
#define subdeviceCtrlCmdKGrInternalStaticGetPdbProperties(pSubdevice, pParams) subdeviceCtrlCmdKGrInternalStaticGetPdbProperties_DISPATCH(pSubdevice, pParams)
#define subdeviceCtrlCmdKGrInternalInitBug4208224War(pSubdevice, pParams) subdeviceCtrlCmdKGrInternalInitBug4208224War_DISPATCH(pSubdevice, pParams)
#define subdeviceCtrlCmdGpuGetCachedInfo(pSubdevice, pGpuInfoParams) subdeviceCtrlCmdGpuGetCachedInfo_DISPATCH(pSubdevice, pGpuInfoParams)
#define subdeviceCtrlCmdGpuGetInfoV2(pSubdevice, pGpuInfoParams) subdeviceCtrlCmdGpuGetInfoV2_DISPATCH(pSubdevice, pGpuInfoParams)
#define subdeviceCtrlCmdGpuGetIpVersion(pSubdevice, pGpuIpVersionParams) subdeviceCtrlCmdGpuGetIpVersion_DISPATCH(pSubdevice, pGpuIpVersionParams)
@@ -2634,6 +2636,12 @@ static inline NV_STATUS subdeviceCtrlCmdKGrInternalStaticGetPdbProperties_DISPAT
return pSubdevice->__subdeviceCtrlCmdKGrInternalStaticGetPdbProperties__(pSubdevice, pParams);
}
NV_STATUS subdeviceCtrlCmdKGrInternalInitBug4208224War_IMPL(struct Subdevice *pSubdevice, NV2080_CTRL_INTERNAL_KGR_INIT_BUG4208224_WAR_PARAMS *pParams);
static inline NV_STATUS subdeviceCtrlCmdKGrInternalInitBug4208224War_DISPATCH(struct Subdevice *pSubdevice, NV2080_CTRL_INTERNAL_KGR_INIT_BUG4208224_WAR_PARAMS *pParams) {
return pSubdevice->__subdeviceCtrlCmdKGrInternalInitBug4208224War__(pSubdevice, pParams);
}
NV_STATUS subdeviceCtrlCmdGpuGetCachedInfo_IMPL(struct Subdevice *pSubdevice, NV2080_CTRL_GPU_GET_INFO_V2_PARAMS *pGpuInfoParams);
static inline NV_STATUS subdeviceCtrlCmdGpuGetCachedInfo_DISPATCH(struct Subdevice *pSubdevice, NV2080_CTRL_GPU_GET_INFO_V2_PARAMS *pGpuInfoParams) {

View File

@@ -238,12 +238,12 @@ static NvBool __nvoc_thunk_RmResource_vgpuconfigapiAccessCallback(struct VgpuCon
static const struct NVOC_EXPORTED_METHOD_DEF __nvoc_exported_method_def_VgpuConfigApi[] =
{
{ /* [0] */
-#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u)
+#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4u)
/*pFunc=*/ (void (*)(void)) NULL,
#else
/*pFunc=*/ (void (*)(void)) vgpuconfigapiCtrlCmdVgpuConfigSetInfo_IMPL,
-#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u)
-/*flags=*/ 0x10u,
+#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4u)
+/*flags=*/ 0x4u,
/*accessRight=*/0x0u,
/*methodId=*/ 0xa0810101u,
/*paramSize=*/ sizeof(NVA081_CTRL_VGPU_CONFIG_INFO_PARAMS),
@@ -571,7 +571,7 @@ __nvoc_ctor_VgpuConfigApi_exit:
static void __nvoc_init_funcTable_VgpuConfigApi_1(VgpuConfigApi *pThis) {
PORT_UNREFERENCED_VARIABLE(pThis);
-#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x10u)
+#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x4u)
pThis->__vgpuconfigapiCtrlCmdVgpuConfigSetInfo__ = &vgpuconfigapiCtrlCmdVgpuConfigSetInfo_IMPL;
#endif

View File

@@ -177,6 +177,7 @@ typedef struct GspSystemInfo
NvBool bIsPassthru;
NvU64 sysTimerOffsetNs;
GSP_VF_INFO gspVFInfo;
NvBool bTdrEventSupported;
} GspSystemInfo;

View File

@@ -1352,6 +1352,10 @@ NV_STATUS rpcGspSetSystemInfo_v17_00
OBJTMR *pTmr = GPU_GET_TIMER(pGpu);
rpcInfo->sysTimerOffsetNs = pTmr->sysTimerOffsetNs;
// Indicate whether the driver supports NV2080_NOTIFIERS_UCODE_RESET event.
rpcInfo->bTdrEventSupported = pGpu->getProperty(pGpu, PDB_PROP_GPU_SUPPORTS_TDR_EVENT);
status = _issueRpcAsync(pGpu, pRpc);
}

View File

@@ -35,6 +35,7 @@
#include "published/ampere/ga100/dev_ram.h"
#include "published/ampere/ga100/dev_ctrl.h"
#include "published/ampere/ga100/dev_runlist.h"
NV_STATUS
@@ -317,3 +318,93 @@ kfifoGetMaxCeChannelGroups_GA100
return maxCeChannels;
}
/**
* @brief Starts halting a channel. A start operation must be matched with a
* complete operation later to wait for the channel to be preempted.
*
* @param[in] pGpu GPU object pointer
* @param[in] pKernelFifo Kernel FIFO object pointer
* @param[in] pKernelChannel Pointer to the channel to be halted.
*/
void
kfifoStartChannelHalt_GA100
(
OBJGPU *pGpu,
KernelFifo *pKernelFifo,
KernelChannel *pKernelChannel
)
{
NvU32 chramPriBase;
NvU32 channelVal;
NvU32 runlistId;
NvU32 runlistPriBase;
NvU32 runlistVal = 0;
runlistId = kchannelGetRunlistId(pKernelChannel);
if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
ENGINE_INFO_TYPE_RUNLIST, runlistId,
ENGINE_INFO_TYPE_CHRAM_PRI_BASE, &chramPriBase) != NV_OK)
{
return;
}
if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
ENGINE_INFO_TYPE_RUNLIST, runlistId,
ENGINE_INFO_TYPE_RUNLIST_PRI_BASE, &runlistPriBase) != NV_OK)
{
return;
}
// Disable this channel.
channelVal = FLD_SET_DRF(_CHRAM, _CHANNEL, _WRITE_CONTROL, _ONES_CLEAR_BITS, 0);
channelVal = FLD_SET_DRF(_CHRAM, _CHANNEL, _ENABLE, _IN_USE, channelVal);
GPU_REG_WR32(pGpu, chramPriBase + NV_CHRAM_CHANNEL(pKernelChannel->ChID), channelVal);
// Preempt the channel.
runlistVal = FLD_SET_DRF(_RUNLIST, _PREEMPT, _TYPE, _RUNLIST, 0);
GPU_REG_WR32(pGpu, runlistPriBase + NV_RUNLIST_PREEMPT, runlistVal);
}
/**
* @brief Completes halting a channel, waiting for the channel preemption to
* complete, up to the specified timeout.
*
* @param[in] pGpu GPU object pointer
* @param[in] pKernelFifo Kernel FIFO object pointer
* @param[in] pKernelChannel Pointer to the channel in process of being halted.
* @param[in] pTimeout Specifies the timeout to wait for the channel
* preemption.
*/
void
kfifoCompleteChannelHalt_GA100
(
OBJGPU *pGpu,
KernelFifo *pKernelFifo,
KernelChannel *pKernelChannel,
RMTIMEOUT *pTimeout
)
{
NvU32 runlistId;
NvU32 runlistPriBase;
NvU32 runlistVal = 0;
runlistId = kchannelGetRunlistId(pKernelChannel);
if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
ENGINE_INFO_TYPE_RUNLIST, runlistId,
ENGINE_INFO_TYPE_RUNLIST_PRI_BASE, &runlistPriBase) != NV_OK)
{
return;
}
// Wait for the preemption to complete.
do
{
if (gpuCheckTimeout(pGpu, pTimeout) == NV_ERR_TIMEOUT)
{
break;
}
runlistVal = GPU_REG_RD32(pGpu, runlistPriBase + NV_RUNLIST_PREEMPT);
} while (FLD_TEST_DRF(_RUNLIST, _PREEMPT, _RUNLIST_PREEMPT_PENDING, _TRUE, runlistVal));
}
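Editor's note: the halt API is deliberately split so callers can pipeline it, issuing the disable-plus-preempt writes for many channels first and then polling each one under a single shared RMTIMEOUT (the kgspRcAndNotifyAllUserChannels hunk later in this commit does exactly that). A toy model of the two-pass shape, with stand-in types and helpers that are not the RM's:

#include <stdio.h>

typedef struct { int id; int preemptPending; } Channel;

/* Pass 1: request the preempt and return immediately. */
static void startChannelHalt(Channel *ch) { ch->preemptPending = 1; }

/* Pass 2: poll under a budget shared by every channel. */
static void completeChannelHalt(Channel *ch, int *sharedBudget)
{
    while (ch->preemptPending && *sharedBudget > 0) {
        (*sharedBudget)--;
        ch->preemptPending = 0; /* model the hardware finishing */
    }
}

int main(void)
{
    Channel chans[3] = { {0, 0}, {1, 0}, {2, 0} };
    int budget = 100; /* one timeout for all channels, as with RMTIMEOUT */
    for (int i = 0; i < 3; i++) startChannelHalt(&chans[i]);
    for (int i = 0; i < 3; i++) completeChannelHalt(&chans[i], &budget);
    printf("budget left: %d\n", budget);
    return 0;
}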

View File

@@ -1,5 +1,5 @@
/*
-* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -1674,6 +1674,9 @@ void kchannelNotifyGeneric_IMPL
// validate notifyIndex
NV_CHECK_OR_RETURN_VOID(LEVEL_INFO, notifyIndex < classInfo.notifiersMaxCount);
// Check if we have allocated the channel notifier action table
NV_CHECK_OR_RETURN_VOID(LEVEL_ERROR, pKernelChannel->pNotifyActions != NULL);
// handle notification if client wants it
if (pKernelChannel->pNotifyActions[notifyIndex] != classInfo.eventActionDisable)
{

View File

@@ -179,3 +179,310 @@ kgraphicsAllocGrGlobalCtxBuffers_TU102
return status;
}
/**
* @brief Initializes the Bug 4208224 WAR by performing the following actions:
* 1.) Sets up static handles inside an info struct to be referenced later
* 2.) Creates a channel tied to VEID0 on GR0
* 3.) Sends an RPC to physical RM for the physical side initialization
*/
NV_STATUS
kgraphicsInitializeBug4208224WAR_TU102
(
OBJGPU *pGpu,
KernelGraphics *pKernelGraphics
)
{
NV_STATUS status = NV_OK;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
NV2080_CTRL_INTERNAL_KGR_INIT_BUG4208224_WAR_PARAMS params = {0};
NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
kgraphicsCreateBug4208224Channel_HAL(pGpu, pKernelGraphics));
params.bTeardown = NV_FALSE;
status = pRmApi->Control(pRmApi,
pKernelGraphics->bug4208224Info.hClient,
pKernelGraphics->bug4208224Info.hSubdeviceId,
NV2080_CTRL_CMD_INTERNAL_KGR_INIT_BUG4208224_WAR,
&params,
sizeof(params));
if (status != NV_OK)
{
NV_ASSERT_OK(pRmApi->Free(pRmApi,
pKernelGraphics->bug4208224Info.hClient,
pKernelGraphics->bug4208224Info.hClient));
}
return status;
}
/*!
* @brief Creates a VEID0 channel for Bug 4208224 WAR
*
* @return NV_OK if channel created successfully
*/
NV_STATUS
kgraphicsCreateBug4208224Channel_TU102
(
OBJGPU *pGpu,
KernelGraphics *pKernelGraphics
)
{
NV_STATUS status = NV_OK;
NvHandle hClientId = NV01_NULL_OBJECT;
NvHandle hDeviceId;
NvHandle hSubdeviceId;
NvHandle hVASpace = KGRAPHICS_SCRUBBER_HANDLE_VAS;
NvHandle hPBVirtMemId = KGRAPHICS_SCRUBBER_HANDLE_PBVIRT;
NvHandle hPBPhysMemId = KGRAPHICS_SCRUBBER_HANDLE_PBPHYS;
NvHandle hChannelId = KGRAPHICS_SCRUBBER_HANDLE_CHANNEL;
NvHandle hObj3D = KGRAPHICS_SCRUBBER_HANDLE_3DOBJ;
NvHandle hUserdId = KGRAPHICS_SCRUBBER_HANDLE_USERD;
NvU32 gpFifoEntries = 32; // power-of-2 random choice
NvU64 gpFifoSize = NVA06F_GP_ENTRY__SIZE * gpFifoEntries;
NvU64 chSize = gpFifoSize;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
RsClient *pClientId;
NvBool bBcStatus;
NvBool bAcquireLock = NV_FALSE;
NvU32 sliLoopReentrancy;
NV_VASPACE_ALLOCATION_PARAMETERS vaParams;
NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
NV_CHANNEL_ALLOC_PARAMS channelGPFIFOAllocParams;
NvU32 classNum;
NvU32 primarySliSubDeviceInstance;
// XXX This should be removed when broadcast SLI support is deprecated
if (!gpumgrIsParentGPU(pGpu))
{
return NV_OK;
}
bBcStatus = gpumgrGetBcEnabledStatus(pGpu);
// FIXME these allocations corrupt BC state
NV_ASSERT_OK_OR_RETURN(
rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClientId, &hDeviceId, &hSubdeviceId));
pKernelGraphics->bug4208224Info.hClient = hClientId;
pKernelGraphics->bug4208224Info.hDeviceId = hDeviceId;
pKernelGraphics->bug4208224Info.hSubdeviceId = hSubdeviceId;
// rmapiutilAllocClientAndDeviceHandles allocates a subdevice object for this subDeviceInstance
primarySliSubDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, hClientId, &pClientId));
gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);
// As we have forced SLI broadcast mode here, temporarily reset the reentrancy count
sliLoopReentrancy = gpumgrSLILoopReentrancyPop(pGpu);
// Allocate subdevices for secondary GPUs
SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
{
NvHandle hSecondary;
NV2080_ALLOC_PARAMETERS nv2080AllocParams;
NvU32 thisSubDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
// Skip if already allocated by rmapiutilAllocClientAndDeviceHandles()
if (thisSubDeviceInstance == primarySliSubDeviceInstance)
SLI_LOOP_CONTINUE;
// Allocate a subDevice
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
clientGenResourceHandle(pClientId, &hSecondary),
cleanup);
portMemSet(&nv2080AllocParams, 0, sizeof(nv2080AllocParams));
nv2080AllocParams.subDeviceId = thisSubDeviceInstance;
NV_CHECK_OK(status, LEVEL_SILENT,
pRmApi->AllocWithHandle(pRmApi,
hClientId,
hDeviceId,
hSecondary,
NV20_SUBDEVICE_0,
&nv2080AllocParams,
sizeof(nv2080AllocParams)));
}
SLI_LOOP_END;
//
// VidHeapControl and vaspace creation calls should happen outside GPU locks
// UVM/CUDA may be holding the GPU locks here and the allocation may subsequently fail
// So explicitly release GPU locks before RmVidHeapControl
//
rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
bAcquireLock = NV_TRUE;
pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
// Create a new VAspace for channel
portMemSet(&vaParams, 0, sizeof(NV_VASPACE_ALLOCATION_PARAMETERS));
vaParams.flags = NV_VASPACE_ALLOCATION_FLAGS_PTETABLE_HEAP_MANAGED;
NV_ASSERT_OK_OR_GOTO(status,
pRmApi->AllocWithHandle(pRmApi, hClientId, hDeviceId, hVASpace, FERMI_VASPACE_A, &vaParams, sizeof(vaParams)),
cleanup);
// Allocate gpfifo entries
portMemSet(&memAllocParams, 0, sizeof(NV_MEMORY_ALLOCATION_PARAMS));
memAllocParams.owner = HEAP_OWNER_RM_CLIENT_GENERIC;
memAllocParams.type = NVOS32_TYPE_IMAGE;
memAllocParams.size = chSize;
memAllocParams.attr = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
memAllocParams.hVASpace = 0; // Physical allocations don't expect vaSpace handles
NV_ASSERT_OK_OR_GOTO(status,
pRmApi->AllocWithHandle(pRmApi, hClientId, hDeviceId, hPBPhysMemId, NV01_MEMORY_SYSTEM, &memAllocParams, sizeof(memAllocParams)),
cleanup);
portMemSet(&memAllocParams, 0, sizeof(NV_MEMORY_ALLOCATION_PARAMS));
memAllocParams.owner = HEAP_OWNER_RM_CLIENT_GENERIC;
memAllocParams.type = NVOS32_TYPE_IMAGE;
memAllocParams.size = chSize;
memAllocParams.attr = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
memAllocParams.flags = NVOS32_ALLOC_FLAGS_VIRTUAL;
memAllocParams.hVASpace = hVASpace; // Virtual allocations expect vaSpace handles
// 0 handle = allocations on gpu default vaSpace
NV_ASSERT_OK_OR_GOTO(status,
pRmApi->AllocWithHandle(pRmApi, hClientId, hDeviceId, hPBVirtMemId, NV50_MEMORY_VIRTUAL, &memAllocParams, sizeof(memAllocParams)),
cleanup);
// Allocate Userd
NvU32 userdMemClass = NV01_MEMORY_LOCAL_USER;
NvU32 ctrlSize;
if (gpuIsClassSupported(pGpu, VOLTA_CHANNEL_GPFIFO_A))
{
ctrlSize = sizeof(Nvc36fControl);
}
else if (gpuIsClassSupported(pGpu, TURING_CHANNEL_GPFIFO_A))
{
ctrlSize = sizeof(Nvc46fControl);
}
else
{
status = NV_ERR_NOT_SUPPORTED;
goto cleanup;
}
portMemSet(&memAllocParams, 0, sizeof(NV_MEMORY_ALLOCATION_PARAMS));
memAllocParams.owner = HEAP_OWNER_RM_CLIENT_GENERIC;
memAllocParams.size = ctrlSize;
memAllocParams.type = NVOS32_TYPE_IMAGE;
// Apply registry overrides to USERD.
switch (DRF_VAL(_REG_STR_RM, _INST_LOC, _USERD, pGpu->instLocOverrides))
{
case NV_REG_STR_RM_INST_LOC_USERD_NCOH:
case NV_REG_STR_RM_INST_LOC_USERD_COH:
userdMemClass = NV01_MEMORY_SYSTEM;
memAllocParams.attr = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
break;
case NV_REG_STR_RM_INST_LOC_USERD_VID:
case NV_REG_STR_RM_INST_LOC_USERD_DEFAULT:
memAllocParams.attr = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM);
break;
}
NV_ASSERT_OK_OR_GOTO(status,
pRmApi->AllocWithHandle(pRmApi, hClientId, hDeviceId, hUserdId,
userdMemClass, &memAllocParams, sizeof(memAllocParams)),
cleanup);
// Get fifo channel class Id
classNum = kfifoGetChannelClassId(pGpu, GPU_GET_KERNEL_FIFO(pGpu));
NV_ASSERT_OR_GOTO(classNum != 0, cleanup);
// Allocate a bare channel
portMemSet(&channelGPFIFOAllocParams, 0, sizeof(NV_CHANNEL_ALLOC_PARAMS));
channelGPFIFOAllocParams.hVASpace = hVASpace;
channelGPFIFOAllocParams.hObjectBuffer = hPBVirtMemId;
channelGPFIFOAllocParams.gpFifoEntries = gpFifoEntries;
//
// Set the gpFifoOffset to zero intentionally since we only need this channel
// to be created, but will not submit any work to it. So it's fine not to
// provide a valid offset here.
//
channelGPFIFOAllocParams.gpFifoOffset = 0;
channelGPFIFOAllocParams.hUserdMemory[0] = hUserdId;
channelGPFIFOAllocParams.engineType = gpuGetNv2080EngineType(RM_ENGINE_TYPE_GR0);
NV_ASSERT_OK_OR_GOTO(status,
pRmApi->AllocWithHandle(pRmApi, hClientId, hDeviceId, hChannelId,
classNum, &channelGPFIFOAllocParams, sizeof(channelGPFIFOAllocParams)),
cleanup);
// Free userD handle as it shouldn't be needed for this channel
NV_ASSERT_OK_OR_GOTO(status,
pRmApi->Free(pRmApi, hClientId, hUserdId),
cleanup);
// Reacquire the GPU locks
NV_ASSERT_OK_OR_GOTO(status,
rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_GR),
cleanup);
bAcquireLock = NV_FALSE;
pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
// Get KernelGraphicsObject class Id
NV_ASSERT_OK_OR_GOTO(status,
kgraphicsGetClassByType(pGpu, pKernelGraphics, GR_OBJECT_TYPE_3D, &classNum),
cleanup);
NV_ASSERT_OR_GOTO(classNum != 0, cleanup);
// Allocate a GR object on the channel
NV_ASSERT_OK_OR_GOTO(status,
pRmApi->AllocWithHandle(pRmApi, hClientId, hChannelId, hObj3D, classNum, NULL, 0),
cleanup);
cleanup:
if (bAcquireLock)
{
NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(status,
rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_GR));
pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
}
if (status != NV_OK)
{
// Drop GPU lock while freeing memory and channel handles
// Free all handles
NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(status,
pRmApi->Free(pRmApi, hClientId, hClientId));
}
pKernelGraphics->bug4208224Info.bConstructed = (status == NV_OK);
// Restore the reentrancy count
gpumgrSLILoopReentrancyPush(pGpu, sliLoopReentrancy);
gpumgrSetBcEnabledStatus(pGpu, bBcStatus);
return status;
}
/*!
* @brief Determines if a channel for Bug 4208224 is needed
*/
NvBool
kgraphicsIsBug4208224WARNeeded_TU102
(
OBJGPU *pGpu,
KernelGraphics *pKernelGraphics
)
{
if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM))
{
return NV_FALSE;
}
return kgraphicsGetBug4208224WAREnabled(pGpu, pKernelGraphics);
}

View File

@@ -98,6 +98,21 @@ static NV_STATUS _kgraphicsPostSchedulingEnableHandler(OBJGPU *, void *);
static void
_kgraphicsInitRegistryOverrides(OBJGPU *pGpu, KernelGraphics *pKernelGraphics)
{
{
NvU32 data;
if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FORCE_GR_SCRUBBER_CHANNEL, &data) == NV_OK)
{
if (data == NV_REG_STR_RM_FORCE_GR_SCRUBBER_CHANNEL_DISABLE)
{
kgraphicsSetBug4208224WAREnabled(pGpu, pKernelGraphics, NV_FALSE);
}
else if (data == NV_REG_STR_RM_FORCE_GR_SCRUBBER_CHANNEL_ENABLE)
{
kgraphicsSetBug4208224WAREnabled(pGpu, pKernelGraphics, NV_TRUE);
}
}
}
return;
}
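Editor's note: the registry hook above is a tri-state override. If the key is absent, the per-chip HAL default (set in the dataField init earlier in this commit) stands; otherwise the WAR is forced off or on. A small standalone sketch of the same shape, with stand-in key names and values:

#include <stdio.h>

#define FORCE_SCRUBBER_DISABLE 0u
#define FORCE_SCRUBBER_ENABLE  1u

/* Pretend registry read: returns 0 when the key exists. */
static int readRegistryDword(const char *key, unsigned *data)
{
    (void)key;
    *data = FORCE_SCRUBBER_ENABLE;
    return 0;
}

int main(void)
{
    int warEnabled = 0; /* per-chip HAL default */
    unsigned data;
    if (readRegistryDword("ForceGrScrubberChannel", &data) == 0) {
        if (data == FORCE_SCRUBBER_DISABLE)
            warEnabled = 0;
        else if (data == FORCE_SCRUBBER_ENABLE)
            warEnabled = 1;
    }
    printf("bBug4208224WAREnabled = %d\n", warEnabled);
    return 0;
}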
@@ -308,6 +323,10 @@ kgraphicsStateInitLocked_IMPL
NULL, NULL);
}
pKernelGraphics->bug4208224Info.hClient = NV01_NULL_OBJECT;
pKernelGraphics->bug4208224Info.hDeviceId = NV01_NULL_OBJECT;
pKernelGraphics->bug4208224Info.hSubdeviceId = NV01_NULL_OBJECT;
pKernelGraphics->bug4208224Info.bConstructed = NV_FALSE;
return NV_OK;
}
@@ -356,6 +375,21 @@ kgraphicsStatePreUnload_IMPL
NvU32 flags
)
{
if (pKernelGraphics->bug4208224Info.bConstructed)
{
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
NV2080_CTRL_INTERNAL_KGR_INIT_BUG4208224_WAR_PARAMS params = {0};
params.bTeardown = NV_TRUE;
NV_ASSERT_OK(pRmApi->Control(pRmApi,
pKernelGraphics->bug4208224Info.hClient,
pKernelGraphics->bug4208224Info.hSubdeviceId,
NV2080_CTRL_CMD_INTERNAL_KGR_INIT_BUG4208224_WAR,
&params,
sizeof(params)));
NV_ASSERT_OK(pRmApi->Free(pRmApi, pKernelGraphics->bug4208224Info.hClient, pKernelGraphics->bug4208224Info.hClient));
pKernelGraphics->bug4208224Info.bConstructed = NV_FALSE;
}
fecsBufferUnmap(pGpu, pKernelGraphics);
@@ -440,7 +474,7 @@ _kgraphicsPostSchedulingEnableHandler
const KGRAPHICS_STATIC_INFO *pKernelGraphicsStaticInfo = kgraphicsGetStaticInfo(pGpu, pKernelGraphics);
// Nothing to do for non-GSPCLIENT
-if (!IS_GSP_CLIENT(pGpu))
+if (!IS_GSP_CLIENT(pGpu) && !kgraphicsIsBug4208224WARNeeded_HAL(pGpu, pKernelGraphics))
return NV_OK;
// Defer golden context channel creation to GPU instance configuration
@@ -471,7 +505,13 @@ _kgraphicsPostSchedulingEnableHandler
}
}
-return kgraphicsCreateGoldenImageChannel(pGpu, pKernelGraphics);
+NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kgraphicsCreateGoldenImageChannel(pGpu, pKernelGraphics));
if (kgraphicsIsBug4208224WARNeeded_HAL(pGpu, pKernelGraphics))
{
return kgraphicsInitializeBug4208224WAR_HAL(pGpu, pKernelGraphics);
}
return NV_OK;
}
void

View File

@@ -3201,19 +3201,16 @@ kgrctxDecObjectCount_IMPL
* one VGPU configuration.
*/
GR_GLOBALCTX_BUFFER
-kgrctxGetRegisterAccessMapId_PF
+kgrctxGetRegisterAccessMapId_IMPL
(
OBJGPU *pGpu,
KernelGraphicsContext *pKernelGraphicsContext,
KernelChannel *pKernelChannel
)
{
-RmClient *pRmClient = dynamicCast(RES_GET_CLIENT(pKernelChannel), RmClient);
-RS_PRIV_LEVEL privLevel = rmclientGetCachedPrivilege(pRmClient);
// Using cached privilege because this function is called at a raised IRQL.
-if ((privLevel >= RS_PRIV_LEVEL_USER_ROOT)
-&& !hypervisorIsVgxHyper() && IS_GFID_PF(kchannelGetGfid(pKernelChannel)))
+if (kchannelCheckIsAdmin(pKernelChannel)
+&& !hypervisorIsVgxHyper() && IS_GFID_PF(kchannelGetGfid(pKernelChannel)))
{
return GR_GLOBALCTX_BUFFER_UNRESTRICTED_PRIV_ACCESS_MAP;
}

View File

@@ -773,6 +773,22 @@ kgspResetHw_TU102
return NV_OK;
}
static NvBool kgspCrashCatReportImpactsGspRm(CrashCatReport *pReport)
{
NV_CRASHCAT_CONTAINMENT containment;
containment = crashcatReportSourceContainment_HAL(pReport);
switch (containment)
{
case NV_CRASHCAT_CONTAINMENT_RISCV_MODE_M:
case NV_CRASHCAT_CONTAINMENT_RISCV_HART:
case NV_CRASHCAT_CONTAINMENT_UNCONTAINED:
return NV_TRUE;
default:
return NV_FALSE;
}
}
NvBool
kgspHealthCheck_TU102
(
@@ -791,7 +807,8 @@ kgspHealthCheck_TU102
while ((pReport = crashcatEngineGetNextCrashReport(pCrashCatEng)) != NULL)
{
-bHealthy = NV_FALSE;
+if (kgspCrashCatReportImpactsGspRm(pReport))
+    bHealthy = NV_FALSE;
NV_PRINTF(LEVEL_ERROR,
"****************************** GSP-CrashCat Report *******************************\n");
@@ -839,10 +856,19 @@ kgspHealthCheck_TU102
exit_health_check:
if (!bHealthy)
{
NvBool bFirstFatal = !pKernelGsp->bFatalError;
pKernelGsp->bFatalError = NV_TRUE;
if (pKernelGsp->pRpc)
{
kgspLogRpcDebugInfo(pGpu, pKernelGsp->pRpc, GSP_ERROR, pKernelGsp->bPollingForRpcResponse);
}
if (bFirstFatal)
{
kgspRcAndNotifyAllUserChannels(pGpu, pKernelGsp, GSP_ERROR);
}
gpuCheckEccCounts_HAL(pGpu);

View File

@@ -583,6 +583,81 @@ _kgspRpcRCTriggered
rpc_params->partitionAttributionId);
}
/*!
* This function is called on a critical FW crash to RC all user mode channels and
* notify them with an error code, allowing user mode apps to fail deterministically.
*
* @param[in] pGpu GPU object pointer
* @param[in] pKernelGsp KernelGsp object pointer
* @param[in] exceptType Error code to send to the RC notifiers
*
*/
void
kgspRcAndNotifyAllUserChannels
(
OBJGPU *pGpu,
KernelGsp *pKernelGsp,
NvU32 exceptType
)
{
KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu);
KernelChannel *pKernelChannel;
KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
CHANNEL_ITERATOR chanIt;
RMTIMEOUT timeout;
NV_PRINTF(LEVEL_ERROR, "RC all user channels for critical error %d.\n", exceptType);
// Pass 1: halt all user channels.
kfifoGetChannelIterator(pGpu, pKernelFifo, &chanIt);
while (kfifoGetNextKernelChannel(pGpu, pKernelFifo, &chanIt, &pKernelChannel) == NV_OK)
{
//
// Kernel (uvm) channels are skipped to work around nvbug 4503046, where
// uvm attributes all errors as global and fails operations on all GPUs,
// in addition to the current failing GPU.
//
if (kchannelCheckIsKernel(pKernelChannel))
{
continue;
}
kfifoStartChannelHalt(pGpu, pKernelFifo, pKernelChannel);
}
//
// Pass 2: Wait for the halts to complete, and RC notify the user channels.
// The channel halts require a preemption, which may not be able to complete
// since the GSP is no longer servicing interrupts. Wait for up to the
// default GPU timeout value for the preemptions to complete.
//
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
kfifoGetChannelIterator(pGpu, pKernelFifo, &chanIt);
while (kfifoGetNextKernelChannel(pGpu, pKernelFifo, &chanIt, &pKernelChannel) == NV_OK)
{
// Skip kernel (uvm) channels as only user channel halts are initiated above.
if (kchannelCheckIsKernel(pKernelChannel))
{
continue;
}
kfifoCompleteChannelHalt(pGpu, pKernelFifo, pKernelChannel, &timeout);
NV_ASSERT_OK(krcErrorSetNotifier(pGpu, pKernelRc,
pKernelChannel,
exceptType,
kchannelGetEngineType(pKernelChannel),
RC_NOTIFIER_SCOPE_CHANNEL));
NV_ASSERT_OK(krcErrorSendEventNotifications_HAL(pGpu, pKernelRc,
pKernelChannel,
kchannelGetEngineType(pKernelChannel),
exceptType,
RC_NOTIFIER_SCOPE_CHANNEL,
0));
}
}
/*!
* Receive Xid notification from GSP-RM
*

View File

@@ -125,7 +125,7 @@ _vidmemPmaAllocate
MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
PMA *pPma = &pHeap->pmaObject;
NvU64 size = 0;
-NvU32 pageCount;
+NvU32 pageCount = 0;
NvU32 pmaInfoSize;
NvU64 pageSize;
NV_STATUS status;
@@ -290,6 +290,9 @@ _vidmemPmaAllocate
// Get the number of pages to be allocated by PMA
NV_CHECK_OR_RETURN(LEVEL_ERROR,
(NV_DIV_AND_CEIL(size, pageSize) <= NV_U32_MAX),
NV_ERR_NO_MEMORY);
pageCount = (NvU32) NV_DIV_AND_CEIL(size, pageSize);
retry_alloc:
@@ -301,7 +304,12 @@ retry_alloc:
}
else
{
-pmaInfoSize = sizeof(PMA_ALLOC_INFO) + ((pageCount - 1) * sizeof(NvU64));
+NV_CHECK_OR_RETURN(LEVEL_ERROR,
+portSafeMulU32((pageCount - 1), (sizeof(NvU64)), &pmaInfoSize),
+NV_ERR_NO_MEMORY);
+NV_CHECK_OR_RETURN(LEVEL_ERROR,
+portSafeAddU32(pmaInfoSize, (sizeof(PMA_ALLOC_INFO)), &pmaInfoSize),
+NV_ERR_NO_MEMORY);
}
// Alloc the tracking structure and store the values in it.
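Editor's note: the replaced line computed pmaInfoSize with plain NvU32 arithmetic, so a large pageCount could wrap and produce an undersized allocation; the new code fails with NV_ERR_NO_MEMORY instead. A sketch of what such overflow-checked helpers do (local stand-ins, not NvPort's actual implementations):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Report failure instead of silently wrapping. */
static bool safe_mul_u32(uint32_t a, uint32_t b, uint32_t *out)
{
    uint64_t wide = (uint64_t)a * (uint64_t)b;
    if (wide > UINT32_MAX)
        return false;
    *out = (uint32_t)wide;
    return true;
}

static bool safe_add_u32(uint32_t a, uint32_t b, uint32_t *out)
{
    if (a > UINT32_MAX - b)
        return false;
    *out = a + b;
    return true;
}

int main(void)
{
    uint32_t size;
    /* A pageCount this large would wrap a plain 32-bit multiply. */
    if (!safe_mul_u32(0x20000000u, (uint32_t)sizeof(uint64_t), &size) ||
        !safe_add_u32(size, 64u, &size))
        puts("size computation overflows 32 bits; fail with NV_ERR_NO_MEMORY");
    return 0;
}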

View File

@@ -115,6 +115,11 @@ void crashcatReportLogSource_V1_LIBOS3(CrashCatReport *pReport)
}
}
NV_CRASHCAT_CONTAINMENT crashcatReportSourceContainment_V1_LIBOS3(CrashCatReport *pReport)
{
return crashcatReportV1SourceCauseContainment(&pReport->v1.report);
}
void crashcatReportLogReporter_V1_LIBOS3(CrashCatReport *pReport)
{
NvCrashCatReport_V1 *pReportV1 = &pReport->v1.report;