diff --git a/README.md b/README.md index 3cac64804..2600ed18b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 570.190. +version 570.195.03. ## How to Build @@ -17,7 +17,7 @@ as root: Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -570.190 driver release. This can be achieved by installing +570.195.03 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -185,7 +185,7 @@ table below). For details on feature support and limitations, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/570.190/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/570.195.03/README/kernel_open.html For vGPU support, please refer to the README.vgpu packaged in the vGPU Host Package for more details. diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index 00c027771..3d2240d87 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc ccflags-y += -I$(src) ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args ccflags-y += -D__KERNEL__ -DMODULE -DNVRM -ccflags-y += -DNV_VERSION_STRING=\"570.190\" +ccflags-y += -DNV_VERSION_STRING=\"570.195.03\" ifneq ($(SYSSRCHOST1X),) ccflags-y += -I$(SYSSRCHOST1X) diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c index 568560540..9e29338c3 100644 --- a/kernel-open/nvidia/nv.c +++ b/kernel-open/nvidia/nv.c @@ -2423,6 +2423,12 @@ nvidia_ioctl( { nv_ioctl_wait_open_complete_t *params = arg_copy; + if (arg_size != sizeof(nv_ioctl_wait_open_complete_t)) + { + status = -EINVAL; + goto done_early; + } + params->rc = nvlfp->open_rc; params->adapterStatus = nvlfp->adapter_status; goto done_early; @@ -2503,8 +2509,12 @@ nvidia_ioctl( goto done; } + /* atomically check and alloc attached_gpus */ + down(&nvl->ldata_lock); + if (nvlfp->num_attached_gpus != 0) { + up(&nvl->ldata_lock); status = -EINVAL; goto done; } @@ -2512,12 +2522,15 @@ nvidia_ioctl( NV_KMALLOC(nvlfp->attached_gpus, arg_size); if (nvlfp->attached_gpus == NULL) { + up(&nvl->ldata_lock); status = -ENOMEM; goto done; } memcpy(nvlfp->attached_gpus, arg_copy, arg_size); nvlfp->num_attached_gpus = num_arg_gpus; + up(&nvl->ldata_lock); + for (i = 0; i < nvlfp->num_attached_gpus; i++) { if (nvlfp->attached_gpus[i] == 0) @@ -2533,9 +2546,14 @@ nvidia_ioctl( nvidia_dev_put(nvlfp->attached_gpus[i], sp); } + /* atomically free attached_gpus */ + down(&nvl->ldata_lock); + NV_KFREE(nvlfp->attached_gpus, arg_size); nvlfp->num_attached_gpus = 0; + up(&nvl->ldata_lock); + status = -EINVAL; break; } diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h index 9c4a387a0..950302998 100644 --- a/src/common/inc/nvBldVer.h +++ b/src/common/inc/nvBldVer.h @@ -36,25 +36,25 @@ // and then checked back in. 
You cannot make changes to these sections without // corresponding changes to the buildmeister script #ifndef NV_BUILD_BRANCH - #define NV_BUILD_BRANCH r570_00 + #define NV_BUILD_BRANCH r573_76 #endif #ifndef NV_PUBLIC_BRANCH - #define NV_PUBLIC_BRANCH r570_00 + #define NV_PUBLIC_BRANCH r573_76 #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r570_00-575" -#define NV_BUILD_CHANGELIST_NUM (36467544) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r573_76-590" +#define NV_BUILD_CHANGELIST_NUM (36569223) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r570/r570_00-575" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36467544) +#define NV_BUILD_NAME "rel/gpu_drv/r570/r573_76-590" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36569223) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r570_00-569" -#define NV_BUILD_CHANGELIST_NUM (36467544) -#define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "573.73" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36467544) +#define NV_BUILD_BRANCH_VERSION "r573_76-1" +#define NV_BUILD_CHANGELIST_NUM (36518415) +#define NV_BUILD_TYPE "Nightly" +#define NV_BUILD_NAME "r573_76-250909" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36506718) #define NV_BUILD_BRANCH_BASE_VERSION R570 #endif // End buildmeister python edited section diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h index cd6960628..22459a025 100644 --- a/src/common/inc/nvUnixVersion.h +++ b/src/common/inc/nvUnixVersion.h @@ -4,7 +4,7 @@ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "570.190" +#define NV_VERSION_STRING "570.195.03" #else diff --git a/src/common/sdk/nvidia/inc/nverror.h b/src/common/sdk/nvidia/inc/nverror.h index 79167d110..423d64449 100644 --- a/src/common/sdk/nvidia/inc/nverror.h +++ b/src/common/sdk/nvidia/inc/nverror.h @@ -109,7 +109,7 @@ #define ROBUST_CHANNEL_NVJPG5_ERROR (103) #define ROBUST_CHANNEL_NVJPG6_ERROR (104) #define ROBUST_CHANNEL_NVJPG7_ERROR (105) -#define DESTINATION_FLA_TRANSLATION_ERROR (108) +#define NVLINK_REMOTE_TRANSLATION_ERROR (108) #define SEC_FAULT_ERROR (110) #define GSP_RPC_TIMEOUT (119) #define GSP_ERROR (120) @@ -129,7 +129,7 @@ #define ROBUST_CHANNEL_CE18_ERROR (134) #define ROBUST_CHANNEL_CE19_ERROR (135) #define ALI_TRAINING_FAIL (136) -#define NVLINK_FLA_PRIV_ERR (137) +#define NVLINK_PRIV_ERR (137) #define ROBUST_CHANNEL_DLA_ERROR (138) #define ROBUST_CHANNEL_OFA1_ERROR (139) #define UNRECOVERABLE_ECC_ERROR_ESCAPE (140) diff --git a/src/nvidia/generated/g_kern_gmmu_nvoc.h b/src/nvidia/generated/g_kern_gmmu_nvoc.h index 66ffbb810..a857e9d4e 100644 --- a/src/nvidia/generated/g_kern_gmmu_nvoc.h +++ b/src/nvidia/generated/g_kern_gmmu_nvoc.h @@ -500,7 +500,6 @@ struct KernelGmmu { NvBool PDB_PROP_KGMMU_REDUCE_NR_FAULT_BUFFER_SIZE; // Data members - NvBool bReportFlaTranslationXid; MEMORY_DESCRIPTOR *pFakeSparseBuffer; NvU64 fakeSparseEntry[3]; NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo; @@ -636,7 +635,6 @@ struct KernelGmmu_PRIVATE { NvBool PDB_PROP_KGMMU_REDUCE_NR_FAULT_BUFFER_SIZE; // Data members - NvBool bReportFlaTranslationXid; MEMORY_DESCRIPTOR *pFakeSparseBuffer; NvU64 fakeSparseEntry[3]; NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo; diff --git a/src/nvidia/inc/kernel/core/thread_state.h 
b/src/nvidia/inc/kernel/core/thread_state.h index a619a6cb7..1f721c8ed 100644 --- a/src/nvidia/inc/kernel/core/thread_state.h +++ b/src/nvidia/inc/kernel/core/thread_state.h @@ -76,6 +76,7 @@ struct THREAD_STATE_NODE */ NvU32 threadSeqId; NvBool bValid; + NvBool bUsingHeap; THREAD_TIMEOUT_STATE timeout; NvU32 cpuNum; NvU32 flags; @@ -208,6 +209,7 @@ void threadStateOnlyProcessWorkISRAndDeferredIntHandler(THREAD_STATE_NODE void threadStateOnlyFreeISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32); void threadStateFreeISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32); void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags); +THREAD_STATE_NODE* threadStateAlloc(NvU32 flags); void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags); NV_STATUS threadStateGetCurrent(THREAD_STATE_NODE **ppThreadNode, OBJGPU *pGpu); diff --git a/src/nvidia/inc/kernel/gpu/rpc/objrpc.h b/src/nvidia/inc/kernel/gpu/rpc/objrpc.h index 4f6d57ec7..67db98c43 100644 --- a/src/nvidia/inc/kernel/gpu/rpc/objrpc.h +++ b/src/nvidia/inc/kernel/gpu/rpc/objrpc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2004-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2004-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -50,8 +50,8 @@ TYPEDEF_BITVECTOR(MC_ENGINE_BITVECTOR); #include "g_rpc_hal.h" // For RPC_HAL_IFACES #include "g_rpc_odb.h" // For RPC_HAL_IFACES -#define RPC_TIMEOUT_LIMIT_PRINT_RATE_THRESH 3 // rate limit after 3 prints -#define RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP 29 // skip 29 of 30 prints +#define RPC_TIMEOUT_GPU_RESET_THRESHOLD 3 // Reset GPU after 3 back to back GSP RPC timeout +#define RPC_TIMEOUT_PRINT_RATE_SKIP 29 // skip 29 of 30 prints #define RPC_HISTORY_DEPTH 128 diff --git a/src/nvidia/inc/kernel/mem_mgr/ctx_buf_pool.h b/src/nvidia/inc/kernel/mem_mgr/ctx_buf_pool.h index 967bc948e..204a11471 100644 --- a/src/nvidia/inc/kernel/mem_mgr/ctx_buf_pool.h +++ b/src/nvidia/inc/kernel/mem_mgr/ctx_buf_pool.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2016-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -44,10 +44,9 @@ struct CTX_BUF_POOL_INFO { // // Each array index corresponds to a pointer to memory pool with - // page size corresponding to RM_ATTR_PAGE_SIZE_* - // Pool corresponding to RM_ATTR_PAGE_SIZE_DEFAULT will be left unused + // page size corresponding to POOL_CONFIG_MODE // - RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool[RM_ATTR_PAGE_SIZE_INVALID]; + RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool[POOL_CONFIG_MAX_SUPPORTED]; }; // List of all context buffers supported by memory pools diff --git a/src/nvidia/inc/kernel/mem_mgr/pool_alloc.h b/src/nvidia/inc/kernel/mem_mgr/pool_alloc.h index d07cc6919..019093b2c 100644 --- a/src/nvidia/inc/kernel/mem_mgr/pool_alloc.h +++ b/src/nvidia/inc/kernel/mem_mgr/pool_alloc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2016-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -51,6 +51,7 @@ typedef enum POOL_CONFIG_CTXBUF_256G, // configure pool for RM internal allocations like ctx buffers with 256GB page size POOL_CONFIG_CTXBUF_512M, // configure pool for RM internal allocations like ctx buffers with 512MB page size POOL_CONFIG_CTXBUF_2M, // configure pool for RM internal allocations like ctx buffers with 2MB page size + POOL_CONFIG_CTXBUF_128K, // configure pool for RM internal allocations like ctx buffers with 128KB page size POOL_CONFIG_CTXBUF_64K, // configure pool for RM internal allocations like ctx buffers with 64KB page size POOL_CONFIG_CTXBUF_4K, // configure pool for RM internal allocations like ctx buffers with 4KB page size POOL_CONFIG_MAX_SUPPORTED diff --git a/src/nvidia/src/kernel/core/thread_state.c b/src/nvidia/src/kernel/core/thread_state.c index d6961ab84..51759ece2 100644 --- a/src/nvidia/src/kernel/core/thread_state.c +++ b/src/nvidia/src/kernel/core/thread_state.c @@ -508,27 +508,21 @@ static void _threadStateLogInitCaller(THREAD_STATE_NODE *pThreadNode, NvU64 func } /** - * @brief Initialize a threadState for regular threads (non-interrupt context) - * - * @param[in/out] pThreadNode - * @param[in] flags - * + * @brief Common initialization logic for both stack and heap thread state nodes + * + * @param[in/out] pThreadNode The node to initialize + * @param[in] flags Thread state flags + * @param[in] bUsingHeap NV_TRUE if heap-allocated, NV_FALSE if stack-allocated + * + * @return NV_OK on success, error code on failure */ -void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags) +static NV_STATUS _threadStateInitCommon(THREAD_STATE_NODE *pThreadNode, NvU32 flags, NvBool bUsingHeap) { NV_STATUS rmStatus; NvU64 funcAddr; - // Isrs should be using threadStateIsrInit(). - NV_ASSERT((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS | - THREAD_STATE_FLAGS_IS_ISR | - THREAD_STATE_FLAGS_DEFERRED_INT_HANDLER_RUNNING)) == 0); - - // Check to see if ThreadState is enabled - if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED)) - return; - portMemSet(pThreadNode, 0, sizeof(*pThreadNode)); + pThreadNode->bUsingHeap = bUsingHeap; pThreadNode->threadSeqId = portAtomicIncrementU32(&threadStateDatabase.threadSeqCntr); pThreadNode->cpuNum = osGetCurrentProcessorNumber(); pThreadNode->flags = flags; @@ -546,9 +540,10 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags) rmStatus = osGetCurrentThread(&pThreadNode->threadId); if (rmStatus != NV_OK) - return; + return rmStatus; - NV_ASSERT_OR_RETURN_VOID(pThreadNode->cpuNum < threadStateDatabase.maxCPUs); + NV_ASSERT_OR_RETURN(pThreadNode->cpuNum < threadStateDatabase.maxCPUs, + NV_ERR_INVALID_STATE); funcAddr = (NvU64) (NV_RETURN_ADDRESS()); @@ -558,27 +553,23 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags) // Reset the threadId as insertion failed. 
bValid is already NV_FALSE pThreadNode->threadId = 0; portSyncSpinlockRelease(threadStateDatabase.spinlock); - return; - } - else - { - pThreadNode->bValid = NV_TRUE; - rmStatus = NV_OK; + return NV_ERR_GENERIC; } + pThreadNode->bValid = NV_TRUE; _threadStateLogInitCaller(pThreadNode, funcAddr); portSyncSpinlockRelease(threadStateDatabase.spinlock); _threadStatePrintInfo(pThreadNode); - NV_ASSERT(rmStatus == NV_OK); threadPriorityStateAlloc(); if (TLS_MIRROR_THREADSTATE) { THREAD_STATE_NODE **pTls = (THREAD_STATE_NODE **)tlsEntryAcquire(TLS_ENTRY_ID_THREADSTATE); - NV_ASSERT_OR_RETURN_VOID(pTls != NULL); + NV_ASSERT_OR_RETURN(pTls != NULL, NV_ERR_INVALID_STATE); + if (*pTls != NULL) { NV_PRINTF(LEVEL_WARNING, @@ -587,6 +578,66 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags) } *pTls = pThreadNode; } + return NV_OK; +} + +/** + * @brief Initialize a threadState for regular threads (non-interrupt context) + * Use the new UAF-safe API for new code, threadStateAlloc(). + * @param[in/out] pThreadNode + * @param[in] flags + * + */ +void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags) +{ + // Isrs should be using threadStateIsrInit(). + NV_ASSERT_OR_RETURN_VOID((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS | + THREAD_STATE_FLAGS_IS_ISR | + THREAD_STATE_FLAGS_DEFERRED_INT_HANDLER_RUNNING)) == 0); + + // Check to see if ThreadState is enabled + if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED)) + return; + + // Use common initialization logic (stack-allocated) + // Note: Legacy void API ignores errors for backward compatibility + _threadStateInitCommon(pThreadNode, flags, NV_FALSE); +} + +/** + * @brief Allocate a heap-based threadState + * @param[in] flags Thread state flags + * + * @return Heap-allocated THREAD_STATE_NODE* on success, NULL on failure + */ +THREAD_STATE_NODE* threadStateAlloc(NvU32 flags) +{ + THREAD_STATE_NODE *pHeapNode; + NV_STATUS rmStatus; + + // Isrs should be using threadStateIsrInit(). + NV_ASSERT_OR_RETURN((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS | + THREAD_STATE_FLAGS_IS_ISR | + THREAD_STATE_FLAGS_DEFERRED_INT_HANDLER_RUNNING)) == 0, NULL); + + // Check to see if ThreadState is enabled + if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED)) + return NULL; + + // Allocate heap node directly + pHeapNode = portMemAllocNonPaged(sizeof(THREAD_STATE_NODE)); + if (pHeapNode == NULL) + return NULL; + + rmStatus = _threadStateInitCommon(pHeapNode, flags, NV_TRUE); + if (rmStatus != NV_OK) + goto cleanup_heap; + + return pHeapNode; + +cleanup_heap: + portMemFree(pHeapNode); + return NULL; } /** @@ -870,6 +921,12 @@ void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags) r); } } + + // Free heap memory if this node was heap-allocated + if (pThreadNode->bUsingHeap) + { + portMemFree(pThreadNode); + } } /** diff --git a/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c b/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c index 25cdc0bc4..f0f3adee6 100644 --- a/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c +++ b/src/nvidia/src/kernel/gpu/fifo/kernel_channel.c @@ -3117,10 +3117,12 @@ kchannelCtrlCmdResetIsolatedChannel_IMPL OBJGPU *pGpu = GPU_RES_GET_GPU(pKernelChannel); RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); - // This ctrl sets bIsRcPending in the KernelChannel object. 
Because Kernel-RM is // the source of truth on this, it's important that this ctrl is called from CPU-RM - NV_ASSERT_OR_RETURN(!RMCFG_FEATURE_PLATFORM_GSP, NV_ERR_INVALID_OPERATION); + + // In case of vGPU this Rmctrl gets called in GSP-RM only, + // this RmCtrl is issued from guest kernel RM and then called by the GSP plugin directly to GSP RM + // Since bIsRcPending is handled in guest, so we need to allow the call in GSP RM. // Call internal RMCTRL on physical-RM, kchannelFwdToInternalCtrl() is not // used because no conversion from KernelChannel to Channel is required diff --git a/src/nvidia/src/kernel/gpu/fsp/arch/hopper/kern_fsp_gh100.c b/src/nvidia/src/kernel/gpu/fsp/arch/hopper/kern_fsp_gh100.c index 2f758e601..031c6fc68 100644 --- a/src/nvidia/src/kernel/gpu/fsp/arch/hopper/kern_fsp_gh100.c +++ b/src/nvidia/src/kernel/gpu/fsp/arch/hopper/kern_fsp_gh100.c @@ -245,11 +245,14 @@ kfspCanSendPacket_GH100 { NvU32 cmdqHead; NvU32 cmdqTail; + NvU32 msgqHead; + NvU32 msgqTail; _kfspGetQueueHeadTail_GH100(pGpu, pKernelFsp, &cmdqHead, &cmdqTail); + _kfspGetMsgQueueHeadTail_GH100(pGpu, pKernelFsp, &msgqHead, &msgqTail); // FSP will set QUEUE_HEAD = TAIL after each packet is received - return (cmdqHead == cmdqTail); + return (cmdqHead == cmdqTail) && (msgqHead == msgqTail); } /*! diff --git a/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c b/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c index fbccaca79..27a46a6cf 100644 --- a/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c +++ b/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c @@ -269,6 +269,27 @@ kfspStateDestroy_IMPL } +/* + * @brief GpuWaitConditionFunc for FSP ready + * + * @param[in] pGpu GPU object pointer + * @param[in] pCondData KernelFsp object pointer + * + * @returns NvBool NV_TRUE if command and message fsp + * queues are empty + */ +static NvBool +_kfspWaitForCanSend +( + OBJGPU *pGpu, + void *pCondData +) +{ + KernelFsp *pKernelFsp = (KernelFsp*) pCondData; + + return kfspCanSendPacket_HAL(pGpu, pKernelFsp); +} + /*! * @brief Wait until RM can send to FSP * @@ -290,40 +311,11 @@ kfspPollForCanSend_IMPL gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER); - while (!kfspCanSendPacket_HAL(pGpu, pKernelFsp)) + status = gpuTimeoutCondWait(pGpu, _kfspWaitForCanSend, pKernelFsp, &timeout); + if (status != NV_OK) { - // - // For now we assume that any response from FSP before RM message - // send is complete indicates an error and we should abort. - // - // Ongoing dicussion on usefullness of this check. Bug to be filed. 
- // - if (kfspIsResponseAvailable_HAL(pGpu, pKernelFsp)) - { - kfspReadMessage(pGpu, pKernelFsp, NULL, 0); - NV_PRINTF(LEVEL_ERROR, - "Received error message from FSP while waiting to send.\n"); - status = NV_ERR_GENERIC; - break; - } - - osSpinLoop(); - - status = gpuCheckTimeout(pGpu, &timeout); - if (status != NV_OK) - { - if ((status == NV_ERR_TIMEOUT) && - kfspCanSendPacket_HAL(pGpu, pKernelFsp)) - { - status = NV_OK; - } - else - { - NV_PRINTF(LEVEL_ERROR, - "Timed out waiting for FSP command queue to be empty.\n"); - } - break; - } + NV_PRINTF(LEVEL_ERROR, + "Timed out waiting for FSP queues to be empty.\n"); } return status; diff --git a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c index 0d73a79e7..a189a4c84 100644 --- a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c +++ b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c @@ -2061,8 +2061,8 @@ _kgspRpcIncrementTimeoutCountAndRateLimitPrints { pRpc->timeoutCount++; - if ((pRpc->timeoutCount == (RPC_TIMEOUT_LIMIT_PRINT_RATE_THRESH + 1)) && - (RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP > 0)) + if ((pRpc->timeoutCount == (RPC_TIMEOUT_GPU_RESET_THRESHOLD + 1)) && + (RPC_TIMEOUT_PRINT_RATE_SKIP > 0)) { // make sure we warn Xid and NV_PRINTF/NVLOG consumers that we are rate limiting prints if (GPU_GET_KERNEL_RC(pGpu)->bLogEvents) @@ -2072,15 +2072,15 @@ _kgspRpcIncrementTimeoutCountAndRateLimitPrints gpuGetDomain(pGpu), gpuGetBus(pGpu), gpuGetDevice(pGpu), - RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP + 1); + RPC_TIMEOUT_PRINT_RATE_SKIP + 1); } NV_PRINTF(LEVEL_WARNING, "Rate limiting GSP RPC error prints (printing 1 of every %d)\n", - RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP + 1); + RPC_TIMEOUT_PRINT_RATE_SKIP + 1); } - pRpc->bQuietPrints = ((pRpc->timeoutCount > RPC_TIMEOUT_LIMIT_PRINT_RATE_THRESH) && - ((pRpc->timeoutCount % (RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP + 1)) != 0)); + pRpc->bQuietPrints = ((pRpc->timeoutCount > RPC_TIMEOUT_GPU_RESET_THRESHOLD) && + ((pRpc->timeoutCount % (RPC_TIMEOUT_PRINT_RATE_SKIP + 1)) != 0)); } /*! @@ -2228,6 +2228,22 @@ _kgspRpcRecvPoll _kgspLogXid119(pGpu, pRpc, expectedFunc, expectedSequence); } + // Detect for 3 back to back GSP RPC timeout + if (pRpc->timeoutCount == RPC_TIMEOUT_GPU_RESET_THRESHOLD) + { + // GSP is completely stalled and cannot be recovered. Mark the GPU for reset. + NV_ASSERT_FAILED("Back to back GSP RPC timeout detected! GPU marked for reset"); + gpuMarkDeviceForReset(pGpu); + pKernelGsp->bFatalError = NV_TRUE; + + // For Windows, if TDR is supported, trigger TDR to recover the system. + if (pGpu->getProperty(pGpu, PDB_PROP_GPU_SUPPORTS_TDR_EVENT)) + { + NV_ASSERT_FAILED("Triggering TDR to recover from GSP hang"); + gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_UCODE_RESET, NULL, 0, 0, 0); + } + } + goto done; } else if (timeoutStatus != NV_OK) diff --git a/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c b/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c index e4631130d..08ecb98be 100644 --- a/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c +++ b/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -720,7 +720,7 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu) else { NV_PRINTF(LEVEL_ERROR, "Read failed after %d retries.\n", nRetries); - return nvStatus; + goto exit; } } @@ -758,16 +758,14 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu) nvStatus = NV_ERR_INVALID_PARAM_STRUCT; } - if (nvStatus == NV_OK) - { - pMQI->rxSeqNum++; +exit: + pMQI->rxSeqNum++; - nRet = msgqRxMarkConsumed(pMQI->hQueue, nElements); - if (nRet < 0) - { - NV_PRINTF(LEVEL_ERROR, "msgqRxMarkConsumed failed: %d\n", nRet); - nvStatus = NV_ERR_GENERIC; - } + nRet = msgqRxMarkConsumed(pMQI->hQueue, nElements); + if (nRet < 0) + { + NV_PRINTF(LEVEL_ERROR, "msgqRxMarkConsumed failed: %d\n", nRet); + nvStatus = NV_ERR_GENERIC; } return nvStatus; diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c index b6c79365a..e556aa646 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c @@ -236,6 +236,11 @@ memdescCreate allocSize = Size; + if (allocSize == 0) + { + return NV_ERR_INVALID_ARGUMENT; + } + // // this memdesc may have gotten forced to sysmem if no carveout, // but for VPR it needs to be in vidmem, so check and re-direct here, @@ -306,14 +311,7 @@ memdescCreate // (4k >> 12 = 1). This modification helps us to avoid overflow of variable // allocSize, in case caller of this function passes highest value of NvU64. // - if (allocSize == 0) - { - PageCount = 0; - } - else - { - PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1; - } + PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1; if (PhysicallyContiguous) { diff --git a/src/nvidia/src/kernel/gpu/mmu/arch/ampere/kern_gmmu_ga100.c b/src/nvidia/src/kernel/gpu/mmu/arch/ampere/kern_gmmu_ga100.c index c6ffa26b4..c87328764 100644 --- a/src/nvidia/src/kernel/gpu/mmu/arch/ampere/kern_gmmu_ga100.c +++ b/src/nvidia/src/kernel/gpu/mmu/arch/ampere/kern_gmmu_ga100.c @@ -166,7 +166,7 @@ kgmmuSetupWarForBug2720120FmtFamily_GA100 kgmmuGetPTEAperture(pKernelGmmu), kgmmuGetPTEAttr(pKernelGmmu), 0)); - memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_WAR_PT, + memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_WAR_PT, pKernelGmmu->pWarSmallPageTable); NV_ASSERT_OK_OR_GOTO(status, status, failed); @@ -201,7 +201,7 @@ kgmmuSetupWarForBug2720120FmtFamily_GA100 kgmmuGetPTEAperture(pKernelGmmu), kgmmuGetPTEAttr(pKernelGmmu), 0), failed); - memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_WAR_PD, + memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_WAR_PD, pKernelGmmu->pWarPageDirectory0); NV_ASSERT_OK_OR_GOTO(status, status, failed); @@ -376,30 +376,26 @@ kgmmuServiceMmuFault_GA100 FIFO_MMU_EXCEPTION_DATA *pMmuExceptionData ) { + NV_STATUS status = NV_OK; + MMU_FAULT_BUFFER_ENTRY *pParsedFaultEntry = KERNEL_POINTER_FROM_NvP64(MMU_FAULT_BUFFER_ENTRY *, pParsedFaultInfo); // If FLA fault do not reset channel if (pParsedFaultEntry->mmuFaultEngineId == NV_PFAULT_MMU_ENG_ID_FLA) { - if (pKernelGmmu->bReportFlaTranslationXid) - { - nvErrorLog_va((void *)pGpu, - DESTINATION_FLA_TRANSLATION_ERROR, - "FLA Fault: inst:0x%x dev:0x%x subdev:0x%x, faulted @ 0x%x_%08x. 
Fault is of type %s %s", - gpuGetInstance(pGpu), - gpuGetDeviceInstance(pGpu), - pGpu->subdeviceInstance, - pMmuExceptionData->addrHi, - pMmuExceptionData->addrLo, - kgmmuGetFaultTypeString_HAL(pKernelGmmu, pMmuExceptionData->faultType), - kfifoGetFaultAccessTypeString_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu), - pMmuExceptionData->accessType)); - } - - return NV_OK; + nvErrorLog_va((void *)pGpu, + NVLINK_REMOTE_TRANSLATION_ERROR, + "NVLink remote translation error: faulted @ 0x%x_%08x. Fault is of type %s %s", + pMmuExceptionData->addrHi, + pMmuExceptionData->addrLo, + kgmmuGetFaultTypeString_HAL(pKernelGmmu, pMmuExceptionData->faultType), + kfifoGetFaultAccessTypeString_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu), + pMmuExceptionData->accessType)); } else { - return kgmmuServiceMmuFault_GV100(pGpu, pKernelGmmu, pParsedFaultInfo, pMmuExceptionData); + status = kgmmuServiceMmuFault_GV100(pGpu, pKernelGmmu, pParsedFaultInfo, pMmuExceptionData); } + + return status; } diff --git a/src/nvidia/src/kernel/gpu/rc/kernel_rc_callback.c b/src/nvidia/src/kernel/gpu/rc/kernel_rc_callback.c index e8211ef2b..a88ca8127 100644 --- a/src/nvidia/src/kernel/gpu/rc/kernel_rc_callback.c +++ b/src/nvidia/src/kernel/gpu/rc/kernel_rc_callback.c @@ -61,12 +61,27 @@ _vgpuRcResetCallback { THREAD_STATE_NODE threadState; NV506F_CTRL_CMD_RESET_ISOLATED_CHANNEL_PARAMS params = {0}; + RsClient *pClient; + KernelChannel *pKernelChannel = NULL; threadStateInitISRAndDeferredIntHandler( &threadState, pRcErrorContext->pGpu, THREAD_STATE_FLAGS_IS_DEFERRED_INT_HANDLER); + NV_ASSERT_OK_OR_GOTO( + status, + serverGetClientUnderLock(&g_resServ, hClient, &pClient), + error_cleanup); + NV_ASSERT_OK_OR_GOTO( + status, + CliGetKernelChannel(pClient, hChannel, &pKernelChannel), + error_cleanup); + + NV_ASSERT_OR_ELSE(pKernelChannel != NULL, + status = NV_ERR_INVALID_STATE; + goto error_cleanup); + params.engineID = pRcErrorContext->EngineId; params.exceptType = pRcErrorContext->exceptType; @@ -99,6 +114,11 @@ _vgpuRcResetCallback } return status; + +error_cleanup: + rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL); + osReleaseRmSema(pSys->pSema, NULL); + return status; } diff --git a/src/nvidia/src/kernel/mem_mgr/ctx_buf_pool.c b/src/nvidia/src/kernel/mem_mgr/ctx_buf_pool.c index c7e95a773..9a824ff3e 100644 --- a/src/nvidia/src/kernel/mem_mgr/ctx_buf_pool.c +++ b/src/nvidia/src/kernel/mem_mgr/ctx_buf_pool.c @@ -121,7 +121,7 @@ ctxBufPoolInit { NV_STATUS status = NV_OK; CTX_BUF_POOL_INFO *pCtxBufPool = NULL; - NvU32 i, poolConfig; + NvU32 i; NV_ASSERT_OR_RETURN(ppCtxBufPool != NULL, NV_ERR_INVALID_ARGUMENT); @@ -136,35 +136,13 @@ ctxBufPoolInit // // create a mem pool for each page size supported by RM - // pool corresponding to RM_ATTR_PAGE_SIZE_DEFAULT remains unused // - for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++) + for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++) { - switch (i) - { - case RM_ATTR_PAGE_SIZE_DEFAULT: - case RM_ATTR_PAGE_SIZE_4KB: - poolConfig = POOL_CONFIG_CTXBUF_4K; - break; - case RM_ATTR_PAGE_SIZE_BIG: - poolConfig = POOL_CONFIG_CTXBUF_64K; - break; - case RM_ATTR_PAGE_SIZE_HUGE: - poolConfig = POOL_CONFIG_CTXBUF_2M; - break; - case RM_ATTR_PAGE_SIZE_512MB: - poolConfig = POOL_CONFIG_CTXBUF_512M; - break; - case RM_ATTR_PAGE_SIZE_256GB: - poolConfig = POOL_CONFIG_CTXBUF_256G; - break; - default: - NV_PRINTF(LEVEL_ERROR, "Unsupported page size attr %d\n", i); - return NV_ERR_INVALID_STATE; - } + // Pool Config starts from POOL_CONFIG_CTXBUF_256G NV_ASSERT_OK_OR_GOTO(status, rmMemPoolSetup((void*)&pHeap->pmaObject, 
&pCtxBufPool->pMemPool[i], - poolConfig), + (POOL_CONFIG_MODE) i), cleanup); // Allocate the pool in CPR in case of Confidential Compute @@ -211,7 +189,7 @@ ctxBufPoolDestroy pCtxBufPool = *ppCtxBufPool; - for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++) + for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++) { if (pCtxBufPool->pMemPool[i] != NULL) { @@ -224,6 +202,29 @@ ctxBufPoolDestroy NV_PRINTF(LEVEL_INFO, "Ctx buf pool destroyed\n"); } +static NvU32 NV_FORCEINLINE +ctxBufPoolPageSizeToPoolIndex(NvU64 pageSize) +{ + switch (pageSize) + { + case RM_PAGE_SIZE: + return POOL_CONFIG_CTXBUF_4K; + case RM_PAGE_SIZE_64K: + return POOL_CONFIG_CTXBUF_64K; + case RM_PAGE_SIZE_128K: + return POOL_CONFIG_CTXBUF_128K; + case RM_PAGE_SIZE_HUGE: + return POOL_CONFIG_CTXBUF_2M; + case RM_PAGE_SIZE_512M: + return POOL_CONFIG_CTXBUF_512M; + case RM_PAGE_SIZE_256G: + return POOL_CONFIG_CTXBUF_256G; + default: + NV_PRINTF(LEVEL_ERROR, "Unrecognized/unsupported page size = 0x%llx\n", pageSize); + NV_ASSERT_OR_RETURN(0, POOL_CONFIG_MAX_SUPPORTED); + } +} + /* * @brief Calculates total amount of memory required for all buffers in each pool and reserves the memory * @@ -263,7 +264,7 @@ ctxBufPoolReserve NV_STATUS status = NV_OK; NvU64 pageSize; NvU32 i; - NvU64 totalSize[RM_ATTR_PAGE_SIZE_INVALID] = {0}; + NvU64 totalSize[POOL_CONFIG_MAX_SUPPORTED] = {0}; NvU64 size; NV_ASSERT_OR_RETURN(pCtxBufPool != NULL, NV_ERR_INVALID_ARGUMENT); @@ -282,32 +283,13 @@ ctxBufPoolReserve // Determine the pool(4K/64K/2M) from where this buffer will eventually // get allocated and mark that pool to reserve this memory. // - switch(pageSize) - { - case RM_PAGE_SIZE: - totalSize[RM_ATTR_PAGE_SIZE_4KB] += size; - break; - case RM_PAGE_SIZE_64K: - case RM_PAGE_SIZE_128K: - totalSize[RM_ATTR_PAGE_SIZE_BIG] += size; - break; - case RM_PAGE_SIZE_HUGE: - totalSize[RM_ATTR_PAGE_SIZE_HUGE] += size; - break; - case RM_PAGE_SIZE_512M: - totalSize[RM_ATTR_PAGE_SIZE_512MB] += size; - break; - case RM_PAGE_SIZE_256G: - totalSize[RM_ATTR_PAGE_SIZE_256GB] += size; - break; - default: - NV_PRINTF(LEVEL_ERROR, "Unrecognized/unsupported page size = 0x%llx\n", pageSize); - NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); - } + NvU32 poolIndex = ctxBufPoolPageSizeToPoolIndex(pageSize); + NV_ASSERT_OR_RETURN(poolIndex < POOL_CONFIG_MAX_SUPPORTED, NV_ERR_INVALID_ARGUMENT); + totalSize[poolIndex] += size; NV_PRINTF(LEVEL_INFO, "Reserving 0x%llx bytes for buf Id = 0x%x in pool with page size = 0x%llx\n", size, i, pageSize); } - for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++) + for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++) { if (totalSize[i] > 0) { @@ -342,7 +324,7 @@ ctxBufPoolTrim NvU32 i; NV_ASSERT_OR_RETURN(pCtxBufPool != NULL, NV_ERR_INVALID_ARGUMENT); - for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++) + for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++) { rmMemPoolTrim(pCtxBufPool->pMemPool[i], 0, 0); NV_PRINTF(LEVEL_INFO, "Trimmed pool with RM_ATTR_PAGE_SIZE_* = 0x%x\n", i); @@ -369,7 +351,7 @@ ctxBufPoolRelease NvU32 i; NV_ASSERT(pCtxBufPool != NULL); - for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++) + for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++) { rmMemPoolRelease(pCtxBufPool->pMemPool[i], 0); } @@ -426,29 +408,10 @@ ctxBufPoolAllocate pageSize = newPageSize; } - // Determine the pool(4K/64K/2M) from where this buffer is to be allocated - switch(pageSize) - { - case RM_PAGE_SIZE: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_4KB]; - break; - case RM_PAGE_SIZE_64K: - case RM_PAGE_SIZE_128K: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_BIG]; - 
break; - case RM_PAGE_SIZE_HUGE: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_HUGE]; - break; - case RM_PAGE_SIZE_512M: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_512MB]; - break; - case RM_PAGE_SIZE_256G: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_256GB]; - break; - default: - NV_PRINTF(LEVEL_ERROR, "Unsupported page size = 0x%llx set for context buffer\n", pageSize); - NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); - } + NvU32 poolIndex = ctxBufPoolPageSizeToPoolIndex(pageSize); + NV_ASSERT_OR_RETURN(poolIndex < POOL_CONFIG_MAX_SUPPORTED, NV_ERR_INVALID_ARGUMENT); + pPool = pCtxBufPool->pMemPool[poolIndex]; + NV_ASSERT_OK_OR_RETURN(rmMemPoolAllocate(pPool, (RM_POOL_ALLOC_MEMDESC*)pMemDesc)); NV_PRINTF(LEVEL_INFO, "Buffer allocated from ctx buf pool with page size = 0x%llx\n", pageSize); return NV_OK; @@ -488,28 +451,9 @@ ctxBufPoolFree pMemDesc->Alignment, RM_ATTR_PAGE_SIZE_DEFAULT, NV_TRUE, &size, &pageSize)); } - switch(pageSize) - { - case RM_PAGE_SIZE: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_4KB]; - break; - case RM_PAGE_SIZE_64K: - case RM_PAGE_SIZE_128K: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_BIG]; - break; - case RM_PAGE_SIZE_HUGE: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_HUGE]; - break; - case RM_PAGE_SIZE_512M: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_512MB]; - break; - case RM_PAGE_SIZE_256G: - pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_256GB]; - break; - default: - NV_PRINTF(LEVEL_ERROR, "Unsupported page size detected for context buffer\n"); - NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_STATE); - } + NvU32 poolIndex = ctxBufPoolPageSizeToPoolIndex(pageSize); + NV_ASSERT_OR_RETURN(poolIndex < POOL_CONFIG_MAX_SUPPORTED, NV_ERR_INVALID_ARGUMENT); + pPool = pCtxBufPool->pMemPool[poolIndex]; // If scrubber is being skipped by PMA we need to manually scrub this memory if (rmMemPoolIsScrubSkipped(pPool)) @@ -665,16 +609,19 @@ ctxBufPoolGetSizeAndPageSize { NvU64 chunkSize = 0; NvU32 i; - for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++) + // + // pools are sorted in descending order of chunk size. So, start from the pool with the smallest chunk size. + // + for (i = POOL_CONFIG_MAX_SUPPORTED; i; i--) { - NV_ASSERT_OK_OR_RETURN(rmMemPoolGetChunkAndPageSize(pCtxBufPool->pMemPool[i], &chunkSize, &pageSize)); + NV_ASSERT_OK_OR_RETURN(rmMemPoolGetChunkAndPageSize(pCtxBufPool->pMemPool[i - 1], &chunkSize, &pageSize)); if (chunkSize >= size) { size = chunkSize; break; } } - if (i == RM_ATTR_PAGE_SIZE_INVALID) + if (i == 0) { NV_PRINTF(LEVEL_ERROR, "couldn't find pool with chunksize >= 0x%llx\n", size); DBG_BREAKPOINT(); @@ -722,7 +669,7 @@ ctxBufPoolIsScrubSkipped { NvU32 i; NV_ASSERT_OR_RETURN(pCtxBufPool != NULL, NV_ERR_INVALID_ARGUMENT); - for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++) + for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++) { if (!rmMemPoolIsScrubSkipped(pCtxBufPool->pMemPool[i])) return NV_FALSE; @@ -747,7 +694,7 @@ ctxBufPoolSetScrubSkip { NvU32 i; NV_ASSERT_OR_RETURN_VOID(pCtxBufPool != NULL); - for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++) + for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++) { rmMemPoolSkipScrub(pCtxBufPool->pMemPool[i], bSkipScrub); } diff --git a/src/nvidia/src/kernel/mem_mgr/pool_alloc.c b/src/nvidia/src/kernel/mem_mgr/pool_alloc.c index abfd1af22..0950cce56 100644 --- a/src/nvidia/src/kernel/mem_mgr/pool_alloc.c +++ b/src/nvidia/src/kernel/mem_mgr/pool_alloc.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -99,8 +99,7 @@ typedef enum * This array contains the alloction sizes (in bytes) of each pool. */ static const NvU64 poolAllocSizes[] = { - 0x4000000000, - 0x20000000, 0x200000, 0x40000, 0x20000, 0x10000, 0x2000, 0x1000, 0x100 + 0x4000000000, 0x20000000, 0x200000, 0x40000, 0x20000, 0x10000, 0x2000, 0x1000, 0x100 }; #define POOL_CONFIG_POOL_IDX 0 @@ -112,7 +111,8 @@ static const NvU64 poolConfig[POOL_CONFIG_MAX_SUPPORTED][POOL_CONFIG_CHUNKSIZE_I { RM_POOL_IDX_4K, PMA_CHUNK_SIZE_64K }, // pool with pageSize = 4K for GMMU_FMT_VERSION_2 { RM_POOL_IDX_256G, PMA_CHUNK_SIZE_256G }, // pool with pageSize = 256G for RM allocated buffers (unused as of blackwell) { RM_POOL_IDX_512M, PMA_CHUNK_SIZE_512M }, // pool with pageSize = 512MB for RM allocated buffers (unused as of ampere) - { RM_POOL_IDX_2M, PMA_CHUNK_SIZE_4M }, // pool with pageSize = 2MB for RM allocated buffers + { RM_POOL_IDX_2M, PMA_CHUNK_SIZE_4M }, // pool with pageSize = 4MB for RM allocated buffers + { RM_POOL_IDX_128K, PMA_CHUNK_SIZE_2M}, // pool with pageSize = 2MB for RM allocated buffers { RM_POOL_IDX_64K, PMA_CHUNK_SIZE_256K }, // pool with pageSize = 64K for RM allocated buffers { RM_POOL_IDX_4K, PMA_CHUNK_SIZE_64K } // pool with pageSize = 4K for RM allocated buffers }; diff --git a/src/nvidia/src/kernel/mem_mgr/standard_mem.c b/src/nvidia/src/kernel/mem_mgr/standard_mem.c index 4ef0535ee..f6c9b1ffe 100644 --- a/src/nvidia/src/kernel/mem_mgr/standard_mem.c +++ b/src/nvidia/src/kernel/mem_mgr/standard_mem.c @@ -57,6 +57,11 @@ NV_STATUS stdmemValidateParams return NV_ERR_INVALID_ARGUMENT; } + if (pAllocData->size == 0) + { + return NV_ERR_INVALID_ARGUMENT; + } + // // These flags don't do anything in this path. No mapping on alloc and // kernel map is controlled by TYPE diff --git a/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c b/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c index aec294151..5032c849e 100644 --- a/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c +++ b/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c @@ -7616,7 +7616,7 @@ static NV_STATUS dupMemory(struct gpuDevice *device, { NV_STATUS status = NV_OK; nvGpuOpsLockSet acquiredLocks; - THREAD_STATE_NODE threadState; + THREAD_STATE_NODE *pThreadState; NvHandle dupedMemHandle; Memory *pMemory = NULL; PMEMORY_DESCRIPTOR pMemDesc = NULL; @@ -7637,14 +7637,15 @@ static NV_STATUS dupMemory(struct gpuDevice *device, NV_ASSERT((flags == NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE) || (flags == NV04_DUP_HANDLE_FLAGS_NONE)); - threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE); - + pThreadState = threadStateAlloc(THREAD_STATE_FLAGS_NONE); + if (!pThreadState) + return NV_ERR_NO_MEMORY; // RS-TODO use dual client locking status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, device->session->handle, &pSessionClient, &acquiredLocks); if (status != NV_OK) { - threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); + threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE); return status; } @@ -7686,10 +7687,18 @@ static NV_STATUS dupMemory(struct gpuDevice *device, } // For SYSMEM or indirect peer mappings - bIsIndirectPeer = gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu); + // Deviceless memory (NV01_MEMORY_DEVICELESS) can have a NULL pGpu. Perform targeted + // null checks before IOMMU operations that require valid GPU contexts. 
+ bIsIndirectPeer = (pAdjustedMemDesc->pGpu != NULL) ? + gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu) : NV_FALSE; if (bIsIndirectPeer || memdescRequiresIommuMapping(pAdjustedMemDesc)) { + if (NV_UNLIKELY(pAdjustedMemDesc->pGpu == NULL)) + { + status = NV_ERR_INVALID_STATE; + goto freeGpaMemdesc; + } // For sysmem allocations, the dup done below is very shallow and in // particular doesn't create IOMMU mappings required for the mapped GPU // to access the memory. That's a problem if the mapped GPU is different @@ -7778,7 +7787,7 @@ freeGpaMemdesc: done: _nvGpuOpsLocksRelease(&acquiredLocks); - threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); + threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE); return status; } diff --git a/src/nvidia/src/kernel/vgpu/rpc.c b/src/nvidia/src/kernel/vgpu/rpc.c index 9b9e075bd..52a13f6c9 100644 --- a/src/nvidia/src/kernel/vgpu/rpc.c +++ b/src/nvidia/src/kernel/vgpu/rpc.c @@ -1856,7 +1856,7 @@ static NV_STATUS _issueRpcLarge // Set the correct length for this queue entry. vgpu_rpc_message_header_v->length = entryLength; - nvStatus = rpcSendMessage(pGpu, pRpc, &firstSequence); + nvStatus = rpcSendMessage(pGpu, pRpc, &lastSequence); if (nvStatus != NV_OK) { NV_PRINTF(LEVEL_ERROR, "rpcSendMessage failed with status 0x%08x for fn %d!\n", diff --git a/version.mk b/version.mk index 3af3247a1..4357c3248 100644 --- a/version.mk +++ b/version.mk @@ -1,4 +1,4 @@ -NVIDIA_VERSION = 570.190 +NVIDIA_VERSION = 570.195.03 # This file. VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))