Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git, synced 2026-01-27 03:29:47 +00:00

Commit: 570.195.03
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 570.190.
version 570.195.03.

## How to Build

@@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
570.190 driver release. This can be achieved by installing
570.195.03 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

@@ -185,7 +185,7 @@ table below).
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/570.190/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/570.195.03/README/kernel_open.html

For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.

@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"570.190\"
ccflags-y += -DNV_VERSION_STRING=\"570.195.03\"

ifneq ($(SYSSRCHOST1X),)
ccflags-y += -I$(SYSSRCHOST1X)

@@ -2423,6 +2423,12 @@ nvidia_ioctl(
{
nv_ioctl_wait_open_complete_t *params = arg_copy;

if (arg_size != sizeof(nv_ioctl_wait_open_complete_t))
{
status = -EINVAL;
goto done_early;
}

params->rc = nvlfp->open_rc;
params->adapterStatus = nvlfp->adapter_status;
goto done_early;

@@ -2503,8 +2509,12 @@ nvidia_ioctl(
goto done;
}

/* atomically check and alloc attached_gpus */
down(&nvl->ldata_lock);

if (nvlfp->num_attached_gpus != 0)
{
up(&nvl->ldata_lock);
status = -EINVAL;
goto done;
}

@@ -2512,12 +2522,15 @@ nvidia_ioctl(
NV_KMALLOC(nvlfp->attached_gpus, arg_size);
if (nvlfp->attached_gpus == NULL)
{
up(&nvl->ldata_lock);
status = -ENOMEM;
goto done;
}
memcpy(nvlfp->attached_gpus, arg_copy, arg_size);
nvlfp->num_attached_gpus = num_arg_gpus;

up(&nvl->ldata_lock);

for (i = 0; i < nvlfp->num_attached_gpus; i++)
{
if (nvlfp->attached_gpus[i] == 0)

@@ -2533,9 +2546,14 @@ nvidia_ioctl(
nvidia_dev_put(nvlfp->attached_gpus[i], sp);
}

/* atomically free attached_gpus */
down(&nvl->ldata_lock);

NV_KFREE(nvlfp->attached_gpus, arg_size);
nvlfp->num_attached_gpus = 0;

up(&nvl->ldata_lock);

status = -EINVAL;
break;
}

@@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r570_00
#define NV_BUILD_BRANCH r573_76
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r570_00
#define NV_PUBLIC_BRANCH r573_76
#endif

#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r570_00-575"
#define NV_BUILD_CHANGELIST_NUM (36467544)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r573_76-590"
#define NV_BUILD_CHANGELIST_NUM (36569223)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r570/r570_00-575"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36467544)
#define NV_BUILD_NAME "rel/gpu_drv/r570/r573_76-590"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36569223)

#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r570_00-569"
#define NV_BUILD_CHANGELIST_NUM (36467544)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "573.73"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36467544)
#define NV_BUILD_BRANCH_VERSION "r573_76-1"
#define NV_BUILD_CHANGELIST_NUM (36518415)
#define NV_BUILD_TYPE "Nightly"
#define NV_BUILD_NAME "r573_76-250909"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36506718)
#define NV_BUILD_BRANCH_BASE_VERSION R570
#endif
// End buildmeister python edited section

@@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)

#define NV_VERSION_STRING "570.190"
#define NV_VERSION_STRING "570.195.03"

#else

@@ -109,7 +109,7 @@
#define ROBUST_CHANNEL_NVJPG5_ERROR (103)
#define ROBUST_CHANNEL_NVJPG6_ERROR (104)
#define ROBUST_CHANNEL_NVJPG7_ERROR (105)
#define DESTINATION_FLA_TRANSLATION_ERROR (108)
#define NVLINK_REMOTE_TRANSLATION_ERROR (108)
#define SEC_FAULT_ERROR (110)
#define GSP_RPC_TIMEOUT (119)
#define GSP_ERROR (120)

@@ -129,7 +129,7 @@
#define ROBUST_CHANNEL_CE18_ERROR (134)
#define ROBUST_CHANNEL_CE19_ERROR (135)
#define ALI_TRAINING_FAIL (136)
#define NVLINK_FLA_PRIV_ERR (137)
#define NVLINK_PRIV_ERR (137)
#define ROBUST_CHANNEL_DLA_ERROR (138)
#define ROBUST_CHANNEL_OFA1_ERROR (139)
#define UNRECOVERABLE_ECC_ERROR_ESCAPE (140)

@@ -500,7 +500,6 @@ struct KernelGmmu {
NvBool PDB_PROP_KGMMU_REDUCE_NR_FAULT_BUFFER_SIZE;

// Data members
NvBool bReportFlaTranslationXid;
MEMORY_DESCRIPTOR *pFakeSparseBuffer;
NvU64 fakeSparseEntry[3];
NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo;

@@ -636,7 +635,6 @@ struct KernelGmmu_PRIVATE {
NvBool PDB_PROP_KGMMU_REDUCE_NR_FAULT_BUFFER_SIZE;

// Data members
NvBool bReportFlaTranslationXid;
MEMORY_DESCRIPTOR *pFakeSparseBuffer;
NvU64 fakeSparseEntry[3];
NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo;

@@ -76,6 +76,7 @@ struct THREAD_STATE_NODE
*/
NvU32 threadSeqId;
NvBool bValid;
NvBool bUsingHeap;
THREAD_TIMEOUT_STATE timeout;
NvU32 cpuNum;
NvU32 flags;

@@ -208,6 +209,7 @@ void threadStateOnlyProcessWorkISRAndDeferredIntHandler(THREAD_STATE_NODE
void threadStateOnlyFreeISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32);
void threadStateFreeISRAndDeferredIntHandler(THREAD_STATE_NODE *, OBJGPU*, NvU32);
void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags);
THREAD_STATE_NODE* threadStateAlloc(NvU32 flags);
void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags);

NV_STATUS threadStateGetCurrent(THREAD_STATE_NODE **ppThreadNode, OBJGPU *pGpu);
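
For context, here is a minimal usage sketch (not part of this commit) of how a caller migrates from the stack-based threadStateInit() to the heap-based, UAF-safe threadStateAlloc()/threadStateFree() pair declared above, mirroring the dupMemory() change later in this commit; exampleRmEntryPoint() and doWork() are hypothetical stand-ins.

// Sketch only: migrating a caller to the heap-based thread state API.
static NV_STATUS exampleRmEntryPoint(void)   // hypothetical caller
{
    THREAD_STATE_NODE *pThreadState;
    NV_STATUS status;

    // Old pattern: a THREAD_STATE_NODE on the stack, set up with
    // threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE), which cannot
    // report failure to the caller.

    // New pattern: the node lives on the heap and allocation failure is visible.
    pThreadState = threadStateAlloc(THREAD_STATE_FLAGS_NONE);
    if (pThreadState == NULL)
        return NV_ERR_NO_MEMORY;

    status = doWork();   // hypothetical work performed under thread state

    // threadStateFree() also releases the node when it was heap-allocated
    // (threadStateAlloc() sets bUsingHeap).
    threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE);
    return status;
}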

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2004-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2004-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -50,8 +50,8 @@ TYPEDEF_BITVECTOR(MC_ENGINE_BITVECTOR);
#include "g_rpc_hal.h" // For RPC_HAL_IFACES
#include "g_rpc_odb.h" // For RPC_HAL_IFACES

#define RPC_TIMEOUT_LIMIT_PRINT_RATE_THRESH 3 // rate limit after 3 prints
#define RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP 29 // skip 29 of 30 prints
#define RPC_TIMEOUT_GPU_RESET_THRESHOLD 3 // Reset GPU after 3 back to back GSP RPC timeout
#define RPC_TIMEOUT_PRINT_RATE_SKIP 29 // skip 29 of 30 prints

#define RPC_HISTORY_DEPTH 128

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -44,10 +44,9 @@ struct CTX_BUF_POOL_INFO
{
//
// Each array index corresponds to a pointer to memory pool with
// page size corresponding to RM_ATTR_PAGE_SIZE_*
// Pool corresponding to RM_ATTR_PAGE_SIZE_DEFAULT will be left unused
// page size corresponding to POOL_CONFIG_MODE
//
RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool[RM_ATTR_PAGE_SIZE_INVALID];
RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool[POOL_CONFIG_MAX_SUPPORTED];
};

// List of all context buffers supported by memory pools

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -51,6 +51,7 @@ typedef enum
POOL_CONFIG_CTXBUF_256G, // configure pool for RM internal allocations like ctx buffers with 256GB page size
POOL_CONFIG_CTXBUF_512M, // configure pool for RM internal allocations like ctx buffers with 512MB page size
POOL_CONFIG_CTXBUF_2M, // configure pool for RM internal allocations like ctx buffers with 2MB page size
POOL_CONFIG_CTXBUF_128K, // configure pool for RM internal allocations like ctx buffers with 128KB page size
POOL_CONFIG_CTXBUF_64K, // configure pool for RM internal allocations like ctx buffers with 64KB page size
POOL_CONFIG_CTXBUF_4K, // configure pool for RM internal allocations like ctx buffers with 4KB page size
POOL_CONFIG_MAX_SUPPORTED

@@ -508,27 +508,21 @@ static void _threadStateLogInitCaller(THREAD_STATE_NODE *pThreadNode, NvU64 func
}

/**
* @brief Initialize a threadState for regular threads (non-interrupt context)
*
* @param[in/out] pThreadNode
* @param[in] flags
*
* @brief Common initialization logic for both stack and heap thread state nodes
*
* @param[in/out] pThreadNode The node to initialize
* @param[in] flags Thread state flags
* @param[in] bUsingHeap NV_TRUE if heap-allocated, NV_FALSE if stack-allocated
*
* @return NV_OK on success, error code on failure
*/
void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags)
static NV_STATUS _threadStateInitCommon(THREAD_STATE_NODE *pThreadNode, NvU32 flags, NvBool bUsingHeap)
{
NV_STATUS rmStatus;
NvU64 funcAddr;

// Isrs should be using threadStateIsrInit().
NV_ASSERT((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS |
THREAD_STATE_FLAGS_IS_ISR |
THREAD_STATE_FLAGS_DEFERRED_INT_HANDLER_RUNNING)) == 0);

// Check to see if ThreadState is enabled
if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED))
return;

portMemSet(pThreadNode, 0, sizeof(*pThreadNode));
pThreadNode->bUsingHeap = bUsingHeap;
pThreadNode->threadSeqId = portAtomicIncrementU32(&threadStateDatabase.threadSeqCntr);
pThreadNode->cpuNum = osGetCurrentProcessorNumber();
pThreadNode->flags = flags;

@@ -546,9 +540,10 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags)

rmStatus = osGetCurrentThread(&pThreadNode->threadId);
if (rmStatus != NV_OK)
return;
return rmStatus;

NV_ASSERT_OR_RETURN_VOID(pThreadNode->cpuNum < threadStateDatabase.maxCPUs);
NV_ASSERT_OR_RETURN(pThreadNode->cpuNum < threadStateDatabase.maxCPUs,
NV_ERR_INVALID_STATE);

funcAddr = (NvU64) (NV_RETURN_ADDRESS());

@@ -558,27 +553,23 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags)
// Reset the threadId as insertion failed. bValid is already NV_FALSE
pThreadNode->threadId = 0;
portSyncSpinlockRelease(threadStateDatabase.spinlock);
return;
}
else
{
pThreadNode->bValid = NV_TRUE;
rmStatus = NV_OK;
return NV_ERR_GENERIC;
}

pThreadNode->bValid = NV_TRUE;
_threadStateLogInitCaller(pThreadNode, funcAddr);

portSyncSpinlockRelease(threadStateDatabase.spinlock);

_threadStatePrintInfo(pThreadNode);

NV_ASSERT(rmStatus == NV_OK);
threadPriorityStateAlloc();

if (TLS_MIRROR_THREADSTATE)
{
THREAD_STATE_NODE **pTls = (THREAD_STATE_NODE **)tlsEntryAcquire(TLS_ENTRY_ID_THREADSTATE);
NV_ASSERT_OR_RETURN_VOID(pTls != NULL);
NV_ASSERT_OR_RETURN(pTls != NULL, NV_ERR_INVALID_STATE);

if (*pTls != NULL)
{
NV_PRINTF(LEVEL_WARNING,

@@ -587,6 +578,66 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags)
}
*pTls = pThreadNode;
}
return NV_OK;
}

/**
* @brief Initialize a threadState for regular threads (non-interrupt context)
* Use the new UAF-safe API for new code, threadStateAlloc().
* @param[in/out] pThreadNode
* @param[in] flags
*
*/
void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags)
{
// Isrs should be using threadStateIsrInit().
NV_ASSERT_OR_RETURN_VOID((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS |
THREAD_STATE_FLAGS_IS_ISR |
THREAD_STATE_FLAGS_DEFERRED_INT_HANDLER_RUNNING)) == 0);

// Check to see if ThreadState is enabled
if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED))
return;

// Use common initialization logic (stack-allocated)
// Note: Legacy void API ignores errors for backward compatibility
_threadStateInitCommon(pThreadNode, flags, NV_FALSE);
}

/**
* @brief Allocate a heap-based threadState
* @param[in] flags Thread state flags
*
* @return Heap-allocated THREAD_STATE_NODE* on success, NULL on failure
*/
THREAD_STATE_NODE* threadStateAlloc(NvU32 flags)
{
THREAD_STATE_NODE *pHeapNode;
NV_STATUS rmStatus;

// Isrs should be using threadStateIsrInit().
NV_ASSERT_OR_RETURN((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS |
THREAD_STATE_FLAGS_IS_ISR |
THREAD_STATE_FLAGS_DEFERRED_INT_HANDLER_RUNNING)) == 0, NULL);

// Check to see if ThreadState is enabled
if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED))
return NULL;

// Allocate heap node directly
pHeapNode = portMemAllocNonPaged(sizeof(THREAD_STATE_NODE));
if (pHeapNode == NULL)
return NULL;

rmStatus = _threadStateInitCommon(pHeapNode, flags, NV_TRUE);
if (rmStatus != NV_OK)
goto cleanup_heap;

return pHeapNode;

cleanup_heap:
portMemFree(pHeapNode);
return NULL;
}

/**

@@ -870,6 +921,12 @@ void threadStateFree(THREAD_STATE_NODE *pThreadNode, NvU32 flags)
r);
}
}

// Free heap memory if this node was heap-allocated
if (pThreadNode->bUsingHeap)
{
portMemFree(pThreadNode);
}
}

/**

@@ -3117,10 +3117,12 @@ kchannelCtrlCmdResetIsolatedChannel_IMPL
OBJGPU *pGpu = GPU_RES_GET_GPU(pKernelChannel);
RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

// This ctrl sets bIsRcPending in the KernelChannel object. Because Kernel-RM is
// the source of truth on this, it's important that this ctrl is called from CPU-RM
NV_ASSERT_OR_RETURN(!RMCFG_FEATURE_PLATFORM_GSP, NV_ERR_INVALID_OPERATION);

// In case of vGPU this Rmctrl gets called in GSP-RM only,
// this RmCtrl is issued from guest kernel RM and then called by the GSP plugin directly to GSP RM
// Since bIsRcPending is handled in guest, so we need to allow the call in GSP RM.

// Call internal RMCTRL on physical-RM, kchannelFwdToInternalCtrl() is not
// used because no conversion from KernelChannel to Channel is required

@@ -245,11 +245,14 @@ kfspCanSendPacket_GH100
{
NvU32 cmdqHead;
NvU32 cmdqTail;
NvU32 msgqHead;
NvU32 msgqTail;

_kfspGetQueueHeadTail_GH100(pGpu, pKernelFsp, &cmdqHead, &cmdqTail);
_kfspGetMsgQueueHeadTail_GH100(pGpu, pKernelFsp, &msgqHead, &msgqTail);

// FSP will set QUEUE_HEAD = TAIL after each packet is received
return (cmdqHead == cmdqTail);
return (cmdqHead == cmdqTail) && (msgqHead == msgqTail);
}

/*!

@@ -269,6 +269,27 @@ kfspStateDestroy_IMPL

}

/*
* @brief GpuWaitConditionFunc for FSP ready
*
* @param[in] pGpu GPU object pointer
* @param[in] pCondData KernelFsp object pointer
*
* @returns NvBool NV_TRUE if command and message fsp
* queues are empty
*/
static NvBool
_kfspWaitForCanSend
(
OBJGPU *pGpu,
void *pCondData
)
{
KernelFsp *pKernelFsp = (KernelFsp*) pCondData;

return kfspCanSendPacket_HAL(pGpu, pKernelFsp);
}

/*!
* @brief Wait until RM can send to FSP
*

@@ -290,40 +311,11 @@ kfspPollForCanSend_IMPL
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout,
GPU_TIMEOUT_FLAGS_OSTIMER);

while (!kfspCanSendPacket_HAL(pGpu, pKernelFsp))
status = gpuTimeoutCondWait(pGpu, _kfspWaitForCanSend, pKernelFsp, &timeout);
if (status != NV_OK)
{
//
// For now we assume that any response from FSP before RM message
// send is complete indicates an error and we should abort.
//
// Ongoing dicussion on usefullness of this check. Bug to be filed.
//
if (kfspIsResponseAvailable_HAL(pGpu, pKernelFsp))
{
kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
NV_PRINTF(LEVEL_ERROR,
"Received error message from FSP while waiting to send.\n");
status = NV_ERR_GENERIC;
break;
}

osSpinLoop();

status = gpuCheckTimeout(pGpu, &timeout);
if (status != NV_OK)
{
if ((status == NV_ERR_TIMEOUT) &&
kfspCanSendPacket_HAL(pGpu, pKernelFsp))
{
status = NV_OK;
}
else
{
NV_PRINTF(LEVEL_ERROR,
"Timed out waiting for FSP command queue to be empty.\n");
}
break;
}
NV_PRINTF(LEVEL_ERROR,
"Timed out waiting for FSP queues to be empty.\n");
}

return status;
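
Because the hunk above interleaves the removed busy-wait loop with its replacement, here is a hedged sketch (not a verbatim copy of kfspPollForCanSend_IMPL) of the resulting condition-callback pattern; it assumes the RMTIMEOUT type and that gpuTimeoutCondWait() re-evaluates the callback until it returns NV_TRUE or the timeout expires, and the helper names are hypothetical.

// Sketch only: the GpuWaitConditionFunc pattern used by the change above.
static NvBool _exampleFspReadyCond(OBJGPU *pGpu, void *pCondData)   // hypothetical
{
    KernelFsp *pKernelFsp = (KernelFsp *)pCondData;

    // NV_TRUE once both the FSP command and message queues are empty.
    return kfspCanSendPacket_HAL(pGpu, pKernelFsp);
}

static NV_STATUS _exampleWaitForFsp(OBJGPU *pGpu, KernelFsp *pKernelFsp)   // hypothetical
{
    RMTIMEOUT timeout;
    NV_STATUS status;

    gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER);
    status = gpuTimeoutCondWait(pGpu, _exampleFspReadyCond, pKernelFsp, &timeout);
    if (status != NV_OK)
    {
        // kfspPollForCanSend_IMPL additionally drains an unexpected FSP
        // response via kfspReadMessage() before reporting the timeout.
        NV_PRINTF(LEVEL_ERROR, "Timed out waiting for FSP queues to be empty.\n");
    }
    return status;
}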

@@ -2061,8 +2061,8 @@ _kgspRpcIncrementTimeoutCountAndRateLimitPrints
{
pRpc->timeoutCount++;

if ((pRpc->timeoutCount == (RPC_TIMEOUT_LIMIT_PRINT_RATE_THRESH + 1)) &&
(RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP > 0))
if ((pRpc->timeoutCount == (RPC_TIMEOUT_GPU_RESET_THRESHOLD + 1)) &&
(RPC_TIMEOUT_PRINT_RATE_SKIP > 0))
{
// make sure we warn Xid and NV_PRINTF/NVLOG consumers that we are rate limiting prints
if (GPU_GET_KERNEL_RC(pGpu)->bLogEvents)

@@ -2072,15 +2072,15 @@ _kgspRpcIncrementTimeoutCountAndRateLimitPrints
gpuGetDomain(pGpu),
gpuGetBus(pGpu),
gpuGetDevice(pGpu),
RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP + 1);
RPC_TIMEOUT_PRINT_RATE_SKIP + 1);
}
NV_PRINTF(LEVEL_WARNING,
"Rate limiting GSP RPC error prints (printing 1 of every %d)\n",
RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP + 1);
RPC_TIMEOUT_PRINT_RATE_SKIP + 1);
}

pRpc->bQuietPrints = ((pRpc->timeoutCount > RPC_TIMEOUT_LIMIT_PRINT_RATE_THRESH) &&
((pRpc->timeoutCount % (RPC_TIMEOUT_LIMIT_PRINT_RATE_SKIP + 1)) != 0));
pRpc->bQuietPrints = ((pRpc->timeoutCount > RPC_TIMEOUT_GPU_RESET_THRESHOLD) &&
((pRpc->timeoutCount % (RPC_TIMEOUT_PRINT_RATE_SKIP + 1)) != 0));
}

/*!

@@ -2228,6 +2228,22 @@ _kgspRpcRecvPoll
_kgspLogXid119(pGpu, pRpc, expectedFunc, expectedSequence);
}

// Detect for 3 back to back GSP RPC timeout
if (pRpc->timeoutCount == RPC_TIMEOUT_GPU_RESET_THRESHOLD)
{
// GSP is completely stalled and cannot be recovered. Mark the GPU for reset.
NV_ASSERT_FAILED("Back to back GSP RPC timeout detected! GPU marked for reset");
gpuMarkDeviceForReset(pGpu);
pKernelGsp->bFatalError = NV_TRUE;

// For Windows, if TDR is supported, trigger TDR to recover the system.
if (pGpu->getProperty(pGpu, PDB_PROP_GPU_SUPPORTS_TDR_EVENT))
{
NV_ASSERT_FAILED("Triggering TDR to recover from GSP hang");
gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_UCODE_RESET, NULL, 0, 0, 0);
}
}

goto done;
}
else if (timeoutStatus != NV_OK)

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -720,7 +720,7 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
else
{
NV_PRINTF(LEVEL_ERROR, "Read failed after %d retries.\n", nRetries);
return nvStatus;
goto exit;
}
}

@@ -758,16 +758,14 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
nvStatus = NV_ERR_INVALID_PARAM_STRUCT;
}

if (nvStatus == NV_OK)
{
pMQI->rxSeqNum++;
exit:
pMQI->rxSeqNum++;

nRet = msgqRxMarkConsumed(pMQI->hQueue, nElements);
if (nRet < 0)
{
NV_PRINTF(LEVEL_ERROR, "msgqRxMarkConsumed failed: %d\n", nRet);
nvStatus = NV_ERR_GENERIC;
}
nRet = msgqRxMarkConsumed(pMQI->hQueue, nElements);
if (nRet < 0)
{
NV_PRINTF(LEVEL_ERROR, "msgqRxMarkConsumed failed: %d\n", nRet);
nvStatus = NV_ERR_GENERIC;
}

return nvStatus;

@@ -236,6 +236,11 @@ memdescCreate

allocSize = Size;

if (allocSize == 0)
{
return NV_ERR_INVALID_ARGUMENT;
}

//
// this memdesc may have gotten forced to sysmem if no carveout,
// but for VPR it needs to be in vidmem, so check and re-direct here,

@@ -306,14 +311,7 @@ memdescCreate
// (4k >> 12 = 1). This modification helps us to avoid overflow of variable
// allocSize, in case caller of this function passes highest value of NvU64.
//
if (allocSize == 0)
{
PageCount = 0;
}
else
{
PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1;
}
PageCount = ((allocSize - 1) >> RM_PAGE_SHIFT) + 1;

if (PhysicallyContiguous)
{

@@ -166,7 +166,7 @@ kgmmuSetupWarForBug2720120FmtFamily_GA100
kgmmuGetPTEAperture(pKernelGmmu),
kgmmuGetPTEAttr(pKernelGmmu), 0));

memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_WAR_PT,
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_WAR_PT,
pKernelGmmu->pWarSmallPageTable);
NV_ASSERT_OK_OR_GOTO(status, status, failed);

@@ -201,7 +201,7 @@ kgmmuSetupWarForBug2720120FmtFamily_GA100
kgmmuGetPTEAperture(pKernelGmmu),
kgmmuGetPTEAttr(pKernelGmmu), 0), failed);

memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_WAR_PD,
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_WAR_PD,
pKernelGmmu->pWarPageDirectory0);
NV_ASSERT_OK_OR_GOTO(status, status, failed);

@@ -376,30 +376,26 @@ kgmmuServiceMmuFault_GA100
FIFO_MMU_EXCEPTION_DATA *pMmuExceptionData
)
{
NV_STATUS status = NV_OK;

MMU_FAULT_BUFFER_ENTRY *pParsedFaultEntry = KERNEL_POINTER_FROM_NvP64(MMU_FAULT_BUFFER_ENTRY *, pParsedFaultInfo);

// If FLA fault do not reset channel
if (pParsedFaultEntry->mmuFaultEngineId == NV_PFAULT_MMU_ENG_ID_FLA)
{
if (pKernelGmmu->bReportFlaTranslationXid)
{
nvErrorLog_va((void *)pGpu,
DESTINATION_FLA_TRANSLATION_ERROR,
"FLA Fault: inst:0x%x dev:0x%x subdev:0x%x, faulted @ 0x%x_%08x. Fault is of type %s %s",
gpuGetInstance(pGpu),
gpuGetDeviceInstance(pGpu),
pGpu->subdeviceInstance,
pMmuExceptionData->addrHi,
pMmuExceptionData->addrLo,
kgmmuGetFaultTypeString_HAL(pKernelGmmu, pMmuExceptionData->faultType),
kfifoGetFaultAccessTypeString_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
pMmuExceptionData->accessType));
}

return NV_OK;
nvErrorLog_va((void *)pGpu,
NVLINK_REMOTE_TRANSLATION_ERROR,
"NVLink remote translation error: faulted @ 0x%x_%08x. Fault is of type %s %s",
pMmuExceptionData->addrHi,
pMmuExceptionData->addrLo,
kgmmuGetFaultTypeString_HAL(pKernelGmmu, pMmuExceptionData->faultType),
kfifoGetFaultAccessTypeString_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
pMmuExceptionData->accessType));
}
else
{
return kgmmuServiceMmuFault_GV100(pGpu, pKernelGmmu, pParsedFaultInfo, pMmuExceptionData);
status = kgmmuServiceMmuFault_GV100(pGpu, pKernelGmmu, pParsedFaultInfo, pMmuExceptionData);
}

return status;
}

@@ -61,12 +61,27 @@ _vgpuRcResetCallback
{
THREAD_STATE_NODE threadState;
NV506F_CTRL_CMD_RESET_ISOLATED_CHANNEL_PARAMS params = {0};
RsClient *pClient;
KernelChannel *pKernelChannel = NULL;

threadStateInitISRAndDeferredIntHandler(
&threadState,
pRcErrorContext->pGpu,
THREAD_STATE_FLAGS_IS_DEFERRED_INT_HANDLER);

NV_ASSERT_OK_OR_GOTO(
status,
serverGetClientUnderLock(&g_resServ, hClient, &pClient),
error_cleanup);
NV_ASSERT_OK_OR_GOTO(
status,
CliGetKernelChannel(pClient, hChannel, &pKernelChannel),
error_cleanup);

NV_ASSERT_OR_ELSE(pKernelChannel != NULL,
status = NV_ERR_INVALID_STATE;
goto error_cleanup);

params.engineID = pRcErrorContext->EngineId;
params.exceptType = pRcErrorContext->exceptType;

@@ -99,6 +114,11 @@ _vgpuRcResetCallback
}

return status;

error_cleanup:
rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
osReleaseRmSema(pSys->pSema, NULL);
return status;
}

@@ -121,7 +121,7 @@ ctxBufPoolInit
{
NV_STATUS status = NV_OK;
CTX_BUF_POOL_INFO *pCtxBufPool = NULL;
NvU32 i, poolConfig;
NvU32 i;

NV_ASSERT_OR_RETURN(ppCtxBufPool != NULL, NV_ERR_INVALID_ARGUMENT);

@@ -136,35 +136,13 @@ ctxBufPoolInit

//
// create a mem pool for each page size supported by RM
// pool corresponding to RM_ATTR_PAGE_SIZE_DEFAULT remains unused
//
for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++)
for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++)
{
switch (i)
{
case RM_ATTR_PAGE_SIZE_DEFAULT:
case RM_ATTR_PAGE_SIZE_4KB:
poolConfig = POOL_CONFIG_CTXBUF_4K;
break;
case RM_ATTR_PAGE_SIZE_BIG:
poolConfig = POOL_CONFIG_CTXBUF_64K;
break;
case RM_ATTR_PAGE_SIZE_HUGE:
poolConfig = POOL_CONFIG_CTXBUF_2M;
break;
case RM_ATTR_PAGE_SIZE_512MB:
poolConfig = POOL_CONFIG_CTXBUF_512M;
break;
case RM_ATTR_PAGE_SIZE_256GB:
poolConfig = POOL_CONFIG_CTXBUF_256G;
break;
default:
NV_PRINTF(LEVEL_ERROR, "Unsupported page size attr %d\n", i);
return NV_ERR_INVALID_STATE;
}
// Pool Config starts from POOL_CONFIG_CTXBUF_256G
NV_ASSERT_OK_OR_GOTO(status,
rmMemPoolSetup((void*)&pHeap->pmaObject, &pCtxBufPool->pMemPool[i],
poolConfig),
(POOL_CONFIG_MODE) i),
cleanup);

// Allocate the pool in CPR in case of Confidential Compute

@@ -211,7 +189,7 @@ ctxBufPoolDestroy

pCtxBufPool = *ppCtxBufPool;

for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++)
for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++)
{
if (pCtxBufPool->pMemPool[i] != NULL)
{

@@ -224,6 +202,29 @@ ctxBufPoolDestroy
NV_PRINTF(LEVEL_INFO, "Ctx buf pool destroyed\n");
}

static NvU32 NV_FORCEINLINE
ctxBufPoolPageSizeToPoolIndex(NvU64 pageSize)
{
switch (pageSize)
{
case RM_PAGE_SIZE:
return POOL_CONFIG_CTXBUF_4K;
case RM_PAGE_SIZE_64K:
return POOL_CONFIG_CTXBUF_64K;
case RM_PAGE_SIZE_128K:
return POOL_CONFIG_CTXBUF_128K;
case RM_PAGE_SIZE_HUGE:
return POOL_CONFIG_CTXBUF_2M;
case RM_PAGE_SIZE_512M:
return POOL_CONFIG_CTXBUF_512M;
case RM_PAGE_SIZE_256G:
return POOL_CONFIG_CTXBUF_256G;
default:
NV_PRINTF(LEVEL_ERROR, "Unrecognized/unsupported page size = 0x%llx\n", pageSize);
NV_ASSERT_OR_RETURN(0, POOL_CONFIG_MAX_SUPPORTED);
}
}

/*
* @brief Calculates total amount of memory required for all buffers in each pool and reserves the memory
*

@@ -263,7 +264,7 @@ ctxBufPoolReserve
NV_STATUS status = NV_OK;
NvU64 pageSize;
NvU32 i;
NvU64 totalSize[RM_ATTR_PAGE_SIZE_INVALID] = {0};
NvU64 totalSize[POOL_CONFIG_MAX_SUPPORTED] = {0};
NvU64 size;

NV_ASSERT_OR_RETURN(pCtxBufPool != NULL, NV_ERR_INVALID_ARGUMENT);

@@ -282,32 +283,13 @@ ctxBufPoolReserve
// Determine the pool(4K/64K/2M) from where this buffer will eventually
// get allocated and mark that pool to reserve this memory.
//
switch(pageSize)
{
case RM_PAGE_SIZE:
totalSize[RM_ATTR_PAGE_SIZE_4KB] += size;
break;
case RM_PAGE_SIZE_64K:
case RM_PAGE_SIZE_128K:
totalSize[RM_ATTR_PAGE_SIZE_BIG] += size;
break;
case RM_PAGE_SIZE_HUGE:
totalSize[RM_ATTR_PAGE_SIZE_HUGE] += size;
break;
case RM_PAGE_SIZE_512M:
totalSize[RM_ATTR_PAGE_SIZE_512MB] += size;
break;
case RM_PAGE_SIZE_256G:
totalSize[RM_ATTR_PAGE_SIZE_256GB] += size;
break;
default:
NV_PRINTF(LEVEL_ERROR, "Unrecognized/unsupported page size = 0x%llx\n", pageSize);
NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
}
NvU32 poolIndex = ctxBufPoolPageSizeToPoolIndex(pageSize);
NV_ASSERT_OR_RETURN(poolIndex < POOL_CONFIG_MAX_SUPPORTED, NV_ERR_INVALID_ARGUMENT);
totalSize[poolIndex] += size;
NV_PRINTF(LEVEL_INFO, "Reserving 0x%llx bytes for buf Id = 0x%x in pool with page size = 0x%llx\n", size, i, pageSize);
}

for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++)
for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++)
{
if (totalSize[i] > 0)
{

@@ -342,7 +324,7 @@ ctxBufPoolTrim
NvU32 i;
NV_ASSERT_OR_RETURN(pCtxBufPool != NULL, NV_ERR_INVALID_ARGUMENT);

for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++)
for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++)
{
rmMemPoolTrim(pCtxBufPool->pMemPool[i], 0, 0);
NV_PRINTF(LEVEL_INFO, "Trimmed pool with RM_ATTR_PAGE_SIZE_* = 0x%x\n", i);

@@ -369,7 +351,7 @@ ctxBufPoolRelease
NvU32 i;
NV_ASSERT(pCtxBufPool != NULL);

for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++)
for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++)
{
rmMemPoolRelease(pCtxBufPool->pMemPool[i], 0);
}

@@ -426,29 +408,10 @@ ctxBufPoolAllocate
pageSize = newPageSize;
}

// Determine the pool(4K/64K/2M) from where this buffer is to be allocated
switch(pageSize)
{
case RM_PAGE_SIZE:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_4KB];
break;
case RM_PAGE_SIZE_64K:
case RM_PAGE_SIZE_128K:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_BIG];
break;
case RM_PAGE_SIZE_HUGE:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_HUGE];
break;
case RM_PAGE_SIZE_512M:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_512MB];
break;
case RM_PAGE_SIZE_256G:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_256GB];
break;
default:
NV_PRINTF(LEVEL_ERROR, "Unsupported page size = 0x%llx set for context buffer\n", pageSize);
NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
}
NvU32 poolIndex = ctxBufPoolPageSizeToPoolIndex(pageSize);
NV_ASSERT_OR_RETURN(poolIndex < POOL_CONFIG_MAX_SUPPORTED, NV_ERR_INVALID_ARGUMENT);
pPool = pCtxBufPool->pMemPool[poolIndex];

NV_ASSERT_OK_OR_RETURN(rmMemPoolAllocate(pPool, (RM_POOL_ALLOC_MEMDESC*)pMemDesc));
NV_PRINTF(LEVEL_INFO, "Buffer allocated from ctx buf pool with page size = 0x%llx\n", pageSize);
return NV_OK;

@@ -488,28 +451,9 @@ ctxBufPoolFree
pMemDesc->Alignment, RM_ATTR_PAGE_SIZE_DEFAULT, NV_TRUE, &size, &pageSize));
}

switch(pageSize)
{
case RM_PAGE_SIZE:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_4KB];
break;
case RM_PAGE_SIZE_64K:
case RM_PAGE_SIZE_128K:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_BIG];
break;
case RM_PAGE_SIZE_HUGE:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_HUGE];
break;
case RM_PAGE_SIZE_512M:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_512MB];
break;
case RM_PAGE_SIZE_256G:
pPool = pCtxBufPool->pMemPool[RM_ATTR_PAGE_SIZE_256GB];
break;
default:
NV_PRINTF(LEVEL_ERROR, "Unsupported page size detected for context buffer\n");
NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_STATE);
}
NvU32 poolIndex = ctxBufPoolPageSizeToPoolIndex(pageSize);
NV_ASSERT_OR_RETURN(poolIndex < POOL_CONFIG_MAX_SUPPORTED, NV_ERR_INVALID_ARGUMENT);
pPool = pCtxBufPool->pMemPool[poolIndex];

// If scrubber is being skipped by PMA we need to manually scrub this memory
if (rmMemPoolIsScrubSkipped(pPool))

@@ -665,16 +609,19 @@ ctxBufPoolGetSizeAndPageSize
{
NvU64 chunkSize = 0;
NvU32 i;
for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++)
//
// pools are sorted in descending order of chunk size. So, start from the pool with the smallest chunk size.
//
for (i = POOL_CONFIG_MAX_SUPPORTED; i; i--)
{
NV_ASSERT_OK_OR_RETURN(rmMemPoolGetChunkAndPageSize(pCtxBufPool->pMemPool[i], &chunkSize, &pageSize));
NV_ASSERT_OK_OR_RETURN(rmMemPoolGetChunkAndPageSize(pCtxBufPool->pMemPool[i - 1], &chunkSize, &pageSize));
if (chunkSize >= size)
{
size = chunkSize;
break;
}
}
if (i == RM_ATTR_PAGE_SIZE_INVALID)
if (i == 0)
{
NV_PRINTF(LEVEL_ERROR, "couldn't find pool with chunksize >= 0x%llx\n", size);
DBG_BREAKPOINT();

@@ -722,7 +669,7 @@ ctxBufPoolIsScrubSkipped
{
NvU32 i;
NV_ASSERT_OR_RETURN(pCtxBufPool != NULL, NV_ERR_INVALID_ARGUMENT);
for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++)
for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++)
{
if (!rmMemPoolIsScrubSkipped(pCtxBufPool->pMemPool[i]))
return NV_FALSE;

@@ -747,7 +694,7 @@ ctxBufPoolSetScrubSkip
{
NvU32 i;
NV_ASSERT_OR_RETURN_VOID(pCtxBufPool != NULL);
for (i = 0; i < RM_ATTR_PAGE_SIZE_INVALID; i++)
for (i = 0; i < POOL_CONFIG_MAX_SUPPORTED; i++)
{
rmMemPoolSkipScrub(pCtxBufPool->pMemPool[i], bSkipScrub);
}

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2016-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -99,8 +99,7 @@ typedef enum
* This array contains the alloction sizes (in bytes) of each pool.
*/
static const NvU64 poolAllocSizes[] = {
0x4000000000,
0x20000000, 0x200000, 0x40000, 0x20000, 0x10000, 0x2000, 0x1000, 0x100
0x4000000000, 0x20000000, 0x200000, 0x40000, 0x20000, 0x10000, 0x2000, 0x1000, 0x100
};

#define POOL_CONFIG_POOL_IDX 0

@@ -112,7 +111,8 @@ static const NvU64 poolConfig[POOL_CONFIG_MAX_SUPPORTED][POOL_CONFIG_CHUNKSIZE_I
{ RM_POOL_IDX_4K, PMA_CHUNK_SIZE_64K }, // pool with pageSize = 4K for GMMU_FMT_VERSION_2
{ RM_POOL_IDX_256G, PMA_CHUNK_SIZE_256G }, // pool with pageSize = 256G for RM allocated buffers (unused as of blackwell)
{ RM_POOL_IDX_512M, PMA_CHUNK_SIZE_512M }, // pool with pageSize = 512MB for RM allocated buffers (unused as of ampere)
{ RM_POOL_IDX_2M, PMA_CHUNK_SIZE_4M }, // pool with pageSize = 2MB for RM allocated buffers
{ RM_POOL_IDX_2M, PMA_CHUNK_SIZE_4M }, // pool with pageSize = 4MB for RM allocated buffers
{ RM_POOL_IDX_128K, PMA_CHUNK_SIZE_2M}, // pool with pageSize = 2MB for RM allocated buffers
{ RM_POOL_IDX_64K, PMA_CHUNK_SIZE_256K }, // pool with pageSize = 64K for RM allocated buffers
{ RM_POOL_IDX_4K, PMA_CHUNK_SIZE_64K } // pool with pageSize = 4K for RM allocated buffers
};

@@ -57,6 +57,11 @@ NV_STATUS stdmemValidateParams
return NV_ERR_INVALID_ARGUMENT;
}

if (pAllocData->size == 0)
{
return NV_ERR_INVALID_ARGUMENT;
}

//
// These flags don't do anything in this path. No mapping on alloc and
// kernel map is controlled by TYPE

@@ -7616,7 +7616,7 @@ static NV_STATUS dupMemory(struct gpuDevice *device,
{
NV_STATUS status = NV_OK;
nvGpuOpsLockSet acquiredLocks;
THREAD_STATE_NODE threadState;
THREAD_STATE_NODE *pThreadState;
NvHandle dupedMemHandle;
Memory *pMemory = NULL;
PMEMORY_DESCRIPTOR pMemDesc = NULL;

@@ -7637,14 +7637,15 @@ static NV_STATUS dupMemory(struct gpuDevice *device,

NV_ASSERT((flags == NV04_DUP_HANDLE_FLAGS_REJECT_KERNEL_DUP_PRIVILEGE) || (flags == NV04_DUP_HANDLE_FLAGS_NONE));

threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

pThreadState = threadStateAlloc(THREAD_STATE_FLAGS_NONE);
if (!pThreadState)
return NV_ERR_NO_MEMORY;
// RS-TODO use dual client locking
status = _nvGpuOpsLocksAcquireAll(RMAPI_LOCK_FLAGS_NONE, device->session->handle,
&pSessionClient, &acquiredLocks);
if (status != NV_OK)
{
threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE);
return status;
}

@@ -7686,10 +7687,18 @@ static NV_STATUS dupMemory(struct gpuDevice *device,
}

// For SYSMEM or indirect peer mappings
bIsIndirectPeer = gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu);
// Deviceless memory (NV01_MEMORY_DEVICELESS) can have a NULL pGpu. Perform targeted
// null checks before IOMMU operations that require valid GPU contexts.
bIsIndirectPeer = (pAdjustedMemDesc->pGpu != NULL) ?
gpumgrCheckIndirectPeer(pMappingGpu, pAdjustedMemDesc->pGpu) : NV_FALSE;
if (bIsIndirectPeer ||
memdescRequiresIommuMapping(pAdjustedMemDesc))
{
if (NV_UNLIKELY(pAdjustedMemDesc->pGpu == NULL))
{
status = NV_ERR_INVALID_STATE;
goto freeGpaMemdesc;
}
// For sysmem allocations, the dup done below is very shallow and in
// particular doesn't create IOMMU mappings required for the mapped GPU
// to access the memory. That's a problem if the mapped GPU is different

@@ -7778,7 +7787,7 @@ freeGpaMemdesc:

done:
_nvGpuOpsLocksRelease(&acquiredLocks);
threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
threadStateFree(pThreadState, THREAD_STATE_FLAGS_NONE);
return status;
}

@@ -1856,7 +1856,7 @@ static NV_STATUS _issueRpcLarge
// Set the correct length for this queue entry.
vgpu_rpc_message_header_v->length = entryLength;

nvStatus = rpcSendMessage(pGpu, pRpc, &firstSequence);
nvStatus = rpcSendMessage(pGpu, pRpc, &lastSequence);
if (nvStatus != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "rpcSendMessage failed with status 0x%08x for fn %d!\n",

@@ -1,4 +1,4 @@
NVIDIA_VERSION = 570.190
NVIDIA_VERSION = 570.195.03

# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))