Andy Ritger
2022-11-10 08:39:33 -08:00
parent 7c345b838b
commit 758b4ee818
1323 changed files with 262135 additions and 60754 deletions

@@ -0,0 +1,259 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* This file contains nvidia-push device and channel setup structures and
* functions.
*/
#ifndef __NVIDIA_PUSH_INIT_H__
#define __NVIDIA_PUSH_INIT_H__
#include "nvidia-push-types.h"
#ifdef __cplusplus
extern "C" {
#endif
/*!
* Return the index of the first class table element supported on this device.
*
* pClassTable is an array where each element corresponds to a class
* the caller supports. The first field in the array element should
* be an NvPushSupportedClass struct. There may be additional fields
* in the array element that are specific to the caller. The
* classTableStride argument indicates the size in bytes of one array
* element, such that nvPushGetSupportedClassIndex() can step from one
* array element to the next by adding classTableStride.
*
* nvPushGetSupportedClassIndex() will query the list of classes
* supported by this device, and return the index of the first
* pClassTable array element that is supported by the device. -1 is
* returned if there is no match.
*
* \param pDevice The nvidia-push device whose class list to consider.
* \param pClassTable The table of classes supported.
* \param classTableStride The size in bytes of one table element.
* \param classTableLength The number of table elements.
*
* \return The index of the first table element that matches, or -1.
*/
typedef struct _NvPushSupportedClass {
NvU32 classNumber;
NVAModelConfig amodelConfig;
} NvPushSupportedClass;
int nvPushGetSupportedClassIndex(
NvPushDevicePtr pDevice,
const void *pClassTable,
size_t classTableStride,
size_t classTableLength);
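/*
 * A hedged usage sketch of nvPushGetSupportedClassIndex(). The entry
 * layout and the 'data' field are illustrative, not part of this API;
 * VOLTA_CHANNEL_GPFIFO_A (from clc36f.h) and NV_AMODEL_NONE (from
 * nv_amodel_enum.h) are assumed to be valid values for the caller:
 *
 *   typedef struct {
 *       NvPushSupportedClass base; // must be the first field
 *       int data;                  // caller-specific payload
 *   } MyClassTableEntry;
 *
 *   static const MyClassTableEntry myClassTable[] = {
 *       { { VOLTA_CHANNEL_GPFIFO_A, NV_AMODEL_NONE }, 2 },
 *   };
 *
 *   const int i = nvPushGetSupportedClassIndex(
 *       pDevice,
 *       myClassTable,
 *       sizeof(myClassTable[0]),                         // stride
 *       sizeof(myClassTable) / sizeof(myClassTable[0])); // length
 *   if (i >= 0) {
 *       // myClassTable[i].base.classNumber is supported by pDevice
 *   }
 */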
/*
* Parameter structure populated by the host driver when requesting an
* NvPushDeviceRec.
*/
typedef struct _NvPushAllocDeviceParams {
/* Pointer to host device, filled by host driver as needed */
void *hostDevice;
const NvPushImports *pImports;
/* The host driver's RMAPI client (NV0000) handle. */
NvU32 clientHandle;
/* TRUE iff this device is in client-side SLI mode. */
NvBool clientSli;
/* The number of subDevices allocated by the host driver. */
NvU32 numSubDevices;
struct {
/* The host driver's RMAPI device (NV0080) handle. */
NvU32 deviceHandle;
/* The host driver's RMAPI subDevice (NV2080) handle. */
NvU32 handle;
/* FERMI_VASPACE_A object in which channels on this device should be
* mapped. */
NvU32 gpuVASpaceObject;
/* ctxDma handle to be used with MapMemoryDma. */
NvU32 gpuVASpace;
} subDevice[NV_MAX_SUBDEVICES];
struct {
/*
* The Amodel configuration requested by the host driver.
*/
NVAModelConfig config;
} amodel;
/* Whether channels on this device will be used to program Tegra. */
NvBool isTegra;
/*
* Pool of RMAPI object handles. The host driver should populate
* all of the elements in this array before calling
* nvPushAllocDevice(), and release all of these handles if
* nvPushAllocDevice() fails, or after calling nvPushFreeDevice().
*
* The number of possible handles is:
*
* hUserMode (per-sd)
*/
#define NV_PUSH_DEVICE_HANDLE_POOL_NUM \
(NV_MAX_SUBDEVICES)
NvU32 handlePool[NV_PUSH_DEVICE_HANDLE_POOL_NUM];
NvU32 numClasses;
const NvU32 *supportedClasses;
} NvPushAllocDeviceParams;
NvBool nvPushAllocDevice(
const NvPushAllocDeviceParams *pParams,
NvPushDevicePtr pDevice);
void nvPushFreeDevice(
NvPushDevicePtr pDevice);
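/*
 * A minimal allocation sketch, assuming the host driver has already
 * created its RMAPI client/device/subdevice objects; hClient, hDevice,
 * hSubDevice, hVASpace, hVASpaceCtxDma, myImports, myClassList, and
 * allocHandle() are hypothetical names:
 *
 *   NvPushAllocDeviceParams params = { 0 };
 *   NvPushDeviceRec dev;
 *   NvU32 i;
 *
 *   params.pImports = &myImports;                 // see NvPushImports
 *   params.clientHandle = hClient;                // NV0000
 *   params.clientSli = FALSE;
 *   params.numSubDevices = 1;
 *   params.subDevice[0].deviceHandle = hDevice;   // NV0080
 *   params.subDevice[0].handle = hSubDevice;      // NV2080
 *   params.subDevice[0].gpuVASpaceObject = hVASpace;
 *   params.subDevice[0].gpuVASpace = hVASpaceCtxDma;
 *   params.numClasses = myNumClasses;
 *   params.supportedClasses = myClassList;
 *   for (i = 0; i < NV_PUSH_DEVICE_HANDLE_POOL_NUM; i++) {
 *       params.handlePool[i] = allocHandle();
 *   }
 *
 *   if (!nvPushAllocDevice(&params, &dev)) {
 *       // allocation failed: release the handlePool handles
 *   }
 */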
/*
* Parameter structure populated by the host driver when requesting an
* NvPushChannelRec.
*/
typedef struct _NvPushAllocChannelParams {
/* NV2080_ENGINE_TYPE_ */
NvU32 engineType;
/*
* Whether to log the pushbuffer in nvdiss format, by calling
* nvPushImportLogNvDiss().
*/
NvBool logNvDiss;
/*
* Normally, the pushbuffer utility library will time out when
* waiting for things (space in the pushbuffer, waiting for
* notifiers, etc). When the channel is created with
* noTimeout=TRUE, the channel will wait indefinitely for these
* things.
*/
NvBool noTimeout;
/*
* Normally, the pushbuffer utility library checks for channel
* errors and reports them to the host driver by calling
* nvPushImportChannelErrorOccurred(). Host drivers can set
* ignoreChannelErrors=TRUE to disable this check.
*/
NvBool ignoreChannelErrors;
/*
* DIFR stands for Display Idle Frame Refresh, a mode in which a copy
* engine (CE) is used to prefetch framebuffer pixels into the GPU's L2
* cache. The prefetch operation requires the channel to be
* specifically configured for DIFR prefetching. This flag indicates
* whether this channel is intended to be used for just that.
*/
NvBool difrPrefetch;
/*
* Host drivers should specify how many notifiers they want. The
* pushbuffer utility library will allocate memory to hold this
* many notifiers on each subDevice, plus an error notifier.
*
* The 'notifierIndex' argument to, e.g., nvPushGetNotifierCpuAddress()
* should be in the range [0,numNotifiers).
*/
NvU8 numNotifiers;
/*
* The size of the "main" pushbuffer in bytes. Note this does not
* include space for gpfifo entries or progress tracking:
* nvidia-push will implicitly pad the total pushbuffer for those
* items.
*/
NvU32 pushBufferSizeInBytes;
/*
* Pool of RMAPI object handles. The host driver should populate
* all of the elements in this array before calling
* nvPushAllocChannel(), and release all of these handles if
* nvPushAllocChannel() fails, or after calling nvPushFreeChannel().
*
* The number of possible handles is:
*
* progressSemaphore hMemory (per-sd) +
* pushbufferHandle (per-device) +
* pushbufferVAHandle (per-sd) +
* userD.hMemory (per-sd) +
* channelHandle (per-sd) +
* notifier memoryHandle (per-device) +
* error notifier ctxDma (per-device)
*/
#define NV_PUSH_CHANNEL_HANDLE_POOL_NUM \
(NV_MAX_SUBDEVICES + \
1 + \
NV_MAX_SUBDEVICES + \
NV_MAX_SUBDEVICES + \
NV_MAX_SUBDEVICES + \
1 + \
1)
NvU32 handlePool[NV_PUSH_CHANNEL_HANDLE_POOL_NUM];
/*
* A pointer to an NvPushDeviceRec, initialized with
* nvPushAllocDevice(). One or more NvPushChannelRecs may share
* the same NvPushDevicePtr.
*
* This pDevice should be kept allocated until all
* NvPushChannelRecs using it have been freed.
*/
NvPushDevicePtr pDevice;
} NvPushAllocChannelParams;
NvBool nvPushAllocChannel(
const NvPushAllocChannelParams *pParams,
NvPushChannelPtr buffer);
void nvPushFreeChannel(
NvPushChannelPtr buffer);
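/*
 * A channel allocation sketch to pair with the device sketch above;
 * NV2080_ENGINE_TYPE_GRAPHICS is assumed from cl2080.h, and
 * allocHandle() is hypothetical:
 *
 *   NvPushAllocChannelParams cp = { 0 };
 *   NvPushChannelRec channel;
 *   NvU32 i;
 *
 *   cp.pDevice = &dev;                  // from nvPushAllocDevice()
 *   cp.engineType = NV2080_ENGINE_TYPE_GRAPHICS;
 *   cp.numNotifiers = 1;
 *   cp.pushBufferSizeInBytes = 64 * 1024;
 *   for (i = 0; i < NV_PUSH_CHANNEL_HANDLE_POOL_NUM; i++) {
 *       cp.handlePool[i] = allocHandle();
 *   }
 *
 *   if (nvPushAllocChannel(&cp, &channel)) {
 *       // ... push methods ...
 *       nvPushFreeChannel(&channel);
 *   }
 */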
void nvPushInitWaitForNotifier(
NvPushChannelPtr pChannel,
NvU32 notifierIndex,
NvU32 subdeviceMask);
#ifdef __cplusplus
};
#endif
#endif /* __NVIDIA_PUSH_INIT_H__ */

@@ -0,0 +1,247 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* This file contains macros and inline functions used to actually program
* methods.
*/
#ifndef __NVIDIA_PUSH_METHODS_H__
#define __NVIDIA_PUSH_METHODS_H__
#include "nvidia-push-types.h"
#include "class/cla16f.h"
#ifdef __cplusplus
extern "C" {
#endif
static inline void __nvPushSetMethodDataSegment(NvPushChannelSegmentPtr s, const NvU32 data)
{
s->buffer->u = data;
s->buffer++;
}
static inline void nvPushSetMethodData(NvPushChannelPtr p, const NvU32 data)
{
__nvPushSetMethodDataSegment(&p->main, data);
}
#if NV_PUSH_ALLOW_FLOAT
static inline void __nvPushSetMethodDataSegmentF(NvPushChannelSegmentPtr s, const float data)
{
s->buffer->f = data;
s->buffer++;
}
static inline void nvPushSetMethodDataF(NvPushChannelPtr p, const float data)
{
__nvPushSetMethodDataSegmentF(&p->main, data);
}
#endif
static inline void __nvPushSetMethodDataSegmentU64(NvPushChannelSegmentPtr s, const NvU64 data)
{
__nvPushSetMethodDataSegment(s, NvU64_HI32(data));
__nvPushSetMethodDataSegment(s, NvU64_LO32(data));
}
static inline void nvPushSetMethodDataU64(NvPushChannelPtr p, const NvU64 data)
{
__nvPushSetMethodDataSegmentU64(&p->main, data);
}
void __nvPushMoveDWORDS(NvU32* dst, const NvU32* src, int dwords);
static inline void
nvDmaMoveDWORDS(NvPushChannelUnion *dst, const NvU32* src, int dwords)
{
// The 'dst' argument is an array of NvPushChannelUnion; it is safe
// to treat this as an array of NvU32, as long as NvU32 and
// NvPushChannelUnion are the same size.
ct_assert(sizeof(NvU32) == sizeof(NvPushChannelUnion));
__nvPushMoveDWORDS((NvU32 *)dst, src, dwords);
}
static inline void nvPushInlineData(NvPushChannelPtr p, const void *data,
size_t dwords)
{
nvDmaMoveDWORDS(p->main.buffer, (const NvU32 *)data, dwords);
p->main.buffer += dwords;
}
/*!
* Return the maximum method count: the maximum number of dwords that can be
* specified in the nvPushMethod() family of macros.
*/
static inline NvU32 nvPushMaxMethodCount(const NvPushChannelRec *p)
{
/*
* The number of methods that can be specified in one NVA16F_DMA_METHOD
* header is limited by the bit field size of NVA16F_DMA_METHOD_COUNT: 28:16
* (i.e., maximum representable value 8191).
*/
const NvU32 maxFromMethodCountMask = DRF_MASK(NVA16F_DMA_METHOD_COUNT);
/*
* Further, the method count must be smaller than half the total pushbuffer
* size minus one, to correctly distinguish empty and full pushbuffers. See
* nvPushHeader() for details.
*/
const NvU32 pushBufferSizeInBytes = p->main.sizeInBytes;
const NvU32 pushBufferSizeInDWords = pushBufferSizeInBytes / 4;
const NvU32 pushBufferHalfSizeInDWords = pushBufferSizeInDWords / 2;
/*
* Subtract two from pushBufferHalfSizeInDWords:
*
* -1 to distinguish pushbuffer empty from full (see above).
*
* -1 to be smaller than, rather than equal to, the above constraints.
*/
const NvU32 maxFromPushBufferSize = pushBufferHalfSizeInDWords - 2;
return NV_MIN(maxFromMethodCountMask, maxFromPushBufferSize);
}
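/*
 * Worked example: with a 64 KiB main pushbuffer, sizeInBytes / 4 =
 * 16384 dwords, half of that is 8192, and subtracting 2 gives 8190.
 * That is below the 8191 method-count field limit, so
 * nvPushMaxMethodCount() returns 8190 for such a channel.
 */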
// These macros verify that the values used in the methods fit
// into the defined ranges.
#define ASSERT_DRF_DEF(d, r, f, n) \
nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (NV ## d ## r ## f ## n)))
#define ASSERT_DRF_NUM(d, r, f, n) \
nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (n)))
#if defined(DEBUG)
#include "class/clc36f.h" /* VOLTA_CHANNEL_GPFIFO_A */
/*
* When pushing GPFIFO methods (NVA16F_SEMAPHORE[ABCD]), all four
* methods must be pushed together. If the four methods are not
* pushed together, nvidia-push might wrap, injecting its progress
* tracking semaphore release methods in the middle, and perturb the
* NVA16F_SEMAPHOREA_OFFSET_UPPER and NVA16F_SEMAPHOREB_OFFSET_LOWER
* channel state.
*
* Return whether the methods described by the arguments include some,
* but not all, of A, B, C, and D. I.e., if the range starts at B, C,
* or D, or if the range ends at A, B, or C.
*
* Perform a similar check for Volta+ semaphore methods
* NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE. Note that we always check for both
* sets of methods, regardless of the GPU we're actually running on. This is
* okay since:
* a) the NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE method offsets were not used
* for anything from (a16f..c36f].
* b) the SEMAPHORE[ABCD] methods still exist on the newer classes (they
* haven't been reused for anything else)
*/
static inline NvBool __nvPushStartSplitsSemaphore(
NvU32 method,
NvU32 count,
NvU32 secOp)
{
ct_assert(NVA16F_SEMAPHOREA < NVA16F_SEMAPHORED);
ct_assert(NVC36F_SEM_ADDR_LO < NVC36F_SEM_EXECUTE);
/*
* compute start and end as inclusive; if not incrementing, we
* assume end==start
*/
const NvU32 start = method;
const NvU32 end = (secOp == NVA16F_DMA_SEC_OP_INC_METHOD) ?
(method + ((count - 1) * 4)) : method;
return ((start > NVA16F_SEMAPHOREA) && (start <= NVA16F_SEMAPHORED)) ||
((end >= NVA16F_SEMAPHOREA) && (end < NVA16F_SEMAPHORED)) ||
((start > NVC36F_SEM_ADDR_LO) && (start <= NVC36F_SEM_EXECUTE)) ||
((end >= NVC36F_SEM_ADDR_LO) && (end < NVC36F_SEM_EXECUTE));
}
#endif /* DEBUG */
/*
* Note that _count+1 must be less than half the total pushbuffer size. This is
* required by GPFIFO because we can't reliably tell when we can write all the
* way to the end of the pushbuffer if we wrap (see bug 232454). This
* assumption ensures that there will be enough space once GET reaches PUT.
*/
#define nvPushHeader(_push_buffer, _segment, _count, _header) do { \
NvPushChannelSegmentPtr _pSegment = &(_push_buffer)->_segment; \
nvAssert(((_count)+1) < ((_pSegment)->sizeInBytes / 8)); \
if ((_pSegment)->freeDwords < ((_count)+1)) \
__nvPushMakeRoom((_push_buffer), (_count) + 1); \
__nvPushSetMethodDataSegment((_pSegment), (_header)); \
(_pSegment)->freeDwords -= ((_count)+1); \
} while(0)
#define __nvPushStart(_push_buffer, _segment, _subch, _offset, _count, _opcode) \
{ \
nvAssert(!__nvPushStartSplitsSemaphore( \
(_offset), \
(_count), \
NVA16F_DMA_SEC_OP ## _opcode)); \
ASSERT_DRF_DEF(A16F, _DMA, _SEC_OP, _opcode); \
ASSERT_DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count); \
ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch); \
ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2); \
nvPushHeader((_push_buffer), _segment, (_count), \
DRF_DEF(A16F, _DMA, _SEC_OP, _opcode) | \
DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count) | \
DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch) | \
DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2)); \
}
// The GPU can encode a 13-bit constant method/data pair in a single DWORD.
#define nvPushImmedValSegment(_push_buffer, _segment, _subch, _offset, _data) { \
ASSERT_DRF_NUM(A16F, _DMA, _IMMD_DATA, _data); \
ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch); \
ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2); \
if ((_push_buffer)->_segment.freeDwords < 1) \
__nvPushMakeRoom((_push_buffer), 1); \
__nvPushSetMethodDataSegment(&(_push_buffer)->_segment, \
DRF_DEF(A16F, _DMA, _SEC_OP, _IMMD_DATA_METHOD) | \
DRF_NUM(A16F, _DMA, _IMMD_DATA, _data) | \
DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch) | \
DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2)); \
(_push_buffer)->_segment.freeDwords--; \
}
#define nvPushImmedVal(_push_buffer, _subch, _offset, _data) \
nvPushImmedValSegment(_push_buffer, main, _subch, _offset, _data)
#define nvPushImmed(_push_buffer, _subch, _offset, _val) \
nvPushImmedVal(_push_buffer, _subch, _offset, _offset##_V_##_val)
// Method headers.
#define nvPushMethod(_push_buffer, _subch, _offset, _count) \
__nvPushStart(_push_buffer, main, _subch, _offset, _count, _INC_METHOD)
#define nvPushMethodNoIncr(_push_buffer, _subch, _offset, _count) \
__nvPushStart(_push_buffer, main, _subch, _offset, _count, _NON_INC_METHOD)
#define nvPushMethodOneIncr(_push_buffer, _subch, _offset, _count) \
__nvPushStart(_push_buffer, main, _subch, _offset, _count, _ONE_INC)
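/*
 * A hedged sketch of pushing methods with these macros; subchannel 0
 * and the MY_METHOD/MY_ARRAY offsets are hypothetical (real offsets
 * come from the class headers bound to the channel), and
 * nvPushKickoff() is declared in nvidia-push-utils.h:
 *
 *   nvPushMethod(p, 0, MY_METHOD, 2);     // incrementing, 2 dwords
 *   nvPushSetMethodData(p, value0);       // MY_METHOD
 *   nvPushSetMethodData(p, value1);       // MY_METHOD + 4
 *
 *   nvPushMethodNoIncr(p, 0, MY_ARRAY, numDwords);
 *   nvPushInlineData(p, data, numDwords); // all dwords go to MY_ARRAY
 *
 *   nvPushImmedVal(p, 0, MY_METHOD, 5);   // value must fit in 13 bits
 *   nvPushKickoff(p);                     // submit to the GPU
 */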
#ifdef __cplusplus
};
#endif
#endif /* __NVIDIA_PUSH_METHODS_H__ */

@@ -0,0 +1,281 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* This file contains core definitions (structures and enums) for use in the
* rest of the nvidia-push code.
*/
#ifndef __NVIDIA_PUSH_TYPES_H__
#define __NVIDIA_PUSH_TYPES_H__
#include <stddef.h> /* size_t */
#include "nvtypes.h"
#include "nvlimits.h"
#include "nvmisc.h"
#include "nvgputypes.h" /* NvNotificationRec */
#include "nv_common_utils.h" /* TRUE/FALSE */
#include "nvctassert.h"
#include "nv_assert.h" /* nvAssert() */
#include "nv_amodel_enum.h" /* NVAModelConfig */
#include "nvos.h" /* NV_CHANNELGPFIFO_NOTIFICATION_* */
#ifdef __cplusplus
extern "C" {
#endif
#define NV_PUSH_NOTIFIER_SHORT_TIMEOUT 3000 /* in milliseconds (i.e., 3 seconds) */
#define NV_PUSH_NOTIFIER_LONG_TIMEOUT 10000 /* in milliseconds (i.e., 10 seconds) */
# define NV_PUSH_PRINTF_FORMAT_ARGUMENT
# define NV_PUSH_PRINTF_ATTRIBUTES(_fmt,_var) \
__attribute__((format (printf, _fmt, _var)))
#if defined(NV_PUSH_IN_KERNEL)
# define NV_PUSH_ALLOW_FLOAT 0
#else
# define NV_PUSH_ALLOW_FLOAT 1
#endif
typedef union _NvPushChannelUnion
{
NvU32 u;
#if NV_PUSH_ALLOW_FLOAT
float f;
#endif
} NvPushChannelUnion;
typedef struct _NvPushChannelRec NvPushChannelRec;
typedef struct _NvPushChannelRec *NvPushChannelPtr;
typedef struct _nv_push_hal {
void (*kickoff)(struct _NvPushChannelRec*, NvU32 oldGpPut, NvU32 newGpPut);
void (*releaseTimelineSemaphore)(NvPushChannelPtr, void *cpuAddress, NvU64 gpuAddress, NvU64 val);
void (*acquireTimelineSemaphore)(NvPushChannelPtr, NvU64 gpuAddress, NvU64 val);
struct {
/* Requires USERD memory to be specified at channel allocation */
NvU32 clientAllocatesUserD :1;
/* On Tegra, we currently need to allocate double the requested GPFIFO
* entries */
NvU32 allocateDoubleSizeGpFifo :1;
/* Use Volta+ semaphore methods */
NvU32 voltaSemMethods :1;
NvU32 extendedBase :1;
} caps;
} NvPushHal;
typedef struct _NvPushDeviceRec {
void *hostDevice; /* Provided by the host driver */
NvBool hostLBoverflowBug1667921 : 1;
NvBool clientSli : 1; /* Provided by the host driver */
NvU32 clientHandle; /* Provided by the host driver */
NvU32 numSubDevices; /* Provided by the host driver */
NvU32 numClasses; /* Provided by the host driver */
const NvU32 *supportedClasses;/* Provided by the host driver */
struct {
NvU32 handle; /* Provided by the host driver */
NvU32 deviceHandle; /* Provided by the host driver */
NvU32 gpuVASpaceObject;/* Provided by the host driver */
NvU32 gpuVASpaceCtxDma;/* Provided by the host driver */
NvU32 hUserMode; /* VOLTA_USERMODE_A object */
void *pUserMode; /* VOLTA_USERMODE_A mapping */
} subDevice[NV_MAX_SUBDEVICES];
NvU32 gpfifoClass;
size_t userDSize;
NVAModelConfig amodelConfig;
NvPushHal hal;
const struct _NvPushImports *pImports;
} NvPushDeviceRec, *NvPushDevicePtr;
typedef struct _NvPushChannelSegmentRec
{
NvU32 freeDwords; // free space (in dwords)
NvU32 sizeInBytes; // Push buffer size (in bytes)
NvU32 putOffset; // Offset of last kickoff
NvPushChannelUnion *base; // Push buffer start pointer
NvPushChannelUnion *buffer; // Push buffer current pointer
NvU64 gpuMapOffset;
} NvPushChannelSegmentRec, *NvPushChannelSegmentPtr;
struct _NvPushChannelRec
{
NvBool initialized : 1;
NvBool logNvDiss : 1;
NvBool noTimeout : 1;
NvBool ignoreChannelErrors : 1;
NvBool channelErrorOccurred : 1;
NvU32 channelHandle[NV_MAX_SUBDEVICES];
NvU32 pushbufferHandle;
NvU32 pushbufferVAHandle[NV_MAX_SUBDEVICES];
NvPushChannelSegmentRec main;
void *control[NV_MAX_SUBDEVICES];
NvU32 numGpFifoEntries;
NvU32 *gpfifo; // GPFIFO entries
NvU32 gpPutOffset; // GPFIFO entries last kicked off offset
NvU32 currentSubDevMask;
NvPushChannelSegmentRec progressTracker;
struct {
NvU32 handle[NV_MAX_SUBDEVICES];
void *ptr[NV_MAX_SUBDEVICES];
NvU64 gpuVA;
} progressSemaphore;
struct {
NvU32 hMemory;
} userD[NV_MAX_SUBDEVICES];
struct {
NvU8 num;
NvU32 memoryHandle;
NvNotification *cpuAddress;
NvU64 gpuAddress;
NvU32 errorCtxDma;
} notifiers;
NvPushDeviceRec *pDevice;
};
/* Opaque type, only used by pointer within the push buffer utility library. */
typedef struct _NvPushImportEvent NvPushImportEvent;
/* Table of function pointers to be provided by the nvidia-push host driver. */
typedef struct _NvPushImports {
NvU32 (*rmApiControl) (NvPushDevicePtr pDevice,
NvU32 hObject,
NvU32 cmd,
void *pParams,
NvU32 paramsSize);
NvU32 (*rmApiAlloc) (NvPushDevicePtr pDevice,
NvU32 hParent,
NvU32 hObject,
NvU32 hClass,
void *pAllocParams);
NvU32 (*rmApiFree) (NvPushDevicePtr pDevice,
NvU32 hParent,
NvU32 hObject);
NvU32 (*rmApiMapMemoryDma) (NvPushDevicePtr pDevice,
NvU32 hDevice,
NvU32 hDma,
NvU32 hMemory,
NvU64 offset,
NvU64 length,
NvU32 flags,
NvU64 *pDmaOffset);
NvU32 (*rmApiUnmapMemoryDma) (NvPushDevicePtr pDevice,
NvU32 hDevice,
NvU32 hDma,
NvU32 hMemory,
NvU32 flags,
NvU64 dmaOffset);
NvU32 (*rmApiAllocMemory64) (NvPushDevicePtr pDevice,
NvU32 hParent,
NvU32 hMemory,
NvU32 hClass,
NvU32 flags,
void **ppAddress,
NvU64 *pLimit);
NvU32 (*rmApiVidHeapControl) (NvPushDevicePtr pDevice,
void *pVidHeapControlParms);
NvU32 (*rmApiMapMemory) (NvPushDevicePtr pDevice,
NvU32 hDevice,
NvU32 hMemory,
NvU64 offset,
NvU64 length,
void **ppLinearAddress,
NvU32 flags);
NvU32 (*rmApiUnmapMemory) (NvPushDevicePtr pDevice,
NvU32 hDevice,
NvU32 hMemory,
void *pLinearAddress,
NvU32 flags);
NvU64 (*getMilliSeconds) (NvPushDevicePtr pDevice);
void (*yield) (NvPushDevicePtr pDevice);
NvBool (*waitForEvent) (NvPushDevicePtr pDevice,
NvPushImportEvent *pEvent,
NvU64 timeout);
void (*emptyEventFifo) (NvPushDevicePtr pDevice,
NvPushImportEvent *pEvent);
void (*channelErrorOccurred) (NvPushChannelPtr pChannel, NvU32 channelErrCode);
void (*pushbufferWrapped) (NvPushChannelPtr pChannel);
void (*logError) (NvPushDevicePtr pDevice,
NV_PUSH_PRINTF_FORMAT_ARGUMENT const char *fmt, ...)
NV_PUSH_PRINTF_ATTRIBUTES(2,3);
/*
* The logNvDiss() import, in DEBUG builds, logs strings to be
* parsed by nvdiss. Note that multiple nvPushImportLogNvDiss()
* calls may be used to build one line of output (so, respect the
* newlines provided in the strings).
*/
#if defined(DEBUG)
void (*logNvDiss) (NvPushChannelPtr pChannel,
NV_PUSH_PRINTF_FORMAT_ARGUMENT const char *fmt, ...)
NV_PUSH_PRINTF_ATTRIBUTES(2,3);
#endif
} NvPushImports;
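/*
 * A sketch of how a host driver might provide its import table; the
 * myRmControl() wrapper target is a hypothetical stand-in for the
 * host driver's own RMAPI entry point:
 *
 *   static NvU32 myRmApiControl(NvPushDevicePtr pDevice, NvU32 hObject,
 *                               NvU32 cmd, void *pParams, NvU32 paramsSize)
 *   {
 *       return myRmControl(pDevice->clientHandle, hObject, cmd,
 *                          pParams, paramsSize);
 *   }
 *
 *   static const NvPushImports myImports = {
 *       .rmApiControl = myRmApiControl,
 *       // ... the remaining imports are wired up similarly ...
 *   };
 */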
void __nvPushMakeRoom(NvPushChannelPtr, NvU32 count);
#ifdef __cplusplus
};
#endif
#endif /* __NVIDIA_PUSH_TYPES_H__ */

@@ -0,0 +1,180 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* This file contains push buffer utility functions and declarations */
#ifndef __NVIDIA_PUSH_UTILS_H__
#define __NVIDIA_PUSH_UTILS_H__
#include "nvidia-push-types.h"
#include "nvlimits.h"
#include "class/cla16f.h"
#ifdef __cplusplus
extern "C" {
#endif
static inline NvBool nvPushIsAModel(const NvPushDeviceRec *pDevice)
{
return FALSE;
}
/* declare prototypes: */
NvBool nvPushCheckChannelError(NvPushChannelPtr pChannel);
void nvPushKickoff(NvPushChannelPtr);
NvBool nvPushIdleChannelTest(NvPushChannelPtr pChannel, NvU32 timeoutMSec);
NvBool nvPushIdleChannel(NvPushChannelPtr);
void nvPushWaitForNotifier(
NvPushChannelPtr pChannel,
NvU32 notifierIndex,
NvU32 subdeviceMask,
NvBool yield,
NvPushImportEvent *pEvent,
int id);
void nvPushReleaseTimelineSemaphore(
NvPushChannelPtr p,
void *cpuAddress,
NvU64 gpuAddress,
NvU64 val);
void nvPushAcquireTimelineSemaphore(
NvPushChannelPtr p,
NvU64 gpuAddress,
NvU64 val);
NvBool nvPushDecodeMethod(NvU32 header, NvU32 *count);
void nvPushSetObject(NvPushChannelPtr p, NvU32 subch, NvU32 object[NV_MAX_SUBDEVICES]);
void nvPushSetSubdeviceMask(NvPushChannelPtr p, NvU32 mask);
void __nvPushMakeRoom(NvPushChannelPtr, NvU32 count);
#define NV_PUSH_SUBDEVICE_MASK_PRIMARY 0x00000001
#define NV_PUSH_SUBDEVICE_MASK_ALL DRF_MASK(NVA16F_DMA_SET_SUBDEVICE_MASK_VALUE)
/*
* Evaluates to TRUE if the two subDevMasks are equivalent for the given SLI
* device
*/
static inline NvBool nvPushSubDeviceMaskEquiv(
const NvPushDeviceRec *pDevice,
NvU32 maskA,
NvU32 maskB)
{
const NvU32 allSubDevices = (1 << pDevice->numSubDevices) - 1;
return (maskA & allSubDevices) == (maskB & allSubDevices);
}
/* Evaluates to TRUE if subDevMask will write to all of the GPUs */
static inline NvBool nvPushSubDeviceMaskAllActive(
const NvPushDeviceRec *pDevice,
NvU32 subDevMask)
{
return nvPushSubDeviceMaskEquiv(pDevice, subDevMask,
NV_PUSH_SUBDEVICE_MASK_ALL);
}
#define NV_PUSH_NOTIFIER_INTERNAL_BIT 0x80
ct_assert(NV_PUSH_NOTIFIER_INTERNAL_BIT >=
NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1);
#define NV_PUSH_ERROR_NOTIFIER_INDEX \
(NV_PUSH_NOTIFIER_INTERNAL_BIT | \
NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR)
#define NV_PUSH_TOKEN_NOTIFIER_INDEX \
(NV_PUSH_NOTIFIER_INTERNAL_BIT | \
NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN)
/*
* Notifiers for use by nvidia-push, not exposed to clients:
* NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1: defined by RM
* NV_MAX_SUBDEVICES: one for each subdevice to track work submission token
*/
#define NV_PUSH_NUM_INTERNAL_NOTIFIERS \
(NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1 + NV_MAX_SUBDEVICES)
static inline NvU32 __nvPushGetNotifierRawIndex(
const NvPushDeviceRec *pDevice,
NvU32 notifierIndex,
NvU32 sd)
{
if (notifierIndex & NV_PUSH_NOTIFIER_INTERNAL_BIT) {
return notifierIndex & ~NV_PUSH_NOTIFIER_INTERNAL_BIT;
} else {
return (notifierIndex * pDevice->numSubDevices) + sd +
NV_PUSH_NUM_INTERNAL_NOTIFIERS;
}
}
static inline NvNotification *nvPushGetNotifierCpuAddress(
const NvPushChannelRec *pChannel,
NvU32 notifierIndex,
NvU32 sd)
{
const NvU32 rawIndex =
__nvPushGetNotifierRawIndex(pChannel->pDevice, notifierIndex, sd);
return &pChannel->notifiers.cpuAddress[rawIndex];
}
static inline NvU64 nvPushGetNotifierGpuAddress(
const NvPushChannelRec *pChannel,
NvU32 notifierIndex,
NvU32 sd)
{
const NvU32 rawIndex =
__nvPushGetNotifierRawIndex(pChannel->pDevice, notifierIndex, sd);
const size_t offset = rawIndex * sizeof(NvNotification);
return pChannel->notifiers.gpuAddress + offset;
}
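/*
 * Worked example (assuming numSubDevices == 2): the client notifier
 * with notifierIndex 0 on subdevice 1 maps to raw index
 * NV_PUSH_NUM_INTERNAL_NOTIFIERS + (0 * 2) + 1, while the internal
 * NV_PUSH_ERROR_NOTIFIER_INDEX maps directly to raw index
 * NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR:
 *
 *   NvNotification *n = nvPushGetNotifierCpuAddress(pChannel, 0, 1);
 *   NvU64 gpuVA = nvPushGetNotifierGpuAddress(pChannel, 0, 1);
 */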
extern NvU32 nvPushReadGetOffset(NvPushChannelPtr push_buffer, NvBool minimum);
/*!
* Make room in the pushbuffer, checking for errors.
*
* If a channel error occurred, channelErrorOccurred is set to TRUE.
* nvPushCheckForRoomAndErrors() is designed to be called just before a
* nvPushMethod() with the same size.
*/
static inline void nvPushCheckForRoomAndErrors(
NvPushChannelPtr pChannel,
NvU32 count)
{
pChannel->channelErrorOccurred = FALSE;
if (pChannel->main.freeDwords < (count + 1)) {
__nvPushMakeRoom(pChannel, count + 1);
}
}
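/*
 * Typical usage sketch (MY_METHOD is hypothetical):
 *
 *   nvPushCheckForRoomAndErrors(p, 2);
 *   if (p->channelErrorOccurred) {
 *       return; // don't keep pushing into a dead channel
 *   }
 *   nvPushMethod(p, 0, MY_METHOD, 2);
 *   nvPushSetMethodData(p, a);
 *   nvPushSetMethodData(p, b);
 */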
#ifdef __cplusplus
};
#endif
#endif /* __NVIDIA_PUSH_UTILS_H__ */