525.53

2026-02-25 09:23:59 +00:00 · 2022-11-10 08:39:33 -08:00
parent 7c345b838b
commit 758b4ee818
1323 changed files with 262135 additions and 60754 deletions
--- a/src/common/unix/nvidia-push/interface/nvidia-push-methods.h
+++ b/src/common/unix/nvidia-push/interface/nvidia-push-methods.h
@@ -0,0 +1,247 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This file contains macros and inline functions used to actually program
+ * methods.
+ */
+
+#ifndef __NVIDIA_PUSH_METHODS_H__
+#define __NVIDIA_PUSH_METHODS_H__
+
+#include "nvidia-push-types.h"
+
+#include "class/cla16f.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Store one 32-bit word through the 'u' member of the entry at the segment's
+ * current write pointer, then advance the write pointer by one entry.
+ *
+ * This helper does not touch the segment's freeDwords accounting.
+ */
+static inline void __nvPushSetMethodDataSegment(NvPushChannelSegmentPtr s, const NvU32 data)
+{
+    NvPushChannelUnion *slot = s->buffer++;
+
+    slot->u = data;
+}
+
+/*
+ * Append one 32-bit data word to the 'main' segment of channel 'p'.
+ */
+static inline void nvPushSetMethodData(NvPushChannelPtr p, const NvU32 data)
+{
+    NvPushChannelSegmentPtr mainSegment = &p->main;
+
+    __nvPushSetMethodDataSegment(mainSegment, data);
+}
+
+#if NV_PUSH_ALLOW_FLOAT
+/*
+ * Float counterpart of __nvPushSetMethodDataSegment(): store 'data' through
+ * the 'f' member of the entry at the segment's current write pointer, then
+ * advance the write pointer by one entry.
+ */
+static inline void __nvPushSetMethodDataSegmentF(NvPushChannelSegmentPtr s, const float data)
+{
+    NvPushChannelUnion *slot = s->buffer++;
+
+    slot->f = data;
+}
+
+/*
+ * Append one float data word to the 'main' segment of channel 'p'.
+ */
+static inline void nvPushSetMethodDataF(NvPushChannelPtr p, const float data)
+{
+    NvPushChannelSegmentPtr mainSegment = &p->main;
+
+    __nvPushSetMethodDataSegmentF(mainSegment, data);
+}
+#endif
+
+/*
+ * Append a 64-bit value to the segment as two consecutive 32-bit words.
+ *
+ * The upper 32 bits are written first, followed by the lower 32 bits.
+ */
+static inline void __nvPushSetMethodDataSegmentU64(NvPushChannelSegmentPtr s, const NvU64 data)
+{
+    const NvU32 upper = NvU64_HI32(data);
+    const NvU32 lower = NvU64_LO32(data);
+
+    __nvPushSetMethodDataSegment(s, upper);
+    __nvPushSetMethodDataSegment(s, lower);
+}
+
+/*
+ * Append a 64-bit value (upper word first, then lower word) to the 'main'
+ * segment of channel 'p'.
+ */
+static inline void nvPushSetMethodDataU64(NvPushChannelPtr p, const NvU64 data)
+{
+    NvPushChannelSegmentPtr mainSegment = &p->main;
+
+    __nvPushSetMethodDataSegmentU64(mainSegment, data);
+}
+
+void __nvPushMoveDWORDS(NvU32* dst, const NvU32* src, int dwords);
+
+/*
+ * Copy 'dwords' 32-bit words from 'src' into an array of pushbuffer entries
+ * by handing the destination to __nvPushMoveDWORDS() as a plain NvU32 array.
+ */
+static inline void
+nvDmaMoveDWORDS(NvPushChannelUnion *dst, const NvU32* src, int dwords)
+{
+    // Reinterpreting the NvPushChannelUnion array as an NvU32 array is only
+    // valid while the two types have identical size; enforce that at
+    // compile time.
+    ct_assert(sizeof(NvPushChannelUnion) == sizeof(NvU32));
+
+    NvU32 *const dwordDst = (NvU32 *)dst;
+
+    __nvPushMoveDWORDS(dwordDst, src, dwords);
+}
+
+/*
+ * Copy 'dwords' 32-bit words from 'data' to the write pointer of the channel's
+ * main segment, then advance the write pointer past them.
+ *
+ * This function does not check or update the segment's freeDwords; presumably
+ * the space was already reserved by a preceding method header -- confirm
+ * against callers.
+ */
+static inline void nvPushInlineData(NvPushChannelPtr p, const void *data,
+                                    size_t dwords)
+{
+    NvPushChannelSegmentPtr seg = &p->main;
+    const NvU32 *words = (const NvU32 *)data;
+
+    // NOTE(review): nvDmaMoveDWORDS() takes an int count, so 'dwords' is
+    // narrowed from size_t here.
+    nvDmaMoveDWORDS(seg->buffer, words, (int)dwords);
+    seg->buffer += dwords;
+}
+
+/*!
+ * Return the maximum method count: the maximum number of dwords that can be
+ * specified in the nvPushMethod() family of macros.
+ *
+ * The result is the smaller of the limit imposed by the
+ * NVA16F_DMA_METHOD_COUNT bit field and the limit imposed by the size of the
+ * channel's main pushbuffer segment.  If the segment is too small for the
+ * size-based limit to be positive, 0 is returned.
+ *
+ * \param[in]  p  The channel whose main segment size is consulted.
+ *
+ * \return  The largest count that may be passed to nvPushMethod() et al.
+ */
+static inline NvU32 nvPushMaxMethodCount(const NvPushChannelRec *p)
+{
+    /*
+     * The number of methods that can be specified in one NVA16F_DMA_METHOD
+     * header is limited by the bit field size of NVA16F_DMA_METHOD_COUNT: 28:16
+     * (i.e., maximum representable value 8191).
+     */
+    const NvU32 maxFromMethodCountMask = DRF_MASK(NVA16F_DMA_METHOD_COUNT);
+
+    /*
+     * Further, the method count must be smaller than half the total pushbuffer
+     * size minus one, to correctly distinguish empty and full pushbuffers.  See
+     * nvPushHeader() for details.
+     */
+    const NvU32 pushBufferSizeInBytes = p->main.sizeInBytes;
+    const NvU32 pushBufferSizeInDWords = pushBufferSizeInBytes / 4;
+    const NvU32 pushBufferHalfSizeInDWords = pushBufferSizeInDWords / 2;
+
+    /*
+     * Subtract two from pushBufferHalfSizeInDWords:
+     *
+     * -1 to distinguish pushbuffer empty from full (see above).
+     *
+     * -1 to be smaller than, rather than equal to, the above constraints.
+     *
+     * The subtraction is done in unsigned arithmetic, so guard against a
+     * degenerate pushbuffer (fewer than two dwords per half): without the
+     * guard the result would wrap around to a huge value and the field
+     * limit would be returned instead of 0.
+     */
+    const NvU32 maxFromPushBufferSize =
+        (pushBufferHalfSizeInDWords > 2) ? (pushBufferHalfSizeInDWords - 2) : 0;
+
+    return NV_MIN(maxFromMethodCountMask, maxFromPushBufferSize);
+}
+
+// These macros assert that a value used in a method fits into its field:
+// a named constant (_DEF) or an expression (_NUM) with no bits outside DRF_MASK.
+#define ASSERT_DRF_DEF(d, r, f, n) \
+    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (NV ## d ## r ## f ## n)))
+#define ASSERT_DRF_NUM(d, r, f, n) \
+    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (n)))
+
+#if defined(DEBUG)
+#include "class/clc36f.h"    /* VOLTA_CHANNEL_GPFIFO_A */
+
+/*
+ * When pushing GPFIFO methods (NVA16F_SEMAPHORE[ABCD]), all four
+ * methods must be pushed together.  If the four methods are not
+ * pushed together, nvidia-push might wrap, injecting its progress
+ * tracking semaphore release methods in the middle, and perturb the
+ * NVA16F_SEMAPHOREA_OFFSET_UPPER and NVA16F_SEMAPHOREB_OFFSET_LOWER
+ * channel state.
+ *
+ * Return whether the methods described by the arguments include some,
+ * but not all, of A, B, C, and D.  I.e., if the range starts at B, C,
+ * or D, or if the range ends at A, B, or C.
+ *
+ * Perform a similar check for Volta+ semaphore methods
+ * NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE.  Note that we always check for both
+ * sets of methods, regardless of the GPU we're actually running on.  This is
+ * okay since:
+ * a) the NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE method offsets were not used
+ *    for anything in the classes from a16f up to, but not including, c36f.
+ * b) the SEMAPHORE[ABCD] methods still exist on the newer classes (they
+ *    haven't been reused for anything else)
+ *
+ * 'method' is the byte offset of the first method, 'count' is the number of
+ * data dwords in the header, and 'secOp' is one of the NVA16F_DMA_SEC_OP_*
+ * values.
+ */
+static inline NvBool __nvPushStartSplitsSemaphore(
+    NvU32 method,
+    NvU32 count,
+    NvU32 secOp)
+{
+    ct_assert(NVA16F_SEMAPHOREA < NVA16F_SEMAPHORED);
+    ct_assert(NVC36F_SEM_ADDR_LO < NVC36F_SEM_EXECUTE);
+
+    /*
+     * Compute start and end as inclusive:
+     *
+     * - incrementing: every data dword targets the next method, so the last
+     *   method touched is method + (count - 1) * 4;
+     *
+     * - one-increment: the address advances once, after the first data
+     *   dword, so with more than one dword the last method touched is
+     *   method + 4;
+     *
+     * - otherwise (non-incrementing, or a single dword): end == start.
+     */
+    const NvU32 start = method;
+    NvU32 end = method;
+
+    if (secOp == NVA16F_DMA_SEC_OP_INC_METHOD) {
+        end = method + ((count - 1) * 4);
+    } else if ((secOp == NVA16F_DMA_SEC_OP_ONE_INC) && (count > 1)) {
+        end = method + 4;
+    }
+
+    return ((start >  NVA16F_SEMAPHOREA)  && (start <= NVA16F_SEMAPHORED))  ||
+           ((end   >= NVA16F_SEMAPHOREA)  && (end   <  NVA16F_SEMAPHORED))  ||
+           ((start >  NVC36F_SEM_ADDR_LO) && (start <= NVC36F_SEM_EXECUTE)) ||
+           ((end   >= NVC36F_SEM_ADDR_LO) && (end   <  NVC36F_SEM_EXECUTE));
+}
+#endif /* DEBUG */
+
+/*
+ * Reserve room for a method header plus _count data dwords in segment
+ * _segment of _push_buffer, and write the header dword _header.
+ *
+ * If the segment has fewer than _count+1 free dwords, __nvPushMakeRoom() is
+ * called first.  The header is then written and freeDwords is reduced by
+ * _count+1, i.e. the data dwords are accounted for here even though only the
+ * header is written by this macro.
+ *
+ * Note that _count+1 must be less than half the total pushbuffer size.  This is
+ * required by GPFIFO because we can't reliably tell when we can write all the
+ * way to the end of the pushbuffer if we wrap (see bug 232454).  This
+ * assumption ensures that there will be enough space once GET reaches PUT.
+ *
+ * _count is evaluated more than once, so it must not have side effects.
+ */
+#define nvPushHeader(_push_buffer, _segment, _count, _header) do {    \
+    NvPushChannelSegmentPtr _pSegment = &(_push_buffer)->_segment;    \
+    nvAssert(((_count)+1) < ((_pSegment)->sizeInBytes / 8));          \
+    if ((_pSegment)->freeDwords < ((_count)+1))                       \
+        __nvPushMakeRoom((_push_buffer), (_count) + 1);               \
+    __nvPushSetMethodDataSegment((_pSegment), (_header));             \
+    (_pSegment)->freeDwords -= ((_count)+1);                          \
+} while(0)
+
+#define __nvPushStart(_push_buffer, _segment, _subch, _offset, _count, _opcode) \
+{                                                                     \
+    nvAssert(!__nvPushStartSplitsSemaphore(                           \
+                                      (_offset),                      \
+                                      (_count),                       \
+                                      NVA16F_DMA_SEC_OP ## _opcode)); \
+    ASSERT_DRF_DEF(A16F, _DMA, _SEC_OP, _opcode);                     \
+    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count);                \
+    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch);           \
+    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2);      \
+    nvPushHeader((_push_buffer), _segment, (_count),                  \
+        DRF_DEF(A16F, _DMA, _SEC_OP,               _opcode)  |        \
+        DRF_NUM(A16F, _DMA, _METHOD_COUNT,         _count)   |        \
+        DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL,    _subch)   |        \
+        DRF_NUM(A16F, _DMA, _METHOD_ADDRESS,    (_offset) >> 2));     \
+}
+
+// The GPU can encode a 13-bit constant method/data pair in a single DWORD.
+#define nvPushImmedValSegment(_push_buffer, _segment, _subch, _offset, _data) { \
+    ASSERT_DRF_NUM(A16F, _DMA, _IMMD_DATA, _data);                    \
+    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch);           \
+    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2);      \
+    if ((_push_buffer)->_segment.freeDwords < 1)                      \
+        __nvPushMakeRoom((_push_buffer), 1);                          \
+    __nvPushSetMethodDataSegment(&(_push_buffer)->_segment,           \
+        DRF_DEF(A16F, _DMA, _SEC_OP,     _IMMD_DATA_METHOD)  |        \
+        DRF_NUM(A16F, _DMA, _IMMD_DATA,             _data)   |        \
+        DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL,    _subch)   |        \
+        DRF_NUM(A16F, _DMA, _METHOD_ADDRESS,    (_offset) >> 2));     \
+    (_push_buffer)->_segment.freeDwords--;                            \
+}
+
+// Push an immediate-data method carrying the numeric value _value to the
+// channel's main segment.
+#define nvPushImmedVal(_chan, _sc, _mthd, _value) \
+    nvPushImmedValSegment(_chan, main, _sc, _mthd, _value)
+
+// Push an immediate-data method whose value is the named constant
+// <_mthd>_V_<_name>, formed by token pasting from the method name.
+#define nvPushImmed(_chan, _sc, _mthd, _name) \
+    nvPushImmedVal(_chan, _sc, _mthd, _mthd##_V_##_name)
+
+// Method headers.
+//
+// Each macro pushes a header announcing _n data dwords for the method at
+// byte offset _mthd on subchannel _sc of the channel's main segment.  They
+// differ only in the NVA16F_DMA_SEC_OP_* opcode placed in the header:
+// _INC_METHOD, _NON_INC_METHOD, or _ONE_INC respectively.
+#define nvPushMethod(_chan, _sc, _mthd, _n) \
+    __nvPushStart(_chan, main, _sc, _mthd, _n, _INC_METHOD)
+#define nvPushMethodNoIncr(_chan, _sc, _mthd, _n) \
+    __nvPushStart(_chan, main, _sc, _mthd, _n, _NON_INC_METHOD)
+#define nvPushMethodOneIncr(_chan, _sc, _mthd, _n) \
+    __nvPushStart(_chan, main, _sc, _mthd, _n, _ONE_INC)
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif /* __NVIDIA_PUSH_METHODS_H__ */