Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
Commit: 525.53
src/common/unix/common/inc/nv_amodel_enum.h (new file, 42 lines)
@@ -0,0 +1,42 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __NV_AMODEL_ENUM_H__
#define __NV_AMODEL_ENUM_H__

typedef enum _NVAModelConfig {
    NV_AMODEL_NONE = 0,
    NV_AMODEL_KEPLER,
    NV_AMODEL_KEPLER_SM35,
    NV_AMODEL_MAXWELL,
    NV_AMODEL_PASCAL,
    NV_AMODEL_VOLTA,
    NV_AMODEL_TURING,
    NV_AMODEL_AMPERE,
    NV_AMODEL_ADA,
    NV_AMODEL_HOPPER,
} NVAModelConfig;

#endif /* __NV_AMODEL_ENUM_H__ */
src/common/unix/nvidia-3d/include/nv_xz_mem_hooks.h (new file, 44 lines)
@@ -0,0 +1,44 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NV_XZ_MEM_HOOKS_H__
#define __NV_XZ_MEM_HOOKS_H__

/*
 * This file is included by xz_config.h when NV_XZ_CUSTOM_MEM_HOOKS is defined,
 * allowing us to override xzminidec's standard library use.
 */

#include "nvidia-3d-imports.h"

#define kmalloc(size, flags) nv3dImportAlloc(size)
#define kfree(ptr)           nv3dImportFree(ptr)
#define vmalloc(size)        nv3dImportAlloc(size)
#define vfree(ptr)           nv3dImportFree(ptr)

#define memeq(a, b, size)    (nv3dImportMemCmp(a, b, size) == 0)
#define memzero(buf, size)   nv3dImportMemSet(buf, 0, size)
#define memcpy(a, b, size)   nv3dImportMemCpy(a, b, size)
#define memmove(a, b, size)  nv3dImportMemMove(a, b, size)

#endif /* __NV_XZ_MEM_HOOKS_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-fermi.h (new file, 41 lines)
@@ -0,0 +1,41 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_FERMI_H__
#define __NVIDIA_3D_FERMI_H__

#include "nvidia-3d-types.h"

void _nv3dSetProgramOffsetFermi(
    Nv3dChannelRec *p3dChannel,
    NvU32 stage,
    NvU32 offset);
void _nv3dInvalidateTexturesFermi(
    Nv3dChannelRec *p3dChannel);
void _nv3dSetVertexStreamEndFermi(
    Nv3dChannelPtr p3dChannel,
    enum Nv3dVertexAttributeStreamType stream,
    const Nv3dVertexAttributeStreamRec *pStream);

#endif /* __NVIDIA_3D_FERMI_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-hopper.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_HOPPER_H__
#define __NVIDIA_3D_HOPPER_H__

#include "nvidia-3d-types.h"

void _nv3dInitChannelHopper(Nv3dChannelRec *p3dChannel);

void _nv3dAssignNv3dTextureHopper(
    Nv3dRenderTexInfo info,
    Nv3dTexture *tex);

#endif /* __NVIDIA_3D_HOPPER_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-kepler.h (new file, 45 lines)
@@ -0,0 +1,45 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_KEPLER_H__
#define __NVIDIA_3D_KEPLER_H__

#include "nvidia-3d-types.h"

void _nv3dSetSpaVersionKepler(Nv3dChannelRec *p3dChannel);

void _nv3dInitChannelKepler(Nv3dChannelRec *p3dChannel);

void _nv3dUploadDataInlineKepler(
    Nv3dChannelRec *p3dChannel,
    NvU64 gpuBaseAddress,
    size_t offset,
    const void *data,
    size_t bytes);
void _nv3dBindTexturesKepler(
    Nv3dChannelPtr p3dChannel,
    int programIndex,
    const int *textureBindingIndices);

#endif /* __NVIDIA_3D_KEPLER_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-maxwell.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_MAXWELL_H__
#define __NVIDIA_3D_MAXWELL_H__

#include "nvidia-3d-types.h"

void _nv3dInitChannelMaxwell(Nv3dChannelRec *p3dChannel);

void _nv3dAssignNv3dTextureMaxwell(
    Nv3dRenderTexInfo info,
    Nv3dTexture *tex);

#endif /* __NVIDIA_3D_MAXWELL_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-pascal.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_PASCAL_H__
#define __NVIDIA_3D_PASCAL_H__

#include "nvidia-3d-types.h"

void _nv3dInitChannelPascal(Nv3dChannelRec *p3dChannel);

void _nv3dAssignNv3dTexturePascal(
    Nv3dRenderTexInfo info,
    Nv3dTexture *tex);

#endif /* __NVIDIA_3D_PASCAL_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-surface.h (new file, 33 lines)
@@ -0,0 +1,33 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_SURFACE_H__
#define __NVIDIA_3D_SURFACE_H__

#include "nvidia-3d-types.h"

void _nv3dAssignSurfaceOffsets(
    const Nv3dAllocChannelStateParams *pParams,
    Nv3dChannelPtr p3dChannel);

#endif /* __NVIDIA_3D_SURFACE_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-turing.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_TURING_H__
#define __NVIDIA_3D_TURING_H__

#include "nvidia-3d-types.h"

void _nv3dInitChannelTuring(Nv3dChannelRec *p3dChannel);
void _nv3dSetVertexStreamEndTuring(
    Nv3dChannelPtr p3dChannel,
    enum Nv3dVertexAttributeStreamType stream,
    const Nv3dVertexAttributeStreamRec *pStream);

#endif /* __NVIDIA_3D_TURING_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-types-priv.h (new file, 48 lines)
@@ -0,0 +1,48 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_TYPES_PRIV_H__
#define __NVIDIA_3D_TYPES_PRIV_H__

#include "nvidia-3d-types.h"

struct _Nv3dHal {
    void (*setSpaVersion)      (Nv3dChannelRec *p3dChannel);
    void (*initChannel)        (Nv3dChannelRec *p3dChannel);
    void (*uploadDataInline)   (Nv3dChannelRec *p3dChannel,
                                NvU64 gpuBaseAddress,
                                size_t offset,
                                const void *data,
                                size_t bytes);
    void (*setProgramOffset)   (Nv3dChannelRec *p3dChannel,
                                NvU32 stage,
                                NvU32 offset);
    void (*assignNv3dTexture)  (Nv3dRenderTexInfo info,
                                Nv3dTexture *tex);
    void (*setVertexStreamEnd) (Nv3dChannelPtr p3dChannel,
                                enum Nv3dVertexAttributeStreamType stream,
                                const Nv3dVertexAttributeStreamRec *pStream);
};

#endif /* __NVIDIA_3D_TYPES_PRIV_H__ */
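Each GPU architecture supplies one such table, built from the per-architecture entry points declared in the headers above. The sketch below is purely illustrative and not part of this commit; the real tables live in the nvidia-3d sources and may wire the slots differently. It only shows how a Maxwell-era HAL could plausibly reuse the Fermi and Kepler helpers.

/* Illustrative only: a hypothetical HAL table assembled from the per-arch
 * entry points declared in this commit's headers. */
static const Nv3dHal _hypotheticalMaxwellHal = {
    .setSpaVersion      = _nv3dSetSpaVersionKepler,     /* only Kepler variant is declared */
    .initChannel        = _nv3dInitChannelMaxwell,
    .uploadDataInline   = _nv3dUploadDataInlineKepler,
    .setProgramOffset   = _nv3dSetProgramOffsetFermi,   /* Volta adds its own variant */
    .assignNv3dTexture  = _nv3dAssignNv3dTextureMaxwell,
    .setVertexStreamEnd = _nv3dSetVertexStreamEndFermi, /* Turing adds its own variant */
};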
src/common/unix/nvidia-3d/include/nvidia-3d-vertex-arrays.h (new file, 32 lines)
@@ -0,0 +1,32 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_VERTEX_ARRAYS_H__
#define __NVIDIA_3D_VERTEX_ARRAYS_H__

#include "nvidia-3d-types.h"

void _nv3dInitializeStreams(
    Nv3dChannelRec *p3dChannel);

#endif /* __NVIDIA_3D_VERTEX_ARRAYS_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-volta.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_VOLTA_H__
#define __NVIDIA_3D_VOLTA_H__

#include "nvidia-3d-types.h"

void _nv3dSetProgramOffsetVolta(
    Nv3dChannelRec *p3dChannel,
    NvU32 stage,
    NvU32 offset);

#endif /* __NVIDIA_3D_VOLTA_H__ */
@@ -0,0 +1,93 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_COLOR_TARGETS_H__
#define __NVIDIA_3D_COLOR_TARGETS_H__

#include "nvidia-3d.h"

#include <class/cl9097.h>
#include <class/cla06fsubch.h>

/*
 * This header file defines static inline functions to manage 3D class
 * color targets.
 */

static inline void nv3dSelectColorTarget(
    Nv3dChannelPtr p3dChannel,
    NvU8 colorTargetIndex)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_CT_SELECT,
        NV3D_V(9097, SET_CT_SELECT, TARGET_COUNT, 1) |
        NV3D_V(9097, SET_CT_SELECT, TARGET0, colorTargetIndex) |
        NV3D_V(9097, SET_CT_SELECT, TARGET1, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET2, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET3, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET4, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET5, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET6, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET7, 0));
}

static inline void nv3dSetColorTarget(
    Nv3dChannelPtr p3dChannel,
    NvU8 colorTargetIndex,
    NvU32 surfaceFormat,
    NvU64 surfaceGpuAddress,
    NvBool blockLinear,
    Nv3dBlockLinearLog2GobsPerBlock gobsPerBlock,
    NvU32 surfaceWidth,
    NvU32 surfaceHeight)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    const NvU32 memoryInfo =
        blockLinear ?
        (NV3D_V(9097, SET_COLOR_TARGET_MEMORY, BLOCK_WIDTH, gobsPerBlock.x) |
         NV3D_V(9097, SET_COLOR_TARGET_MEMORY, BLOCK_HEIGHT, gobsPerBlock.y) |
         NV3D_V(9097, SET_COLOR_TARGET_MEMORY, BLOCK_DEPTH, gobsPerBlock.z) |
         NV3D_C(9097, SET_COLOR_TARGET_MEMORY, LAYOUT, BLOCKLINEAR)) :
        NV3D_C(9097, SET_COLOR_TARGET_MEMORY, LAYOUT, PITCH);

    if (surfaceFormat == NV9097_SET_COLOR_TARGET_FORMAT_V_DISABLED) {
        // Disable this color target.
        nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
            NV9097_SET_COLOR_TARGET_FORMAT(colorTargetIndex),
            NV9097_SET_COLOR_TARGET_FORMAT_V_DISABLED);
        return;
    }

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
        NV9097_SET_COLOR_TARGET_A(colorTargetIndex), 6);

    nvPushSetMethodDataU64(p, surfaceGpuAddress);
    nvPushSetMethodData(p, surfaceWidth);
    nvPushSetMethodData(p, surfaceHeight);
    nvPushSetMethodData(p, surfaceFormat);
    nvPushSetMethodData(p, memoryInfo);
}

#endif /* __NVIDIA_3D_COLOR_TARGETS_H__ */
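Taken together, nv3dSetColorTarget() and nv3dSelectColorTarget() are enough to point the 3D class at a render target. A minimal usage sketch follows; it is illustrative only (not part of this commit) and assumes a pitch-linear surface using the A8R8G8B8 color-target format define from cl9097.h.

/* Illustrative usage sketch: direct rendering at color target 0, backed by a
 * pitch-linear surface. The surface address and dimensions are caller-supplied
 * placeholders. */
static void exampleBindRenderTarget(Nv3dChannelPtr p3dChannel,
                                    NvU64 surfaceGpuAddress,
                                    NvU32 surfaceWidth, NvU32 surfaceHeight)
{
    /* Gobs-per-block values are ignored for the pitch layout. */
    const Nv3dBlockLinearLog2GobsPerBlock noGobs = { 0, 0, 0 };

    /* Program SET_COLOR_TARGET_A..MEMORY for target 0. */
    nv3dSetColorTarget(p3dChannel, 0,
                       NV9097_SET_COLOR_TARGET_FORMAT_V_A8R8G8B8,
                       surfaceGpuAddress,
                       NV_FALSE, /* blockLinear */
                       noGobs,
                       surfaceWidth, surfaceHeight);

    /* Point subsequent rendering at target 0 only. */
    nv3dSelectColorTarget(p3dChannel, 0);
}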
src/common/unix/nvidia-3d/interface/nvidia-3d-constant-buffers.h (new file, 196 lines)
@@ -0,0 +1,196 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_CONSTANT_BUFFERS_H__
#define __NVIDIA_3D_CONSTANT_BUFFERS_H__

#include "nvidia-3d.h"

#include <class/cl9097.h>
#include <class/cla06fsubch.h>

/*
 * This header file defines static inline functions to manage 3D class
 * constant buffers.
 */


static inline void nv3dSelectCbAddress(
    Nv3dChannelRec *p3dChannel,
    NvU64 offset,
    NvU32 size)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvAssert(size > 0);
    nvAssert(NV_IS_ALIGNED(size, NV3D_MIN_CONSTBUF_ALIGNMENT));
    nvAssert(size <= 65536);
    nvAssert(NV_IS_ALIGNED(offset, NV3D_MIN_CONSTBUF_ALIGNMENT));

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
        NV9097_SET_CONSTANT_BUFFER_SELECTOR_A, 3);
    nvPushSetMethodData(p, size);
    nvPushSetMethodDataU64(p, offset);
}

/*!
 * Select a constant buffer for binding or updating.
 */
static inline void nv3dSelectCb(
    Nv3dChannelRec *p3dChannel,
    int constantBufferIndex)
{
    const NvU64 gpuAddress =
        nv3dGetConstantBufferGpuAddress(p3dChannel, constantBufferIndex);

    nv3dSelectCbAddress(p3dChannel, gpuAddress, NV3D_CONSTANT_BUFFER_SIZE);
}

/*!
 * Bind the selected Cb to a given slot (or invalidate that slot).
 */
static inline void nv3dBindCb(
    Nv3dChannelRec *p3dChannel,
    int bindGroup, // XXX TODO: this type should be NVShaderBindGroup
    int slot,
    NvBool valid)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    ASSERT_DRF_NUM(9097, _BIND_GROUP_CONSTANT_BUFFER, _SHADER_SLOT, slot);

    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
        NV9097_BIND_GROUP_CONSTANT_BUFFER(bindGroup),
        NV3D_V(9097, BIND_GROUP_CONSTANT_BUFFER, VALID, !!valid) |
        NV3D_V(9097, BIND_GROUP_CONSTANT_BUFFER, SHADER_SLOT, slot));
}

/*!
 * Push *only the header* to tell the GPU to "load" constants from the
 * pushbuffer.
 *
 * \param[in]  p3dChannel  The nvidia-3d channel.
 * \param[in]  offset      The offset in bytes of the start of the updates.
 * \param[in]  dwords      Count of dwords to be loaded (after the header).
 *
 * \return  An NvPushChannelUnion pointing immediately after the header,
 *          with enough contiguous space to copy 'dwords' of data.
 */
static inline NvPushChannelUnion *nv3dLoadConstantsHeader(
    Nv3dChannelRec *p3dChannel,
    NvU32 offset,
    size_t dwords)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;
    NvPushChannelUnion *buffer;

    nvAssert((dwords + 1) <= nvPushMaxMethodCount(p));

    nvPushMethodOneIncr(p, NVA06F_SUBCHANNEL_3D,
        NV9097_LOAD_CONSTANT_BUFFER_OFFSET, dwords + 1);
    nvPushSetMethodData(p, offset);

    buffer = p->main.buffer;
    p->main.buffer += dwords;

    return buffer;
}

/*!
 * Load an array of bytes into a constant buffer at a specified location.
 *
 * The count must be a multiple of 4 bytes.
 *
 * \param[in]  p3dChannel  The nvidia-3d channel.
 * \param[in]  offset      The offset in bytes of the start of the updates.
 * \param[in]  bytes       Count of bytes to write.  Must be a multiple of 4.
 * \param[in]  values      Data to be written.
 */
static inline void nv3dLoadConstants(
    Nv3dChannelRec *p3dChannel,
    NvU32 offset,
    size_t bytes,
    const void *values)
{
    const size_t dwords = bytes / 4;
    NvPushChannelUnion *buffer;

    nvAssert((bytes & 3) == 0);

    buffer = nv3dLoadConstantsHeader(p3dChannel, offset, dwords);

    nvDmaMoveDWORDS(buffer, values, dwords);
}

/*!
 * Set the current constant buffer's current byte offset, for use with
 * nv3dPushConstants().
 */
static inline void nv3dSetConstantBufferOffset(
    Nv3dChannelRec *p3dChannel,
    NvU32 offset)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
        NV9097_LOAD_CONSTANT_BUFFER_OFFSET, 1);
    nvPushSetMethodData(p, offset);
}

// Load an array of dwords into a constant buffer at the current location. This
// also advances the constant buffer load offset, so that multiple calls to
// nv3dPushConstants will write to sequential memory addresses.
static inline void nv3dPushConstants(
    Nv3dChannelRec *p3dChannel,
    size_t bytes,
    const void *values)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;
    const size_t dwords = bytes / 4;
    nvAssert((bytes & 3) == 0);
    nvAssert(dwords <= nvPushMaxMethodCount(p));

    nvPushMethodNoIncr(p, NVA06F_SUBCHANNEL_3D,
        NV9097_LOAD_CONSTANT_BUFFER(0), dwords);
    nvPushInlineData(p, values, dwords);
}

static inline void nv3dLoadSingleConstant(
    Nv3dChannelRec *p3dChannel,
    NvU32 offset,
    NvU32 value)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
        NV9097_LOAD_CONSTANT_BUFFER_OFFSET, 2);
    nvPushSetMethodData(p, offset);
    nvPushSetMethodData(p, value);
}

#endif /* __NVIDIA_3D_CONSTANT_BUFFERS_H__ */
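A typical call sequence for these helpers is: select a constant buffer, load data into it, then bind it to a shader bind group. The sketch below is illustrative only and not part of this commit; the buffer index, slot, and data are placeholders, and NV3D_CB_SLOT_MISC1 / NV3D_HW_BIND_GROUP_FRAGMENT come from the other interface headers added here.

/* Illustrative usage sketch: stage a vec4-sized uniform into constant
 * buffer 0 and bind it to the fragment stage. */
static void exampleUploadUniform(Nv3dChannelRec *p3dChannel,
                                 const Nv3dFloat color[4])
{
    /* Point SET_CONSTANT_BUFFER_SELECTOR at constant buffer 0. */
    nv3dSelectCb(p3dChannel, 0);

    /* Copy 16 bytes of data starting at offset 0 of that buffer. */
    nv3dLoadConstants(p3dChannel, 0, 4 * sizeof(Nv3dFloat), color);

    /* Make the buffer visible to fragment shaders in slot NV3D_CB_SLOT_MISC1. */
    nv3dBindCb(p3dChannel, NV3D_HW_BIND_GROUP_FRAGMENT,
               NV3D_CB_SLOT_MISC1, NV_TRUE);
}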
src/common/unix/nvidia-3d/interface/nvidia-3d-imports.h (new file, 41 lines)
@@ -0,0 +1,41 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

/*
 * nvidia-3d-imports.h declares functions that nvidia-3d host drivers must
 * provide.
 */

#ifndef __NVIDIA_3D_IMPORTS_H__
#define __NVIDIA_3D_IMPORTS_H__

#include <stddef.h> /* size_t */

void *nv3dImportAlloc(size_t size);
void nv3dImportFree(void *ptr);
int nv3dImportMemCmp(const void *a, const void *b, size_t size);
void nv3dImportMemSet(void *s, int c, size_t size);
void nv3dImportMemCpy(void *dest, const void *src, size_t size);
void nv3dImportMemMove(void *dest, const void *src, size_t size);

#endif /* __NVIDIA_3D_IMPORTS_H__ */
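These imports are the only memory and mem* services nvidia-3d expects from its host driver; they also back the kmalloc/memzero macros in nv_xz_mem_hooks.h above. A hypothetical user-space host could satisfy them with the C library, as in the sketch below, which is illustrative and not part of this commit; a kernel-mode host would route them to its own allocator and helpers instead.

/* Hypothetical host-driver implementation of the nvidia-3d imports,
 * backed by the standard C library. */
#include <stdlib.h>
#include <string.h>
#include "nvidia-3d-imports.h"

void *nv3dImportAlloc(size_t size)
{
    return malloc(size);
}

void nv3dImportFree(void *ptr)
{
    free(ptr);
}

int nv3dImportMemCmp(const void *a, const void *b, size_t size)
{
    return memcmp(a, b, size);
}

void nv3dImportMemSet(void *s, int c, size_t size)
{
    memset(s, c, size);
}

void nv3dImportMemCpy(void *dest, const void *src, size_t size)
{
    memcpy(dest, src, size);
}

void nv3dImportMemMove(void *dest, const void *src, size_t size)
{
    memmove(dest, src, size);
}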
@@ -0,0 +1,53 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef _NVIDIA_3D_SHADER_CONSTANTS_H_
#define _NVIDIA_3D_SHADER_CONSTANTS_H_

#if defined(NV3D_BUILD_AS_GLSL)

#define NV3D_CB_SLOT_FIRST_USER_BINDABLE 0

#else

/* Shaders always use this slot for compiler-emitted constants.  This
 * assumption is verified at ucode build time. */
#define NV3D_CB_SLOT_COMPILER 1

/* Offset between GLSL slot 0 and hardware slot */
#define NV3D_CB_SLOT_FIRST_USER_BINDABLE 3

#endif

/* This slot is used for most uniforms/constants defined in each shader */
#define NV3D_CB_SLOT_MISC1 (NV3D_CB_SLOT_FIRST_USER_BINDABLE + 0)

/* When needed (Kepler+), shaders always use this constant slot for bindless
 * texture handles. */
#define NV3D_CB_SLOT_BINDLESS_TEXTURE (NV3D_CB_SLOT_FIRST_USER_BINDABLE + 1)


/* Matches __GL_PGM_UNUSED_TEXTURE_UNIT */
#define NV3D_TEX_BINDING_UNUSED 255

#endif /* _NVIDIA_3D_SHADER_CONSTANTS_H_ */
src/common/unix/nvidia-3d/interface/nvidia-3d-shaders.h (new file, 69 lines)
@@ -0,0 +1,69 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2010-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_SHADERS_H__
#define __NVIDIA_3D_SHADERS_H__

#include <nvtypes.h>
#include <class/cl9097.h>

// These are used in the "shader type" field below
#define NV3D_SHADER_TYPE_VERTEX NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX
#define NV3D_SHADER_TYPE_PIXEL  NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL

typedef enum {
    NV3D_HW_SHADER_STAGE_VERTEX_A = 0,
    NV3D_HW_SHADER_STAGE_VERTEX_B,
    NV3D_HW_SHADER_STAGE_TESS_CONTROL,
    NV3D_HW_SHADER_STAGE_TESS_EVAL,
    NV3D_HW_SHADER_STAGE_GEOMETRY,
    NV3D_HW_SHADER_STAGE_PIXEL,
    NV3D_HW_SHADER_STAGE_COUNT,
} __attribute__ ((__packed__)) Nv3dShaderStage;

typedef enum {
    NV3D_HW_BIND_GROUP_VERTEX = 0,
    NV3D_HW_BIND_GROUP_TESS_CONTROL,
    NV3D_HW_BIND_GROUP_TESS_EVAL,
    NV3D_HW_BIND_GROUP_GEOMETRY,
    NV3D_HW_BIND_GROUP_FRAGMENT,
    NV3D_HW_BIND_GROUP_LAST = NV3D_HW_BIND_GROUP_FRAGMENT
} __attribute__ ((__packed__)) Nv3dShaderBindGroup;

typedef struct _nv_program_info {
    NvU32 offset;         // Start offset relative to program heap
    NvU8 registerCount;   // From '#.MAX_REG n'+1
    NvU8 type;            // Shader type
    NvS8 constIndex;      // Index into the compiler-generated constant buffer table

    Nv3dShaderStage stage;         // Pipeline stage
    Nv3dShaderBindGroup bindGroup; // NV3D_HW_BIND_GROUP
} Nv3dProgramInfo;

typedef struct _nv_shader_const_buf_info {
    const NvU32 *data;
    NvU32 offset;
    NvU32 size;
} Nv3dShaderConstBufInfo;

#endif // __NVIDIA_3D_SHADERS_H__
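Each entry of the program descriptor array consumed by nvidia-3d is one Nv3dProgramInfo. The sketch below is illustrative only; the offset and register count are placeholder values that would normally come from the shader build, and the entry is not taken from this commit.

/* Illustrative only: what one program descriptor for a simple vertex shader
 * might look like.  Offset and register count are placeholders. */
static const Nv3dProgramInfo hypotheticalVertexProgram = {
    .offset        = 0x0,  /* start of the program heap */
    .registerCount = 16,
    .type          = NV3D_SHADER_TYPE_VERTEX,
    .constIndex    = -1,   /* no compiler-generated constant buffer */
    .stage         = NV3D_HW_SHADER_STAGE_VERTEX_B,
    .bindGroup     = NV3D_HW_BIND_GROUP_VERTEX,
};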
478
src/common/unix/nvidia-3d/interface/nvidia-3d-types.h
Normal file
478
src/common/unix/nvidia-3d/interface/nvidia-3d-types.h
Normal file
@@ -0,0 +1,478 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_3D_TYPES_H__
|
||||
#define __NVIDIA_3D_TYPES_H__
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvlimits.h"
|
||||
#include "nvidia-push-methods.h"
|
||||
|
||||
#include "nvidia-3d-shaders.h"
|
||||
|
||||
enum Nv3dBlendOperation {
|
||||
NV3D_BLEND_OP_CLEAR,
|
||||
NV3D_BLEND_OP_SRC,
|
||||
NV3D_BLEND_OP_DST,
|
||||
NV3D_BLEND_OP_OVER,
|
||||
NV3D_BLEND_OP_OVER_REVERSE,
|
||||
NV3D_BLEND_OP_IN,
|
||||
NV3D_BLEND_OP_IN_REVERSE,
|
||||
NV3D_BLEND_OP_OUT,
|
||||
NV3D_BLEND_OP_OUT_REVERSE,
|
||||
NV3D_BLEND_OP_ATOP,
|
||||
NV3D_BLEND_OP_ATOP_REVERSE,
|
||||
NV3D_BLEND_OP_XOR,
|
||||
NV3D_BLEND_OP_ADD,
|
||||
NV3D_BLEND_OP_SATURATE,
|
||||
};
|
||||
|
||||
// We use two vertex streams: one for static attributes (values that are the
|
||||
// same for all vertices) and one for dynamic attributes.
|
||||
enum Nv3dVertexAttributeStreamType {
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_FIRST = 0,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC = 0,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC = 1,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT,
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* The data type of a vertex attribute. */
|
||||
/* Names of enum Nv3dVertexAttributeDataType members follow
|
||||
* "NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_{N_elements}_{element_size}_{NUMERICAL_TYPE}" convention
|
||||
* where {NUMERICAL_TYPE} gives information about NV9097_SET_VERTEX_ATTRIBUTE_A_NUMERICAL_TYPE
|
||||
*/
|
||||
enum Nv3dVertexAttributeDataType {
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_32_FLOAT, /* two floats */
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_32_FLOAT, /* four floats */
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_16_UNORM, /* four unsigned shorts mapped to floats: [0,65535] => [0.0f,1.0f] */
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_8_UNORM, /* four unsigned bytes mapped to floats: [0,255] => [0.0f,1.0f] */
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_16_SSCALED,/* two shorts mapped to floats: [-32768,32767] => [-32768.0f,32767.0f] */
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* The possible vertex attributes. */
|
||||
enum Nv3dVertexAttributeType {
|
||||
NV3D_VERTEX_ATTRIBUTE_POSITION = 0,
|
||||
NV3D_VERTEX_ATTRIBUTE_VERTEX_WEIGHT = 1,
|
||||
NV3D_VERTEX_ATTRIBUTE_NORMAL = 2,
|
||||
NV3D_VERTEX_ATTRIBUTE_COLOR = 3,
|
||||
NV3D_VERTEX_ATTRIBUTE_SECONDARY_COLOR = 4,
|
||||
NV3D_VERTEX_ATTRIBUTE_FOG_COORD = 5,
|
||||
NV3D_VERTEX_ATTRIBUTE_POINT_SIZE = 6,
|
||||
NV3D_VERTEX_ATTRIBUTE_MATRIX_INDEX = 7,
|
||||
NV3D_VERTEX_ATTRIBUTE_TEXCOORD0 = 8,
|
||||
NV3D_VERTEX_ATTRIBUTE_TEXCOORD1 = 9,
|
||||
NV3D_VERTEX_ATTRIBUTE_TEXCOORD2 = 10,
|
||||
NV3D_VERTEX_ATTRIBUTE_TEXCOORD3 = 11,
|
||||
/*
|
||||
* The _END enum value is used as a sentinel to terminate arrays of
|
||||
* Nv3dVertexAttributeInfoRec (see Nv3dVertexAttributeInfoRec, below).
|
||||
*/
|
||||
NV3D_VERTEX_ATTRIBUTE_END = 255,
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
|
||||
* Nv3dVertexAttributeInfoRec stores the triplet attribute, stream type, and
|
||||
* data type. Arrays of Nv3dVertexAttributeInfoRec are used to describe vertex
|
||||
* attribute configurations to FermiSetupVertexArrays().
|
||||
*
|
||||
* The NV3D_ATTRIB_ENTRY() and NV3D_ATTRIB_END macros can be used to make
|
||||
* Nv3dVertexAttributeInfoRec assignment more succinct. E.g.,
|
||||
*
|
||||
* Nv3dVertexAttributeInfoRec attribs[] = {
|
||||
* NV3D_ATTRIB_ENTRY(COLOR, STATIC, 4UB),
|
||||
* NV3D_ATTRIB_END,
|
||||
* };
|
||||
*/
|
||||
typedef struct _Nv3dVertexAttributeInfoRec {
|
||||
enum Nv3dVertexAttributeType attributeType;
|
||||
enum Nv3dVertexAttributeStreamType streamType;
|
||||
enum Nv3dVertexAttributeDataType dataType;
|
||||
} Nv3dVertexAttributeInfoRec;
|
||||
|
||||
#define NV3D_ATTRIB_TYPE_ENTRY(_i, _streamType, _dataType) \
|
||||
(Nv3dVertexAttributeInfoRec) \
|
||||
{ .attributeType = _i, \
|
||||
.streamType = NV3D_VERTEX_ATTRIBUTE_STREAM_##_streamType, \
|
||||
.dataType = NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_##_dataType }
|
||||
|
||||
#define NV3D_ATTRIB_ENTRY(_attribType, _streamType, _dataType) \
|
||||
NV3D_ATTRIB_TYPE_ENTRY(NV3D_VERTEX_ATTRIBUTE_##_attribType, _streamType, _dataType)
|
||||
|
||||
#define NV3D_ATTRIB_END \
|
||||
(Nv3dVertexAttributeInfoRec) \
|
||||
{ .attributeType = NV3D_VERTEX_ATTRIBUTE_END }
|
||||
|
||||
/*
|
||||
* When built into kernel code, define Nv3dFloat to be an NvU32: it is the same
|
||||
* size as a float, but the caller is responsible for storing float bit patterns
|
||||
* to Nv3dFloat.
|
||||
*/
|
||||
ct_assert(sizeof(float) == sizeof(NvU32));
|
||||
#if NV_PUSH_ALLOW_FLOAT
|
||||
typedef float Nv3dFloat;
|
||||
#else
|
||||
typedef NvU32 Nv3dFloat;
|
||||
#endif
|
||||
|
||||
static inline void nv3dPushFloat(NvPushChannelPtr p, const Nv3dFloat data)
|
||||
{
|
||||
#if NV_PUSH_ALLOW_FLOAT
|
||||
nvPushSetMethodDataF(p, data);
|
||||
#else
|
||||
nvPushSetMethodData(p, data);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Vertex attribute data types. Each of these types represents a different way
|
||||
* of specifying vertex attribute data.
|
||||
*/
|
||||
typedef struct __attribute__((packed)) {
|
||||
Nv3dFloat x, y;
|
||||
} Nv3dVertexAttrib2F;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU32 x, y;
|
||||
} Nv3dVertexAttrib2U;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvS32 x, y;
|
||||
} Nv3dVertexAttrib2S;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
Nv3dFloat x, y, z;
|
||||
} Nv3dVertexAttrib3F;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU32 x, y, z;
|
||||
} Nv3dVertexAttrib3U;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
Nv3dFloat x, y, z, w;
|
||||
} Nv3dVertexAttrib4F;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU16 x, y, z, w;
|
||||
} Nv3dVertexAttrib4US;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU8 x, y, z, w;
|
||||
} Nv3dVertexAttrib4UB;
|
||||
|
||||
typedef struct {
|
||||
NvU32 xyzw;
|
||||
} Nv3dVertexAttrib4UBPacked;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU32 xy;
|
||||
} Nv3dVertexAttrib2SPacked;
|
||||
|
||||
// List of component sizes used for the internal representation of a
|
||||
// texture header
|
||||
enum Nv3dTexHeaderComponentSizes {
|
||||
NV3D_TEXHEAD_A8B8G8R8,
|
||||
NV3D_TEXHEAD_A2B10G10R10,
|
||||
NV3D_TEXHEAD_B5G6R5,
|
||||
NV3D_TEXHEAD_A1B5G5R5,
|
||||
NV3D_TEXHEAD_R8,
|
||||
NV3D_TEXHEAD_R32,
|
||||
NV3D_TEXHEAD_R16,
|
||||
NV3D_TEXHEAD_G8R8,
|
||||
NV3D_TEXHEAD_R16G16B16A16,
|
||||
NV3D_TEXHEAD_R32G32B32A32,
|
||||
NV3D_TEXHEAD_Y8_VIDEO
|
||||
};
|
||||
|
||||
// List of component sources used for the internal representation of a
|
||||
// texture header
|
||||
enum Nv3dTexHeaderSource {
|
||||
NV3D_TEXHEAD_IN_A,
|
||||
NV3D_TEXHEAD_IN_R,
|
||||
NV3D_TEXHEAD_IN_G,
|
||||
NV3D_TEXHEAD_IN_B,
|
||||
NV3D_TEXHEAD_IN_ZERO,
|
||||
NV3D_TEXHEAD_IN_ONE_FLOAT
|
||||
};
|
||||
|
||||
// List of component data types used for the internal representation of
|
||||
// a texture header
|
||||
enum Nv3dTexHeaderDataType {
|
||||
NV3D_TEXHEAD_NUM_UNORM,
|
||||
NV3D_TEXHEAD_NUM_UINT,
|
||||
NV3D_TEXHEAD_NUM_FLOAT,
|
||||
NV3D_TEXHEAD_NUM_SNORM,
|
||||
NV3D_TEXHEAD_NUM_SINT
|
||||
};
|
||||
|
||||
enum Nv3dTexHeaderRepeatType {
|
||||
NV3D_TEXHEAD_REPEAT_TYPE_NONE,
|
||||
NV3D_TEXHEAD_REPEAT_TYPE_NORMAL,
|
||||
NV3D_TEXHEAD_REPEAT_TYPE_PAD,
|
||||
NV3D_TEXHEAD_REPEAT_TYPE_REFLECT
|
||||
};
|
||||
|
||||
enum Nv3dTextureFilterType{
|
||||
NV3D_TEXHEAD_FILTER_TYPE_NEAREST,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_LINEAR,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_ANISO_2X,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_ANISO_4X,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_ANISO_8X,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_ANISO_16X
|
||||
};
|
||||
|
||||
enum Nv3dTexType {
|
||||
NV3D_TEX_TYPE_ONE_D,
|
||||
NV3D_TEX_TYPE_ONE_D_BUFFER,
|
||||
NV3D_TEX_TYPE_TWO_D_PITCH,
|
||||
NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
NvU32 x;
|
||||
NvU32 y;
|
||||
NvU32 z;
|
||||
} Nv3dBlockLinearLog2GobsPerBlock;
|
||||
|
||||
// Intermediate representation of a texture header
|
||||
typedef struct {
|
||||
NvBool error;
|
||||
|
||||
enum Nv3dTexHeaderComponentSizes sizes;
|
||||
|
||||
// Currently, we always use the same data type for all components.
|
||||
enum Nv3dTexHeaderDataType dataType;
|
||||
|
||||
struct {
|
||||
enum Nv3dTexHeaderSource x;
|
||||
enum Nv3dTexHeaderSource y;
|
||||
enum Nv3dTexHeaderSource z;
|
||||
enum Nv3dTexHeaderSource w;
|
||||
} source;
|
||||
|
||||
enum Nv3dTexType texType;
|
||||
|
||||
NvU64 offset;
|
||||
NvBool normalizedCoords;
|
||||
enum Nv3dTexHeaderRepeatType repeatType;
|
||||
enum Nv3dTextureFilterType filtering;
|
||||
int pitch;
|
||||
int width;
|
||||
int height;
|
||||
|
||||
Nv3dBlockLinearLog2GobsPerBlock log2GobsPerBlock;
|
||||
} Nv3dRenderTexInfo;
|
||||
|
||||
typedef NvU32 Nv3dTexSampler[8];
|
||||
typedef NvU32 Nv3dTexHeader[8];
|
||||
|
||||
// HW representation of a texture header
|
||||
typedef struct {
|
||||
Nv3dTexSampler samp;
|
||||
Nv3dTexHeader head;
|
||||
} Nv3dTexture;
|
||||
|
||||
#define NV3D_CONSTANT_BUFFER_SIZE (4096 * 4)
|
||||
|
||||
#define NV3D_TEXTURE_INDEX_INVALID (-1)
|
||||
|
||||
#define NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE (64 * 1024)
|
||||
|
||||
/*
|
||||
* The constant buffer alignment constraints, specifically for the methods:
|
||||
*
|
||||
* NV*97_SET_CONSTANT_BUFFER_SELECTOR_A_SIZE
|
||||
* NV*97_SET_CONSTANT_BUFFER_SELECTOR_C_ADDRESS_LOWER
|
||||
*
|
||||
* have evolved over GPU architectures:
|
||||
*
|
||||
* kepler maxwell pascal volta turing
|
||||
* SIZE 256 16 16 16 16
|
||||
* ADDRESS 256 256 256 256 64
|
||||
*
|
||||
* But, using an alignment of 256 all the time is simpler.
|
||||
*/
|
||||
#define NV3D_MIN_CONSTBUF_ALIGNMENT 256

/*
 * 3D engine pitch alignment requirements for texture surface.
 */
#define NV3D_TEXTURE_PITCH_ALIGNMENT 256

typedef struct _Nv3dStreamSurfaceRec {
    NvU64 gpuAddress;
    NvU64 size;
} Nv3dStreamSurfaceRec;

typedef struct _Nv3dVertexAttributeStreamRec {
    // Current GPU address within the stream.
    NvU64 current;
    // Terminating GPU address within the stream.
    NvU64 end;
    // Number of bytes per vertex.
    NvU32 stride;
    // Index of the next vertex to be launched.
    int nextLaunch;
} Nv3dVertexAttributeStreamRec;

typedef struct _Nv3dHal Nv3dHal;

typedef struct _Nv3dDeviceCapsRec {
    NvU32 hasSetBindlessTexture :1; /* Supports SetBindlessTexture method */
    NvU32 hasProgramRegion      :1;

    NvU32 maxDim;                   /*
                                     * Maximum width or height of the
                                     * texture surface in pixels.
                                     */
} Nv3dDeviceCapsRec, *Nv3dDeviceCapsPtr;

typedef struct _Nv3dDeviceSpaVersionRec {
    NvU16 major;
    NvU16 minor;
} Nv3dDeviceSpaVersionRec;

/*
 * Enum for each compiled shader version.
 */
enum Nv3dShaderArch {
    NV3D_SHADER_ARCH_MAXWELL,
    NV3D_SHADER_ARCH_PASCAL,
    NV3D_SHADER_ARCH_VOLTA,
    NV3D_SHADER_ARCH_TURING,
    NV3D_SHADER_ARCH_AMPERE,
    NV3D_SHADER_ARCH_HOPPER,
    NV3D_SHADER_ARCH_COUNT,
};

typedef struct _Nv3dDeviceRec {

    NvPushDevicePtr pPushDevice;
    Nv3dDeviceCapsRec caps;
    NvU32 classNumber;
    enum Nv3dShaderArch shaderArch;

    Nv3dDeviceSpaVersionRec spaVersion;

    NvU32 maxThreadsPerWarp;
    NvU32 maxWarps;

    const Nv3dHal *hal;

} Nv3dDeviceRec, *Nv3dDevicePtr;

typedef struct _Nv3dChannelProgramsRec {
    /*
     * An array of program descriptors, and the number of elements
     * in the array.
     */
    size_t num;
    const Nv3dProgramInfo *info;

    size_t maxLocalBytes;
    size_t maxStackBytes;

    /*
     * The shader program code segment.
     *
     * The size is in bytes.
     */
    struct {
        size_t decompressedSize;
        const unsigned char *compressedStart;
        const unsigned char *compressedEnd;
    } code;

    /*
     * The constant buffers generated by the compiler for use with the above
     * code segment.
     *
     * 'size' is the total size of the surface to allocate, in bytes.
     * 'sizeAlign' is the minimum alignment required by the hardware for each
     *             particular constant buffer.  (Although we may only have
     *             N bytes of data to upload for each constant buffer, that
     *             size should be padded out with zeroes to a multiple of this
     *             value.)
     * 'count' is the number of entries in the 'info' array.
     * 'info' is a pointer to an array of Nv3dShaderConstBufInfo entries.
     */
    struct {
        size_t size;
        NvU32 sizeAlign;
        NvU32 count;
        const Nv3dShaderConstBufInfo *info;
    } constants;
} Nv3dChannelProgramsRec;
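
/*
 * Illustrative sketch (not from the original source): given the 'constants'
 * description above, a caller uploading the compiler-generated constant
 * buffers pads each entry out to 'sizeAlign'.  'pPrograms' is a hypothetical
 * pointer to an Nv3dChannelProgramsRec:
 *
 *     NvU32 i;
 *     for (i = 0; i < pPrograms->constants.count; i++) {
 *         const Nv3dShaderConstBufInfo *pInfo = &pPrograms->constants.info[i];
 *         const NvU32 align = pPrograms->constants.sizeAlign;
 *         const NvU32 paddedSize = ((pInfo->size + align - 1) / align) * align;
 *         // Upload pInfo->size bytes from pInfo->data at pInfo->offset,
 *         // zero-filling up to paddedSize.
 *     }
 */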

typedef struct _Nv3dChannelRec {

    Nv3dDevicePtr p3dDevice;
    NvPushChannelPtr pPushChannel;

    NvU32 handle[NV_MAX_SUBDEVICES];
    NvU16 numTextures;
    NvU16 numTextureBindings;

    Nv3dVertexAttributeStreamRec
        vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT];

    /*
     * Begin / end state.  ~0 if outside begin/end, or NV9097_BEGIN_OP_* if
     * inside.
     */
    NvU32 currentPrimitiveMode;

    Nv3dChannelProgramsRec programs;
    int currentProgramIndex[NV3D_HW_SHADER_STAGE_COUNT];
    NvU64 programLocalMemorySize;

    NvBool hasFrameBoundaries;

    struct {
        NvU32 handle[NV_MAX_SUBDEVICES];
        NvU64 gpuAddress;
        NvU64 programOffset;
        NvU64 programConstantsOffset;
        NvU64 programLocalMemoryOffset;
        NvU64 textureOffset;
        NvU64 bindlessTextureConstantBufferOffset;
        NvU64 constantBufferOffset;
        NvU64 vertexStreamOffset[NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT];
        NvU64 totalSize;
    } surface;

} Nv3dChannelRec, *Nv3dChannelPtr;

typedef struct {
    Nv3dFloat red;
    Nv3dFloat green;
    Nv3dFloat blue;
    Nv3dFloat alpha;
} Nv3dColor;

typedef struct {
    NvU32 blendFactorSrc; /* NV9097_SET_BLEND_COLOR/ALPHA_SOURCE_COEFF_ */
    NvU32 blendFactorDst; /* NV9097_SET_BLEND_COLOR/ALPHA_DEST_COEFF_ */
    NvU32 blendEquation;  /* NV9097_SET_BLEND_COLOR/ALPHA_OP_ */
} Nv3dBlendState;

#endif /* __NVIDIA_3D_TYPES_H__ */
104
src/common/unix/nvidia-3d/interface/nvidia-3d-utils.h
Normal file
@@ -0,0 +1,104 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __NVIDIA_3D_UTILS_H__
#define __NVIDIA_3D_UTILS_H__

#include "nvidia-3d.h"

#include <class/cl9097.h>
#include <class/cla06fsubch.h>

static inline void nv3dSetSurfaceClip(
    Nv3dChannelRec *p3dChannel,
    NvS16 x,
    NvS16 y,
    NvU16 w,
    NvU16 h)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
                 NV9097_SET_SURFACE_CLIP_HORIZONTAL, 2);
    nvPushSetMethodData(p,
        NV3D_V(9097, SET_SURFACE_CLIP_HORIZONTAL, X, x) |
        NV3D_V(9097, SET_SURFACE_CLIP_HORIZONTAL, WIDTH, w));
    nvPushSetMethodData(p,
        NV3D_V(9097, SET_SURFACE_CLIP_VERTICAL, Y, y) |
        NV3D_V(9097, SET_SURFACE_CLIP_VERTICAL, HEIGHT, h));
}

static inline void nv3dClearSurface(
    Nv3dChannelRec *p3dChannel,
    const NvU32 clearColor[4],
    NvU16 x,
    NvU16 y,
    NvU16 w,
    NvU16 h)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_COLOR_CLEAR_VALUE(0), 4);
    nvPushSetMethodData(p, clearColor[0]);
    nvPushSetMethodData(p, clearColor[1]);
    nvPushSetMethodData(p, clearColor[2]);
    nvPushSetMethodData(p, clearColor[3]);

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_CLEAR_RECT_HORIZONTAL, 2);
    nvPushSetMethodData(p,
        NV3D_V(9097, SET_CLEAR_RECT_HORIZONTAL, XMIN, x) |
        NV3D_V(9097, SET_CLEAR_RECT_HORIZONTAL, XMAX, x + w));
    nvPushSetMethodData(p,
        NV3D_V(9097, SET_CLEAR_RECT_VERTICAL, YMIN, y) |
        NV3D_V(9097, SET_CLEAR_RECT_VERTICAL, YMAX, y + h));
    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_CLEAR_SURFACE,
        NV3D_C(9097, CLEAR_SURFACE, R_ENABLE, TRUE) |
        NV3D_C(9097, CLEAR_SURFACE, G_ENABLE, TRUE) |
        NV3D_C(9097, CLEAR_SURFACE, B_ENABLE, TRUE) |
        NV3D_C(9097, CLEAR_SURFACE, A_ENABLE, TRUE));
}
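
/*
 * Illustrative sketch (not part of the original header): a typical caller
 * first restricts the surface clip and then clears that rectangle.  The
 * channel pointer, dimensions, and clear value are hypothetical, and the
 * encoding of clearColor depends on the bound color target format:
 *
 *     const NvU32 clearColor[4] = { 0, 0, 0, 0 };
 *     nv3dSetSurfaceClip(p3dChannel, 0, 0, width, height);
 *     nv3dClearSurface(p3dChannel, clearColor, 0, 0, width, height);
 */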

static inline void nv3dVasBegin(
    Nv3dChannelRec *p3dChannel,
    NvU32 mode)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvAssert(p3dChannel->currentPrimitiveMode == ~0);

    p3dChannel->currentPrimitiveMode = mode;
    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_BEGIN, mode);
}

static inline void nv3dVasEnd(
    Nv3dChannelRec *p3dChannel)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvAssert(p3dChannel->currentPrimitiveMode != ~0);

    p3dChannel->currentPrimitiveMode = ~0;
    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_END, 0);
}
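
/*
 * Illustrative sketch (not part of the original header): nv3dVasBegin() and
 * nv3dVasEnd() bracket a primitive, with vertex data pushed in between, e.g.
 * via nv3dVasDrawInlineVerts() (declared in nvidia-3d.h).  The vertex array,
 * its count, and the specific NV9097_BEGIN_OP_* value are hypothetical:
 *
 *     nv3dVasBegin(p3dChannel, NV9097_BEGIN_OP_TRIANGLE_STRIP);
 *     nv3dVasDrawInlineVerts(p3dChannel, verts, 4);
 *     nv3dVasEnd(p3dChannel);
 */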

#endif /* __NVIDIA_3D_UTILS_H__ */
296
src/common/unix/nvidia-3d/interface/nvidia-3d.h
Normal file
@@ -0,0 +1,296 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * The nvidia-3d library provides utility code for programming a 3D
 * object.
 */

#ifndef __NVIDIA_3D_H__
#define __NVIDIA_3D_H__

#include "nvtypes.h"
#include "nvmisc.h"   /* DRF_DEF, et al */
#include "nvlimits.h" /* NV_MAX_SUBDEVICES */

#include "nvidia-push-types.h"
#include "nvidia-3d-types.h"

#define NV3D_C(d, r, f, c)  DRF_DEF( d, _ ## r, _ ## f, _ ## c)
#define NV3D_V(d, r, f, v)  DRF_NUM( d, _ ## r, _ ## f, (NvU32)(v) )

/*
 * Allocate and free an Nv3dDeviceRec
 */
typedef struct _Nv3dAllocDeviceParams {
    NvPushDevicePtr pPushDevice;
} Nv3dAllocDeviceParams;

NvBool nv3dAllocDevice(
    const Nv3dAllocDeviceParams *pParams,
    Nv3dDevicePtr p3dDevice);

void nv3dFreeDevice(
    Nv3dDevicePtr p3dDevice);

/*
 * Allocate and free an Nv3dChannelRec data structure.
 *
 * Note that all pointers provided in this parameter structure are
 * cached in the Nv3dChannelRec.  They must remain valid from
 * nv3dAllocChannelState() until the corresponding
 * nv3dFreeChannelState() call.
 */
typedef struct _Nv3dAllocChannelStateParams {
    /*
     * The Nv3dDeviceRec to use with this channel.
     */
    Nv3dDevicePtr p3dDevice;

    /*
     * The number of texture headers/samplers nvidia-3d should
     * allocate.
     */
    NvU16 numTextures;

    /*
     * The number of general purpose constant buffers nvidia-3d should
     * allocate.
     */
    NvU16 numConstantBuffers;

    /*
     * The number of texture bindings.
     */
    NvU16 numTextureBindings;

    /*
     * Whether the host driver renders in terms of frames, or, like the X
     * driver, renders directly to the front buffer.  On >= Pascal, the pipe
     * needs to be explicitly flushed at the end of a frame.
     */
    NvBool hasFrameBoundaries;

} Nv3dAllocChannelStateParams;

NvBool nv3dAllocChannelState(
    const Nv3dAllocChannelStateParams *pParams,
    Nv3dChannelPtr p3dChannel);

void nv3dFreeChannelState(
    Nv3dChannelPtr p3dChannel);


/*
 * Allocate and free the RM object for an Nv3dChannelRec.
 */
typedef struct _Nv3dAllocChannelObjectParams {
    NvPushChannelPtr pPushChannel;
    NvU32 handle[NV_MAX_SUBDEVICES];
} Nv3dAllocChannelObjectParams;

NvBool nv3dAllocChannelObject(
    const Nv3dAllocChannelObjectParams *pParams,
    Nv3dChannelPtr p3dChannel);

void nv3dFreeChannelObject(
    Nv3dChannelPtr p3dChannel);


/*
 * Allocate and free the surface needed by the Nv3dChannelRec.
 */
NvBool nv3dAllocChannelSurface(Nv3dChannelPtr p3dChannel);

void nv3dFreeChannelSurface(Nv3dChannelPtr p3dChannel);


/*
 * Once the Nv3dChannelRec is allocated, and the objects and surface
 * for it are allocated, nv3dInitChannel() is used to initialize the
 * graphics engine and make it ready to use.
 */
NvBool nv3dInitChannel(Nv3dChannelPtr p3dChannel);
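
/*
 * Illustrative sketch (not part of the original header): the typical setup
 * order for the functions declared above.  Error handling is simplified, and
 * 'pPushDevice', 'pPushChannel', 'hObject', and the numeric counts are all
 * hypothetical; a real caller also keeps the parameter structures' pointers
 * valid for the life of the channel, as noted above.
 *
 *     Nv3dDeviceRec dev = { };
 *     Nv3dChannelRec chan = { };
 *
 *     Nv3dAllocDeviceParams devParams = { .pPushDevice = pPushDevice };
 *     Nv3dAllocChannelStateParams stateParams = {
 *         .p3dDevice          = &dev,
 *         .numTextures        = 16,
 *         .numConstantBuffers = 1,
 *         .numTextureBindings = 4,
 *         .hasFrameBoundaries = FALSE,
 *     };
 *     Nv3dAllocChannelObjectParams objParams = {
 *         .pPushChannel = pPushChannel,
 *         .handle       = { hObject },  // one RM handle per subdevice
 *     };
 *
 *     if (!nv3dAllocDevice(&devParams, &dev) ||
 *         !nv3dAllocChannelState(&stateParams, &chan) ||
 *         !nv3dAllocChannelObject(&objParams, &chan) ||
 *         !nv3dAllocChannelSurface(&chan) ||
 *         !nv3dInitChannel(&chan)) {
 *         // tear down in reverse order with the nv3dFree*() functions
 *     }
 */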


/*
 * Return the offset or GPU address of the specified item within the
 * Nv3dChannelRec's surface.
 */

static inline NvU64 nv3dGetTextureOffset(
    const Nv3dChannelRec *p3dChannel,
    NvU32 textureIndex)
{
    const NvU64 offset = p3dChannel->surface.textureOffset;

    return offset + (sizeof(Nv3dTexture) * textureIndex);
}

static inline NvU64 nv3dGetTextureGpuAddress(
    const Nv3dChannelRec *p3dChannel,
    NvU32 textureIndex)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetTextureOffset(p3dChannel, textureIndex);
}

static inline NvU64 nv3dGetConstantBufferOffset(
    const Nv3dChannelRec *p3dChannel,
    NvU32 constantBufferIndex)
{
    const NvU64 offset = p3dChannel->surface.constantBufferOffset;

    return offset + (NV3D_CONSTANT_BUFFER_SIZE * constantBufferIndex);
}

static inline NvU64 nv3dGetConstantBufferGpuAddress(
    const Nv3dChannelRec *p3dChannel,
    NvU32 constantBufferIndex)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetConstantBufferOffset(p3dChannel, constantBufferIndex);
}

static inline NvU64 nv3dGetProgramOffset(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.programOffset;
}

static inline NvU64 nv3dGetProgramGpuAddress(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.gpuAddress + nv3dGetProgramOffset(p3dChannel);
}

static inline NvU64 nv3dGetProgramConstantsOffset(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.programConstantsOffset;
}

static inline NvU64 nv3dGetProgramConstantsGpuAddress(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetProgramConstantsOffset(p3dChannel);
}

static inline NvU64 nv3dGetProgramLocalMemoryOffset(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.programLocalMemoryOffset;
}

static inline NvU64 nv3dGetProgramLocalMemoryGpuAddress(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetProgramLocalMemoryOffset(p3dChannel);
}

static inline NvU64 nv3dGetBindlessTextureConstantBufferOffset(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.bindlessTextureConstantBufferOffset;
}

static inline NvU64 nv3dGetBindlessTextureConstantBufferGpuAddress(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetBindlessTextureConstantBufferOffset(p3dChannel);
}

static inline NvU64 nv3dGetVertexAttributestreamOffset(
    const Nv3dChannelRec *p3dChannel,
    enum Nv3dVertexAttributeStreamType stream)
{
    return p3dChannel->surface.vertexStreamOffset[stream];
}

static inline NvU64 nv3dGetVertexAttributestreamGpuAddress(
    const Nv3dChannelRec *p3dChannel,
    enum Nv3dVertexAttributeStreamType stream)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetVertexAttributestreamOffset(p3dChannel, stream);
}
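
/*
 * Illustrative sketch (not part of the original header): the getters above
 * simply add a sub-allocation offset to the surface's base GPU address, e.g.
 * for a hypothetical constant buffer index 'i' (i < numConstantBuffers):
 *
 *     NvU64 gpuAddr = nv3dGetConstantBufferGpuAddress(p3dChannel, i);
 *     // equals p3dChannel->surface.gpuAddress +
 *     //        p3dChannel->surface.constantBufferOffset +
 *     //        (i * NV3D_CONSTANT_BUFFER_SIZE)
 */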

void nv3dUploadDataInline(
    Nv3dChannelRec *p3dChannel,
    NvU64 gpuBaseAddress,
    size_t offset,
    const void *data,
    size_t bytes);

void nv3dClearProgramCache(
    Nv3dChannelRec *p3dChannel);

void nv3dLoadProgram(
    Nv3dChannelRec *p3dChannel,
    int programIndex);

void nv3dLoadTextures(
    Nv3dChannelRec *p3dChannel,
    int firstTextureIndex,
    const Nv3dRenderTexInfo *texInfo,
    int numTexures);

void nv3dBindTextures(
    Nv3dChannelPtr p3dChannel,
    int programIndex,
    const int *textureBindingIndices);

void nv3dSetBlendColorCoefficients(
    Nv3dChannelPtr p3dChannel,
    enum Nv3dBlendOperation op,
    NvBool forceNoDstAlphaBits,
    NvBool dualSourceBlending);

void nv3dSetBlend(
    Nv3dChannelPtr p3dChannel,
    const Nv3dBlendState *blendStateColor,
    const Nv3dBlendState *blendStateAlpha,
    const Nv3dColor *blendColor);

int nv3dVasSetup(
    Nv3dChannelRec *p3dChannel,
    const Nv3dVertexAttributeInfoRec *attribs,
    const Nv3dStreamSurfaceRec *pSurf);

void nv3dVasSelectCbForVertexData(
    Nv3dChannelRec *p3dChannel);

void nv3dVasDrawInlineVerts(
    Nv3dChannelRec *p3dChannel,
    const void *data,
    int numVerts);

NvBool nv3dVasMakeRoom(
    Nv3dChannelRec *p3dChannel,
    NvU32 pendingVerts,
    NvU32 moreVerts);

#endif /* __NVIDIA_3D_H__ */
162
src/common/unix/nvidia-3d/src/nvidia-3d-core.c
Normal file
@@ -0,0 +1,162 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nvidia-3d.h"
#include "nvidia-3d-types-priv.h"
#include "nvos.h"
#include "nvidia-3d-fermi.h"
#include "nvidia-3d-kepler.h"
#include "nvidia-push-utils.h"

NvBool nv3dAllocChannelObject(
    const Nv3dAllocChannelObjectParams *pParams,
    Nv3dChannelPtr p3dChannel)
{
    NvPushChannelPtr pPushChannel = pParams->pPushChannel;
    NvPushDevicePtr pPushDevice = pPushChannel->pDevice;
    const NvU32 classNumber = p3dChannel->p3dDevice->classNumber;
    int sd;

    /*
     * nv3dAllocChannelState() should have been called to assign p3dDevice.
     */
    nvAssert(p3dChannel->p3dDevice != NULL);
    nvAssert(p3dChannel->p3dDevice->pPushDevice ==
             pParams->pPushChannel->pDevice);

    for (sd = 0;
         sd < ARRAY_LEN(pPushChannel->channelHandle) &&
         pPushChannel->channelHandle[sd] != 0;
         sd++) {

        if (nvPushIsAModel(pPushDevice)) {
            nvAssert(sd == 0);
        } else {
            const NvPushImports *pImports = pPushDevice->pImports;
            nvAssert(pParams->handle[sd] != 0);
            NvU32 ret = pImports->rmApiAlloc(pPushDevice,
                                             pPushChannel->channelHandle[sd],
                                             pParams->handle[sd],
                                             classNumber,
                                             NULL);
            if (ret != NVOS_STATUS_SUCCESS) {
                return FALSE;
            }
        }

        p3dChannel->handle[sd] = pParams->handle[sd];
    }

    p3dChannel->pPushChannel = pPushChannel;

    return TRUE;
}

void nv3dFreeChannelObject(Nv3dChannelPtr p3dChannel)
{
    int sd;

    p3dChannel->pPushChannel = NULL;

    // No need to actually free the object here.  It gets destroyed during
    // channel teardown.
    for (sd = 0; sd < ARRAY_LEN(p3dChannel->handle); sd++) {
        p3dChannel->handle[sd] = 0;
    }
}

void nv3dUploadDataInline(
    Nv3dChannelRec *p3dChannel,
    NvU64 gpuBaseAddress,
    size_t offset,
    const void *data,
    size_t bytes)
{
    const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;

    pHal->uploadDataInline(p3dChannel, gpuBaseAddress, offset, data, bytes);
}

void nv3dClearProgramCache(Nv3dChannelRec *p3dChannel)
{
    Nv3dShaderStage stage;

    for (stage = 0;
         stage < ARRAY_LEN(p3dChannel->currentProgramIndex);
         stage++) {
        p3dChannel->currentProgramIndex[stage] = -1;
    }
}

void nv3dLoadTextures(
    Nv3dChannelRec *p3dChannel,
    int firstTex,
    const Nv3dRenderTexInfo *texInfo,
    int numTex)
{
    /* Limit number of texture/samplers on the stack to 4 (256 bytes) */
#define MAX_TEX_CHUNK 4
    Nv3dTexture textures[MAX_TEX_CHUNK];
    const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
    const NvU64 gpuBaseAddress = nv3dGetTextureGpuAddress(p3dChannel, 0);

    nvAssert(numTex >= 1);

    // Invalidate the texture/sampler caches.  This will cause a wait for idle
    // if there's rendering still in progress.  This is necessary in case the
    // texture parameters we're about to overwrite are in use.
    _nv3dInvalidateTexturesFermi(p3dChannel);

    while (numTex) {
        const NvU32 chunkNumTex = NV_MIN(numTex, MAX_TEX_CHUNK);
        const size_t startOffset = sizeof(Nv3dTexture) * firstTex;
        const size_t bytes = sizeof(Nv3dTexture) * chunkNumTex;
        int i;

        NVMISC_MEMSET(textures, 0, sizeof(textures));

        nvAssert(firstTex + numTex <= p3dChannel->numTextures);

        // Write the texture headers in HW format.
        for (i = 0; i < chunkNumTex; i++) {
            pHal->assignNv3dTexture(texInfo[i], &textures[i]);
        }

        nv3dUploadDataInline(p3dChannel, gpuBaseAddress, startOffset,
                             textures, bytes);

        numTex -= chunkNumTex;
        firstTex += chunkNumTex;
        texInfo += chunkNumTex;
    }
}

void nv3dBindTextures(
    Nv3dChannelPtr p3dChannel,
    int programIndex,
    const int *textureBindingIndices)
{
    nvAssert(programIndex < p3dChannel->programs.num);

    _nv3dBindTexturesKepler(p3dChannel, programIndex, textureBindingIndices);
}
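
/*
 * Illustrative sketch (not from the original source): a hypothetical caller
 * that loads two texture headers/samplers starting at index 0 and then binds
 * them for a previously loaded program.  The texInfo contents, the program
 * index, and the binding array are made up, and the exact length/meaning of
 * textureBindingIndices is defined by the Kepler binding helper, so treat
 * this only as the shape of the calls:
 *
 *     Nv3dRenderTexInfo texInfo[2] = { };   // filled in by the caller
 *     const int bindings[] = { 0, 1 };
 *
 *     nv3dLoadTextures(p3dChannel, 0, texInfo, 2);
 *     nv3dBindTextures(p3dChannel, programIndex, bindings);
 */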
557
src/common/unix/nvidia-3d/src/nvidia-3d-fermi.c
Normal file
@@ -0,0 +1,557 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-types-priv.h"
|
||||
#include "nvidia-3d-fermi.h"
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-imports.h"
|
||||
#include "nvidia-3d-constant-buffers.h"
|
||||
#include "nvidia-3d-shader-constants.h"
|
||||
#include "nvidia-3d-vertex-arrays.h"
|
||||
#include "nvidia-push-utils.h" /* nvPushSetObject */
|
||||
|
||||
#include <class/cl9097.h>
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
#include <xz.h>
|
||||
|
||||
#if NV_PUSH_ALLOW_FLOAT
|
||||
#define NV3D_FLOAT_ONE (1.00f)
|
||||
#else
|
||||
#define NV3D_FLOAT_ONE 0x3F800000 /* 1.00f */
|
||||
#endif
|
||||
|
||||
static void *DecompressUsingXz(
|
||||
const Nv3dChannelRec *p3dChannel,
|
||||
const void *compressedData,
|
||||
size_t compressedSize,
|
||||
size_t decompressedSize)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
void *decompressedData = nv3dImportAlloc(decompressedSize);
|
||||
struct xz_dec *xzState;
|
||||
enum xz_ret ret;
|
||||
|
||||
struct xz_buf xzBuf = {
|
||||
.in = compressedData,
|
||||
.in_pos = 0,
|
||||
.in_size = compressedSize,
|
||||
.out = decompressedData,
|
||||
.out_pos = 0,
|
||||
.out_size = decompressedSize,
|
||||
};
|
||||
|
||||
if (decompressedData == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
xz_crc32_init();
|
||||
|
||||
xzState = xz_dec_init(XZ_SINGLE, 0);
|
||||
|
||||
if (xzState == NULL) {
|
||||
pImports->logError(pPushDevice,
|
||||
"Failed to initialize xz decompression.");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = xz_dec_run(xzState, &xzBuf);
|
||||
|
||||
xz_dec_end(xzState);
|
||||
|
||||
if (ret != XZ_STREAM_END) {
|
||||
pImports->logError(pPushDevice, "Failed to decompress xz data.");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return decompressedData;
|
||||
|
||||
fail:
|
||||
nv3dImportFree(decompressedData);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *DecompressPrograms(const Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
const Nv3dChannelProgramsRec *pPrograms = &p3dChannel->programs;
|
||||
const size_t compressedSize =
|
||||
pPrograms->code.compressedEnd - pPrograms->code.compressedStart;
|
||||
|
||||
nvAssert(pPrograms->code.compressedEnd > pPrograms->code.compressedStart);
|
||||
|
||||
return DecompressUsingXz(p3dChannel,
|
||||
pPrograms->code.compressedStart,
|
||||
compressedSize,
|
||||
pPrograms->code.decompressedSize);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function attempts to upload the precompiled shaders to the GPU through
|
||||
* a temporary CPU mapping.
|
||||
* Failure of this function is not fatal -- we can fall back to uploading
|
||||
* through the pushbuffer.
|
||||
*/
|
||||
static NvBool UploadPrograms(Nv3dChannelPtr p3dChannel, const void *programCode)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
const size_t size = p3dChannel->programs.code.decompressedSize;
|
||||
NvU32 sd;
|
||||
|
||||
for (sd = 0; sd < pPushDevice->numSubDevices; sd++) {
|
||||
NvU32 status;
|
||||
void *ptr;
|
||||
const NvU32 hMemory = pPushDevice->clientSli ?
|
||||
p3dChannel->surface.handle[sd] :
|
||||
p3dChannel->surface.handle[0];
|
||||
|
||||
status = pImports->rmApiMapMemory(pPushDevice,
|
||||
pPushDevice->subDevice[sd].handle,
|
||||
hMemory,
|
||||
p3dChannel->surface.programOffset,
|
||||
size,
|
||||
&ptr,
|
||||
0);
|
||||
if (status != NVOS_STATUS_SUCCESS) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
nvAssert((size % 4) == 0);
|
||||
nvDmaMoveDWORDS(ptr, programCode, size / 4);
|
||||
|
||||
status = pImports->rmApiUnmapMemory(pPushDevice,
|
||||
pPushDevice->subDevice[sd].handle,
|
||||
hMemory,
|
||||
ptr,
|
||||
0);
|
||||
nvAssert(status == NVOS_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
NvBool nv3dInitChannel(Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
const Nv3dDeviceCapsRec *pCaps = &p3dChannel->p3dDevice->caps;
|
||||
const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
|
||||
const NvU64 tex0GpuAddress = nv3dGetTextureGpuAddress(p3dChannel, 0);
|
||||
NvU64 gpuAddress;
|
||||
NvU32 i;
|
||||
void *programCode = DecompressPrograms(p3dChannel);
|
||||
|
||||
if (programCode == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
* nv3dAllocChannel() should have been called to assign p3dDevice.
|
||||
*/
|
||||
nvAssert(p3dChannel->p3dDevice != NULL);
|
||||
|
||||
/*
|
||||
* nv3dAllocChannelObject() should have been called to assign
|
||||
* pPushChannel.
|
||||
*/
|
||||
nvAssert(p3dChannel->pPushChannel != NULL);
|
||||
|
||||
/*
|
||||
* nv3dAllocChannelSurface() should have been called to allocate
|
||||
* the surface.
|
||||
*/
|
||||
nvAssert(p3dChannel->surface.handle[0] != 0);
|
||||
|
||||
nv3dClearProgramCache(p3dChannel);
|
||||
|
||||
p3dChannel->currentPrimitiveMode = ~0;
|
||||
|
||||
nvPushSetObject(p, NVA06F_SUBCHANNEL_3D, p3dChannel->handle);
|
||||
|
||||
// Ct[0]'s format defaults to A8R8G8B8, rather than DISABLED.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_COLOR_TARGET_FORMAT(0),
|
||||
NV3D_C(9097, SET_COLOR_TARGET_FORMAT, V, DISABLED));
|
||||
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_ZT_SELECT,
|
||||
NV3D_V(9097, SET_ZT_SELECT, TARGET_COUNT, 0));
|
||||
|
||||
// Set a substitute stream address. This is used when the Vertex Attribute
|
||||
// Fetch unit tries to fetch outside the bounds of an enabled stream, which
|
||||
// should never happen. However, AModel always fetches this value
|
||||
// regardless of whether it actually needs it, so it causes MMU errors if
|
||||
// it's not set.
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_STREAM_SUBSTITUTE_A, 2);
|
||||
nvPushSetMethodDataU64(p, p3dChannel->surface.gpuAddress);
|
||||
|
||||
if (p3dChannel->programLocalMemorySize) {
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_SHADER_LOCAL_MEMORY_A, 4);
|
||||
// ADDRESS_{UPPER,LOWER}
|
||||
nvPushSetMethodDataU64(p,
|
||||
nv3dGetProgramLocalMemoryGpuAddress(p3dChannel));
|
||||
// SIZE_{UPPER,LOWER}
|
||||
nvPushSetMethodDataU64(p, p3dChannel->programLocalMemorySize);
|
||||
}
|
||||
|
||||
// Point rasterization.
|
||||
nvPushImmed(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_POINT_CENTER_MODE, OGL);
|
||||
|
||||
// SPA Control.
|
||||
nvPushImmed(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_SAMPLER_BINDING, VIA_HEADER_BINDING);
|
||||
|
||||
// Viewport parameters.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_VIEWPORT_SCALE_OFFSET,
|
||||
NV3D_C(9097, SET_VIEWPORT_SCALE_OFFSET, ENABLE, FALSE));
|
||||
|
||||
// Viewport clip. There are 16 viewports
|
||||
for (i = 0; i < 16; i++) {
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VIEWPORT_CLIP_HORIZONTAL(i), 2);
|
||||
nvPushSetMethodData(p, pCaps->maxDim << 16);
|
||||
nvPushSetMethodData(p, pCaps->maxDim << 16);
|
||||
}
|
||||
|
||||
nvPushImmed(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_PROVOKING_VERTEX, LAST);
|
||||
|
||||
// Use one rop state for all targets
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_SINGLE_ROP_CONTROL,
|
||||
NV3D_C(9097, SET_SINGLE_ROP_CONTROL, ENABLE, TRUE));
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_SINGLE_CT_WRITE_CONTROL,
|
||||
NV3D_C(9097, SET_SINGLE_CT_WRITE_CONTROL, ENABLE, TRUE));
|
||||
|
||||
// Set up blending: enable Ct[0]. It's disabled by default for the rest.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND(0),
|
||||
NV3D_C(9097, SET_BLEND, ENABLE, TRUE));
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_CONST_ALPHA, 1);
|
||||
nv3dPushFloat(p, NV3D_FLOAT_ONE);
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_BLEND_SEPARATE_FOR_ALPHA, 2);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(9097, SET_BLEND_SEPARATE_FOR_ALPHA, ENABLE, FALSE));
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(9097, SET_BLEND_COLOR_OP, V, OGL_FUNC_ADD));
|
||||
|
||||
// Upload the pixel shaders. First, attempt to upload through a CPU
|
||||
// mapping (which is generally faster); if that fails (e.g., because there
|
||||
// is no space in BAR1 for the mapping), then fall back to uploading inline
|
||||
// through the pushbuffer.
|
||||
if (!UploadPrograms(p3dChannel, programCode)) {
|
||||
pHal->uploadDataInline(p3dChannel,
|
||||
nv3dGetProgramGpuAddress(p3dChannel),
|
||||
0,
|
||||
programCode,
|
||||
p3dChannel->programs.code.decompressedSize);
|
||||
}
|
||||
|
||||
nv3dImportFree(programCode);
|
||||
programCode = NULL;
|
||||
|
||||
for (i = 0; i < p3dChannel->programs.constants.count; i++) {
|
||||
const Nv3dShaderConstBufInfo *pInfo =
|
||||
&p3dChannel->programs.constants.info[i];
|
||||
|
||||
pHal->uploadDataInline(p3dChannel,
|
||||
nv3dGetProgramConstantsGpuAddress(p3dChannel),
|
||||
pInfo->offset,
|
||||
pInfo->data,
|
||||
pInfo->size);
|
||||
}
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_INVALIDATE_SHADER_CACHES, 1);
|
||||
nvPushSetMethodData(p,
|
||||
DRF_DEF(9097, _INVALIDATE_SHADER_CACHES, _INSTRUCTION, _TRUE) |
|
||||
DRF_DEF(9097, _INVALIDATE_SHADER_CACHES, _CONSTANT, _TRUE));
|
||||
|
||||
if (pCaps->hasProgramRegion) {
|
||||
gpuAddress = nv3dGetProgramGpuAddress(p3dChannel);
|
||||
|
||||
nvAssert((gpuAddress & 255) == 0);
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_PROGRAM_REGION_A, 2);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
}
|
||||
|
||||
// Initialize the texture header and sampler area.
|
||||
//
|
||||
// To update these things, we upload data through the pushbuffer. The
|
||||
// upload has an alignment twice the size of a texture header/sampler, so we
|
||||
// interleave the two. Texture samplers come first. Thus, "texture sampler
|
||||
// 2i+1" is actually texture header 2i. This allows us to use a single
|
||||
// upload to update a single texture sampler/header pair if we so desire.
|
||||
gpuAddress = tex0GpuAddress + offsetof(Nv3dTexture, samp);
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_TEX_SAMPLER_POOL_A, 3);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
nvPushSetMethodData(p, 0); // Max index. 0 because we use VIA_HEADER mode.
|
||||
|
||||
gpuAddress = tex0GpuAddress + offsetof(Nv3dTexture, head);
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_TEX_HEADER_POOL_A, 3);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
nvPushSetMethodData(p, 2 * (NV_MAX(p3dChannel->numTextures, 1) - 1)); // Max index
|
||||
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_WINDOW_ORIGIN,
|
||||
NV3D_C(9097, SET_WINDOW_ORIGIN, MODE, UPPER_LEFT) |
|
||||
NV3D_C(9097, SET_WINDOW_ORIGIN, FLIP_Y, TRUE));
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_ZCULL_BOUNDS, 1);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(9097, SET_ZCULL_BOUNDS, Z_MIN_UNBOUNDED_ENABLE, FALSE) |
|
||||
NV3D_C(9097, SET_ZCULL_BOUNDS, Z_MAX_UNBOUNDED_ENABLE, FALSE));
|
||||
|
||||
pHal->setSpaVersion(p3dChannel);
|
||||
|
||||
pHal->initChannel(p3dChannel);
|
||||
|
||||
_nv3dInitializeStreams(p3dChannel);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
void nv3dLoadProgram(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
int programIndex)
|
||||
{
|
||||
const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
|
||||
const Nv3dProgramInfo *pgm = &p3dChannel->programs.info[programIndex];
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvAssert(programIndex < p3dChannel->programs.num);
|
||||
nvAssert(programIndex >= 0);
|
||||
nvAssert(pgm->stage < ARRAY_LEN(p3dChannel->currentProgramIndex));
|
||||
nvAssert(pgm->bindGroup <= NV3D_HW_BIND_GROUP_LAST);
|
||||
|
||||
if (p3dChannel->currentProgramIndex[pgm->stage] == programIndex) {
|
||||
return;
|
||||
}
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_PIPELINE_SHADER(pgm->stage), 1);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(9097, SET_PIPELINE_SHADER, ENABLE, TRUE) |
|
||||
NV3D_V(9097, SET_PIPELINE_SHADER, TYPE, pgm->type));
|
||||
|
||||
pHal->setProgramOffset(p3dChannel, pgm->stage, pgm->offset);
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_PIPELINE_REGISTER_COUNT(pgm->stage), 2);
|
||||
nvPushSetMethodData(p, pgm->registerCount);
|
||||
nvPushSetMethodData(p, pgm->bindGroup);
|
||||
|
||||
// Bind or invalidate the compiler-generated constant buffer slot, which the
|
||||
// compiler always puts in NV3D_CB_SLOT_COMPILER.
|
||||
if (pgm->constIndex == -1) {
|
||||
nv3dBindCb(p3dChannel, pgm->bindGroup,
|
||||
NV3D_CB_SLOT_COMPILER, FALSE);
|
||||
} else if (p3dChannel->programs.constants.size > 0) {
|
||||
const Nv3dShaderConstBufInfo *pInfo =
|
||||
&p3dChannel->programs.constants.info[pgm->constIndex];
|
||||
const NvU64 gpuAddress =
|
||||
nv3dGetProgramConstantsGpuAddress(p3dChannel) + pInfo->offset;
|
||||
const NvU32 paddedSize =
|
||||
NV_ALIGN_UP(pInfo->size, p3dChannel->programs.constants.sizeAlign);
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_CONSTANT_BUFFER_SELECTOR_A, 3);
|
||||
nvPushSetMethodData(p, paddedSize);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
nv3dBindCb(p3dChannel, pgm->bindGroup, NV3D_CB_SLOT_COMPILER, TRUE);
|
||||
}
|
||||
|
||||
p3dChannel->currentProgramIndex[pgm->stage] = programIndex;
|
||||
}
|
||||
|
||||
void _nv3dSetProgramOffsetFermi(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU32 stage,
|
||||
NvU32 offset)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_PIPELINE_PROGRAM(stage), 1);
|
||||
nvPushSetMethodData(p, offset);
|
||||
}
|
||||
|
||||
void _nv3dInvalidateTexturesFermi(
|
||||
Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_INVALIDATE_SAMPLER_CACHE,
|
||||
NV3D_C(9097, INVALIDATE_SAMPLER_CACHE, LINES, ALL));
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_INVALIDATE_TEXTURE_HEADER_CACHE,
|
||||
NV3D_C(9097, INVALIDATE_TEXTURE_HEADER_CACHE, LINES, ALL));
|
||||
}
|
||||
|
||||
void nv3dSetBlendColorCoefficients(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
enum Nv3dBlendOperation op,
|
||||
NvBool forceNoDstAlphaBits,
|
||||
NvBool dualSourceBlending)
|
||||
{
|
||||
#define SFACTOR(factor) (NV9097_SET_BLEND_COLOR_SOURCE_COEFF_V_OGL_##factor)
|
||||
#define DFACTOR(factor) (NV9097_SET_BLEND_COLOR_DEST_COEFF_V_OGL_##factor)
|
||||
#define OP(op) (NV3D_BLEND_OP_##op)
|
||||
|
||||
static const struct {
|
||||
NvU32 sfactor;
|
||||
NvU32 dfactor;
|
||||
} BlendOps[] = {
|
||||
[OP(CLEAR)] = {SFACTOR(ZERO), DFACTOR(ZERO)},
|
||||
[OP(SRC)] = {SFACTOR(ONE), DFACTOR(ZERO)},
|
||||
[OP(DST)] = {SFACTOR(ZERO), DFACTOR(ONE)},
|
||||
[OP(OVER)] = {SFACTOR(ONE), DFACTOR(ONE_MINUS_SRC_ALPHA)},
|
||||
[OP(OVER_REVERSE)] = {SFACTOR(ONE_MINUS_DST_ALPHA), DFACTOR(ONE)},
|
||||
[OP(IN)] = {SFACTOR(DST_ALPHA), DFACTOR(ZERO)},
|
||||
[OP(IN_REVERSE)] = {SFACTOR(ZERO), DFACTOR(SRC_ALPHA)},
|
||||
[OP(OUT)] = {SFACTOR(ONE_MINUS_DST_ALPHA), DFACTOR(ZERO)},
|
||||
[OP(OUT_REVERSE)] = {SFACTOR(ZERO), DFACTOR(ONE_MINUS_SRC_ALPHA)},
|
||||
[OP(ATOP)] = {SFACTOR(DST_ALPHA), DFACTOR(ONE_MINUS_SRC_ALPHA)},
|
||||
[OP(ATOP_REVERSE)] = {SFACTOR(ONE_MINUS_DST_ALPHA), DFACTOR(SRC_ALPHA)},
|
||||
[OP(XOR)] = {SFACTOR(ONE_MINUS_DST_ALPHA), DFACTOR(ONE_MINUS_SRC_ALPHA)},
|
||||
[OP(ADD)] = {SFACTOR(ONE), DFACTOR(ONE)},
|
||||
[OP(SATURATE)] = {SFACTOR(SRC_ALPHA_SATURATE), DFACTOR(ONE)}
|
||||
};
|
||||
|
||||
NvU32 sfactor, dfactor;
|
||||
|
||||
nvAssert(op < ARRAY_LEN(BlendOps));
|
||||
|
||||
sfactor = BlendOps[op].sfactor;
|
||||
dfactor = BlendOps[op].dfactor;
|
||||
|
||||
// if we're rendering to a picture that has an XRGB format that HW doesn't
|
||||
// support, feed in the 1.0 constant DstAlpha value
|
||||
if (forceNoDstAlphaBits) {
|
||||
switch (sfactor) {
|
||||
case SFACTOR(DST_ALPHA):
|
||||
sfactor = SFACTOR(CONSTANT_ALPHA);
|
||||
break;
|
||||
case SFACTOR(ONE_MINUS_DST_ALPHA):
|
||||
sfactor = SFACTOR(ONE_MINUS_CONSTANT_ALPHA);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If dual-source blending is enabled, swap the dfactor for one that uses
|
||||
// the second source color.
|
||||
if (dualSourceBlending) {
|
||||
switch (dfactor) {
|
||||
case DFACTOR(SRC_ALPHA):
|
||||
case DFACTOR(SRC_COLOR):
|
||||
dfactor = DFACTOR(SRC1COLOR);
|
||||
break;
|
||||
case DFACTOR(ONE_MINUS_SRC_ALPHA):
|
||||
case DFACTOR(ONE_MINUS_SRC_COLOR):
|
||||
dfactor = DFACTOR(INVSRC1COLOR);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Nv3dBlendState nv3dBlendStateColor = { };
|
||||
|
||||
nv3dBlendStateColor.blendEquation = NV3D_C(9097, SET_BLEND_COLOR_OP, V, OGL_FUNC_ADD);
|
||||
nv3dBlendStateColor.blendFactorSrc = sfactor;
|
||||
nv3dBlendStateColor.blendFactorDst = dfactor;
|
||||
|
||||
nv3dSetBlend(p3dChannel, &nv3dBlendStateColor, NULL, NULL);
|
||||
}
|
||||
|
||||
void nv3dSetBlend(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
const Nv3dBlendState *blendStateColor,
|
||||
const Nv3dBlendState *blendStateAlpha,
|
||||
const Nv3dColor *blendColor)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
const Nv3dColor defaultColor = {
|
||||
NV3D_FLOAT_ONE,
|
||||
NV3D_FLOAT_ONE,
|
||||
NV3D_FLOAT_ONE,
|
||||
NV3D_FLOAT_ONE
|
||||
};
|
||||
|
||||
if (blendColor == NULL) {
|
||||
blendColor = &defaultColor;
|
||||
}
|
||||
|
||||
if (blendStateColor == NULL && blendStateAlpha == NULL) {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND(0),
|
||||
NV3D_C(9097, SET_BLEND, ENABLE, FALSE));
|
||||
return;
|
||||
}
|
||||
|
||||
if (blendStateColor != NULL) {
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_COLOR_OP, 3);
|
||||
nvPushSetMethodData(p, blendStateColor->blendEquation);
|
||||
nvPushSetMethodData(p, blendStateColor->blendFactorSrc);
|
||||
nvPushSetMethodData(p, blendStateColor->blendFactorDst);
|
||||
}
|
||||
|
||||
if (blendStateAlpha != NULL) {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_SEPARATE_FOR_ALPHA,
|
||||
NV3D_C(9097, SET_BLEND_SEPARATE_FOR_ALPHA, ENABLE, TRUE));
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_ALPHA_OP, 2);
|
||||
nvPushSetMethodData(p, blendStateAlpha->blendEquation);
|
||||
nvPushSetMethodData(p, blendStateAlpha->blendFactorSrc);
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_ALPHA_DEST_COEFF, 1);
|
||||
nvPushSetMethodData(p, blendStateAlpha->blendFactorDst);
|
||||
} else {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_SEPARATE_FOR_ALPHA,
|
||||
NV3D_C(9097, SET_BLEND_SEPARATE_FOR_ALPHA, ENABLE, FALSE));
|
||||
}
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_CONST_RED, 4);
|
||||
nv3dPushFloat(p, blendColor->red);
|
||||
nv3dPushFloat(p, blendColor->green);
|
||||
nv3dPushFloat(p, blendColor->blue);
|
||||
nv3dPushFloat(p, blendColor->alpha);
|
||||
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND(0),
|
||||
NV3D_C(9097, SET_BLEND, ENABLE, TRUE));
|
||||
|
||||
}
|
||||
|
||||
void _nv3dSetVertexStreamEndFermi(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
const Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_STREAM_LIMIT_A_A(stream), 2);
|
||||
nvPushSetMethodDataU64(p, pStream->end - 1);
|
||||
}
|
||||
384
src/common/unix/nvidia-3d/src/nvidia-3d-hopper.c
Normal file
@@ -0,0 +1,384 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-turing.h"
|
||||
#include "nvidia-3d-hopper.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include <class/clcb97.h>
|
||||
#include <class/clcb97tex.h>
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dInitChannelHopper(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
_nv3dInitChannelTuring(p3dChannel);
|
||||
|
||||
// Select texture header major version 1 for the new Hopper format.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NVCB97_SET_TEXTURE_HEADER_VERSION, 1);
|
||||
}
|
||||
|
||||
void _nv3dAssignNv3dTextureHopper(
|
||||
Nv3dRenderTexInfo info,
|
||||
Nv3dTexture *tex)
|
||||
{
|
||||
nvAssert(!info.error);
|
||||
|
||||
switch (info.sizes) {
|
||||
case NV3D_TEXHEAD_A8B8G8R8:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_A8B8G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A2B10G10R10:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_A2B10G10R10, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_B5G6R5:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_B5G6R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A1B5G5R5:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_A1B5G5R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R8:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_G8R8:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16G16B16A16:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R16_G16_B16_A16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32G32B32A32:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R32_G32_B32_A32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_Y8_VIDEO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_Y8_VIDEO, tex->head);
|
||||
break;
|
||||
default:
|
||||
nvAssert(!"Unrecognized component sizes");
|
||||
}
|
||||
|
||||
switch (info.dataType) {
|
||||
case NV3D_TEXHEAD_NUM_UNORM:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_UNORM, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_UINT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_UINT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SNORM:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SINT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_SINT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.x) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.y) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.z) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.w) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
// Default to edge clamping. Our GPU seems to support wrapping
|
||||
// even with non-normalized coordinates.
|
||||
tex->samp[0] =
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_P, CLAMP_TO_EDGE);
|
||||
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D_BUFFER) {
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_1DRT, _ADDRESS_BITS31TO0,
|
||||
NvU64_LO32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_1DRT, _HEADER_VERSION,
|
||||
_SELECT_ONE_D_RAW_TYPED, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97_, TEXHEAD_V2_1DRT, _ADDRESS_BITS63TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_1DRT, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_PITCH) {
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _ADDRESS_BITS31TO5,
|
||||
info.offset >> 5, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_PITCH, _HEADER_VERSION,
|
||||
_SELECT_PITCH_V2, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _ADDRESS_BITS56TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _PITCH_BITS21TO5,
|
||||
NvU32_LO16(info.pitch >> 5), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_PITCH, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_PITCH, _BORDER_SOURCE,
|
||||
_BORDER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
} else {
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D) {
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _TEXTURE_TYPE,
|
||||
_ONE_D, tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR) {
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
}
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _ADDRESS_BITS31TO9,
|
||||
info.offset >> 9, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _HEADER_VERSION,
|
||||
_SELECT_BLOCKLINEAR_V2, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _ADDRESS_BITS56TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _GOBS_PER_BLOCK_WIDTH,
|
||||
info.log2GobsPerBlock.x, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _GOBS_PER_BLOCK_HEIGHT,
|
||||
info.log2GobsPerBlock.y, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _GOBS_PER_BLOCK_DEPTH,
|
||||
info.log2GobsPerBlock.z, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _BORDER_SOURCE,
|
||||
_BORDER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
}
|
||||
|
||||
switch (info.repeatType) {
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NORMAL:
|
||||
tex->samp[0] = NV3D_C(CB97, TEXSAMP0, ADDRESS_U, WRAP) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, WRAP);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_PAD:
|
||||
tex->samp[0] = NV3D_C(CB97, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_REFLECT:
|
||||
tex->samp[0] = NV3D_C(CB97, TEXSAMP0, ADDRESS_U, MIRROR) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, MIRROR);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NONE:
|
||||
tex->samp[0] = NV3D_C(CB97, TEXSAMP0, ADDRESS_U, BORDER) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, BORDER);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.filtering) {
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_NEAREST:
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_POINT) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_POINT) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_LINEAR:
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_2X:
|
||||
tex->samp[0] |= NV3D_C(CB97, TEXSAMP0, MAX_ANISOTROPY, ANISO_2_TO_1);
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_2_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_4X:
|
||||
tex->samp[0] |= NV3D_C(CB97, TEXSAMP0, MAX_ANISOTROPY, ANISO_4_TO_1);
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_4_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_8X:
|
||||
tex->samp[0] |= NV3D_C(CB97, TEXSAMP0, MAX_ANISOTROPY, ANISO_8_TO_1);
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_8_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_16X:
|
||||
tex->samp[0] |= NV3D_C(CB97, TEXSAMP0, MAX_ANISOTROPY, ANISO_16_TO_1);
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_16_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
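The texture-header helpers in this commit (this variant and the Maxwell/Pascal variants later in the diff) all consume an Nv3dRenderTexInfo descriptor. As a hedged illustration only, using just the fields these functions read and assuming the usual nvidia-3d headers are in scope (the 256x256 geometry and A8B8G8R8 format are arbitrary example values, not taken from the commit):

/*
 * Sketch: describe a 256x256 blocklinear A8B8G8R8 texture at 'offset'.
 * log2GobsPerBlock is left zeroed (one GOB per block); real callers
 * derive it from the surface layout.
 */
static Nv3dRenderTexInfo MakeSimpleTexInfo(NvU64 offset)
{
    Nv3dRenderTexInfo info = { 0 };

    info.texType = NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR;
    info.offset = offset;
    info.width = 256;
    info.height = 256;
    info.normalizedCoords = TRUE;

    info.sizes = NV3D_TEXHEAD_A8B8G8R8;
    info.dataType = NV3D_TEXHEAD_NUM_UNORM;
    info.source.x = NV3D_TEXHEAD_IN_R;
    info.source.y = NV3D_TEXHEAD_IN_G;
    info.source.z = NV3D_TEXHEAD_IN_B;
    info.source.w = NV3D_TEXHEAD_IN_A;

    info.repeatType = NV3D_TEXHEAD_REPEAT_TYPE_PAD;
    info.filtering = NV3D_TEXHEAD_FILTER_TYPE_LINEAR;

    return info;
}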
504
src/common/unix/nvidia-3d/src/nvidia-3d-init.c
Normal file
504
src/common/unix/nvidia-3d/src/nvidia-3d-init.c
Normal file
@@ -0,0 +1,504 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2005-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-surface.h"
|
||||
#include "nvidia-3d-types-priv.h"
|
||||
|
||||
#include "nvidia-3d-fermi.h"
|
||||
#include "nvidia-3d-kepler.h"
|
||||
#include "nvidia-3d-maxwell.h"
|
||||
#include "nvidia-3d-pascal.h"
|
||||
#include "nvidia-3d-volta.h"
|
||||
#include "nvidia-3d-turing.h"
|
||||
#include "nvidia-3d-hopper.h"
|
||||
|
||||
#include "nvidia-push-init.h" // nvPushGetSupportedClassIndex()
|
||||
#include "nvidia-push-utils.h" // nvPushIsAmodel()
|
||||
|
||||
#include <class/clcb97.h> // HOPPER_A
|
||||
#include <class/clc997.h> // ADA_A
|
||||
#include <class/clc797.h> // AMPERE_B
|
||||
#include <class/clc697.h> // AMPERE_A
|
||||
#include <class/clc597.h> // TURING_A
|
||||
#include <class/clc397.h> // VOLTA_A
|
||||
#include <class/clc197.h> // PASCAL_B
|
||||
#include <class/clc097.h> // PASCAL_A
|
||||
#include <class/clb197.h> // MAXWELL_B
|
||||
#include <class/clb097.h> // MAXWELL_A
|
||||
|
||||
#include <ctrl/ctrl2080/ctrl2080gr.h>
|
||||
#include <nvos.h>
|
||||
|
||||
#include "g_maxwell_shader_info.h"
|
||||
#include "g_pascal_shader_info.h"
|
||||
#include "g_volta_shader_info.h"
|
||||
#include "g_turing_shader_info.h"
|
||||
#include "g_ampere_shader_info.h"
|
||||
#include "g_hopper_shader_info.h"
|
||||
|
||||
#define _NV3D_CHANNEL_PROGRAMS_ENTRY(_archLower, _archCamel, _archUpper) \
|
||||
[NV3D_SHADER_ARCH_ ## _archUpper ] = { \
|
||||
.num = NUM_PROGRAMS, \
|
||||
.info = _archCamel ## ProgramInfo, \
|
||||
.maxLocalBytes = _archCamel ## ShaderMaxLocalBytes, \
|
||||
.maxStackBytes = _archCamel ## ShaderMaxStackBytes, \
|
||||
.code.decompressedSize = _archCamel ## ProgramHeapSize, \
|
||||
.code.compressedStart = \
|
||||
({ extern const unsigned char \
|
||||
_binary_ ## _archLower ## _shaders_xz_start[]; \
|
||||
_binary_ ## _archLower ## _shaders_xz_start; }), \
|
||||
.code.compressedEnd = \
|
||||
({ extern const unsigned char \
|
||||
_binary_ ## _archLower ## _shaders_xz_end[]; \
|
||||
_binary_ ## _archLower ## _shaders_xz_end; }), \
|
||||
.constants.info = _archCamel ## ConstBufInfo, \
|
||||
.constants.count = \
|
||||
(NvU32)ARRAY_LEN(_archCamel ## ConstBufInfo), \
|
||||
.constants.size = _archCamel ## ConstBufSize, \
|
||||
.constants.sizeAlign = _archCamel ## ConstBufSizeAlign, \
|
||||
}
|
||||
|
||||
static Nv3dChannelProgramsRec PickProgramsRec(
|
||||
const Nv3dDeviceRec *p3dDevice)
|
||||
{
|
||||
const Nv3dChannelProgramsRec programsTable[NV3D_SHADER_ARCH_COUNT] = {
|
||||
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(maxwell, Maxwell, MAXWELL),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(pascal, Pascal, PASCAL),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(volta, Volta, VOLTA),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(turing, Turing, TURING),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(ampere, Ampere, AMPERE),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(hopper, Hopper, HOPPER),
|
||||
};
|
||||
|
||||
return programsTable[p3dDevice->shaderArch];
|
||||
}
|
||||
|
||||
#undef _NV3D_CHANNEL_PROGRAMS_ENTRY
|
||||
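The .code.compressedStart/.code.compressedEnd members populated by the macro above are linker-generated symbols bracketing each architecture's xz-compressed shader blob. A minimal sketch of how the blob size falls out of those two pointers (this helper is illustrative only and not part of the commit; decompression itself is not shown here):

/* Sketch: size of the compressed shader blob selected for this device. */
static size_t GetCompressedShaderSize(const Nv3dDeviceRec *p3dDevice)
{
    const Nv3dChannelProgramsRec programs = PickProgramsRec(p3dDevice);

    return (size_t)(programs.code.compressedEnd -
                    programs.code.compressedStart);
}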
|
||||
|
||||
static NvBool QueryThreadsAndWarpsOneSd(
|
||||
Nv3dDevicePtr p3dDevice,
|
||||
NvU32 sd,
|
||||
NvU32 *pMaxWarps,
|
||||
NvU32 *pThreadsPerWarp)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
NvU32 ret;
|
||||
|
||||
NV2080_CTRL_GR_GET_INFO_PARAMS grInfoParams = { 0 };
|
||||
struct {
|
||||
NV2080_CTRL_GR_INFO numSMs;
|
||||
NV2080_CTRL_GR_INFO maxWarpsPerSM;
|
||||
NV2080_CTRL_GR_INFO threadsPerWarp;
|
||||
} grInfo;
|
||||
|
||||
NVMISC_MEMSET(&grInfo, 0, sizeof(grInfo));
|
||||
|
||||
grInfo.numSMs.index =
|
||||
NV2080_CTRL_GR_INFO_INDEX_THREAD_STACK_SCALING_FACTOR;
|
||||
grInfo.maxWarpsPerSM.index =
|
||||
NV2080_CTRL_GR_INFO_INDEX_MAX_WARPS_PER_SM;
|
||||
grInfo.threadsPerWarp.index =
|
||||
NV2080_CTRL_GR_INFO_INDEX_MAX_THREADS_PER_WARP;
|
||||
|
||||
grInfoParams.grInfoListSize =
|
||||
sizeof(grInfo) / sizeof(NV2080_CTRL_GR_INFO);
|
||||
|
||||
grInfoParams.grInfoList = NV_PTR_TO_NvP64(&grInfo);
|
||||
|
||||
ret = pImports->rmApiControl(pPushDevice,
|
||||
pPushDevice->subDevice[sd].handle,
|
||||
NV2080_CTRL_CMD_GR_GET_INFO,
|
||||
&grInfoParams,
|
||||
sizeof(grInfoParams));
|
||||
|
||||
if (ret != NVOS_STATUS_SUCCESS) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
*pMaxWarps = grInfo.numSMs.data * grInfo.maxWarpsPerSM.data;
|
||||
*pThreadsPerWarp = grInfo.threadsPerWarp.data;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static NvBool GetMaxThreadsAndWarps(Nv3dDevicePtr p3dDevice)
|
||||
{
|
||||
NvU32 sd;
|
||||
|
||||
p3dDevice->maxThreadsPerWarp = 0;
|
||||
p3dDevice->maxWarps = 0;
|
||||
|
||||
for (sd = 0; sd < p3dDevice->pPushDevice->numSubDevices; sd++) {
|
||||
|
||||
NvU32 maxWarps, threadsPerWarp;
|
||||
|
||||
if (!QueryThreadsAndWarpsOneSd(p3dDevice, sd,
|
||||
&maxWarps, &threadsPerWarp)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
p3dDevice->maxThreadsPerWarp =
|
||||
NV_MAX(p3dDevice->maxThreadsPerWarp, threadsPerWarp);
|
||||
|
||||
p3dDevice->maxWarps = NV_MAX(p3dDevice->maxWarps, maxWarps);
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the SM version reported by resman.
|
||||
*
|
||||
* \param pPushDevice The nvidia-push device corresponding to the GPU.
|
||||
*
|
||||
* \return The SM version of this device.
|
||||
*/
|
||||
static NvU32 GetSmVersion(
|
||||
NvPushDevicePtr pPushDevice)
|
||||
{
|
||||
NvU32 sd, smVersion = NV2080_CTRL_GR_INFO_SM_VERSION_NONE;
|
||||
|
||||
if (nvPushIsAModel(pPushDevice)) {
|
||||
/*
|
||||
* On amodel resman cannot tell us the SM version, so we pick
|
||||
* the SM version based on NVAModelConfig.
|
||||
*/
|
||||
static const NvU32 table[] = {
|
||||
[NV_AMODEL_MAXWELL] = NV2080_CTRL_GR_INFO_SM_VERSION_5_0,
|
||||
[NV_AMODEL_PASCAL] = NV2080_CTRL_GR_INFO_SM_VERSION_6_0,
|
||||
[NV_AMODEL_VOLTA] = NV2080_CTRL_GR_INFO_SM_VERSION_7_0,
|
||||
[NV_AMODEL_TURING] = NV2080_CTRL_GR_INFO_SM_VERSION_7_5,
|
||||
[NV_AMODEL_AMPERE] = NV2080_CTRL_GR_INFO_SM_VERSION_8_2,
|
||||
[NV_AMODEL_ADA] = NV2080_CTRL_GR_INFO_SM_VERSION_8_9,
|
||||
[NV_AMODEL_HOPPER] = NV2080_CTRL_GR_INFO_SM_VERSION_9_0,
|
||||
};
|
||||
|
||||
if (pPushDevice->amodelConfig >= ARRAY_LEN(table)) {
|
||||
return NV2080_CTRL_GR_INFO_SM_VERSION_NONE;
|
||||
}
|
||||
|
||||
return table[pPushDevice->amodelConfig];
|
||||
}
|
||||
|
||||
/*
|
||||
* Query the SM version from resman. This query is per-subDevice,
|
||||
* but we use SM version per-device, so assert that the SM version
|
||||
* matches across subDevices.
|
||||
*/
|
||||
for (sd = 0; sd < pPushDevice->numSubDevices; sd++) {
|
||||
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
NV2080_CTRL_GR_GET_INFO_PARAMS params = { };
|
||||
NV2080_CTRL_GR_INFO smVersionParams = { };
|
||||
NvU32 ret;
|
||||
|
||||
smVersionParams.index = NV2080_CTRL_GR_INFO_INDEX_SM_VERSION;
|
||||
params.grInfoListSize = 1;
|
||||
params.grInfoList = NV_PTR_TO_NvP64(&smVersionParams);
|
||||
|
||||
ret = pImports->rmApiControl(pPushDevice,
|
||||
pPushDevice->subDevice[sd].handle,
|
||||
NV2080_CTRL_CMD_GR_GET_INFO,
|
||||
¶ms,
|
||||
sizeof(params));
|
||||
|
||||
if (ret != NVOS_STATUS_SUCCESS) {
|
||||
return NV2080_CTRL_GR_INFO_SM_VERSION_NONE;
|
||||
}
|
||||
|
||||
if (sd == 0) {
|
||||
smVersion = smVersionParams.data;
|
||||
} else {
|
||||
nvAssert(smVersion == smVersionParams.data);
|
||||
}
|
||||
}
|
||||
|
||||
return smVersion;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the SPA version to use with the 3D Class.
|
||||
*
|
||||
* Note that resman only reports the SM version (the "hardware
|
||||
* revision"), not the SPA version (the ISA version). So we use a
|
||||
* table to map from SM version to SPA version.
|
||||
*
|
||||
* \param pPushDevice The nvidia-push device corresponding to the GPU.
|
||||
* \param pSpaVersion The spaVersion to assign.
|
||||
*
|
||||
* \return TRUE if the SPA version could be assigned.
|
||||
*/
|
||||
static NvBool GetSpaVersion(
|
||||
NvPushDevicePtr pPushDevice,
|
||||
Nv3dDeviceSpaVersionRec *pSpaVersion)
|
||||
{
|
||||
static const struct {
|
||||
NvU32 smVersion;
|
||||
Nv3dDeviceSpaVersionRec spaVersion;
|
||||
} table[] = {
|
||||
/* Maxwell */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_5_0, { 5,0 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_5_2, { 5,2 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_5_3, { 5,3 } },
|
||||
|
||||
/* Pascal */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_6_0, { 5,5 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_6_1, { 5,5 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_6_2, { 5,6 } },
|
||||
|
||||
/* Volta */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_7_0, { 7,0 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_7_2, { 7,2 } },
|
||||
|
||||
/* Turing */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_7_3, { 7,3 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_7_5, { 7,5 } },
|
||||
|
||||
/* Ampere */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_2, { 8,2 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_6, { 8,6 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_7, { 8,6 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_8, { 8,6 } },
|
||||
/* Ada */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_9, { 8,9 } },
|
||||
/* Hopper */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_9_0, { 9,0 } },
|
||||
};
|
||||
|
||||
const NvU32 smVersion = GetSmVersion(pPushDevice);
|
||||
NvU32 i;
|
||||
|
||||
for (i = 0; i < ARRAY_LEN(table); i++) {
|
||||
if (table[i].smVersion == smVersion) {
|
||||
*pSpaVersion = table[i].spaVersion;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static const Nv3dHal _nv3dHalMaxwell = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelMaxwell, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetFermi, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTextureMaxwell, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndFermi, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalPascal = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelPascal, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetFermi, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndFermi, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalVolta = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelPascal, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetVolta, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndFermi, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalTuring = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelTuring, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetVolta, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndTuring, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalAmpere = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelTuring, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetVolta, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndTuring, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalHopper = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelHopper, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetVolta, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTextureHopper, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndTuring, /* setVertexStreamEnd */
|
||||
};
|
||||
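Each per-architecture Nv3dHal above is a table of function pointers; callers dispatch through the hal selected by nv3dAllocDevice() below. A minimal sketch, assuming the Nv3dHal member names match the /* ... */ annotations in the initializers (they are not spelled out in this file):

/* Sketch: run the HAL's channel init hooks for whichever arch was chosen. */
static void InitChannelViaHal(Nv3dChannelRec *p3dChannel)
{
    const Nv3dHal *hal = p3dChannel->p3dDevice->hal;

    hal->initChannel(p3dChannel);
    hal->setSpaVersion(p3dChannel);
}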
|
||||
NvBool nv3dAllocDevice(
|
||||
const Nv3dAllocDeviceParams *pParams,
|
||||
Nv3dDevicePtr p3dDevice)
|
||||
{
|
||||
static const struct {
|
||||
NvPushSupportedClass base;
|
||||
const Nv3dDeviceCapsRec caps;
|
||||
const Nv3dHal *hal;
|
||||
enum Nv3dShaderArch shaderArch;
|
||||
} table[] = {
|
||||
|
||||
#define ENTRY(_classNumber, \
|
||||
_arch, \
|
||||
_amodelArch, \
|
||||
_hasSetBindlessTexture, \
|
||||
_hasProgramRegion, \
|
||||
_maxDim, \
|
||||
_hal) \
|
||||
{ \
|
||||
.base.classNumber = _classNumber, \
|
||||
.base.amodelConfig = NV_AMODEL_ ## _amodelArch, \
|
||||
.caps.hasSetBindlessTexture = _hasSetBindlessTexture, \
|
||||
.caps.hasProgramRegion = _hasProgramRegion, \
|
||||
.caps.maxDim = _maxDim, \
|
||||
.hal = &_nv3dHal ## _hal, \
|
||||
.shaderArch = NV3D_SHADER_ARCH_ ## _arch,\
|
||||
}
|
||||
|
||||
/*
|
||||
* hal--------------------------------------------------+
|
||||
* maxDim----------------------------------------+ |
|
||||
* hasProgramRegion---------------------------+ | |
|
||||
* hasSetBindlessTexture-------------------+ | | |
|
||||
* amodel arch----------------+ | | | |
|
||||
* shader arch---+ | | | | |
|
||||
* classNumber | | | | | |
|
||||
* | | | | | | |
|
||||
*/
|
||||
ENTRY(HOPPER_A, HOPPER, HOPPER, 0, 0, 32768, Hopper),
|
||||
ENTRY(ADA_A, AMPERE, ADA, 0, 0, 32768, Ampere),
|
||||
ENTRY(AMPERE_B, AMPERE, AMPERE, 0, 0, 32768, Ampere),
|
||||
ENTRY(AMPERE_A, AMPERE, AMPERE, 0, 0, 32768, Ampere),
|
||||
ENTRY(TURING_A, TURING, TURING, 0, 0, 32768, Turing),
|
||||
ENTRY(VOLTA_A, VOLTA, VOLTA, 0, 0, 32768, Volta),
|
||||
ENTRY(PASCAL_B, PASCAL, PASCAL, 1, 1, 32768, Pascal),
|
||||
ENTRY(PASCAL_A, PASCAL, PASCAL, 1, 1, 32768, Pascal),
|
||||
ENTRY(MAXWELL_B, MAXWELL, MAXWELL, 1, 1, 16384, Maxwell),
|
||||
ENTRY(MAXWELL_A, MAXWELL, MAXWELL, 1, 1, 16384, Maxwell),
|
||||
};
|
||||
|
||||
int i;
|
||||
|
||||
NVMISC_MEMSET(p3dDevice, 0, sizeof(*p3dDevice));
|
||||
|
||||
/* find the first supported 3D HAL */
|
||||
|
||||
i = nvPushGetSupportedClassIndex(pParams->pPushDevice,
|
||||
table,
|
||||
sizeof(table[0]),
|
||||
ARRAY_LEN(table));
|
||||
if (i == -1) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!GetSpaVersion(pParams->pPushDevice, &p3dDevice->spaVersion)) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
p3dDevice->pPushDevice = pParams->pPushDevice;
|
||||
p3dDevice->caps = table[i].caps;
|
||||
p3dDevice->classNumber = table[i].base.classNumber;
|
||||
p3dDevice->hal = table[i].hal;
|
||||
p3dDevice->shaderArch = table[i].shaderArch;
|
||||
|
||||
if (!GetMaxThreadsAndWarps(p3dDevice)) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
|
||||
fail:
|
||||
nv3dFreeDevice(p3dDevice);
|
||||
return FALSE;
|
||||
}
|
||||
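A hedged usage sketch of the allocation/teardown pair defined here, assuming an already-initialized NvPushDevicePtr; Nv3dAllocDeviceParams is only known from this file to carry pPushDevice, so any other members are left zeroed:

/* Sketch: bring up and tear down an Nv3dDeviceRec around a push device. */
static NvBool SetUp3dDevice(NvPushDevicePtr pPushDevice,
                            Nv3dDeviceRec *p3dDevice)
{
    Nv3dAllocDeviceParams params = { 0 };

    params.pPushDevice = pPushDevice;

    if (!nv3dAllocDevice(&params, p3dDevice)) {
        return FALSE;
    }

    /* ... use p3dDevice->caps, p3dDevice->hal, etc. ... */

    nv3dFreeDevice(p3dDevice);
    return TRUE;
}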
|
||||
void nv3dFreeDevice(Nv3dDevicePtr p3dDevice)
|
||||
{
|
||||
/*
|
||||
* So far, there is nothing to free: Nv3dDevicePtr only stores
|
||||
* queried information.
|
||||
*/
|
||||
NVMISC_MEMSET(p3dDevice, 0, sizeof(*p3dDevice));
|
||||
}
|
||||
|
||||
static NvU64 ComputeProgramLocalMemorySize(
|
||||
const Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
const Nv3dDeviceRec *p3dDevice = p3dChannel->p3dDevice;
|
||||
|
||||
// LocalMemorySizePerSM needs to be a multiple of 512
|
||||
// Note that maxLocalBytes and/or maxStackBytes might be zero.
|
||||
const NvU64 defaultSizePerWarp =
|
||||
NV_ALIGN_UP(p3dChannel->programs.maxLocalBytes *
|
||||
p3dDevice->maxThreadsPerWarp +
|
||||
p3dChannel->programs.maxStackBytes, 512);
|
||||
|
||||
// shader local memory lower bits must be a multiple of 128kB
|
||||
return NV_ALIGN_UP(defaultSizePerWarp * p3dDevice->maxWarps, 128*1024);
|
||||
}
|
||||
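As a worked example of the sizing above (illustrative numbers, not taken from any particular chip): with maxLocalBytes = 1536, maxThreadsPerWarp = 32 and maxStackBytes = 1024, the per-warp size is NV_ALIGN_UP(1536 * 32 + 1024, 512) = 50176 bytes, which is already 512-aligned; multiplying by maxWarps = 2048 gives 102760448 bytes, an exact multiple of 128 KiB, so the final 128 KiB alignment leaves the allocation at 98 MiB.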
|
||||
NvBool nv3dAllocChannelState(
|
||||
const Nv3dAllocChannelStateParams *pParams,
|
||||
Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
NVMISC_MEMSET(p3dChannel, 0, sizeof(*p3dChannel));
|
||||
|
||||
p3dChannel->p3dDevice = pParams->p3dDevice;
|
||||
|
||||
p3dChannel->numTextures = pParams->numTextures;
|
||||
p3dChannel->numTextureBindings = pParams->numTextureBindings;
|
||||
|
||||
p3dChannel->hasFrameBoundaries = pParams->hasFrameBoundaries;
|
||||
|
||||
p3dChannel->programs = PickProgramsRec(pParams->p3dDevice);
|
||||
|
||||
p3dChannel->programLocalMemorySize =
|
||||
ComputeProgramLocalMemorySize(p3dChannel);
|
||||
|
||||
_nv3dAssignSurfaceOffsets(pParams, p3dChannel);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
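A hedged sketch of pairing nv3dAllocChannelState() with nv3dFreeChannelState(); only the parameter fields read above (p3dDevice, numTextures, numTextureBindings, hasFrameBoundaries) are set, and any surface-related members consumed by _nv3dAssignSurfaceOffsets() are assumed, for this illustration only, to tolerate being zeroed:

/* Sketch: allocate per-channel state for one texture binding. */
static NvBool SetUpChannelState(Nv3dDevicePtr p3dDevice,
                                Nv3dChannelRec *p3dChannel)
{
    Nv3dAllocChannelStateParams params = { 0 };

    params.p3dDevice = p3dDevice;
    params.numTextures = 1;
    params.numTextureBindings = 1;
    params.hasFrameBoundaries = FALSE;

    if (!nv3dAllocChannelState(&params, p3dChannel)) {
        return FALSE;
    }

    /* ... render ... */

    nv3dFreeChannelState(p3dChannel);
    return TRUE;
}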
|
||||
void nv3dFreeChannelState(Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
int sd;
|
||||
for (sd = 0; sd < NV_MAX_SUBDEVICES; sd++) {
|
||||
nvAssert(p3dChannel->surface.handle[sd] == 0);
|
||||
}
|
||||
nvAssert(p3dChannel->pPushChannel == NULL);
|
||||
|
||||
NVMISC_MEMSET(p3dChannel, 0, sizeof(*p3dChannel));
|
||||
}
|
||||
|
||||
154
src/common/unix/nvidia-3d/src/nvidia-3d-kepler.c
Normal file
154
src/common/unix/nvidia-3d/src/nvidia-3d-kepler.c
Normal file
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-kepler.h"
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-constant-buffers.h"
|
||||
#include "nvidia-3d-shader-constants.h"
|
||||
|
||||
#include <class/cla097.h>
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dSetSpaVersionKepler(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr pPushChannel = p3dChannel->pPushChannel;
|
||||
const NvU16 major = p3dChannel->p3dDevice->spaVersion.major;
|
||||
const NvU16 minor = p3dChannel->p3dDevice->spaVersion.minor;
|
||||
|
||||
// Tell AModel or fmodel what shader model version to use. This has no
|
||||
// effect on real hardware. The SM version (the "hardware revision" of the
|
||||
// SM block) does not always match the SPA version (the ISA version).
|
||||
nvPushMethod(pPushChannel, NVA06F_SUBCHANNEL_3D,
|
||||
NVA097_SET_SPA_VERSION, 1);
|
||||
nvPushSetMethodData(pPushChannel,
|
||||
NV3D_V(A097, SET_SPA_VERSION, MAJOR, major) |
|
||||
NV3D_V(A097, SET_SPA_VERSION, MINOR, minor));
|
||||
}
|
||||
|
||||
void _nv3dInitChannelKepler(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
// Configure constant buffer slot NV3D_CB_SLOT_BINDLESS_TEXTURE as the
|
||||
// place the texture binding table is stored. This is obsolete on Volta and
|
||||
// later, so don't run it there.
|
||||
if (p3dChannel->p3dDevice->caps.hasSetBindlessTexture) {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NVA097_SET_BINDLESS_TEXTURE,
|
||||
NV3D_V(A097, SET_BINDLESS_TEXTURE, CONSTANT_BUFFER_SLOT_SELECT,
|
||||
NV3D_CB_SLOT_BINDLESS_TEXTURE));
|
||||
}
|
||||
|
||||
// Disable shader exceptions. This matches OpenGL driver behavior.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_SHADER_EXCEPTIONS,
|
||||
NV3D_C(9097, SET_SHADER_EXCEPTIONS, ENABLE, FALSE));
|
||||
}
|
||||
|
||||
/*!
|
||||
* Upload data using the INLINE_TO_MEMORY methods embedded in the KEPLER_A
|
||||
* class.
|
||||
*
|
||||
* The number of dwords pushed inline is limited by nvPushMaxMethodCount().
|
||||
* Push the data in multiple chunks, if necessary.
|
||||
*/
|
||||
void _nv3dUploadDataInlineKepler(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU64 gpuBaseAddress,
|
||||
size_t offset,
|
||||
const void *data,
|
||||
size_t bytes)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
/*
|
||||
* Below we use '1 + dwordsThisChunk' as the method count, so subtract one
|
||||
* when computing chunkSizeDwords.
|
||||
*/
|
||||
const NvU32 chunkSizeDwords = nvPushMaxMethodCount(p) - 1;
|
||||
const NvU32 chunkSize = chunkSizeDwords * 4; /* in bytes */
|
||||
size_t bytesSoFar;
|
||||
|
||||
// Only allow uploading complete dwords.
|
||||
nvAssert((bytes & 3) == 0);
|
||||
|
||||
for (bytesSoFar = 0; bytesSoFar < bytes; bytesSoFar += chunkSize) {
|
||||
|
||||
const NvU32 bytesThisChunk = NV_MIN(bytes - bytesSoFar, chunkSize);
|
||||
const NvU32 dwordsThisChunk = bytesThisChunk / 4;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NVA097_LINE_LENGTH_IN, 5);
|
||||
nvPushSetMethodData(p, bytesThisChunk);
|
||||
nvPushSetMethodData(p, 1); // NVA097_LINE_COUNT
|
||||
nvPushSetMethodDataU64(p, gpuBaseAddress + offset + bytesSoFar);
|
||||
nvPushSetMethodData(p, bytesThisChunk); // NVA097_PITCH_OUT
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NVA097_SET_DST_WIDTH, 2);
|
||||
nvPushSetMethodData(p, bytesThisChunk);
|
||||
nvPushSetMethodData(p, 1); // NVA097_SET_DST_HEIGHT
|
||||
|
||||
nvPushMethodOneIncr(p, NVA06F_SUBCHANNEL_3D, NVA097_LAUNCH_DMA,
|
||||
1 + dwordsThisChunk);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(A097, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
|
||||
// Disable flush -- As long as only 3D requires the data uploaded,
|
||||
// we don't need to incur the performance penalty of a sys-membar.
|
||||
NV3D_C(A097, LAUNCH_DMA, COMPLETION_TYPE, FLUSH_DISABLE) |
|
||||
NV3D_C(A097, LAUNCH_DMA, INTERRUPT_TYPE, NONE) |
|
||||
NV3D_C(A097, LAUNCH_DMA, SYSMEMBAR_DISABLE, TRUE));
|
||||
nvPushInlineData(p, (const NvU8 *)data + bytesSoFar, dwordsThisChunk);
|
||||
}
|
||||
}
|
||||
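For illustration, a single call into the helper above splits the upload into nvPushMaxMethodCount()-sized chunks automatically; the sketch below only shows the call shape (the GPU base address and data pointer are placeholders, and the destination memory is assumed to be mapped for this channel):

/* Sketch: inline-upload 'bytes' of dword-aligned data at gpuBaseAddress. */
static void UploadExample(Nv3dChannelRec *p3dChannel,
                          NvU64 gpuBaseAddress,
                          const NvU32 *data,
                          size_t bytes)
{
    nvAssert((bytes & 3) == 0); /* the helper only handles whole dwords */

    _nv3dUploadDataInlineKepler(p3dChannel, gpuBaseAddress,
                                0 /* offset */, data, bytes);
}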
|
||||
void _nv3dBindTexturesKepler(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
int programIndex,
|
||||
const int *textureBindingIndices)
|
||||
{
|
||||
const NvU16 numTextureBindings = p3dChannel->numTextureBindings;
|
||||
NvPushChannelUnion *remappedBinding = NULL;
|
||||
NvU8 slot;
|
||||
const NvU64 gpuAddress =
|
||||
nv3dGetBindlessTextureConstantBufferGpuAddress(p3dChannel);
|
||||
|
||||
nv3dSelectCbAddress(p3dChannel, gpuAddress, NV3D_CONSTANT_BUFFER_SIZE);
|
||||
nv3dBindCb(p3dChannel, NV3D_HW_BIND_GROUP_FRAGMENT,
|
||||
NV3D_CB_SLOT_BINDLESS_TEXTURE, TRUE);
|
||||
/*
|
||||
* Set up the header in the pushbuffer for the LOAD_CONSTANTS method. The
|
||||
* below loop will write the data to upload directly into the pushbuffer.
|
||||
*/
|
||||
remappedBinding = nv3dLoadConstantsHeader(p3dChannel, 0,
|
||||
numTextureBindings);
|
||||
|
||||
for (slot = 0; slot < numTextureBindings; slot++) {
|
||||
int tex = textureBindingIndices[slot];
|
||||
|
||||
/*
|
||||
* Bindless texture packed pointers. Technically, these consist of
|
||||
* a texture header index at bits 19:0 and a sampler index at bits 32:20, but we don't need
|
||||
* to set a separate header because we enabled
|
||||
* SET_SAMPLER_BINDING_VIA_HEADER_BINDING.
|
||||
*/
|
||||
remappedBinding[slot].u = tex * 2;
|
||||
}
|
||||
}
|
||||
435
src/common/unix/nvidia-3d/src/nvidia-3d-maxwell.c
Normal file
435
src/common/unix/nvidia-3d/src/nvidia-3d-maxwell.c
Normal file
@@ -0,0 +1,435 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-maxwell.h"
|
||||
#include "nvidia-3d-kepler.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include "class/clb097.h"
|
||||
#include "class/clb097tex.h"
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dInitChannelMaxwell(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
const Nv3dDeviceRec *p3dDevice = p3dChannel->p3dDevice;
|
||||
|
||||
_nv3dInitChannelKepler(p3dChannel);
|
||||
|
||||
if (p3dDevice->classNumber == MAXWELL_A) {
|
||||
/*
|
||||
* Use Maxwell texture header format.
|
||||
*
|
||||
* maxwell.mfs says:
|
||||
* NOTE: this method is required to be sent in GM10x. It is ignored
|
||||
* and treated as a NOP in GM20x.
|
||||
*
|
||||
* And on later chips, it is removed and causes exceptions. So we only
|
||||
* send this on GM10x (class MAXWELL_A).
|
||||
*/
|
||||
nvPushImmed(p, NVA06F_SUBCHANNEL_3D,
|
||||
NVB097_SET_SELECT_MAXWELL_TEXTURE_HEADERS, TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
void _nv3dAssignNv3dTextureMaxwell(
|
||||
Nv3dRenderTexInfo info,
|
||||
Nv3dTexture *tex)
|
||||
{
|
||||
NvU32 hi_offset = NvU32_LO16(info.offset >> 32);
|
||||
|
||||
nvAssert(!info.error);
|
||||
|
||||
switch (info.sizes) {
|
||||
case NV3D_TEXHEAD_A8B8G8R8:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A8B8G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A2B10G10R10:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A2B10G10R10, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_B5G6R5:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_B5G6R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A1B5G5R5:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A1B5G5R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R8:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_G8R8:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16G16B16A16:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R16_G16_B16_A16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32G32B32A32:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R32_G32_B32_A32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_Y8_VIDEO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_Y8_VIDEO, tex->head);
|
||||
break;
|
||||
default:
|
||||
nvAssert(!"Unrecognized component sizes");
|
||||
}
|
||||
|
||||
switch (info.dataType) {
|
||||
case NV3D_TEXHEAD_NUM_UNORM:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_UINT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SNORM:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SINT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.x) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.y) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.z) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.w) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
// Default to edge clamping. Our GPU seems to support wrapping
|
||||
// even with non-normalized coordinates.
|
||||
tex->samp[0] =
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_P, CLAMP_TO_EDGE);
|
||||
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D_BUFFER) {
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_1D, _ADDRESS_BITS31TO0,
|
||||
NvU64_LO32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_1D, _HEADER_VERSION,
|
||||
_SELECT_ONE_D_BUFFER, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_1D, _ADDRESS_BITS47TO32,
|
||||
hi_offset, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_1D, _WIDTH_MINUS_ONE_BITS31TO16,
|
||||
NvU32_HI16(info.width - 1), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_1D, _TEXTURE_TYPE,
|
||||
_ONE_D_BUFFER, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_1D, _WIDTH_MINUS_ONE_BITS15TO0,
|
||||
NvU32_LO16(info.width - 1), tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_PITCH) {
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _ADDRESS_BITS31TO5,
|
||||
(NvU32)((info.offset >> 5) & 0x7ffffff), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_PITCH, _HEADER_VERSION,
|
||||
_SELECT_PITCH, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _ADDRESS_BITS47TO32,
|
||||
hi_offset, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _PITCH_BITS20TO5,
|
||||
NvU32_LO16(info.pitch >> 5), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_PITCH, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_PITCH, _BORDER_SIZE,
|
||||
_BORDER_SAMPLER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
} else {
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D) {
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _TEXTURE_TYPE,
|
||||
_ONE_D, tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR) {
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
}
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _ADDRESS_BITS31TO9,
|
||||
(info.offset >> 9) & 0x7fffff, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _HEADER_VERSION,
|
||||
_SELECT_BLOCKLINEAR, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _ADDRESS_BITS47TO32,
|
||||
hi_offset, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _GOBS_PER_BLOCK_WIDTH,
|
||||
info.log2GobsPerBlock.x, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _GOBS_PER_BLOCK_HEIGHT,
|
||||
info.log2GobsPerBlock.y, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _GOBS_PER_BLOCK_DEPTH,
|
||||
info.log2GobsPerBlock.z, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _BORDER_SIZE,
|
||||
_BORDER_SAMPLER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
}
|
||||
|
||||
switch (info.repeatType) {
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NORMAL:
|
||||
tex->samp[0] = NV3D_C(B097, TEXSAMP0, ADDRESS_U, WRAP) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, WRAP);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_PAD:
|
||||
tex->samp[0] = NV3D_C(B097, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_REFLECT:
|
||||
tex->samp[0] = NV3D_C(B097, TEXSAMP0, ADDRESS_U, MIRROR) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, MIRROR);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NONE:
|
||||
tex->samp[0] = NV3D_C(B097, TEXSAMP0, ADDRESS_U, BORDER) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, BORDER);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.filtering) {
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_NEAREST:
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_POINT) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_POINT) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_LINEAR:
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_2X:
|
||||
tex->samp[0] |= NV3D_C(B097, TEXSAMP0, MAX_ANISOTROPY, ANISO_2_TO_1);
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_2_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_4X:
|
||||
tex->samp[0] |= NV3D_C(B097, TEXSAMP0, MAX_ANISOTROPY, ANISO_4_TO_1);
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_4_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_8X:
|
||||
tex->samp[0] |= NV3D_C(B097, TEXSAMP0, MAX_ANISOTROPY, ANISO_8_TO_1);
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_8_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_16X:
|
||||
tex->samp[0] |= NV3D_C(B097, TEXSAMP0, MAX_ANISOTROPY, ANISO_16_TO_1);
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_16_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
431
src/common/unix/nvidia-3d/src/nvidia-3d-pascal.c
Normal file
431
src/common/unix/nvidia-3d/src/nvidia-3d-pascal.c
Normal file
@@ -0,0 +1,431 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-pascal.h"
|
||||
#include "nvidia-3d-maxwell.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include "class/clc197.h" /* NVC197_SET_GO_IDLE_TIMEOUT */
|
||||
#include "class/clc097tex.h"
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dInitChannelPascal(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
_nv3dInitChannelMaxwell(p3dChannel);
|
||||
|
||||
if (!p3dChannel->hasFrameBoundaries) {
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NVC197_SET_GO_IDLE_TIMEOUT, 1);
|
||||
nvPushSetMethodData(p, 0x800);
|
||||
}
|
||||
}
|
||||
|
||||
void _nv3dAssignNv3dTexturePascal(
|
||||
Nv3dRenderTexInfo info,
|
||||
Nv3dTexture *tex)
|
||||
{
|
||||
nvAssert(!info.error);
|
||||
|
||||
switch (info.sizes) {
|
||||
case NV3D_TEXHEAD_A8B8G8R8:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A8B8G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A2B10G10R10:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A2B10G10R10, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_B5G6R5:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_B5G6R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A1B5G5R5:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A1B5G5R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R8:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_G8R8:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16G16B16A16:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R16_G16_B16_A16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32G32B32A32:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R32_G32_B32_A32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_Y8_VIDEO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_Y8_VIDEO, tex->head);
|
||||
break;
|
||||
default:
|
||||
nvAssert(!"Unrecognized component sizes");
|
||||
}
|
||||
|
||||
switch (info.dataType) {
|
||||
case NV3D_TEXHEAD_NUM_UNORM:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_UINT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SNORM:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SINT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.x) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.y) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.z) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.w) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
// Default to edge clamping. Our GPU seems to support wrapping
|
||||
// even with non-normalized coordinates.
|
||||
tex->samp[0] =
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_P, CLAMP_TO_EDGE);
|
||||
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D_BUFFER) {
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_1D, _ADDRESS_BITS31TO0,
|
||||
NvU64_LO32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_1D, _HEADER_VERSION,
|
||||
_SELECT_ONE_D_BUFFER, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_1D, _ADDRESS_BITS48TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_1D, _WIDTH_MINUS_ONE_BITS31TO16,
|
||||
NvU32_HI16(info.width - 1), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_1D, _TEXTURE_TYPE,
|
||||
_ONE_D_BUFFER, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_1D, _WIDTH_MINUS_ONE_BITS15TO0,
|
||||
NvU32_LO16(info.width - 1), tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_PITCH) {
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _ADDRESS_BITS31TO5,
|
||||
info.offset >> 5, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_PITCH, _HEADER_VERSION,
|
||||
_SELECT_PITCH, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _ADDRESS_BITS48TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _PITCH_BITS20TO5,
|
||||
NvU32_LO16(info.pitch >> 5), tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _PITCH_BIT21,
|
||||
info.pitch >> 21, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_PITCH, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _HEIGHT_MINUS_ONE_BIT16,
|
||||
(info.height - 1) >> 16, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_PITCH, _BORDER_SIZE,
|
||||
_BORDER_SAMPLER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
} else {
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D) {
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _TEXTURE_TYPE,
|
||||
_ONE_D, tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR) {
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
}
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _ADDRESS_BITS31TO9,
|
||||
info.offset >> 9, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _HEADER_VERSION,
|
||||
_SELECT_BLOCKLINEAR, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _ADDRESS_BITS48TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _GOBS_PER_BLOCK_WIDTH,
|
||||
info.log2GobsPerBlock.x, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _GOBS_PER_BLOCK_HEIGHT,
|
||||
info.log2GobsPerBlock.y, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _GOBS_PER_BLOCK_DEPTH,
|
||||
info.log2GobsPerBlock.z, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _BORDER_SIZE,
|
||||
_BORDER_SAMPLER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _HEIGHT_MINUS_ONE_BIT16,
|
||||
(info.height - 1) >> 16, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _DEPTH_MINUS_ONE_BIT14,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
}
|
||||
|
||||
switch (info.repeatType) {
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NORMAL:
|
||||
tex->samp[0] = NV3D_C(C097, TEXSAMP0, ADDRESS_U, WRAP) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, WRAP);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_PAD:
|
||||
tex->samp[0] = NV3D_C(C097, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_REFLECT:
|
||||
tex->samp[0] = NV3D_C(C097, TEXSAMP0, ADDRESS_U, MIRROR) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, MIRROR);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NONE:
|
||||
tex->samp[0] = NV3D_C(C097, TEXSAMP0, ADDRESS_U, BORDER) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, BORDER);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.filtering) {
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_NEAREST:
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_POINT) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_POINT) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_LINEAR:
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_2X:
|
||||
tex->samp[0] |= NV3D_C(C097, TEXSAMP0, MAX_ANISOTROPY, ANISO_2_TO_1);
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_2_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_4X:
|
||||
tex->samp[0] |= NV3D_C(C097, TEXSAMP0, MAX_ANISOTROPY, ANISO_4_TO_1);
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_4_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_8X:
|
||||
tex->samp[0] |= NV3D_C(C097, TEXSAMP0, MAX_ANISOTROPY, ANISO_8_TO_1);
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_8_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_16X:
|
||||
tex->samp[0] |= NV3D_C(C097, TEXSAMP0, MAX_ANISOTROPY, ANISO_16_TO_1);
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_16_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
}
|
||||
}
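/*
 * Illustrative sketch (not part of the original file): how the pitch-linear
 * path above splits a byte pitch across the texture header.  The pitch is
 * programmed in 32-byte units (hence the ">> 5"); bits 20:5 go in
 * _PITCH_BITS20TO5 and bit 21 goes in _PITCH_BIT21, so the pitch must be a
 * multiple of 32 bytes and at most (1 << 22) - 32 bytes.  The helper name is
 * made up for illustration.
 */
static inline void ExampleSplitPitch(NvU32 pitch,
                                     NvU32 *pBits20to5,
                                     NvU32 *pBit21)
{
    nvAssert((pitch & 31) == 0);          /* pitch is programmed in 32-byte units */
    *pBits20to5 = NvU32_LO16(pitch >> 5); /* bits 20:5 */
    *pBit21 = (pitch >> 21) & 1;          /* bit 21 */
}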
|
||||
291
src/common/unix/nvidia-3d/src/nvidia-3d-surface.c
Normal file
@@ -0,0 +1,291 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-surface.h"
|
||||
#include "nvidia-push-utils.h" /* nvPushIsAmodel() */
|
||||
|
||||
#include <nvos.h>
|
||||
|
||||
static void FreeSurface(
|
||||
Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
int sd;
|
||||
|
||||
for (sd = ARRAY_LEN(pPushDevice->subDevice) - 1;
|
||||
sd >= 0;
|
||||
sd--) {
|
||||
if (p3dChannel->surface.handle[sd]) {
|
||||
NvU32 ret = pPushDevice->pImports->rmApiFree(
|
||||
pPushDevice,
|
||||
pPushDevice->subDevice[sd].deviceHandle,
|
||||
p3dChannel->surface.handle[sd]);
|
||||
nvAssert(ret == NVOS_STATUS_SUCCESS);
|
||||
(void)ret;
|
||||
p3dChannel->surface.handle[sd] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static NvBool AllocSurface(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU64 size)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
int sd;
|
||||
|
||||
for (sd = 0;
|
||||
sd < ARRAY_LEN(pPushDevice->subDevice) &&
|
||||
pPushDevice->subDevice[sd].deviceHandle != 0;
|
||||
sd++) {
|
||||
|
||||
NVOS32_PARAMETERS params = {
|
||||
.hRoot = pPushDevice->clientHandle,
|
||||
.hObjectParent = pPushDevice->subDevice[sd].deviceHandle,
|
||||
.function = NVOS32_FUNCTION_ALLOC_SIZE,
|
||||
.data.AllocSize.owner = pPushDevice->clientHandle,
|
||||
.data.AllocSize.type = NVOS32_TYPE_SHADER_PROGRAM,
|
||||
.data.AllocSize.size = size,
|
||||
.data.AllocSize.attr =
|
||||
DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM) |
|
||||
DRF_DEF(OS32, _ATTR, _PHYSICALITY, _ALLOW_NONCONTIGUOUS) |
|
||||
DRF_DEF(OS32, _ATTR, _COHERENCY, _WRITE_COMBINE),
|
||||
.data.AllocSize.attr2 =
|
||||
DRF_DEF(OS32, _ATTR2, _GPU_CACHEABLE, _YES),
|
||||
.data.AllocSize.flags = 0,
|
||||
.data.AllocSize.alignment = 4096,
|
||||
};
|
||||
|
||||
NvU32 ret = pImports->rmApiVidHeapControl(pPushDevice, ¶ms);
|
||||
|
||||
if (ret != NVOS_STATUS_SUCCESS) {
|
||||
FreeSurface(p3dChannel);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
p3dChannel->surface.handle[sd] = params.data.AllocSize.hMemory;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void UnmapSurface(
|
||||
const Nv3dChannelRec *p3dChannel,
|
||||
NvU64 gpuAddress)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
int sd;
|
||||
|
||||
for (sd = ARRAY_LEN(p3dChannel->surface.handle) - 1; sd >= 0; sd--) {
|
||||
if (p3dChannel->surface.handle[sd]) {
|
||||
NvU32 ret = pImports->rmApiUnmapMemoryDma(
|
||||
pPushDevice,
|
||||
pPushDevice->subDevice[sd].deviceHandle,
|
||||
pPushDevice->subDevice[sd].gpuVASpaceCtxDma,
|
||||
p3dChannel->surface.handle[sd],
|
||||
0,
|
||||
gpuAddress);
|
||||
nvAssert(ret == NVOS_STATUS_SUCCESS);
|
||||
(void)ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static NvU64 MapSurface(
|
||||
const Nv3dChannelRec *p3dChannel,
|
||||
NvU64 size)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
NvU64 gpuAddress = 0;
|
||||
int sd;
|
||||
|
||||
for (sd = 0;
|
||||
sd < ARRAY_LEN(p3dChannel->surface.handle) &&
|
||||
p3dChannel->surface.handle[sd] != 0;
|
||||
sd++) {
|
||||
NvU32 flags = DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);
|
||||
NvU64 thisGpuAddress;
|
||||
|
||||
if (sd == 0) {
|
||||
/* For the first device, RM assigns a virtual address. */
|
||||
thisGpuAddress = 0;
|
||||
} else {
|
||||
/* For subsequent devices, use the same virtual address. */
|
||||
flags = FLD_SET_DRF(OS46, _FLAGS, _DMA_OFFSET_FIXED, _TRUE, flags);
|
||||
nvAssert(gpuAddress != 0);
|
||||
thisGpuAddress = gpuAddress;
|
||||
}
|
||||
|
||||
NvU32 ret = pImports->rmApiMapMemoryDma(pPushDevice,
|
||||
pPushDevice->subDevice[sd].deviceHandle,
|
||||
pPushDevice->subDevice[sd].gpuVASpaceCtxDma,
|
||||
p3dChannel->surface.handle[sd],
|
||||
0,
|
||||
size,
|
||||
flags,
|
||||
&thisGpuAddress);
|
||||
if (ret != NVOS_STATUS_SUCCESS) {
|
||||
if (sd != 0) {
|
||||
/* Clean up earlier successful mappings */
|
||||
UnmapSurface(p3dChannel, gpuAddress);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (sd == 0) {
|
||||
gpuAddress = thisGpuAddress;
|
||||
} else {
|
||||
nvAssert(gpuAddress == thisGpuAddress);
|
||||
}
|
||||
}
|
||||
|
||||
return gpuAddress;
|
||||
}
|
||||
|
||||
NvBool nv3dAllocChannelSurface(Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
const NvU64 size = p3dChannel->surface.totalSize;
|
||||
NvU64 gpuAddress;
|
||||
|
||||
if (!AllocSurface(p3dChannel, size)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
gpuAddress = MapSurface(p3dChannel, size);
|
||||
|
||||
if (gpuAddress == 0) {
|
||||
FreeSurface(p3dChannel);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
p3dChannel->surface.gpuAddress = gpuAddress;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
void nv3dFreeChannelSurface(Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
|
||||
if (p3dChannel->surface.gpuAddress != 0) {
|
||||
/*
|
||||
* If the surface is mapped into our channel, we need to ensure
|
||||
* that any methods in the channel that might reference the
|
||||
* gpuAddress have idled before we unmap the address.
|
||||
*/
|
||||
nvPushIdleChannel(p3dChannel->pPushChannel);
|
||||
|
||||
UnmapSurface(p3dChannel,
|
||||
p3dChannel->surface.gpuAddress);
|
||||
p3dChannel->surface.gpuAddress = 0;
|
||||
}
|
||||
|
||||
FreeSurface(p3dChannel);
|
||||
}
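/*
 * Illustrative sketch (not part of the original file): typical lifecycle of
 * the per-channel surface.  nv3dAllocChannelSurface() expects
 * p3dChannel->surface.totalSize to have been computed already (see
 * _nv3dAssignSurfaceOffsets() below); the Example* names are made up.
 */
static NvBool ExampleSetUpChannelSurface(Nv3dChannelPtr p3dChannel)
{
    if (!nv3dAllocChannelSurface(p3dChannel)) {
        return FALSE;
    }
    /* ... push methods that reference p3dChannel->surface.gpuAddress ... */
    return TRUE;
}

static void ExampleTearDownChannelSurface(Nv3dChannelPtr p3dChannel)
{
    /* Safe even if the surface was never mapped: nv3dFreeChannelSurface()
     * only idles the channel and unmaps when gpuAddress is non-zero. */
    nv3dFreeChannelSurface(p3dChannel);
}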
|
||||
|
||||
/*
 * The Nv3dChannelRec's surface contains:
 *
 *   programLocalMemory
 *   programCode
 *   programConstants
 *   Nv3dTexture[numTextures]
 *   bindlessTextureConstantBuffer (optional)
 *   Nv3dConstantBuffer[numConstantBuffers]
 *   vertexStreams
 *
 * where all items are aligned to NV3D_TEXTURE_PITCH_ALIGNMENT.
 *
 * Compute all the offsets into the surface, and the total surface size.
 *
 * XXX TODO: use correct alignment for all items, rather than
 * NV3D_TEXTURE_PITCH_ALIGNMENT.
 */
|
||||
void _nv3dAssignSurfaceOffsets(
|
||||
const Nv3dAllocChannelStateParams *pParams,
|
||||
Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
const NvU32 programPrefetchPadding = 2048;
|
||||
|
||||
NvU64 offset = 0;
|
||||
enum Nv3dVertexAttributeStreamType stream;
|
||||
|
||||
/*
|
||||
* Program local memory requires at least 4k alignment. So, place
|
||||
* it at the start of the surface.
|
||||
*/
|
||||
p3dChannel->surface.programLocalMemoryOffset = offset;
|
||||
|
||||
offset += p3dChannel->programLocalMemorySize;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.programOffset = offset;
|
||||
|
||||
offset += p3dChannel->programs.code.decompressedSize;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.programConstantsOffset = offset;
|
||||
|
||||
offset += p3dChannel->programs.constants.size;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.textureOffset = offset;
|
||||
|
||||
offset += (sizeof(Nv3dTexture) * pParams->numTextures);
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.bindlessTextureConstantBufferOffset = offset;
|
||||
offset += NV3D_CONSTANT_BUFFER_SIZE;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.constantBufferOffset = offset;
|
||||
|
||||
offset += (NV3D_CONSTANT_BUFFER_SIZE * pParams->numConstantBuffers);
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
/*
|
||||
* TODO: not all nvidia-3d host drivers will require the vertex stream
|
||||
* memory; maybe host drivers should opt in?
|
||||
*/
|
||||
for (stream = NV3D_VERTEX_ATTRIBUTE_STREAM_FIRST;
|
||||
stream < NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT;
|
||||
stream++) {
|
||||
|
||||
p3dChannel->surface.vertexStreamOffset[stream] = offset;
|
||||
|
||||
offset += NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the total surface size is large enough to cover any
|
||||
* potential prefetch region.
|
||||
*/
|
||||
p3dChannel->surface.totalSize =
|
||||
NV_MAX(p3dChannel->surface.programOffset + programPrefetchPadding,
|
||||
offset);
|
||||
}
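/*
 * Illustrative sketch (not part of the original file): the placement
 * arithmetic used above, in isolation.  Each region starts at the current
 * offset, and the offset is then rounded up to NV3D_TEXTURE_PITCH_ALIGNMENT
 * for the next region; the helper name is made up.
 */
static NvU64 ExamplePlaceRegion(NvU64 *pOffset, NvU64 regionSize)
{
    const NvU64 regionOffset = *pOffset;

    *pOffset = NV_ALIGN_UP(regionOffset + regionSize,
                           NV3D_TEXTURE_PITCH_ALIGNMENT);

    return regionOffset;
}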
|
||||
56
src/common/unix/nvidia-3d/src/nvidia-3d-turing.c
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-turing.h"
|
||||
#include "nvidia-3d-pascal.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include "class/clc597.h"
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dInitChannelTuring(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
_nv3dInitChannelPascal(p3dChannel);
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NVC597_SET_SPH_VERSION, 2);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_V(C597, SET_SPH_VERSION, CURRENT, 4) |
|
||||
NV3D_V(C597, SET_SPH_VERSION, OLDEST_SUPPORTED, 4));
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_V(C597, CHECK_SPH_VERSION, CURRENT, 4) |
|
||||
NV3D_V(C597, CHECK_SPH_VERSION, OLDEST_SUPPORTED, 4));
|
||||
}
|
||||
|
||||
void _nv3dSetVertexStreamEndTuring(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
const Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NVC597_SET_VERTEX_STREAM_SIZE_A(stream), 2);
|
||||
nvPushSetMethodDataU64(p, pStream->end - pStream->current);
|
||||
}
|
||||
531
src/common/unix/nvidia-3d/src/nvidia-3d-vertex-arrays.c
Normal file
@@ -0,0 +1,531 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-vertex-arrays.h"
|
||||
#include "nvidia-3d-types-priv.h"
|
||||
#include "nvidia-3d-constant-buffers.h"
|
||||
#include "nvidia-3d-utils.h"
|
||||
|
||||
#include <class/cl9097.h>
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
static void InitializeStreamFromSurf(
|
||||
const Nv3dStreamSurfaceRec *pSurf,
|
||||
Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
pStream->current = pSurf->gpuAddress;
|
||||
pStream->end = pSurf->gpuAddress + pSurf->size;
|
||||
pStream->stride = 0;
|
||||
pStream->nextLaunch = 0;
|
||||
}
|
||||
|
||||
static void InitializeStream(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
const Nv3dStreamSurfaceRec tmpSurf = {
|
||||
.gpuAddress =
|
||||
nv3dGetVertexAttributestreamGpuAddress(p3dChannel, stream),
|
||||
.size = NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE,
|
||||
};
|
||||
InitializeStreamFromSurf(&tmpSurf, pStream);
|
||||
}
|
||||
|
||||
void _nv3dInitializeStreams(
|
||||
Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
enum Nv3dVertexAttributeStreamType stream;
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
// Disable vertex attribute vectors 16 through 31 (scalars 64 through 127).
|
||||
// We don't use them.
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_DA_OUTPUT_ATTRIBUTE_SKIP_MASK_B(0), 2);
|
||||
nvPushSetMethodData(p, ~0);
|
||||
nvPushSetMethodData(p, ~0);
|
||||
|
||||
for (stream = NV3D_VERTEX_ATTRIBUTE_STREAM_FIRST;
|
||||
stream < NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT;
|
||||
stream++) {
|
||||
|
||||
Nv3dVertexAttributeStreamRec *pStream =
|
||||
&p3dChannel->vertexStreams[stream];
|
||||
|
||||
InitializeStream(p3dChannel, stream, pStream);
|
||||
}
|
||||
}
|
||||
|
||||
static void AdvanceStream(
|
||||
Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
pStream->current += pStream->stride * pStream->nextLaunch;
|
||||
nvAssert(pStream->current <= pStream->end);
|
||||
pStream->nextLaunch = 0;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Configure a vertex attribute stream to fetch from a surface.
|
||||
*
|
||||
* \param[in] p3dChannel The channel
|
||||
* \param[in] stream The vertex attribute stream
|
||||
* \param[in] pStream The vertex attribute stream tracking structure
|
||||
*/
|
||||
static void
|
||||
SetVertexStreamSurface(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
const Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_STREAM_A_FORMAT(stream), 3);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_V(9097, SET_VERTEX_STREAM_A_FORMAT, STRIDE, pStream->stride) |
|
||||
NV3D_C(9097, SET_VERTEX_STREAM_A_FORMAT, ENABLE, TRUE));
|
||||
nvPushSetMethodDataU64(p, pStream->current);
|
||||
|
||||
pHal->setVertexStreamEnd(p3dChannel, stream, pStream);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Reset a vertex attribute stream to the specified offset, while leaving its
|
||||
* stride and limit alone.
|
||||
*/
|
||||
static void
|
||||
SetVertexStreamOffset(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
NvU64 offset)
|
||||
{
|
||||
const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
Nv3dVertexAttributeStreamRec *pStream = &p3dChannel->vertexStreams[stream];
|
||||
|
||||
pStream->current = offset;
|
||||
pStream->nextLaunch = 0;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_STREAM_A_LOCATION_A(stream), 2);
|
||||
nvPushSetMethodDataU64(p, offset);
|
||||
|
||||
pHal->setVertexStreamEnd(p3dChannel, stream, pStream);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Point the constant buffer selector at the next location for data in the
|
||||
* given stream.
|
||||
*/
|
||||
static void SelectCbForStream(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream)
|
||||
{
|
||||
Nv3dVertexAttributeStreamRec *pStream = &p3dChannel->vertexStreams[stream];
|
||||
const NvU64 gpuAddress =
|
||||
nv3dGetVertexAttributestreamGpuAddress(p3dChannel, stream);
|
||||
int startOffset = pStream->current + pStream->stride * pStream->nextLaunch -
|
||||
gpuAddress;
|
||||
|
||||
nv3dSelectCbAddress(p3dChannel, gpuAddress,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE);
|
||||
nv3dSetConstantBufferOffset(p3dChannel, startOffset);
|
||||
}
|
||||
|
||||
/*!
 * Configure the DA and VAF to fetch from vertex attribute streams.
 *
 * This function configures the Data Assembler (DA) and Vertex Attribute Fetch
 * (VAF) units to fetch vertex attributes from pSurf using a format configured
 * by the 'attribs' array.
 *
 * It configures two streams: NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC and
 * NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC.  The static stream contains attributes
 * that are the same across all vertices.  The dynamic stream contains
 * attributes that are different for each vertex.  The static stream sources
 * from the next available location in the static vertex data surface and uses
 * a stride of 0, so that all vertices in an array fetch the same values for
 * those attributes.  Then, it configures the dynamic stream to fetch starting
 * at offset 0 of pSurf, unless pSurf is NULL, in which case it starts at the
 * appropriate offset in the dynamic vertex data surface.
 *
 * The 'attribs' array stores Nv3dVertexAttributeInfoRecs, terminated with an
 * element where attributeType is NV3D_VERTEX_ATTRIBUTE_END.  Each element
 * contains:
 *
 * (a) An enum Nv3dVertexAttributeType indicating which vertex attribute this
 *     array element describes.
 *
 * (b) An enum Nv3dVertexAttributeDataType indicating the data type to use for
 *     the attribute.
 *
 * (c) An enum Nv3dVertexAttributeStreamType indicating which stream should use
 *     the attribute.
 *
 * If any attributes are enabled as static, this function selects the static
 * stream surface as the current constant buffer.  The caller should push the
 * appropriate vertex data.
 *
 * Note that if you launch rendering using vertex attributes from a surface,
 * you must wait for idle before changing those attributes later.  Otherwise,
 * the VAF unit may fetch the new data instead of the old data, causing
 * corruption.
 *
 * \param[in]  p3dChannel  The 3d channel to program
 * \param[in]  attribs     Description of vertex attributes (see above)
 * \param[in]  pSurf       Surface that dynamic attributes will be fetched from
 *
 * \return The size in bytes of the static attribute data
 */
|
||||
int nv3dVasSetup(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
const Nv3dVertexAttributeInfoRec *attribs,
|
||||
const Nv3dStreamSurfaceRec *pSurf)
|
||||
{
|
||||
/* This table is indexed by enum Nv3dVertexAttributeDataType. */
|
||||
static const struct {
|
||||
NvU32 size;
|
||||
NvU32 setVertexAttributeA;
|
||||
} attribTypeTable[] = {
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_32_FLOAT] = {
|
||||
sizeof(float) * 2,
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, R32_G32) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_FLOAT),
|
||||
},
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_32_FLOAT] = {
|
||||
sizeof(float) * 4,
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, R32_G32_B32_A32) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_FLOAT),
|
||||
},
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_16_UNORM] = {
|
||||
sizeof(NvU16) * 4,
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, R16_G16_B16_A16) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_UNORM),
|
||||
},
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_8_UNORM] = {
|
||||
sizeof(NvU8) * 4,
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, A8B8G8R8) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_UNORM),
|
||||
},
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_16_SSCALED] = {
|
||||
sizeof(NvU32),
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, R16_G16) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_SSCALED),
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
Nv3dVertexAttributeStreamRec *pStatic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC];
|
||||
Nv3dVertexAttributeStreamRec *pDynamic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC];
|
||||
int staticOffset = 0, dynamicOffset = 0;
|
||||
Nv3dVertexAttributeStreamRec tmpStreamRec;
|
||||
NvU32 stride = 0;
|
||||
NvU64 daEnableMask = 0, daSkipMask;
|
||||
NvBool hasStaticAttribs = FALSE;
|
||||
NvBool hasPositionAttrib = FALSE;
|
||||
int i;
|
||||
|
||||
// POSITION must be specified and must be a dynamic attribute.
|
||||
for (i = 0; attribs[i].attributeType != NV3D_VERTEX_ATTRIBUTE_END; i++) {
|
||||
if (attribs[i].attributeType != NV3D_VERTEX_ATTRIBUTE_POSITION) {
|
||||
continue;
|
||||
}
|
||||
hasPositionAttrib = TRUE;
|
||||
nvAssert(attribs[i].streamType == NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC);
|
||||
}
|
||||
if (!hasPositionAttrib) {
|
||||
nvAssert(!"POSITION vertex attribute not specified.");
|
||||
}
|
||||
|
||||
// Configure the DA output skip mask so that it only fetches attributes for
|
||||
// enabled streams.
|
||||
for (i = 0; attribs[i].attributeType != NV3D_VERTEX_ATTRIBUTE_END; i++) {
|
||||
const enum Nv3dVertexAttributeType attrib = attribs[i].attributeType;
|
||||
// Always enable all four components of the value. This causes the
|
||||
// DA to generate default values if there are not enough components
|
||||
// in the pulled vertex data. This sets W=1 if W is missing.
|
||||
//
|
||||
// Otherwise, the value would come from the default the hardware
|
||||
// generates as input to the vertex shader when that attribute is
|
||||
// skipped in the DA, which is specified in the .mfs file as, "a
|
||||
// default value is inserted".
|
||||
//
|
||||
// Note all attribute values are expected to be less than 16 (i.e., fit
|
||||
// in MASK_A; attributes 16 through 31 would go in MASK_B).
|
||||
nvAssert(attrib < 16);
|
||||
daEnableMask |= 0xfULL << (4 * attrib);
|
||||
}
|
||||
daSkipMask = ~daEnableMask;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_DA_OUTPUT_ATTRIBUTE_SKIP_MASK_A(0), 2);
|
||||
nvPushSetMethodData(p, NvU64_LO32(daSkipMask));
|
||||
nvPushSetMethodData(p, NvU64_HI32(daSkipMask));
|
||||
|
||||
// Configure the attributes to fetch from the streams.
|
||||
for (i = 0; attribs[i].attributeType != NV3D_VERTEX_ATTRIBUTE_END; i++) {
|
||||
|
||||
const enum Nv3dVertexAttributeType attrib = attribs[i].attributeType;
|
||||
const enum Nv3dVertexAttributeDataType dataType = attribs[i].dataType;
|
||||
const enum Nv3dVertexAttributeStreamType stream = attribs[i].streamType;
|
||||
const NvU32 size = attribTypeTable[dataType].size;
|
||||
const NvU32 setVertexAttributeA =
|
||||
attribTypeTable[dataType].setVertexAttributeA;
|
||||
|
||||
int offset;
|
||||
|
||||
if (stream == NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC) {
|
||||
offset = staticOffset;
|
||||
staticOffset += size;
|
||||
hasStaticAttribs = TRUE;
|
||||
} else {
|
||||
nvAssert(stream == NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC);
|
||||
offset = dynamicOffset;
|
||||
dynamicOffset += size;
|
||||
stride += size;
|
||||
}
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_ATTRIBUTE_A(attrib), 1);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_V(9097, SET_VERTEX_ATTRIBUTE_A, STREAM, stream) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, SOURCE, ACTIVE) |
|
||||
NV3D_V(9097, SET_VERTEX_ATTRIBUTE_A, OFFSET, offset) |
|
||||
setVertexAttributeA);
|
||||
}
|
||||
|
||||
|
||||
// Advance the stream past any attribs used previously.
|
||||
AdvanceStream(pStatic);
|
||||
// Although we may have set a non-zero stride on a previous call to this
|
||||
// function (mostly so the bookkeeping above works out), as far as the GPU
|
||||
// is concerned we should program a stride of 0.
|
||||
pStatic->stride = 0;
|
||||
|
||||
// See if we need to wrap the static stream.
|
||||
if (pStatic->current + staticOffset >= pStatic->end) {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_WAIT_FOR_IDLE, 0);
|
||||
|
||||
// Reset both the static and dynamic streams, since we know the GPU is
|
||||
// done reading from both.
|
||||
InitializeStream(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC, pStatic);
|
||||
InitializeStream(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC, pDynamic);
|
||||
} else if (!pSurf) {
|
||||
// Advance the dynamic stream past any attribs used previously (unless
|
||||
// we just reset the stream).
|
||||
AdvanceStream(pDynamic);
|
||||
}
|
||||
|
||||
/* override dynamic stream with pSurf */
|
||||
if (pSurf) {
|
||||
pDynamic = &tmpStreamRec;
|
||||
InitializeStreamFromSurf(pSurf, pDynamic);
|
||||
}
|
||||
|
||||
// Configure the streams. A stride of 0 makes it read the same attribute
|
||||
// each time.
|
||||
nvAssert(pStatic->stride == 0);
|
||||
SetVertexStreamSurface(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC,
|
||||
pStatic);
|
||||
nvAssert(stride != 0);
|
||||
pDynamic->stride = stride;
|
||||
SetVertexStreamSurface(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC,
|
||||
pDynamic);
|
||||
|
||||
// If there are static attributes, set up the constant buffer selector.
|
||||
if (hasStaticAttribs) {
|
||||
SelectCbForStream(p3dChannel, NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC);
|
||||
|
||||
// Override the static stream's "stride" so that the next time this
|
||||
// function is called it will set staticStartOffset to right after the
|
||||
// static data here.
|
||||
pStatic->stride = staticOffset;
|
||||
pStatic->nextLaunch = 1;
|
||||
}
|
||||
|
||||
return staticOffset;
|
||||
}
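/*
 * Illustrative sketch (not part of the original file): a caller-side use of
 * nv3dVasSetup().  The POSITION attribute, the data types and the stream
 * names appear above; the COLOR attribute, the Example* name and the exact
 * vertex layout are assumptions made for this sketch.
 */
static void ExampleSetupVertexAttribs(Nv3dChannelRec *p3dChannel)
{
    static const Nv3dVertexAttributeInfoRec attribs[] = {
        { .attributeType = NV3D_VERTEX_ATTRIBUTE_POSITION,
          .dataType      = NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_32_FLOAT,
          .streamType    = NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC },
        { .attributeType = NV3D_VERTEX_ATTRIBUTE_COLOR, /* assumed enumerant */
          .dataType      = NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_8_UNORM,
          .streamType    = NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC },
        { .attributeType = NV3D_VERTEX_ATTRIBUTE_END },
    };
    const NvU8 color[4] = { 0xff, 0x00, 0x00, 0xff };
    int staticSize;

    /* NULL pSurf: dynamic attributes come from the per-channel dynamic
     * vertex data surface managed by this file. */
    staticSize = nv3dVasSetup(p3dChannel, attribs, NULL);

    /* nv3dVasSetup() left the constant buffer selector pointing at the
     * static stream; push the static attribute data now. */
    nvAssert(staticSize == sizeof(color));
    nv3dPushConstants(p3dChannel, sizeof(color), color);
}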
|
||||
|
||||
/*!
|
||||
* Check if uploading the specified number of vertices will write past the end
|
||||
* of the given vertex stream.
|
||||
*/
|
||||
static NvBool WillVertexDataWrap(
|
||||
Nv3dVertexAttributeStreamRec *pStream,
|
||||
int n)
|
||||
{
|
||||
// >= here is intentional: It's illegal to set the constant buffer selector
|
||||
// past the end of the constant buffer, which could happen if the last
|
||||
// primitive drawn exactly fills the dynamic data stream and another
|
||||
// primitive is drawn. Then the next call to nv3dVasSelectCbForVertexData()
|
||||
// would cause a channel error.
|
||||
//
|
||||
// Instead of trying to detect that case there, just disallow completely
|
||||
// filling the stream so it wraps slightly earlier.
|
||||
return pStream->current + pStream->stride * (pStream->nextLaunch + n) >=
|
||||
pStream->end;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Launch vertices and update tracked vertex array state.
|
||||
*/
|
||||
static void DrawVertexArray(Nv3dChannelRec *p3dChannel, int numVerts)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
Nv3dVertexAttributeStreamRec *pDynamic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC];
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_VERTEX_ARRAY_START, 2);
|
||||
nvPushSetMethodData(p, pDynamic->nextLaunch);
|
||||
nvPushSetMethodData(p, numVerts); // NV9097_DRAW_VERTEX_ARRAY
|
||||
|
||||
pDynamic->nextLaunch += numVerts;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Reset both the static and dynamic vertex array streams to the base of the
|
||||
* corresponding surfaces.
|
||||
*/
|
||||
static void WrapVertexStreams(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
Nv3dVertexAttributeStreamRec *pStatic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC];
|
||||
const NvU64 gpuAddress =
|
||||
nv3dGetVertexAttributestreamGpuAddress(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC);
|
||||
const NvU32 primMode = p3dChannel->currentPrimitiveMode;
|
||||
|
||||
// Set the software tracking for the static stream so it starts over at the
|
||||
// beginning next time nv3dVasSetup() is called, but leave the hardware
|
||||
// configured to read the data that's already there, in case vertices
|
||||
// submitted later still need it.
|
||||
pStatic->current = pStatic->end;
|
||||
pStatic->nextLaunch = 0;
|
||||
|
||||
// The hardware can't handle changing the vertex stream offset inside a
|
||||
// BEGIN / END block, so temporarily end now.
|
||||
nv3dVasEnd(p3dChannel);
|
||||
|
||||
// Wrap the dynamic vertex stream.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_WAIT_FOR_IDLE, 0);
|
||||
SetVertexStreamOffset(p3dChannel, NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC,
|
||||
gpuAddress);
|
||||
|
||||
nv3dVasBegin(p3dChannel, primMode);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Point the constant buffer selector at the next location for vertex data in
|
||||
* the dynamic data surface.
|
||||
*/
|
||||
void nv3dVasSelectCbForVertexData(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
SelectCbForStream(p3dChannel, NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC);
|
||||
}
|
||||
|
||||
/*!
 * Upload and draw vertices using the dynamic vertex data surface.
 *
 * This function uploads data to the dynamic vertex attribute stream surface
 * using inline constant buffer updates, starting at the next free space in
 * that surface, and then launches rendering.  The number of vertices rendered
 * is specified by 'numVerts'.
 *
 * Static data should have already been written to the static vertex attribute
 * stream surface by the caller.
 *
 * If not enough space is available in the dynamic data surface, this function
 * waits for idle before wrapping to the beginning of the surface, to avoid
 * conflicting with earlier rendering that might be in flight.
 *
 * It is up to the caller to send BEGIN and END methods around calls to this
 * function.
 *
 * \param[in]  p3dChannel  The channel
 * \param[in]  data        Data to upload
 * \param[in]  numVerts    Number of vertices rendered
 */
|
||||
void nv3dVasDrawInlineVerts(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
const void *data,
|
||||
int numVerts)
|
||||
{
|
||||
if (data != NULL) {
|
||||
Nv3dVertexAttributeStreamRec *pDynamic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC];
|
||||
|
||||
// See if we need to wrap the dynamic stream.
|
||||
if (WillVertexDataWrap(pDynamic, numVerts)) {
|
||||
WrapVertexStreams(p3dChannel);
|
||||
}
|
||||
|
||||
nv3dVasSelectCbForVertexData(p3dChannel);
|
||||
nv3dPushConstants(p3dChannel, pDynamic->stride * numVerts, data);
|
||||
}
|
||||
|
||||
DrawVertexArray(p3dChannel, numVerts);
|
||||
}
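/*
 * Illustrative sketch (not part of the original file): drawing with inline
 * vertex data.  The caller brackets the draw with nv3dVasBegin()/nv3dVasEnd()
 * (used by WrapVertexStreams() above); the Example* name, the vertex layout
 * and the NV9097_BEGIN_OP_TRIANGLE_STRIP enumerant are assumptions, and the
 * layout must match what nv3dVasSetup() configured for the dynamic stream.
 */
static void ExampleDrawQuad(
    Nv3dChannelRec *p3dChannel,
    const float xy[4][2]) /* four 2_32_FLOAT POSITION vertices */
{
    nv3dVasBegin(p3dChannel, NV9097_BEGIN_OP_TRIANGLE_STRIP);
    nv3dVasDrawInlineVerts(p3dChannel, xy, 4);
    nv3dVasEnd(p3dChannel);
}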
|
||||
|
||||
NvBool nv3dVasMakeRoom(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU32 pendingVerts,
|
||||
NvU32 moreVerts)
|
||||
{
|
||||
Nv3dVertexAttributeStreamRec *pDynamic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC];
|
||||
|
||||
const NvBool wrap = WillVertexDataWrap(pDynamic, pendingVerts + moreVerts);
|
||||
|
||||
// If pendingVerts + moreVerts would exceed the dynamic vertex array buffer,
|
||||
// flush it now and start over at the beginning.
|
||||
if (wrap) {
|
||||
DrawVertexArray(p3dChannel, pendingVerts);
|
||||
WrapVertexStreams(p3dChannel);
|
||||
|
||||
// Reset the constant buffer update pointer to the beginning of the
|
||||
// dynamic vertex data buffer.
|
||||
nv3dSetConstantBufferOffset(p3dChannel, 0);
|
||||
}
|
||||
|
||||
return wrap;
|
||||
}
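/*
 * Illustrative sketch (not part of the original file): batching vertices with
 * nv3dVasMakeRoom().  This assumes nv3dVasSelectCbForVertexData() was called
 * when the batch began; pendingVerts counts vertices whose data has been
 * pushed but not yet launched.  The Example* names and the one-float-pair
 * vertex layout are assumptions.
 */
static NvU32 ExamplePushVertex(
    Nv3dChannelRec *p3dChannel,
    NvU32 pendingVerts,
    const float xy[2])
{
    if (nv3dVasMakeRoom(p3dChannel, pendingVerts, 1)) {
        /* The pending vertices were launched and the stream wrapped. */
        pendingVerts = 0;
    }
    nv3dPushConstants(p3dChannel, sizeof(float) * 2, xy);
    return pendingVerts + 1;
}

/* Launch whatever is still pending, without uploading more data. */
static void ExampleFlushPendingVerts(
    Nv3dChannelRec *p3dChannel,
    NvU32 pendingVerts)
{
    nv3dVasDrawInlineVerts(p3dChannel, NULL, pendingVerts);
}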
|
||||
|
||||
41
src/common/unix/nvidia-3d/src/nvidia-3d-volta.c
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-volta.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include "class/clc397.h"
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dSetProgramOffsetVolta(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU32 stage,
|
||||
NvU32 offset)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
const NvU64 gpuAddress = nv3dGetProgramGpuAddress(p3dChannel) + offset;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(stage), 2);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NVIDIA_HEADSURFACE_CONSTANTS_H_
|
||||
#define _NVIDIA_HEADSURFACE_CONSTANTS_H_
|
||||
|
||||
/* Possible values for NvHsFragmentUniforms::resamplingMethod */
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_TRIANGULAR 1
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_BELL_SHAPED 2
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_BSPLINE 3
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_ADAPTIVE_TRIANGULAR 4
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_ADAPTIVE_BELL_SHAPED 5
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_ADAPTIVE_BSPLINE 6
|
||||
|
||||
/* Uniform sampler binding indices */
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_PRIMARY_TEX 0
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_CURSOR_TEX 1
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_BLEND_TEX 2
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_OFFSET_TEX 3
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_OVERLAY_TEX 4
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_LUT_TEX 5
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_NUM 6
|
||||
|
||||
#endif /* _NVIDIA_HEADSURFACE_CONSTANTS_H_ */
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_HEADSURFACE_TYPES_H__
|
||||
#define __NVIDIA_HEADSURFACE_TYPES_H__
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvidia-3d-types.h"
|
||||
|
||||
typedef struct _NvHsVertexUniforms {
|
||||
Nv3dVertexAttrib2U vertexScale;
|
||||
Nv3dVertexAttrib2U primaryTextureScale;
|
||||
Nv3dVertexAttrib2U primaryTextureBias;
|
||||
Nv3dVertexAttrib2S cursorPosition;
|
||||
} __attribute__((packed)) NvHsVertexUniforms;
|
||||
|
||||
typedef struct _NvHsFragmentUniforms { // Byte offsets
|
||||
Nv3dVertexAttrib2U vertexScale; // 0
|
||||
Nv3dVertexAttrib3U numLutEntries NV_ALIGN_BYTES(16); // 16
|
||||
Nv3dVertexAttrib2U primaryTextureBias NV_ALIGN_BYTES(8); // 32
|
||||
Nv3dVertexAttrib2S cursorPosition; // 40
|
||||
// Although this is really a 3x3 matrix, GLSL std140 uniform block
|
||||
// layout says that the column stride is equal to a vec4.
|
||||
Nv3dFloat transform[3][4]; // 48
|
||||
Nv3dVertexAttrib2F pixelShiftOffset; // 96
|
||||
Nv3dVertexAttrib3F luminanceCoefficient NV_ALIGN_BYTES(16); // 112
|
||||
Nv3dVertexAttrib2F chromaCoefficient NV_ALIGN_BYTES(8); // 128
|
||||
Nv3dFloat luminanceScale; // 136
|
||||
Nv3dFloat luminanceBlackLevel; // 140
|
||||
Nv3dFloat chrominanceScale; // 144
|
||||
Nv3dFloat chrominanceBlackLevel; // 148
|
||||
NvU32 useSatHue; // 152
|
||||
Nv3dFloat satCos; // 156
|
||||
int resamplingMethod; // 160
|
||||
} __attribute__((packed)) NvHsFragmentUniforms;
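/*
 * Illustrative sketch (not part of the original file): the byte offsets in
 * the comments above can be verified at build time.  This assumes offsetof
 * (<stddef.h>) and ct_assert() (nvctassert.h) are available in the including
 * translation unit; any equivalent static assertion works.
 */
#if 0
ct_assert(offsetof(NvHsFragmentUniforms, transform) == 48);
ct_assert(offsetof(NvHsFragmentUniforms, luminanceCoefficient) == 112);
ct_assert(offsetof(NvHsFragmentUniforms, resamplingMethod) == 160);
#endif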
|
||||
|
||||
/*
|
||||
* The static warp mesh consists of four vertices, each vertex has six
|
||||
* components: (XY, UVRQ).
|
||||
*/
|
||||
typedef struct {
|
||||
struct {
|
||||
Nv3dFloat x, y, u, v, r, q;
|
||||
} vertex[4];
|
||||
} NvHsStaticWarpMesh;
|
||||
|
||||
#endif /* __NVIDIA_HEADSURFACE_TYPES_H__ */
|
||||
203
src/common/unix/nvidia-push/include/nvidia-push-priv-imports.h
Normal file
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#if !defined(__NVIDIA_PUSH_PRIV_IMPORTS_H__)
|
||||
#define __NVIDIA_PUSH_PRIV_IMPORTS_H__
|
||||
|
||||
#include "nvidia-push-types.h"
|
||||
|
||||
static inline NvU32 nvPushImportRmApiControl(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hObject,
|
||||
NvU32 cmd,
|
||||
void *pParams,
|
||||
NvU32 paramsSize)
|
||||
{
|
||||
return pDevice->pImports->rmApiControl(pDevice, hObject, cmd,
|
||||
pParams, paramsSize);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiAlloc(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hObject,
|
||||
NvU32 hClass,
|
||||
void *pAllocParams)
|
||||
{
|
||||
|
||||
return pDevice->pImports->rmApiAlloc(pDevice, hParent, hObject, hClass,
|
||||
pAllocParams);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiFree(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hObject)
|
||||
{
|
||||
return pDevice->pImports->rmApiFree(pDevice, hParent, hObject);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiMapMemoryDma(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hDma,
|
||||
NvU32 hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 length,
|
||||
NvU32 flags,
|
||||
NvU64 *pDmaOffset)
|
||||
{
|
||||
return pDevice->pImports->rmApiMapMemoryDma(pDevice,
|
||||
hDevice,
|
||||
hDma,
|
||||
hMemory,
|
||||
offset,
|
||||
length,
|
||||
flags,
|
||||
pDmaOffset);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiUnmapMemoryDma(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hDma,
|
||||
NvU32 hMemory,
|
||||
NvU32 flags,
|
||||
NvU64 dmaOffset)
|
||||
{
|
||||
return pDevice->pImports->rmApiUnmapMemoryDma(pDevice,
|
||||
hDevice,
|
||||
hDma,
|
||||
hMemory,
|
||||
flags,
|
||||
dmaOffset);
|
||||
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiAllocMemory64(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hMemory,
|
||||
NvU32 hClass,
|
||||
NvU32 flags,
|
||||
void **ppAddress,
|
||||
NvU64 *pLimit)
|
||||
{
|
||||
return pDevice->pImports->rmApiAllocMemory64(pDevice,
|
||||
hParent,
|
||||
hMemory,
|
||||
hClass,
|
||||
flags,
|
||||
ppAddress,
|
||||
pLimit);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiVidHeapControl(
|
||||
NvPushDevicePtr pDevice,
|
||||
void *pVidHeapControlParms)
|
||||
{
|
||||
return pDevice->pImports->rmApiVidHeapControl(pDevice,
|
||||
pVidHeapControlParms);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiMapMemory(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 length,
|
||||
void **ppLinearAddress,
|
||||
NvU32 flags)
|
||||
{
|
||||
return pDevice->pImports->rmApiMapMemory(pDevice,
|
||||
hDevice,
|
||||
hMemory,
|
||||
offset,
|
||||
length,
|
||||
ppLinearAddress,
|
||||
flags);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiUnmapMemory(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hMemory,
|
||||
void *pLinearAddress,
|
||||
NvU32 flags)
|
||||
{
|
||||
return pDevice->pImports->rmApiUnmapMemory(pDevice,
|
||||
hDevice,
|
||||
hMemory,
|
||||
pLinearAddress,
|
||||
flags);
|
||||
}
|
||||
|
||||
static inline NvU64 nvPushImportGetMilliSeconds(
|
||||
NvPushDevicePtr pDevice)
|
||||
{
|
||||
return pDevice->pImports->getMilliSeconds(pDevice);
|
||||
}
|
||||
|
||||
static inline void nvPushImportYield(
|
||||
NvPushDevicePtr pDevice)
|
||||
{
|
||||
pDevice->pImports->yield(pDevice);
|
||||
}
|
||||
|
||||
static inline NvBool nvPushImportWaitForEvent(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvPushImportEvent *pEvent,
|
||||
NvU64 timeout)
|
||||
{
|
||||
return pDevice->pImports->waitForEvent(pDevice, pEvent, timeout);
|
||||
}
|
||||
|
||||
static inline void nvPushImportEmptyEventFifo(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvPushImportEvent *pEvent)
|
||||
{
|
||||
pDevice->pImports->emptyEventFifo(pDevice, pEvent);
|
||||
}
|
||||
|
||||
static inline void nvPushImportChannelErrorOccurred(
|
||||
NvPushChannelPtr pChannel,
|
||||
NvU32 channelErrCode)
|
||||
{
|
||||
pChannel->pDevice->pImports->channelErrorOccurred(pChannel, channelErrCode);
|
||||
}
|
||||
|
||||
static inline void nvPushImportPushbufferWrapped(
|
||||
NvPushChannelPtr pChannel)
|
||||
{
|
||||
pChannel->pDevice->pImports->pushbufferWrapped(pChannel);
|
||||
}
|
||||
|
||||
#define nvPushImportLogError(_pDevice, ...) \
|
||||
(_pDevice)->pImports->logError((_pDevice), __VA_ARGS__)
|
||||
|
||||
#if defined(DEBUG)
|
||||
#define nvPushImportLogNvDiss(_pChannel, ...) \
|
||||
(_pChannel)->pDevice->pImports->logNvDiss((_pChannel), __VA_ARGS__)
|
||||
#endif /* DEBUG */
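/*
 * Illustrative sketch (not part of the original file): the inline wrappers
 * above assume the host driver supplied an NvPushImports table when the push
 * device was allocated.  The Host* callback names below are made up, and only
 * a subset of the fields referenced above is shown.
 */
#if 0
static const NvPushImports exampleImports = {
    .rmApiControl         = HostRmApiControl,
    .rmApiAlloc           = HostRmApiAlloc,
    .rmApiFree            = HostRmApiFree,
    .rmApiMapMemoryDma    = HostRmApiMapMemoryDma,
    .rmApiUnmapMemoryDma  = HostRmApiUnmapMemoryDma,
    .rmApiVidHeapControl  = HostRmApiVidHeapControl,
    .getMilliSeconds      = HostGetMilliSeconds,
    .yield                = HostYield,
    .waitForEvent         = HostWaitForEvent,
    .emptyEventFifo       = HostEmptyEventFifo,
    .channelErrorOccurred = HostChannelErrorOccurred,
    .pushbufferWrapped    = HostPushbufferWrapped,
    .logError             = HostLogError,
};
#endif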
|
||||
|
||||
#endif /* __NVIDIA_PUSH_PRIV_IMPORTS_H__ */
|
||||
122
src/common/unix/nvidia-push/include/nvidia-push-priv.h
Normal file
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2018 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_PUSH_PRIV_H__
|
||||
#define __NVIDIA_PUSH_PRIV_H__
|
||||
|
||||
#include "nvmisc.h" // NV_ALIGN_UP
|
||||
#include "class/cla16f.h" // NVA16F_GP_ENTRY__SIZE
|
||||
|
||||
/*
 * Push buffer constants
 *
 * "The pushbuffer" consists of several regions packed into a single memory
 * allocation.  In order, they are:
 *   1. The "main" pushbuffer.  Most of the driver pushes methods here;
 *   2. GPFIFO entries;
 *   3. The "progress tracker" pushbuffer.  This is used by the DMA kickoff
 *      code as a reserved area to put semaphore release methods, which we use
 *      to track HOST's progress fetching the pushbuffer.  We also use this to
 *      work around hardware bug 1667921.
 */
|
||||
|
||||
/* Offset of the GPFIFO entries: entry (2) above. */
|
||||
static inline NvU32 __nvPushGpFifoOffset(const NvPushChannelRec *pChannel)
|
||||
{
|
||||
nvAssert(pChannel->main.sizeInBytes != 0);
|
||||
return NV_ALIGN_UP(pChannel->main.sizeInBytes, NVA16F_GP_ENTRY__SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to align each set of methods in the progress tracker pushbuffer to
|
||||
* 128 bytes so that we avoid HW bug 1667921 (on chips that are affected).
|
||||
* This is used for both the start of the GPFIFO segment _and_ the size (for
|
||||
* each GPFIFO entry).
|
||||
*/
|
||||
#define NV_ALIGN_LBDAT_EXTRA_BUG 128
|
||||
/*
|
||||
* Offset of the progress tracker pushbuffer: entry (3) above.
|
||||
*
|
||||
* Note that we always use the appropriate alignment to WAR the LBDAT_EXTRA bug
|
||||
* for the offset. Although this is only necessary on some chips, it's simpler
|
||||
* to always use this alignment.
|
||||
*/
|
||||
static inline NvU32 __nvPushProgressTrackerOffset(
|
||||
const NvPushChannelRec *pChannel)
|
||||
{
|
||||
const NvU32 gpFifoOffset = __nvPushGpFifoOffset(pChannel);
|
||||
const NvU32 gpFifoLength =
|
||||
pChannel->numGpFifoEntries * NVA16F_GP_ENTRY__SIZE;
|
||||
|
||||
nvAssert(gpFifoLength != 0);
|
||||
|
||||
return NV_ALIGN_UP(gpFifoOffset + gpFifoLength, NV_ALIGN_LBDAT_EXTRA_BUG);
|
||||
}
|
||||
|
||||
/* We always write two GPFIFO entries: one for the main pushbuffer, and one
|
||||
* for the progress tracker pushbuffer. */
|
||||
#define NV_PUSH_NUM_GPFIFO_ENTRIES_PER_KICKOFF 2
|
||||
|
||||
/*
|
||||
* Encoding for the progress tracker semaphore payload.
|
||||
* _GET stores dwords, rather than bytes.
|
||||
* _GP_GET stores the number of "pairs" of gpFifo entries.
|
||||
*/
|
||||
#define NV_PUSH_PROGRESS_TRACKER_SEMAPHORE_GET 17:0
|
||||
#define NV_PUSH_PROGRESS_TRACKER_SEMAPHORE_GP_GET 31:18
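/*
 * Illustrative sketch (not part of the original file): packing and unpacking
 * the progress tracker semaphore payload with the DRF helpers from nvmisc.h
 * (included above).  The __nvPushExample* names are made up.
 */
static inline NvU32 __nvPushExamplePackProgress(NvU32 getInDwords,
                                                NvU32 gpGetInPairs)
{
    return DRF_NUM(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GET, getInDwords) |
           DRF_NUM(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GP_GET, gpGetInPairs);
}

static inline NvU32 __nvPushExampleProgressGetBytes(NvU32 payload)
{
    /* _GET stores dwords; convert back to a byte offset. */
    return DRF_VAL(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GET, payload) * 4;
}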
|
||||
|
||||
/*
|
||||
* The number of 0080 RM devices for the given NvPushDevice.
|
||||
* This is 1 for RM SLI and numSubDevices for client SLI.
|
||||
*/
|
||||
static inline int
|
||||
__nvPushGetNumDevices(const NvPushDeviceRec *pDevice)
|
||||
{
|
||||
if (pDevice->clientSli) {
|
||||
return pDevice->numSubDevices;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The 0080 RM device index for the given subdevice index.
|
||||
* This is 0 for RM SLI, and the subdevice index for client SLI.
|
||||
*/
|
||||
static inline int
|
||||
__nvPushGetDeviceIndex(const NvPushDeviceRec *pDevice, int sd)
|
||||
{
|
||||
if (pDevice->clientSli) {
|
||||
return sd;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
NvU32 __nvPushProgressTrackerEntrySize(const NvPushDeviceRec *pDevice);
|
||||
|
||||
NvBool __nvPushTestPushBuffer(NvPushChannelPtr p);
|
||||
|
||||
NvBool __nvPushGetHal(
|
||||
const NvPushAllocDeviceParams *pParams,
|
||||
NvU32 channelClass,
|
||||
NvPushHal *pHal);
|
||||
|
||||
#endif /* __NVIDIA_PUSH_PRIV_H__ */
|
||||
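As a quick illustration of the payload encoding above, a host-side helper could pack and decode the two fields with the DRF helpers from nvmisc.h. This is a minimal sketch; the function names below are hypothetical and not part of the header.

/* Sketch only: hypothetical helpers built on the DRF macros from nvmisc.h. */
static inline NvU32 nvPushExamplePackProgress(NvU32 getInDWords, NvU32 gpGetPairs)
{
    return DRF_NUM(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GET, getInDWords) |
           DRF_NUM(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GP_GET, gpGetPairs);
}

static inline NvU32 nvPushExampleGetByteOffset(NvU32 payload)
{
    /* _GET stores dwords, so scale by 4 to recover a byte offset. */
    return DRF_VAL(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GET, payload) * 4;
}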
259
src/common/unix/nvidia-push/interface/nvidia-push-init.h
Normal file
@@ -0,0 +1,259 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains nvidia-push device and channel setup structures and
|
||||
* functions.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_PUSH_INIT_H__
|
||||
#define __NVIDIA_PUSH_INIT_H__
|
||||
|
||||
|
||||
#include "nvidia-push-types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* Return the index of the first class table element supported on this device.
|
||||
*
|
||||
* pClassTable is an array where each element corresponds to a class
|
||||
* the caller supports. The first field in the array element should
|
||||
* be an NvPushSupportedClass struct. There may be additional fields
|
||||
* in the array element that are specific to the caller. The
|
||||
* classTableStride argument indicates the size in bytes of one array
|
||||
* element, such that nvPushGetSupportedClassIndex() can step from one
|
||||
* array element to the next by adding classTableStride.
|
||||
*
|
||||
* nvPushGetSupportedClassIndex() will query the list of classes
|
||||
* supported by this device, and return the index of the first
|
||||
* pClassTable array element that is supported by the device. -1 is
|
||||
* returned if there is no match.
|
||||
*
|
||||
* \param pDevice The nvidia-push device whose class list to consider.
|
||||
* \param pClassTable The table of classes supported.
|
||||
* \param classTableStride The size in bytes of one table element.
|
||||
* \param classTableLength The number of table elements.
|
||||
*
|
||||
* \return The index of the first table element that matches, or -1.
|
||||
*/
|
||||
|
||||
typedef struct _NvPushSupportedClass {
|
||||
NvU32 classNumber;
|
||||
NVAModelConfig amodelConfig;
|
||||
} NvPushSupportedClass;
|
||||
|
||||
int nvPushGetSupportedClassIndex(
|
||||
NvPushDevicePtr pDevice,
|
||||
const void *pClassTable,
|
||||
size_t classTableStride,
|
||||
size_t classTableLength);
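As a usage sketch (the table contents and class numbers below are placeholders chosen for illustration, not classes the driver actually advertises), a caller-side table and lookup might look like this:

/* Sketch only: a hypothetical caller-defined class table. */
typedef struct {
    NvPushSupportedClass base;    /* must be the first field */
    int callerSpecific;           /* arbitrary caller data */
} ExampleClassTableEntry;

static const ExampleClassTableEntry exampleClassTable[] = {
    { { 0xC36F /* placeholder class number */, NV_AMODEL_NONE }, 1 },
    { { 0xA16F /* placeholder class number */, NV_AMODEL_NONE }, 2 },
};

static int exampleFindSupportedClass(NvPushDevicePtr pDevice)
{
    return nvPushGetSupportedClassIndex(
        pDevice,
        exampleClassTable,
        sizeof(exampleClassTable[0]),
        sizeof(exampleClassTable) / sizeof(exampleClassTable[0]));
}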
|
||||
|
||||
/*
|
||||
* Parameter structure populated by the host driver when requesting an
|
||||
* NvPushDeviceRec.
|
||||
*/
|
||||
typedef struct _NvPushAllocDeviceParams {
|
||||
|
||||
/* Pointer to host device, filled by host driver as needed */
|
||||
void *hostDevice;
|
||||
|
||||
const NvPushImports *pImports;
|
||||
|
||||
/* The host driver's RMAPI client (NV0000) handle. */
|
||||
NvU32 clientHandle;
|
||||
|
||||
/* TRUE iff this device is in client-side SLI mode. */
|
||||
NvBool clientSli;
|
||||
|
||||
/* The number of subDevices allocated by the host driver. */
|
||||
NvU32 numSubDevices;
|
||||
|
||||
struct {
|
||||
/* The host driver's RMAPI device (NV0080) handles */
|
||||
NvU32 deviceHandle;
|
||||
/* The host driver's RMAPI subDevice (NV2080) handles. */
|
||||
NvU32 handle;
|
||||
/* FERMI_VASPACE_A object in which channels on this device should be
|
||||
* mapped. */
|
||||
NvU32 gpuVASpaceObject;
|
||||
/* ctxDma handle to be used with MapMemoryDma. */
|
||||
NvU32 gpuVASpace;
|
||||
} subDevice[NV_MAX_SUBDEVICES];
|
||||
|
||||
struct {
|
||||
/*
|
||||
* The Amodel configuration requested by the host driver.
|
||||
*/
|
||||
NVAModelConfig config;
|
||||
} amodel;
|
||||
|
||||
/* Whether channels on this device will be used to program Tegra. */
|
||||
NvBool isTegra;
|
||||
|
||||
/*
|
||||
* Pool of RMAPI object handles. The host driver should populate
|
||||
* all of the elements in this array before calling
|
||||
* nvPushAllocDevice(), and release all of these handles if
|
||||
* nvPushAllocDevice() fails, or after calling nvPushFreeDevice().
|
||||
*
|
||||
* The number of possible handles is:
|
||||
*
|
||||
* hUserMode (per-sd)
|
||||
*/
|
||||
#define NV_PUSH_DEVICE_HANDLE_POOL_NUM \
|
||||
(NV_MAX_SUBDEVICES)
|
||||
|
||||
NvU32 handlePool[NV_PUSH_DEVICE_HANDLE_POOL_NUM];
|
||||
|
||||
NvU32 numClasses;
|
||||
const NvU32 *supportedClasses;
|
||||
|
||||
} NvPushAllocDeviceParams;
|
||||
|
||||
NvBool nvPushAllocDevice(
|
||||
const NvPushAllocDeviceParams *pParams,
|
||||
NvPushDevicePtr pDevice);
|
||||
|
||||
void nvPushFreeDevice(
|
||||
NvPushDevicePtr pDevice);
|
||||
|
||||
|
||||
/*
|
||||
* Parameter structure populated by the host driver when requesting an
|
||||
* NvPushChannelRec.
|
||||
*/
|
||||
typedef struct _NvPushAllocChannelParams {
|
||||
|
||||
/* NV2080_ENGINE_TYPE_ */
|
||||
NvU32 engineType;
|
||||
|
||||
/*
|
||||
* Whether to log the pushbuffer in nvdiss format, by calling
|
||||
* nvPushImportLogNvDiss().
|
||||
*/
|
||||
NvBool logNvDiss;
|
||||
|
||||
/*
|
||||
* Normally, the pushbuffer utility library will time out when
|
||||
* waiting for things (space in the pushbuffer, waiting for
|
||||
* notifiers, etc). When the channel is created with
|
||||
* noTimeout=TRUE, the channel will wait indefinitely for these
|
||||
* things.
|
||||
*/
|
||||
NvBool noTimeout;
|
||||
|
||||
/*
|
||||
* Normally, the pushbuffer utility library checks for channel
|
||||
* errors and reports them to the host driver by calling
|
||||
* nvPushImportChannelErrorOccurred(). Host drivers can set
|
||||
* ignoreChannelErrors=TRUE to disable this check.
|
||||
*/
|
||||
NvBool ignoreChannelErrors;
|
||||
|
||||
/*
|
||||
* DIFR stands for Display Idle Frame Refresh in which a CE is used to
|
||||
* prefetch framebuffer pixels into the GPU's L2 cache. The prefetch
|
||||
* operation requires the channel to be specifically configured for DIFR
|
||||
* prefetching. This flag indicates if this channel is intended to be
|
||||
* used for just that.
|
||||
*/
|
||||
NvBool difrPrefetch;
|
||||
|
||||
/*
|
||||
* Host drivers should specify how many notifiers they want. The
|
||||
* pushbuffer utility library will allocate memory to hold this
|
||||
* many notifiers on each subDevice, plus an error notifier.
|
||||
*
|
||||
* The 'notifierIndex' argument to, e.g., nvPushGetNotifierCpuAddress()
|
||||
* should be in the range [0,numNotifiers).
|
||||
*/
|
||||
NvU8 numNotifiers;
|
||||
|
||||
/*
|
||||
* The size of the "main" pushbuffer in bytes. Note this does not
|
||||
* include space for gpfifo entries or progress tracking:
|
||||
* nvidia-push will implicitly pad the total pushbuffer for those
|
||||
* items.
|
||||
*/
|
||||
NvU32 pushBufferSizeInBytes;
|
||||
|
||||
/*
|
||||
* Pool of RMAPI object handles. The host driver should populate
|
||||
* all of the elements in this array before calling
|
||||
* nvPushAllocChannel(), and release all of these handles if
|
||||
* nvPushAllocChannel() fails, or after calling nvPushFreeChannel().
|
||||
*
|
||||
* The number of possible handles is:
|
||||
*
|
||||
* progressSemaphore hMemory (per-sd) +
|
||||
* pushbufferHandle (per-device) +
|
||||
* pushbufferVAHandle (per-sd) +
|
||||
* userD.hMemory (per-sd) +
|
||||
* channelHandle (per-sd) +
|
||||
* notifier memoryHandle (per-device) +
|
||||
* error notifier ctxDma (per-device)
|
||||
*/
|
||||
#define NV_PUSH_CHANNEL_HANDLE_POOL_NUM \
|
||||
(NV_MAX_SUBDEVICES + \
|
||||
1 + \
|
||||
NV_MAX_SUBDEVICES + \
|
||||
NV_MAX_SUBDEVICES + \
|
||||
NV_MAX_SUBDEVICES + \
|
||||
1 + \
|
||||
1)
|
||||
|
||||
NvU32 handlePool[NV_PUSH_CHANNEL_HANDLE_POOL_NUM];
|
||||
|
||||
/*
|
||||
* A pointer to an NvPushDeviceRec, initialized with
|
||||
* nvPushAllocDevice(). One or more NvPushChannelRecs may share
|
||||
* the same NvPushDevicePtr.
|
||||
*
|
||||
* This pDevice should be kept allocated until all
|
||||
* NvPushChannelRecs using it have been freed.
|
||||
*/
|
||||
NvPushDevicePtr pDevice;
|
||||
|
||||
} NvPushAllocChannelParams;
|
||||
|
||||
NvBool nvPushAllocChannel(
|
||||
const NvPushAllocChannelParams *pParams,
|
||||
NvPushChannelPtr buffer);
|
||||
|
||||
void nvPushFreeChannel(
|
||||
NvPushChannelPtr buffer);
|
||||
|
||||
|
||||
void nvPushInitWaitForNotifier(
|
||||
NvPushChannelPtr pChannel,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 subdeviceMask);
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /*__NVIDIA_PUSH_INIT_H__ */
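A rough end-to-end allocation sketch follows. It assumes the host driver has already created its RMAPI client, device, subdevice, and VA space handles and populated the handle pools, and it elides error reporting; the helper name and the example sizes are not taken from the driver.

/* Sketch only: abbreviated device + channel setup. */
static NvBool exampleAllocPushChannel(const NvPushAllocDeviceParams *pDevParams,
                                      NvPushDevicePtr pDevice,
                                      NvPushChannelPtr pChannel)
{
    NvPushAllocChannelParams chParams = { 0 };

    if (!nvPushAllocDevice(pDevParams, pDevice)) {
        return FALSE;
    }

    chParams.pDevice = pDevice;
    chParams.engineType = 0;                     /* placeholder NV2080_ENGINE_TYPE_ value */
    chParams.numNotifiers = 1;
    chParams.pushBufferSizeInBytes = 64 * 1024;  /* arbitrary example size */
    /* chParams.handlePool[] must be filled with pre-allocated RMAPI handles here. */

    if (!nvPushAllocChannel(&chParams, pChannel)) {
        nvPushFreeDevice(pDevice);
        return FALSE;
    }

    return TRUE;
}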
|
||||
247
src/common/unix/nvidia-push/interface/nvidia-push-methods.h
Normal file
@@ -0,0 +1,247 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * This file contains macros and inline functions used to actually program
 * methods.
 */

#ifndef __NVIDIA_PUSH_METHODS_H__
#define __NVIDIA_PUSH_METHODS_H__

#include "nvidia-push-types.h"

#include "class/cla16f.h"

#ifdef __cplusplus
extern "C" {
#endif

static inline void __nvPushSetMethodDataSegment(NvPushChannelSegmentPtr s, const NvU32 data)
{
    s->buffer->u = data;
    s->buffer++;
}

static inline void nvPushSetMethodData(NvPushChannelPtr p, const NvU32 data)
{
    __nvPushSetMethodDataSegment(&p->main, data);
}

#if NV_PUSH_ALLOW_FLOAT
static inline void __nvPushSetMethodDataSegmentF(NvPushChannelSegmentPtr s, const float data)
{
    s->buffer->f = data;
    s->buffer++;
}

static inline void nvPushSetMethodDataF(NvPushChannelPtr p, const float data)
{
    __nvPushSetMethodDataSegmentF(&p->main, data);
}
#endif

static inline void __nvPushSetMethodDataSegmentU64(NvPushChannelSegmentPtr s, const NvU64 data)
{
    __nvPushSetMethodDataSegment(s, NvU64_HI32(data));
    __nvPushSetMethodDataSegment(s, NvU64_LO32(data));
}

static inline void nvPushSetMethodDataU64(NvPushChannelPtr p, const NvU64 data)
{
    __nvPushSetMethodDataSegmentU64(&p->main, data);
}

void __nvPushMoveDWORDS(NvU32* dst, const NvU32* src, int dwords);

static inline void
nvDmaMoveDWORDS(NvPushChannelUnion *dst, const NvU32* src, int dwords)
{
    // The 'dst' argument is an array of NvPushChannelUnion; it is safe
    // to treat this as an array of NvU32, as long as NvU32 and
    // NvPushChannelUnion are the same size.
    ct_assert(sizeof(NvU32) == sizeof(NvPushChannelUnion));
    __nvPushMoveDWORDS((NvU32 *)dst, src, dwords);
}

static inline void nvPushInlineData(NvPushChannelPtr p, const void *data,
                                    size_t dwords)
{
    nvDmaMoveDWORDS(p->main.buffer, (const NvU32 *)data, dwords);
    p->main.buffer += dwords;
}

/*!
 * Return the maximum method count: the maximum number of dwords that can be
 * specified in the nvPushMethod() family of macros.
 */
static inline NvU32 nvPushMaxMethodCount(const NvPushChannelRec *p)
{
    /*
     * The number of methods that can be specified in one NVA16F_DMA_METHOD
     * header is limited by the bit field size of NVA16F_DMA_METHOD_COUNT: 28:16
     * (i.e., maximum representable value 8191).
     */
    const NvU32 maxFromMethodCountMask = DRF_MASK(NVA16F_DMA_METHOD_COUNT);

    /*
     * Further, the method count must be smaller than half the total pushbuffer
     * size minus one, to correctly distinguish empty and full pushbuffers. See
     * nvPushHeader() for details.
     */
    const NvU32 pushBufferSizeInBytes = p->main.sizeInBytes;
    const NvU32 pushBufferSizeInDWords = pushBufferSizeInBytes / 4;
    const NvU32 pushBufferHalfSizeInDWords = pushBufferSizeInDWords / 2;

    /*
     * Subtract two from pushBufferHalfSizeInDWords:
     *
     * -1 to distinguish pushbuffer empty from full (see above).
     *
     * -1 to be smaller than, rather than equal to, the above constraints.
     */
    const NvU32 maxFromPushBufferSize = pushBufferHalfSizeInDWords - 2;

    return NV_MIN(maxFromMethodCountMask, maxFromPushBufferSize);
}

// These macros verify that the values used in the methods fit
// into the defined ranges.
#define ASSERT_DRF_DEF(d, r, f, n) \
    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (NV ## d ## r ## f ## n)))
#define ASSERT_DRF_NUM(d, r, f, n) \
    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (n)))

#if defined(DEBUG)
#include "class/clc36f.h" /* VOLTA_CHANNEL_GPFIFO_A */

/*
 * When pushing GPFIFO methods (NVA16F_SEMAPHORE[ABCD]), all four
 * methods must be pushed together. If the four methods are not
 * pushed together, nvidia-push might wrap, injecting its progress
 * tracking semaphore release methods in the middle, and perturb the
 * NVA16F_SEMAPHOREA_OFFSET_UPPER and NVA16F_SEMAPHOREB_OFFSET_LOWER
 * channel state.
 *
 * Return whether the methods described by the arguments include some,
 * but not all, of A, B, C, and D. I.e., if the range starts at B, C,
 * or D, or if the range ends at A, B, or C.
 *
 * Perform a similar check for Volta+ semaphore methods
 * NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE. Note that we always check for both
 * sets of methods, regardless of the GPU we're actually running on. This is
 * okay since:
 * a) the NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE method offsets were not used
 *    for anything from (a16f..c36f].
 * b) the SEMAPHORE[ABCD] methods still exist on the newer classes (they
 *    haven't been reused for anything else)
 */
static inline NvBool __nvPushStartSplitsSemaphore(
    NvU32 method,
    NvU32 count,
    NvU32 secOp)
{
    ct_assert(NVA16F_SEMAPHOREA < NVA16F_SEMAPHORED);
    ct_assert(NVC36F_SEM_ADDR_LO < NVC36F_SEM_EXECUTE);

    /*
     * compute start and end as inclusive; if not incrementing, we
     * assume end==start
     */
    const NvU32 start = method;
    const NvU32 end = (secOp == NVA16F_DMA_SEC_OP_INC_METHOD) ?
        (method + ((count - 1) * 4)) : method;

    return ((start > NVA16F_SEMAPHOREA) && (start <= NVA16F_SEMAPHORED)) ||
           ((end >= NVA16F_SEMAPHOREA) && (end < NVA16F_SEMAPHORED)) ||
           ((start > NVC36F_SEM_ADDR_LO) && (start <= NVC36F_SEM_EXECUTE)) ||
           ((end >= NVC36F_SEM_ADDR_LO) && (end < NVC36F_SEM_EXECUTE));
}
#endif /* DEBUG */

/*
 * Note that _count+1 must be less than half the total pushbuffer size. This is
 * required by GPFIFO because we can't reliably tell when we can write all the
 * way to the end of the pushbuffer if we wrap (see bug 232454). This
 * assumption ensures that there will be enough space once GET reaches PUT.
 */
#define nvPushHeader(_push_buffer, _segment, _count, _header) do {     \
    NvPushChannelSegmentPtr _pSegment = &(_push_buffer)->_segment;     \
    nvAssert(((_count)+1) < ((_pSegment)->sizeInBytes / 8));           \
    if ((_pSegment)->freeDwords < ((_count)+1))                        \
        __nvPushMakeRoom((_push_buffer), (_count) + 1);                \
    __nvPushSetMethodDataSegment((_pSegment), (_header));              \
    (_pSegment)->freeDwords -= ((_count)+1);                           \
} while(0)

#define __nvPushStart(_push_buffer, _segment, _subch, _offset, _count, _opcode) \
{                                                                       \
    nvAssert(!__nvPushStartSplitsSemaphore(                             \
                 (_offset),                                             \
                 (_count),                                              \
                 NVA16F_DMA_SEC_OP ## _opcode));                        \
    ASSERT_DRF_DEF(A16F, _DMA, _SEC_OP, _opcode);                       \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count);                  \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch);             \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2);        \
    nvPushHeader((_push_buffer), _segment, (_count),                    \
                 DRF_DEF(A16F, _DMA, _SEC_OP, _opcode) |                \
                 DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count) |           \
                 DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch) |      \
                 DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2)); \
}

// The GPU can encode a 13-bit constant method/data pair in a single DWORD.
#define nvPushImmedValSegment(_push_buffer, _segment, _subch, _offset, _data) { \
    ASSERT_DRF_NUM(A16F, _DMA, _IMMD_DATA, _data);                      \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch);             \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2);        \
    if ((_push_buffer)->_segment.freeDwords < 1)                        \
        __nvPushMakeRoom((_push_buffer), 1);                            \
    __nvPushSetMethodDataSegment(&(_push_buffer)->_segment,             \
        DRF_DEF(A16F, _DMA, _SEC_OP, _IMMD_DATA_METHOD) |               \
        DRF_NUM(A16F, _DMA, _IMMD_DATA, _data) |                        \
        DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch) |               \
        DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2));          \
    (_push_buffer)->_segment.freeDwords--;                              \
}

#define nvPushImmedVal(_push_buffer, _subch, _offset, _data) \
    nvPushImmedValSegment(_push_buffer, main, _subch, _offset, _data)

#define nvPushImmed(_push_buffer, _subch, _offset, _val) \
    nvPushImmedVal(_push_buffer, _subch, _offset, _offset##_V_##_val)

// Method headers.
#define nvPushMethod(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _INC_METHOD)
#define nvPushMethodNoIncr(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _NON_INC_METHOD)
#define nvPushMethodOneIncr(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _ONE_INC)

#ifdef __cplusplus
};
#endif

#endif /* __NVIDIA_PUSH_METHODS_H__ */
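As a usage sketch of the macros above: push one incrementing method header followed by two data dwords on subchannel 0, then submit the work. The method offset is a made-up placeholder rather than a real class method, and nvPushKickoff() comes from nvidia-push-utils.h.

/* Sketch only: EXAMPLE_METHOD_OFFSET is a placeholder method offset. */
#define EXAMPLE_METHOD_OFFSET 0x0400

static void examplePushWork(NvPushChannelPtr p, NvU32 hi, NvU32 lo)
{
    nvPushMethod(p, 0 /* subch */, EXAMPLE_METHOD_OFFSET, 2);
    nvPushSetMethodData(p, hi);
    nvPushSetMethodData(p, lo);

    nvPushKickoff(p);
}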
281
src/common/unix/nvidia-push/interface/nvidia-push-types.h
Normal file
@@ -0,0 +1,281 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains core definitions (structures and enums) for use in the
|
||||
* rest of the nvidia-push code.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_PUSH_TYPES_H__
|
||||
#define __NVIDIA_PUSH_TYPES_H__
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvlimits.h"
|
||||
#include "nvmisc.h"
|
||||
#include "nvgputypes.h" /* NvNotificationRec */
|
||||
#include "nv_common_utils.h" /* TRUE/FALSE */
|
||||
#include "nvctassert.h"
|
||||
#include "nv_assert.h" /* nvAssert() */
|
||||
#include "nv_amodel_enum.h" /* NVAModelConfig */
|
||||
#include "nvos.h" /* NV_CHANNELGPFIFO_NOTIFICATION_* */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define NV_PUSH_NOTIFIER_SHORT_TIMEOUT 3000 /* in milliseconds (ie: 3 seconds) */
|
||||
#define NV_PUSH_NOTIFIER_LONG_TIMEOUT 10000 /* in milliseconds (ie: 10 seconds) */
|
||||
|
||||
# define NV_PUSH_PRINTF_FORMAT_ARGUMENT
|
||||
# define NV_PUSH_PRINTF_ATTRIBUTES(_fmt,_var) \
|
||||
__attribute__((format (printf, _fmt, _var)))
|
||||
|
||||
|
||||
#if defined(NV_PUSH_IN_KERNEL)
|
||||
# define NV_PUSH_ALLOW_FLOAT 0
|
||||
#else
|
||||
# define NV_PUSH_ALLOW_FLOAT 1
|
||||
#endif
|
||||
|
||||
typedef union _NvPushChannelUnion
|
||||
{
|
||||
NvU32 u;
|
||||
#if NV_PUSH_ALLOW_FLOAT
|
||||
float f;
|
||||
#endif
|
||||
} NvPushChannelUnion;
|
||||
|
||||
typedef struct _NvPushChannelRec NvPushChannelRec;
|
||||
typedef struct _NvPushChannelRec *NvPushChannelPtr;
|
||||
|
||||
typedef struct _nv_push_hal {
|
||||
void (*kickoff)(struct _NvPushChannelRec*, NvU32 oldGpPut, NvU32 newGpPut);
|
||||
void (*releaseTimelineSemaphore)(NvPushChannelPtr, void *cpuAddress, NvU64 gpuAddress, NvU64 val);
|
||||
void (*acquireTimelineSemaphore)(NvPushChannelPtr, NvU64 gpuAddress, NvU64 val);
|
||||
struct {
|
||||
/* Requires USERD memory to be specified at channel allocation */
|
||||
NvU32 clientAllocatesUserD :1;
|
||||
|
||||
/* On Tegra, we currently need to allocate double the requested GPFIFO
|
||||
* entries */
|
||||
NvU32 allocateDoubleSizeGpFifo :1;
|
||||
|
||||
/* Use Volta+ semaphore methods */
|
||||
NvU32 voltaSemMethods :1;
|
||||
|
||||
NvU32 extendedBase :1;
|
||||
} caps;
|
||||
} NvPushHal;
|
||||
|
||||
typedef struct _NvPushDeviceRec {
|
||||
|
||||
void *hostDevice; /* Provided by the host driver */
|
||||
|
||||
NvBool hostLBoverflowBug1667921 : 1;
|
||||
NvBool clientSli : 1; /* Provided by the host driver */
|
||||
|
||||
NvU32 clientHandle; /* Provided by the host driver */
|
||||
NvU32 numSubDevices; /* Provided by the host driver */
|
||||
|
||||
NvU32 numClasses; /* Provided by the host driver */
|
||||
const NvU32 *supportedClasses;/* Provided by the host driver */
|
||||
|
||||
struct {
|
||||
NvU32 handle; /* Provided by the host driver */
|
||||
NvU32 deviceHandle; /* Provided by the host driver */
|
||||
NvU32 gpuVASpaceObject;/* Provided by the host driver */
|
||||
NvU32 gpuVASpaceCtxDma;/* Provided by the host driver */
|
||||
NvU32 hUserMode; /* VOLTA_USERMODE_A object */
|
||||
void *pUserMode; /* VOLTA_USERMODE_A mapping */
|
||||
} subDevice[NV_MAX_SUBDEVICES];
|
||||
|
||||
NvU32 gpfifoClass;
|
||||
size_t userDSize;
|
||||
|
||||
NVAModelConfig amodelConfig;
|
||||
|
||||
NvPushHal hal;
|
||||
const struct _NvPushImports *pImports;
|
||||
|
||||
} NvPushDeviceRec, *NvPushDevicePtr;
|
||||
|
||||
|
||||
typedef struct _NvPushChannelSegmentRec
|
||||
{
|
||||
NvU32 freeDwords; // free space (in dwords)
|
||||
NvU32 sizeInBytes; // Push buffer size (in bytes)
|
||||
NvU32 putOffset; // Offset of last kickoff
|
||||
NvPushChannelUnion *base; // Push buffer start pointer
|
||||
NvPushChannelUnion *buffer; // Push buffer current pointer
|
||||
NvU64 gpuMapOffset;
|
||||
} NvPushChannelSegmentRec, *NvPushChannelSegmentPtr;
|
||||
|
||||
struct _NvPushChannelRec
|
||||
{
|
||||
NvBool initialized : 1;
|
||||
NvBool logNvDiss : 1;
|
||||
NvBool noTimeout : 1;
|
||||
NvBool ignoreChannelErrors : 1;
|
||||
NvBool channelErrorOccurred : 1;
|
||||
|
||||
NvU32 channelHandle[NV_MAX_SUBDEVICES];
|
||||
NvU32 pushbufferHandle;
|
||||
NvU32 pushbufferVAHandle[NV_MAX_SUBDEVICES];
|
||||
NvPushChannelSegmentRec main;
|
||||
|
||||
void *control[NV_MAX_SUBDEVICES];
|
||||
NvU32 numGpFifoEntries;
|
||||
NvU32 *gpfifo; // GPFIFO entries
|
||||
NvU32 gpPutOffset; // GPFIFO entries last kicked off offset
|
||||
NvU32 currentSubDevMask;
|
||||
|
||||
NvPushChannelSegmentRec progressTracker;
|
||||
struct {
|
||||
NvU32 handle[NV_MAX_SUBDEVICES];
|
||||
void *ptr[NV_MAX_SUBDEVICES];
|
||||
NvU64 gpuVA;
|
||||
} progressSemaphore;
|
||||
|
||||
struct {
|
||||
NvU32 hMemory;
|
||||
} userD[NV_MAX_SUBDEVICES];
|
||||
|
||||
struct {
|
||||
NvU8 num;
|
||||
NvU32 memoryHandle;
|
||||
NvNotification *cpuAddress;
|
||||
NvU64 gpuAddress;
|
||||
NvU32 errorCtxDma;
|
||||
} notifiers;
|
||||
|
||||
NvPushDeviceRec *pDevice;
|
||||
};
|
||||
|
||||
/* Opaque type, only used by pointer within the push buffer utility library. */
|
||||
typedef struct _NvPushImportEvent NvPushImportEvent;
|
||||
|
||||
/* Table of function pointers to be provided by the nvidia-push host driver. */
|
||||
typedef struct _NvPushImports {
|
||||
|
||||
NvU32 (*rmApiControl) (NvPushDevicePtr pDevice,
|
||||
NvU32 hObject,
|
||||
NvU32 cmd,
|
||||
void *pParams,
|
||||
NvU32 paramsSize);
|
||||
|
||||
NvU32 (*rmApiAlloc) (NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hObject,
|
||||
NvU32 hClass,
|
||||
void *pAllocParams);
|
||||
|
||||
NvU32 (*rmApiFree) (NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hObject);
|
||||
|
||||
NvU32 (*rmApiMapMemoryDma) (NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hDma,
|
||||
NvU32 hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 length,
|
||||
NvU32 flags,
|
||||
NvU64 *pDmaOffset);
|
||||
|
||||
NvU32 (*rmApiUnmapMemoryDma) (NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hDma,
|
||||
NvU32 hMemory,
|
||||
NvU32 flags,
|
||||
NvU64 dmaOffset);
|
||||
|
||||
NvU32 (*rmApiAllocMemory64) (NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hMemory,
|
||||
NvU32 hClass,
|
||||
NvU32 flags,
|
||||
void **ppAddress,
|
||||
NvU64 *pLimit);
|
||||
|
||||
NvU32 (*rmApiVidHeapControl) (NvPushDevicePtr pDevice,
|
||||
void *pVidHeapControlParms);
|
||||
|
||||
NvU32 (*rmApiMapMemory) (NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 length,
|
||||
void **ppLinearAddress,
|
||||
NvU32 flags);
|
||||
|
||||
NvU32 (*rmApiUnmapMemory) (NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hMemory,
|
||||
void *pLinearAddress,
|
||||
NvU32 flags);
|
||||
|
||||
NvU64 (*getMilliSeconds) (NvPushDevicePtr pDevice);
|
||||
|
||||
void (*yield) (NvPushDevicePtr pDevice);
|
||||
|
||||
NvBool (*waitForEvent) (NvPushDevicePtr pDevice,
|
||||
NvPushImportEvent *pEvent,
|
||||
NvU64 timeout);
|
||||
|
||||
void (*emptyEventFifo) (NvPushDevicePtr pDevice,
|
||||
NvPushImportEvent *pEvent);
|
||||
|
||||
void (*channelErrorOccurred) (NvPushChannelPtr pChannel, NvU32 channelErrCode);
|
||||
|
||||
void (*pushbufferWrapped) (NvPushChannelPtr pChannel);
|
||||
|
||||
void (*logError) (NvPushDevicePtr pDevice,
|
||||
NV_PUSH_PRINTF_FORMAT_ARGUMENT const char *fmt, ...)
|
||||
NV_PUSH_PRINTF_ATTRIBUTES(2,3);
|
||||
|
||||
/*
|
||||
* The logNvDiss() import, in DEBUG builds, logs strings to be
|
||||
* parsed by nvdiss. Note that multiple nvPushImportLogNvDiss()
|
||||
* calls may be used to build one line of output (so, respect the
|
||||
* newlines provided in the strings).
|
||||
*/
|
||||
#if defined(DEBUG)
|
||||
void (*logNvDiss) (NvPushChannelPtr pChannel,
|
||||
NV_PUSH_PRINTF_FORMAT_ARGUMENT const char *fmt, ...)
|
||||
NV_PUSH_PRINTF_ATTRIBUTES(2,3);
|
||||
#endif
|
||||
|
||||
} NvPushImports;
|
||||
|
||||
|
||||
void __nvPushMakeRoom(NvPushChannelPtr, NvU32 count);
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* __NVIDIA_PUSH_TYPES_H__ */
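For illustration, a host driver usually provides the import table with designated initializers pointing at its own wrappers. The wrapper names below are hypothetical and only a few of the required entry points are shown.

/* Sketch only: hypothetical host-driver import wrappers (bodies elided). */
static NvU32 exampleRmApiControl(NvPushDevicePtr pDevice, NvU32 hObject,
                                 NvU32 cmd, void *pParams, NvU32 paramsSize);
static NvU64 exampleGetMilliSeconds(NvPushDevicePtr pDevice);
static void  exampleYield(NvPushDevicePtr pDevice);

static const NvPushImports exampleImports = {
    .rmApiControl    = exampleRmApiControl,
    /* ... the remaining rmApi*, event, and logging imports go here ... */
    .getMilliSeconds = exampleGetMilliSeconds,
    .yield           = exampleYield,
};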
|
||||
180
src/common/unix/nvidia-push/interface/nvidia-push-utils.h
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* This file contains push buffer utility functions and declarations */
|
||||
|
||||
#ifndef __NVIDIA_PUSH_UTILS_H__
|
||||
#define __NVIDIA_PUSH_UTILS_H__
|
||||
|
||||
#include "nvidia-push-types.h"
|
||||
#include "nvlimits.h"
|
||||
|
||||
#include "class/cla16f.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static inline NvBool nvPushIsAModel(const NvPushDeviceRec *pDevice)
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/* declare prototypes: */
|
||||
NvBool nvPushCheckChannelError(NvPushChannelPtr pChannel);
|
||||
void nvPushKickoff(NvPushChannelPtr);
|
||||
NvBool nvPushIdleChannelTest(NvPushChannelPtr pChannel, NvU32 timeoutMSec);
|
||||
NvBool nvPushIdleChannel(NvPushChannelPtr);
|
||||
|
||||
void nvPushWaitForNotifier(
|
||||
NvPushChannelPtr pChannel,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 subdeviceMask,
|
||||
NvBool yield,
|
||||
NvPushImportEvent *pEvent,
|
||||
int id);
|
||||
|
||||
void nvPushReleaseTimelineSemaphore(
|
||||
NvPushChannelPtr p,
|
||||
void *cpuAddress,
|
||||
NvU64 gpuAddress,
|
||||
NvU64 val);
|
||||
|
||||
void nvPushAcquireTimelineSemaphore(
|
||||
NvPushChannelPtr p,
|
||||
NvU64 gpuAddress,
|
||||
NvU64 val);
|
||||
|
||||
NvBool nvPushDecodeMethod(NvU32 header, NvU32 *count);
|
||||
void nvPushSetObject(NvPushChannelPtr p, NvU32 subch, NvU32 object[NV_MAX_SUBDEVICES]);
|
||||
void nvPushSetSubdeviceMask(NvPushChannelPtr p, NvU32 mask);
|
||||
void __nvPushMakeRoom(NvPushChannelPtr, NvU32 count);
|
||||
|
||||
#define NV_PUSH_SUBDEVICE_MASK_PRIMARY 0x00000001
|
||||
#define NV_PUSH_SUBDEVICE_MASK_ALL DRF_MASK(NVA16F_DMA_SET_SUBDEVICE_MASK_VALUE)
|
||||
|
||||
/*
|
||||
* Evaluates to TRUE if the two subDevMasks are equivalent for the given SLI
|
||||
* device
|
||||
*/
|
||||
static inline NvBool nvPushSubDeviceMaskEquiv(
|
||||
const NvPushDeviceRec *pDevice,
|
||||
NvU32 maskA,
|
||||
NvU32 maskB)
|
||||
{
|
||||
const NvU32 allSubDevices = (1 << pDevice->numSubDevices) - 1;
|
||||
|
||||
return (maskA & allSubDevices) == (maskB & allSubDevices);
|
||||
}
|
||||
|
||||
/* Evaluates to TRUE if subDevMask will write to all of the GPUs */
|
||||
static inline NvBool nvPushSubDeviceMaskAllActive(
|
||||
const NvPushDeviceRec *pDevice,
|
||||
NvU32 subDevMask)
|
||||
{
|
||||
return nvPushSubDeviceMaskEquiv(pDevice, subDevMask,
|
||||
NV_PUSH_SUBDEVICE_MASK_ALL);
|
||||
}
|
||||
|
||||
#define NV_PUSH_NOTIFIER_INTERNAL_BIT 0x80
|
||||
ct_assert(NV_PUSH_NOTIFIER_INTERNAL_BIT >=
|
||||
NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1);
|
||||
#define NV_PUSH_ERROR_NOTIFIER_INDEX \
|
||||
(NV_PUSH_NOTIFIER_INTERNAL_BIT | \
|
||||
NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR)
|
||||
#define NV_PUSH_TOKEN_NOTIFIER_INDEX \
|
||||
(NV_PUSH_NOTIFIER_INTERNAL_BIT | \
|
||||
NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN)
|
||||
|
||||
/*
|
||||
* Notifiers for use by nvidia-push, not exposed to clients:
|
||||
* NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1: defined by RM
|
||||
* NV_MAX_SUBDEVICES: one for each subdevice to track work submission token
|
||||
*/
|
||||
#define NV_PUSH_NUM_INTERNAL_NOTIFIERS \
|
||||
(NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1 + NV_MAX_SUBDEVICES)
|
||||
|
||||
static inline NvU32 __nvPushGetNotifierRawIndex(
|
||||
const NvPushDeviceRec *pDevice,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 sd)
|
||||
{
|
||||
if (notifierIndex & NV_PUSH_NOTIFIER_INTERNAL_BIT) {
|
||||
return notifierIndex & ~NV_PUSH_NOTIFIER_INTERNAL_BIT;
|
||||
} else {
|
||||
return (notifierIndex * pDevice->numSubDevices) + sd +
|
||||
NV_PUSH_NUM_INTERNAL_NOTIFIERS;
|
||||
}
|
||||
}
|
||||
|
||||
static inline NvNotification *nvPushGetNotifierCpuAddress(
|
||||
const NvPushChannelRec *pChannel,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 sd)
|
||||
{
|
||||
const NvU32 rawIndex =
|
||||
__nvPushGetNotifierRawIndex(pChannel->pDevice, notifierIndex, sd);
|
||||
|
||||
return &pChannel->notifiers.cpuAddress[rawIndex];
|
||||
}
|
||||
|
||||
static inline NvU64 nvPushGetNotifierGpuAddress(
|
||||
const NvPushChannelRec *pChannel,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 sd)
|
||||
{
|
||||
const NvU32 rawIndex =
|
||||
__nvPushGetNotifierRawIndex(pChannel->pDevice, notifierIndex, sd);
|
||||
const size_t offset = rawIndex * sizeof(NvNotification);
|
||||
|
||||
return pChannel->notifiers.gpuAddress + offset;
|
||||
}
|
||||
|
||||
|
||||
extern NvU32 nvPushReadGetOffset(NvPushChannelPtr push_buffer, NvBool minimum);
|
||||
|
||||
|
||||
/*!
|
||||
* Make room in the pushbuffer, checking for errors.
|
||||
*
|
||||
* If a channel error occurred, channelErrorOccurred is set to TRUE.
|
||||
* nvPushCheckForRoomAndErrors() is designed to be called just before a
|
||||
* nvPushMethod() with the same size.
|
||||
*/
|
||||
static inline void nvPushCheckForRoomAndErrors(
|
||||
NvPushChannelPtr pChannel,
|
||||
NvU32 count)
|
||||
{
|
||||
pChannel->channelErrorOccurred = FALSE;
|
||||
|
||||
if (pChannel->main.freeDwords < (count + 1)) {
|
||||
__nvPushMakeRoom(pChannel, count + 1);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* __NVIDIA_PUSH_UTILS_H__ */
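A notifier round trip might look like the sketch below: prime the notifier, push the methods that make the GPU write it, kick off, and then block until it lands. The yield, event, and id arguments passed to nvPushWaitForNotifier() are shown with placeholder values; the exact values a host driver uses depend on its event plumbing, and nvPushInitWaitForNotifier() is declared in nvidia-push-init.h.

/* Sketch only: wait for notifier 'notifierIndex' on the primary subdevice. */
static void exampleWaitForCompletion(NvPushChannelPtr pChannel, NvU32 notifierIndex)
{
    nvPushInitWaitForNotifier(pChannel, notifierIndex, NV_PUSH_SUBDEVICE_MASK_PRIMARY);

    /* ... push the methods that write the notifier here ... */

    nvPushKickoff(pChannel);

    nvPushWaitForNotifier(pChannel,
                          notifierIndex,
                          NV_PUSH_SUBDEVICE_MASK_PRIMARY,
                          TRUE /* yield while waiting */,
                          NULL /* placeholder event */,
                          0    /* placeholder id */);
}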
|
||||
1531
src/common/unix/nvidia-push/src/nvidia-push-init.c
Normal file
File diff suppressed because it is too large
1173
src/common/unix/nvidia-push/src/nvidia-push.c
Normal file
File diff suppressed because it is too large
285
src/common/unix/xzminidec/interface/xz.h
Normal file
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
* XZ decompressor
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#ifndef XZ_H
|
||||
#define XZ_H
|
||||
|
||||
/* Get the definition of size_t. */
|
||||
#if defined(__KERNEL__)
|
||||
# include <linux/stddef.h>
|
||||
#else
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
|
||||
/* Get the definition of uint32_t and friends. */
|
||||
#if defined(NV_XZ_USE_NVTYPES)
|
||||
# include <nvtypes.h>
|
||||
typedef NvU8 uint8_t;
|
||||
typedef NvU16 uint16_t;
|
||||
typedef NvU32 uint32_t;
|
||||
typedef NvU64 uint64_t;
|
||||
#elif defined(__KERNEL__)
|
||||
# include <linux/types.h>
|
||||
#else
|
||||
# include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* In Linux, this is used to make extern functions static when needed. */
|
||||
#ifndef XZ_EXTERN
|
||||
# define XZ_EXTERN extern
|
||||
#endif
|
||||
|
||||
/**
|
||||
* enum xz_mode - Operation mode
|
||||
*
|
||||
* @XZ_SINGLE: Single-call mode. This uses less RAM than
|
||||
* the multi-call modes, because the LZMA2
|
||||
* dictionary doesn't need to be allocated as
|
||||
* part of the decoder state. All required data
|
||||
* structures are allocated at initialization,
|
||||
* so xz_dec_run() cannot return XZ_MEM_ERROR.
|
||||
* @XZ_PREALLOC: Multi-call mode with preallocated LZMA2
|
||||
* dictionary buffer. All data structures are
|
||||
* allocated at initialization, so xz_dec_run()
|
||||
* cannot return XZ_MEM_ERROR.
|
||||
* @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is
|
||||
* allocated once the required size has been
|
||||
* parsed from the stream headers. If the
|
||||
* allocation fails, xz_dec_run() will return
|
||||
* XZ_MEM_ERROR.
|
||||
*
|
||||
* It is possible to enable support only for a subset of the above
|
||||
* modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC,
|
||||
* or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled
|
||||
* with support for all operation modes, but the preboot code may
|
||||
* be built with fewer features to minimize code size.
|
||||
*/
|
||||
enum xz_mode {
|
||||
XZ_SINGLE,
|
||||
XZ_PREALLOC,
|
||||
XZ_DYNALLOC
|
||||
};
|
||||
|
||||
/**
|
||||
* enum xz_ret - Return codes
|
||||
* @XZ_OK: Everything is OK so far. More input or more
|
||||
* output space is required to continue. This
|
||||
* return code is possible only in multi-call mode
|
||||
* (XZ_PREALLOC or XZ_DYNALLOC).
|
||||
* @XZ_STREAM_END: Operation finished successfully.
|
||||
* @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding
|
||||
* is still possible in multi-call mode by simply
|
||||
* calling xz_dec_run() again.
|
||||
* Note that this return value is used only if
|
||||
* XZ_DEC_ANY_CHECK was defined at build time,
|
||||
* which is not used in the kernel. Unsupported
|
||||
* check types return XZ_OPTIONS_ERROR if
|
||||
* XZ_DEC_ANY_CHECK was not defined at build time.
|
||||
* @XZ_MEM_ERROR: Allocating memory failed. This return code is
|
||||
* possible only if the decoder was initialized
|
||||
* with XZ_DYNALLOC. The amount of memory that was
|
||||
* tried to be allocated was no more than the
|
||||
* dict_max argument given to xz_dec_init().
|
||||
* @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than
|
||||
* allowed by the dict_max argument given to
|
||||
* xz_dec_init(). This return value is possible
|
||||
* only in multi-call mode (XZ_PREALLOC or
|
||||
* XZ_DYNALLOC); the single-call mode (XZ_SINGLE)
|
||||
* ignores the dict_max argument.
|
||||
* @XZ_FORMAT_ERROR: File format was not recognized (wrong magic
|
||||
* bytes).
|
||||
* @XZ_OPTIONS_ERROR: This implementation doesn't support the requested
|
||||
* compression options. In the decoder this means
|
||||
* that the header CRC32 matches, but the header
|
||||
* itself specifies something that we don't support.
|
||||
* @XZ_DATA_ERROR: Compressed data is corrupt.
|
||||
* @XZ_BUF_ERROR: Cannot make any progress. Details are slightly
|
||||
* different between multi-call and single-call
|
||||
* mode; more information below.
|
||||
*
|
||||
* In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
|
||||
* to XZ code cannot consume any input and cannot produce any new output.
|
||||
* This happens when there is no new input available, or the output buffer
|
||||
* is full while at least one output byte is still pending. Assuming your
|
||||
* code is not buggy, you can get this error only when decoding a compressed
|
||||
* stream that is truncated or otherwise corrupt.
|
||||
*
|
||||
* In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
|
||||
* is too small or the compressed input is corrupt in a way that makes the
|
||||
* decoder produce more output than the caller expected. When it is
|
||||
* (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR
|
||||
* is used instead of XZ_BUF_ERROR.
|
||||
*/
|
||||
enum xz_ret {
|
||||
XZ_OK,
|
||||
XZ_STREAM_END,
|
||||
XZ_UNSUPPORTED_CHECK,
|
||||
XZ_MEM_ERROR,
|
||||
XZ_MEMLIMIT_ERROR,
|
||||
XZ_FORMAT_ERROR,
|
||||
XZ_OPTIONS_ERROR,
|
||||
XZ_DATA_ERROR,
|
||||
XZ_BUF_ERROR
|
||||
};
|
||||
|
||||
/**
|
||||
* struct xz_buf - Passing input and output buffers to XZ code
|
||||
* @in: Beginning of the input buffer. This may be NULL if and only
|
||||
* if in_pos is equal to in_size.
|
||||
* @in_pos: Current position in the input buffer. This must not exceed
|
||||
* in_size.
|
||||
* @in_size: Size of the input buffer
|
||||
* @out: Beginning of the output buffer. This may be NULL if and only
|
||||
* if out_pos is equal to out_size.
|
||||
* @out_pos: Current position in the output buffer. This must not exceed
|
||||
* out_size.
|
||||
* @out_size: Size of the output buffer
|
||||
*
|
||||
* Only the contents of the output buffer from out[out_pos] onward, and
|
||||
* the variables in_pos and out_pos are modified by the XZ code.
|
||||
*/
|
||||
struct xz_buf {
|
||||
const uint8_t *in;
|
||||
size_t in_pos;
|
||||
size_t in_size;
|
||||
|
||||
uint8_t *out;
|
||||
size_t out_pos;
|
||||
size_t out_size;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct xz_dec - Opaque type to hold the XZ decoder state
|
||||
*/
|
||||
struct xz_dec;
|
||||
|
||||
/**
|
||||
* xz_dec_init() - Allocate and initialize a XZ decoder state
|
||||
* @mode: Operation mode
|
||||
* @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for
|
||||
* multi-call decoding. This is ignored in single-call mode
|
||||
* (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes
|
||||
* or 2^n + 2^(n-1) bytes (the latter sizes are less common
|
||||
* in practice), so other values for dict_max don't make sense.
|
||||
* In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
|
||||
* 512 KiB, and 1 MiB are probably the only reasonable values,
|
||||
* except for kernel and initramfs images where a bigger
|
||||
* dictionary can be fine and useful.
|
||||
*
|
||||
* Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at
|
||||
* once. The caller must provide enough output space or the decoding will
|
||||
* fail. The output space is used as the dictionary buffer, which is why
|
||||
* there is no need to allocate the dictionary as part of the decoder's
|
||||
* internal state.
|
||||
*
|
||||
* Because the output buffer is used as the workspace, streams encoded using
|
||||
* a big dictionary are not a problem in single-call mode. It is enough that
|
||||
* the output buffer is big enough to hold the actual uncompressed data; it
|
||||
* can be smaller than the dictionary size stored in the stream headers.
|
||||
*
|
||||
* Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes
|
||||
* of memory is preallocated for the LZMA2 dictionary. This way there is no
|
||||
* risk that xz_dec_run() could run out of memory, since xz_dec_run() will
|
||||
* never allocate any memory. Instead, if the preallocated dictionary is too
|
||||
* small for decoding the given input stream, xz_dec_run() will return
|
||||
* XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be
|
||||
* decoded to avoid allocating excessive amount of memory for the dictionary.
|
||||
*
|
||||
* Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC):
|
||||
* dict_max specifies the maximum allowed dictionary size that xz_dec_run()
|
||||
* may allocate once it has parsed the dictionary size from the stream
|
||||
* headers. This way excessive allocations can be avoided while still
|
||||
* limiting the maximum memory usage to a sane value to prevent running the
|
||||
* system out of memory when decompressing streams from untrusted sources.
|
||||
*
|
||||
* On success, xz_dec_init() returns a pointer to struct xz_dec, which is
|
||||
* ready to be used with xz_dec_run(). If memory allocation fails,
|
||||
* xz_dec_init() returns NULL.
|
||||
*/
|
||||
XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max);
|
||||
|
||||
/**
|
||||
* xz_dec_run() - Run the XZ decoder
|
||||
* @s: Decoder state allocated using xz_dec_init()
|
||||
* @b: Input and output buffers
|
||||
*
|
||||
* The possible return values depend on build options and operation mode.
|
||||
* See enum xz_ret for details.
|
||||
*
|
||||
* Note that if an error occurs in single-call mode (return value is not
|
||||
* XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
|
||||
* contents of the output buffer from b->out[b->out_pos] onward are
|
||||
* undefined. This is true even after XZ_BUF_ERROR, because with some filter
|
||||
* chains, there may be a second pass over the output buffer, and this pass
|
||||
* cannot be properly done if the output buffer is truncated. Thus, you
|
||||
* cannot give the single-call decoder a too small buffer and then expect to
|
||||
* get that amount of valid data from the beginning of the stream. You must use
|
||||
* the multi-call decoder if you don't want to uncompress the whole stream.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
|
||||
|
||||
/**
|
||||
* xz_dec_reset() - Reset an already allocated decoder state
|
||||
* @s: Decoder state allocated using xz_dec_init()
|
||||
*
|
||||
* This function can be used to reset the multi-call decoder state without
|
||||
* freeing and reallocating memory with xz_dec_end() and xz_dec_init().
|
||||
*
|
||||
* In single-call mode, xz_dec_reset() is always called in the beginning of
|
||||
* xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
|
||||
* multi-call mode.
|
||||
*/
|
||||
XZ_EXTERN void xz_dec_reset(struct xz_dec *s);
|
||||
|
||||
/**
|
||||
* xz_dec_end() - Free the memory allocated for the decoder state
|
||||
* @s: Decoder state allocated using xz_dec_init(). If s is NULL,
|
||||
* this function does nothing.
|
||||
*/
|
||||
XZ_EXTERN void xz_dec_end(struct xz_dec *s);
|
||||
|
||||
/*
|
||||
* Standalone build (userspace build or in-kernel build for boot time use)
|
||||
* needs a CRC32 implementation. For normal in-kernel use, kernel's own
|
||||
* CRC32 module is used instead, and users of this module don't need to
|
||||
* care about the functions below.
|
||||
*/
|
||||
#ifndef XZ_INTERNAL_CRC32
|
||||
# ifdef __KERNEL__
|
||||
# define XZ_INTERNAL_CRC32 0
|
||||
# else
|
||||
# define XZ_INTERNAL_CRC32 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if XZ_INTERNAL_CRC32
|
||||
/*
|
||||
* This must be called before any other xz_* function to initialize
|
||||
* the CRC32 lookup table.
|
||||
*/
|
||||
XZ_EXTERN void xz_crc32_init(void);
|
||||
|
||||
/*
|
||||
* Update CRC32 value using the polynomial from IEEE-802.3. To start a new
|
||||
* calculation, the third argument must be zero. To continue the calculation,
|
||||
* the previously returned value is passed as the third argument.
|
||||
*/
|
||||
XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
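A minimal single-call decoding sketch using the API above; it assumes the whole compressed stream and a sufficiently large output buffer are already in memory, that single-call support (XZ_DEC_SINGLE) is compiled in, and it simply returns the raw xz_ret code (XZ_STREAM_END on success).

/* Sketch only: decode a complete .xz stream with one xz_dec_run() call. */
static enum xz_ret example_xz_decode(const uint8_t *in, size_t in_size,
                                     uint8_t *out, size_t out_size,
                                     size_t *out_len)
{
        struct xz_dec *s;
        struct xz_buf b;
        enum xz_ret ret;

#if XZ_INTERNAL_CRC32
        xz_crc32_init();                /* required once before decoding */
#endif

        s = xz_dec_init(XZ_SINGLE, 0);  /* dict_max is ignored in XZ_SINGLE mode */
        if (s == NULL)
                return XZ_MEM_ERROR;

        b.in = in;
        b.in_pos = 0;
        b.in_size = in_size;
        b.out = out;
        b.out_pos = 0;
        b.out_size = out_size;

        ret = xz_dec_run(s, &b);
        xz_dec_end(s);

        *out_len = b.out_pos;
        return ret;
}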
113
src/common/unix/xzminidec/src/xz_config.h
Normal file
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
* Private includes and definitions for userspace use of XZ Embedded
|
||||
*
|
||||
* Author: Lasse Collin <lasse.collin@tukaani.org>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#ifndef XZ_CONFIG_H
|
||||
#define XZ_CONFIG_H
|
||||
|
||||
/* Uncomment as needed to enable BCJ filter decoders. */
|
||||
/* #define XZ_DEC_X86 */
|
||||
/* #define XZ_DEC_POWERPC */
|
||||
/* #define XZ_DEC_IA64 */
|
||||
/* #define XZ_DEC_ARM */
|
||||
/* #define XZ_DEC_ARMTHUMB */
|
||||
/* #define XZ_DEC_SPARC */
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "xz.h"
|
||||
|
||||
#if defined(NV_XZ_CUSTOM_MEM_HOOKS)
|
||||
# include "nv_xz_mem_hooks.h"
|
||||
#else
|
||||
# include <stdlib.h>
|
||||
# include <string.h>
|
||||
# define kmalloc(size, flags) malloc(size)
|
||||
# define kfree(ptr) free(ptr)
|
||||
# define vmalloc(size) malloc(size)
|
||||
# define vfree(ptr) free(ptr)
|
||||
|
||||
# define memeq(a, b, size) (memcmp(a, b, size) == 0)
|
||||
# define memzero(buf, size) memset(buf, 0, size)
|
||||
#endif /* defined(NV_XZ_CUSTOM_MEM_HOOKS) */
|
||||
|
||||
#ifndef min
|
||||
# define min(x, y) ((x) < (y) ? (x) : (y))
|
||||
#endif
|
||||
#define min_t(type, x, y) min(x, y)
|
||||
|
||||
/*
|
||||
* Some functions have been marked with __always_inline to keep the
|
||||
* performance reasonable even when the compiler is optimizing for
|
||||
* small code size. You may be able to save a few bytes by #defining
|
||||
* __always_inline to plain inline, but don't complain if the code
|
||||
* becomes slow.
|
||||
*
|
||||
* NOTE: System headers on GNU/Linux may #define this macro already,
|
||||
* so if you want to change it, you need to #undef it first.
|
||||
*/
|
||||
#ifndef __always_inline
|
||||
# ifdef __GNUC__
|
||||
# define __always_inline \
|
||||
inline __attribute__((__always_inline__))
|
||||
# else
|
||||
# define __always_inline inline
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Inline functions to access unaligned unsigned 32-bit integers */
|
||||
#ifndef get_unaligned_le32
|
||||
static inline uint32_t get_unaligned_le32(const uint8_t *buf)
|
||||
{
|
||||
return (uint32_t)buf[0]
|
||||
| ((uint32_t)buf[1] << 8)
|
||||
| ((uint32_t)buf[2] << 16)
|
||||
| ((uint32_t)buf[3] << 24);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef get_unaligned_be32
|
||||
static inline uint32_t get_unaligned_be32(const uint8_t *buf)
|
||||
{
|
||||
return (uint32_t)(buf[0] << 24)
|
||||
| ((uint32_t)buf[1] << 16)
|
||||
| ((uint32_t)buf[2] << 8)
|
||||
| (uint32_t)buf[3];
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef put_unaligned_le32
|
||||
static inline void put_unaligned_le32(uint32_t val, uint8_t *buf)
|
||||
{
|
||||
buf[0] = (uint8_t)val;
|
||||
buf[1] = (uint8_t)(val >> 8);
|
||||
buf[2] = (uint8_t)(val >> 16);
|
||||
buf[3] = (uint8_t)(val >> 24);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef put_unaligned_be32
|
||||
static inline void put_unaligned_be32(uint32_t val, uint8_t *buf)
|
||||
{
|
||||
buf[0] = (uint8_t)(val >> 24);
|
||||
buf[1] = (uint8_t)(val >> 16);
|
||||
buf[2] = (uint8_t)(val >> 8);
|
||||
buf[3] = (uint8_t)val;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Use get_unaligned_le32() also for aligned access for simplicity. On
|
||||
* little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
|
||||
* could save a few bytes in code size.
|
||||
*/
|
||||
#ifndef get_le32
|
||||
# define get_le32 get_unaligned_le32
|
||||
#endif
|
||||
|
||||
#endif
|
||||
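As a small illustration, the little-endian helpers round-trip a 32-bit value through a byte buffer regardless of host endianness or pointer alignment:

/* Sketch only: 0x11223344 is stored as the bytes 44 33 22 11 and read back. */
static uint32_t example_unaligned_roundtrip(void)
{
        uint8_t buf[4];

        put_unaligned_le32(0x11223344, buf);  /* buf = { 0x44, 0x33, 0x22, 0x11 } */
        return get_unaligned_le32(buf);       /* == 0x11223344 */
}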
59
src/common/unix/xzminidec/src/xz_crc32.c
Normal file
@@ -0,0 +1,59 @@
/*
 * CRC32 using the polynomial from IEEE-802.3
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/*
 * This is not the fastest implementation, but it is pretty compact.
 * The fastest versions of xz_crc32() on modern CPUs without hardware
 * accelerated CRC instruction are 3-5 times as fast as this version,
 * but they are bigger and use more memory for the lookup table.
 */

#include "xz_private.h"

/*
 * STATIC_RW_DATA is used in the pre-boot environment on some architectures.
 * See <linux/decompress/mm.h> for details.
 */
#ifndef STATIC_RW_DATA
# define STATIC_RW_DATA static
#endif

STATIC_RW_DATA uint32_t xz_crc32_table[256];

XZ_EXTERN void xz_crc32_init(void)
{
        const uint32_t poly = 0xEDB88320;

        uint32_t i;
        uint32_t j;
        uint32_t r;

        for (i = 0; i < 256; ++i) {
                r = i;
                for (j = 0; j < 8; ++j)
                        r = (r >> 1) ^ (poly & ~((r & 1) - 1));

                xz_crc32_table[i] = r;
        }

        return;
}

XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
{
        crc = ~crc;

        while (size != 0) {
                crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
                --size;
        }

        return ~crc;
}
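A short usage sketch: xz_crc32_init() must run once before any other xz_* call, and a CRC can be computed in one shot or continued by feeding the previous return value back in as the third argument (per the xz.h comments).

/* Sketch only: one-shot and incremental xz_crc32() use give the same result. */
static uint32_t example_crc32(const uint8_t *buf, size_t len)
{
        uint32_t crc;

        xz_crc32_init();

        crc = xz_crc32(buf, len, 0);                        /* one shot, from 0 */

        crc = xz_crc32(buf, len / 2, 0);                    /* first half ... */
        crc = xz_crc32(buf + len / 2, len - len / 2, crc);  /* ... then the rest */

        return crc;
}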
574
src/common/unix/xzminidec/src/xz_dec_bcj.c
Normal file
@@ -0,0 +1,574 @@
|
||||
/*
|
||||
* Branch/Call/Jump (BCJ) filter decoders
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#include "xz_private.h"
|
||||
|
||||
/*
|
||||
* The rest of the file is inside this ifdef. It makes things a little more
|
||||
* convenient when building without support for any BCJ filters.
|
||||
*/
|
||||
#ifdef XZ_DEC_BCJ
|
||||
|
||||
struct xz_dec_bcj {
|
||||
/* Type of the BCJ filter being used */
|
||||
enum {
|
||||
BCJ_X86 = 4, /* x86 or x86-64 */
|
||||
BCJ_POWERPC = 5, /* Big endian only */
|
||||
BCJ_IA64 = 6, /* Big or little endian */
|
||||
BCJ_ARM = 7, /* Little endian only */
|
||||
BCJ_ARMTHUMB = 8, /* Little endian only */
|
||||
BCJ_SPARC = 9 /* Big or little endian */
|
||||
} type;
|
||||
|
||||
/*
|
||||
* Return value of the next filter in the chain. We need to preserve
|
||||
* this information across calls, because we must not call the next
|
||||
* filter anymore once it has returned XZ_STREAM_END.
|
||||
*/
|
||||
enum xz_ret ret;
|
||||
|
||||
/* True if we are operating in single-call mode. */
|
||||
bool single_call;
|
||||
|
||||
/*
|
||||
* Absolute position relative to the beginning of the uncompressed
|
||||
* data (in a single .xz Block). We care only about the lowest 32
|
||||
* bits so this doesn't need to be uint64_t even with big files.
|
||||
*/
|
||||
uint32_t pos;
|
||||
|
||||
/* x86 filter state */
|
||||
uint32_t x86_prev_mask;
|
||||
|
||||
/* Temporary space to hold the variables from struct xz_buf */
|
||||
uint8_t *out;
|
||||
size_t out_pos;
|
||||
size_t out_size;
|
||||
|
||||
struct {
|
||||
/* Amount of already filtered data in the beginning of buf */
|
||||
size_t filtered;
|
||||
|
||||
/* Total amount of data currently stored in buf */
|
||||
size_t size;
|
||||
|
||||
/*
|
||||
* Buffer to hold a mix of filtered and unfiltered data. This
|
||||
* needs to be big enough to hold Alignment + 2 * Look-ahead:
|
||||
*
|
||||
* Type Alignment Look-ahead
|
||||
* x86 1 4
|
||||
* PowerPC 4 0
|
||||
* IA-64 16 0
|
||||
* ARM 4 0
|
||||
* ARM-Thumb 2 2
|
||||
* SPARC 4 0
|
||||
*/
|
||||
uint8_t buf[16];
|
||||
} temp;
|
||||
};
|
||||
|
||||
#ifdef XZ_DEC_X86
|
||||
/*
|
||||
* This is used to test the most significant byte of a memory address
|
||||
* in an x86 instruction.
|
||||
*/
|
||||
static inline int bcj_x86_test_msbyte(uint8_t b)
|
||||
{
|
||||
return b == 0x00 || b == 0xFF;
|
||||
}
|
||||
|
||||
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
static const bool mask_to_allowed_status[8]
|
||||
= { true, true, true, false, true, false, false, false };
|
||||
|
||||
static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
|
||||
|
||||
size_t i;
|
||||
size_t prev_pos = (size_t)-1;
|
||||
uint32_t prev_mask = s->x86_prev_mask;
|
||||
uint32_t src;
|
||||
uint32_t dest;
|
||||
uint32_t j;
|
||||
uint8_t b;
|
||||
|
||||
if (size <= 4)
|
||||
return 0;
|
||||
|
||||
size -= 4;
|
||||
for (i = 0; i < size; ++i) {
|
||||
if ((buf[i] & 0xFE) != 0xE8)
|
||||
continue;
|
||||
|
||||
prev_pos = i - prev_pos;
|
||||
if (prev_pos > 3) {
|
||||
prev_mask = 0;
|
||||
} else {
|
||||
prev_mask = (prev_mask << (prev_pos - 1)) & 7;
|
||||
if (prev_mask != 0) {
|
||||
b = buf[i + 4 - mask_to_bit_num[prev_mask]];
|
||||
if (!mask_to_allowed_status[prev_mask]
|
||||
|| bcj_x86_test_msbyte(b)) {
|
||||
prev_pos = i;
|
||||
prev_mask = (prev_mask << 1) | 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
prev_pos = i;
|
||||
|
||||
if (bcj_x86_test_msbyte(buf[i + 4])) {
|
||||
src = get_unaligned_le32(buf + i + 1);
|
||||
while (true) {
|
||||
dest = src - (s->pos + (uint32_t)i + 5);
|
||||
if (prev_mask == 0)
|
||||
break;
|
||||
|
||||
j = mask_to_bit_num[prev_mask] * 8;
|
||||
b = (uint8_t)(dest >> (24 - j));
|
||||
if (!bcj_x86_test_msbyte(b))
|
||||
break;
|
||||
|
||||
src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
|
||||
}
|
||||
|
||||
dest &= 0x01FFFFFF;
|
||||
dest |= (uint32_t)0 - (dest & 0x01000000);
|
||||
put_unaligned_le32(dest, buf + i + 1);
|
||||
i += 4;
|
||||
} else {
|
||||
prev_mask = (prev_mask << 1) | 1;
|
||||
}
|
||||
}
|
||||
|
||||
prev_pos = i - prev_pos;
|
||||
s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_POWERPC
|
||||
static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
uint32_t instr;
|
||||
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
instr = get_unaligned_be32(buf + i);
|
||||
if ((instr & 0xFC000003) == 0x48000001) {
|
||||
instr &= 0x03FFFFFC;
|
||||
instr -= s->pos + (uint32_t)i;
|
||||
instr &= 0x03FFFFFC;
|
||||
instr |= 0x48000001;
|
||||
put_unaligned_be32(instr, buf + i);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
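To make the conversion above concrete, here is a small standalone sketch (illustrative only, not part of the decoder) that mirrors bcj_powerpc() on a single big-endian word. It assumes a "bl" instruction whose 24-bit offset field was rewritten to an absolute address by the encoder; the decoder subtracts the current uncompressed position to restore the relative offset.

#include <stdint.h>
#include <stdio.h>

/* Decode-direction conversion for one PowerPC "bl" word, as in bcj_powerpc(). */
static uint32_t ppc_bcj_decode_word(uint32_t instr, uint32_t pos)
{
        if ((instr & 0xFC000003) == 0x48000001) {     /* branch-and-link, AA=0 */
                uint32_t addr = instr & 0x03FFFFFC;   /* absolute target stored by the encoder */
                addr -= pos;                          /* back to a relative offset */
                instr = 0x48000001 | (addr & 0x03FFFFFC);
        }
        return instr;
}

int main(void)
{
        /* Word found at uncompressed offset 0x100; the encoder stored target 0x500. */
        uint32_t out = ppc_bcj_decode_word(0x48000501, 0x100);
        printf("0x%08X\n", out);   /* prints 0x48000401, i.e. "bl ." + 0x400 */
        return 0;
}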
|
||||
|
||||
#ifdef XZ_DEC_IA64
|
||||
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
static const uint8_t branch_table[32] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
4, 4, 6, 6, 0, 0, 7, 7,
|
||||
4, 4, 0, 0, 4, 4, 0, 0
|
||||
};
|
||||
|
||||
/*
|
||||
* The local variables take a little bit of stack space, but it's less
* than what the LZMA2 decoder takes, so it doesn't make sense to reduce
* stack usage here without doing that for the LZMA2 decoder too.
|
||||
*/
|
||||
|
||||
/* Loop counters */
|
||||
size_t i;
|
||||
size_t j;
|
||||
|
||||
/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
|
||||
uint32_t slot;
|
||||
|
||||
/* Bitwise offset of the instruction indicated by slot */
|
||||
uint32_t bit_pos;
|
||||
|
||||
/* bit_pos split into byte and bit parts */
|
||||
uint32_t byte_pos;
|
||||
uint32_t bit_res;
|
||||
|
||||
/* Address part of an instruction */
|
||||
uint32_t addr;
|
||||
|
||||
/* Mask used to detect which instructions to convert */
|
||||
uint32_t mask;
|
||||
|
||||
/* 41-bit instruction stored somewhere in the lowest 48 bits */
|
||||
uint64_t instr;
|
||||
|
||||
/* Instruction normalized with bit_res for easier manipulation */
|
||||
uint64_t norm;
|
||||
|
||||
for (i = 0; i + 16 <= size; i += 16) {
|
||||
mask = branch_table[buf[i] & 0x1F];
|
||||
for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
|
||||
if (((mask >> slot) & 1) == 0)
|
||||
continue;
|
||||
|
||||
byte_pos = bit_pos >> 3;
|
||||
bit_res = bit_pos & 7;
|
||||
instr = 0;
|
||||
for (j = 0; j < 6; ++j)
|
||||
instr |= (uint64_t)(buf[i + j + byte_pos])
|
||||
<< (8 * j);
|
||||
|
||||
norm = instr >> bit_res;
|
||||
|
||||
if (((norm >> 37) & 0x0F) == 0x05
|
||||
&& ((norm >> 9) & 0x07) == 0) {
|
||||
addr = (norm >> 13) & 0x0FFFFF;
|
||||
addr |= ((uint32_t)(norm >> 36) & 1) << 20;
|
||||
addr <<= 4;
|
||||
addr -= s->pos + (uint32_t)i;
|
||||
addr >>= 4;
|
||||
|
||||
norm &= ~((uint64_t)0x8FFFFF << 13);
|
||||
norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
|
||||
norm |= (uint64_t)(addr & 0x100000)
|
||||
<< (36 - 20);
|
||||
|
||||
instr &= (1 << bit_res) - 1;
|
||||
instr |= norm << bit_res;
|
||||
|
||||
for (j = 0; j < 6; j++)
|
||||
buf[i + j + byte_pos]
|
||||
= (uint8_t)(instr >> (8 * j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_ARM
|
||||
static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
uint32_t addr;
|
||||
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
if (buf[i + 3] == 0xEB) {
|
||||
addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
|
||||
| ((uint32_t)buf[i + 2] << 16);
|
||||
addr <<= 2;
|
||||
addr -= s->pos + (uint32_t)i + 8;
|
||||
addr >>= 2;
|
||||
buf[i] = (uint8_t)addr;
|
||||
buf[i + 1] = (uint8_t)(addr >> 8);
|
||||
buf[i + 2] = (uint8_t)(addr >> 16);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_ARMTHUMB
|
||||
static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
uint32_t addr;
|
||||
|
||||
for (i = 0; i + 4 <= size; i += 2) {
|
||||
if ((buf[i + 1] & 0xF8) == 0xF0
|
||||
&& (buf[i + 3] & 0xF8) == 0xF8) {
|
||||
addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
|
||||
| ((uint32_t)buf[i] << 11)
|
||||
| (((uint32_t)buf[i + 3] & 0x07) << 8)
|
||||
| (uint32_t)buf[i + 2];
|
||||
addr <<= 1;
|
||||
addr -= s->pos + (uint32_t)i + 4;
|
||||
addr >>= 1;
|
||||
buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
|
||||
buf[i] = (uint8_t)(addr >> 11);
|
||||
buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
|
||||
buf[i + 2] = (uint8_t)addr;
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_SPARC
|
||||
static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
uint32_t instr;
|
||||
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
instr = get_unaligned_be32(buf + i);
|
||||
if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
|
||||
instr <<= 2;
|
||||
instr -= s->pos + (uint32_t)i;
|
||||
instr >>= 2;
|
||||
instr = ((uint32_t)0x40000000 - (instr & 0x400000))
|
||||
| 0x40000000 | (instr & 0x3FFFFF);
|
||||
put_unaligned_be32(instr, buf + i);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Apply the selected BCJ filter. Update *pos and s->pos to match the amount
|
||||
* of data that got filtered.
|
||||
*
|
||||
* NOTE: This is implemented as a switch statement to avoid using function
|
||||
* pointers, which could be problematic in the kernel boot code, which must
|
||||
* avoid pointers to static data (at least on x86).
|
||||
*/
|
||||
static void bcj_apply(struct xz_dec_bcj *s,
|
||||
uint8_t *buf, size_t *pos, size_t size)
|
||||
{
|
||||
size_t filtered;
|
||||
|
||||
buf += *pos;
|
||||
size -= *pos;
|
||||
|
||||
switch (s->type) {
|
||||
#ifdef XZ_DEC_X86
|
||||
case BCJ_X86:
|
||||
filtered = bcj_x86(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_POWERPC
|
||||
case BCJ_POWERPC:
|
||||
filtered = bcj_powerpc(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_IA64
|
||||
case BCJ_IA64:
|
||||
filtered = bcj_ia64(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_ARM
|
||||
case BCJ_ARM:
|
||||
filtered = bcj_arm(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_ARMTHUMB
|
||||
case BCJ_ARMTHUMB:
|
||||
filtered = bcj_armthumb(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_SPARC
|
||||
case BCJ_SPARC:
|
||||
filtered = bcj_sparc(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
/* Never reached but silence compiler warnings. */
|
||||
filtered = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
*pos += filtered;
|
||||
s->pos += filtered;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush pending filtered data from temp to the output buffer.
|
||||
* Move the remaining mixture of possibly filtered and unfiltered
|
||||
* data to the beginning of temp.
|
||||
*/
|
||||
static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
|
||||
{
|
||||
size_t copy_size;
|
||||
|
||||
copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
|
||||
memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
|
||||
b->out_pos += copy_size;
|
||||
|
||||
s->temp.filtered -= copy_size;
|
||||
s->temp.size -= copy_size;
|
||||
memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
|
||||
}
|
||||
|
||||
/*
|
||||
* The BCJ filter functions are primitive in the sense that they process the
* data in chunks of 1-16 bytes. To hide this issue, this function does
* some buffering.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
|
||||
struct xz_dec_lzma2 *lzma2,
|
||||
struct xz_buf *b)
|
||||
{
|
||||
size_t out_start;
|
||||
|
||||
/*
|
||||
* Flush pending already filtered data to the output buffer. Return
|
||||
* immediately if we couldn't flush everything, or if the next
|
||||
* filter in the chain had already returned XZ_STREAM_END.
|
||||
*/
|
||||
if (s->temp.filtered > 0) {
|
||||
bcj_flush(s, b);
|
||||
if (s->temp.filtered > 0)
|
||||
return XZ_OK;
|
||||
|
||||
if (s->ret == XZ_STREAM_END)
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have more output space than what is currently pending in
|
||||
* temp, copy the unfiltered data from temp to the output buffer
|
||||
* and try to fill the output buffer by decoding more data from the
|
||||
* next filter in the chain. Apply the BCJ filter on the new data
|
||||
* in the output buffer. If everything cannot be filtered, copy it
|
||||
* to temp and rewind the output buffer position accordingly.
|
||||
*
|
||||
* This needs to be always run when temp.size == 0 to handle a special
|
||||
* case where the output buffer is full and the next filter has no
|
||||
* more output coming but hasn't returned XZ_STREAM_END yet.
|
||||
*/
|
||||
if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
|
||||
out_start = b->out_pos;
|
||||
memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
|
||||
b->out_pos += s->temp.size;
|
||||
|
||||
s->ret = xz_dec_lzma2_run(lzma2, b);
|
||||
if (s->ret != XZ_STREAM_END
|
||||
&& (s->ret != XZ_OK || s->single_call))
|
||||
return s->ret;
|
||||
|
||||
bcj_apply(s, b->out, &out_start, b->out_pos);
|
||||
|
||||
/*
|
||||
* As an exception, if the next filter returned XZ_STREAM_END,
|
||||
* we can do that too, since the last few bytes that remain
|
||||
* unfiltered are meant to remain unfiltered.
|
||||
*/
|
||||
if (s->ret == XZ_STREAM_END)
|
||||
return XZ_STREAM_END;
|
||||
|
||||
s->temp.size = b->out_pos - out_start;
|
||||
b->out_pos -= s->temp.size;
|
||||
memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
|
||||
|
||||
/*
|
||||
* If there wasn't enough input to the next filter to fill
|
||||
* the output buffer with unfiltered data, there's no point
|
||||
* to try decoding more data to temp.
|
||||
*/
|
||||
if (b->out_pos + s->temp.size < b->out_size)
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have unfiltered data in temp. If the output buffer isn't full
|
||||
* yet, try to fill the temp buffer by decoding more data from the
|
||||
* next filter. Apply the BCJ filter on temp. Then we hopefully can
|
||||
* fill the actual output buffer by copying filtered data from temp.
|
||||
* A mix of filtered and unfiltered data may be left in temp; it will
|
||||
* be taken care of on the next call to this function.
|
||||
*/
|
||||
if (b->out_pos < b->out_size) {
|
||||
/* Make b->out{,_pos,_size} temporarily point to s->temp. */
|
||||
s->out = b->out;
|
||||
s->out_pos = b->out_pos;
|
||||
s->out_size = b->out_size;
|
||||
b->out = s->temp.buf;
|
||||
b->out_pos = s->temp.size;
|
||||
b->out_size = sizeof(s->temp.buf);
|
||||
|
||||
s->ret = xz_dec_lzma2_run(lzma2, b);
|
||||
|
||||
s->temp.size = b->out_pos;
|
||||
b->out = s->out;
|
||||
b->out_pos = s->out_pos;
|
||||
b->out_size = s->out_size;
|
||||
|
||||
if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
|
||||
return s->ret;
|
||||
|
||||
bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
|
||||
|
||||
/*
|
||||
* If the next filter returned XZ_STREAM_END, we mark that
|
||||
* everything is filtered, since the last unfiltered bytes
|
||||
* of the stream are meant to be left as is.
|
||||
*/
|
||||
if (s->ret == XZ_STREAM_END)
|
||||
s->temp.filtered = s->temp.size;
|
||||
|
||||
bcj_flush(s, b);
|
||||
if (s->temp.filtered > 0)
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
return s->ret;
|
||||
}
|
||||
|
||||
XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
|
||||
{
|
||||
struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
if (s != NULL)
|
||||
s->single_call = single_call;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
|
||||
{
|
||||
switch (id) {
|
||||
#ifdef XZ_DEC_X86
|
||||
case BCJ_X86:
|
||||
#endif
|
||||
#ifdef XZ_DEC_POWERPC
|
||||
case BCJ_POWERPC:
|
||||
#endif
|
||||
#ifdef XZ_DEC_IA64
|
||||
case BCJ_IA64:
|
||||
#endif
|
||||
#ifdef XZ_DEC_ARM
|
||||
case BCJ_ARM:
|
||||
#endif
|
||||
#ifdef XZ_DEC_ARMTHUMB
|
||||
case BCJ_ARMTHUMB:
|
||||
#endif
|
||||
#ifdef XZ_DEC_SPARC
|
||||
case BCJ_SPARC:
|
||||
#endif
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Unsupported Filter ID */
|
||||
return XZ_OPTIONS_ERROR;
|
||||
}
|
||||
|
||||
s->type = id;
|
||||
s->ret = XZ_OK;
|
||||
s->pos = 0;
|
||||
s->x86_prev_mask = 0;
|
||||
s->temp.filtered = 0;
|
||||
s->temp.size = 0;
|
||||
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
#endif
|
||||
1173
src/common/unix/xzminidec/src/xz_dec_lzma2.c
Normal file
File diff suppressed because it is too large
829
src/common/unix/xzminidec/src/xz_dec_stream.c
Normal file
@@ -0,0 +1,829 @@
|
||||
/*
|
||||
* .xz Stream decoder
|
||||
*
|
||||
* Author: Lasse Collin <lasse.collin@tukaani.org>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#include "xz_private.h"
|
||||
#include "xz_stream.h"
|
||||
|
||||
/* Hash used to validate the Index field */
|
||||
struct xz_dec_hash {
|
||||
vli_type unpadded;
|
||||
vli_type uncompressed;
|
||||
uint32_t crc32;
|
||||
};
|
||||
|
||||
struct xz_dec {
|
||||
/* Position in dec_main() */
|
||||
enum {
|
||||
SEQ_STREAM_HEADER,
|
||||
SEQ_BLOCK_START,
|
||||
SEQ_BLOCK_HEADER,
|
||||
SEQ_BLOCK_UNCOMPRESS,
|
||||
SEQ_BLOCK_PADDING,
|
||||
SEQ_BLOCK_CHECK,
|
||||
SEQ_INDEX,
|
||||
SEQ_INDEX_PADDING,
|
||||
SEQ_INDEX_CRC32,
|
||||
SEQ_STREAM_FOOTER
|
||||
} sequence;
|
||||
|
||||
/* Position in variable-length integers and Check fields */
|
||||
uint32_t pos;
|
||||
|
||||
/* Variable-length integer decoded by dec_vli() */
|
||||
vli_type vli;
|
||||
|
||||
/* Saved in_pos and out_pos */
|
||||
size_t in_start;
|
||||
size_t out_start;
|
||||
|
||||
/* CRC32 value in Block or Index */
|
||||
uint32_t crc32;
|
||||
|
||||
/* Type of the integrity check calculated from uncompressed data */
|
||||
enum xz_check check_type;
|
||||
|
||||
/* Operation mode */
|
||||
enum xz_mode mode;
|
||||
|
||||
/*
|
||||
* True if the next call to xz_dec_run() is allowed to return
|
||||
* XZ_BUF_ERROR.
|
||||
*/
|
||||
bool allow_buf_error;
|
||||
|
||||
/* Information stored in Block Header */
|
||||
struct {
|
||||
/*
|
||||
* Value stored in the Compressed Size field, or
|
||||
* VLI_UNKNOWN if Compressed Size is not present.
|
||||
*/
|
||||
vli_type compressed;
|
||||
|
||||
/*
|
||||
* Value stored in the Uncompressed Size field, or
|
||||
* VLI_UNKNOWN if Uncompressed Size is not present.
|
||||
*/
|
||||
vli_type uncompressed;
|
||||
|
||||
/* Size of the Block Header field */
|
||||
uint32_t size;
|
||||
} block_header;
|
||||
|
||||
/* Information collected when decoding Blocks */
|
||||
struct {
|
||||
/* Observed compressed size of the current Block */
|
||||
vli_type compressed;
|
||||
|
||||
/* Observed uncompressed size of the current Block */
|
||||
vli_type uncompressed;
|
||||
|
||||
/* Number of Blocks decoded so far */
|
||||
vli_type count;
|
||||
|
||||
/*
|
||||
* Hash calculated from the Block sizes. This is used to
|
||||
* validate the Index field.
|
||||
*/
|
||||
struct xz_dec_hash hash;
|
||||
} block;
|
||||
|
||||
/* Variables needed when verifying the Index field */
|
||||
struct {
|
||||
/* Position in dec_index() */
|
||||
enum {
|
||||
SEQ_INDEX_COUNT,
|
||||
SEQ_INDEX_UNPADDED,
|
||||
SEQ_INDEX_UNCOMPRESSED
|
||||
} sequence;
|
||||
|
||||
/* Size of the Index in bytes */
|
||||
vli_type size;
|
||||
|
||||
/* Number of Records (matches block.count in valid files) */
|
||||
vli_type count;
|
||||
|
||||
/*
|
||||
* Hash calculated from the Records (matches block.hash in
|
||||
* valid files).
|
||||
*/
|
||||
struct xz_dec_hash hash;
|
||||
} index;
|
||||
|
||||
/*
|
||||
* Temporary buffer needed to hold Stream Header, Block Header,
|
||||
* and Stream Footer. The Block Header is the biggest (1 KiB)
|
||||
* so we reserve space according to that. buf[] has to be aligned
|
||||
* to a multiple of four bytes; the size_t variables before it
|
||||
* should guarantee this.
|
||||
*/
|
||||
struct {
|
||||
size_t pos;
|
||||
size_t size;
|
||||
uint8_t buf[1024];
|
||||
} temp;
|
||||
|
||||
struct xz_dec_lzma2 *lzma2;
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
struct xz_dec_bcj *bcj;
|
||||
bool bcj_active;
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
/* Sizes of the Check field with different Check IDs */
|
||||
static const uint8_t check_sizes[16] = {
|
||||
0,
|
||||
4, 4, 4,
|
||||
8, 8, 8,
|
||||
16, 16, 16,
|
||||
32, 32, 32,
|
||||
64, 64, 64
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
|
||||
* must have set s->temp.pos to indicate how much data we are supposed
|
||||
* to copy into s->temp.buf. Return true once s->temp.pos has reached
|
||||
* s->temp.size.
|
||||
*/
|
||||
static bool fill_temp(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
size_t copy_size = min_t(size_t,
|
||||
b->in_size - b->in_pos, s->temp.size - s->temp.pos);
|
||||
|
||||
memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
|
||||
b->in_pos += copy_size;
|
||||
s->temp.pos += copy_size;
|
||||
|
||||
if (s->temp.pos == s->temp.size) {
|
||||
s->temp.pos = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Decode a variable-length integer (little-endian base-128 encoding) */
|
||||
static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
|
||||
size_t *in_pos, size_t in_size)
|
||||
{
|
||||
uint8_t byte;
|
||||
|
||||
if (s->pos == 0)
|
||||
s->vli = 0;
|
||||
|
||||
while (*in_pos < in_size) {
|
||||
byte = in[*in_pos];
|
||||
++*in_pos;
|
||||
|
||||
s->vli |= (vli_type)(byte & 0x7F) << s->pos;
|
||||
|
||||
if ((byte & 0x80) == 0) {
|
||||
/* Don't allow non-minimal encodings. */
|
||||
if (byte == 0 && s->pos != 0)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->pos = 0;
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
s->pos += 7;
|
||||
if (s->pos == 7 * VLI_BYTES_MAX)
|
||||
return XZ_DATA_ERROR;
|
||||
}
|
||||
|
||||
return XZ_OK;
|
||||
}
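For reference, the inverse of dec_vli() is straightforward: each byte carries seven payload bits, least-significant group first, with the high bit set on every byte except the last. The sketch below is a hypothetical encoder, shown only to illustrate the format this decoder expects (at most VLI_BYTES_MAX = 9 bytes for a 63-bit value, and no non-minimal trailing 0x00 bytes).

#include <stdint.h>
#include <stddef.h>

/* Encode v in the little-endian base-128 form that dec_vli() parses. */
static size_t vli_encode(uint64_t v, uint8_t out[9])
{
        size_t n = 0;

        while (v >= 0x80) {
                out[n++] = (uint8_t)v | 0x80;   /* 7 payload bits, continuation bit set */
                v >>= 7;
        }
        out[n++] = (uint8_t)v;                  /* final byte: continuation bit clear */

        return n;                               /* e.g. 300 encodes as AC 02 (2 bytes) */
}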
|
||||
|
||||
/*
|
||||
* Decode the Compressed Data field from a Block. Update and validate
|
||||
* the observed compressed and uncompressed sizes of the Block so that
|
||||
* they don't exceed the values possibly stored in the Block Header
|
||||
* (validation assumes that no integer overflow occurs, since vli_type
|
||||
* is normally uint64_t). Update the CRC32 if presence of the CRC32
|
||||
* field was indicated in Stream Header.
|
||||
*
|
||||
* Once the decoding is finished, validate that the observed sizes match
|
||||
* the sizes possibly stored in the Block Header. Update the hash and
|
||||
* Block count, which are later used to validate the Index field.
|
||||
*/
|
||||
static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
enum xz_ret ret;
|
||||
|
||||
s->in_start = b->in_pos;
|
||||
s->out_start = b->out_pos;
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
if (s->bcj_active)
|
||||
ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
|
||||
else
|
||||
#endif
|
||||
ret = xz_dec_lzma2_run(s->lzma2, b);
|
||||
|
||||
s->block.compressed += b->in_pos - s->in_start;
|
||||
s->block.uncompressed += b->out_pos - s->out_start;
|
||||
|
||||
/*
|
||||
* There is no need to separately check for VLI_UNKNOWN, since
|
||||
* the observed sizes are always smaller than VLI_UNKNOWN.
|
||||
*/
|
||||
if (s->block.compressed > s->block_header.compressed
|
||||
|| s->block.uncompressed
|
||||
> s->block_header.uncompressed)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (s->check_type == XZ_CHECK_CRC32)
|
||||
s->crc32 = xz_crc32(b->out + s->out_start,
|
||||
b->out_pos - s->out_start, s->crc32);
|
||||
|
||||
if (ret == XZ_STREAM_END) {
|
||||
if (s->block_header.compressed != VLI_UNKNOWN
|
||||
&& s->block_header.compressed
|
||||
!= s->block.compressed)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (s->block_header.uncompressed != VLI_UNKNOWN
|
||||
&& s->block_header.uncompressed
|
||||
!= s->block.uncompressed)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->block.hash.unpadded += s->block_header.size
|
||||
+ s->block.compressed;
|
||||
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
s->block.hash.unpadded += check_sizes[s->check_type];
|
||||
#else
|
||||
if (s->check_type == XZ_CHECK_CRC32)
|
||||
s->block.hash.unpadded += 4;
|
||||
#endif
|
||||
|
||||
s->block.hash.uncompressed += s->block.uncompressed;
|
||||
s->block.hash.crc32 = xz_crc32(
|
||||
(const uint8_t *)&s->block.hash,
|
||||
sizeof(s->block.hash), s->block.hash.crc32);
|
||||
|
||||
++s->block.count;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Update the Index size and the CRC32 value. */
|
||||
static void index_update(struct xz_dec *s, const struct xz_buf *b)
|
||||
{
|
||||
size_t in_used = b->in_pos - s->in_start;
|
||||
s->index.size += in_used;
|
||||
s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode the Number of Records, Unpadded Size, and Uncompressed Size
|
||||
* fields from the Index field. That is, Index Padding and CRC32 are not
|
||||
* decoded by this function.
|
||||
*
|
||||
* This can return XZ_OK (more input needed), XZ_STREAM_END (everything
|
||||
* successfully decoded), or XZ_DATA_ERROR (input is corrupt).
|
||||
*/
|
||||
static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
enum xz_ret ret;
|
||||
|
||||
do {
|
||||
ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
|
||||
if (ret != XZ_STREAM_END) {
|
||||
index_update(s, b);
|
||||
return ret;
|
||||
}
|
||||
|
||||
switch (s->index.sequence) {
|
||||
case SEQ_INDEX_COUNT:
|
||||
s->index.count = s->vli;
|
||||
|
||||
/*
|
||||
* Validate that the Number of Records field
|
||||
* indicates the same number of Records as
|
||||
* there were Blocks in the Stream.
|
||||
*/
|
||||
if (s->index.count != s->block.count)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->index.sequence = SEQ_INDEX_UNPADDED;
|
||||
break;
|
||||
|
||||
case SEQ_INDEX_UNPADDED:
|
||||
s->index.hash.unpadded += s->vli;
|
||||
s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
|
||||
break;
|
||||
|
||||
case SEQ_INDEX_UNCOMPRESSED:
|
||||
s->index.hash.uncompressed += s->vli;
|
||||
s->index.hash.crc32 = xz_crc32(
|
||||
(const uint8_t *)&s->index.hash,
|
||||
sizeof(s->index.hash),
|
||||
s->index.hash.crc32);
|
||||
--s->index.count;
|
||||
s->index.sequence = SEQ_INDEX_UNPADDED;
|
||||
break;
|
||||
}
|
||||
} while (s->index.count > 0);
|
||||
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate that the next four input bytes match the value of s->crc32.
|
||||
* s->pos must be zero when starting to validate the first byte.
|
||||
*/
|
||||
static enum xz_ret crc32_validate(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
do {
|
||||
if (b->in_pos == b->in_size)
|
||||
return XZ_OK;
|
||||
|
||||
if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++])
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->pos += 8;
|
||||
|
||||
} while (s->pos < 32);
|
||||
|
||||
s->crc32 = 0;
|
||||
s->pos = 0;
|
||||
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
/*
|
||||
* Skip over the Check field when the Check ID is not supported.
|
||||
* Returns true once the whole Check field has been skipped over.
|
||||
*/
|
||||
static bool check_skip(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
while (s->pos < check_sizes[s->check_type]) {
|
||||
if (b->in_pos == b->in_size)
|
||||
return false;
|
||||
|
||||
++b->in_pos;
|
||||
++s->pos;
|
||||
}
|
||||
|
||||
s->pos = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
|
||||
static enum xz_ret dec_stream_header(struct xz_dec *s)
|
||||
{
|
||||
if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
|
||||
return XZ_FORMAT_ERROR;
|
||||
|
||||
if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
|
||||
!= get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
/*
|
||||
* Of integrity checks, we support only none (Check ID = 0) and
|
||||
* CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
|
||||
* we will accept other check types too, but then the check won't
|
||||
* be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
|
||||
*/
|
||||
s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
|
||||
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
if (s->check_type > XZ_CHECK_MAX)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
if (s->check_type > XZ_CHECK_CRC32)
|
||||
return XZ_UNSUPPORTED_CHECK;
|
||||
#else
|
||||
if (s->check_type > XZ_CHECK_CRC32)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
#endif
|
||||
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
|
||||
static enum xz_ret dec_stream_footer(struct xz_dec *s)
|
||||
{
|
||||
if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
/*
|
||||
* Validate Backward Size. Note that we never added the size of the
|
||||
* Index CRC32 field to s->index.size, thus we use s->index.size / 4
|
||||
* instead of s->index.size / 4 - 1.
|
||||
*/
|
||||
if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
/*
|
||||
* Use XZ_STREAM_END instead of XZ_OK to be more convenient
|
||||
* for the caller.
|
||||
*/
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
/* Decode the Block Header and initialize the filter chain. */
|
||||
static enum xz_ret dec_block_header(struct xz_dec *s)
|
||||
{
|
||||
enum xz_ret ret;
|
||||
|
||||
/*
|
||||
* Validate the CRC32. We know that the temp buffer is at least
|
||||
* eight bytes so this is safe.
|
||||
*/
|
||||
s->temp.size -= 4;
|
||||
if (xz_crc32(s->temp.buf, s->temp.size, 0)
|
||||
!= get_le32(s->temp.buf + s->temp.size))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->temp.pos = 2;
|
||||
|
||||
/*
|
||||
* Catch unsupported Block Flags. We support only one or two filters
|
||||
* in the chain, so we catch that with the same test.
|
||||
*/
|
||||
#ifdef XZ_DEC_BCJ
|
||||
if (s->temp.buf[1] & 0x3E)
|
||||
#else
|
||||
if (s->temp.buf[1] & 0x3F)
|
||||
#endif
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
/* Compressed Size */
|
||||
if (s->temp.buf[1] & 0x40) {
|
||||
if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
|
||||
!= XZ_STREAM_END)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->block_header.compressed = s->vli;
|
||||
} else {
|
||||
s->block_header.compressed = VLI_UNKNOWN;
|
||||
}
|
||||
|
||||
/* Uncompressed Size */
|
||||
if (s->temp.buf[1] & 0x80) {
|
||||
if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
|
||||
!= XZ_STREAM_END)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->block_header.uncompressed = s->vli;
|
||||
} else {
|
||||
s->block_header.uncompressed = VLI_UNKNOWN;
|
||||
}
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
/* If there are two filters, the first one must be a BCJ filter. */
|
||||
s->bcj_active = s->temp.buf[1] & 0x01;
|
||||
if (s->bcj_active) {
|
||||
if (s->temp.size - s->temp.pos < 2)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
|
||||
if (ret != XZ_OK)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* We don't support custom start offset,
|
||||
* so Size of Properties must be zero.
|
||||
*/
|
||||
if (s->temp.buf[s->temp.pos++] != 0x00)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Valid Filter Flags always take at least two bytes. */
|
||||
if (s->temp.size - s->temp.pos < 2)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
/* Filter ID = LZMA2 */
|
||||
if (s->temp.buf[s->temp.pos++] != 0x21)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
/* Size of Properties = 1-byte Filter Properties */
|
||||
if (s->temp.buf[s->temp.pos++] != 0x01)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
/* Filter Properties contains LZMA2 dictionary size. */
|
||||
if (s->temp.size - s->temp.pos < 1)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
|
||||
if (ret != XZ_OK)
|
||||
return ret;
|
||||
|
||||
/* The rest must be Header Padding. */
|
||||
while (s->temp.pos < s->temp.size)
|
||||
if (s->temp.buf[s->temp.pos++] != 0x00)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
s->temp.pos = 0;
|
||||
s->block.compressed = 0;
|
||||
s->block.uncompressed = 0;
|
||||
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
enum xz_ret ret;
|
||||
|
||||
/*
|
||||
* Store the start position for the case when we are in the middle
|
||||
* of the Index field.
|
||||
*/
|
||||
s->in_start = b->in_pos;
|
||||
|
||||
while (true) {
|
||||
switch (s->sequence) {
|
||||
case SEQ_STREAM_HEADER:
|
||||
/*
|
||||
* Stream Header is copied to s->temp, and then
|
||||
* decoded from there. This way if the caller
|
||||
* gives us only little input at a time, we can
|
||||
* still keep the Stream Header decoding code
|
||||
* simple. A similar approach is used in many places
|
||||
* in this file.
|
||||
*/
|
||||
if (!fill_temp(s, b))
|
||||
return XZ_OK;
|
||||
|
||||
/*
|
||||
* If dec_stream_header() returns
|
||||
* XZ_UNSUPPORTED_CHECK, it is still possible
|
||||
* to continue decoding if working in multi-call
|
||||
* mode. Thus, update s->sequence before calling
|
||||
* dec_stream_header().
|
||||
*/
|
||||
s->sequence = SEQ_BLOCK_START;
|
||||
|
||||
ret = dec_stream_header(s);
|
||||
if (ret != XZ_OK)
|
||||
return ret;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_START:
|
||||
/* We need one byte of input to continue. */
|
||||
if (b->in_pos == b->in_size)
|
||||
return XZ_OK;
|
||||
|
||||
/* See if this is the beginning of the Index field. */
|
||||
if (b->in[b->in_pos] == 0) {
|
||||
s->in_start = b->in_pos++;
|
||||
s->sequence = SEQ_INDEX;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the size of the Block Header and
|
||||
* prepare to decode it.
|
||||
*/
|
||||
s->block_header.size
|
||||
= ((uint32_t)b->in[b->in_pos] + 1) * 4;
|
||||
|
||||
s->temp.size = s->block_header.size;
|
||||
s->temp.pos = 0;
|
||||
s->sequence = SEQ_BLOCK_HEADER;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_HEADER:
|
||||
if (!fill_temp(s, b))
|
||||
return XZ_OK;
|
||||
|
||||
ret = dec_block_header(s);
|
||||
if (ret != XZ_OK)
|
||||
return ret;
|
||||
|
||||
s->sequence = SEQ_BLOCK_UNCOMPRESS;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_UNCOMPRESS:
|
||||
ret = dec_block(s, b);
|
||||
if (ret != XZ_STREAM_END)
|
||||
return ret;
|
||||
|
||||
s->sequence = SEQ_BLOCK_PADDING;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_PADDING:
|
||||
/*
|
||||
* Size of Compressed Data + Block Padding
|
||||
* must be a multiple of four. We don't need
|
||||
* s->block.compressed for anything else
|
||||
* anymore, so we use it here to test the size
|
||||
* of the Block Padding field.
|
||||
*/
|
||||
while (s->block.compressed & 3) {
|
||||
if (b->in_pos == b->in_size)
|
||||
return XZ_OK;
|
||||
|
||||
if (b->in[b->in_pos++] != 0)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
++s->block.compressed;
|
||||
}
|
||||
|
||||
s->sequence = SEQ_BLOCK_CHECK;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_CHECK:
|
||||
if (s->check_type == XZ_CHECK_CRC32) {
|
||||
ret = crc32_validate(s, b);
|
||||
if (ret != XZ_STREAM_END)
|
||||
return ret;
|
||||
}
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
else if (!check_skip(s, b)) {
|
||||
return XZ_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
s->sequence = SEQ_BLOCK_START;
|
||||
break;
|
||||
|
||||
case SEQ_INDEX:
|
||||
ret = dec_index(s, b);
|
||||
if (ret != XZ_STREAM_END)
|
||||
return ret;
|
||||
|
||||
s->sequence = SEQ_INDEX_PADDING;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_INDEX_PADDING:
|
||||
while ((s->index.size + (b->in_pos - s->in_start))
|
||||
& 3) {
|
||||
if (b->in_pos == b->in_size) {
|
||||
index_update(s, b);
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
if (b->in[b->in_pos++] != 0)
|
||||
return XZ_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Finish the CRC32 value and Index size. */
|
||||
index_update(s, b);
|
||||
|
||||
/* Compare the hashes to validate the Index field. */
|
||||
if (!memeq(&s->block.hash, &s->index.hash,
|
||||
sizeof(s->block.hash)))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->sequence = SEQ_INDEX_CRC32;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_INDEX_CRC32:
|
||||
ret = crc32_validate(s, b);
|
||||
if (ret != XZ_STREAM_END)
|
||||
return ret;
|
||||
|
||||
s->temp.size = STREAM_HEADER_SIZE;
|
||||
s->sequence = SEQ_STREAM_FOOTER;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_STREAM_FOOTER:
|
||||
if (!fill_temp(s, b))
|
||||
return XZ_OK;
|
||||
|
||||
return dec_stream_footer(s);
|
||||
}
|
||||
}
|
||||
|
||||
/* Never reached */
|
||||
}
|
||||
|
||||
/*
|
||||
* xz_dec_run() is a wrapper for dec_main() to handle some special cases in
|
||||
* multi-call and single-call decoding.
|
||||
*
|
||||
* In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
|
||||
* are not going to make any progress anymore. This is to prevent the caller
|
||||
* from calling us infinitely when the input file is truncated or otherwise
|
||||
* corrupt. Since the zlib-style API allows the caller to fill the input buffer
|
||||
* only when the decoder doesn't produce any new output, we have to be careful
|
||||
* to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
|
||||
* after the second consecutive call to xz_dec_run() that makes no progress.
|
||||
*
|
||||
* In single-call mode, if we couldn't decode everything and no error
|
||||
* occurred, either the input is truncated or the output buffer is too small.
|
||||
* Since we know that the last input byte never produces any output, we know
|
||||
* that if all the input was consumed and decoding wasn't finished, the file
|
||||
* must be corrupt. Otherwise the output buffer has to be too small or the
|
||||
* file is corrupt in a way that decoding it produces too big output.
|
||||
*
|
||||
* If single-call decoding fails, we reset b->in_pos and b->out_pos back to
|
||||
* their original values. This is because with some filter chains there won't
|
||||
* be any valid uncompressed data in the output buffer unless the decoding
|
||||
* actually succeeds (that's the price to pay for using the output buffer as
|
||||
* the workspace).
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
size_t in_start;
|
||||
size_t out_start;
|
||||
enum xz_ret ret;
|
||||
|
||||
if (DEC_IS_SINGLE(s->mode))
|
||||
xz_dec_reset(s);
|
||||
|
||||
in_start = b->in_pos;
|
||||
out_start = b->out_pos;
|
||||
ret = dec_main(s, b);
|
||||
|
||||
if (DEC_IS_SINGLE(s->mode)) {
|
||||
if (ret == XZ_OK)
|
||||
ret = b->in_pos == b->in_size
|
||||
? XZ_DATA_ERROR : XZ_BUF_ERROR;
|
||||
|
||||
if (ret != XZ_STREAM_END) {
|
||||
b->in_pos = in_start;
|
||||
b->out_pos = out_start;
|
||||
}
|
||||
|
||||
} else if (ret == XZ_OK && in_start == b->in_pos
|
||||
&& out_start == b->out_pos) {
|
||||
if (s->allow_buf_error)
|
||||
ret = XZ_BUF_ERROR;
|
||||
|
||||
s->allow_buf_error = true;
|
||||
} else {
|
||||
s->allow_buf_error = false;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
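For context on how the entry points above fit together, here is a minimal usage sketch in single-call mode. It assumes a hosted userspace build where xz.h declares xz_dec_init(), xz_dec_run(), xz_dec_end(), xz_crc32_init() and struct xz_buf as used throughout this file; error handling is abbreviated.

#include <stdint.h>
#include <stddef.h>
#include "xz.h"     /* public API of this mini-decoder (assumed userspace build) */

/* Decode a whole in-memory .xz stream in one shot using XZ_SINGLE mode. */
static int decode_buffer(const uint8_t *in, size_t in_size,
                         uint8_t *out, size_t out_size, size_t *out_used)
{
        struct xz_buf b;
        struct xz_dec *s;
        enum xz_ret ret;

        xz_crc32_init();                /* needed when the bundled CRC32 is used */

        s = xz_dec_init(XZ_SINGLE, 0);  /* dict_max is not used in single-call mode */
        if (s == NULL)
                return -1;

        b.in = in;
        b.in_pos = 0;
        b.in_size = in_size;
        b.out = out;
        b.out_pos = 0;
        b.out_size = out_size;

        ret = xz_dec_run(s, &b);        /* XZ_STREAM_END means complete success */
        xz_dec_end(s);

        if (ret != XZ_STREAM_END)
                return -1;

        *out_used = b.out_pos;
        return 0;
}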
|
||||
|
||||
XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max)
|
||||
{
|
||||
struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
if (s == NULL)
|
||||
return NULL;
|
||||
|
||||
s->mode = mode;
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
|
||||
if (s->bcj == NULL)
|
||||
goto error_bcj;
|
||||
#endif
|
||||
|
||||
s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
|
||||
if (s->lzma2 == NULL)
|
||||
goto error_lzma2;
|
||||
|
||||
xz_dec_reset(s);
|
||||
return s;
|
||||
|
||||
error_lzma2:
|
||||
#ifdef XZ_DEC_BCJ
|
||||
xz_dec_bcj_end(s->bcj);
|
||||
error_bcj:
|
||||
#endif
|
||||
kfree(s);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
XZ_EXTERN void xz_dec_reset(struct xz_dec *s)
|
||||
{
|
||||
s->sequence = SEQ_STREAM_HEADER;
|
||||
s->allow_buf_error = false;
|
||||
s->pos = 0;
|
||||
s->crc32 = 0;
|
||||
memzero(&s->block, sizeof(s->block));
|
||||
memzero(&s->index, sizeof(s->index));
|
||||
s->temp.pos = 0;
|
||||
s->temp.size = STREAM_HEADER_SIZE;
|
||||
}
|
||||
|
||||
XZ_EXTERN void xz_dec_end(struct xz_dec *s)
|
||||
{
|
||||
if (s != NULL) {
|
||||
xz_dec_lzma2_end(s->lzma2);
|
||||
#ifdef XZ_DEC_BCJ
|
||||
xz_dec_bcj_end(s->bcj);
|
||||
#endif
|
||||
kfree(s);
|
||||
}
|
||||
}
|
||||
204
src/common/unix/xzminidec/src/xz_lzma2.h
Normal file
@@ -0,0 +1,204 @@
|
||||
/*
|
||||
* LZMA2 definitions
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#ifndef XZ_LZMA2_H
|
||||
#define XZ_LZMA2_H
|
||||
|
||||
/* Range coder constants */
|
||||
#define RC_SHIFT_BITS 8
|
||||
#define RC_TOP_BITS 24
|
||||
#define RC_TOP_VALUE (1 << RC_TOP_BITS)
|
||||
#define RC_BIT_MODEL_TOTAL_BITS 11
|
||||
#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
|
||||
#define RC_MOVE_BITS 5
|
||||
|
||||
/*
|
||||
* Maximum number of position states. A position state is the lowest pb
|
||||
* number of bits of the current uncompressed offset. In some places there
|
||||
* are different sets of probabilities for different position states.
|
||||
*/
|
||||
#define POS_STATES_MAX (1 << 4)
|
||||
|
||||
/*
|
||||
* This enum is used to track which LZMA symbols have occurred most recently
|
||||
* and in which order. This information is used to predict the next symbol.
|
||||
*
|
||||
* Symbols:
|
||||
* - Literal: One 8-bit byte
|
||||
* - Match: Repeat a chunk of data at some distance
|
||||
* - Long repeat: Multi-byte match at a recently seen distance
|
||||
* - Short repeat: One-byte repeat at a recently seen distance
|
||||
*
|
||||
* The symbol names are in the form STATE_oldest_older_previous. REP means
|
||||
* either short or long repeated match, and NONLIT means any non-literal.
|
||||
*/
|
||||
enum lzma_state {
|
||||
STATE_LIT_LIT,
|
||||
STATE_MATCH_LIT_LIT,
|
||||
STATE_REP_LIT_LIT,
|
||||
STATE_SHORTREP_LIT_LIT,
|
||||
STATE_MATCH_LIT,
|
||||
STATE_REP_LIT,
|
||||
STATE_SHORTREP_LIT,
|
||||
STATE_LIT_MATCH,
|
||||
STATE_LIT_LONGREP,
|
||||
STATE_LIT_SHORTREP,
|
||||
STATE_NONLIT_MATCH,
|
||||
STATE_NONLIT_REP
|
||||
};
|
||||
|
||||
/* Total number of states */
|
||||
#define STATES 12
|
||||
|
||||
/* The lowest 7 states indicate that the previous state was a literal. */
|
||||
#define LIT_STATES 7
|
||||
|
||||
/* Indicate that the latest symbol was a literal. */
|
||||
static inline void lzma_state_literal(enum lzma_state *state)
|
||||
{
|
||||
if (*state <= STATE_SHORTREP_LIT_LIT)
|
||||
*state = STATE_LIT_LIT;
|
||||
else if (*state <= STATE_LIT_SHORTREP)
|
||||
*state -= 3;
|
||||
else
|
||||
*state -= 6;
|
||||
}
|
||||
|
||||
/* Indicate that the latest symbol was a match. */
|
||||
static inline void lzma_state_match(enum lzma_state *state)
|
||||
{
|
||||
*state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
|
||||
}
|
||||
|
||||
/* Indicate that the latest state was a long repeated match. */
|
||||
static inline void lzma_state_long_rep(enum lzma_state *state)
|
||||
{
|
||||
*state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
|
||||
}
|
||||
|
||||
/* Indicate that the latest symbol was a short match. */
|
||||
static inline void lzma_state_short_rep(enum lzma_state *state)
|
||||
{
|
||||
*state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
|
||||
}
|
||||
|
||||
/* Test if the previous symbol was a literal. */
|
||||
static inline bool lzma_state_is_literal(enum lzma_state state)
|
||||
{
|
||||
return state < LIT_STATES;
|
||||
}
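To see how the helpers above move through the state machine, here is a tiny trace (illustrative only, not part of this header) for the symbol sequence literal, match, literal, starting from STATE_LIT_LIT. It assumes the header is usable from a hosted build.

#include <assert.h>
#include <stdint.h>
#include <stdbool.h>
#include "xz_lzma2.h"   /* assumed to compile standalone with the includes above */

static void trace_states(void)
{
        enum lzma_state st = STATE_LIT_LIT;

        lzma_state_match(&st);          /* literal history, then a match */
        assert(st == STATE_LIT_MATCH);

        lzma_state_literal(&st);        /* match history, then a literal */
        assert(st == STATE_MATCH_LIT);
        assert(lzma_state_is_literal(st));
}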
|
||||
|
||||
/* Each literal coder is divided into three sections:
|
||||
* - 0x001-0x0FF: Without match byte
|
||||
* - 0x101-0x1FF: With match byte; match bit is 0
|
||||
* - 0x201-0x2FF: With match byte; match bit is 1
|
||||
*
|
||||
* Match byte is used when the previous LZMA symbol was something other than
|
||||
* a literal (that is, it was some kind of match).
|
||||
*/
|
||||
#define LITERAL_CODER_SIZE 0x300
|
||||
|
||||
/* Maximum number of literal coders */
|
||||
#define LITERAL_CODERS_MAX (1 << 4)
|
||||
|
||||
/* Minimum length of a match is two bytes. */
|
||||
#define MATCH_LEN_MIN 2
|
||||
|
||||
/* Match length is encoded with 4, 5, or 10 bits.
|
||||
*
|
||||
* Length Bits
|
||||
* 2-9 4 = Choice=0 + 3 bits
|
||||
* 10-17 5 = Choice=1 + Choice2=0 + 3 bits
|
||||
* 18-273 10 = Choice=1 + Choice2=1 + 8 bits
|
||||
*/
|
||||
#define LEN_LOW_BITS 3
|
||||
#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
|
||||
#define LEN_MID_BITS 3
|
||||
#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
|
||||
#define LEN_HIGH_BITS 8
|
||||
#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
|
||||
#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
|
||||
|
||||
/*
|
||||
* Maximum length of a match is 273 which is a result of the encoding
|
||||
* described above.
|
||||
*/
|
||||
#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
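A small helper, shown here only as an illustration of the 4/5/10-bit scheme described above (it is not part of this header), reports how many bits a given match length costs using the Choice/Choice2 prefixes plus the low, mid, and high length groups.

#include <stdint.h>
#include <stdbool.h>
#include "xz_lzma2.h"   /* assumed to be usable from a hosted build */

/* Bits needed to code a match length, per the table above. */
static unsigned match_len_bits(unsigned len)
{
        if (len < MATCH_LEN_MIN || len > MATCH_LEN_MAX)
                return 0;                       /* out of range */

        if (len < MATCH_LEN_MIN + LEN_LOW_SYMBOLS)
                return 1 + LEN_LOW_BITS;        /* Choice=0            -> 2..9,   4 bits  */

        if (len < MATCH_LEN_MIN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS)
                return 2 + LEN_MID_BITS;        /* Choice=1, Choice2=0 -> 10..17, 5 bits  */

        return 2 + LEN_HIGH_BITS;               /* Choice=1, Choice2=1 -> 18..273, 10 bits */
}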
|
||||
|
||||
/*
|
||||
* Different sets of probabilities are used for match distances that have
|
||||
* very short match length: Lengths of 2, 3, and 4 bytes have a separate
|
||||
* set of probabilities for each length. The matches with longer length
|
||||
* use a shared set of probabilities.
|
||||
*/
|
||||
#define DIST_STATES 4
|
||||
|
||||
/*
|
||||
* Get the index of the appropriate probability array for decoding
|
||||
* the distance slot.
|
||||
*/
|
||||
static inline uint32_t lzma_get_dist_state(uint32_t len)
|
||||
{
|
||||
return len < DIST_STATES + MATCH_LEN_MIN
|
||||
? len - MATCH_LEN_MIN : DIST_STATES - 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The highest two bits of a 32-bit match distance are encoded using six bits.
|
||||
* This six-bit value is called a distance slot. This way encoding a 32-bit
|
||||
* value takes 6-36 bits, larger values taking more bits.
|
||||
*/
|
||||
#define DIST_SLOT_BITS 6
|
||||
#define DIST_SLOTS (1 << DIST_SLOT_BITS)
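The decoder never needs to compute a slot from a distance, but stating the mapping may help when reading the definitions that follow. The sketch below is an encoder-side illustration only (it assumes the GCC/Clang __builtin_clz builtin): the slot is built from the position of the highest set bit and the bit just below it.

#include <stdint.h>

/* Distance -> six-bit distance slot (encoder-side view of the scheme above). */
static uint32_t dist_to_slot(uint32_t dist)
{
        uint32_t n;

        if (dist < 4)
                return dist;                    /* slots 0-3 are the distance itself */

        n = 31 - (uint32_t)__builtin_clz(dist); /* index of the highest set bit */

        /* Highest bit selects the slot pair, the bit below it picks within it. */
        return (n << 1) | ((dist >> (n - 1)) & 1);
        /* e.g. distances 128..191 all map to slot 14 */
}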
|
||||
|
||||
/* Match distances up to 127 are fully encoded using probabilities. Since
|
||||
* the highest two bits (distance slot) are always encoded using six bits,
|
||||
* the distances 0-3 don't need any additional bits to encode, since the
|
||||
* distance slot itself is the same as the actual distance. DIST_MODEL_START
|
||||
* indicates the first distance slot where at least one additional bit is
|
||||
* needed.
|
||||
*/
|
||||
#define DIST_MODEL_START 4
|
||||
|
||||
/*
|
||||
* Match distances greater than 127 are encoded in three pieces:
|
||||
* - distance slot: the highest two bits
|
||||
* - direct bits: 2-26 bits below the highest two bits
|
||||
* - alignment bits: four lowest bits
|
||||
*
|
||||
* Direct bits don't use any probabilities.
|
||||
*
|
||||
* The distance slot value of 14 is for distances 128-191.
|
||||
*/
|
||||
#define DIST_MODEL_END 14
|
||||
|
||||
/* Distance slots that indicate a distance <= 127. */
|
||||
#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
|
||||
#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
|
||||
|
||||
/*
|
||||
* For match distances greater than 127, only the highest two bits and the
|
||||
* lowest four bits (alignment) are encoded using probabilities.
|
||||
*/
|
||||
#define ALIGN_BITS 4
|
||||
#define ALIGN_SIZE (1 << ALIGN_BITS)
|
||||
#define ALIGN_MASK (ALIGN_SIZE - 1)
|
||||
|
||||
/* Total number of all probability variables */
|
||||
#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
|
||||
|
||||
/*
|
||||
* LZMA remembers the four most recent match distances. Reusing these
|
||||
* distances tends to take less space than re-encoding the actual
|
||||
* distance value.
|
||||
*/
|
||||
#define REPS 4
|
||||
|
||||
#endif
|
||||
156
src/common/unix/xzminidec/src/xz_private.h
Normal file
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
* Private includes and definitions
|
||||
*
|
||||
* Author: Lasse Collin <lasse.collin@tukaani.org>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#ifndef XZ_PRIVATE_H
|
||||
#define XZ_PRIVATE_H
|
||||
|
||||
#ifdef __KERNEL__
|
||||
# include <linux/xz.h>
|
||||
# include <linux/kernel.h>
|
||||
# include <asm/unaligned.h>
|
||||
/* XZ_PREBOOT may be defined only via decompress_unxz.c. */
|
||||
# ifndef XZ_PREBOOT
|
||||
# include <linux/slab.h>
|
||||
# include <linux/vmalloc.h>
|
||||
# include <linux/string.h>
|
||||
# ifdef CONFIG_XZ_DEC_X86
|
||||
# define XZ_DEC_X86
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_POWERPC
|
||||
# define XZ_DEC_POWERPC
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_IA64
|
||||
# define XZ_DEC_IA64
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_ARM
|
||||
# define XZ_DEC_ARM
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_ARMTHUMB
|
||||
# define XZ_DEC_ARMTHUMB
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_SPARC
|
||||
# define XZ_DEC_SPARC
|
||||
# endif
|
||||
# define memeq(a, b, size) (memcmp(a, b, size) == 0)
|
||||
# define memzero(buf, size) memset(buf, 0, size)
|
||||
# endif
|
||||
# define get_le32(p) le32_to_cpup((const uint32_t *)(p))
|
||||
#else
|
||||
/*
|
||||
* For userspace builds, use a separate header to define the required
|
||||
* macros and functions. This makes it easier to adapt the code into
|
||||
* different environments and avoids clutter in the Linux kernel tree.
|
||||
*/
|
||||
# include "xz_config.h"
|
||||
#endif
|
||||
|
||||
/* If no specific decoding mode is requested, enable support for all modes. */
|
||||
#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \
|
||||
&& !defined(XZ_DEC_DYNALLOC)
|
||||
# define XZ_DEC_SINGLE
|
||||
# define XZ_DEC_PREALLOC
|
||||
# define XZ_DEC_DYNALLOC
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The DEC_IS_foo(mode) macros are used in "if" statements. If only some
|
||||
* of the supported modes are enabled, these macros will evaluate to true or
|
||||
* false at compile time and thus allow the compiler to omit unneeded code.
|
||||
*/
|
||||
#ifdef XZ_DEC_SINGLE
|
||||
# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
|
||||
#else
|
||||
# define DEC_IS_SINGLE(mode) (false)
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_PREALLOC
|
||||
# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
|
||||
#else
|
||||
# define DEC_IS_PREALLOC(mode) (false)
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_DYNALLOC
|
||||
# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
|
||||
#else
|
||||
# define DEC_IS_DYNALLOC(mode) (false)
|
||||
#endif
|
||||
|
||||
#if !defined(XZ_DEC_SINGLE)
|
||||
# define DEC_IS_MULTI(mode) (true)
|
||||
#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC)
|
||||
# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
|
||||
#else
|
||||
# define DEC_IS_MULTI(mode) (false)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
|
||||
* XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
|
||||
*/
|
||||
#ifndef XZ_DEC_BCJ
|
||||
# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
|| defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
|| defined(XZ_DEC_ARMTHUMB) \
|| defined(XZ_DEC_SPARC)
|
||||
# define XZ_DEC_BCJ
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
|
||||
* before calling xz_dec_lzma2_run().
|
||||
*/
|
||||
XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
|
||||
uint32_t dict_max);
|
||||
|
||||
/*
|
||||
* Decode the LZMA2 properties (one byte) and reset the decoder. Return
|
||||
* XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
|
||||
* big enough, and XZ_OPTIONS_ERROR if props indicates something that this
|
||||
* decoder doesn't support.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
|
||||
uint8_t props);
|
||||
|
||||
/* Decode raw LZMA2 stream from b->in to b->out. */
|
||||
XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
|
||||
struct xz_buf *b);
|
||||
|
||||
/* Free the memory allocated for the LZMA2 decoder. */
|
||||
XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
/*
|
||||
* Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
|
||||
* calling xz_dec_bcj_run().
|
||||
*/
|
||||
XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call);
|
||||
|
||||
/*
|
||||
* Decode the Filter ID of a BCJ filter. This implementation doesn't
|
||||
* support custom start offsets, so no decoding of Filter Properties
|
||||
* is needed. Returns XZ_OK if the given Filter ID is supported.
|
||||
* Otherwise XZ_OPTIONS_ERROR is returned.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
|
||||
|
||||
/*
|
||||
* Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
|
||||
* a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
|
||||
* must be called directly.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
|
||||
struct xz_dec_lzma2 *lzma2,
|
||||
struct xz_buf *b);
|
||||
|
||||
/* Free the memory allocated for the BCJ filters. */
|
||||
#define xz_dec_bcj_end(s) kfree(s)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
62
src/common/unix/xzminidec/src/xz_stream.h
Normal file
@@ -0,0 +1,62 @@
/*
 * Definitions for handling the .xz file format
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#ifndef XZ_STREAM_H
#define XZ_STREAM_H

#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
#       include <linux/crc32.h>
#       undef crc32
#       define xz_crc32(buf, size, crc) \
                (~crc32_le(~(uint32_t)(crc), buf, size))
#endif

/*
 * See the .xz file format specification at
 * http://tukaani.org/xz/xz-file-format.txt
 * to understand the container format.
 */

#define STREAM_HEADER_SIZE 12

#define HEADER_MAGIC "\3757zXZ"
#define HEADER_MAGIC_SIZE 6

#define FOOTER_MAGIC "YZ"
#define FOOTER_MAGIC_SIZE 2

/*
 * Variable-length integer can hold a 63-bit unsigned integer or a special
 * value indicating that the value is unknown.
 *
 * Experimental: vli_type can be defined to uint32_t to save a few bytes
 * in code size (no effect on speed). Doing so limits the uncompressed and
 * compressed size of the file to less than 256 MiB and may also weaken
 * error detection slightly.
 */
typedef uint64_t vli_type;

#define VLI_MAX ((vli_type)-1 / 2)
#define VLI_UNKNOWN ((vli_type)-1)

/* Maximum encoded size of a VLI */
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)

/* Integrity Check types */
enum xz_check {
        XZ_CHECK_NONE = 0,
        XZ_CHECK_CRC32 = 1,
        XZ_CHECK_CRC64 = 4,
        XZ_CHECK_SHA256 = 10
};

/* Maximum possible Check ID */
#define XZ_CHECK_MAX 15

#endif