Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
Commit: 525.53
src/common/unix/common/inc/nv_amodel_enum.h (new file, 42 lines)
@@ -0,0 +1,42 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __NV_AMODEL_ENUM_H__
#define __NV_AMODEL_ENUM_H__

typedef enum _NVAModelConfig {
    NV_AMODEL_NONE = 0,
    NV_AMODEL_KEPLER,
    NV_AMODEL_KEPLER_SM35,
    NV_AMODEL_MAXWELL,
    NV_AMODEL_PASCAL,
    NV_AMODEL_VOLTA,
    NV_AMODEL_TURING,
    NV_AMODEL_AMPERE,
    NV_AMODEL_ADA,
    NV_AMODEL_HOPPER,
} NVAModelConfig;

#endif /* __NV_AMODEL_ENUM_H__ */
src/common/unix/nvidia-3d/include/nv_xz_mem_hooks.h (new file, 44 lines)
@@ -0,0 +1,44 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NV_XZ_MEM_HOOKS_H__
#define __NV_XZ_MEM_HOOKS_H__

/*
 * This file is included by xz_config.h when NV_XZ_CUSTOM_MEM_HOOKS is defined,
 * allowing us to override xzminidec's standard library use.
 */

#include "nvidia-3d-imports.h"

#define kmalloc(size, flags) nv3dImportAlloc(size)
#define kfree(ptr)           nv3dImportFree(ptr)
#define vmalloc(size)        nv3dImportAlloc(size)
#define vfree(ptr)           nv3dImportFree(ptr)

#define memeq(a, b, size)    (nv3dImportMemCmp(a, b, size) == 0)
#define memzero(buf, size)   nv3dImportMemSet(buf, 0, size)
#define memcpy(a, b, size)   nv3dImportMemCpy(a, b, size)
#define memmove(a, b, size)  nv3dImportMemMove(a, b, size)

#endif /* __NV_XZ_MEM_HOOKS_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-fermi.h (new file, 41 lines)
@@ -0,0 +1,41 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_FERMI_H__
#define __NVIDIA_3D_FERMI_H__

#include "nvidia-3d-types.h"

void _nv3dSetProgramOffsetFermi(
    Nv3dChannelRec *p3dChannel,
    NvU32 stage,
    NvU32 offset);
void _nv3dInvalidateTexturesFermi(
    Nv3dChannelRec *p3dChannel);
void _nv3dSetVertexStreamEndFermi(
    Nv3dChannelPtr p3dChannel,
    enum Nv3dVertexAttributeStreamType stream,
    const Nv3dVertexAttributeStreamRec *pStream);

#endif /* __NVIDIA_3D_FERMI_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-hopper.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_HOPPER_H__
#define __NVIDIA_3D_HOPPER_H__

#include "nvidia-3d-types.h"

void _nv3dInitChannelHopper(Nv3dChannelRec *p3dChannel);

void _nv3dAssignNv3dTextureHopper(
    Nv3dRenderTexInfo info,
    Nv3dTexture *tex);

#endif /* __NVIDIA_3D_HOPPER_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-kepler.h (new file, 45 lines)
@@ -0,0 +1,45 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_KEPLER_H__
#define __NVIDIA_3D_KEPLER_H__

#include "nvidia-3d-types.h"

void _nv3dSetSpaVersionKepler(Nv3dChannelRec *p3dChannel);

void _nv3dInitChannelKepler(Nv3dChannelRec *p3dChannel);

void _nv3dUploadDataInlineKepler(
    Nv3dChannelRec *p3dChannel,
    NvU64 gpuBaseAddress,
    size_t offset,
    const void *data,
    size_t bytes);
void _nv3dBindTexturesKepler(
    Nv3dChannelPtr p3dChannel,
    int programIndex,
    const int *textureBindingIndices);

#endif /* __NVIDIA_3D_KEPLER_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-maxwell.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_MAXWELL_H__
#define __NVIDIA_3D_MAXWELL_H__

#include "nvidia-3d-types.h"

void _nv3dInitChannelMaxwell(Nv3dChannelRec *p3dChannel);

void _nv3dAssignNv3dTextureMaxwell(
    Nv3dRenderTexInfo info,
    Nv3dTexture *tex);

#endif /* __NVIDIA_3D_MAXWELL_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-pascal.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_PASCAL_H__
#define __NVIDIA_3D_PASCAL_H__

#include "nvidia-3d-types.h"

void _nv3dInitChannelPascal(Nv3dChannelRec *p3dChannel);

void _nv3dAssignNv3dTexturePascal(
    Nv3dRenderTexInfo info,
    Nv3dTexture *tex);

#endif /* __NVIDIA_3D_PASCAL_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-surface.h (new file, 33 lines)
@@ -0,0 +1,33 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_SURFACE_H__
#define __NVIDIA_3D_SURFACE_H__

#include "nvidia-3d-types.h"

void _nv3dAssignSurfaceOffsets(
    const Nv3dAllocChannelStateParams *pParams,
    Nv3dChannelPtr p3dChannel);

#endif /* __NVIDIA_3D_SURFACE_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-turing.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_TURING_H__
#define __NVIDIA_3D_TURING_H__

#include "nvidia-3d-types.h"

void _nv3dInitChannelTuring(Nv3dChannelRec *p3dChannel);
void _nv3dSetVertexStreamEndTuring(
    Nv3dChannelPtr p3dChannel,
    enum Nv3dVertexAttributeStreamType stream,
    const Nv3dVertexAttributeStreamRec *pStream);

#endif /* __NVIDIA_3D_TURING_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-types-priv.h (new file, 48 lines)
@@ -0,0 +1,48 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_TYPES_PRIV_H__
#define __NVIDIA_3D_TYPES_PRIV_H__

#include "nvidia-3d-types.h"

struct _Nv3dHal {
    void (*setSpaVersion)      (Nv3dChannelRec *p3dChannel);
    void (*initChannel)        (Nv3dChannelRec *p3dChannel);
    void (*uploadDataInline)   (Nv3dChannelRec *p3dChannel,
                                NvU64 gpuBaseAddress,
                                size_t offset,
                                const void *data,
                                size_t bytes);
    void (*setProgramOffset)   (Nv3dChannelRec *p3dChannel,
                                NvU32 stage,
                                NvU32 offset);
    void (*assignNv3dTexture)  (Nv3dRenderTexInfo info,
                                Nv3dTexture *tex);
    void (*setVertexStreamEnd) (Nv3dChannelPtr p3dChannel,
                                enum Nv3dVertexAttributeStreamType stream,
                                const Nv3dVertexAttributeStreamRec *pStream);
};

#endif /* __NVIDIA_3D_TYPES_PRIV_H__ */
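Each GPU architecture supplies one such table, built from the per-architecture entry points declared in the headers above. The sketch below is purely illustrative and not part of this commit; the real tables live in the nvidia-3d sources and may wire the slots differently. It only shows how a Maxwell-era HAL could plausibly reuse the Fermi and Kepler helpers.

/* Illustrative only: a hypothetical HAL table assembled from the per-arch
 * entry points declared in this commit's headers. */
static const Nv3dHal _hypotheticalMaxwellHal = {
    .setSpaVersion      = _nv3dSetSpaVersionKepler,     /* only Kepler variant is declared */
    .initChannel        = _nv3dInitChannelMaxwell,
    .uploadDataInline   = _nv3dUploadDataInlineKepler,
    .setProgramOffset   = _nv3dSetProgramOffsetFermi,   /* Volta adds its own variant */
    .assignNv3dTexture  = _nv3dAssignNv3dTextureMaxwell,
    .setVertexStreamEnd = _nv3dSetVertexStreamEndFermi, /* Turing adds its own variant */
};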
src/common/unix/nvidia-3d/include/nvidia-3d-vertex-arrays.h (new file, 32 lines)
@@ -0,0 +1,32 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_VERTEX_ARRAYS_H__
#define __NVIDIA_3D_VERTEX_ARRAYS_H__

#include "nvidia-3d-types.h"

void _nv3dInitializeStreams(
    Nv3dChannelRec *p3dChannel);

#endif /* __NVIDIA_3D_VERTEX_ARRAYS_H__ */
src/common/unix/nvidia-3d/include/nvidia-3d-volta.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_VOLTA_H__
#define __NVIDIA_3D_VOLTA_H__

#include "nvidia-3d-types.h"

void _nv3dSetProgramOffsetVolta(
    Nv3dChannelRec *p3dChannel,
    NvU32 stage,
    NvU32 offset);

#endif /* __NVIDIA_3D_VOLTA_H__ */
@@ -0,0 +1,93 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_COLOR_TARGETS_H__
#define __NVIDIA_3D_COLOR_TARGETS_H__

#include "nvidia-3d.h"

#include <class/cl9097.h>
#include <class/cla06fsubch.h>

/*
 * This header file defines static inline functions to manage 3D class
 * color targets.
 */

static inline void nv3dSelectColorTarget(
    Nv3dChannelPtr p3dChannel,
    NvU8 colorTargetIndex)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_CT_SELECT,
        NV3D_V(9097, SET_CT_SELECT, TARGET_COUNT, 1) |
        NV3D_V(9097, SET_CT_SELECT, TARGET0, colorTargetIndex) |
        NV3D_V(9097, SET_CT_SELECT, TARGET1, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET2, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET3, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET4, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET5, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET6, 0) |
        NV3D_V(9097, SET_CT_SELECT, TARGET7, 0));
}

static inline void nv3dSetColorTarget(
    Nv3dChannelPtr p3dChannel,
    NvU8 colorTargetIndex,
    NvU32 surfaceFormat,
    NvU64 surfaceGpuAddress,
    NvBool blockLinear,
    Nv3dBlockLinearLog2GobsPerBlock gobsPerBlock,
    NvU32 surfaceWidth,
    NvU32 surfaceHeight)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    const NvU32 memoryInfo =
        blockLinear ?
        (NV3D_V(9097, SET_COLOR_TARGET_MEMORY, BLOCK_WIDTH, gobsPerBlock.x) |
         NV3D_V(9097, SET_COLOR_TARGET_MEMORY, BLOCK_HEIGHT, gobsPerBlock.y) |
         NV3D_V(9097, SET_COLOR_TARGET_MEMORY, BLOCK_DEPTH, gobsPerBlock.z) |
         NV3D_C(9097, SET_COLOR_TARGET_MEMORY, LAYOUT, BLOCKLINEAR)) :
        NV3D_C(9097, SET_COLOR_TARGET_MEMORY, LAYOUT, PITCH);

    if (surfaceFormat == NV9097_SET_COLOR_TARGET_FORMAT_V_DISABLED) {
        // Disable this color target.
        nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
            NV9097_SET_COLOR_TARGET_FORMAT(colorTargetIndex),
            NV9097_SET_COLOR_TARGET_FORMAT_V_DISABLED);
        return;
    }

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
        NV9097_SET_COLOR_TARGET_A(colorTargetIndex), 6);

    nvPushSetMethodDataU64(p, surfaceGpuAddress);
    nvPushSetMethodData(p, surfaceWidth);
    nvPushSetMethodData(p, surfaceHeight);
    nvPushSetMethodData(p, surfaceFormat);
    nvPushSetMethodData(p, memoryInfo);
}

#endif /* __NVIDIA_3D_COLOR_TARGETS_H__ */
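Taken together, nv3dSetColorTarget() and nv3dSelectColorTarget() are enough to point the 3D class at a render target. A minimal usage sketch follows; it is illustrative only (not part of this commit) and assumes a pitch-linear surface using the A8R8G8B8 color-target format define from cl9097.h.

/* Illustrative usage sketch: direct rendering at color target 0, backed by a
 * pitch-linear surface. The surface address and dimensions are caller-supplied
 * placeholders. */
static void exampleBindRenderTarget(Nv3dChannelPtr p3dChannel,
                                    NvU64 surfaceGpuAddress,
                                    NvU32 surfaceWidth, NvU32 surfaceHeight)
{
    /* Gobs-per-block values are ignored for the pitch layout. */
    const Nv3dBlockLinearLog2GobsPerBlock noGobs = { 0, 0, 0 };

    /* Program SET_COLOR_TARGET_A..MEMORY for target 0. */
    nv3dSetColorTarget(p3dChannel, 0,
                       NV9097_SET_COLOR_TARGET_FORMAT_V_A8R8G8B8,
                       surfaceGpuAddress,
                       NV_FALSE, /* blockLinear */
                       noGobs,
                       surfaceWidth, surfaceHeight);

    /* Point subsequent rendering at target 0 only. */
    nv3dSelectColorTarget(p3dChannel, 0);
}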
src/common/unix/nvidia-3d/interface/nvidia-3d-constant-buffers.h (new file, 196 lines)
@@ -0,0 +1,196 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_CONSTANT_BUFFERS_H__
#define __NVIDIA_3D_CONSTANT_BUFFERS_H__

#include "nvidia-3d.h"

#include <class/cl9097.h>
#include <class/cla06fsubch.h>

/*
 * This header file defines static inline functions to manage 3D class
 * constant buffers.
 */


static inline void nv3dSelectCbAddress(
    Nv3dChannelRec *p3dChannel,
    NvU64 offset,
    NvU32 size)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvAssert(size > 0);
    nvAssert(NV_IS_ALIGNED(size, NV3D_MIN_CONSTBUF_ALIGNMENT));
    nvAssert(size <= 65536);
    nvAssert(NV_IS_ALIGNED(offset, NV3D_MIN_CONSTBUF_ALIGNMENT));

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
        NV9097_SET_CONSTANT_BUFFER_SELECTOR_A, 3);
    nvPushSetMethodData(p, size);
    nvPushSetMethodDataU64(p, offset);
}

/*!
 * Select a constant buffer for binding or updating.
 */
static inline void nv3dSelectCb(
    Nv3dChannelRec *p3dChannel,
    int constantBufferIndex)
{
    const NvU64 gpuAddress =
        nv3dGetConstantBufferGpuAddress(p3dChannel, constantBufferIndex);

    nv3dSelectCbAddress(p3dChannel, gpuAddress, NV3D_CONSTANT_BUFFER_SIZE);
}

/*!
 * Bind the selected Cb to a given slot (or invalidate that slot).
 */
static inline void nv3dBindCb(
    Nv3dChannelRec *p3dChannel,
    int bindGroup, // XXX TODO: this type should be NVShaderBindGroup
    int slot,
    NvBool valid)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    ASSERT_DRF_NUM(9097, _BIND_GROUP_CONSTANT_BUFFER, _SHADER_SLOT, slot);

    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
        NV9097_BIND_GROUP_CONSTANT_BUFFER(bindGroup),
        NV3D_V(9097, BIND_GROUP_CONSTANT_BUFFER, VALID, !!valid) |
        NV3D_V(9097, BIND_GROUP_CONSTANT_BUFFER, SHADER_SLOT, slot));
}

/*!
 * Push *only the header* to tell the GPU to "load" constants from the
 * pushbuffer.
 *
 * \param[in]  p3dChannel  The nvidia-3d channel.
 * \param[in]  offset      The offset in bytes of the start of the updates.
 * \param[in]  dwords      Count of dwords to be loaded (after the header).
 *
 * \return  An NvPushChannelUnion pointing immediately after the header,
 *          with enough contiguous space to copy 'dwords' of data.
 */
static inline NvPushChannelUnion *nv3dLoadConstantsHeader(
    Nv3dChannelRec *p3dChannel,
    NvU32 offset,
    size_t dwords)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;
    NvPushChannelUnion *buffer;

    nvAssert((dwords + 1) <= nvPushMaxMethodCount(p));

    nvPushMethodOneIncr(p, NVA06F_SUBCHANNEL_3D,
        NV9097_LOAD_CONSTANT_BUFFER_OFFSET, dwords + 1);
    nvPushSetMethodData(p, offset);

    buffer = p->main.buffer;
    p->main.buffer += dwords;

    return buffer;
}

/*!
 * Load an array of bytes into a constant buffer at a specified location.
 *
 * The count must be a multiple of 4 bytes.
 *
 * \param[in]  p3dChannel  The nvidia-3d channel.
 * \param[in]  offset      The offset in bytes of the start of the updates.
 * \param[in]  bytes       Count of bytes to write.  Must be a multiple of 4.
 * \param[in]  values      Data to be written.
 */
static inline void nv3dLoadConstants(
    Nv3dChannelRec *p3dChannel,
    NvU32 offset,
    size_t bytes,
    const void *values)
{
    const size_t dwords = bytes / 4;
    NvPushChannelUnion *buffer;

    nvAssert((bytes & 3) == 0);

    buffer = nv3dLoadConstantsHeader(p3dChannel, offset, dwords);

    nvDmaMoveDWORDS(buffer, values, dwords);
}

/*!
 * Set the current constant buffer's current byte offset, for use with
 * nv3dPushConstants().
 */
static inline void nv3dSetConstantBufferOffset(
    Nv3dChannelRec *p3dChannel,
    NvU32 offset)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
        NV9097_LOAD_CONSTANT_BUFFER_OFFSET, 1);
    nvPushSetMethodData(p, offset);
}

// Load an array of dwords into a constant buffer at the current location. This
// also advances the constant buffer load offset, so that multiple calls to
// nv3dPushConstants will write to sequential memory addresses.
static inline void nv3dPushConstants(
    Nv3dChannelRec *p3dChannel,
    size_t bytes,
    const void *values)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;
    const size_t dwords = bytes / 4;
    nvAssert((bytes & 3) == 0);
    nvAssert(dwords <= nvPushMaxMethodCount(p));

    nvPushMethodNoIncr(p, NVA06F_SUBCHANNEL_3D,
        NV9097_LOAD_CONSTANT_BUFFER(0), dwords);
    nvPushInlineData(p, values, dwords);
}

static inline void nv3dLoadSingleConstant(
    Nv3dChannelRec *p3dChannel,
    NvU32 offset,
    NvU32 value)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
        NV9097_LOAD_CONSTANT_BUFFER_OFFSET, 2);
    nvPushSetMethodData(p, offset);
    nvPushSetMethodData(p, value);
}

#endif /* __NVIDIA_3D_CONSTANT_BUFFERS_H__ */
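A typical call sequence for these helpers is: select a constant buffer, load data into it, then bind it to a shader bind group. The sketch below is illustrative only and not part of this commit; the buffer index, slot, and data are placeholders, and NV3D_CB_SLOT_MISC1 / NV3D_HW_BIND_GROUP_FRAGMENT come from the other interface headers added here.

/* Illustrative usage sketch: stage a vec4-sized uniform into constant
 * buffer 0 and bind it to the fragment stage. */
static void exampleUploadUniform(Nv3dChannelRec *p3dChannel,
                                 const Nv3dFloat color[4])
{
    /* Point SET_CONSTANT_BUFFER_SELECTOR at constant buffer 0. */
    nv3dSelectCb(p3dChannel, 0);

    /* Copy 16 bytes of data starting at offset 0 of that buffer. */
    nv3dLoadConstants(p3dChannel, 0, 4 * sizeof(Nv3dFloat), color);

    /* Make the buffer visible to fragment shaders in slot NV3D_CB_SLOT_MISC1. */
    nv3dBindCb(p3dChannel, NV3D_HW_BIND_GROUP_FRAGMENT,
               NV3D_CB_SLOT_MISC1, NV_TRUE);
}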
src/common/unix/nvidia-3d/interface/nvidia-3d-imports.h (new file, 41 lines)
@@ -0,0 +1,41 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

/*
 * nvidia-3d-imports.h declares functions that nvidia-3d host drivers must
 * provide.
 */

#ifndef __NVIDIA_3D_IMPORTS_H__
#define __NVIDIA_3D_IMPORTS_H__

#include <stddef.h> /* size_t */

void *nv3dImportAlloc(size_t size);
void nv3dImportFree(void *ptr);
int nv3dImportMemCmp(const void *a, const void *b, size_t size);
void nv3dImportMemSet(void *s, int c, size_t size);
void nv3dImportMemCpy(void *dest, const void *src, size_t size);
void nv3dImportMemMove(void *dest, const void *src, size_t size);

#endif /* __NVIDIA_3D_IMPORTS_H__ */
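These imports are the only memory and mem* services nvidia-3d expects from its host driver; they also back the kmalloc/memzero macros in nv_xz_mem_hooks.h above. A hypothetical user-space host could satisfy them with the C library, as in the sketch below, which is illustrative and not part of this commit; a kernel-mode host would route them to its own allocator and helpers instead.

/* Hypothetical host-driver implementation of the nvidia-3d imports,
 * backed by the standard C library. */
#include <stdlib.h>
#include <string.h>
#include "nvidia-3d-imports.h"

void *nv3dImportAlloc(size_t size)
{
    return malloc(size);
}

void nv3dImportFree(void *ptr)
{
    free(ptr);
}

int nv3dImportMemCmp(const void *a, const void *b, size_t size)
{
    return memcmp(a, b, size);
}

void nv3dImportMemSet(void *s, int c, size_t size)
{
    memset(s, c, size);
}

void nv3dImportMemCpy(void *dest, const void *src, size_t size)
{
    memcpy(dest, src, size);
}

void nv3dImportMemMove(void *dest, const void *src, size_t size)
{
    memmove(dest, src, size);
}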
@@ -0,0 +1,53 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef _NVIDIA_3D_SHADER_CONSTANTS_H_
#define _NVIDIA_3D_SHADER_CONSTANTS_H_

#if defined(NV3D_BUILD_AS_GLSL)

#define NV3D_CB_SLOT_FIRST_USER_BINDABLE 0

#else

/* Shaders always use this slot for compiler-emitted constants.  This
 * assumption is verified at ucode build time. */
#define NV3D_CB_SLOT_COMPILER 1

/* Offset between GLSL slot 0 and hardware slot */
#define NV3D_CB_SLOT_FIRST_USER_BINDABLE 3

#endif

/* This slot is used for most uniforms/constants defined in each shader */
#define NV3D_CB_SLOT_MISC1 (NV3D_CB_SLOT_FIRST_USER_BINDABLE + 0)

/* When needed (Kepler+), shaders always use this constant slot for bindless
 * texture handles. */
#define NV3D_CB_SLOT_BINDLESS_TEXTURE (NV3D_CB_SLOT_FIRST_USER_BINDABLE + 1)


/* Matches __GL_PGM_UNUSED_TEXTURE_UNIT */
#define NV3D_TEX_BINDING_UNUSED 255

#endif /* _NVIDIA_3D_SHADER_CONSTANTS_H_ */
src/common/unix/nvidia-3d/interface/nvidia-3d-shaders.h (new file, 69 lines)
@@ -0,0 +1,69 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2010-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * (Standard MIT permission notice, identical to the one in nv_amodel_enum.h above.)
 */

#ifndef __NVIDIA_3D_SHADERS_H__
#define __NVIDIA_3D_SHADERS_H__

#include <nvtypes.h>
#include <class/cl9097.h>

// These are used in the "shader type" field below
#define NV3D_SHADER_TYPE_VERTEX NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX
#define NV3D_SHADER_TYPE_PIXEL  NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL

typedef enum {
    NV3D_HW_SHADER_STAGE_VERTEX_A = 0,
    NV3D_HW_SHADER_STAGE_VERTEX_B,
    NV3D_HW_SHADER_STAGE_TESS_CONTROL,
    NV3D_HW_SHADER_STAGE_TESS_EVAL,
    NV3D_HW_SHADER_STAGE_GEOMETRY,
    NV3D_HW_SHADER_STAGE_PIXEL,
    NV3D_HW_SHADER_STAGE_COUNT,
} __attribute__ ((__packed__)) Nv3dShaderStage;

typedef enum {
    NV3D_HW_BIND_GROUP_VERTEX = 0,
    NV3D_HW_BIND_GROUP_TESS_CONTROL,
    NV3D_HW_BIND_GROUP_TESS_EVAL,
    NV3D_HW_BIND_GROUP_GEOMETRY,
    NV3D_HW_BIND_GROUP_FRAGMENT,
    NV3D_HW_BIND_GROUP_LAST = NV3D_HW_BIND_GROUP_FRAGMENT
} __attribute__ ((__packed__)) Nv3dShaderBindGroup;

typedef struct _nv_program_info {
    NvU32 offset;         // Start offset relative to program heap
    NvU8 registerCount;   // From '#.MAX_REG n'+1
    NvU8 type;            // Shader type
    NvS8 constIndex;      // Index into the compiler-generated constant buffer table

    Nv3dShaderStage stage;         // Pipeline stage
    Nv3dShaderBindGroup bindGroup; // NV3D_HW_BIND_GROUP
} Nv3dProgramInfo;

typedef struct _nv_shader_const_buf_info {
    const NvU32 *data;
    NvU32 offset;
    NvU32 size;
} Nv3dShaderConstBufInfo;

#endif // __NVIDIA_3D_SHADERS_H__
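Each entry of the program descriptor array consumed by nvidia-3d is one Nv3dProgramInfo. The sketch below is illustrative only; the offset and register count are placeholder values that would normally come from the shader build, and the entry is not taken from this commit.

/* Illustrative only: what one program descriptor for a simple vertex shader
 * might look like.  Offset and register count are placeholders. */
static const Nv3dProgramInfo hypotheticalVertexProgram = {
    .offset        = 0x0,  /* start of the program heap */
    .registerCount = 16,
    .type          = NV3D_SHADER_TYPE_VERTEX,
    .constIndex    = -1,   /* no compiler-generated constant buffer */
    .stage         = NV3D_HW_SHADER_STAGE_VERTEX_B,
    .bindGroup     = NV3D_HW_BIND_GROUP_VERTEX,
};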
478
src/common/unix/nvidia-3d/interface/nvidia-3d-types.h
Normal file
478
src/common/unix/nvidia-3d/interface/nvidia-3d-types.h
Normal file
@@ -0,0 +1,478 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_3D_TYPES_H__
|
||||
#define __NVIDIA_3D_TYPES_H__
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvlimits.h"
|
||||
#include "nvidia-push-methods.h"
|
||||
|
||||
#include "nvidia-3d-shaders.h"
|
||||
|
||||
enum Nv3dBlendOperation {
|
||||
NV3D_BLEND_OP_CLEAR,
|
||||
NV3D_BLEND_OP_SRC,
|
||||
NV3D_BLEND_OP_DST,
|
||||
NV3D_BLEND_OP_OVER,
|
||||
NV3D_BLEND_OP_OVER_REVERSE,
|
||||
NV3D_BLEND_OP_IN,
|
||||
NV3D_BLEND_OP_IN_REVERSE,
|
||||
NV3D_BLEND_OP_OUT,
|
||||
NV3D_BLEND_OP_OUT_REVERSE,
|
||||
NV3D_BLEND_OP_ATOP,
|
||||
NV3D_BLEND_OP_ATOP_REVERSE,
|
||||
NV3D_BLEND_OP_XOR,
|
||||
NV3D_BLEND_OP_ADD,
|
||||
NV3D_BLEND_OP_SATURATE,
|
||||
};
|
||||
|
||||
// We use two vertex streams: one for static attributes (values that are the
|
||||
// same for all vertices) and one for dynamic attributes.
|
||||
enum Nv3dVertexAttributeStreamType {
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_FIRST = 0,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC = 0,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC = 1,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT,
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* The data type of a vertex attribute. */
|
||||
/* Names of enum Nv3dVertexAttributeDataType members follow
|
||||
* "NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_{N_elements}_{element_size}_{NUMERICAL_TYPE}" convention
|
||||
* where {NUMERICAL_TYPE} gives information about NV9097_SET_VERTEX_ATTRIBUTE_A_NUMERICAL_TYPE
|
||||
*/
|
||||
enum Nv3dVertexAttributeDataType {
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_32_FLOAT, /* two floats */
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_32_FLOAT, /* four floats */
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_16_UNORM, /* four unsigned shorts mapped to floats: [0,65535] => [0.0f,1.0f] */
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_8_UNORM, /* four unsigned bytes mapped to floats: [0,255] => [0.0f,1.0f] */
|
||||
NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_16_SSCALED,/* two shorts mapped to floats: [-32768,32767] => [-32768.0f,32767.0f] */
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* The possible vertex attributes. */
|
||||
enum Nv3dVertexAttributeType {
|
||||
NV3D_VERTEX_ATTRIBUTE_POSITION = 0,
|
||||
NV3D_VERTEX_ATTRIBUTE_VERTEX_WEIGHT = 1,
|
||||
NV3D_VERTEX_ATTRIBUTE_NORMAL = 2,
|
||||
NV3D_VERTEX_ATTRIBUTE_COLOR = 3,
|
||||
NV3D_VERTEX_ATTRIBUTE_SECONDARY_COLOR = 4,
|
||||
NV3D_VERTEX_ATTRIBUTE_FOG_COORD = 5,
|
||||
NV3D_VERTEX_ATTRIBUTE_POINT_SIZE = 6,
|
||||
NV3D_VERTEX_ATTRIBUTE_MATRIX_INDEX = 7,
|
||||
NV3D_VERTEX_ATTRIBUTE_TEXCOORD0 = 8,
|
||||
NV3D_VERTEX_ATTRIBUTE_TEXCOORD1 = 9,
|
||||
NV3D_VERTEX_ATTRIBUTE_TEXCOORD2 = 10,
|
||||
NV3D_VERTEX_ATTRIBUTE_TEXCOORD3 = 11,
|
||||
/*
|
||||
* The _END enum value is used as a sentinel to terminate arrays of
|
||||
* Nv3dVertexAttributeInfoRec (see Nv3dVertexAttributeInfoRec, below).
|
||||
*/
|
||||
NV3D_VERTEX_ATTRIBUTE_END = 255,
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/*
|
||||
* Nv3dVertexAttributeInfoRec stores the triplet attribute, stream type, and
|
||||
* data type. Arrays of Nv3dVertexAttributeInfoRec are used to describe vertex
|
||||
* attribute configurations to FermiSetupVertexArrays().
|
||||
*
|
||||
* The NV3D_ATTRIB_ENTRY() and NV3D_ATTRIB_END macros can be used to make
|
||||
* Nv3dVertexAttributeInfoRec assignment more succinct. E.g.,
|
||||
*
|
||||
* Nv3dVertexAttributeInfoRec attribs[] = {
|
||||
* NV3D_ATTRIB_ENTRY(COLOR, STATIC, 4UB),
|
||||
* NV3D_ATTRIB_END,
|
||||
* };
|
||||
*/
|
||||
typedef struct _Nv3dVertexAttributeInfoRec {
|
||||
enum Nv3dVertexAttributeType attributeType;
|
||||
enum Nv3dVertexAttributeStreamType streamType;
|
||||
enum Nv3dVertexAttributeDataType dataType;
|
||||
} Nv3dVertexAttributeInfoRec;
|
||||
|
||||
#define NV3D_ATTRIB_TYPE_ENTRY(_i, _streamType, _dataType) \
|
||||
(Nv3dVertexAttributeInfoRec) \
|
||||
{ .attributeType = _i, \
|
||||
.streamType = NV3D_VERTEX_ATTRIBUTE_STREAM_##_streamType, \
|
||||
.dataType = NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_##_dataType }
|
||||
|
||||
#define NV3D_ATTRIB_ENTRY(_attribType, _streamType, _dataType) \
|
||||
NV3D_ATTRIB_TYPE_ENTRY(NV3D_VERTEX_ATTRIBUTE_##_attribType, _streamType, _dataType)
|
||||
|
||||
#define NV3D_ATTRIB_END \
|
||||
(Nv3dVertexAttributeInfoRec) \
|
||||
{ .attributeType = NV3D_VERTEX_ATTRIBUTE_END }
|
||||
|
||||
/*
|
||||
* When built into kernel code, define Nv3dFloat to be an NvU32: it is the same
|
||||
* size as a float, but the caller is responsible for storing float bit patterns
|
||||
* to Nv3dFloat.
|
||||
*/
|
||||
ct_assert(sizeof(float) == sizeof(NvU32));
|
||||
#if NV_PUSH_ALLOW_FLOAT
|
||||
typedef float Nv3dFloat;
|
||||
#else
|
||||
typedef NvU32 Nv3dFloat;
|
||||
#endif
|
||||
|
||||
static inline void nv3dPushFloat(NvPushChannelPtr p, const Nv3dFloat data)
|
||||
{
|
||||
#if NV_PUSH_ALLOW_FLOAT
|
||||
nvPushSetMethodDataF(p, data);
|
||||
#else
|
||||
nvPushSetMethodData(p, data);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Vertex attribute data types. Each of these types represents a different way
|
||||
* of specifying vertex attribute data.
|
||||
*/
|
||||
typedef struct __attribute__((packed)) {
|
||||
Nv3dFloat x, y;
|
||||
} Nv3dVertexAttrib2F;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU32 x, y;
|
||||
} Nv3dVertexAttrib2U;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvS32 x, y;
|
||||
} Nv3dVertexAttrib2S;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
Nv3dFloat x, y, z;
|
||||
} Nv3dVertexAttrib3F;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU32 x, y, z;
|
||||
} Nv3dVertexAttrib3U;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
Nv3dFloat x, y, z, w;
|
||||
} Nv3dVertexAttrib4F;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU16 x, y, z, w;
|
||||
} Nv3dVertexAttrib4US;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU8 x, y, z, w;
|
||||
} Nv3dVertexAttrib4UB;
|
||||
|
||||
typedef struct {
|
||||
NvU32 xyzw;
|
||||
} Nv3dVertexAttrib4UBPacked;
|
||||
|
||||
typedef struct __attribute__((packed)) {
|
||||
NvU32 xy;
|
||||
} Nv3dVertexAttrib2SPacked;
|
||||
|
||||
// List of component sizes used for the internal representation of a
|
||||
// texture header
|
||||
enum Nv3dTexHeaderComponentSizes {
|
||||
NV3D_TEXHEAD_A8B8G8R8,
|
||||
NV3D_TEXHEAD_A2B10G10R10,
|
||||
NV3D_TEXHEAD_B5G6R5,
|
||||
NV3D_TEXHEAD_A1B5G5R5,
|
||||
NV3D_TEXHEAD_R8,
|
||||
NV3D_TEXHEAD_R32,
|
||||
NV3D_TEXHEAD_R16,
|
||||
NV3D_TEXHEAD_G8R8,
|
||||
NV3D_TEXHEAD_R16G16B16A16,
|
||||
NV3D_TEXHEAD_R32G32B32A32,
|
||||
NV3D_TEXHEAD_Y8_VIDEO
|
||||
};
|
||||
|
||||
// List of component sources used for the internal representation of a
|
||||
// texture header
|
||||
enum Nv3dTexHeaderSource {
|
||||
NV3D_TEXHEAD_IN_A,
|
||||
NV3D_TEXHEAD_IN_R,
|
||||
NV3D_TEXHEAD_IN_G,
|
||||
NV3D_TEXHEAD_IN_B,
|
||||
NV3D_TEXHEAD_IN_ZERO,
|
||||
NV3D_TEXHEAD_IN_ONE_FLOAT
|
||||
};
|
||||
|
||||
// List of component data types used for the internal representation of
|
||||
// a texture header
|
||||
enum Nv3dTexHeaderDataType {
|
||||
NV3D_TEXHEAD_NUM_UNORM,
|
||||
NV3D_TEXHEAD_NUM_UINT,
|
||||
NV3D_TEXHEAD_NUM_FLOAT,
|
||||
NV3D_TEXHEAD_NUM_SNORM,
|
||||
NV3D_TEXHEAD_NUM_SINT
|
||||
};
|
||||
|
||||
enum Nv3dTexHeaderRepeatType {
|
||||
NV3D_TEXHEAD_REPEAT_TYPE_NONE,
|
||||
NV3D_TEXHEAD_REPEAT_TYPE_NORMAL,
|
||||
NV3D_TEXHEAD_REPEAT_TYPE_PAD,
|
||||
NV3D_TEXHEAD_REPEAT_TYPE_REFLECT
|
||||
};
|
||||
|
||||
enum Nv3dTextureFilterType{
|
||||
NV3D_TEXHEAD_FILTER_TYPE_NEAREST,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_LINEAR,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_ANISO_2X,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_ANISO_4X,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_ANISO_8X,
|
||||
NV3D_TEXHEAD_FILTER_TYPE_ANISO_16X
|
||||
};
|
||||
|
||||
enum Nv3dTexType {
|
||||
NV3D_TEX_TYPE_ONE_D,
|
||||
NV3D_TEX_TYPE_ONE_D_BUFFER,
|
||||
NV3D_TEX_TYPE_TWO_D_PITCH,
|
||||
NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
NvU32 x;
|
||||
NvU32 y;
|
||||
NvU32 z;
|
||||
} Nv3dBlockLinearLog2GobsPerBlock;
|
||||
|
||||
// Intermediate representation of a texture header
|
||||
typedef struct {
|
||||
NvBool error;
|
||||
|
||||
enum Nv3dTexHeaderComponentSizes sizes;
|
||||
|
||||
// Currently, we always use the same data type for all components.
|
||||
enum Nv3dTexHeaderDataType dataType;
|
||||
|
||||
struct {
|
||||
enum Nv3dTexHeaderSource x;
|
||||
enum Nv3dTexHeaderSource y;
|
||||
enum Nv3dTexHeaderSource z;
|
||||
enum Nv3dTexHeaderSource w;
|
||||
} source;
|
||||
|
||||
enum Nv3dTexType texType;
|
||||
|
||||
NvU64 offset;
|
||||
NvBool normalizedCoords;
|
||||
enum Nv3dTexHeaderRepeatType repeatType;
|
||||
enum Nv3dTextureFilterType filtering;
|
||||
int pitch;
|
||||
int width;
|
||||
int height;
|
||||
|
||||
Nv3dBlockLinearLog2GobsPerBlock log2GobsPerBlock;
|
||||
} Nv3dRenderTexInfo;
|
||||
|
||||
typedef NvU32 Nv3dTexSampler[8];
|
||||
typedef NvU32 Nv3dTexHeader[8];
|
||||
|
||||
// HW representation of a texture header
|
||||
typedef struct {
|
||||
Nv3dTexSampler samp;
|
||||
Nv3dTexHeader head;
|
||||
} Nv3dTexture;
|
||||
|
||||
#define NV3D_CONSTANT_BUFFER_SIZE (4096 * 4)
|
||||
|
||||
#define NV3D_TEXTURE_INDEX_INVALID (-1)
|
||||
|
||||
#define NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE (64 * 1024)
|
||||
|
||||
/*
|
||||
* The constant buffer alignment constraints, specifically for the methods:
|
||||
*
|
||||
* NV*97_SET_CONSTANT_BUFFER_SELECTOR_A_SIZE
|
||||
* NV*97_SET_CONSTANT_BUFFER_SELECTOR_C_ADDRESS_LOWER
|
||||
*
|
||||
* have evolved over GPU architectures:
|
||||
*
|
||||
* kepler maxwell pascal volta turing
|
||||
* SIZE 256 16 16 16 16
|
||||
* ADDRESS 256 256 256 256 64
|
||||
*
|
||||
* But, using an alignment of 256 all the time is simpler.
|
||||
*/
|
||||
#define NV3D_MIN_CONSTBUF_ALIGNMENT 256

/*
 * 3D engine pitch alignment requirements for texture surface.
 */
#define NV3D_TEXTURE_PITCH_ALIGNMENT 256

typedef struct _Nv3dStreamSurfaceRec {
    NvU64 gpuAddress;
    NvU64 size;
} Nv3dStreamSurfaceRec;

typedef struct _Nv3dVertexAttributeStreamRec {
    // Current GPU address within the stream.
    NvU64 current;
    // Terminating GPU address within the stream.
    NvU64 end;
    // Number of bytes per vertex.
    NvU32 stride;
    // Index of the next vertex to be launched.
    int nextLaunch;
} Nv3dVertexAttributeStreamRec;

typedef struct _Nv3dHal Nv3dHal;

typedef struct _Nv3dDeviceCapsRec {
    NvU32 hasSetBindlessTexture :1; /* Supports SetBindlessTexture method */
    NvU32 hasProgramRegion      :1;

    NvU32 maxDim;                   /*
                                     * Maximum width or height of the
                                     * texture surface in pixels.
                                     */
} Nv3dDeviceCapsRec, *Nv3dDeviceCapsPtr;

typedef struct _Nv3dDeviceSpaVersionRec {
    NvU16 major;
    NvU16 minor;
} Nv3dDeviceSpaVersionRec;

/*
 * Enum for each compiled shader version.
 */
enum Nv3dShaderArch {
    NV3D_SHADER_ARCH_MAXWELL,
    NV3D_SHADER_ARCH_PASCAL,
    NV3D_SHADER_ARCH_VOLTA,
    NV3D_SHADER_ARCH_TURING,
    NV3D_SHADER_ARCH_AMPERE,
    NV3D_SHADER_ARCH_HOPPER,
    NV3D_SHADER_ARCH_COUNT,
};

typedef struct _Nv3dDeviceRec {

    NvPushDevicePtr pPushDevice;
    Nv3dDeviceCapsRec caps;
    NvU32 classNumber;
    enum Nv3dShaderArch shaderArch;

    Nv3dDeviceSpaVersionRec spaVersion;

    NvU32 maxThreadsPerWarp;
    NvU32 maxWarps;

    const Nv3dHal *hal;

} Nv3dDeviceRec, *Nv3dDevicePtr;

typedef struct _Nv3dChannelProgramsRec {
    /*
     * An array of program descriptors, and the number of elements
     * in the array.
     */
    size_t num;
    const Nv3dProgramInfo *info;

    size_t maxLocalBytes;
    size_t maxStackBytes;

    /*
     * The shader program code segment.
     *
     * The size is in bytes.
     */
    struct {
        size_t decompressedSize;
        const unsigned char *compressedStart;
        const unsigned char *compressedEnd;
    } code;

    /*
     * The constant buffers generated by the compiler for use with the above
     * code segment.
     *
     * 'size' is the total size of the surface to allocate, in bytes.
     * 'sizeAlign' is the minimum alignment required by the hardware for each
     *             particular constant buffer.  (Although we may only have
     *             N bytes of data to upload for each constant buffer, that
     *             size should be padded out with zeroes to a multiple of this
     *             value.)
     * 'count' is the number of entries in the 'info' array.
     * 'info' is a pointer to an array of Nv3dShaderConstBufInfo entries.
     */
    struct {
        size_t size;
        NvU32 sizeAlign;
        NvU32 count;
        const Nv3dShaderConstBufInfo *info;
    } constants;
} Nv3dChannelProgramsRec;
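
/*
 * Illustrative sketch (not from the original source): given the 'constants'
 * description above, a caller uploading the compiler-generated constant
 * buffers pads each entry out to 'sizeAlign'.  'pPrograms' is a hypothetical
 * pointer to an Nv3dChannelProgramsRec:
 *
 *     NvU32 i;
 *     for (i = 0; i < pPrograms->constants.count; i++) {
 *         const Nv3dShaderConstBufInfo *pInfo = &pPrograms->constants.info[i];
 *         const NvU32 align = pPrograms->constants.sizeAlign;
 *         const NvU32 paddedSize = ((pInfo->size + align - 1) / align) * align;
 *         // Upload pInfo->size bytes from pInfo->data at pInfo->offset,
 *         // zero-filling up to paddedSize.
 *     }
 */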

typedef struct _Nv3dChannelRec {

    Nv3dDevicePtr p3dDevice;
    NvPushChannelPtr pPushChannel;

    NvU32 handle[NV_MAX_SUBDEVICES];
    NvU16 numTextures;
    NvU16 numTextureBindings;

    Nv3dVertexAttributeStreamRec
        vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT];

    /*
     * Begin / end state.  ~0 if outside begin/end, or NV9097_BEGIN_OP_* if
     * inside.
     */
    NvU32 currentPrimitiveMode;

    Nv3dChannelProgramsRec programs;
    int currentProgramIndex[NV3D_HW_SHADER_STAGE_COUNT];
    NvU64 programLocalMemorySize;

    NvBool hasFrameBoundaries;

    struct {
        NvU32 handle[NV_MAX_SUBDEVICES];
        NvU64 gpuAddress;
        NvU64 programOffset;
        NvU64 programConstantsOffset;
        NvU64 programLocalMemoryOffset;
        NvU64 textureOffset;
        NvU64 bindlessTextureConstantBufferOffset;
        NvU64 constantBufferOffset;
        NvU64 vertexStreamOffset[NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT];
        NvU64 totalSize;
    } surface;

} Nv3dChannelRec, *Nv3dChannelPtr;

typedef struct {
    Nv3dFloat red;
    Nv3dFloat green;
    Nv3dFloat blue;
    Nv3dFloat alpha;
} Nv3dColor;

typedef struct {
    NvU32 blendFactorSrc; /* NV9097_SET_BLEND_COLOR/ALPHA_SOURCE_COEFF_ */
    NvU32 blendFactorDst; /* NV9097_SET_BLEND_COLOR/ALPHA_DEST_COEFF_ */
    NvU32 blendEquation;  /* NV9097_SET_BLEND_COLOR/ALPHA_OP_ */
} Nv3dBlendState;

#endif /* __NVIDIA_3D_TYPES_H__ */
104
src/common/unix/nvidia-3d/interface/nvidia-3d-utils.h
Normal file
@@ -0,0 +1,104 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __NVIDIA_3D_UTILS_H__
#define __NVIDIA_3D_UTILS_H__

#include "nvidia-3d.h"

#include <class/cl9097.h>
#include <class/cla06fsubch.h>

static inline void nv3dSetSurfaceClip(
    Nv3dChannelRec *p3dChannel,
    NvS16 x,
    NvS16 y,
    NvU16 w,
    NvU16 h)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
                 NV9097_SET_SURFACE_CLIP_HORIZONTAL, 2);
    nvPushSetMethodData(p,
        NV3D_V(9097, SET_SURFACE_CLIP_HORIZONTAL, X, x) |
        NV3D_V(9097, SET_SURFACE_CLIP_HORIZONTAL, WIDTH, w));
    nvPushSetMethodData(p,
        NV3D_V(9097, SET_SURFACE_CLIP_VERTICAL, Y, y) |
        NV3D_V(9097, SET_SURFACE_CLIP_VERTICAL, HEIGHT, h));
}

static inline void nv3dClearSurface(
    Nv3dChannelRec *p3dChannel,
    const NvU32 clearColor[4],
    NvU16 x,
    NvU16 y,
    NvU16 w,
    NvU16 h)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_COLOR_CLEAR_VALUE(0), 4);
    nvPushSetMethodData(p, clearColor[0]);
    nvPushSetMethodData(p, clearColor[1]);
    nvPushSetMethodData(p, clearColor[2]);
    nvPushSetMethodData(p, clearColor[3]);

    nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_CLEAR_RECT_HORIZONTAL, 2);
    nvPushSetMethodData(p,
        NV3D_V(9097, SET_CLEAR_RECT_HORIZONTAL, XMIN, x) |
        NV3D_V(9097, SET_CLEAR_RECT_HORIZONTAL, XMAX, x + w));
    nvPushSetMethodData(p,
        NV3D_V(9097, SET_CLEAR_RECT_VERTICAL, YMIN, y) |
        NV3D_V(9097, SET_CLEAR_RECT_VERTICAL, YMAX, y + h));
    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_CLEAR_SURFACE,
        NV3D_C(9097, CLEAR_SURFACE, R_ENABLE, TRUE) |
        NV3D_C(9097, CLEAR_SURFACE, G_ENABLE, TRUE) |
        NV3D_C(9097, CLEAR_SURFACE, B_ENABLE, TRUE) |
        NV3D_C(9097, CLEAR_SURFACE, A_ENABLE, TRUE));
}
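
/*
 * Illustrative sketch (not part of the original header): a typical caller
 * first restricts the surface clip and then clears that rectangle.  The
 * channel pointer, dimensions, and clear value are hypothetical, and the
 * encoding of clearColor depends on the bound color target format:
 *
 *     const NvU32 clearColor[4] = { 0, 0, 0, 0 };
 *     nv3dSetSurfaceClip(p3dChannel, 0, 0, width, height);
 *     nv3dClearSurface(p3dChannel, clearColor, 0, 0, width, height);
 */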

static inline void nv3dVasBegin(
    Nv3dChannelRec *p3dChannel,
    NvU32 mode)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvAssert(p3dChannel->currentPrimitiveMode == ~0);

    p3dChannel->currentPrimitiveMode = mode;
    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_BEGIN, mode);
}

static inline void nv3dVasEnd(
    Nv3dChannelRec *p3dChannel)
{
    NvPushChannelPtr p = p3dChannel->pPushChannel;

    nvAssert(p3dChannel->currentPrimitiveMode != ~0);

    p3dChannel->currentPrimitiveMode = ~0;
    nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_END, 0);
}
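
/*
 * Illustrative sketch (not part of the original header): nv3dVasBegin() and
 * nv3dVasEnd() bracket a primitive, with vertex data pushed in between, e.g.
 * via nv3dVasDrawInlineVerts() (declared in nvidia-3d.h).  The vertex array,
 * its count, and the specific NV9097_BEGIN_OP_* value are hypothetical:
 *
 *     nv3dVasBegin(p3dChannel, NV9097_BEGIN_OP_TRIANGLE_STRIP);
 *     nv3dVasDrawInlineVerts(p3dChannel, verts, 4);
 *     nv3dVasEnd(p3dChannel);
 */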

#endif /* __NVIDIA_3D_UTILS_H__ */
296
src/common/unix/nvidia-3d/interface/nvidia-3d.h
Normal file
@@ -0,0 +1,296 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * The nvidia-3d library provides utility code for programming a 3D
 * object.
 */

#ifndef __NVIDIA_3D_H__
#define __NVIDIA_3D_H__

#include "nvtypes.h"
#include "nvmisc.h"   /* DRF_DEF, et al */
#include "nvlimits.h" /* NV_MAX_SUBDEVICES */

#include "nvidia-push-types.h"
#include "nvidia-3d-types.h"

#define NV3D_C(d, r, f, c)  DRF_DEF( d, _ ## r, _ ## f, _ ## c)
#define NV3D_V(d, r, f, v)  DRF_NUM( d, _ ## r, _ ## f, (NvU32)(v) )

/*
 * Allocate and free an Nv3dDeviceRec
 */
typedef struct _Nv3dAllocDeviceParams {
    NvPushDevicePtr pPushDevice;
} Nv3dAllocDeviceParams;

NvBool nv3dAllocDevice(
    const Nv3dAllocDeviceParams *pParams,
    Nv3dDevicePtr p3dDevice);

void nv3dFreeDevice(
    Nv3dDevicePtr p3dDevice);

/*
 * Allocate and free an Nv3dChannelRec data structure.
 *
 * Note that all pointers provided in this parameter structure are
 * cached in the Nv3dChannelRec.  They must remain valid from
 * nv3dAllocChannelState() until the corresponding
 * nv3dFreeChannelState() call.
 */
typedef struct _Nv3dAllocChannelStateParams {
    /*
     * The Nv3dDeviceRec to use with this channel.
     */
    Nv3dDevicePtr p3dDevice;

    /*
     * The number of texture headers/samplers nvidia-3d should
     * allocate.
     */
    NvU16 numTextures;

    /*
     * The number of general purpose constant buffers nvidia-3d should
     * allocate.
     */
    NvU16 numConstantBuffers;

    /*
     * The number of texture bindings.
     */
    NvU16 numTextureBindings;

    /*
     * Whether the host driver renders in terms of frames, or, like the X
     * driver, renders directly to the front buffer.  On >= Pascal, the pipe
     * needs to be explicitly flushed at the end of a frame.
     */
    NvBool hasFrameBoundaries;

} Nv3dAllocChannelStateParams;

NvBool nv3dAllocChannelState(
    const Nv3dAllocChannelStateParams *pParams,
    Nv3dChannelPtr p3dChannel);

void nv3dFreeChannelState(
    Nv3dChannelPtr p3dChannel);


/*
 * Allocate and free the RM object for an Nv3dChannelRec.
 */
typedef struct _Nv3dAllocChannelObjectParams {
    NvPushChannelPtr pPushChannel;
    NvU32 handle[NV_MAX_SUBDEVICES];
} Nv3dAllocChannelObjectParams;

NvBool nv3dAllocChannelObject(
    const Nv3dAllocChannelObjectParams *pParams,
    Nv3dChannelPtr p3dChannel);

void nv3dFreeChannelObject(
    Nv3dChannelPtr p3dChannel);


/*
 * Allocate and free the surface needed by the Nv3dChannelRec.
 */
NvBool nv3dAllocChannelSurface(Nv3dChannelPtr p3dChannel);

void nv3dFreeChannelSurface(Nv3dChannelPtr p3dChannel);


/*
 * Once the Nv3dChannelRec is allocated, and the objects and surface
 * for it are allocated, nv3dInitChannel() is used to initialize the
 * graphics engine and make it ready to use.
 */
NvBool nv3dInitChannel(Nv3dChannelPtr p3dChannel);
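
/*
 * Illustrative sketch (not part of the original header): the typical setup
 * order for the functions declared above.  Error handling is simplified, and
 * 'pPushDevice', 'pPushChannel', 'hObject', and the numeric counts are all
 * hypothetical; a real caller also keeps the parameter structures' pointers
 * valid for the life of the channel, as noted above.
 *
 *     Nv3dDeviceRec dev = { };
 *     Nv3dChannelRec chan = { };
 *
 *     Nv3dAllocDeviceParams devParams = { .pPushDevice = pPushDevice };
 *     Nv3dAllocChannelStateParams stateParams = {
 *         .p3dDevice          = &dev,
 *         .numTextures        = 16,
 *         .numConstantBuffers = 1,
 *         .numTextureBindings = 4,
 *         .hasFrameBoundaries = FALSE,
 *     };
 *     Nv3dAllocChannelObjectParams objParams = {
 *         .pPushChannel = pPushChannel,
 *         .handle       = { hObject },  // one RM handle per subdevice
 *     };
 *
 *     if (!nv3dAllocDevice(&devParams, &dev) ||
 *         !nv3dAllocChannelState(&stateParams, &chan) ||
 *         !nv3dAllocChannelObject(&objParams, &chan) ||
 *         !nv3dAllocChannelSurface(&chan) ||
 *         !nv3dInitChannel(&chan)) {
 *         // tear down in reverse order with the nv3dFree*() functions
 *     }
 */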


/*
 * Return the offset or GPU address of the specified item within the
 * Nv3dChannelRec's surface.
 */

static inline NvU64 nv3dGetTextureOffset(
    const Nv3dChannelRec *p3dChannel,
    NvU32 textureIndex)
{
    const NvU64 offset = p3dChannel->surface.textureOffset;

    return offset + (sizeof(Nv3dTexture) * textureIndex);
}

static inline NvU64 nv3dGetTextureGpuAddress(
    const Nv3dChannelRec *p3dChannel,
    NvU32 textureIndex)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetTextureOffset(p3dChannel, textureIndex);
}

static inline NvU64 nv3dGetConstantBufferOffset(
    const Nv3dChannelRec *p3dChannel,
    NvU32 constantBufferIndex)
{
    const NvU64 offset = p3dChannel->surface.constantBufferOffset;

    return offset + (NV3D_CONSTANT_BUFFER_SIZE * constantBufferIndex);
}

static inline NvU64 nv3dGetConstantBufferGpuAddress(
    const Nv3dChannelRec *p3dChannel,
    NvU32 constantBufferIndex)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetConstantBufferOffset(p3dChannel, constantBufferIndex);
}

static inline NvU64 nv3dGetProgramOffset(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.programOffset;
}

static inline NvU64 nv3dGetProgramGpuAddress(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.gpuAddress + nv3dGetProgramOffset(p3dChannel);
}

static inline NvU64 nv3dGetProgramConstantsOffset(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.programConstantsOffset;
}

static inline NvU64 nv3dGetProgramConstantsGpuAddress(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetProgramConstantsOffset(p3dChannel);
}

static inline NvU64 nv3dGetProgramLocalMemoryOffset(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.programLocalMemoryOffset;
}

static inline NvU64 nv3dGetProgramLocalMemoryGpuAddress(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetProgramLocalMemoryOffset(p3dChannel);
}

static inline NvU64 nv3dGetBindlessTextureConstantBufferOffset(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.bindlessTextureConstantBufferOffset;
}

static inline NvU64 nv3dGetBindlessTextureConstantBufferGpuAddress(
    const Nv3dChannelRec *p3dChannel)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetBindlessTextureConstantBufferOffset(p3dChannel);
}

static inline NvU64 nv3dGetVertexAttributestreamOffset(
    const Nv3dChannelRec *p3dChannel,
    enum Nv3dVertexAttributeStreamType stream)
{
    return p3dChannel->surface.vertexStreamOffset[stream];
}

static inline NvU64 nv3dGetVertexAttributestreamGpuAddress(
    const Nv3dChannelRec *p3dChannel,
    enum Nv3dVertexAttributeStreamType stream)
{
    return p3dChannel->surface.gpuAddress +
           nv3dGetVertexAttributestreamOffset(p3dChannel, stream);
}
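
/*
 * Illustrative sketch (not part of the original header): the getters above
 * simply add a sub-allocation offset to the surface's base GPU address, e.g.
 * for a hypothetical constant buffer index 'i' (i < numConstantBuffers):
 *
 *     NvU64 gpuAddr = nv3dGetConstantBufferGpuAddress(p3dChannel, i);
 *     // equals p3dChannel->surface.gpuAddress +
 *     //        p3dChannel->surface.constantBufferOffset +
 *     //        (i * NV3D_CONSTANT_BUFFER_SIZE)
 */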

void nv3dUploadDataInline(
    Nv3dChannelRec *p3dChannel,
    NvU64 gpuBaseAddress,
    size_t offset,
    const void *data,
    size_t bytes);

void nv3dClearProgramCache(
    Nv3dChannelRec *p3dChannel);

void nv3dLoadProgram(
    Nv3dChannelRec *p3dChannel,
    int programIndex);

void nv3dLoadTextures(
    Nv3dChannelRec *p3dChannel,
    int firstTextureIndex,
    const Nv3dRenderTexInfo *texInfo,
    int numTexures);

void nv3dBindTextures(
    Nv3dChannelPtr p3dChannel,
    int programIndex,
    const int *textureBindingIndices);

void nv3dSetBlendColorCoefficients(
    Nv3dChannelPtr p3dChannel,
    enum Nv3dBlendOperation op,
    NvBool forceNoDstAlphaBits,
    NvBool dualSourceBlending);

void nv3dSetBlend(
    Nv3dChannelPtr p3dChannel,
    const Nv3dBlendState *blendStateColor,
    const Nv3dBlendState *blendStateAlpha,
    const Nv3dColor *blendColor);

int nv3dVasSetup(
    Nv3dChannelRec *p3dChannel,
    const Nv3dVertexAttributeInfoRec *attribs,
    const Nv3dStreamSurfaceRec *pSurf);

void nv3dVasSelectCbForVertexData(
    Nv3dChannelRec *p3dChannel);

void nv3dVasDrawInlineVerts(
    Nv3dChannelRec *p3dChannel,
    const void *data,
    int numVerts);

NvBool nv3dVasMakeRoom(
    Nv3dChannelRec *p3dChannel,
    NvU32 pendingVerts,
    NvU32 moreVerts);

#endif /* __NVIDIA_3D_H__ */
162
src/common/unix/nvidia-3d/src/nvidia-3d-core.c
Normal file
@@ -0,0 +1,162 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nvidia-3d.h"
#include "nvidia-3d-types-priv.h"
#include "nvos.h"
#include "nvidia-3d-fermi.h"
#include "nvidia-3d-kepler.h"
#include "nvidia-push-utils.h"

NvBool nv3dAllocChannelObject(
    const Nv3dAllocChannelObjectParams *pParams,
    Nv3dChannelPtr p3dChannel)
{
    NvPushChannelPtr pPushChannel = pParams->pPushChannel;
    NvPushDevicePtr pPushDevice = pPushChannel->pDevice;
    const NvU32 classNumber = p3dChannel->p3dDevice->classNumber;
    int sd;

    /*
     * nv3dAllocChannelState() should have been called to assign p3dDevice.
     */
    nvAssert(p3dChannel->p3dDevice != NULL);
    nvAssert(p3dChannel->p3dDevice->pPushDevice ==
             pParams->pPushChannel->pDevice);

    for (sd = 0;
         sd < ARRAY_LEN(pPushChannel->channelHandle) &&
         pPushChannel->channelHandle[sd] != 0;
         sd++) {

        if (nvPushIsAModel(pPushDevice)) {
            nvAssert(sd == 0);
        } else {
            const NvPushImports *pImports = pPushDevice->pImports;
            nvAssert(pParams->handle[sd] != 0);
            NvU32 ret = pImports->rmApiAlloc(pPushDevice,
                                             pPushChannel->channelHandle[sd],
                                             pParams->handle[sd],
                                             classNumber,
                                             NULL);
            if (ret != NVOS_STATUS_SUCCESS) {
                return FALSE;
            }
        }

        p3dChannel->handle[sd] = pParams->handle[sd];
    }

    p3dChannel->pPushChannel = pPushChannel;

    return TRUE;
}

void nv3dFreeChannelObject(Nv3dChannelPtr p3dChannel)
{
    int sd;

    p3dChannel->pPushChannel = NULL;

    // No need to actually free the object here.  It gets destroyed during
    // channel teardown.
    for (sd = 0; sd < ARRAY_LEN(p3dChannel->handle); sd++) {
        p3dChannel->handle[sd] = 0;
    }
}

void nv3dUploadDataInline(
    Nv3dChannelRec *p3dChannel,
    NvU64 gpuBaseAddress,
    size_t offset,
    const void *data,
    size_t bytes)
{
    const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;

    pHal->uploadDataInline(p3dChannel, gpuBaseAddress, offset, data, bytes);
}

void nv3dClearProgramCache(Nv3dChannelRec *p3dChannel)
{
    Nv3dShaderStage stage;

    for (stage = 0;
         stage < ARRAY_LEN(p3dChannel->currentProgramIndex);
         stage++) {
        p3dChannel->currentProgramIndex[stage] = -1;
    }
}

void nv3dLoadTextures(
    Nv3dChannelRec *p3dChannel,
    int firstTex,
    const Nv3dRenderTexInfo *texInfo,
    int numTex)
{
    /* Limit number of texture/samplers on the stack to 4 (256 bytes) */
#define MAX_TEX_CHUNK 4
    Nv3dTexture textures[MAX_TEX_CHUNK];
    const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
    const NvU64 gpuBaseAddress = nv3dGetTextureGpuAddress(p3dChannel, 0);

    nvAssert(numTex >= 1);

    // Invalidate the texture/sampler caches.  This will cause a wait for idle
    // if there's rendering still in progress.  This is necessary in case the
    // texture parameters we're about to overwrite are in use.
    _nv3dInvalidateTexturesFermi(p3dChannel);

    while (numTex) {
        const NvU32 chunkNumTex = NV_MIN(numTex, MAX_TEX_CHUNK);
        const size_t startOffset = sizeof(Nv3dTexture) * firstTex;
        const size_t bytes = sizeof(Nv3dTexture) * chunkNumTex;
        int i;

        NVMISC_MEMSET(textures, 0, sizeof(textures));

        nvAssert(firstTex + numTex <= p3dChannel->numTextures);

        // Write the texture headers in HW format.
        for (i = 0; i < chunkNumTex; i++) {
            pHal->assignNv3dTexture(texInfo[i], &textures[i]);
        }

        nv3dUploadDataInline(p3dChannel, gpuBaseAddress, startOffset,
                             textures, bytes);

        numTex -= chunkNumTex;
        firstTex += chunkNumTex;
        texInfo += chunkNumTex;
    }
}

void nv3dBindTextures(
    Nv3dChannelPtr p3dChannel,
    int programIndex,
    const int *textureBindingIndices)
{
    nvAssert(programIndex < p3dChannel->programs.num);

    _nv3dBindTexturesKepler(p3dChannel, programIndex, textureBindingIndices);
}
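
/*
 * Illustrative sketch (not from the original source): a hypothetical caller
 * that loads two texture headers/samplers starting at index 0 and then binds
 * them for a previously loaded program.  The texInfo contents, the program
 * index, and the binding array are made up, and the exact length/meaning of
 * textureBindingIndices is defined by the Kepler binding helper, so treat
 * this only as the shape of the calls:
 *
 *     Nv3dRenderTexInfo texInfo[2] = { };   // filled in by the caller
 *     const int bindings[] = { 0, 1 };
 *
 *     nv3dLoadTextures(p3dChannel, 0, texInfo, 2);
 *     nv3dBindTextures(p3dChannel, programIndex, bindings);
 */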
557
src/common/unix/nvidia-3d/src/nvidia-3d-fermi.c
Normal file
@@ -0,0 +1,557 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-types-priv.h"
|
||||
#include "nvidia-3d-fermi.h"
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-imports.h"
|
||||
#include "nvidia-3d-constant-buffers.h"
|
||||
#include "nvidia-3d-shader-constants.h"
|
||||
#include "nvidia-3d-vertex-arrays.h"
|
||||
#include "nvidia-push-utils.h" /* nvPushSetObject */
|
||||
|
||||
#include <class/cl9097.h>
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
#include <xz.h>
|
||||
|
||||
#if NV_PUSH_ALLOW_FLOAT
|
||||
#define NV3D_FLOAT_ONE (1.00f)
|
||||
#else
|
||||
#define NV3D_FLOAT_ONE 0x3F800000 /* 1.00f */
|
||||
#endif
|
||||
|
||||
static void *DecompressUsingXz(
|
||||
const Nv3dChannelRec *p3dChannel,
|
||||
const void *compressedData,
|
||||
size_t compressedSize,
|
||||
size_t decompressedSize)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
void *decompressedData = nv3dImportAlloc(decompressedSize);
|
||||
struct xz_dec *xzState;
|
||||
enum xz_ret ret;
|
||||
|
||||
struct xz_buf xzBuf = {
|
||||
.in = compressedData,
|
||||
.in_pos = 0,
|
||||
.in_size = compressedSize,
|
||||
.out = decompressedData,
|
||||
.out_pos = 0,
|
||||
.out_size = decompressedSize,
|
||||
};
|
||||
|
||||
if (decompressedData == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
xz_crc32_init();
|
||||
|
||||
xzState = xz_dec_init(XZ_SINGLE, 0);
|
||||
|
||||
if (xzState == NULL) {
|
||||
pImports->logError(pPushDevice,
|
||||
"Failed to initialize xz decompression.");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = xz_dec_run(xzState, &xzBuf);
|
||||
|
||||
xz_dec_end(xzState);
|
||||
|
||||
if (ret != XZ_STREAM_END) {
|
||||
pImports->logError(pPushDevice, "Failed to decompress xz data.");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return decompressedData;
|
||||
|
||||
fail:
|
||||
nv3dImportFree(decompressedData);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *DecompressPrograms(const Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
const Nv3dChannelProgramsRec *pPrograms = &p3dChannel->programs;
|
||||
const size_t compressedSize =
|
||||
pPrograms->code.compressedEnd - pPrograms->code.compressedStart;
|
||||
|
||||
nvAssert(pPrograms->code.compressedEnd > pPrograms->code.compressedStart);
|
||||
|
||||
return DecompressUsingXz(p3dChannel,
|
||||
pPrograms->code.compressedStart,
|
||||
compressedSize,
|
||||
pPrograms->code.decompressedSize);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function attempts to upload the precompiled shaders to the GPU through
|
||||
* a temporary CPU mapping.
|
||||
* Failure of this function is not fatal -- we can fall back to uploading
|
||||
* through the pushbuffer.
|
||||
*/
|
||||
static NvBool UploadPrograms(Nv3dChannelPtr p3dChannel, const void *programCode)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
const size_t size = p3dChannel->programs.code.decompressedSize;
|
||||
NvU32 sd;
|
||||
|
||||
for (sd = 0; sd < pPushDevice->numSubDevices; sd++) {
|
||||
NvU32 status;
|
||||
void *ptr;
|
||||
const NvU32 hMemory = pPushDevice->clientSli ?
|
||||
p3dChannel->surface.handle[sd] :
|
||||
p3dChannel->surface.handle[0];
|
||||
|
||||
status = pImports->rmApiMapMemory(pPushDevice,
|
||||
pPushDevice->subDevice[sd].handle,
|
||||
hMemory,
|
||||
p3dChannel->surface.programOffset,
|
||||
size,
|
||||
&ptr,
|
||||
0);
|
||||
if (status != NVOS_STATUS_SUCCESS) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
nvAssert((size % 4) == 0);
|
||||
nvDmaMoveDWORDS(ptr, programCode, size / 4);
|
||||
|
||||
status = pImports->rmApiUnmapMemory(pPushDevice,
|
||||
pPushDevice->subDevice[sd].handle,
|
||||
hMemory,
|
||||
ptr,
|
||||
0);
|
||||
nvAssert(status == NVOS_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
NvBool nv3dInitChannel(Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
const Nv3dDeviceCapsRec *pCaps = &p3dChannel->p3dDevice->caps;
|
||||
const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
|
||||
const NvU64 tex0GpuAddress = nv3dGetTextureGpuAddress(p3dChannel, 0);
|
||||
NvU64 gpuAddress;
|
||||
NvU32 i;
|
||||
void *programCode = DecompressPrograms(p3dChannel);
|
||||
|
||||
if (programCode == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
* nv3dAllocChannel() should have been called to assign p3dDevice.
|
||||
*/
|
||||
nvAssert(p3dChannel->p3dDevice != NULL);
|
||||
|
||||
/*
|
||||
* nv3dAllocChannelObject() should have been called to assign
|
||||
* pPushChannel.
|
||||
*/
|
||||
nvAssert(p3dChannel->pPushChannel != NULL);
|
||||
|
||||
/*
|
||||
* nv3dAllocChannelSurface() should have been called to allocate
|
||||
* the surface.
|
||||
*/
|
||||
nvAssert(p3dChannel->surface.handle[0] != 0);
|
||||
|
||||
nv3dClearProgramCache(p3dChannel);
|
||||
|
||||
p3dChannel->currentPrimitiveMode = ~0;
|
||||
|
||||
nvPushSetObject(p, NVA06F_SUBCHANNEL_3D, p3dChannel->handle);
|
||||
|
||||
// Ct[0]'s format defaults to A8R8G8B8, rather than DISABLED.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_COLOR_TARGET_FORMAT(0),
|
||||
NV3D_C(9097, SET_COLOR_TARGET_FORMAT, V, DISABLED));
|
||||
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_ZT_SELECT,
|
||||
NV3D_V(9097, SET_ZT_SELECT, TARGET_COUNT, 0));
|
||||
|
||||
// Set a substitute stream address. This is used when the Vertex Attribute
|
||||
// Fetch unit tries to fetch outside the bounds of an enabled stream, which
|
||||
// should never happen. However, AModel always fetches this value
|
||||
// regardless of whether it actually needs it, so it causes MMU errors if
|
||||
// it's not set.
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_STREAM_SUBSTITUTE_A, 2);
|
||||
nvPushSetMethodDataU64(p, p3dChannel->surface.gpuAddress);
|
||||
|
||||
if (p3dChannel->programLocalMemorySize) {
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_SHADER_LOCAL_MEMORY_A, 4);
|
||||
// ADDRESS_{UPPER,LOWER}
|
||||
nvPushSetMethodDataU64(p,
|
||||
nv3dGetProgramLocalMemoryGpuAddress(p3dChannel));
|
||||
// SIZE_{UPPER,LOWER}
|
||||
nvPushSetMethodDataU64(p, p3dChannel->programLocalMemorySize);
|
||||
}
|
||||
|
||||
// Point rasterization.
|
||||
nvPushImmed(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_POINT_CENTER_MODE, OGL);
|
||||
|
||||
// SPA Control.
|
||||
nvPushImmed(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_SAMPLER_BINDING, VIA_HEADER_BINDING);
|
||||
|
||||
// Viewport parameters.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_VIEWPORT_SCALE_OFFSET,
|
||||
NV3D_C(9097, SET_VIEWPORT_SCALE_OFFSET, ENABLE, FALSE));
|
||||
|
||||
// Viewport clip. There are 16 viewports
|
||||
for (i = 0; i < 16; i++) {
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VIEWPORT_CLIP_HORIZONTAL(i), 2);
|
||||
nvPushSetMethodData(p, pCaps->maxDim << 16);
|
||||
nvPushSetMethodData(p, pCaps->maxDim << 16);
|
||||
}
|
||||
|
||||
nvPushImmed(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_PROVOKING_VERTEX, LAST);
|
||||
|
||||
// Use one rop state for all targets
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_SINGLE_ROP_CONTROL,
|
||||
NV3D_C(9097, SET_SINGLE_ROP_CONTROL, ENABLE, TRUE));
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_SINGLE_CT_WRITE_CONTROL,
|
||||
NV3D_C(9097, SET_SINGLE_CT_WRITE_CONTROL, ENABLE, TRUE));
|
||||
|
||||
// Set up blending: enable Ct[0]. It's disabled by default for the rest.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND(0),
|
||||
NV3D_C(9097, SET_BLEND, ENABLE, TRUE));
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_CONST_ALPHA, 1);
|
||||
nv3dPushFloat(p, NV3D_FLOAT_ONE);
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_BLEND_SEPARATE_FOR_ALPHA, 2);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(9097, SET_BLEND_SEPARATE_FOR_ALPHA, ENABLE, FALSE));
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(9097, SET_BLEND_COLOR_OP, V, OGL_FUNC_ADD));
|
||||
|
||||
// Upload the pixel shaders. First, attempt to upload through a CPU
|
||||
// mapping (which is generally faster); if that fails (e.g., because there
|
||||
// is no space in BAR1 for the mapping), then fall back to uploading inline
|
||||
// through the pushbuffer.
|
||||
if (!UploadPrograms(p3dChannel, programCode)) {
|
||||
pHal->uploadDataInline(p3dChannel,
|
||||
nv3dGetProgramGpuAddress(p3dChannel),
|
||||
0,
|
||||
programCode,
|
||||
p3dChannel->programs.code.decompressedSize);
|
||||
}
|
||||
|
||||
nv3dImportFree(programCode);
|
||||
programCode = NULL;
|
||||
|
||||
for (i = 0; i < p3dChannel->programs.constants.count; i++) {
|
||||
const Nv3dShaderConstBufInfo *pInfo =
|
||||
&p3dChannel->programs.constants.info[i];
|
||||
|
||||
pHal->uploadDataInline(p3dChannel,
|
||||
nv3dGetProgramConstantsGpuAddress(p3dChannel),
|
||||
pInfo->offset,
|
||||
pInfo->data,
|
||||
pInfo->size);
|
||||
}
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_INVALIDATE_SHADER_CACHES, 1);
|
||||
nvPushSetMethodData(p,
|
||||
DRF_DEF(9097, _INVALIDATE_SHADER_CACHES, _INSTRUCTION, _TRUE) |
|
||||
DRF_DEF(9097, _INVALIDATE_SHADER_CACHES, _CONSTANT, _TRUE));
|
||||
|
||||
if (pCaps->hasProgramRegion) {
|
||||
gpuAddress = nv3dGetProgramGpuAddress(p3dChannel);
|
||||
|
||||
nvAssert((gpuAddress & 255) == 0);
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_PROGRAM_REGION_A, 2);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
}
|
||||
|
||||
// Initialize the texture header and sampler area.
|
||||
//
|
||||
// To update these things, we upload data through the pushbuffer. The
|
||||
// upload has an alignment twice the size of a texture header/sampler, so we
|
||||
// interleave the two. Texture samplers come first. Thus, "texture sampler
|
||||
// 2i+1" is actually texture header 2i. This allows us to use a single
|
||||
// upload to update a single texture sampler/header pair if we so desire.
|
||||
gpuAddress = tex0GpuAddress + offsetof(Nv3dTexture, samp);
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_TEX_SAMPLER_POOL_A, 3);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
nvPushSetMethodData(p, 0); // Max index. 0 because we use VIA_HEADER mode.
|
||||
|
||||
gpuAddress = tex0GpuAddress + offsetof(Nv3dTexture, head);
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_TEX_HEADER_POOL_A, 3);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
nvPushSetMethodData(p, 2 * (NV_MAX(p3dChannel->numTextures, 1) - 1)); // Max index
|
||||
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_WINDOW_ORIGIN,
|
||||
NV3D_C(9097, SET_WINDOW_ORIGIN, MODE, UPPER_LEFT) |
|
||||
NV3D_C(9097, SET_WINDOW_ORIGIN, FLIP_Y, TRUE));
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_ZCULL_BOUNDS, 1);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(9097, SET_ZCULL_BOUNDS, Z_MIN_UNBOUNDED_ENABLE, FALSE) |
|
||||
NV3D_C(9097, SET_ZCULL_BOUNDS, Z_MAX_UNBOUNDED_ENABLE, FALSE));
|
||||
|
||||
pHal->setSpaVersion(p3dChannel);
|
||||
|
||||
pHal->initChannel(p3dChannel);
|
||||
|
||||
_nv3dInitializeStreams(p3dChannel);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
void nv3dLoadProgram(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
int programIndex)
|
||||
{
|
||||
const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
|
||||
const Nv3dProgramInfo *pgm = &p3dChannel->programs.info[programIndex];
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvAssert(programIndex < p3dChannel->programs.num);
|
||||
nvAssert(programIndex >= 0);
|
||||
nvAssert(pgm->stage < ARRAY_LEN(p3dChannel->currentProgramIndex));
|
||||
nvAssert(pgm->bindGroup <= NV3D_HW_BIND_GROUP_LAST);
|
||||
|
||||
if (p3dChannel->currentProgramIndex[pgm->stage] == programIndex) {
|
||||
return;
|
||||
}
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_PIPELINE_SHADER(pgm->stage), 1);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(9097, SET_PIPELINE_SHADER, ENABLE, TRUE) |
|
||||
NV3D_V(9097, SET_PIPELINE_SHADER, TYPE, pgm->type));
|
||||
|
||||
pHal->setProgramOffset(p3dChannel, pgm->stage, pgm->offset);
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_PIPELINE_REGISTER_COUNT(pgm->stage), 2);
|
||||
nvPushSetMethodData(p, pgm->registerCount);
|
||||
nvPushSetMethodData(p, pgm->bindGroup);
|
||||
|
||||
// Bind or invalidate the compiler-generated constant buffer slot, which the
|
||||
// compiler always puts in NV3D_CB_SLOT_COMPILER.
|
||||
if (pgm->constIndex == -1) {
|
||||
nv3dBindCb(p3dChannel, pgm->bindGroup,
|
||||
NV3D_CB_SLOT_COMPILER, FALSE);
|
||||
} else if (p3dChannel->programs.constants.size > 0) {
|
||||
const Nv3dShaderConstBufInfo *pInfo =
|
||||
&p3dChannel->programs.constants.info[pgm->constIndex];
|
||||
const NvU64 gpuAddress =
|
||||
nv3dGetProgramConstantsGpuAddress(p3dChannel) + pInfo->offset;
|
||||
const NvU32 paddedSize =
|
||||
NV_ALIGN_UP(pInfo->size, p3dChannel->programs.constants.sizeAlign);
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_CONSTANT_BUFFER_SELECTOR_A, 3);
|
||||
nvPushSetMethodData(p, paddedSize);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
nv3dBindCb(p3dChannel, pgm->bindGroup, NV3D_CB_SLOT_COMPILER, TRUE);
|
||||
}
|
||||
|
||||
p3dChannel->currentProgramIndex[pgm->stage] = programIndex;
|
||||
}
|
||||
|
||||
void _nv3dSetProgramOffsetFermi(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU32 stage,
|
||||
NvU32 offset)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_PIPELINE_PROGRAM(stage), 1);
|
||||
nvPushSetMethodData(p, offset);
|
||||
}
|
||||
|
||||
void _nv3dInvalidateTexturesFermi(
|
||||
Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_INVALIDATE_SAMPLER_CACHE,
|
||||
NV3D_C(9097, INVALIDATE_SAMPLER_CACHE, LINES, ALL));
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_INVALIDATE_TEXTURE_HEADER_CACHE,
|
||||
NV3D_C(9097, INVALIDATE_TEXTURE_HEADER_CACHE, LINES, ALL));
|
||||
}
|
||||
|
||||
void nv3dSetBlendColorCoefficients(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
enum Nv3dBlendOperation op,
|
||||
NvBool forceNoDstAlphaBits,
|
||||
NvBool dualSourceBlending)
|
||||
{
|
||||
#define SFACTOR(factor) (NV9097_SET_BLEND_COLOR_SOURCE_COEFF_V_OGL_##factor)
|
||||
#define DFACTOR(factor) (NV9097_SET_BLEND_COLOR_DEST_COEFF_V_OGL_##factor)
|
||||
#define OP(op) (NV3D_BLEND_OP_##op)
|
||||
|
||||
static const struct {
|
||||
NvU32 sfactor;
|
||||
NvU32 dfactor;
|
||||
} BlendOps[] = {
|
||||
[OP(CLEAR)] = {SFACTOR(ZERO), DFACTOR(ZERO)},
|
||||
[OP(SRC)] = {SFACTOR(ONE), DFACTOR(ZERO)},
|
||||
[OP(DST)] = {SFACTOR(ZERO), DFACTOR(ONE)},
|
||||
[OP(OVER)] = {SFACTOR(ONE), DFACTOR(ONE_MINUS_SRC_ALPHA)},
|
||||
[OP(OVER_REVERSE)] = {SFACTOR(ONE_MINUS_DST_ALPHA), DFACTOR(ONE)},
|
||||
[OP(IN)] = {SFACTOR(DST_ALPHA), DFACTOR(ZERO)},
|
||||
[OP(IN_REVERSE)] = {SFACTOR(ZERO), DFACTOR(SRC_ALPHA)},
|
||||
[OP(OUT)] = {SFACTOR(ONE_MINUS_DST_ALPHA), DFACTOR(ZERO)},
|
||||
[OP(OUT_REVERSE)] = {SFACTOR(ZERO), DFACTOR(ONE_MINUS_SRC_ALPHA)},
|
||||
[OP(ATOP)] = {SFACTOR(DST_ALPHA), DFACTOR(ONE_MINUS_SRC_ALPHA)},
|
||||
[OP(ATOP_REVERSE)] = {SFACTOR(ONE_MINUS_DST_ALPHA), DFACTOR(SRC_ALPHA)},
|
||||
[OP(XOR)] = {SFACTOR(ONE_MINUS_DST_ALPHA), DFACTOR(ONE_MINUS_SRC_ALPHA)},
|
||||
[OP(ADD)] = {SFACTOR(ONE), DFACTOR(ONE)},
|
||||
[OP(SATURATE)] = {SFACTOR(SRC_ALPHA_SATURATE), DFACTOR(ONE)}
|
||||
};
|
||||
|
||||
NvU32 sfactor, dfactor;
|
||||
|
||||
nvAssert(op < ARRAY_LEN(BlendOps));
|
||||
|
||||
sfactor = BlendOps[op].sfactor;
|
||||
dfactor = BlendOps[op].dfactor;
|
||||
|
||||
// if we're rendering to a picture that has an XRGB format that HW doesn't
|
||||
// support, feed in the 1.0 constant DstAlpha value
|
||||
if (forceNoDstAlphaBits) {
|
||||
switch (sfactor) {
|
||||
case SFACTOR(DST_ALPHA):
|
||||
sfactor = SFACTOR(CONSTANT_ALPHA);
|
||||
break;
|
||||
case SFACTOR(ONE_MINUS_DST_ALPHA):
|
||||
sfactor = SFACTOR(ONE_MINUS_CONSTANT_ALPHA);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If dual-source blending is enabled, swap the dfactor for one that uses
|
||||
// the second source color.
|
||||
if (dualSourceBlending) {
|
||||
switch (dfactor) {
|
||||
case DFACTOR(SRC_ALPHA):
|
||||
case DFACTOR(SRC_COLOR):
|
||||
dfactor = DFACTOR(SRC1COLOR);
|
||||
break;
|
||||
case DFACTOR(ONE_MINUS_SRC_ALPHA):
|
||||
case DFACTOR(ONE_MINUS_SRC_COLOR):
|
||||
dfactor = DFACTOR(INVSRC1COLOR);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Nv3dBlendState nv3dBlendStateColor = { };
|
||||
|
||||
nv3dBlendStateColor.blendEquation = NV3D_C(9097, SET_BLEND_COLOR_OP, V, OGL_FUNC_ADD);
|
||||
nv3dBlendStateColor.blendFactorSrc = sfactor;
|
||||
nv3dBlendStateColor.blendFactorDst = dfactor;
|
||||
|
||||
nv3dSetBlend(p3dChannel, &nv3dBlendStateColor, NULL, NULL);
|
||||
}
|
||||
|
||||
void nv3dSetBlend(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
const Nv3dBlendState *blendStateColor,
|
||||
const Nv3dBlendState *blendStateAlpha,
|
||||
const Nv3dColor *blendColor)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
const Nv3dColor defaultColor = {
|
||||
NV3D_FLOAT_ONE,
|
||||
NV3D_FLOAT_ONE,
|
||||
NV3D_FLOAT_ONE,
|
||||
NV3D_FLOAT_ONE
|
||||
};
|
||||
|
||||
if (blendColor == NULL) {
|
||||
blendColor = &defaultColor;
|
||||
}
|
||||
|
||||
if (blendStateColor == NULL && blendStateAlpha == NULL) {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND(0),
|
||||
NV3D_C(9097, SET_BLEND, ENABLE, FALSE));
|
||||
return;
|
||||
}
|
||||
|
||||
if (blendStateColor != NULL) {
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_COLOR_OP, 3);
|
||||
nvPushSetMethodData(p, blendStateColor->blendEquation);
|
||||
nvPushSetMethodData(p, blendStateColor->blendFactorSrc);
|
||||
nvPushSetMethodData(p, blendStateColor->blendFactorDst);
|
||||
}
|
||||
|
||||
if (blendStateAlpha != NULL) {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_SEPARATE_FOR_ALPHA,
|
||||
NV3D_C(9097, SET_BLEND_SEPARATE_FOR_ALPHA, ENABLE, TRUE));
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_ALPHA_OP, 2);
|
||||
nvPushSetMethodData(p, blendStateAlpha->blendEquation);
|
||||
nvPushSetMethodData(p, blendStateAlpha->blendFactorSrc);
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_ALPHA_DEST_COEFF, 1);
|
||||
nvPushSetMethodData(p, blendStateAlpha->blendFactorDst);
|
||||
} else {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_SEPARATE_FOR_ALPHA,
|
||||
NV3D_C(9097, SET_BLEND_SEPARATE_FOR_ALPHA, ENABLE, FALSE));
|
||||
}
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND_CONST_RED, 4);
|
||||
nv3dPushFloat(p, blendColor->red);
|
||||
nv3dPushFloat(p, blendColor->green);
|
||||
nv3dPushFloat(p, blendColor->blue);
|
||||
nv3dPushFloat(p, blendColor->alpha);
|
||||
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_BLEND(0),
|
||||
NV3D_C(9097, SET_BLEND, ENABLE, TRUE));
|
||||
|
||||
}
|
||||
|
||||
void _nv3dSetVertexStreamEndFermi(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
const Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_STREAM_LIMIT_A_A(stream), 2);
|
||||
nvPushSetMethodDataU64(p, pStream->end - 1);
|
||||
}
|
||||
384
src/common/unix/nvidia-3d/src/nvidia-3d-hopper.c
Normal file
@@ -0,0 +1,384 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-turing.h"
|
||||
#include "nvidia-3d-hopper.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include <class/clcb97.h>
|
||||
#include <class/clcb97tex.h>
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dInitChannelHopper(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
_nv3dInitChannelTuring(p3dChannel);
|
||||
|
||||
// Select texture header major version 1 for the new Hopper format.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NVCB97_SET_TEXTURE_HEADER_VERSION, 1);
|
||||
}
|
||||
|
||||
void _nv3dAssignNv3dTextureHopper(
|
||||
Nv3dRenderTexInfo info,
|
||||
Nv3dTexture *tex)
|
||||
{
|
||||
nvAssert(!info.error);
|
||||
|
||||
switch (info.sizes) {
|
||||
case NV3D_TEXHEAD_A8B8G8R8:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_A8B8G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A2B10G10R10:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_A2B10G10R10, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_B5G6R5:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_B5G6R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A1B5G5R5:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_A1B5G5R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R8:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_G8R8:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16G16B16A16:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R16_G16_B16_A16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32G32B32A32:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_R32_G32_B32_A32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_Y8_VIDEO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _COMPONENTS,
|
||||
_SIZES_Y8_VIDEO, tex->head);
|
||||
break;
|
||||
default:
|
||||
nvAssert(!"Unrecognized component sizes");
|
||||
}
|
||||
|
||||
switch (info.dataType) {
|
||||
case NV3D_TEXHEAD_NUM_UNORM:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_UNORM, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_UINT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_UINT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SNORM:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SINT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _DATA_TYPE,
|
||||
_TEX_DATA_TYPE_SINT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.x) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _X_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.y) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Y_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.z) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _Z_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.w) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _W_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
// Default to edge clamping. Our GPU seems to support wrapping
|
||||
// even with non-normalized coordinates.
|
||||
tex->samp[0] =
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_P, CLAMP_TO_EDGE);
|
||||
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D_BUFFER) {
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_1DRT, _ADDRESS_BITS31TO0,
|
||||
NvU64_LO32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_1DRT, _HEADER_VERSION,
|
||||
_SELECT_ONE_D_RAW_TYPED, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97_, TEXHEAD_V2_1DRT, _ADDRESS_BITS63TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_1DRT, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_PITCH) {
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _ADDRESS_BITS31TO5,
|
||||
info.offset >> 5, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_PITCH, _HEADER_VERSION,
|
||||
_SELECT_PITCH_V2, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _ADDRESS_BITS56TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _PITCH_BITS21TO5,
|
||||
NvU32_LO16(info.pitch >> 5), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_PITCH, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_PITCH, _BORDER_SOURCE,
|
||||
_BORDER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_PITCH, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
} else {
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D) {
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _TEXTURE_TYPE,
|
||||
_ONE_D, tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR) {
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
}
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _ADDRESS_BITS31TO9,
|
||||
info.offset >> 9, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _HEADER_VERSION,
|
||||
_SELECT_BLOCKLINEAR_V2, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _ADDRESS_BITS56TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _GOBS_PER_BLOCK_WIDTH,
|
||||
info.log2GobsPerBlock.x, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _GOBS_PER_BLOCK_HEIGHT,
|
||||
info.log2GobsPerBlock.y, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _GOBS_PER_BLOCK_DEPTH,
|
||||
info.log2GobsPerBlock.z, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _BORDER_SOURCE,
|
||||
_BORDER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(CB97, _TEXHEAD_V2_BL, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
}
|
||||
|
||||
switch (info.repeatType) {
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NORMAL:
|
||||
tex->samp[0] = NV3D_C(CB97, TEXSAMP0, ADDRESS_U, WRAP) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, WRAP);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_PAD:
|
||||
tex->samp[0] = NV3D_C(CB97, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_REFLECT:
|
||||
tex->samp[0] = NV3D_C(CB97, TEXSAMP0, ADDRESS_U, MIRROR) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, MIRROR);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NONE:
|
||||
tex->samp[0] = NV3D_C(CB97, TEXSAMP0, ADDRESS_U, BORDER) |
|
||||
NV3D_C(CB97, TEXSAMP0, ADDRESS_V, BORDER);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.filtering) {
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_NEAREST:
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_POINT) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_POINT) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_LINEAR:
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_2X:
|
||||
tex->samp[0] |= NV3D_C(CB97, TEXSAMP0, MAX_ANISOTROPY, ANISO_2_TO_1);
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_2_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_4X:
|
||||
tex->samp[0] |= NV3D_C(CB97, TEXSAMP0, MAX_ANISOTROPY, ANISO_4_TO_1);
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_4_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_8X:
|
||||
tex->samp[0] |= NV3D_C(CB97, TEXSAMP0, MAX_ANISOTROPY, ANISO_8_TO_1);
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_8_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_16X:
|
||||
tex->samp[0] |= NV3D_C(CB97, TEXSAMP0, MAX_ANISOTROPY, ANISO_16_TO_1);
|
||||
tex->samp[1] = NV3D_C(CB97, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(CB97, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_16_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(CB97, _TEXHEAD_V2_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
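The texture-header helpers in this commit (this variant and the Maxwell/Pascal variants later in the diff) all consume an Nv3dRenderTexInfo descriptor. As a hedged illustration only, using just the fields these functions read and assuming the usual nvidia-3d headers are in scope (the 256x256 geometry and A8B8G8R8 format are arbitrary example values, not taken from the commit):

/*
 * Sketch: describe a 256x256 blocklinear A8B8G8R8 texture at 'offset'.
 * log2GobsPerBlock is left zeroed (one GOB per block); real callers
 * derive it from the surface layout.
 */
static Nv3dRenderTexInfo MakeSimpleTexInfo(NvU64 offset)
{
    Nv3dRenderTexInfo info = { 0 };

    info.texType = NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR;
    info.offset = offset;
    info.width = 256;
    info.height = 256;
    info.normalizedCoords = TRUE;

    info.sizes = NV3D_TEXHEAD_A8B8G8R8;
    info.dataType = NV3D_TEXHEAD_NUM_UNORM;
    info.source.x = NV3D_TEXHEAD_IN_R;
    info.source.y = NV3D_TEXHEAD_IN_G;
    info.source.z = NV3D_TEXHEAD_IN_B;
    info.source.w = NV3D_TEXHEAD_IN_A;

    info.repeatType = NV3D_TEXHEAD_REPEAT_TYPE_PAD;
    info.filtering = NV3D_TEXHEAD_FILTER_TYPE_LINEAR;

    return info;
}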
504
src/common/unix/nvidia-3d/src/nvidia-3d-init.c
Normal file
504
src/common/unix/nvidia-3d/src/nvidia-3d-init.c
Normal file
@@ -0,0 +1,504 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2005-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-surface.h"
|
||||
#include "nvidia-3d-types-priv.h"
|
||||
|
||||
#include "nvidia-3d-fermi.h"
|
||||
#include "nvidia-3d-kepler.h"
|
||||
#include "nvidia-3d-maxwell.h"
|
||||
#include "nvidia-3d-pascal.h"
|
||||
#include "nvidia-3d-volta.h"
|
||||
#include "nvidia-3d-turing.h"
|
||||
#include "nvidia-3d-hopper.h"
|
||||
|
||||
#include "nvidia-push-init.h" // nvPushGetSupportedClassIndex()
|
||||
#include "nvidia-push-utils.h" // nvPushIsAmodel()
|
||||
|
||||
#include <class/clcb97.h> // HOPPER_A
|
||||
#include <class/clc997.h> // ADA_A
|
||||
#include <class/clc797.h> // AMPERE_B
|
||||
#include <class/clc697.h> // AMPERE_A
|
||||
#include <class/clc597.h> // TURING_A
|
||||
#include <class/clc397.h> // VOLTA_A
|
||||
#include <class/clc197.h> // PASCAL_B
|
||||
#include <class/clc097.h> // PASCAL_A
|
||||
#include <class/clb197.h> // MAXWELL_B
|
||||
#include <class/clb097.h> // MAXWELL_A
|
||||
|
||||
#include <ctrl/ctrl2080/ctrl2080gr.h>
|
||||
#include <nvos.h>
|
||||
|
||||
#include "g_maxwell_shader_info.h"
|
||||
#include "g_pascal_shader_info.h"
|
||||
#include "g_volta_shader_info.h"
|
||||
#include "g_turing_shader_info.h"
|
||||
#include "g_ampere_shader_info.h"
|
||||
#include "g_hopper_shader_info.h"
|
||||
|
||||
#define _NV3D_CHANNEL_PROGRAMS_ENTRY(_archLower, _archCamel, _archUpper) \
|
||||
[NV3D_SHADER_ARCH_ ## _archUpper ] = { \
|
||||
.num = NUM_PROGRAMS, \
|
||||
.info = _archCamel ## ProgramInfo, \
|
||||
.maxLocalBytes = _archCamel ## ShaderMaxLocalBytes, \
|
||||
.maxStackBytes = _archCamel ## ShaderMaxStackBytes, \
|
||||
.code.decompressedSize = _archCamel ## ProgramHeapSize, \
|
||||
.code.compressedStart = \
|
||||
({ extern const unsigned char \
|
||||
_binary_ ## _archLower ## _shaders_xz_start[]; \
|
||||
_binary_ ## _archLower ## _shaders_xz_start; }), \
|
||||
.code.compressedEnd = \
|
||||
({ extern const unsigned char \
|
||||
_binary_ ## _archLower ## _shaders_xz_end[]; \
|
||||
_binary_ ## _archLower ## _shaders_xz_end; }), \
|
||||
.constants.info = _archCamel ## ConstBufInfo, \
|
||||
.constants.count = \
|
||||
(NvU32)ARRAY_LEN(_archCamel ## ConstBufInfo), \
|
||||
.constants.size = _archCamel ## ConstBufSize, \
|
||||
.constants.sizeAlign = _archCamel ## ConstBufSizeAlign, \
|
||||
}
|
||||
|
||||
static Nv3dChannelProgramsRec PickProgramsRec(
|
||||
const Nv3dDeviceRec *p3dDevice)
|
||||
{
|
||||
const Nv3dChannelProgramsRec programsTable[NV3D_SHADER_ARCH_COUNT] = {
|
||||
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(maxwell, Maxwell, MAXWELL),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(pascal, Pascal, PASCAL),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(volta, Volta, VOLTA),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(turing, Turing, TURING),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(ampere, Ampere, AMPERE),
|
||||
_NV3D_CHANNEL_PROGRAMS_ENTRY(hopper, Hopper, HOPPER),
|
||||
};
|
||||
|
||||
return programsTable[p3dDevice->shaderArch];
|
||||
}
|
||||
|
||||
#undef _NV3D_CHANNEL_PROGRAMS_ENTRY
|
||||
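The .code.compressedStart/.code.compressedEnd members populated by the macro above are linker-generated symbols bracketing each architecture's xz-compressed shader blob. A minimal sketch of how the blob size falls out of those two pointers (this helper is illustrative only and not part of the commit; decompression itself is not shown here):

/* Sketch: size of the compressed shader blob selected for this device. */
static size_t GetCompressedShaderSize(const Nv3dDeviceRec *p3dDevice)
{
    const Nv3dChannelProgramsRec programs = PickProgramsRec(p3dDevice);

    return (size_t)(programs.code.compressedEnd -
                    programs.code.compressedStart);
}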
|
||||
|
||||
static NvBool QueryThreadsAndWarpsOneSd(
|
||||
Nv3dDevicePtr p3dDevice,
|
||||
NvU32 sd,
|
||||
NvU32 *pMaxWarps,
|
||||
NvU32 *pThreadsPerWarp)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
NvU32 ret;
|
||||
|
||||
NV2080_CTRL_GR_GET_INFO_PARAMS grInfoParams = { 0 };
|
||||
struct {
|
||||
NV2080_CTRL_GR_INFO numSMs;
|
||||
NV2080_CTRL_GR_INFO maxWarpsPerSM;
|
||||
NV2080_CTRL_GR_INFO threadsPerWarp;
|
||||
} grInfo;
|
||||
|
||||
NVMISC_MEMSET(&grInfo, 0, sizeof(grInfo));
|
||||
|
||||
grInfo.numSMs.index =
|
||||
NV2080_CTRL_GR_INFO_INDEX_THREAD_STACK_SCALING_FACTOR;
|
||||
grInfo.maxWarpsPerSM.index =
|
||||
NV2080_CTRL_GR_INFO_INDEX_MAX_WARPS_PER_SM;
|
||||
grInfo.threadsPerWarp.index =
|
||||
NV2080_CTRL_GR_INFO_INDEX_MAX_THREADS_PER_WARP;
|
||||
|
||||
grInfoParams.grInfoListSize =
|
||||
sizeof(grInfo) / sizeof(NV2080_CTRL_GR_INFO);
|
||||
|
||||
grInfoParams.grInfoList = NV_PTR_TO_NvP64(&grInfo);
|
||||
|
||||
ret = pImports->rmApiControl(pPushDevice,
|
||||
pPushDevice->subDevice[sd].handle,
|
||||
NV2080_CTRL_CMD_GR_GET_INFO,
|
||||
&grInfoParams,
|
||||
sizeof(grInfoParams));
|
||||
|
||||
if (ret != NVOS_STATUS_SUCCESS) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
*pMaxWarps = grInfo.numSMs.data * grInfo.maxWarpsPerSM.data;
|
||||
*pThreadsPerWarp = grInfo.threadsPerWarp.data;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static NvBool GetMaxThreadsAndWarps(Nv3dDevicePtr p3dDevice)
|
||||
{
|
||||
NvU32 sd;
|
||||
|
||||
p3dDevice->maxThreadsPerWarp = 0;
|
||||
p3dDevice->maxWarps = 0;
|
||||
|
||||
for (sd = 0; sd < p3dDevice->pPushDevice->numSubDevices; sd++) {
|
||||
|
||||
NvU32 maxWarps, threadsPerWarp;
|
||||
|
||||
if (!QueryThreadsAndWarpsOneSd(p3dDevice, sd,
|
||||
&maxWarps, &threadsPerWarp)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
p3dDevice->maxThreadsPerWarp =
|
||||
NV_MAX(p3dDevice->maxThreadsPerWarp, threadsPerWarp);
|
||||
|
||||
p3dDevice->maxWarps = NV_MAX(p3dDevice->maxWarps, maxWarps);
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the SM version reported by resman.
|
||||
*
|
||||
* \param pPushDevice The nvidia-push device corresponding to the GPU.
|
||||
*
|
||||
* \return The SM version of this device.
|
||||
*/
|
||||
static NvU32 GetSmVersion(
|
||||
NvPushDevicePtr pPushDevice)
|
||||
{
|
||||
NvU32 sd, smVersion = NV2080_CTRL_GR_INFO_SM_VERSION_NONE;
|
||||
|
||||
if (nvPushIsAModel(pPushDevice)) {
|
||||
/*
|
||||
* On amodel resman cannot tell us the SM version, so we pick
|
||||
* the SM version based on NVAModelConfig.
|
||||
*/
|
||||
static const NvU32 table[] = {
|
||||
[NV_AMODEL_MAXWELL] = NV2080_CTRL_GR_INFO_SM_VERSION_5_0,
|
||||
[NV_AMODEL_PASCAL] = NV2080_CTRL_GR_INFO_SM_VERSION_6_0,
|
||||
[NV_AMODEL_VOLTA] = NV2080_CTRL_GR_INFO_SM_VERSION_7_0,
|
||||
[NV_AMODEL_TURING] = NV2080_CTRL_GR_INFO_SM_VERSION_7_5,
|
||||
[NV_AMODEL_AMPERE] = NV2080_CTRL_GR_INFO_SM_VERSION_8_2,
|
||||
[NV_AMODEL_ADA] = NV2080_CTRL_GR_INFO_SM_VERSION_8_9,
|
||||
[NV_AMODEL_HOPPER] = NV2080_CTRL_GR_INFO_SM_VERSION_9_0,
|
||||
};
|
||||
|
||||
if (pPushDevice->amodelConfig >= ARRAY_LEN(table)) {
|
||||
return NV2080_CTRL_GR_INFO_SM_VERSION_NONE;
|
||||
}
|
||||
|
||||
return table[pPushDevice->amodelConfig];
|
||||
}
|
||||
|
||||
/*
|
||||
* Query the SM version from resman. This query is per-subDevice,
|
||||
* but we use SM version per-device, so assert that the SM version
|
||||
* matches across subDevices.
|
||||
*/
|
||||
for (sd = 0; sd < pPushDevice->numSubDevices; sd++) {
|
||||
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
NV2080_CTRL_GR_GET_INFO_PARAMS params = { };
|
||||
NV2080_CTRL_GR_INFO smVersionParams = { };
|
||||
NvU32 ret;
|
||||
|
||||
smVersionParams.index = NV2080_CTRL_GR_INFO_INDEX_SM_VERSION;
|
||||
params.grInfoListSize = 1;
|
||||
params.grInfoList = NV_PTR_TO_NvP64(&smVersionParams);
|
||||
|
||||
ret = pImports->rmApiControl(pPushDevice,
|
||||
pPushDevice->subDevice[sd].handle,
|
||||
NV2080_CTRL_CMD_GR_GET_INFO,
|
||||
¶ms,
|
||||
sizeof(params));
|
||||
|
||||
if (ret != NVOS_STATUS_SUCCESS) {
|
||||
return NV2080_CTRL_GR_INFO_SM_VERSION_NONE;
|
||||
}
|
||||
|
||||
if (sd == 0) {
|
||||
smVersion = smVersionParams.data;
|
||||
} else {
|
||||
nvAssert(smVersion == smVersionParams.data);
|
||||
}
|
||||
}
|
||||
|
||||
return smVersion;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Get the SPA version to use with the 3D Class.
|
||||
*
|
||||
* Note that resman only reports the SM version (the "hardware
|
||||
* revision"), not the SPA version (the ISA version). So we use a
|
||||
* table to map from SM version to SPA version.
|
||||
*
|
||||
* \param pPushDevice The nvidia-push device corresponding to the GPU.
|
||||
* \param pSpaVersion The spaVersion to assign.
|
||||
*
|
||||
* \return TRUE if the SPA version could be assigned.
|
||||
*/
|
||||
static NvBool GetSpaVersion(
|
||||
NvPushDevicePtr pPushDevice,
|
||||
Nv3dDeviceSpaVersionRec *pSpaVersion)
|
||||
{
|
||||
static const struct {
|
||||
NvU32 smVersion;
|
||||
Nv3dDeviceSpaVersionRec spaVersion;
|
||||
} table[] = {
|
||||
/* Maxwell */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_5_0, { 5,0 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_5_2, { 5,2 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_5_3, { 5,3 } },
|
||||
|
||||
/* Pascal */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_6_0, { 5,5 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_6_1, { 5,5 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_6_2, { 5,6 } },
|
||||
|
||||
/* Volta */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_7_0, { 7,0 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_7_2, { 7,2 } },
|
||||
|
||||
/* Turing */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_7_3, { 7,3 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_7_5, { 7,5 } },
|
||||
|
||||
/* Ampere */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_2, { 8,2 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_6, { 8,6 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_7, { 8,6 } },
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_8, { 8,6 } },
|
||||
/* Ada */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_8_9, { 8,9 } },
|
||||
/* Hopper */
|
||||
{ NV2080_CTRL_GR_INFO_SM_VERSION_9_0, { 9,0 } },
|
||||
};
|
||||
|
||||
const NvU32 smVersion = GetSmVersion(pPushDevice);
|
||||
NvU32 i;
|
||||
|
||||
for (i = 0; i < ARRAY_LEN(table); i++) {
|
||||
if (table[i].smVersion == smVersion) {
|
||||
*pSpaVersion = table[i].spaVersion;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static const Nv3dHal _nv3dHalMaxwell = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelMaxwell, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetFermi, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTextureMaxwell, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndFermi, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalPascal = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelPascal, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetFermi, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndFermi, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalVolta = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelPascal, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetVolta, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndFermi, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalTuring = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelTuring, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetVolta, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndTuring, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalAmpere = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelTuring, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetVolta, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndTuring, /* setVertexStreamEnd */
|
||||
};
|
||||
|
||||
static const Nv3dHal _nv3dHalHopper = {
|
||||
_nv3dSetSpaVersionKepler, /* setSpaVersion */
|
||||
_nv3dInitChannelHopper, /* initChannel */
|
||||
_nv3dUploadDataInlineKepler, /* uploadDataInline */
|
||||
_nv3dSetProgramOffsetVolta, /* setProgramOffset */
|
||||
_nv3dAssignNv3dTextureHopper, /* assignNv3dTexture */
|
||||
_nv3dSetVertexStreamEndTuring, /* setVertexStreamEnd */
|
||||
};
|
||||
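Each per-architecture Nv3dHal above is a table of function pointers; callers dispatch through the hal selected by nv3dAllocDevice() below. A minimal sketch, assuming the Nv3dHal member names match the /* ... */ annotations in the initializers (they are not spelled out in this file):

/* Sketch: run the HAL's channel init hooks for whichever arch was chosen. */
static void InitChannelViaHal(Nv3dChannelRec *p3dChannel)
{
    const Nv3dHal *hal = p3dChannel->p3dDevice->hal;

    hal->initChannel(p3dChannel);
    hal->setSpaVersion(p3dChannel);
}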
|
||||
NvBool nv3dAllocDevice(
|
||||
const Nv3dAllocDeviceParams *pParams,
|
||||
Nv3dDevicePtr p3dDevice)
|
||||
{
|
||||
static const struct {
|
||||
NvPushSupportedClass base;
|
||||
const Nv3dDeviceCapsRec caps;
|
||||
const Nv3dHal *hal;
|
||||
enum Nv3dShaderArch shaderArch;
|
||||
} table[] = {
|
||||
|
||||
#define ENTRY(_classNumber, \
|
||||
_arch, \
|
||||
_amodelArch, \
|
||||
_hasSetBindlessTexture, \
|
||||
_hasProgramRegion, \
|
||||
_maxDim, \
|
||||
_hal) \
|
||||
{ \
|
||||
.base.classNumber = _classNumber, \
|
||||
.base.amodelConfig = NV_AMODEL_ ## _amodelArch, \
|
||||
.caps.hasSetBindlessTexture = _hasSetBindlessTexture, \
|
||||
.caps.hasProgramRegion = _hasProgramRegion, \
|
||||
.caps.maxDim = _maxDim, \
|
||||
.hal = &_nv3dHal ## _hal, \
|
||||
.shaderArch = NV3D_SHADER_ARCH_ ## _arch,\
|
||||
}
|
||||
|
||||
/*
|
||||
* hal--------------------------------------------------+
|
||||
* maxDim----------------------------------------+ |
|
||||
* hasProgramRegion---------------------------+ | |
|
||||
* hasSetBindlessTexture-------------------+ | | |
|
||||
* amodel arch----------------+ | | | |
|
||||
* shader arch---+ | | | | |
|
||||
* classNumber | | | | | |
|
||||
* | | | | | | |
|
||||
*/
|
||||
ENTRY(HOPPER_A, HOPPER, HOPPER, 0, 0, 32768, Hopper),
|
||||
ENTRY(ADA_A, AMPERE, ADA, 0, 0, 32768, Ampere),
|
||||
ENTRY(AMPERE_B, AMPERE, AMPERE, 0, 0, 32768, Ampere),
|
||||
ENTRY(AMPERE_A, AMPERE, AMPERE, 0, 0, 32768, Ampere),
|
||||
ENTRY(TURING_A, TURING, TURING, 0, 0, 32768, Turing),
|
||||
ENTRY(VOLTA_A, VOLTA, VOLTA, 0, 0, 32768, Volta),
|
||||
ENTRY(PASCAL_B, PASCAL, PASCAL, 1, 1, 32768, Pascal),
|
||||
ENTRY(PASCAL_A, PASCAL, PASCAL, 1, 1, 32768, Pascal),
|
||||
ENTRY(MAXWELL_B, MAXWELL, MAXWELL, 1, 1, 16384, Maxwell),
|
||||
ENTRY(MAXWELL_A, MAXWELL, MAXWELL, 1, 1, 16384, Maxwell),
|
||||
};
|
||||
|
||||
int i;
|
||||
|
||||
NVMISC_MEMSET(p3dDevice, 0, sizeof(*p3dDevice));
|
||||
|
||||
/* find the first supported 3D HAL */
|
||||
|
||||
i = nvPushGetSupportedClassIndex(pParams->pPushDevice,
|
||||
table,
|
||||
sizeof(table[0]),
|
||||
ARRAY_LEN(table));
|
||||
if (i == -1) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!GetSpaVersion(pParams->pPushDevice, &p3dDevice->spaVersion)) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
p3dDevice->pPushDevice = pParams->pPushDevice;
|
||||
p3dDevice->caps = table[i].caps;
|
||||
p3dDevice->classNumber = table[i].base.classNumber;
|
||||
p3dDevice->hal = table[i].hal;
|
||||
p3dDevice->shaderArch = table[i].shaderArch;
|
||||
|
||||
if (!GetMaxThreadsAndWarps(p3dDevice)) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
|
||||
fail:
|
||||
nv3dFreeDevice(p3dDevice);
|
||||
return FALSE;
|
||||
}
|
||||
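A hedged usage sketch of the allocation/teardown pair defined here, assuming an already-initialized NvPushDevicePtr; Nv3dAllocDeviceParams is only known from this file to carry pPushDevice, so any other members are left zeroed:

/* Sketch: bring up and tear down an Nv3dDeviceRec around a push device. */
static NvBool SetUp3dDevice(NvPushDevicePtr pPushDevice,
                            Nv3dDeviceRec *p3dDevice)
{
    Nv3dAllocDeviceParams params = { 0 };

    params.pPushDevice = pPushDevice;

    if (!nv3dAllocDevice(&params, p3dDevice)) {
        return FALSE;
    }

    /* ... use p3dDevice->caps, p3dDevice->hal, etc. ... */

    nv3dFreeDevice(p3dDevice);
    return TRUE;
}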
|
||||
void nv3dFreeDevice(Nv3dDevicePtr p3dDevice)
|
||||
{
|
||||
/*
|
||||
* So far, there is nothing to free: Nv3dDevicePtr only stores
|
||||
* queried information.
|
||||
*/
|
||||
NVMISC_MEMSET(p3dDevice, 0, sizeof(*p3dDevice));
|
||||
}
|
||||
|
||||
static NvU64 ComputeProgramLocalMemorySize(
|
||||
const Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
const Nv3dDeviceRec *p3dDevice = p3dChannel->p3dDevice;
|
||||
|
||||
// LocalMemorySizePerSM needs to be a multiple of 512
|
||||
// Note that maxLocalBytes and/or maxStackBytes might be zero.
|
||||
const NvU64 defaultSizePerWarp =
|
||||
NV_ALIGN_UP(p3dChannel->programs.maxLocalBytes *
|
||||
p3dDevice->maxThreadsPerWarp +
|
||||
p3dChannel->programs.maxStackBytes, 512);
|
||||
|
||||
// shader local memory lower bits must be a multiple of 128kB
|
||||
return NV_ALIGN_UP(defaultSizePerWarp * p3dDevice->maxWarps, 128*1024);
|
||||
}
|
||||
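As a worked example of the sizing above (illustrative numbers, not taken from any particular chip): with maxLocalBytes = 1536, maxThreadsPerWarp = 32 and maxStackBytes = 1024, the per-warp size is NV_ALIGN_UP(1536 * 32 + 1024, 512) = 50176 bytes, which is already 512-aligned; multiplying by maxWarps = 2048 gives 102760448 bytes, an exact multiple of 128 KiB, so the final 128 KiB alignment leaves the allocation at 98 MiB.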
|
||||
NvBool nv3dAllocChannelState(
|
||||
const Nv3dAllocChannelStateParams *pParams,
|
||||
Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
NVMISC_MEMSET(p3dChannel, 0, sizeof(*p3dChannel));
|
||||
|
||||
p3dChannel->p3dDevice = pParams->p3dDevice;
|
||||
|
||||
p3dChannel->numTextures = pParams->numTextures;
|
||||
p3dChannel->numTextureBindings = pParams->numTextureBindings;
|
||||
|
||||
p3dChannel->hasFrameBoundaries = pParams->hasFrameBoundaries;
|
||||
|
||||
p3dChannel->programs = PickProgramsRec(pParams->p3dDevice);
|
||||
|
||||
p3dChannel->programLocalMemorySize =
|
||||
ComputeProgramLocalMemorySize(p3dChannel);
|
||||
|
||||
_nv3dAssignSurfaceOffsets(pParams, p3dChannel);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
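A hedged sketch of pairing nv3dAllocChannelState() with nv3dFreeChannelState(); only the parameter fields read above (p3dDevice, numTextures, numTextureBindings, hasFrameBoundaries) are set, and any surface-related members consumed by _nv3dAssignSurfaceOffsets() are assumed, for this illustration only, to tolerate being zeroed:

/* Sketch: allocate per-channel state for one texture binding. */
static NvBool SetUpChannelState(Nv3dDevicePtr p3dDevice,
                                Nv3dChannelRec *p3dChannel)
{
    Nv3dAllocChannelStateParams params = { 0 };

    params.p3dDevice = p3dDevice;
    params.numTextures = 1;
    params.numTextureBindings = 1;
    params.hasFrameBoundaries = FALSE;

    if (!nv3dAllocChannelState(&params, p3dChannel)) {
        return FALSE;
    }

    /* ... render ... */

    nv3dFreeChannelState(p3dChannel);
    return TRUE;
}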
|
||||
void nv3dFreeChannelState(Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
int sd;
|
||||
for (sd = 0; sd < NV_MAX_SUBDEVICES; sd++) {
|
||||
nvAssert(p3dChannel->surface.handle[sd] == 0);
|
||||
}
|
||||
nvAssert(p3dChannel->pPushChannel == NULL);
|
||||
|
||||
NVMISC_MEMSET(p3dChannel, 0, sizeof(*p3dChannel));
|
||||
}
|
||||
|
||||
154
src/common/unix/nvidia-3d/src/nvidia-3d-kepler.c
Normal file
154
src/common/unix/nvidia-3d/src/nvidia-3d-kepler.c
Normal file
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-kepler.h"
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-constant-buffers.h"
|
||||
#include "nvidia-3d-shader-constants.h"
|
||||
|
||||
#include <class/cla097.h>
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dSetSpaVersionKepler(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr pPushChannel = p3dChannel->pPushChannel;
|
||||
const NvU16 major = p3dChannel->p3dDevice->spaVersion.major;
|
||||
const NvU16 minor = p3dChannel->p3dDevice->spaVersion.minor;
|
||||
|
||||
// Tell AModel or fmodel what shader model version to use. This has no
|
||||
// effect on real hardware. The SM version (the "hardware revision" of the
|
||||
// SM block) does not always match the SPA version (the ISA version).
|
||||
nvPushMethod(pPushChannel, NVA06F_SUBCHANNEL_3D,
|
||||
NVA097_SET_SPA_VERSION, 1);
|
||||
nvPushSetMethodData(pPushChannel,
|
||||
NV3D_V(A097, SET_SPA_VERSION, MAJOR, major) |
|
||||
NV3D_V(A097, SET_SPA_VERSION, MINOR, minor));
|
||||
}
|
||||
|
||||
void _nv3dInitChannelKepler(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
// Configure constant buffer slot NV3D_CB_SLOT_BINDLESS_TEXTURE as the
|
||||
// place the texture binding table is stored. This is obsolete on Volta and
|
||||
// later, so don't run it there.
|
||||
if (p3dChannel->p3dDevice->caps.hasSetBindlessTexture) {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NVA097_SET_BINDLESS_TEXTURE,
|
||||
NV3D_V(A097, SET_BINDLESS_TEXTURE, CONSTANT_BUFFER_SLOT_SELECT,
|
||||
NV3D_CB_SLOT_BINDLESS_TEXTURE));
|
||||
}
|
||||
|
||||
// Disable shader exceptions. This matches OpenGL driver behavior.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_SHADER_EXCEPTIONS,
|
||||
NV3D_C(9097, SET_SHADER_EXCEPTIONS, ENABLE, FALSE));
|
||||
}
|
||||
|
||||
/*!
|
||||
* Upload data using the INLINE_TO_MEMORY methods embedded in the KEPLER_A
|
||||
* class.
|
||||
*
|
||||
* The number of dwords pushed inline is limited by nvPushMaxMethodCount().
|
||||
* Push the data in multiple chunks, if necessary.
|
||||
*/
|
||||
void _nv3dUploadDataInlineKepler(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU64 gpuBaseAddress,
|
||||
size_t offset,
|
||||
const void *data,
|
||||
size_t bytes)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
/*
|
||||
* Below we use '1 + dwordsThisChunk' as the method count, so subtract one
|
||||
* when computing chunkSizeDwords.
|
||||
*/
|
||||
const NvU32 chunkSizeDwords = nvPushMaxMethodCount(p) - 1;
|
||||
const NvU32 chunkSize = chunkSizeDwords * 4; /* in bytes */
|
||||
size_t bytesSoFar;
|
||||
|
||||
// Only allow uploading complete dwords.
|
||||
nvAssert((bytes & 3) == 0);
|
||||
|
||||
for (bytesSoFar = 0; bytesSoFar < bytes; bytesSoFar += chunkSize) {
|
||||
|
||||
const NvU32 bytesThisChunk = NV_MIN(bytes - bytesSoFar, chunkSize);
|
||||
const NvU32 dwordsThisChunk = bytesThisChunk / 4;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NVA097_LINE_LENGTH_IN, 5);
|
||||
nvPushSetMethodData(p, bytesThisChunk);
|
||||
nvPushSetMethodData(p, 1); // NVA097_LINE_COUNT
|
||||
nvPushSetMethodDataU64(p, gpuBaseAddress + offset + bytesSoFar);
|
||||
nvPushSetMethodData(p, bytesThisChunk); // NVA097_PITCH_OUT
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NVA097_SET_DST_WIDTH, 2);
|
||||
nvPushSetMethodData(p, bytesThisChunk);
|
||||
nvPushSetMethodData(p, 1); // NVA097_SET_DST_HEIGHT
|
||||
|
||||
nvPushMethodOneIncr(p, NVA06F_SUBCHANNEL_3D, NVA097_LAUNCH_DMA,
|
||||
1 + dwordsThisChunk);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_C(A097, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
|
||||
// Disable flush -- As long as only 3D requires the data uploaded,
|
||||
// we don't need to incur the performance penalty of a sys-membar.
|
||||
NV3D_C(A097, LAUNCH_DMA, COMPLETION_TYPE, FLUSH_DISABLE) |
|
||||
NV3D_C(A097, LAUNCH_DMA, INTERRUPT_TYPE, NONE) |
|
||||
NV3D_C(A097, LAUNCH_DMA, SYSMEMBAR_DISABLE, TRUE));
|
||||
nvPushInlineData(p, (const NvU8 *)data + bytesSoFar, dwordsThisChunk);
|
||||
}
|
||||
}
|
||||
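For illustration, a single call into the helper above splits the upload into nvPushMaxMethodCount()-sized chunks automatically; the sketch below only shows the call shape (the GPU base address and data pointer are placeholders, and the destination memory is assumed to be mapped for this channel):

/* Sketch: inline-upload 'bytes' of dword-aligned data at gpuBaseAddress. */
static void UploadExample(Nv3dChannelRec *p3dChannel,
                          NvU64 gpuBaseAddress,
                          const NvU32 *data,
                          size_t bytes)
{
    nvAssert((bytes & 3) == 0); /* the helper only handles whole dwords */

    _nv3dUploadDataInlineKepler(p3dChannel, gpuBaseAddress,
                                0 /* offset */, data, bytes);
}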
|
||||
void _nv3dBindTexturesKepler(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
int programIndex,
|
||||
const int *textureBindingIndices)
|
||||
{
|
||||
const NvU16 numTextureBindings = p3dChannel->numTextureBindings;
|
||||
NvPushChannelUnion *remappedBinding = NULL;
|
||||
NvU8 slot;
|
||||
const NvU64 gpuAddress =
|
||||
nv3dGetBindlessTextureConstantBufferGpuAddress(p3dChannel);
|
||||
|
||||
nv3dSelectCbAddress(p3dChannel, gpuAddress, NV3D_CONSTANT_BUFFER_SIZE);
|
||||
nv3dBindCb(p3dChannel, NV3D_HW_BIND_GROUP_FRAGMENT,
|
||||
NV3D_CB_SLOT_BINDLESS_TEXTURE, TRUE);
|
||||
/*
|
||||
* Set up the header in the pushbuffer for the LOAD_CONSTANTS method. The
|
||||
* below loop will write the data to upload directly into the pushbuffer.
|
||||
*/
|
||||
remappedBinding = nv3dLoadConstantsHeader(p3dChannel, 0,
|
||||
numTextureBindings);
|
||||
|
||||
for (slot = 0; slot < numTextureBindings; slot++) {
|
||||
int tex = textureBindingIndices[slot];
|
||||
|
||||
/*
|
||||
* Bindless texture packed pointers. Technically, these consist of
|
||||
* a texture header index at bits 19:0 and a sampler index at bits 32:20, but we don't need
|
||||
* to set a separate header because we enabled
|
||||
* SET_SAMPLER_BINDING_VIA_HEADER_BINDING.
|
||||
*/
|
||||
remappedBinding[slot].u = tex * 2;
|
||||
}
|
||||
}
|
||||
435
src/common/unix/nvidia-3d/src/nvidia-3d-maxwell.c
Normal file
435
src/common/unix/nvidia-3d/src/nvidia-3d-maxwell.c
Normal file
@@ -0,0 +1,435 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-maxwell.h"
|
||||
#include "nvidia-3d-kepler.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include "class/clb097.h"
|
||||
#include "class/clb097tex.h"
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dInitChannelMaxwell(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
const Nv3dDeviceRec *p3dDevice = p3dChannel->p3dDevice;
|
||||
|
||||
_nv3dInitChannelKepler(p3dChannel);
|
||||
|
||||
if (p3dDevice->classNumber == MAXWELL_A) {
|
||||
/*
|
||||
* Use Maxwell texture header format.
|
||||
*
|
||||
* maxwell.mfs says:
|
||||
* NOTE: this method is required to be sent in GM10x. It is ignored
|
||||
* and treated as a NOP in GM20x.
|
||||
*
|
||||
* And on later chips, it is removed and causes exceptions. So we only
|
||||
* send this on GM10x (class MAXWELL_A).
|
||||
*/
|
||||
nvPushImmed(p, NVA06F_SUBCHANNEL_3D,
|
||||
NVB097_SET_SELECT_MAXWELL_TEXTURE_HEADERS, TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
void _nv3dAssignNv3dTextureMaxwell(
|
||||
Nv3dRenderTexInfo info,
|
||||
Nv3dTexture *tex)
|
||||
{
|
||||
NvU32 hi_offset = NvU32_LO16(info.offset >> 32);
|
||||
|
||||
nvAssert(!info.error);
|
||||
|
||||
switch (info.sizes) {
|
||||
case NV3D_TEXHEAD_A8B8G8R8:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A8B8G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A2B10G10R10:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A2B10G10R10, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_B5G6R5:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_B5G6R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A1B5G5R5:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A1B5G5R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R8:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_G8R8:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16G16B16A16:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R16_G16_B16_A16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32G32B32A32:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R32_G32_B32_A32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_Y8_VIDEO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_Y8_VIDEO, tex->head);
|
||||
break;
|
||||
default:
|
||||
nvAssert(!"Unrecognized component sizes");
|
||||
}
|
||||
|
||||
switch (info.dataType) {
|
||||
case NV3D_TEXHEAD_NUM_UNORM:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_UINT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SNORM:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SINT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.x) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.y) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.z) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.w) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
// Default to edge clamping. Our GPU seems to support wrapping
|
||||
// even with non-normalized coordinates.
|
||||
tex->samp[0] =
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_P, CLAMP_TO_EDGE);
|
||||
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D_BUFFER) {
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_1D, _ADDRESS_BITS31TO0,
|
||||
NvU64_LO32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_1D, _HEADER_VERSION,
|
||||
_SELECT_ONE_D_BUFFER, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_1D, _ADDRESS_BITS47TO32,
|
||||
hi_offset, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_1D, _WIDTH_MINUS_ONE_BITS31TO16,
|
||||
NvU32_HI16(info.width - 1), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_1D, _TEXTURE_TYPE,
|
||||
_ONE_D_BUFFER, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_1D, _WIDTH_MINUS_ONE_BITS15TO0,
|
||||
NvU32_LO16(info.width - 1), tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_PITCH) {
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _ADDRESS_BITS31TO5,
|
||||
(NvU32)((info.offset >> 5) & 0x7ffffff), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_PITCH, _HEADER_VERSION,
|
||||
_SELECT_PITCH, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _ADDRESS_BITS47TO32,
|
||||
hi_offset, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _PITCH_BITS20TO5,
|
||||
NvU32_LO16(info.pitch >> 5), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_PITCH, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_PITCH, _BORDER_SIZE,
|
||||
_BORDER_SAMPLER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_PITCH, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
} else {
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D) {
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _TEXTURE_TYPE,
|
||||
_ONE_D, tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR) {
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
}
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _ADDRESS_BITS31TO9,
|
||||
(info.offset >> 9) & 0x7fffff, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _HEADER_VERSION,
|
||||
_SELECT_BLOCKLINEAR, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _ADDRESS_BITS47TO32,
|
||||
hi_offset, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _GOBS_PER_BLOCK_WIDTH,
|
||||
info.log2GobsPerBlock.x, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _GOBS_PER_BLOCK_HEIGHT,
|
||||
info.log2GobsPerBlock.y, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _GOBS_PER_BLOCK_DEPTH,
|
||||
info.log2GobsPerBlock.z, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _BORDER_SIZE,
|
||||
_BORDER_SAMPLER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(B097, _TEXHEAD_BL, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
}
|
||||
|
||||
switch (info.repeatType) {
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NORMAL:
|
||||
tex->samp[0] = NV3D_C(B097, TEXSAMP0, ADDRESS_U, WRAP) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, WRAP);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_PAD:
|
||||
tex->samp[0] = NV3D_C(B097, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_REFLECT:
|
||||
tex->samp[0] = NV3D_C(B097, TEXSAMP0, ADDRESS_U, MIRROR) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, MIRROR);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NONE:
|
||||
tex->samp[0] = NV3D_C(B097, TEXSAMP0, ADDRESS_U, BORDER) |
|
||||
NV3D_C(B097, TEXSAMP0, ADDRESS_V, BORDER);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.filtering) {
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_NEAREST:
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_POINT) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_POINT) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_LINEAR:
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_2X:
|
||||
tex->samp[0] |= NV3D_C(B097, TEXSAMP0, MAX_ANISOTROPY, ANISO_2_TO_1);
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_2_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_4X:
|
||||
tex->samp[0] |= NV3D_C(B097, TEXSAMP0, MAX_ANISOTROPY, ANISO_4_TO_1);
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_4_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_8X:
|
||||
tex->samp[0] |= NV3D_C(B097, TEXSAMP0, MAX_ANISOTROPY, ANISO_8_TO_1);
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_8_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_16X:
|
||||
tex->samp[0] |= NV3D_C(B097, TEXSAMP0, MAX_ANISOTROPY, ANISO_16_TO_1);
|
||||
tex->samp[1] = NV3D_C(B097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(B097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(B097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_16_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(B097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
431
src/common/unix/nvidia-3d/src/nvidia-3d-pascal.c
Normal file
431
src/common/unix/nvidia-3d/src/nvidia-3d-pascal.c
Normal file
@@ -0,0 +1,431 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-pascal.h"
|
||||
#include "nvidia-3d-maxwell.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include "class/clc197.h" /* NVC197_SET_GO_IDLE_TIMEOUT */
|
||||
#include "class/clc097tex.h"
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dInitChannelPascal(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
_nv3dInitChannelMaxwell(p3dChannel);
|
||||
|
||||
if (!p3dChannel->hasFrameBoundaries) {
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NVC197_SET_GO_IDLE_TIMEOUT, 1);
|
||||
nvPushSetMethodData(p, 0x800);
|
||||
}
|
||||
}
|
||||
|
||||
void _nv3dAssignNv3dTexturePascal(
|
||||
Nv3dRenderTexInfo info,
|
||||
Nv3dTexture *tex)
|
||||
{
|
||||
nvAssert(!info.error);
|
||||
|
||||
switch (info.sizes) {
|
||||
case NV3D_TEXHEAD_A8B8G8R8:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A8B8G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A2B10G10R10:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A2B10G10R10, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_B5G6R5:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_B5G6R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_A1B5G5R5:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_A1B5G5R5, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R8:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_G8R8:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_G8R8, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R16G16B16A16:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R16_G16_B16_A16, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_R32G32B32A32:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_R32_G32_B32_A32, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_Y8_VIDEO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _COMPONENTS,
|
||||
_SIZES_Y8_VIDEO, tex->head);
|
||||
break;
|
||||
default:
|
||||
nvAssert(!"Unrecognized component sizes");
|
||||
}
|
||||
|
||||
switch (info.dataType) {
|
||||
case NV3D_TEXHEAD_NUM_UNORM:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_UNORM, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_UINT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_UINT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SNORM:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_FLOAT, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_NUM_SINT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _R_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _G_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _B_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _A_DATA_TYPE,
|
||||
_NUM_SINT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.x) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _X_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.y) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Y_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.z) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _Z_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.source.w) {
|
||||
case NV3D_TEXHEAD_IN_A:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_A, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_R:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_R, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_G:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_G, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_B:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_B, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ZERO:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_ZERO, tex->head);
|
||||
break;
|
||||
case NV3D_TEXHEAD_IN_ONE_FLOAT:
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _W_SOURCE,
|
||||
_IN_ONE_FLOAT, tex->head);
|
||||
break;
|
||||
}
|
||||
|
||||
// Default to edge clamping. Our GPU seems to support wrapping
|
||||
// even with non-normalized coordinates.
|
||||
tex->samp[0] =
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_P, CLAMP_TO_EDGE);
|
||||
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D_BUFFER) {
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_1D, _ADDRESS_BITS31TO0,
|
||||
NvU64_LO32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_1D, _HEADER_VERSION,
|
||||
_SELECT_ONE_D_BUFFER, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_1D, _ADDRESS_BITS48TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_1D, _WIDTH_MINUS_ONE_BITS31TO16,
|
||||
NvU32_HI16(info.width - 1), tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_1D, _TEXTURE_TYPE,
|
||||
_ONE_D_BUFFER, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_1D, _WIDTH_MINUS_ONE_BITS15TO0,
|
||||
NvU32_LO16(info.width - 1), tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_PITCH) {
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _ADDRESS_BITS31TO5,
|
||||
info.offset >> 5, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_PITCH, _HEADER_VERSION,
|
||||
_SELECT_PITCH, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _ADDRESS_BITS48TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _PITCH_BITS20TO5,
|
||||
NvU32_LO16(info.pitch >> 5), tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _PITCH_BIT21,
|
||||
info.pitch >> 21, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_PITCH, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _HEIGHT_MINUS_ONE_BIT16,
|
||||
(info.height - 1) >> 16, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_PITCH, _BORDER_SIZE,
|
||||
_BORDER_SAMPLER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_PITCH, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
} else {
|
||||
if (info.texType == NV3D_TEX_TYPE_ONE_D) {
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _TEXTURE_TYPE,
|
||||
_ONE_D, tex->head);
|
||||
} else if (info.texType == NV3D_TEX_TYPE_TWO_D_BLOCKLINEAR) {
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _TEXTURE_TYPE,
|
||||
_TWO_D_NO_MIPMAP, tex->head);
|
||||
}
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _ADDRESS_BITS31TO9,
|
||||
info.offset >> 9, tex->head);
|
||||
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _HEADER_VERSION,
|
||||
_SELECT_BLOCKLINEAR, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _ADDRESS_BITS48TO32,
|
||||
NvU64_HI32(info.offset), tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _GOBS_PER_BLOCK_WIDTH,
|
||||
info.log2GobsPerBlock.x, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _GOBS_PER_BLOCK_HEIGHT,
|
||||
info.log2GobsPerBlock.y, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _GOBS_PER_BLOCK_DEPTH,
|
||||
info.log2GobsPerBlock.z, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _WIDTH_MINUS_ONE,
|
||||
info.width - 1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _BORDER_SIZE,
|
||||
_BORDER_SAMPLER_COLOR, tex->head);
|
||||
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _HEIGHT_MINUS_ONE,
|
||||
info.height - 1, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _HEIGHT_MINUS_ONE_BIT16,
|
||||
(info.height - 1) >> 16, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _DEPTH_MINUS_ONE,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _DEPTH_MINUS_ONE_BIT14,
|
||||
0, tex->head);
|
||||
FLD_SET_DRF_NUM_MW(C097, _TEXHEAD_BL, _NORMALIZED_COORDS,
|
||||
info.normalizedCoords, tex->head);
|
||||
}
|
||||
|
||||
switch (info.repeatType) {
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NORMAL:
|
||||
tex->samp[0] = NV3D_C(C097, TEXSAMP0, ADDRESS_U, WRAP) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, WRAP);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_PAD:
|
||||
tex->samp[0] = NV3D_C(C097, TEXSAMP0, ADDRESS_U, CLAMP_TO_EDGE) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, CLAMP_TO_EDGE);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_REFLECT:
|
||||
tex->samp[0] = NV3D_C(C097, TEXSAMP0, ADDRESS_U, MIRROR) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, MIRROR);
|
||||
break;
|
||||
case NV3D_TEXHEAD_REPEAT_TYPE_NONE:
|
||||
tex->samp[0] = NV3D_C(C097, TEXSAMP0, ADDRESS_U, BORDER) |
|
||||
NV3D_C(C097, TEXSAMP0, ADDRESS_V, BORDER);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (info.filtering) {
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_NEAREST:
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_POINT) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_POINT) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_LINEAR:
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_2X:
|
||||
tex->samp[0] |= NV3D_C(C097, TEXSAMP0, MAX_ANISOTROPY, ANISO_2_TO_1);
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_2_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_4X:
|
||||
tex->samp[0] |= NV3D_C(C097, TEXSAMP0, MAX_ANISOTROPY, ANISO_4_TO_1);
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_4_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_8X:
|
||||
tex->samp[0] |= NV3D_C(C097, TEXSAMP0, MAX_ANISOTROPY, ANISO_8_TO_1);
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_8_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
|
||||
break;
|
||||
|
||||
case NV3D_TEXHEAD_FILTER_TYPE_ANISO_16X:
|
||||
tex->samp[0] |= NV3D_C(C097, TEXSAMP0, MAX_ANISOTROPY, ANISO_16_TO_1);
|
||||
tex->samp[1] = NV3D_C(C097, TEXSAMP1, MAG_FILTER, MAG_LINEAR) |
|
||||
NV3D_C(C097, TEXSAMP1, MIN_FILTER, MIN_ANISO) |
|
||||
NV3D_C(C097, TEXSAMP1, MIP_FILTER, MIP_NONE);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _MAX_ANISOTROPY,
|
||||
_ANISO_16_TO_1, tex->head);
|
||||
FLD_SET_DRF_DEF_MW(C097, _TEXHEAD_BL, _ANISO_FINE_SPREAD_MODIFIER,
|
||||
_SPREAD_MODIFIER_CONST_TWO, tex->head);
|
||||
break;
|
||||
|
||||
}
|
||||
}
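/*
 * Illustrative sketch (not part of the original file): how the pitch-linear
 * path above splits a byte pitch across the texture header.  The pitch is
 * programmed in 32-byte units (hence the ">> 5"); bits 20:5 go in
 * _PITCH_BITS20TO5 and bit 21 goes in _PITCH_BIT21, so the pitch must be a
 * multiple of 32 bytes and at most (1 << 22) - 32 bytes.  The helper name is
 * made up for illustration.
 */
static inline void ExampleSplitPitch(NvU32 pitch,
                                     NvU32 *pBits20to5,
                                     NvU32 *pBit21)
{
    nvAssert((pitch & 31) == 0);          /* pitch is programmed in 32-byte units */
    *pBits20to5 = NvU32_LO16(pitch >> 5); /* bits 20:5 */
    *pBit21 = (pitch >> 21) & 1;          /* bit 21 */
}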
|
||||
291
src/common/unix/nvidia-3d/src/nvidia-3d-surface.c
Normal file
@@ -0,0 +1,291 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-surface.h"
|
||||
#include "nvidia-push-utils.h" /* nvPushIsAmodel() */
|
||||
|
||||
#include <nvos.h>
|
||||
|
||||
static void FreeSurface(
|
||||
Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
int sd;
|
||||
|
||||
for (sd = ARRAY_LEN(pPushDevice->subDevice) - 1;
|
||||
sd >= 0;
|
||||
sd--) {
|
||||
if (p3dChannel->surface.handle[sd]) {
|
||||
NvU32 ret = pPushDevice->pImports->rmApiFree(
|
||||
pPushDevice,
|
||||
pPushDevice->subDevice[sd].deviceHandle,
|
||||
p3dChannel->surface.handle[sd]);
|
||||
nvAssert(ret == NVOS_STATUS_SUCCESS);
|
||||
(void)ret;
|
||||
p3dChannel->surface.handle[sd] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static NvBool AllocSurface(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU64 size)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
int sd;
|
||||
|
||||
for (sd = 0;
|
||||
sd < ARRAY_LEN(pPushDevice->subDevice) &&
|
||||
pPushDevice->subDevice[sd].deviceHandle != 0;
|
||||
sd++) {
|
||||
|
||||
NVOS32_PARAMETERS params = {
|
||||
.hRoot = pPushDevice->clientHandle,
|
||||
.hObjectParent = pPushDevice->subDevice[sd].deviceHandle,
|
||||
.function = NVOS32_FUNCTION_ALLOC_SIZE,
|
||||
.data.AllocSize.owner = pPushDevice->clientHandle,
|
||||
.data.AllocSize.type = NVOS32_TYPE_SHADER_PROGRAM,
|
||||
.data.AllocSize.size = size,
|
||||
.data.AllocSize.attr =
|
||||
DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM) |
|
||||
DRF_DEF(OS32, _ATTR, _PHYSICALITY, _ALLOW_NONCONTIGUOUS) |
|
||||
DRF_DEF(OS32, _ATTR, _COHERENCY, _WRITE_COMBINE),
|
||||
.data.AllocSize.attr2 =
|
||||
DRF_DEF(OS32, _ATTR2, _GPU_CACHEABLE, _YES),
|
||||
.data.AllocSize.flags = 0,
|
||||
.data.AllocSize.alignment = 4096,
|
||||
};
|
||||
|
||||
NvU32 ret = pImports->rmApiVidHeapControl(pPushDevice, ¶ms);
|
||||
|
||||
if (ret != NVOS_STATUS_SUCCESS) {
|
||||
FreeSurface(p3dChannel);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
p3dChannel->surface.handle[sd] = params.data.AllocSize.hMemory;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void UnmapSurface(
|
||||
const Nv3dChannelRec *p3dChannel,
|
||||
NvU64 gpuAddress)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
int sd;
|
||||
|
||||
for (sd = ARRAY_LEN(p3dChannel->surface.handle) - 1; sd >= 0; sd--) {
|
||||
if (p3dChannel->surface.handle[sd]) {
|
||||
NvU32 ret = pImports->rmApiUnmapMemoryDma(
|
||||
pPushDevice,
|
||||
pPushDevice->subDevice[sd].deviceHandle,
|
||||
pPushDevice->subDevice[sd].gpuVASpaceCtxDma,
|
||||
p3dChannel->surface.handle[sd],
|
||||
0,
|
||||
gpuAddress);
|
||||
nvAssert(ret == NVOS_STATUS_SUCCESS);
|
||||
(void)ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static NvU64 MapSurface(
|
||||
const Nv3dChannelRec *p3dChannel,
|
||||
NvU64 size)
|
||||
{
|
||||
NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
|
||||
const NvPushImports *pImports = pPushDevice->pImports;
|
||||
NvU64 gpuAddress = 0;
|
||||
int sd;
|
||||
|
||||
for (sd = 0;
|
||||
sd < ARRAY_LEN(p3dChannel->surface.handle) &&
|
||||
p3dChannel->surface.handle[sd] != 0;
|
||||
sd++) {
|
||||
NvU32 flags = DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);
|
||||
NvU64 thisGpuAddress;
|
||||
|
||||
if (sd == 0) {
|
||||
/* For the first device, RM assigns a virtual address. */
|
||||
thisGpuAddress = 0;
|
||||
} else {
|
||||
/* For subsequent devices, use the same virtual address. */
|
||||
flags = FLD_SET_DRF(OS46, _FLAGS, _DMA_OFFSET_FIXED, _TRUE, flags);
|
||||
nvAssert(gpuAddress != 0);
|
||||
thisGpuAddress = gpuAddress;
|
||||
}
|
||||
|
||||
NvU32 ret = pImports->rmApiMapMemoryDma(pPushDevice,
|
||||
pPushDevice->subDevice[sd].deviceHandle,
|
||||
pPushDevice->subDevice[sd].gpuVASpaceCtxDma,
|
||||
p3dChannel->surface.handle[sd],
|
||||
0,
|
||||
size,
|
||||
flags,
|
||||
&thisGpuAddress);
|
||||
if (ret != NVOS_STATUS_SUCCESS) {
|
||||
if (sd != 0) {
|
||||
/* Clean up earlier successful mappings */
|
||||
UnmapSurface(p3dChannel, gpuAddress);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (sd == 0) {
|
||||
gpuAddress = thisGpuAddress;
|
||||
} else {
|
||||
nvAssert(gpuAddress == thisGpuAddress);
|
||||
}
|
||||
}
|
||||
|
||||
return gpuAddress;
|
||||
}
|
||||
|
||||
NvBool nv3dAllocChannelSurface(Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
const NvU64 size = p3dChannel->surface.totalSize;
|
||||
NvU64 gpuAddress;
|
||||
|
||||
if (!AllocSurface(p3dChannel, size)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
gpuAddress = MapSurface(p3dChannel, size);
|
||||
|
||||
if (gpuAddress == 0) {
|
||||
FreeSurface(p3dChannel);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
p3dChannel->surface.gpuAddress = gpuAddress;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
void nv3dFreeChannelSurface(Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
|
||||
if (p3dChannel->surface.gpuAddress != 0) {
|
||||
/*
|
||||
* If the surface is mapped into our channel, we need to ensure
|
||||
* that any methods in the channel that might reference the
|
||||
* gpuAddress have idled before we unmap the address.
|
||||
*/
|
||||
nvPushIdleChannel(p3dChannel->pPushChannel);
|
||||
|
||||
UnmapSurface(p3dChannel,
|
||||
p3dChannel->surface.gpuAddress);
|
||||
p3dChannel->surface.gpuAddress = 0;
|
||||
}
|
||||
|
||||
FreeSurface(p3dChannel);
|
||||
}
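/*
 * Illustrative sketch (not part of the original file): typical lifecycle of
 * the per-channel surface.  nv3dAllocChannelSurface() expects
 * p3dChannel->surface.totalSize to have been computed already (see
 * _nv3dAssignSurfaceOffsets() below); the Example* names are made up.
 */
static NvBool ExampleSetUpChannelSurface(Nv3dChannelPtr p3dChannel)
{
    if (!nv3dAllocChannelSurface(p3dChannel)) {
        return FALSE;
    }
    /* ... push methods that reference p3dChannel->surface.gpuAddress ... */
    return TRUE;
}

static void ExampleTearDownChannelSurface(Nv3dChannelPtr p3dChannel)
{
    /* Safe even if the surface was never mapped: nv3dFreeChannelSurface()
     * only idles the channel and unmaps when gpuAddress is non-zero. */
    nv3dFreeChannelSurface(p3dChannel);
}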
|
||||
|
||||
/*
 * The Nv3dChannelRec's surface contains:
 *
 *   programLocalMemory
 *   programCode
 *   programConstants
 *   Nv3dTexture[numTextures]
 *   bindlessTextureConstantBuffer (optional)
 *   Nv3dConstantBuffer[numConstantBuffers]
 *   vertexStreams
 *
 * where all items are aligned to NV3D_TEXTURE_PITCH_ALIGNMENT.
 *
 * Compute all the offsets into the surface, and the total surface size.
 *
 * XXX TODO: use correct alignment for all items, rather than
 * NV3D_TEXTURE_PITCH_ALIGNMENT.
 */
|
||||
void _nv3dAssignSurfaceOffsets(
|
||||
const Nv3dAllocChannelStateParams *pParams,
|
||||
Nv3dChannelPtr p3dChannel)
|
||||
{
|
||||
const NvU32 programPrefetchPadding = 2048;
|
||||
|
||||
NvU64 offset = 0;
|
||||
enum Nv3dVertexAttributeStreamType stream;
|
||||
|
||||
/*
|
||||
* Program local memory requires at least 4k alignment. So, place
|
||||
* it at the start of the surface.
|
||||
*/
|
||||
p3dChannel->surface.programLocalMemoryOffset = offset;
|
||||
|
||||
offset += p3dChannel->programLocalMemorySize;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.programOffset = offset;
|
||||
|
||||
offset += p3dChannel->programs.code.decompressedSize;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.programConstantsOffset = offset;
|
||||
|
||||
offset += p3dChannel->programs.constants.size;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.textureOffset = offset;
|
||||
|
||||
offset += (sizeof(Nv3dTexture) * pParams->numTextures);
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.bindlessTextureConstantBufferOffset = offset;
|
||||
offset += NV3D_CONSTANT_BUFFER_SIZE;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
p3dChannel->surface.constantBufferOffset = offset;
|
||||
|
||||
offset += (NV3D_CONSTANT_BUFFER_SIZE * pParams->numConstantBuffers);
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
|
||||
/*
|
||||
* TODO: not all nvidia-3d host drivers will require the vertex stream
|
||||
* memory; maybe host drivers should opt in?
|
||||
*/
|
||||
for (stream = NV3D_VERTEX_ATTRIBUTE_STREAM_FIRST;
|
||||
stream < NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT;
|
||||
stream++) {
|
||||
|
||||
p3dChannel->surface.vertexStreamOffset[stream] = offset;
|
||||
|
||||
offset += NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE;
|
||||
offset = NV_ALIGN_UP(offset, NV3D_TEXTURE_PITCH_ALIGNMENT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the total surface size is large enough to cover any
|
||||
* potential prefetch region.
|
||||
*/
|
||||
p3dChannel->surface.totalSize =
|
||||
NV_MAX(p3dChannel->surface.programOffset + programPrefetchPadding,
|
||||
offset);
|
||||
}
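/*
 * Illustrative sketch (not part of the original file): the placement
 * arithmetic used above, in isolation.  Each region starts at the current
 * offset, and the offset is then rounded up to NV3D_TEXTURE_PITCH_ALIGNMENT
 * for the next region; the helper name is made up.
 */
static NvU64 ExamplePlaceRegion(NvU64 *pOffset, NvU64 regionSize)
{
    const NvU64 regionOffset = *pOffset;

    *pOffset = NV_ALIGN_UP(regionOffset + regionSize,
                           NV3D_TEXTURE_PITCH_ALIGNMENT);

    return regionOffset;
}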
|
||||
56
src/common/unix/nvidia-3d/src/nvidia-3d-turing.c
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-turing.h"
|
||||
#include "nvidia-3d-pascal.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include "class/clc597.h"
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dInitChannelTuring(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
_nv3dInitChannelPascal(p3dChannel);
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NVC597_SET_SPH_VERSION, 2);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_V(C597, SET_SPH_VERSION, CURRENT, 4) |
|
||||
NV3D_V(C597, SET_SPH_VERSION, OLDEST_SUPPORTED, 4));
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_V(C597, CHECK_SPH_VERSION, CURRENT, 4) |
|
||||
NV3D_V(C597, CHECK_SPH_VERSION, OLDEST_SUPPORTED, 4));
|
||||
}
|
||||
|
||||
void _nv3dSetVertexStreamEndTuring(
|
||||
Nv3dChannelPtr p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
const Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NVC597_SET_VERTEX_STREAM_SIZE_A(stream), 2);
|
||||
nvPushSetMethodDataU64(p, pStream->end - pStream->current);
|
||||
}
|
||||
531
src/common/unix/nvidia-3d/src/nvidia-3d-vertex-arrays.c
Normal file
@@ -0,0 +1,531 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d.h"
|
||||
#include "nvidia-3d-vertex-arrays.h"
|
||||
#include "nvidia-3d-types-priv.h"
|
||||
#include "nvidia-3d-constant-buffers.h"
|
||||
#include "nvidia-3d-utils.h"
|
||||
|
||||
#include <class/cl9097.h>
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
static void InitializeStreamFromSurf(
|
||||
const Nv3dStreamSurfaceRec *pSurf,
|
||||
Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
pStream->current = pSurf->gpuAddress;
|
||||
pStream->end = pSurf->gpuAddress + pSurf->size;
|
||||
pStream->stride = 0;
|
||||
pStream->nextLaunch = 0;
|
||||
}
|
||||
|
||||
static void InitializeStream(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
const Nv3dStreamSurfaceRec tmpSurf = {
|
||||
.gpuAddress =
|
||||
nv3dGetVertexAttributestreamGpuAddress(p3dChannel, stream),
|
||||
.size = NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE,
|
||||
};
|
||||
InitializeStreamFromSurf(&tmpSurf, pStream);
|
||||
}
|
||||
|
||||
void _nv3dInitializeStreams(
|
||||
Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
enum Nv3dVertexAttributeStreamType stream;
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
// Disable vertex attribute vectors 16 through 31 (scalars 64 through 127).
|
||||
// We don't use them.
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_DA_OUTPUT_ATTRIBUTE_SKIP_MASK_B(0), 2);
|
||||
nvPushSetMethodData(p, ~0);
|
||||
nvPushSetMethodData(p, ~0);
|
||||
|
||||
for (stream = NV3D_VERTEX_ATTRIBUTE_STREAM_FIRST;
|
||||
stream < NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT;
|
||||
stream++) {
|
||||
|
||||
Nv3dVertexAttributeStreamRec *pStream =
|
||||
&p3dChannel->vertexStreams[stream];
|
||||
|
||||
InitializeStream(p3dChannel, stream, pStream);
|
||||
}
|
||||
}
|
||||
|
||||
static void AdvanceStream(
|
||||
Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
pStream->current += pStream->stride * pStream->nextLaunch;
|
||||
nvAssert(pStream->current <= pStream->end);
|
||||
pStream->nextLaunch = 0;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Configure a vertex attribute stream to fetch from a surface.
|
||||
*
|
||||
* \param[in] p3dChannel The channel
|
||||
* \param[in] stream The vertex attribute stream
|
||||
* \param[in] pStream The vertex attribute stream tracking structure
|
||||
*/
|
||||
static void
|
||||
SetVertexStreamSurface(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
const Nv3dVertexAttributeStreamRec *pStream)
|
||||
{
|
||||
const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_STREAM_A_FORMAT(stream), 3);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_V(9097, SET_VERTEX_STREAM_A_FORMAT, STRIDE, pStream->stride) |
|
||||
NV3D_C(9097, SET_VERTEX_STREAM_A_FORMAT, ENABLE, TRUE));
|
||||
nvPushSetMethodDataU64(p, pStream->current);
|
||||
|
||||
pHal->setVertexStreamEnd(p3dChannel, stream, pStream);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Reset a vertex attribute stream to the specified offset, while leaving its
|
||||
* stride and limit alone.
|
||||
*/
|
||||
static void
|
||||
SetVertexStreamOffset(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream,
|
||||
NvU64 offset)
|
||||
{
|
||||
const Nv3dHal *pHal = p3dChannel->p3dDevice->hal;
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
Nv3dVertexAttributeStreamRec *pStream = &p3dChannel->vertexStreams[stream];
|
||||
|
||||
pStream->current = offset;
|
||||
pStream->nextLaunch = 0;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_STREAM_A_LOCATION_A(stream), 2);
|
||||
nvPushSetMethodDataU64(p, offset);
|
||||
|
||||
pHal->setVertexStreamEnd(p3dChannel, stream, pStream);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Point the constant buffer selector at the next location for data in the
|
||||
* given stream.
|
||||
*/
|
||||
static void SelectCbForStream(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
enum Nv3dVertexAttributeStreamType stream)
|
||||
{
|
||||
Nv3dVertexAttributeStreamRec *pStream = &p3dChannel->vertexStreams[stream];
|
||||
const NvU64 gpuAddress =
|
||||
nv3dGetVertexAttributestreamGpuAddress(p3dChannel, stream);
|
||||
int startOffset = pStream->current + pStream->stride * pStream->nextLaunch -
|
||||
gpuAddress;
|
||||
|
||||
nv3dSelectCbAddress(p3dChannel, gpuAddress,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE);
|
||||
nv3dSetConstantBufferOffset(p3dChannel, startOffset);
|
||||
}
|
||||
|
||||
/*!
 * Configure the DA and VAF to fetch from vertex attribute streams.
 *
 * This function configures the Data Assembler (DA) and Vertex Attribute Fetch
 * (VAF) units to fetch vertex attributes from pSurf using a format configured
 * by the 'attribs' array.
 *
 * It configures two streams: NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC and
 * NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC.  The static stream contains attributes
 * that are the same across all vertices.  The dynamic stream contains
 * attributes that are different for each vertex.  The static stream sources
 * from the next available location in the static vertex data surface and uses
 * a stride of 0, so that all vertices in an array fetch the same values for
 * those attributes.  Then, it configures the dynamic stream to fetch starting
 * at offset 0 of pSurf, unless pSurf is NULL, in which case it starts at the
 * appropriate offset in the dynamic vertex data surface.
 *
 * The 'attribs' array stores Nv3dVertexAttributeInfoRecs, terminated with an
 * element where attributeType is NV3D_VERTEX_ATTRIBUTE_END.  Each element
 * contains:
 *
 * (a) An enum Nv3dVertexAttributeType indicating which vertex attribute this
 *     array element describes.
 *
 * (b) An enum Nv3dVertexAttributeDataType indicating the data type to use for
 *     the attribute.
 *
 * (c) An enum Nv3dVertexAttributeStreamType indicating which stream should use
 *     the attribute.
 *
 * If any attributes are enabled as static, this function selects the static
 * stream surface as the current constant buffer.  The caller should push the
 * appropriate vertex data.
 *
 * Note that if you launch rendering using vertex attributes from a surface,
 * you must wait for idle before changing those attributes later.  Otherwise,
 * the VAF unit may fetch the new data instead of the old data, causing
 * corruption.
 *
 * \param[in]  p3dChannel  The 3d channel to program
 * \param[in]  attribs     Description of vertex attributes (see above)
 * \param[in]  pSurf       Surface that dynamic attributes will be fetched from
 *
 * \return The size in bytes of the static attribute data
 */
|
||||
int nv3dVasSetup(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
const Nv3dVertexAttributeInfoRec *attribs,
|
||||
const Nv3dStreamSurfaceRec *pSurf)
|
||||
{
|
||||
/* This table is indexed by enum Nv3dVertexAttributeDataType. */
|
||||
static const struct {
|
||||
NvU32 size;
|
||||
NvU32 setVertexAttributeA;
|
||||
} attribTypeTable[] = {
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_32_FLOAT] = {
|
||||
sizeof(float) * 2,
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, R32_G32) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_FLOAT),
|
||||
},
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_32_FLOAT] = {
|
||||
sizeof(float) * 4,
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, R32_G32_B32_A32) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_FLOAT),
|
||||
},
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_16_UNORM] = {
|
||||
sizeof(NvU16) * 4,
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, R16_G16_B16_A16) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_UNORM),
|
||||
},
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_8_UNORM] = {
|
||||
sizeof(NvU8) * 4,
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, A8B8G8R8) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_UNORM),
|
||||
},
|
||||
|
||||
[NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_16_SSCALED] = {
|
||||
sizeof(NvU32),
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A,
|
||||
COMPONENT_BIT_WIDTHS, R16_G16) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, NUMERICAL_TYPE, NUM_SSCALED),
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
Nv3dVertexAttributeStreamRec *pStatic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC];
|
||||
Nv3dVertexAttributeStreamRec *pDynamic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC];
|
||||
int staticOffset = 0, dynamicOffset = 0;
|
||||
Nv3dVertexAttributeStreamRec tmpStreamRec;
|
||||
NvU32 stride = 0;
|
||||
NvU64 daEnableMask = 0, daSkipMask;
|
||||
NvBool hasStaticAttribs = FALSE;
|
||||
NvBool hasPositionAttrib = FALSE;
|
||||
int i;
|
||||
|
||||
// POSITION must be specified and must be a dynamic attribute.
|
||||
for (i = 0; attribs[i].attributeType != NV3D_VERTEX_ATTRIBUTE_END; i++) {
|
||||
if (attribs[i].attributeType != NV3D_VERTEX_ATTRIBUTE_POSITION) {
|
||||
continue;
|
||||
}
|
||||
hasPositionAttrib = TRUE;
|
||||
nvAssert(attribs[i].streamType == NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC);
|
||||
}
|
||||
if (!hasPositionAttrib) {
|
||||
nvAssert(!"POSITION vertex attribute not specified.");
|
||||
}
|
||||
|
||||
// Configure the DA output skip mask so that it only fetches attributes for
|
||||
// enabled streams.
|
||||
for (i = 0; attribs[i].attributeType != NV3D_VERTEX_ATTRIBUTE_END; i++) {
|
||||
const enum Nv3dVertexAttributeType attrib = attribs[i].attributeType;
|
||||
// Always enable all four components of the value. This causes the
|
||||
// DA to generate default values if there are not enough components
|
||||
// in the pulled vertex data. This sets W=1 if W is missing.
|
||||
//
|
||||
// Otherwise, the value would come from the default the hardware
|
||||
// generates as input to the vertex shader when that attribute is
|
||||
// skipped in the DA, which is specified in the .mfs file as, "a
|
||||
// default value is inserted".
|
||||
//
|
||||
// Note all attribute values are expected to be less than 16 (i.e., fit
|
||||
// in MASK_A; attributes 16 through 31 would go in MASK_B).
|
||||
nvAssert(attrib < 16);
|
||||
daEnableMask |= 0xfULL << (4 * attrib);
|
||||
}
|
||||
daSkipMask = ~daEnableMask;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_DA_OUTPUT_ATTRIBUTE_SKIP_MASK_A(0), 2);
|
||||
nvPushSetMethodData(p, NvU64_LO32(daSkipMask));
|
||||
nvPushSetMethodData(p, NvU64_HI32(daSkipMask));
|
||||
|
||||
// Configure the attributes to fetch from the streams.
|
||||
for (i = 0; attribs[i].attributeType != NV3D_VERTEX_ATTRIBUTE_END; i++) {
|
||||
|
||||
const enum Nv3dVertexAttributeType attrib = attribs[i].attributeType;
|
||||
const enum Nv3dVertexAttributeDataType dataType = attribs[i].dataType;
|
||||
const enum Nv3dVertexAttributeStreamType stream = attribs[i].streamType;
|
||||
const NvU32 size = attribTypeTable[dataType].size;
|
||||
const NvU32 setVertexAttributeA =
|
||||
attribTypeTable[dataType].setVertexAttributeA;
|
||||
|
||||
int offset;
|
||||
|
||||
if (stream == NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC) {
|
||||
offset = staticOffset;
|
||||
staticOffset += size;
|
||||
hasStaticAttribs = TRUE;
|
||||
} else {
|
||||
nvAssert(stream == NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC);
|
||||
offset = dynamicOffset;
|
||||
dynamicOffset += size;
|
||||
stride += size;
|
||||
}
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NV9097_SET_VERTEX_ATTRIBUTE_A(attrib), 1);
|
||||
nvPushSetMethodData(p,
|
||||
NV3D_V(9097, SET_VERTEX_ATTRIBUTE_A, STREAM, stream) |
|
||||
NV3D_C(9097, SET_VERTEX_ATTRIBUTE_A, SOURCE, ACTIVE) |
|
||||
NV3D_V(9097, SET_VERTEX_ATTRIBUTE_A, OFFSET, offset) |
|
||||
setVertexAttributeA);
|
||||
}
|
||||
|
||||
|
||||
// Advance the stream past any attribs used previously.
|
||||
AdvanceStream(pStatic);
|
||||
// Although we may have set a non-zero stride on a previous call to this
|
||||
// function (mostly so the bookkeeping above works out), as far as the GPU
|
||||
// is concerned we should program a stride of 0.
|
||||
pStatic->stride = 0;
|
||||
|
||||
// See if we need to wrap the static stream.
|
||||
if (pStatic->current + staticOffset >= pStatic->end) {
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_WAIT_FOR_IDLE, 0);
|
||||
|
||||
// Reset both the static and dynamic streams, since we know the GPU is
|
||||
// done reading from both.
|
||||
InitializeStream(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC, pStatic);
|
||||
InitializeStream(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC, pDynamic);
|
||||
} else if (!pSurf) {
|
||||
// Advance the dynamic stream past any attribs used previously (unless
|
||||
// we just reset the stream).
|
||||
AdvanceStream(pDynamic);
|
||||
}
|
||||
|
||||
/* override dynamic stream with pSurf */
|
||||
if (pSurf) {
|
||||
pDynamic = &tmpStreamRec;
|
||||
InitializeStreamFromSurf(pSurf, pDynamic);
|
||||
}
|
||||
|
||||
// Configure the streams. A stride of 0 makes it read the same attribute
|
||||
// each time.
|
||||
nvAssert(pStatic->stride == 0);
|
||||
SetVertexStreamSurface(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC,
|
||||
pStatic);
|
||||
nvAssert(stride != 0);
|
||||
pDynamic->stride = stride;
|
||||
SetVertexStreamSurface(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC,
|
||||
pDynamic);
|
||||
|
||||
// If there are static attributes, set up the constant buffer selector.
|
||||
if (hasStaticAttribs) {
|
||||
SelectCbForStream(p3dChannel, NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC);
|
||||
|
||||
// Override the static stream's "stride" so that the next time this
|
||||
// function is called it will set staticStartOffset to right after the
|
||||
// static data here.
|
||||
pStatic->stride = staticOffset;
|
||||
pStatic->nextLaunch = 1;
|
||||
}
|
||||
|
||||
return staticOffset;
|
||||
}
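/*
 * Illustrative sketch (not part of the original file): a caller-side use of
 * nv3dVasSetup().  The POSITION attribute, the data types and the stream
 * names appear above; the COLOR attribute, the Example* name and the exact
 * vertex layout are assumptions made for this sketch.
 */
static void ExampleSetupVertexAttribs(Nv3dChannelRec *p3dChannel)
{
    static const Nv3dVertexAttributeInfoRec attribs[] = {
        { .attributeType = NV3D_VERTEX_ATTRIBUTE_POSITION,
          .dataType      = NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_2_32_FLOAT,
          .streamType    = NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC },
        { .attributeType = NV3D_VERTEX_ATTRIBUTE_COLOR, /* assumed enumerant */
          .dataType      = NV3D_VERTEX_ATTRIBUTE_DATA_TYPE_4_8_UNORM,
          .streamType    = NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC },
        { .attributeType = NV3D_VERTEX_ATTRIBUTE_END },
    };
    const NvU8 color[4] = { 0xff, 0x00, 0x00, 0xff };
    int staticSize;

    /* NULL pSurf: dynamic attributes come from the per-channel dynamic
     * vertex data surface managed by this file. */
    staticSize = nv3dVasSetup(p3dChannel, attribs, NULL);

    /* nv3dVasSetup() left the constant buffer selector pointing at the
     * static stream; push the static attribute data now. */
    nvAssert(staticSize == sizeof(color));
    nv3dPushConstants(p3dChannel, sizeof(color), color);
}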
|
||||
|
||||
/*!
|
||||
* Check if uploading the specified number of vertices will write past the end
|
||||
* of the given vertex stream.
|
||||
*/
|
||||
static NvBool WillVertexDataWrap(
|
||||
Nv3dVertexAttributeStreamRec *pStream,
|
||||
int n)
|
||||
{
|
||||
// >= here is intentional: It's illegal to set the constant buffer selector
|
||||
// past the end of the constant buffer, which could happen if the last
|
||||
// primitive drawn exactly fills the dynamic data stream and another
|
||||
// primitive is drawn. Then the next call to nv3dVasSelectCbForVertexData()
|
||||
// would cause a channel error.
|
||||
//
|
||||
// Instead of trying to detect that case there, just disallow completely
|
||||
// filling the stream so it wraps slightly earlier.
|
||||
return pStream->current + pStream->stride * (pStream->nextLaunch + n) >=
|
||||
pStream->end;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Launch vertices and update tracked vertex array state.
|
||||
*/
|
||||
static void DrawVertexArray(Nv3dChannelRec *p3dChannel, int numVerts)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
Nv3dVertexAttributeStreamRec *pDynamic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC];
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D, NV9097_SET_VERTEX_ARRAY_START, 2);
|
||||
nvPushSetMethodData(p, pDynamic->nextLaunch);
|
||||
nvPushSetMethodData(p, numVerts); // NV9097_DRAW_VERTEX_ARRAY
|
||||
|
||||
pDynamic->nextLaunch += numVerts;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Reset both the static and dynamic vertex array streams to the base of the
|
||||
* corresponding surfaces.
|
||||
*/
|
||||
static void WrapVertexStreams(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
Nv3dVertexAttributeStreamRec *pStatic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_STATIC];
|
||||
const NvU64 gpuAddress =
|
||||
nv3dGetVertexAttributestreamGpuAddress(p3dChannel,
|
||||
NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC);
|
||||
const NvU32 primMode = p3dChannel->currentPrimitiveMode;
|
||||
|
||||
// Set the software tracking for the static stream so it starts over at the
|
||||
// beginning next time nv3dVasSetup() is called, but leave the hardware
|
||||
// configured to read the data that's already there, in case vertices
|
||||
// submitted later still need it.
|
||||
pStatic->current = pStatic->end;
|
||||
pStatic->nextLaunch = 0;
|
||||
|
||||
// The hardware can't handle changing the vertex stream offset inside a
|
||||
// BEGIN / END block, so temporarily end now.
|
||||
nv3dVasEnd(p3dChannel);
|
||||
|
||||
// Wrap the dynamic vertex stream.
|
||||
nvPushImmedVal(p, NVA06F_SUBCHANNEL_3D, NV9097_WAIT_FOR_IDLE, 0);
|
||||
SetVertexStreamOffset(p3dChannel, NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC,
|
||||
gpuAddress);
|
||||
|
||||
nv3dVasBegin(p3dChannel, primMode);
|
||||
}
|
||||
|
||||
/*!
|
||||
* Point the constant buffer selector at the next location for vertex data in
|
||||
* the dynamic data surface.
|
||||
*/
|
||||
void nv3dVasSelectCbForVertexData(Nv3dChannelRec *p3dChannel)
|
||||
{
|
||||
SelectCbForStream(p3dChannel, NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC);
|
||||
}
|
||||
|
||||
/*!
 * Upload and draw vertices using the dynamic vertex data surface.
 *
 * This function uploads data to the dynamic vertex attribute stream surface
 * using inline constant buffer updates, starting at the next free space in
 * that surface, and then launches rendering.  The number of vertices rendered
 * is specified by 'numVerts'.
 *
 * Static data should have already been written to the static vertex attribute
 * stream surface by the caller.
 *
 * If not enough space is available in the dynamic data surface, this function
 * waits for idle before wrapping to the beginning of the surface, to avoid
 * conflicting with earlier rendering that might be in flight.
 *
 * It is up to the caller to send BEGIN and END methods around calls to this
 * function.
 *
 * \param[in]  p3dChannel  The channel
 * \param[in]  data        Data to upload
 * \param[in]  numVerts    Number of vertices rendered
 */
|
||||
void nv3dVasDrawInlineVerts(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
const void *data,
|
||||
int numVerts)
|
||||
{
|
||||
if (data != NULL) {
|
||||
Nv3dVertexAttributeStreamRec *pDynamic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC];
|
||||
|
||||
// See if we need to wrap the dynamic stream.
|
||||
if (WillVertexDataWrap(pDynamic, numVerts)) {
|
||||
WrapVertexStreams(p3dChannel);
|
||||
}
|
||||
|
||||
nv3dVasSelectCbForVertexData(p3dChannel);
|
||||
nv3dPushConstants(p3dChannel, pDynamic->stride * numVerts, data);
|
||||
}
|
||||
|
||||
DrawVertexArray(p3dChannel, numVerts);
|
||||
}
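/*
 * Illustrative sketch (not part of the original file): drawing with inline
 * vertex data.  The caller brackets the draw with nv3dVasBegin()/nv3dVasEnd()
 * (used by WrapVertexStreams() above); the Example* name, the vertex layout
 * and the NV9097_BEGIN_OP_TRIANGLE_STRIP enumerant are assumptions, and the
 * layout must match what nv3dVasSetup() configured for the dynamic stream.
 */
static void ExampleDrawQuad(
    Nv3dChannelRec *p3dChannel,
    const float xy[4][2]) /* four 2_32_FLOAT POSITION vertices */
{
    nv3dVasBegin(p3dChannel, NV9097_BEGIN_OP_TRIANGLE_STRIP);
    nv3dVasDrawInlineVerts(p3dChannel, xy, 4);
    nv3dVasEnd(p3dChannel);
}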
|
||||
|
||||
NvBool nv3dVasMakeRoom(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU32 pendingVerts,
|
||||
NvU32 moreVerts)
|
||||
{
|
||||
Nv3dVertexAttributeStreamRec *pDynamic =
|
||||
&p3dChannel->vertexStreams[NV3D_VERTEX_ATTRIBUTE_STREAM_DYNAMIC];
|
||||
|
||||
const NvBool wrap = WillVertexDataWrap(pDynamic, pendingVerts + moreVerts);
|
||||
|
||||
// If pendingVerts + moreVerts would exceed the dynamic vertex array buffer,
|
||||
// flush it now and start over at the beginning.
|
||||
if (wrap) {
|
||||
DrawVertexArray(p3dChannel, pendingVerts);
|
||||
WrapVertexStreams(p3dChannel);
|
||||
|
||||
// Reset the constant buffer update pointer to the beginning of the
|
||||
// dynamic vertex data buffer.
|
||||
nv3dSetConstantBufferOffset(p3dChannel, 0);
|
||||
}
|
||||
|
||||
return wrap;
|
||||
}
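/*
 * Illustrative sketch (not part of the original file): batching vertices with
 * nv3dVasMakeRoom().  This assumes nv3dVasSelectCbForVertexData() was called
 * when the batch began; pendingVerts counts vertices whose data has been
 * pushed but not yet launched.  The Example* names and the one-float-pair
 * vertex layout are assumptions.
 */
static NvU32 ExamplePushVertex(
    Nv3dChannelRec *p3dChannel,
    NvU32 pendingVerts,
    const float xy[2])
{
    if (nv3dVasMakeRoom(p3dChannel, pendingVerts, 1)) {
        /* The pending vertices were launched and the stream wrapped. */
        pendingVerts = 0;
    }
    nv3dPushConstants(p3dChannel, sizeof(float) * 2, xy);
    return pendingVerts + 1;
}

/* Launch whatever is still pending, without uploading more data. */
static void ExampleFlushPendingVerts(
    Nv3dChannelRec *p3dChannel,
    NvU32 pendingVerts)
{
    nv3dVasDrawInlineVerts(p3dChannel, NULL, pendingVerts);
}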
|
||||
|
||||
41
src/common/unix/nvidia-3d/src/nvidia-3d-volta.c
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nvidia-3d-volta.h"
|
||||
#include "nvidia-3d.h"
|
||||
|
||||
#include "class/clc397.h"
|
||||
#include <class/cla06fsubch.h>
|
||||
|
||||
void _nv3dSetProgramOffsetVolta(
|
||||
Nv3dChannelRec *p3dChannel,
|
||||
NvU32 stage,
|
||||
NvU32 offset)
|
||||
{
|
||||
NvPushChannelPtr p = p3dChannel->pPushChannel;
|
||||
const NvU64 gpuAddress = nv3dGetProgramGpuAddress(p3dChannel) + offset;
|
||||
|
||||
nvPushMethod(p, NVA06F_SUBCHANNEL_3D,
|
||||
NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(stage), 2);
|
||||
nvPushSetMethodDataU64(p, gpuAddress);
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NVIDIA_HEADSURFACE_CONSTANTS_H_
|
||||
#define _NVIDIA_HEADSURFACE_CONSTANTS_H_
|
||||
|
||||
/* Possible values for NvHsFragmentUniforms::resamplingMethod */
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_TRIANGULAR 1
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_BELL_SHAPED 2
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_BSPLINE 3
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_ADAPTIVE_TRIANGULAR 4
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_ADAPTIVE_BELL_SHAPED 5
|
||||
#define NVIDIA_HEADSURFACE_RESAMPLING_METHOD_BICUBIC_ADAPTIVE_BSPLINE 6
|
||||
|
||||
/* Uniform sampler binding indices */
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_PRIMARY_TEX 0
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_CURSOR_TEX 1
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_BLEND_TEX 2
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_OFFSET_TEX 3
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_OVERLAY_TEX 4
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_LUT_TEX 5
|
||||
#define NVIDIA_HEADSURFACE_UNIFORM_SAMPLER_BINDING_NUM 6
|
||||
|
||||
#endif /* _NVIDIA_HEADSURFACE_CONSTANTS_H_ */
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_HEADSURFACE_TYPES_H__
|
||||
#define __NVIDIA_HEADSURFACE_TYPES_H__
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvidia-3d-types.h"
|
||||
|
||||
typedef struct _NvHsVertexUniforms {
|
||||
Nv3dVertexAttrib2U vertexScale;
|
||||
Nv3dVertexAttrib2U primaryTextureScale;
|
||||
Nv3dVertexAttrib2U primaryTextureBias;
|
||||
Nv3dVertexAttrib2S cursorPosition;
|
||||
} __attribute__((packed)) NvHsVertexUniforms;
|
||||
|
||||
typedef struct _NvHsFragmentUniforms { // Byte offsets
|
||||
Nv3dVertexAttrib2U vertexScale; // 0
|
||||
Nv3dVertexAttrib3U numLutEntries NV_ALIGN_BYTES(16); // 16
|
||||
Nv3dVertexAttrib2U primaryTextureBias NV_ALIGN_BYTES(8); // 32
|
||||
Nv3dVertexAttrib2S cursorPosition; // 40
|
||||
// Although this is really a 3x3 matrix, GLSL std140 uniform block
|
||||
// layout says that the column stride is equal to a vec4.
|
||||
Nv3dFloat transform[3][4]; // 48
|
||||
Nv3dVertexAttrib2F pixelShiftOffset; // 96
|
||||
Nv3dVertexAttrib3F luminanceCoefficient NV_ALIGN_BYTES(16); // 112
|
||||
Nv3dVertexAttrib2F chromaCoefficient NV_ALIGN_BYTES(8); // 128
|
||||
Nv3dFloat luminanceScale; // 136
|
||||
Nv3dFloat luminanceBlackLevel; // 140
|
||||
Nv3dFloat chrominanceScale; // 144
|
||||
Nv3dFloat chrominanceBlackLevel; // 148
|
||||
NvU32 useSatHue; // 152
|
||||
Nv3dFloat satCos; // 156
|
||||
int resamplingMethod; // 160
|
||||
} __attribute__((packed)) NvHsFragmentUniforms;
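/*
 * Illustrative sketch (not part of the original file): the byte offsets in
 * the comments above can be verified at build time.  This assumes offsetof
 * (<stddef.h>) and ct_assert() (nvctassert.h) are available in the including
 * translation unit; any equivalent static assertion works.
 */
#if 0
ct_assert(offsetof(NvHsFragmentUniforms, transform) == 48);
ct_assert(offsetof(NvHsFragmentUniforms, luminanceCoefficient) == 112);
ct_assert(offsetof(NvHsFragmentUniforms, resamplingMethod) == 160);
#endif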
|
||||
|
||||
/*
|
||||
* The static warp mesh consists of four vertices, each vertex has six
|
||||
* components: (XY, UVRQ).
|
||||
*/
|
||||
typedef struct {
|
||||
struct {
|
||||
Nv3dFloat x, y, u, v, r, q;
|
||||
} vertex[4];
|
||||
} NvHsStaticWarpMesh;
|
||||
|
||||
#endif /* __NVIDIA_HEADSURFACE_TYPES_H__ */
|
||||
203
src/common/unix/nvidia-push/include/nvidia-push-priv-imports.h
Normal file
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#if !defined(__NVIDIA_PUSH_PRIV_IMPORTS_H__)
|
||||
#define __NVIDIA_PUSH_PRIV_IMPORTS_H__
|
||||
|
||||
#include "nvidia-push-types.h"
|
||||
|
||||
static inline NvU32 nvPushImportRmApiControl(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hObject,
|
||||
NvU32 cmd,
|
||||
void *pParams,
|
||||
NvU32 paramsSize)
|
||||
{
|
||||
return pDevice->pImports->rmApiControl(pDevice, hObject, cmd,
|
||||
pParams, paramsSize);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiAlloc(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hObject,
|
||||
NvU32 hClass,
|
||||
void *pAllocParams)
|
||||
{
|
||||
|
||||
return pDevice->pImports->rmApiAlloc(pDevice, hParent, hObject, hClass,
|
||||
pAllocParams);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiFree(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hObject)
|
||||
{
|
||||
return pDevice->pImports->rmApiFree(pDevice, hParent, hObject);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiMapMemoryDma(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hDma,
|
||||
NvU32 hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 length,
|
||||
NvU32 flags,
|
||||
NvU64 *pDmaOffset)
|
||||
{
|
||||
return pDevice->pImports->rmApiMapMemoryDma(pDevice,
|
||||
hDevice,
|
||||
hDma,
|
||||
hMemory,
|
||||
offset,
|
||||
length,
|
||||
flags,
|
||||
pDmaOffset);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiUnmapMemoryDma(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hDma,
|
||||
NvU32 hMemory,
|
||||
NvU32 flags,
|
||||
NvU64 dmaOffset)
|
||||
{
|
||||
return pDevice->pImports->rmApiUnmapMemoryDma(pDevice,
|
||||
hDevice,
|
||||
hDma,
|
||||
hMemory,
|
||||
flags,
|
||||
dmaOffset);
|
||||
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiAllocMemory64(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hMemory,
|
||||
NvU32 hClass,
|
||||
NvU32 flags,
|
||||
void **ppAddress,
|
||||
NvU64 *pLimit)
|
||||
{
|
||||
return pDevice->pImports->rmApiAllocMemory64(pDevice,
|
||||
hParent,
|
||||
hMemory,
|
||||
hClass,
|
||||
flags,
|
||||
ppAddress,
|
||||
pLimit);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiVidHeapControl(
|
||||
NvPushDevicePtr pDevice,
|
||||
void *pVidHeapControlParms)
|
||||
{
|
||||
return pDevice->pImports->rmApiVidHeapControl(pDevice,
|
||||
pVidHeapControlParms);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiMapMemory(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 length,
|
||||
void **ppLinearAddress,
|
||||
NvU32 flags)
|
||||
{
|
||||
return pDevice->pImports->rmApiMapMemory(pDevice,
|
||||
hDevice,
|
||||
hMemory,
|
||||
offset,
|
||||
length,
|
||||
ppLinearAddress,
|
||||
flags);
|
||||
}
|
||||
|
||||
static inline NvU32 nvPushImportRmApiUnmapMemory(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hMemory,
|
||||
void *pLinearAddress,
|
||||
NvU32 flags)
|
||||
{
|
||||
return pDevice->pImports->rmApiUnmapMemory(pDevice,
|
||||
hDevice,
|
||||
hMemory,
|
||||
pLinearAddress,
|
||||
flags);
|
||||
}
|
||||
|
||||
static inline NvU64 nvPushImportGetMilliSeconds(
|
||||
NvPushDevicePtr pDevice)
|
||||
{
|
||||
return pDevice->pImports->getMilliSeconds(pDevice);
|
||||
}
|
||||
|
||||
static inline void nvPushImportYield(
|
||||
NvPushDevicePtr pDevice)
|
||||
{
|
||||
pDevice->pImports->yield(pDevice);
|
||||
}
|
||||
|
||||
static inline NvBool nvPushImportWaitForEvent(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvPushImportEvent *pEvent,
|
||||
NvU64 timeout)
|
||||
{
|
||||
return pDevice->pImports->waitForEvent(pDevice, pEvent, timeout);
|
||||
}
|
||||
|
||||
static inline void nvPushImportEmptyEventFifo(
|
||||
NvPushDevicePtr pDevice,
|
||||
NvPushImportEvent *pEvent)
|
||||
{
|
||||
pDevice->pImports->emptyEventFifo(pDevice, pEvent);
|
||||
}
|
||||
|
||||
static inline void nvPushImportChannelErrorOccurred(
|
||||
NvPushChannelPtr pChannel,
|
||||
NvU32 channelErrCode)
|
||||
{
|
||||
pChannel->pDevice->pImports->channelErrorOccurred(pChannel, channelErrCode);
|
||||
}
|
||||
|
||||
static inline void nvPushImportPushbufferWrapped(
|
||||
NvPushChannelPtr pChannel)
|
||||
{
|
||||
pChannel->pDevice->pImports->pushbufferWrapped(pChannel);
|
||||
}
|
||||
|
||||
#define nvPushImportLogError(_pDevice, ...) \
|
||||
(_pDevice)->pImports->logError((_pDevice), __VA_ARGS__)
|
||||
|
||||
#if defined(DEBUG)
|
||||
#define nvPushImportLogNvDiss(_pChannel, ...) \
|
||||
(_pChannel)->pDevice->pImports->logNvDiss((_pChannel), __VA_ARGS__)
|
||||
#endif /* DEBUG */
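/*
 * Illustrative sketch (not part of the original file): the inline wrappers
 * above assume the host driver supplied an NvPushImports table when the push
 * device was allocated.  The Host* callback names below are made up, and only
 * a subset of the fields referenced above is shown.
 */
#if 0
static const NvPushImports exampleImports = {
    .rmApiControl         = HostRmApiControl,
    .rmApiAlloc           = HostRmApiAlloc,
    .rmApiFree            = HostRmApiFree,
    .rmApiMapMemoryDma    = HostRmApiMapMemoryDma,
    .rmApiUnmapMemoryDma  = HostRmApiUnmapMemoryDma,
    .rmApiVidHeapControl  = HostRmApiVidHeapControl,
    .getMilliSeconds      = HostGetMilliSeconds,
    .yield                = HostYield,
    .waitForEvent         = HostWaitForEvent,
    .emptyEventFifo       = HostEmptyEventFifo,
    .channelErrorOccurred = HostChannelErrorOccurred,
    .pushbufferWrapped    = HostPushbufferWrapped,
    .logError             = HostLogError,
};
#endif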
|
||||
|
||||
#endif /* __NVIDIA_PUSH_PRIV_IMPORTS_H__ */
|
||||
122
src/common/unix/nvidia-push/include/nvidia-push-priv.h
Normal file
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2018 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_PUSH_PRIV_H__
|
||||
#define __NVIDIA_PUSH_PRIV_H__
|
||||
|
||||
#include "nvmisc.h" // NV_ALIGN_UP
|
||||
#include "class/cla16f.h" // NVA16F_GP_ENTRY__SIZE
|
||||
|
||||
/*
 * Push buffer constants
 *
 * "The pushbuffer" consists of several regions packed into a single memory
 * allocation.  In order, they are:
 *   1. The "main" pushbuffer.  Most of the driver pushes methods here;
 *   2. GPFIFO entries;
 *   3. The "progress tracker" pushbuffer.  This is used by the DMA kickoff
 *      code as a reserved area to put semaphore release methods, which we use
 *      to track HOST's progress fetching the pushbuffer.  We also use this to
 *      work around hardware bug 1667921.
 */
|
||||
|
||||
/* Offset of the GPFIFO entries: entry (2) above. */
|
||||
static inline NvU32 __nvPushGpFifoOffset(const NvPushChannelRec *pChannel)
|
||||
{
|
||||
nvAssert(pChannel->main.sizeInBytes != 0);
|
||||
return NV_ALIGN_UP(pChannel->main.sizeInBytes, NVA16F_GP_ENTRY__SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to align each set of methods in the progress tracker pushbuffer to
|
||||
* 128 bytes so that we avoid HW bug 1667921 (on chips that are affected).
|
||||
* This is used for both the start of the GPFIFO segment _and_ the size (for
|
||||
* each GPFIFO entry).
|
||||
*/
|
||||
#define NV_ALIGN_LBDAT_EXTRA_BUG 128
|
||||
/*
|
||||
* Offset of the progress tracker pushbuffer: entry (3) above.
|
||||
*
|
||||
* Note that we always use the appropriate alignment to WAR the LBDAT_EXTRA bug
|
||||
* for the offset. Although this is only necessary on some chips, it's simpler
|
||||
* to always use this alignment.
|
||||
*/
|
||||
static inline NvU32 __nvPushProgressTrackerOffset(
|
||||
const NvPushChannelRec *pChannel)
|
||||
{
|
||||
const NvU32 gpFifoOffset = __nvPushGpFifoOffset(pChannel);
|
||||
const NvU32 gpFifoLength =
|
||||
pChannel->numGpFifoEntries * NVA16F_GP_ENTRY__SIZE;
|
||||
|
||||
nvAssert(gpFifoLength != 0);
|
||||
|
||||
return NV_ALIGN_UP(gpFifoOffset + gpFifoLength, NV_ALIGN_LBDAT_EXTRA_BUG);
|
||||
}
|
||||
|
||||
/* We always write two GPFIFO entries: one for the main pushbuffer, and one
|
||||
* for the progress tracker pushbuffer. */
|
||||
#define NV_PUSH_NUM_GPFIFO_ENTRIES_PER_KICKOFF 2
|
||||
|
||||
/*
|
||||
* Encoding for the progress tracker semaphore payload.
|
||||
* _GET stores dwords, rather than bytes.
|
||||
* _GP_GET stores the number of "pairs" of gpFifo entries.
|
||||
*/
|
||||
#define NV_PUSH_PROGRESS_TRACKER_SEMAPHORE_GET 17:0
|
||||
#define NV_PUSH_PROGRESS_TRACKER_SEMAPHORE_GP_GET 31:18
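/*
 * Illustrative sketch (not part of the original file): packing and unpacking
 * the progress tracker semaphore payload with the DRF helpers from nvmisc.h
 * (included above).  The __nvPushExample* names are made up.
 */
static inline NvU32 __nvPushExamplePackProgress(NvU32 getInDwords,
                                                NvU32 gpGetInPairs)
{
    return DRF_NUM(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GET, getInDwords) |
           DRF_NUM(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GP_GET, gpGetInPairs);
}

static inline NvU32 __nvPushExampleProgressGetBytes(NvU32 payload)
{
    /* _GET stores dwords; convert back to a byte offset. */
    return DRF_VAL(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GET, payload) * 4;
}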
|
||||
|
||||
/*
|
||||
* The number of 0080 RM devices for the given NvPushDevice.
|
||||
* This is 1 for RM SLI and numSubDevices for client SLI.
|
||||
*/
|
||||
static inline int
|
||||
__nvPushGetNumDevices(const NvPushDeviceRec *pDevice)
|
||||
{
|
||||
if (pDevice->clientSli) {
|
||||
return pDevice->numSubDevices;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The 0080 RM device index for the given subdevice index.
|
||||
* This is 0 for RM SLI, and the subdevice index for client SLI.
|
||||
*/
|
||||
static inline int
|
||||
__nvPushGetDeviceIndex(const NvPushDeviceRec *pDevice, int sd)
|
||||
{
|
||||
if (pDevice->clientSli) {
|
||||
return sd;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
NvU32 __nvPushProgressTrackerEntrySize(const NvPushDeviceRec *pDevice);
|
||||
|
||||
NvBool __nvPushTestPushBuffer(NvPushChannelPtr p);
|
||||
|
||||
NvBool __nvPushGetHal(
|
||||
const NvPushAllocDeviceParams *pParams,
|
||||
NvU32 channelClass,
|
||||
NvPushHal *pHal);
|
||||
|
||||
#endif /* __NVIDIA_PUSH_PRIV_H__ */
|
||||
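As a quick illustration of the payload encoding above, a host-side helper could pack and decode the two fields with the DRF helpers from nvmisc.h. This is a minimal sketch; the function names below are hypothetical and not part of the header.

/* Sketch only: hypothetical helpers built on the DRF macros from nvmisc.h. */
static inline NvU32 nvPushExamplePackProgress(NvU32 getInDWords, NvU32 gpGetPairs)
{
    return DRF_NUM(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GET, getInDWords) |
           DRF_NUM(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GP_GET, gpGetPairs);
}

static inline NvU32 nvPushExampleGetByteOffset(NvU32 payload)
{
    /* _GET stores dwords, so scale by 4 to recover a byte offset. */
    return DRF_VAL(_PUSH, _PROGRESS_TRACKER_SEMAPHORE, _GET, payload) * 4;
}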
259
src/common/unix/nvidia-push/interface/nvidia-push-init.h
Normal file
@@ -0,0 +1,259 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains nvidia-push device and channel setup structures and
|
||||
* functions.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_PUSH_INIT_H__
|
||||
#define __NVIDIA_PUSH_INIT_H__
|
||||
|
||||
|
||||
#include "nvidia-push-types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* Return the index of the first class table element supported on this device.
|
||||
*
|
||||
* pClassTable is an array where each element corresponds to a class
|
||||
* the caller supports. The first field in the array element should
|
||||
* be an NvPushSupportedClass struct. There may be additional fields
|
||||
* in the array element that are specific to the caller. The
|
||||
* classTableStride argument indicates the size in bytes of one array
|
||||
* element, such that nvPushGetSupportedClassIndex() can step from one
|
||||
* array element to the next by adding classTableStride.
|
||||
*
|
||||
* nvPushGetSupportedClassIndex() will query the list of classes
|
||||
* supported by this device, and return the index of the first
|
||||
* pClassTable array element that is supported by the device. -1 is
|
||||
* returned if there is no match.
|
||||
*
|
||||
* \param pDevice The nvidia-push device whose class list to consider.
|
||||
* \param pClassTable The table of classes supported.
|
||||
* \param classTableStride The size in bytes of one table element.
|
||||
* \param classTableLength The number of table elements.
|
||||
*
|
||||
* \return The index of the first table element that matches, or -1.
|
||||
*/
|
||||
|
||||
typedef struct _NvPushSupportedClass {
|
||||
NvU32 classNumber;
|
||||
NVAModelConfig amodelConfig;
|
||||
} NvPushSupportedClass;
|
||||
|
||||
int nvPushGetSupportedClassIndex(
|
||||
NvPushDevicePtr pDevice,
|
||||
const void *pClassTable,
|
||||
size_t classTableStride,
|
||||
size_t classTableLength);
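As a usage sketch (the table contents and class numbers below are placeholders chosen for illustration, not classes the driver actually advertises), a caller-side table and lookup might look like this:

/* Sketch only: a hypothetical caller-defined class table. */
typedef struct {
    NvPushSupportedClass base;    /* must be the first field */
    int callerSpecific;           /* arbitrary caller data */
} ExampleClassTableEntry;

static const ExampleClassTableEntry exampleClassTable[] = {
    { { 0xC36F /* placeholder class number */, NV_AMODEL_NONE }, 1 },
    { { 0xA16F /* placeholder class number */, NV_AMODEL_NONE }, 2 },
};

static int exampleFindSupportedClass(NvPushDevicePtr pDevice)
{
    return nvPushGetSupportedClassIndex(
        pDevice,
        exampleClassTable,
        sizeof(exampleClassTable[0]),
        sizeof(exampleClassTable) / sizeof(exampleClassTable[0]));
}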
|
||||
|
||||
/*
|
||||
* Parameter structure populated by the host driver when requesting an
|
||||
* NvPushDeviceRec.
|
||||
*/
|
||||
typedef struct _NvPushAllocDeviceParams {
|
||||
|
||||
/* Pointer to host device, filled by host driver as needed */
|
||||
void *hostDevice;
|
||||
|
||||
const NvPushImports *pImports;
|
||||
|
||||
/* The host driver's RMAPI client (NV0000) handle. */
|
||||
NvU32 clientHandle;
|
||||
|
||||
/* TRUE iff this device is in client-side SLI mode. */
|
||||
NvBool clientSli;
|
||||
|
||||
/* The number of subDevices allocated by the host driver. */
|
||||
NvU32 numSubDevices;
|
||||
|
||||
struct {
|
||||
/* The host driver's RMAPI device (NV0080) handles */
|
||||
NvU32 deviceHandle;
|
||||
/* The host driver's RMAPI subDevice (NV2080) handles. */
|
||||
NvU32 handle;
|
||||
/* FERMI_VASPACE_A object in which channels on this device should be
|
||||
* mapped. */
|
||||
NvU32 gpuVASpaceObject;
|
||||
/* ctxDma handle to be used with MapMemoryDma. */
|
||||
NvU32 gpuVASpace;
|
||||
} subDevice[NV_MAX_SUBDEVICES];
|
||||
|
||||
struct {
|
||||
/*
|
||||
* The Amodel configuration requested by the host driver.
|
||||
*/
|
||||
NVAModelConfig config;
|
||||
} amodel;
|
||||
|
||||
/* Whether channels on this device will be used to program Tegra. */
|
||||
NvBool isTegra;
|
||||
|
||||
/*
|
||||
* Pool of RMAPI object handles. The host driver should populate
|
||||
* all of the elements in this array before calling
|
||||
* nvPushAllocDevice(), and release all of these handles if
|
||||
* nvPushAllocDevice() fails, or after calling nvPushFreeDevice().
|
||||
*
|
||||
* The number of possible handles is:
|
||||
*
|
||||
* hUserMode (per-sd)
|
||||
*/
|
||||
#define NV_PUSH_DEVICE_HANDLE_POOL_NUM \
|
||||
(NV_MAX_SUBDEVICES)
|
||||
|
||||
NvU32 handlePool[NV_PUSH_DEVICE_HANDLE_POOL_NUM];
|
||||
|
||||
NvU32 numClasses;
|
||||
const NvU32 *supportedClasses;
|
||||
|
||||
} NvPushAllocDeviceParams;
|
||||
|
||||
NvBool nvPushAllocDevice(
|
||||
const NvPushAllocDeviceParams *pParams,
|
||||
NvPushDevicePtr pDevice);
|
||||
|
||||
void nvPushFreeDevice(
|
||||
NvPushDevicePtr pDevice);
|
||||
|
||||
|
||||
/*
|
||||
* Parameter structure populated by the host driver when requesting an
|
||||
* NvPushChannelRec.
|
||||
*/
|
||||
typedef struct _NvPushAllocChannelParams {
|
||||
|
||||
/* NV2080_ENGINE_TYPE_ */
|
||||
NvU32 engineType;
|
||||
|
||||
/*
|
||||
* Whether to log the pushbuffer in nvdiss format, by calling
|
||||
* nvPushImportLogNvDiss().
|
||||
*/
|
||||
NvBool logNvDiss;
|
||||
|
||||
/*
|
||||
* Normally, the pushbuffer utility library will time out when
|
||||
* waiting for things (space in the pushbuffer, waiting for
|
||||
* notifiers, etc). When the channel is created with
|
||||
* noTimeout=TRUE, the channel will wait indefinitely for these
|
||||
* things.
|
||||
*/
|
||||
NvBool noTimeout;
|
||||
|
||||
/*
|
||||
* Normally, the pushbuffer utility library checks for channel
|
||||
* errors and reports them to the host driver by calling
|
||||
* nvPushImportChannelErrorOccurred(). Host drivers can set
|
||||
* ignoreChannelErrors=TRUE to disable this check.
|
||||
*/
|
||||
NvBool ignoreChannelErrors;
|
||||
|
||||
/*
|
||||
* DIFR stands for Display Idle Frame Refresh in which a CE is used to
|
||||
* prefetch framebuffer pixels into the GPU's L2 cache. The prefetch
|
||||
* operation requires the channel to be specifically configured for DIFR
|
||||
* prefetching. This flag indicates if this channel is intended to be
|
||||
* used for just that.
|
||||
*/
|
||||
NvBool difrPrefetch;
|
||||
|
||||
/*
|
||||
* Host drivers should specify how many notifiers they want. The
|
||||
* pushbuffer utility library will allocate memory to hold this
|
||||
* many notifiers on each subDevice, plus an error notifier.
|
||||
*
|
||||
* The 'notifierIndex' argument to, e.g., nvPushGetNotifierCpuAddress()
|
||||
* should be in the range [0,numNotifiers).
|
||||
*/
|
||||
NvU8 numNotifiers;
|
||||
|
||||
/*
|
||||
* The size of the "main" pushbuffer in bytes. Note this does not
|
||||
* include space for gpfifo entries or progress tracking:
|
||||
* nvidia-push will implicitly pad the total pushbuffer for those
|
||||
* items.
|
||||
*/
|
||||
NvU32 pushBufferSizeInBytes;
|
||||
|
||||
/*
|
||||
* Pool of RMAPI object handles. The host driver should populate
|
||||
* all of the elements in this array before calling
|
||||
* nvPushAllocChannel(), and release all of these handles if
|
||||
* nvPushAllocChannel() fails, or after calling nvPushFreeChannel().
|
||||
*
|
||||
* The number of possible handles is:
|
||||
*
|
||||
* progressSemaphore hMemory (per-sd) +
|
||||
* pushbufferHandle (per-device) +
|
||||
* pushbufferVAHandle (per-sd) +
|
||||
* userD.hMemory (per-sd) +
|
||||
* channelHandle (per-sd) +
|
||||
* notifier memoryHandle (per-device) +
|
||||
* error notifier ctxDma (per-device)
|
||||
*/
|
||||
#define NV_PUSH_CHANNEL_HANDLE_POOL_NUM \
|
||||
(NV_MAX_SUBDEVICES + \
|
||||
1 + \
|
||||
NV_MAX_SUBDEVICES + \
|
||||
NV_MAX_SUBDEVICES + \
|
||||
NV_MAX_SUBDEVICES + \
|
||||
1 + \
|
||||
1)
|
||||
|
||||
NvU32 handlePool[NV_PUSH_CHANNEL_HANDLE_POOL_NUM];
|
||||
|
||||
/*
|
||||
* A pointer to an NvPushDeviceRec, initialized with
|
||||
* nvPushAllocDevice(). One or more NvPushChannelRecs may share
|
||||
* the same NvPushDevicePtr.
|
||||
*
|
||||
* This pDevice should be kept allocated until all
|
||||
* NvPushChannelRecs using it have been freed.
|
||||
*/
|
||||
NvPushDevicePtr pDevice;
|
||||
|
||||
} NvPushAllocChannelParams;
|
||||
|
||||
NvBool nvPushAllocChannel(
|
||||
const NvPushAllocChannelParams *pParams,
|
||||
NvPushChannelPtr buffer);
|
||||
|
||||
void nvPushFreeChannel(
|
||||
NvPushChannelPtr buffer);
|
||||
|
||||
|
||||
void nvPushInitWaitForNotifier(
|
||||
NvPushChannelPtr pChannel,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 subdeviceMask);
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /*__NVIDIA_PUSH_INIT_H__ */
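A rough end-to-end allocation sketch follows. It assumes the host driver has already created its RMAPI client, device, subdevice, and VA space handles and populated the handle pools, and it elides error reporting; the helper name and the example sizes are not taken from the driver.

/* Sketch only: abbreviated device + channel setup. */
static NvBool exampleAllocPushChannel(const NvPushAllocDeviceParams *pDevParams,
                                      NvPushDevicePtr pDevice,
                                      NvPushChannelPtr pChannel)
{
    NvPushAllocChannelParams chParams = { 0 };

    if (!nvPushAllocDevice(pDevParams, pDevice)) {
        return FALSE;
    }

    chParams.pDevice = pDevice;
    chParams.engineType = 0;                     /* placeholder NV2080_ENGINE_TYPE_ value */
    chParams.numNotifiers = 1;
    chParams.pushBufferSizeInBytes = 64 * 1024;  /* arbitrary example size */
    /* chParams.handlePool[] must be filled with pre-allocated RMAPI handles here. */

    if (!nvPushAllocChannel(&chParams, pChannel)) {
        nvPushFreeDevice(pDevice);
        return FALSE;
    }

    return TRUE;
}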
|
||||
247
src/common/unix/nvidia-push/interface/nvidia-push-methods.h
Normal file
@@ -0,0 +1,247 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * This file contains macros and inline functions used to actually program
 * methods.
 */

#ifndef __NVIDIA_PUSH_METHODS_H__
#define __NVIDIA_PUSH_METHODS_H__

#include "nvidia-push-types.h"

#include "class/cla16f.h"

#ifdef __cplusplus
extern "C" {
#endif

static inline void __nvPushSetMethodDataSegment(NvPushChannelSegmentPtr s, const NvU32 data)
{
    s->buffer->u = data;
    s->buffer++;
}

static inline void nvPushSetMethodData(NvPushChannelPtr p, const NvU32 data)
{
    __nvPushSetMethodDataSegment(&p->main, data);
}

#if NV_PUSH_ALLOW_FLOAT
static inline void __nvPushSetMethodDataSegmentF(NvPushChannelSegmentPtr s, const float data)
{
    s->buffer->f = data;
    s->buffer++;
}

static inline void nvPushSetMethodDataF(NvPushChannelPtr p, const float data)
{
    __nvPushSetMethodDataSegmentF(&p->main, data);
}
#endif

static inline void __nvPushSetMethodDataSegmentU64(NvPushChannelSegmentPtr s, const NvU64 data)
{
    __nvPushSetMethodDataSegment(s, NvU64_HI32(data));
    __nvPushSetMethodDataSegment(s, NvU64_LO32(data));
}

static inline void nvPushSetMethodDataU64(NvPushChannelPtr p, const NvU64 data)
{
    __nvPushSetMethodDataSegmentU64(&p->main, data);
}

void __nvPushMoveDWORDS(NvU32* dst, const NvU32* src, int dwords);

static inline void
nvDmaMoveDWORDS(NvPushChannelUnion *dst, const NvU32* src, int dwords)
{
    // The 'dst' argument is an array of NvPushChannelUnion; it is safe
    // to treat this as an array of NvU32, as long as NvU32 and
    // NvPushChannelUnion are the same size.
    ct_assert(sizeof(NvU32) == sizeof(NvPushChannelUnion));
    __nvPushMoveDWORDS((NvU32 *)dst, src, dwords);
}

static inline void nvPushInlineData(NvPushChannelPtr p, const void *data,
                                    size_t dwords)
{
    nvDmaMoveDWORDS(p->main.buffer, (const NvU32 *)data, dwords);
    p->main.buffer += dwords;
}

/*!
 * Return the maximum method count: the maximum number of dwords that can be
 * specified in the nvPushMethod() family of macros.
 */
static inline NvU32 nvPushMaxMethodCount(const NvPushChannelRec *p)
{
    /*
     * The number of methods that can be specified in one NVA16F_DMA_METHOD
     * header is limited by the bit field size of NVA16F_DMA_METHOD_COUNT: 28:16
     * (i.e., maximum representable value 8191).
     */
    const NvU32 maxFromMethodCountMask = DRF_MASK(NVA16F_DMA_METHOD_COUNT);

    /*
     * Further, the method count must be smaller than half the total pushbuffer
     * size minus one, to correctly distinguish empty and full pushbuffers. See
     * nvPushHeader() for details.
     */
    const NvU32 pushBufferSizeInBytes = p->main.sizeInBytes;
    const NvU32 pushBufferSizeInDWords = pushBufferSizeInBytes / 4;
    const NvU32 pushBufferHalfSizeInDWords = pushBufferSizeInDWords / 2;

    /*
     * Subtract two from pushBufferHalfSizeInDWords:
     *
     * -1 to distinguish pushbuffer empty from full (see above).
     *
     * -1 to be smaller than, rather than equal to, the above constraints.
     */
    const NvU32 maxFromPushBufferSize = pushBufferHalfSizeInDWords - 2;

    return NV_MIN(maxFromMethodCountMask, maxFromPushBufferSize);
}

// These macros verify that the values used in the methods fit
// into the defined ranges.
#define ASSERT_DRF_DEF(d, r, f, n) \
    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (NV ## d ## r ## f ## n)))
#define ASSERT_DRF_NUM(d, r, f, n) \
    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (n)))

#if defined(DEBUG)
#include "class/clc36f.h" /* VOLTA_CHANNEL_GPFIFO_A */

/*
 * When pushing GPFIFO methods (NVA16F_SEMAPHORE[ABCD]), all four
 * methods must be pushed together. If the four methods are not
 * pushed together, nvidia-push might wrap, injecting its progress
 * tracking semaphore release methods in the middle, and perturb the
 * NVA16F_SEMAPHOREA_OFFSET_UPPER and NVA16F_SEMAPHOREB_OFFSET_LOWER
 * channel state.
 *
 * Return whether the methods described by the arguments include some,
 * but not all, of A, B, C, and D. I.e., if the range starts at B, C,
 * or D, or if the range ends at A, B, or C.
 *
 * Perform a similar check for Volta+ semaphore methods
 * NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE. Note that we always check for both
 * sets of methods, regardless of the GPU we're actually running on. This is
 * okay since:
 * a) the NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE method offsets were not used
 *    for anything from (a16f..c36f].
 * b) the SEMAPHORE[ABCD] methods still exist on the newer classes (they
 *    haven't been reused for anything else)
 */
static inline NvBool __nvPushStartSplitsSemaphore(
    NvU32 method,
    NvU32 count,
    NvU32 secOp)
{
    ct_assert(NVA16F_SEMAPHOREA < NVA16F_SEMAPHORED);
    ct_assert(NVC36F_SEM_ADDR_LO < NVC36F_SEM_EXECUTE);

    /*
     * compute start and end as inclusive; if not incrementing, we
     * assume end==start
     */
    const NvU32 start = method;
    const NvU32 end = (secOp == NVA16F_DMA_SEC_OP_INC_METHOD) ?
        (method + ((count - 1) * 4)) : method;

    return ((start > NVA16F_SEMAPHOREA) && (start <= NVA16F_SEMAPHORED)) ||
           ((end >= NVA16F_SEMAPHOREA) && (end < NVA16F_SEMAPHORED)) ||
           ((start > NVC36F_SEM_ADDR_LO) && (start <= NVC36F_SEM_EXECUTE)) ||
           ((end >= NVC36F_SEM_ADDR_LO) && (end < NVC36F_SEM_EXECUTE));
}
#endif /* DEBUG */

/*
 * Note that _count+1 must be less than half the total pushbuffer size. This is
 * required by GPFIFO because we can't reliably tell when we can write all the
 * way to the end of the pushbuffer if we wrap (see bug 232454). This
 * assumption ensures that there will be enough space once GET reaches PUT.
 */
#define nvPushHeader(_push_buffer, _segment, _count, _header) do {     \
    NvPushChannelSegmentPtr _pSegment = &(_push_buffer)->_segment;     \
    nvAssert(((_count)+1) < ((_pSegment)->sizeInBytes / 8));           \
    if ((_pSegment)->freeDwords < ((_count)+1))                        \
        __nvPushMakeRoom((_push_buffer), (_count) + 1);                \
    __nvPushSetMethodDataSegment((_pSegment), (_header));              \
    (_pSegment)->freeDwords -= ((_count)+1);                           \
} while(0)

#define __nvPushStart(_push_buffer, _segment, _subch, _offset, _count, _opcode) \
{                                                                       \
    nvAssert(!__nvPushStartSplitsSemaphore(                             \
                 (_offset),                                             \
                 (_count),                                              \
                 NVA16F_DMA_SEC_OP ## _opcode));                        \
    ASSERT_DRF_DEF(A16F, _DMA, _SEC_OP, _opcode);                       \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count);                  \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch);             \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2);        \
    nvPushHeader((_push_buffer), _segment, (_count),                    \
                 DRF_DEF(A16F, _DMA, _SEC_OP, _opcode) |                \
                 DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count) |           \
                 DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch) |      \
                 DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2)); \
}

// The GPU can encode a 13-bit constant method/data pair in a single DWORD.
#define nvPushImmedValSegment(_push_buffer, _segment, _subch, _offset, _data) { \
    ASSERT_DRF_NUM(A16F, _DMA, _IMMD_DATA, _data);                      \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch);             \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2);        \
    if ((_push_buffer)->_segment.freeDwords < 1)                        \
        __nvPushMakeRoom((_push_buffer), 1);                            \
    __nvPushSetMethodDataSegment(&(_push_buffer)->_segment,             \
        DRF_DEF(A16F, _DMA, _SEC_OP, _IMMD_DATA_METHOD) |               \
        DRF_NUM(A16F, _DMA, _IMMD_DATA, _data) |                        \
        DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch) |               \
        DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2));          \
    (_push_buffer)->_segment.freeDwords--;                              \
}

#define nvPushImmedVal(_push_buffer, _subch, _offset, _data) \
    nvPushImmedValSegment(_push_buffer, main, _subch, _offset, _data)

#define nvPushImmed(_push_buffer, _subch, _offset, _val) \
    nvPushImmedVal(_push_buffer, _subch, _offset, _offset##_V_##_val)

// Method headers.
#define nvPushMethod(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _INC_METHOD)
#define nvPushMethodNoIncr(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _NON_INC_METHOD)
#define nvPushMethodOneIncr(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _ONE_INC)

#ifdef __cplusplus
};
#endif

#endif /* __NVIDIA_PUSH_METHODS_H__ */
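As a usage sketch of the macros above: push one incrementing method header followed by two data dwords on subchannel 0, then submit the work. The method offset is a made-up placeholder rather than a real class method, and nvPushKickoff() comes from nvidia-push-utils.h.

/* Sketch only: EXAMPLE_METHOD_OFFSET is a placeholder method offset. */
#define EXAMPLE_METHOD_OFFSET 0x0400

static void examplePushWork(NvPushChannelPtr p, NvU32 hi, NvU32 lo)
{
    nvPushMethod(p, 0 /* subch */, EXAMPLE_METHOD_OFFSET, 2);
    nvPushSetMethodData(p, hi);
    nvPushSetMethodData(p, lo);

    nvPushKickoff(p);
}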
281
src/common/unix/nvidia-push/interface/nvidia-push-types.h
Normal file
@@ -0,0 +1,281 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains core definitions (structures and enums) for use in the
|
||||
* rest of the nvidia-push code.
|
||||
*/
|
||||
|
||||
#ifndef __NVIDIA_PUSH_TYPES_H__
|
||||
#define __NVIDIA_PUSH_TYPES_H__
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvlimits.h"
|
||||
#include "nvmisc.h"
|
||||
#include "nvgputypes.h" /* NvNotificationRec */
|
||||
#include "nv_common_utils.h" /* TRUE/FALSE */
|
||||
#include "nvctassert.h"
|
||||
#include "nv_assert.h" /* nvAssert() */
|
||||
#include "nv_amodel_enum.h" /* NVAModelConfig */
|
||||
#include "nvos.h" /* NV_CHANNELGPFIFO_NOTIFICATION_* */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define NV_PUSH_NOTIFIER_SHORT_TIMEOUT 3000 /* in milliseconds (ie: 3 seconds) */
|
||||
#define NV_PUSH_NOTIFIER_LONG_TIMEOUT 10000 /* in milliseconds (ie: 10 seconds) */
|
||||
|
||||
# define NV_PUSH_PRINTF_FORMAT_ARGUMENT
|
||||
# define NV_PUSH_PRINTF_ATTRIBUTES(_fmt,_var) \
|
||||
__attribute__((format (printf, _fmt, _var)))
|
||||
|
||||
|
||||
#if defined(NV_PUSH_IN_KERNEL)
|
||||
# define NV_PUSH_ALLOW_FLOAT 0
|
||||
#else
|
||||
# define NV_PUSH_ALLOW_FLOAT 1
|
||||
#endif
|
||||
|
||||
typedef union _NvPushChannelUnion
|
||||
{
|
||||
NvU32 u;
|
||||
#if NV_PUSH_ALLOW_FLOAT
|
||||
float f;
|
||||
#endif
|
||||
} NvPushChannelUnion;
|
||||
|
||||
typedef struct _NvPushChannelRec NvPushChannelRec;
|
||||
typedef struct _NvPushChannelRec *NvPushChannelPtr;
|
||||
|
||||
typedef struct _nv_push_hal {
|
||||
void (*kickoff)(struct _NvPushChannelRec*, NvU32 oldGpPut, NvU32 newGpPut);
|
||||
void (*releaseTimelineSemaphore)(NvPushChannelPtr, void *cpuAddress, NvU64 gpuAddress, NvU64 val);
|
||||
void (*acquireTimelineSemaphore)(NvPushChannelPtr, NvU64 gpuAddress, NvU64 val);
|
||||
struct {
|
||||
/* Requires USERD memory to be specified at channel allocation */
|
||||
NvU32 clientAllocatesUserD :1;
|
||||
|
||||
/* On Tegra, we currently need to allocate double the requested GPFIFO
|
||||
* entries */
|
||||
NvU32 allocateDoubleSizeGpFifo :1;
|
||||
|
||||
/* Use Volta+ semaphore methods */
|
||||
NvU32 voltaSemMethods :1;
|
||||
|
||||
NvU32 extendedBase :1;
|
||||
} caps;
|
||||
} NvPushHal;
|
||||
|
||||
typedef struct _NvPushDeviceRec {
|
||||
|
||||
void *hostDevice; /* Provided by the host driver */
|
||||
|
||||
NvBool hostLBoverflowBug1667921 : 1;
|
||||
NvBool clientSli : 1; /* Provided by the host driver */
|
||||
|
||||
NvU32 clientHandle; /* Provided by the host driver */
|
||||
NvU32 numSubDevices; /* Provided by the host driver */
|
||||
|
||||
NvU32 numClasses; /* Provided by the host driver */
|
||||
const NvU32 *supportedClasses;/* Provided by the host driver */
|
||||
|
||||
struct {
|
||||
NvU32 handle; /* Provided by the host driver */
|
||||
NvU32 deviceHandle; /* Provided by the host driver */
|
||||
NvU32 gpuVASpaceObject;/* Provided by the host driver */
|
||||
NvU32 gpuVASpaceCtxDma;/* Provided by the host driver */
|
||||
NvU32 hUserMode; /* VOLTA_USERMODE_A object */
|
||||
void *pUserMode; /* VOLTA_USERMODE_A mapping */
|
||||
} subDevice[NV_MAX_SUBDEVICES];
|
||||
|
||||
NvU32 gpfifoClass;
|
||||
size_t userDSize;
|
||||
|
||||
NVAModelConfig amodelConfig;
|
||||
|
||||
NvPushHal hal;
|
||||
const struct _NvPushImports *pImports;
|
||||
|
||||
} NvPushDeviceRec, *NvPushDevicePtr;
|
||||
|
||||
|
||||
typedef struct _NvPushChannelSegmentRec
|
||||
{
|
||||
NvU32 freeDwords; // free space (in dwords)
|
||||
NvU32 sizeInBytes; // Push buffer size (in bytes)
|
||||
NvU32 putOffset; // Offset of last kickoff
|
||||
NvPushChannelUnion *base; // Push buffer start pointer
|
||||
NvPushChannelUnion *buffer; // Push buffer current pointer
|
||||
NvU64 gpuMapOffset;
|
||||
} NvPushChannelSegmentRec, *NvPushChannelSegmentPtr;
|
||||
|
||||
struct _NvPushChannelRec
|
||||
{
|
||||
NvBool initialized : 1;
|
||||
NvBool logNvDiss : 1;
|
||||
NvBool noTimeout : 1;
|
||||
NvBool ignoreChannelErrors : 1;
|
||||
NvBool channelErrorOccurred : 1;
|
||||
|
||||
NvU32 channelHandle[NV_MAX_SUBDEVICES];
|
||||
NvU32 pushbufferHandle;
|
||||
NvU32 pushbufferVAHandle[NV_MAX_SUBDEVICES];
|
||||
NvPushChannelSegmentRec main;
|
||||
|
||||
void *control[NV_MAX_SUBDEVICES];
|
||||
NvU32 numGpFifoEntries;
|
||||
NvU32 *gpfifo; // GPFIFO entries
|
||||
NvU32 gpPutOffset; // GPFIFO entries last kicked off offset
|
||||
NvU32 currentSubDevMask;
|
||||
|
||||
NvPushChannelSegmentRec progressTracker;
|
||||
struct {
|
||||
NvU32 handle[NV_MAX_SUBDEVICES];
|
||||
void *ptr[NV_MAX_SUBDEVICES];
|
||||
NvU64 gpuVA;
|
||||
} progressSemaphore;
|
||||
|
||||
struct {
|
||||
NvU32 hMemory;
|
||||
} userD[NV_MAX_SUBDEVICES];
|
||||
|
||||
struct {
|
||||
NvU8 num;
|
||||
NvU32 memoryHandle;
|
||||
NvNotification *cpuAddress;
|
||||
NvU64 gpuAddress;
|
||||
NvU32 errorCtxDma;
|
||||
} notifiers;
|
||||
|
||||
NvPushDeviceRec *pDevice;
|
||||
};
|
||||
|
||||
/* Opaque type, only used by pointer within the push buffer utility library. */
|
||||
typedef struct _NvPushImportEvent NvPushImportEvent;
|
||||
|
||||
/* Table of function pointers to be provided by the nvidia-push host driver. */
|
||||
typedef struct _NvPushImports {
|
||||
|
||||
NvU32 (*rmApiControl) (NvPushDevicePtr pDevice,
|
||||
NvU32 hObject,
|
||||
NvU32 cmd,
|
||||
void *pParams,
|
||||
NvU32 paramsSize);
|
||||
|
||||
NvU32 (*rmApiAlloc) (NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hObject,
|
||||
NvU32 hClass,
|
||||
void *pAllocParams);
|
||||
|
||||
NvU32 (*rmApiFree) (NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hObject);
|
||||
|
||||
NvU32 (*rmApiMapMemoryDma) (NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hDma,
|
||||
NvU32 hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 length,
|
||||
NvU32 flags,
|
||||
NvU64 *pDmaOffset);
|
||||
|
||||
NvU32 (*rmApiUnmapMemoryDma) (NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hDma,
|
||||
NvU32 hMemory,
|
||||
NvU32 flags,
|
||||
NvU64 dmaOffset);
|
||||
|
||||
NvU32 (*rmApiAllocMemory64) (NvPushDevicePtr pDevice,
|
||||
NvU32 hParent,
|
||||
NvU32 hMemory,
|
||||
NvU32 hClass,
|
||||
NvU32 flags,
|
||||
void **ppAddress,
|
||||
NvU64 *pLimit);
|
||||
|
||||
NvU32 (*rmApiVidHeapControl) (NvPushDevicePtr pDevice,
|
||||
void *pVidHeapControlParms);
|
||||
|
||||
NvU32 (*rmApiMapMemory) (NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 length,
|
||||
void **ppLinearAddress,
|
||||
NvU32 flags);
|
||||
|
||||
NvU32 (*rmApiUnmapMemory) (NvPushDevicePtr pDevice,
|
||||
NvU32 hDevice,
|
||||
NvU32 hMemory,
|
||||
void *pLinearAddress,
|
||||
NvU32 flags);
|
||||
|
||||
NvU64 (*getMilliSeconds) (NvPushDevicePtr pDevice);
|
||||
|
||||
void (*yield) (NvPushDevicePtr pDevice);
|
||||
|
||||
NvBool (*waitForEvent) (NvPushDevicePtr pDevice,
|
||||
NvPushImportEvent *pEvent,
|
||||
NvU64 timeout);
|
||||
|
||||
void (*emptyEventFifo) (NvPushDevicePtr pDevice,
|
||||
NvPushImportEvent *pEvent);
|
||||
|
||||
void (*channelErrorOccurred) (NvPushChannelPtr pChannel, NvU32 channelErrCode);
|
||||
|
||||
void (*pushbufferWrapped) (NvPushChannelPtr pChannel);
|
||||
|
||||
void (*logError) (NvPushDevicePtr pDevice,
|
||||
NV_PUSH_PRINTF_FORMAT_ARGUMENT const char *fmt, ...)
|
||||
NV_PUSH_PRINTF_ATTRIBUTES(2,3);
|
||||
|
||||
/*
|
||||
* The logNvDiss() import, in DEBUG builds, logs strings to be
|
||||
* parsed by nvdiss. Note that multiple nvPushImportLogNvDiss()
|
||||
* calls may be used to build one line of output (so, respect the
|
||||
* newlines provided in the strings).
|
||||
*/
|
||||
#if defined(DEBUG)
|
||||
void (*logNvDiss) (NvPushChannelPtr pChannel,
|
||||
NV_PUSH_PRINTF_FORMAT_ARGUMENT const char *fmt, ...)
|
||||
NV_PUSH_PRINTF_ATTRIBUTES(2,3);
|
||||
#endif
|
||||
|
||||
} NvPushImports;
|
||||
|
||||
|
||||
void __nvPushMakeRoom(NvPushChannelPtr, NvU32 count);
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* __NVIDIA_PUSH_TYPES_H__ */
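For illustration, a host driver usually provides the import table with designated initializers pointing at its own wrappers. The wrapper names below are hypothetical and only a few of the required entry points are shown.

/* Sketch only: hypothetical host-driver import wrappers (bodies elided). */
static NvU32 exampleRmApiControl(NvPushDevicePtr pDevice, NvU32 hObject,
                                 NvU32 cmd, void *pParams, NvU32 paramsSize);
static NvU64 exampleGetMilliSeconds(NvPushDevicePtr pDevice);
static void  exampleYield(NvPushDevicePtr pDevice);

static const NvPushImports exampleImports = {
    .rmApiControl    = exampleRmApiControl,
    /* ... the remaining rmApi*, event, and logging imports go here ... */
    .getMilliSeconds = exampleGetMilliSeconds,
    .yield           = exampleYield,
};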
|
||||
180
src/common/unix/nvidia-push/interface/nvidia-push-utils.h
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* This file contains push buffer utility functions and declarations */
|
||||
|
||||
#ifndef __NVIDIA_PUSH_UTILS_H__
|
||||
#define __NVIDIA_PUSH_UTILS_H__
|
||||
|
||||
#include "nvidia-push-types.h"
|
||||
#include "nvlimits.h"
|
||||
|
||||
#include "class/cla16f.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static inline NvBool nvPushIsAModel(const NvPushDeviceRec *pDevice)
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/* declare prototypes: */
|
||||
NvBool nvPushCheckChannelError(NvPushChannelPtr pChannel);
|
||||
void nvPushKickoff(NvPushChannelPtr);
|
||||
NvBool nvPushIdleChannelTest(NvPushChannelPtr pChannel, NvU32 timeoutMSec);
|
||||
NvBool nvPushIdleChannel(NvPushChannelPtr);
|
||||
|
||||
void nvPushWaitForNotifier(
|
||||
NvPushChannelPtr pChannel,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 subdeviceMask,
|
||||
NvBool yield,
|
||||
NvPushImportEvent *pEvent,
|
||||
int id);
|
||||
|
||||
void nvPushReleaseTimelineSemaphore(
|
||||
NvPushChannelPtr p,
|
||||
void *cpuAddress,
|
||||
NvU64 gpuAddress,
|
||||
NvU64 val);
|
||||
|
||||
void nvPushAcquireTimelineSemaphore(
|
||||
NvPushChannelPtr p,
|
||||
NvU64 gpuAddress,
|
||||
NvU64 val);
|
||||
|
||||
NvBool nvPushDecodeMethod(NvU32 header, NvU32 *count);
|
||||
void nvPushSetObject(NvPushChannelPtr p, NvU32 subch, NvU32 object[NV_MAX_SUBDEVICES]);
|
||||
void nvPushSetSubdeviceMask(NvPushChannelPtr p, NvU32 mask);
|
||||
void __nvPushMakeRoom(NvPushChannelPtr, NvU32 count);
|
||||
|
||||
#define NV_PUSH_SUBDEVICE_MASK_PRIMARY 0x00000001
|
||||
#define NV_PUSH_SUBDEVICE_MASK_ALL DRF_MASK(NVA16F_DMA_SET_SUBDEVICE_MASK_VALUE)
|
||||
|
||||
/*
|
||||
* Evaluates to TRUE if the two subDevMasks are equivalent for the given SLI
|
||||
* device
|
||||
*/
|
||||
static inline NvBool nvPushSubDeviceMaskEquiv(
|
||||
const NvPushDeviceRec *pDevice,
|
||||
NvU32 maskA,
|
||||
NvU32 maskB)
|
||||
{
|
||||
const NvU32 allSubDevices = (1 << pDevice->numSubDevices) - 1;
|
||||
|
||||
return (maskA & allSubDevices) == (maskB & allSubDevices);
|
||||
}
|
||||
|
||||
/* Evaluates to TRUE if subDevMask will write to all of the GPUs */
|
||||
static inline NvBool nvPushSubDeviceMaskAllActive(
|
||||
const NvPushDeviceRec *pDevice,
|
||||
NvU32 subDevMask)
|
||||
{
|
||||
return nvPushSubDeviceMaskEquiv(pDevice, subDevMask,
|
||||
NV_PUSH_SUBDEVICE_MASK_ALL);
|
||||
}
|
||||
|
||||
#define NV_PUSH_NOTIFIER_INTERNAL_BIT 0x80
|
||||
ct_assert(NV_PUSH_NOTIFIER_INTERNAL_BIT >=
|
||||
NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1);
|
||||
#define NV_PUSH_ERROR_NOTIFIER_INDEX \
|
||||
(NV_PUSH_NOTIFIER_INTERNAL_BIT | \
|
||||
NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR)
|
||||
#define NV_PUSH_TOKEN_NOTIFIER_INDEX \
|
||||
(NV_PUSH_NOTIFIER_INTERNAL_BIT | \
|
||||
NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN)
|
||||
|
||||
/*
|
||||
* Notifiers for use by nvidia-push, not exposed to clients:
|
||||
* NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1: defined by RM
|
||||
* NV_MAX_SUBDEVICES: one for each subdevice to track work submission token
|
||||
*/
|
||||
#define NV_PUSH_NUM_INTERNAL_NOTIFIERS \
|
||||
(NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1 + NV_MAX_SUBDEVICES)
|
||||
|
||||
static inline NvU32 __nvPushGetNotifierRawIndex(
|
||||
const NvPushDeviceRec *pDevice,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 sd)
|
||||
{
|
||||
if (notifierIndex & NV_PUSH_NOTIFIER_INTERNAL_BIT) {
|
||||
return notifierIndex & ~NV_PUSH_NOTIFIER_INTERNAL_BIT;
|
||||
} else {
|
||||
return (notifierIndex * pDevice->numSubDevices) + sd +
|
||||
NV_PUSH_NUM_INTERNAL_NOTIFIERS;
|
||||
}
|
||||
}
|
||||
|
||||
static inline NvNotification *nvPushGetNotifierCpuAddress(
|
||||
const NvPushChannelRec *pChannel,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 sd)
|
||||
{
|
||||
const NvU32 rawIndex =
|
||||
__nvPushGetNotifierRawIndex(pChannel->pDevice, notifierIndex, sd);
|
||||
|
||||
return &pChannel->notifiers.cpuAddress[rawIndex];
|
||||
}
|
||||
|
||||
static inline NvU64 nvPushGetNotifierGpuAddress(
|
||||
const NvPushChannelRec *pChannel,
|
||||
NvU32 notifierIndex,
|
||||
NvU32 sd)
|
||||
{
|
||||
const NvU32 rawIndex =
|
||||
__nvPushGetNotifierRawIndex(pChannel->pDevice, notifierIndex, sd);
|
||||
const size_t offset = rawIndex * sizeof(NvNotification);
|
||||
|
||||
return pChannel->notifiers.gpuAddress + offset;
|
||||
}
|
||||
|
||||
|
||||
extern NvU32 nvPushReadGetOffset(NvPushChannelPtr push_buffer, NvBool minimum);
|
||||
|
||||
|
||||
/*!
|
||||
* Make room in the pushbuffer, checking for errors.
|
||||
*
|
||||
* If a channel error occurred, channelErrorOccurred is set to TRUE.
|
||||
* nvPushCheckForRoomAndErrors() is designed to be called just before a
|
||||
* nvPushMethod() with the same size.
|
||||
*/
|
||||
static inline void nvPushCheckForRoomAndErrors(
|
||||
NvPushChannelPtr pChannel,
|
||||
NvU32 count)
|
||||
{
|
||||
pChannel->channelErrorOccurred = FALSE;
|
||||
|
||||
if (pChannel->main.freeDwords < (count + 1)) {
|
||||
__nvPushMakeRoom(pChannel, count + 1);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* __NVIDIA_PUSH_UTILS_H__ */
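A notifier round trip might look like the sketch below: prime the notifier, push the methods that make the GPU write it, kick off, and then block until it lands. The yield, event, and id arguments passed to nvPushWaitForNotifier() are shown with placeholder values; the exact values a host driver uses depend on its event plumbing, and nvPushInitWaitForNotifier() is declared in nvidia-push-init.h.

/* Sketch only: wait for notifier 'notifierIndex' on the primary subdevice. */
static void exampleWaitForCompletion(NvPushChannelPtr pChannel, NvU32 notifierIndex)
{
    nvPushInitWaitForNotifier(pChannel, notifierIndex, NV_PUSH_SUBDEVICE_MASK_PRIMARY);

    /* ... push the methods that write the notifier here ... */

    nvPushKickoff(pChannel);

    nvPushWaitForNotifier(pChannel,
                          notifierIndex,
                          NV_PUSH_SUBDEVICE_MASK_PRIMARY,
                          TRUE /* yield while waiting */,
                          NULL /* placeholder event */,
                          0    /* placeholder id */);
}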
|
||||
1531
src/common/unix/nvidia-push/src/nvidia-push-init.c
Normal file
File diff suppressed because it is too large
1173
src/common/unix/nvidia-push/src/nvidia-push.c
Normal file
File diff suppressed because it is too large
285
src/common/unix/xzminidec/interface/xz.h
Normal file
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
* XZ decompressor
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#ifndef XZ_H
|
||||
#define XZ_H
|
||||
|
||||
/* Get the definition of size_t. */
|
||||
#if defined(__KERNEL__)
|
||||
# include <linux/stddef.h>
|
||||
#else
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
|
||||
/* Get the definition of uint32_t and friends. */
|
||||
#if defined(NV_XZ_USE_NVTYPES)
|
||||
# include <nvtypes.h>
|
||||
typedef NvU8 uint8_t;
|
||||
typedef NvU16 uint16_t;
|
||||
typedef NvU32 uint32_t;
|
||||
typedef NvU64 uint64_t;
|
||||
#elif defined(__KERNEL__)
|
||||
# include <linux/types.h>
|
||||
#else
|
||||
# include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* In Linux, this is used to make extern functions static when needed. */
|
||||
#ifndef XZ_EXTERN
|
||||
# define XZ_EXTERN extern
|
||||
#endif
|
||||
|
||||
/**
|
||||
* enum xz_mode - Operation mode
|
||||
*
|
||||
* @XZ_SINGLE: Single-call mode. This uses less RAM than
|
||||
* the multi-call modes, because the LZMA2
|
||||
* dictionary doesn't need to be allocated as
|
||||
* part of the decoder state. All required data
|
||||
* structures are allocated at initialization,
|
||||
* so xz_dec_run() cannot return XZ_MEM_ERROR.
|
||||
* @XZ_PREALLOC: Multi-call mode with preallocated LZMA2
|
||||
* dictionary buffer. All data structures are
|
||||
* allocated at initialization, so xz_dec_run()
|
||||
* cannot return XZ_MEM_ERROR.
|
||||
* @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is
|
||||
* allocated once the required size has been
|
||||
* parsed from the stream headers. If the
|
||||
* allocation fails, xz_dec_run() will return
|
||||
* XZ_MEM_ERROR.
|
||||
*
|
||||
* It is possible to enable support only for a subset of the above
|
||||
* modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC,
|
||||
* or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled
|
||||
* with support for all operation modes, but the preboot code may
|
||||
* be built with fewer features to minimize code size.
|
||||
*/
|
||||
enum xz_mode {
|
||||
XZ_SINGLE,
|
||||
XZ_PREALLOC,
|
||||
XZ_DYNALLOC
|
||||
};
|
||||
|
||||
/**
|
||||
* enum xz_ret - Return codes
|
||||
* @XZ_OK: Everything is OK so far. More input or more
|
||||
* output space is required to continue. This
|
||||
* return code is possible only in multi-call mode
|
||||
* (XZ_PREALLOC or XZ_DYNALLOC).
|
||||
* @XZ_STREAM_END: Operation finished successfully.
|
||||
* @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding
|
||||
* is still possible in multi-call mode by simply
|
||||
* calling xz_dec_run() again.
|
||||
* Note that this return value is used only if
|
||||
* XZ_DEC_ANY_CHECK was defined at build time,
|
||||
* which is not used in the kernel. Unsupported
|
||||
* check types return XZ_OPTIONS_ERROR if
|
||||
* XZ_DEC_ANY_CHECK was not defined at build time.
|
||||
* @XZ_MEM_ERROR: Allocating memory failed. This return code is
|
||||
* possible only if the decoder was initialized
|
||||
* with XZ_DYNALLOC. The amount of memory that was
|
||||
* tried to be allocated was no more than the
|
||||
* dict_max argument given to xz_dec_init().
|
||||
* @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than
|
||||
* allowed by the dict_max argument given to
|
||||
* xz_dec_init(). This return value is possible
|
||||
* only in multi-call mode (XZ_PREALLOC or
|
||||
* XZ_DYNALLOC); the single-call mode (XZ_SINGLE)
|
||||
* ignores the dict_max argument.
|
||||
* @XZ_FORMAT_ERROR: File format was not recognized (wrong magic
|
||||
* bytes).
|
||||
* @XZ_OPTIONS_ERROR: This implementation doesn't support the requested
|
||||
* compression options. In the decoder this means
|
||||
* that the header CRC32 matches, but the header
|
||||
* itself specifies something that we don't support.
|
||||
* @XZ_DATA_ERROR: Compressed data is corrupt.
|
||||
* @XZ_BUF_ERROR: Cannot make any progress. Details are slightly
|
||||
* different between multi-call and single-call
|
||||
* mode; more information below.
|
||||
*
|
||||
* In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
|
||||
* to XZ code cannot consume any input and cannot produce any new output.
|
||||
* This happens when there is no new input available, or the output buffer
|
||||
* is full while at least one output byte is still pending. Assuming your
|
||||
* code is not buggy, you can get this error only when decoding a compressed
|
||||
* stream that is truncated or otherwise corrupt.
|
||||
*
|
||||
* In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
|
||||
* is too small or the compressed input is corrupt in a way that makes the
|
||||
* decoder produce more output than the caller expected. When it is
|
||||
* (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR
|
||||
* is used instead of XZ_BUF_ERROR.
|
||||
*/
|
||||
enum xz_ret {
|
||||
XZ_OK,
|
||||
XZ_STREAM_END,
|
||||
XZ_UNSUPPORTED_CHECK,
|
||||
XZ_MEM_ERROR,
|
||||
XZ_MEMLIMIT_ERROR,
|
||||
XZ_FORMAT_ERROR,
|
||||
XZ_OPTIONS_ERROR,
|
||||
XZ_DATA_ERROR,
|
||||
XZ_BUF_ERROR
|
||||
};
|
||||
|
||||
/**
|
||||
* struct xz_buf - Passing input and output buffers to XZ code
|
||||
* @in: Beginning of the input buffer. This may be NULL if and only
|
||||
* if in_pos is equal to in_size.
|
||||
* @in_pos: Current position in the input buffer. This must not exceed
|
||||
* in_size.
|
||||
* @in_size: Size of the input buffer
|
||||
* @out: Beginning of the output buffer. This may be NULL if and only
|
||||
* if out_pos is equal to out_size.
|
||||
* @out_pos: Current position in the output buffer. This must not exceed
|
||||
* out_size.
|
||||
* @out_size: Size of the output buffer
|
||||
*
|
||||
* Only the contents of the output buffer from out[out_pos] onward, and
|
||||
* the variables in_pos and out_pos are modified by the XZ code.
|
||||
*/
|
||||
struct xz_buf {
|
||||
const uint8_t *in;
|
||||
size_t in_pos;
|
||||
size_t in_size;
|
||||
|
||||
uint8_t *out;
|
||||
size_t out_pos;
|
||||
size_t out_size;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct xz_dec - Opaque type to hold the XZ decoder state
|
||||
*/
|
||||
struct xz_dec;
|
||||
|
||||
/**
|
||||
* xz_dec_init() - Allocate and initialize a XZ decoder state
|
||||
* @mode: Operation mode
|
||||
* @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for
|
||||
* multi-call decoding. This is ignored in single-call mode
|
||||
* (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes
|
||||
* or 2^n + 2^(n-1) bytes (the latter sizes are less common
|
||||
* in practice), so other values for dict_max don't make sense.
|
||||
* In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
|
||||
* 512 KiB, and 1 MiB are probably the only reasonable values,
|
||||
* except for kernel and initramfs images where a bigger
|
||||
* dictionary can be fine and useful.
|
||||
*
|
||||
* Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at
|
||||
* once. The caller must provide enough output space or the decoding will
|
||||
* fail. The output space is used as the dictionary buffer, which is why
|
||||
* there is no need to allocate the dictionary as part of the decoder's
|
||||
* internal state.
|
||||
*
|
||||
* Because the output buffer is used as the workspace, streams encoded using
|
||||
* a big dictionary are not a problem in single-call mode. It is enough that
|
||||
* the output buffer is big enough to hold the actual uncompressed data; it
|
||||
* can be smaller than the dictionary size stored in the stream headers.
|
||||
*
|
||||
* Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes
|
||||
* of memory is preallocated for the LZMA2 dictionary. This way there is no
|
||||
* risk that xz_dec_run() could run out of memory, since xz_dec_run() will
|
||||
* never allocate any memory. Instead, if the preallocated dictionary is too
|
||||
* small for decoding the given input stream, xz_dec_run() will return
|
||||
* XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be
|
||||
* decoded to avoid allocating excessive amount of memory for the dictionary.
|
||||
*
|
||||
* Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC):
|
||||
* dict_max specifies the maximum allowed dictionary size that xz_dec_run()
|
||||
* may allocate once it has parsed the dictionary size from the stream
|
||||
* headers. This way excessive allocations can be avoided while still
|
||||
* limiting the maximum memory usage to a sane value to prevent running the
|
||||
* system out of memory when decompressing streams from untrusted sources.
|
||||
*
|
||||
* On success, xz_dec_init() returns a pointer to struct xz_dec, which is
|
||||
* ready to be used with xz_dec_run(). If memory allocation fails,
|
||||
* xz_dec_init() returns NULL.
|
||||
*/
|
||||
XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max);
|
||||
|
||||
/**
|
||||
* xz_dec_run() - Run the XZ decoder
|
||||
* @s: Decoder state allocated using xz_dec_init()
|
||||
* @b: Input and output buffers
|
||||
*
|
||||
* The possible return values depend on build options and operation mode.
|
||||
* See enum xz_ret for details.
|
||||
*
|
||||
* Note that if an error occurs in single-call mode (return value is not
|
||||
* XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
|
||||
* contents of the output buffer from b->out[b->out_pos] onward are
|
||||
* undefined. This is true even after XZ_BUF_ERROR, because with some filter
|
||||
* chains, there may be a second pass over the output buffer, and this pass
|
||||
* cannot be properly done if the output buffer is truncated. Thus, you
|
||||
* cannot give the single-call decoder a too small buffer and then expect to
|
||||
* get that amount of valid data from the beginning of the stream. You must use
|
||||
* the multi-call decoder if you don't want to uncompress the whole stream.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
|
||||
|
||||
/**
|
||||
* xz_dec_reset() - Reset an already allocated decoder state
|
||||
* @s: Decoder state allocated using xz_dec_init()
|
||||
*
|
||||
* This function can be used to reset the multi-call decoder state without
|
||||
* freeing and reallocating memory with xz_dec_end() and xz_dec_init().
|
||||
*
|
||||
* In single-call mode, xz_dec_reset() is always called in the beginning of
|
||||
* xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
|
||||
* multi-call mode.
|
||||
*/
|
||||
XZ_EXTERN void xz_dec_reset(struct xz_dec *s);
|
||||
|
||||
/**
|
||||
* xz_dec_end() - Free the memory allocated for the decoder state
|
||||
* @s: Decoder state allocated using xz_dec_init(). If s is NULL,
|
||||
* this function does nothing.
|
||||
*/
|
||||
XZ_EXTERN void xz_dec_end(struct xz_dec *s);
|
||||
|
||||
/*
|
||||
* Standalone build (userspace build or in-kernel build for boot time use)
|
||||
* needs a CRC32 implementation. For normal in-kernel use, kernel's own
|
||||
* CRC32 module is used instead, and users of this module don't need to
|
||||
* care about the functions below.
|
||||
*/
|
||||
#ifndef XZ_INTERNAL_CRC32
|
||||
# ifdef __KERNEL__
|
||||
# define XZ_INTERNAL_CRC32 0
|
||||
# else
|
||||
# define XZ_INTERNAL_CRC32 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if XZ_INTERNAL_CRC32
|
||||
/*
|
||||
* This must be called before any other xz_* function to initialize
|
||||
* the CRC32 lookup table.
|
||||
*/
|
||||
XZ_EXTERN void xz_crc32_init(void);
|
||||
|
||||
/*
|
||||
* Update CRC32 value using the polynomial from IEEE-802.3. To start a new
|
||||
* calculation, the third argument must be zero. To continue the calculation,
|
||||
* the previously returned value is passed as the third argument.
|
||||
*/
|
||||
XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
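A minimal single-call decoding sketch using the API above; it assumes the whole compressed stream and a sufficiently large output buffer are already in memory, that single-call support (XZ_DEC_SINGLE) is compiled in, and it simply returns the raw xz_ret code (XZ_STREAM_END on success).

/* Sketch only: decode a complete .xz stream with one xz_dec_run() call. */
static enum xz_ret example_xz_decode(const uint8_t *in, size_t in_size,
                                     uint8_t *out, size_t out_size,
                                     size_t *out_len)
{
        struct xz_dec *s;
        struct xz_buf b;
        enum xz_ret ret;

#if XZ_INTERNAL_CRC32
        xz_crc32_init();                /* required once before decoding */
#endif

        s = xz_dec_init(XZ_SINGLE, 0);  /* dict_max is ignored in XZ_SINGLE mode */
        if (s == NULL)
                return XZ_MEM_ERROR;

        b.in = in;
        b.in_pos = 0;
        b.in_size = in_size;
        b.out = out;
        b.out_pos = 0;
        b.out_size = out_size;

        ret = xz_dec_run(s, &b);
        xz_dec_end(s);

        *out_len = b.out_pos;
        return ret;
}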
113
src/common/unix/xzminidec/src/xz_config.h
Normal file
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
* Private includes and definitions for userspace use of XZ Embedded
|
||||
*
|
||||
* Author: Lasse Collin <lasse.collin@tukaani.org>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#ifndef XZ_CONFIG_H
|
||||
#define XZ_CONFIG_H
|
||||
|
||||
/* Uncomment as needed to enable BCJ filter decoders. */
|
||||
/* #define XZ_DEC_X86 */
|
||||
/* #define XZ_DEC_POWERPC */
|
||||
/* #define XZ_DEC_IA64 */
|
||||
/* #define XZ_DEC_ARM */
|
||||
/* #define XZ_DEC_ARMTHUMB */
|
||||
/* #define XZ_DEC_SPARC */
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "xz.h"
|
||||
|
||||
#if defined(NV_XZ_CUSTOM_MEM_HOOKS)
|
||||
# include "nv_xz_mem_hooks.h"
|
||||
#else
|
||||
# include <stdlib.h>
|
||||
# include <string.h>
|
||||
# define kmalloc(size, flags) malloc(size)
|
||||
# define kfree(ptr) free(ptr)
|
||||
# define vmalloc(size) malloc(size)
|
||||
# define vfree(ptr) free(ptr)
|
||||
|
||||
# define memeq(a, b, size) (memcmp(a, b, size) == 0)
|
||||
# define memzero(buf, size) memset(buf, 0, size)
|
||||
#endif /* defined(NV_XZ_CUSTOM_MEM_HOOKS) */
|
||||
|
||||
#ifndef min
|
||||
# define min(x, y) ((x) < (y) ? (x) : (y))
|
||||
#endif
|
||||
#define min_t(type, x, y) min(x, y)
|
||||
|
||||
/*
|
||||
* Some functions have been marked with __always_inline to keep the
|
||||
* performance reasonable even when the compiler is optimizing for
|
||||
* small code size. You may be able to save a few bytes by #defining
|
||||
* __always_inline to plain inline, but don't complain if the code
|
||||
* becomes slow.
|
||||
*
|
||||
* NOTE: System headers on GNU/Linux may #define this macro already,
|
||||
* so if you want to change it, you need to #undef it first.
|
||||
*/
|
||||
#ifndef __always_inline
|
||||
# ifdef __GNUC__
|
||||
# define __always_inline \
|
||||
inline __attribute__((__always_inline__))
|
||||
# else
|
||||
# define __always_inline inline
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Inline functions to access unaligned unsigned 32-bit integers */
|
||||
#ifndef get_unaligned_le32
|
||||
static inline uint32_t get_unaligned_le32(const uint8_t *buf)
|
||||
{
|
||||
return (uint32_t)buf[0]
|
||||
| ((uint32_t)buf[1] << 8)
|
||||
| ((uint32_t)buf[2] << 16)
|
||||
| ((uint32_t)buf[3] << 24);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef get_unaligned_be32
|
||||
static inline uint32_t get_unaligned_be32(const uint8_t *buf)
|
||||
{
|
||||
return (uint32_t)(buf[0] << 24)
|
||||
| ((uint32_t)buf[1] << 16)
|
||||
| ((uint32_t)buf[2] << 8)
|
||||
| (uint32_t)buf[3];
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef put_unaligned_le32
|
||||
static inline void put_unaligned_le32(uint32_t val, uint8_t *buf)
|
||||
{
|
||||
buf[0] = (uint8_t)val;
|
||||
buf[1] = (uint8_t)(val >> 8);
|
||||
buf[2] = (uint8_t)(val >> 16);
|
||||
buf[3] = (uint8_t)(val >> 24);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef put_unaligned_be32
|
||||
static inline void put_unaligned_be32(uint32_t val, uint8_t *buf)
|
||||
{
|
||||
buf[0] = (uint8_t)(val >> 24);
|
||||
buf[1] = (uint8_t)(val >> 16);
|
||||
buf[2] = (uint8_t)(val >> 8);
|
||||
buf[3] = (uint8_t)val;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Use get_unaligned_le32() also for aligned access for simplicity. On
|
||||
* little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
|
||||
* could save a few bytes in code size.
|
||||
*/
|
||||
#ifndef get_le32
|
||||
# define get_le32 get_unaligned_le32
|
||||
#endif
|
||||
|
||||
#endif
|
||||
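As a small illustration, the little-endian helpers round-trip a 32-bit value through a byte buffer regardless of host endianness or pointer alignment:

/* Sketch only: 0x11223344 is stored as the bytes 44 33 22 11 and read back. */
static uint32_t example_unaligned_roundtrip(void)
{
        uint8_t buf[4];

        put_unaligned_le32(0x11223344, buf);  /* buf = { 0x44, 0x33, 0x22, 0x11 } */
        return get_unaligned_le32(buf);       /* == 0x11223344 */
}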
59
src/common/unix/xzminidec/src/xz_crc32.c
Normal file
@@ -0,0 +1,59 @@
/*
 * CRC32 using the polynomial from IEEE-802.3
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/*
 * This is not the fastest implementation, but it is pretty compact.
 * The fastest versions of xz_crc32() on modern CPUs without hardware
 * accelerated CRC instruction are 3-5 times as fast as this version,
 * but they are bigger and use more memory for the lookup table.
 */

#include "xz_private.h"

/*
 * STATIC_RW_DATA is used in the pre-boot environment on some architectures.
 * See <linux/decompress/mm.h> for details.
 */
#ifndef STATIC_RW_DATA
# define STATIC_RW_DATA static
#endif

STATIC_RW_DATA uint32_t xz_crc32_table[256];

XZ_EXTERN void xz_crc32_init(void)
{
        const uint32_t poly = 0xEDB88320;

        uint32_t i;
        uint32_t j;
        uint32_t r;

        for (i = 0; i < 256; ++i) {
                r = i;
                for (j = 0; j < 8; ++j)
                        r = (r >> 1) ^ (poly & ~((r & 1) - 1));

                xz_crc32_table[i] = r;
        }

        return;
}

XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
{
        crc = ~crc;

        while (size != 0) {
                crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
                --size;
        }

        return ~crc;
}
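A short usage sketch: xz_crc32_init() must run once before any other xz_* call, and a CRC can be computed in one shot or continued by feeding the previous return value back in as the third argument (per the xz.h comments).

/* Sketch only: one-shot and incremental xz_crc32() use give the same result. */
static uint32_t example_crc32(const uint8_t *buf, size_t len)
{
        uint32_t crc;

        xz_crc32_init();

        crc = xz_crc32(buf, len, 0);                        /* one shot, from 0 */

        crc = xz_crc32(buf, len / 2, 0);                    /* first half ... */
        crc = xz_crc32(buf + len / 2, len - len / 2, crc);  /* ... then the rest */

        return crc;
}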
574
src/common/unix/xzminidec/src/xz_dec_bcj.c
Normal file
@@ -0,0 +1,574 @@
|
||||
/*
|
||||
* Branch/Call/Jump (BCJ) filter decoders
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#include "xz_private.h"
|
||||
|
||||
/*
|
||||
* The rest of the file is inside this ifdef. It makes things a little more
|
||||
* convenient when building without support for any BCJ filters.
|
||||
*/
|
||||
#ifdef XZ_DEC_BCJ
|
||||
|
||||
struct xz_dec_bcj {
|
||||
/* Type of the BCJ filter being used */
|
||||
enum {
|
||||
BCJ_X86 = 4, /* x86 or x86-64 */
|
||||
BCJ_POWERPC = 5, /* Big endian only */
|
||||
BCJ_IA64 = 6, /* Big or little endian */
|
||||
BCJ_ARM = 7, /* Little endian only */
|
||||
BCJ_ARMTHUMB = 8, /* Little endian only */
|
||||
BCJ_SPARC = 9 /* Big or little endian */
|
||||
} type;
|
||||
|
||||
/*
|
||||
* Return value of the next filter in the chain. We need to preserve
|
||||
* this information across calls, because we must not call the next
|
||||
* filter anymore once it has returned XZ_STREAM_END.
|
||||
*/
|
||||
enum xz_ret ret;
|
||||
|
||||
/* True if we are operating in single-call mode. */
|
||||
bool single_call;
|
||||
|
||||
/*
|
||||
* Absolute position relative to the beginning of the uncompressed
|
||||
* data (in a single .xz Block). We care only about the lowest 32
|
||||
* bits so this doesn't need to be uint64_t even with big files.
|
||||
*/
|
||||
uint32_t pos;
|
||||
|
||||
/* x86 filter state */
|
||||
uint32_t x86_prev_mask;
|
||||
|
||||
/* Temporary space to hold the variables from struct xz_buf */
|
||||
uint8_t *out;
|
||||
size_t out_pos;
|
||||
size_t out_size;
|
||||
|
||||
struct {
|
||||
/* Amount of already filtered data in the beginning of buf */
|
||||
size_t filtered;
|
||||
|
||||
/* Total amount of data currently stored in buf */
|
||||
size_t size;
|
||||
|
||||
/*
|
||||
* Buffer to hold a mix of filtered and unfiltered data. This
|
||||
* needs to be big enough to hold Alignment + 2 * Look-ahead:
|
||||
*
|
||||
* Type Alignment Look-ahead
|
||||
* x86 1 4
|
||||
* PowerPC 4 0
|
||||
* IA-64 16 0
|
||||
* ARM 4 0
|
||||
* ARM-Thumb 2 2
|
||||
* SPARC 4 0
|
||||
*/
|
||||
uint8_t buf[16];
|
||||
} temp;
|
||||
};
|
||||
|
||||
#ifdef XZ_DEC_X86
|
||||
/*
|
||||
* This is used to test the most significant byte of a memory address
|
||||
* in an x86 instruction.
|
||||
*/
|
||||
static inline int bcj_x86_test_msbyte(uint8_t b)
|
||||
{
|
||||
return b == 0x00 || b == 0xFF;
|
||||
}
|
||||
|
||||
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
static const bool mask_to_allowed_status[8]
|
||||
= { true, true, true, false, true, false, false, false };
|
||||
|
||||
static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
|
||||
|
||||
size_t i;
|
||||
size_t prev_pos = (size_t)-1;
|
||||
uint32_t prev_mask = s->x86_prev_mask;
|
||||
uint32_t src;
|
||||
uint32_t dest;
|
||||
uint32_t j;
|
||||
uint8_t b;
|
||||
|
||||
if (size <= 4)
|
||||
return 0;
|
||||
|
||||
size -= 4;
|
||||
for (i = 0; i < size; ++i) {
|
||||
if ((buf[i] & 0xFE) != 0xE8)
|
||||
continue;
|
||||
|
||||
prev_pos = i - prev_pos;
|
||||
if (prev_pos > 3) {
|
||||
prev_mask = 0;
|
||||
} else {
|
||||
prev_mask = (prev_mask << (prev_pos - 1)) & 7;
|
||||
if (prev_mask != 0) {
|
||||
b = buf[i + 4 - mask_to_bit_num[prev_mask]];
|
||||
if (!mask_to_allowed_status[prev_mask]
|
||||
|| bcj_x86_test_msbyte(b)) {
|
||||
prev_pos = i;
|
||||
prev_mask = (prev_mask << 1) | 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
prev_pos = i;
|
||||
|
||||
if (bcj_x86_test_msbyte(buf[i + 4])) {
|
||||
src = get_unaligned_le32(buf + i + 1);
|
||||
while (true) {
|
||||
dest = src - (s->pos + (uint32_t)i + 5);
|
||||
if (prev_mask == 0)
|
||||
break;
|
||||
|
||||
j = mask_to_bit_num[prev_mask] * 8;
|
||||
b = (uint8_t)(dest >> (24 - j));
|
||||
if (!bcj_x86_test_msbyte(b))
|
||||
break;
|
||||
|
||||
src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
|
||||
}
|
||||
|
||||
dest &= 0x01FFFFFF;
|
||||
dest |= (uint32_t)0 - (dest & 0x01000000);
|
||||
put_unaligned_le32(dest, buf + i + 1);
|
||||
i += 4;
|
||||
} else {
|
||||
prev_mask = (prev_mask << 1) | 1;
|
||||
}
|
||||
}
|
||||
|
||||
prev_pos = i - prev_pos;
|
||||
s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_POWERPC
|
||||
static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
uint32_t instr;
|
||||
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
instr = get_unaligned_be32(buf + i);
|
||||
if ((instr & 0xFC000003) == 0x48000001) {
|
||||
instr &= 0x03FFFFFC;
|
||||
instr -= s->pos + (uint32_t)i;
|
||||
instr &= 0x03FFFFFC;
|
||||
instr |= 0x48000001;
|
||||
put_unaligned_be32(instr, buf + i);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
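To make the conversion above concrete, here is a small standalone sketch (illustrative only, not part of the decoder) that mirrors bcj_powerpc() on a single big-endian word. It assumes a "bl" instruction whose 24-bit offset field was rewritten to an absolute address by the encoder; the decoder subtracts the current uncompressed position to restore the relative offset.

#include <stdint.h>
#include <stdio.h>

/* Decode-direction conversion for one PowerPC "bl" word, as in bcj_powerpc(). */
static uint32_t ppc_bcj_decode_word(uint32_t instr, uint32_t pos)
{
        if ((instr & 0xFC000003) == 0x48000001) {     /* branch-and-link, AA=0 */
                uint32_t addr = instr & 0x03FFFFFC;   /* absolute target stored by the encoder */
                addr -= pos;                          /* back to a relative offset */
                instr = 0x48000001 | (addr & 0x03FFFFFC);
        }
        return instr;
}

int main(void)
{
        /* Word found at uncompressed offset 0x100; the encoder stored target 0x500. */
        uint32_t out = ppc_bcj_decode_word(0x48000501, 0x100);
        printf("0x%08X\n", out);   /* prints 0x48000401, i.e. "bl ." + 0x400 */
        return 0;
}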
|
||||
|
||||
#ifdef XZ_DEC_IA64
|
||||
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
static const uint8_t branch_table[32] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
4, 4, 6, 6, 0, 0, 7, 7,
|
||||
4, 4, 0, 0, 4, 4, 0, 0
|
||||
};
|
||||
|
||||
/*
|
||||
* The local variables take a little bit of stack space, but it's less
* than what the LZMA2 decoder takes, so it doesn't make sense to reduce
* stack usage here without doing that for the LZMA2 decoder too.
|
||||
*/
|
||||
|
||||
/* Loop counters */
|
||||
size_t i;
|
||||
size_t j;
|
||||
|
||||
/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
|
||||
uint32_t slot;
|
||||
|
||||
/* Bitwise offset of the instruction indicated by slot */
|
||||
uint32_t bit_pos;
|
||||
|
||||
/* bit_pos split into byte and bit parts */
|
||||
uint32_t byte_pos;
|
||||
uint32_t bit_res;
|
||||
|
||||
/* Address part of an instruction */
|
||||
uint32_t addr;
|
||||
|
||||
/* Mask used to detect which instructions to convert */
|
||||
uint32_t mask;
|
||||
|
||||
/* 41-bit instruction stored somewhere in the lowest 48 bits */
|
||||
uint64_t instr;
|
||||
|
||||
/* Instruction normalized with bit_res for easier manipulation */
|
||||
uint64_t norm;
|
||||
|
||||
for (i = 0; i + 16 <= size; i += 16) {
|
||||
mask = branch_table[buf[i] & 0x1F];
|
||||
for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
|
||||
if (((mask >> slot) & 1) == 0)
|
||||
continue;
|
||||
|
||||
byte_pos = bit_pos >> 3;
|
||||
bit_res = bit_pos & 7;
|
||||
instr = 0;
|
||||
for (j = 0; j < 6; ++j)
|
||||
instr |= (uint64_t)(buf[i + j + byte_pos])
|
||||
<< (8 * j);
|
||||
|
||||
norm = instr >> bit_res;
|
||||
|
||||
if (((norm >> 37) & 0x0F) == 0x05
|
||||
&& ((norm >> 9) & 0x07) == 0) {
|
||||
addr = (norm >> 13) & 0x0FFFFF;
|
||||
addr |= ((uint32_t)(norm >> 36) & 1) << 20;
|
||||
addr <<= 4;
|
||||
addr -= s->pos + (uint32_t)i;
|
||||
addr >>= 4;
|
||||
|
||||
norm &= ~((uint64_t)0x8FFFFF << 13);
|
||||
norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
|
||||
norm |= (uint64_t)(addr & 0x100000)
|
||||
<< (36 - 20);
|
||||
|
||||
instr &= (1 << bit_res) - 1;
|
||||
instr |= norm << bit_res;
|
||||
|
||||
for (j = 0; j < 6; j++)
|
||||
buf[i + j + byte_pos]
|
||||
= (uint8_t)(instr >> (8 * j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_ARM
|
||||
static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
uint32_t addr;
|
||||
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
if (buf[i + 3] == 0xEB) {
|
||||
addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
|
||||
| ((uint32_t)buf[i + 2] << 16);
|
||||
addr <<= 2;
|
||||
addr -= s->pos + (uint32_t)i + 8;
|
||||
addr >>= 2;
|
||||
buf[i] = (uint8_t)addr;
|
||||
buf[i + 1] = (uint8_t)(addr >> 8);
|
||||
buf[i + 2] = (uint8_t)(addr >> 16);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_ARMTHUMB
|
||||
static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
uint32_t addr;
|
||||
|
||||
for (i = 0; i + 4 <= size; i += 2) {
|
||||
if ((buf[i + 1] & 0xF8) == 0xF0
|
||||
&& (buf[i + 3] & 0xF8) == 0xF8) {
|
||||
addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
|
||||
| ((uint32_t)buf[i] << 11)
|
||||
| (((uint32_t)buf[i + 3] & 0x07) << 8)
|
||||
| (uint32_t)buf[i + 2];
|
||||
addr <<= 1;
|
||||
addr -= s->pos + (uint32_t)i + 4;
|
||||
addr >>= 1;
|
||||
buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
|
||||
buf[i] = (uint8_t)(addr >> 11);
|
||||
buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
|
||||
buf[i + 2] = (uint8_t)addr;
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_SPARC
|
||||
static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
uint32_t instr;
|
||||
|
||||
for (i = 0; i + 4 <= size; i += 4) {
|
||||
instr = get_unaligned_be32(buf + i);
|
||||
if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
|
||||
instr <<= 2;
|
||||
instr -= s->pos + (uint32_t)i;
|
||||
instr >>= 2;
|
||||
instr = ((uint32_t)0x40000000 - (instr & 0x400000))
|
||||
| 0x40000000 | (instr & 0x3FFFFF);
|
||||
put_unaligned_be32(instr, buf + i);
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Apply the selected BCJ filter. Update *pos and s->pos to match the amount
|
||||
* of data that got filtered.
|
||||
*
|
||||
* NOTE: This is implemented as a switch statement to avoid using function
|
||||
* pointers, which could be problematic in the kernel boot code, which must
|
||||
* avoid pointers to static data (at least on x86).
|
||||
*/
|
||||
static void bcj_apply(struct xz_dec_bcj *s,
|
||||
uint8_t *buf, size_t *pos, size_t size)
|
||||
{
|
||||
size_t filtered;
|
||||
|
||||
buf += *pos;
|
||||
size -= *pos;
|
||||
|
||||
switch (s->type) {
|
||||
#ifdef XZ_DEC_X86
|
||||
case BCJ_X86:
|
||||
filtered = bcj_x86(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_POWERPC
|
||||
case BCJ_POWERPC:
|
||||
filtered = bcj_powerpc(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_IA64
|
||||
case BCJ_IA64:
|
||||
filtered = bcj_ia64(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_ARM
|
||||
case BCJ_ARM:
|
||||
filtered = bcj_arm(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_ARMTHUMB
|
||||
case BCJ_ARMTHUMB:
|
||||
filtered = bcj_armthumb(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
#ifdef XZ_DEC_SPARC
|
||||
case BCJ_SPARC:
|
||||
filtered = bcj_sparc(s, buf, size);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
/* Never reached but silence compiler warnings. */
|
||||
filtered = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
*pos += filtered;
|
||||
s->pos += filtered;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush pending filtered data from temp to the output buffer.
|
||||
* Move the remaining mixture of possibly filtered and unfiltered
|
||||
* data to the beginning of temp.
|
||||
*/
|
||||
static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
|
||||
{
|
||||
size_t copy_size;
|
||||
|
||||
copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
|
||||
memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
|
||||
b->out_pos += copy_size;
|
||||
|
||||
s->temp.filtered -= copy_size;
|
||||
s->temp.size -= copy_size;
|
||||
memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
|
||||
}
|
||||
|
||||
/*
|
||||
* The BCJ filter functions are primitive in the sense that they process the
* data in chunks of 1-16 bytes. To hide this issue, this function does
* some buffering.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
|
||||
struct xz_dec_lzma2 *lzma2,
|
||||
struct xz_buf *b)
|
||||
{
|
||||
size_t out_start;
|
||||
|
||||
/*
|
||||
* Flush pending already filtered data to the output buffer. Return
|
||||
* immediately if we couldn't flush everything, or if the next
|
||||
* filter in the chain had already returned XZ_STREAM_END.
|
||||
*/
|
||||
if (s->temp.filtered > 0) {
|
||||
bcj_flush(s, b);
|
||||
if (s->temp.filtered > 0)
|
||||
return XZ_OK;
|
||||
|
||||
if (s->ret == XZ_STREAM_END)
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have more output space than what is currently pending in
|
||||
* temp, copy the unfiltered data from temp to the output buffer
|
||||
* and try to fill the output buffer by decoding more data from the
|
||||
* next filter in the chain. Apply the BCJ filter on the new data
|
||||
* in the output buffer. If everything cannot be filtered, copy it
|
||||
* to temp and rewind the output buffer position accordingly.
|
||||
*
|
||||
* This needs to be always run when temp.size == 0 to handle a special
|
||||
* case where the output buffer is full and the next filter has no
|
||||
* more output coming but hasn't returned XZ_STREAM_END yet.
|
||||
*/
|
||||
if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
|
||||
out_start = b->out_pos;
|
||||
memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
|
||||
b->out_pos += s->temp.size;
|
||||
|
||||
s->ret = xz_dec_lzma2_run(lzma2, b);
|
||||
if (s->ret != XZ_STREAM_END
|
||||
&& (s->ret != XZ_OK || s->single_call))
|
||||
return s->ret;
|
||||
|
||||
bcj_apply(s, b->out, &out_start, b->out_pos);
|
||||
|
||||
/*
|
||||
* As an exception, if the next filter returned XZ_STREAM_END,
|
||||
* we can do that too, since the last few bytes that remain
|
||||
* unfiltered are meant to remain unfiltered.
|
||||
*/
|
||||
if (s->ret == XZ_STREAM_END)
|
||||
return XZ_STREAM_END;
|
||||
|
||||
s->temp.size = b->out_pos - out_start;
|
||||
b->out_pos -= s->temp.size;
|
||||
memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
|
||||
|
||||
/*
|
||||
* If there wasn't enough input to the next filter to fill
|
||||
* the output buffer with unfiltered data, there's no point
|
||||
* to try decoding more data to temp.
|
||||
*/
|
||||
if (b->out_pos + s->temp.size < b->out_size)
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have unfiltered data in temp. If the output buffer isn't full
|
||||
* yet, try to fill the temp buffer by decoding more data from the
|
||||
* next filter. Apply the BCJ filter on temp. Then we hopefully can
|
||||
* fill the actual output buffer by copying filtered data from temp.
|
||||
* A mix of filtered and unfiltered data may be left in temp; it will
|
||||
* be taken care of on the next call to this function.
|
||||
*/
|
||||
if (b->out_pos < b->out_size) {
|
||||
/* Make b->out{,_pos,_size} temporarily point to s->temp. */
|
||||
s->out = b->out;
|
||||
s->out_pos = b->out_pos;
|
||||
s->out_size = b->out_size;
|
||||
b->out = s->temp.buf;
|
||||
b->out_pos = s->temp.size;
|
||||
b->out_size = sizeof(s->temp.buf);
|
||||
|
||||
s->ret = xz_dec_lzma2_run(lzma2, b);
|
||||
|
||||
s->temp.size = b->out_pos;
|
||||
b->out = s->out;
|
||||
b->out_pos = s->out_pos;
|
||||
b->out_size = s->out_size;
|
||||
|
||||
if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
|
||||
return s->ret;
|
||||
|
||||
bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
|
||||
|
||||
/*
|
||||
* If the next filter returned XZ_STREAM_END, we mark that
|
||||
* everything is filtered, since the last unfiltered bytes
|
||||
* of the stream are meant to be left as is.
|
||||
*/
|
||||
if (s->ret == XZ_STREAM_END)
|
||||
s->temp.filtered = s->temp.size;
|
||||
|
||||
bcj_flush(s, b);
|
||||
if (s->temp.filtered > 0)
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
return s->ret;
|
||||
}
|
||||
|
||||
XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
|
||||
{
|
||||
struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
if (s != NULL)
|
||||
s->single_call = single_call;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
|
||||
{
|
||||
switch (id) {
|
||||
#ifdef XZ_DEC_X86
|
||||
case BCJ_X86:
|
||||
#endif
|
||||
#ifdef XZ_DEC_POWERPC
|
||||
case BCJ_POWERPC:
|
||||
#endif
|
||||
#ifdef XZ_DEC_IA64
|
||||
case BCJ_IA64:
|
||||
#endif
|
||||
#ifdef XZ_DEC_ARM
|
||||
case BCJ_ARM:
|
||||
#endif
|
||||
#ifdef XZ_DEC_ARMTHUMB
|
||||
case BCJ_ARMTHUMB:
|
||||
#endif
|
||||
#ifdef XZ_DEC_SPARC
|
||||
case BCJ_SPARC:
|
||||
#endif
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Unsupported Filter ID */
|
||||
return XZ_OPTIONS_ERROR;
|
||||
}
|
||||
|
||||
s->type = id;
|
||||
s->ret = XZ_OK;
|
||||
s->pos = 0;
|
||||
s->x86_prev_mask = 0;
|
||||
s->temp.filtered = 0;
|
||||
s->temp.size = 0;
|
||||
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
#endif
|
||||
1173
src/common/unix/xzminidec/src/xz_dec_lzma2.c
Normal file
File diff suppressed because it is too large
829
src/common/unix/xzminidec/src/xz_dec_stream.c
Normal file
@@ -0,0 +1,829 @@
|
||||
/*
|
||||
* .xz Stream decoder
|
||||
*
|
||||
* Author: Lasse Collin <lasse.collin@tukaani.org>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#include "xz_private.h"
|
||||
#include "xz_stream.h"
|
||||
|
||||
/* Hash used to validate the Index field */
|
||||
struct xz_dec_hash {
|
||||
vli_type unpadded;
|
||||
vli_type uncompressed;
|
||||
uint32_t crc32;
|
||||
};
|
||||
|
||||
struct xz_dec {
|
||||
/* Position in dec_main() */
|
||||
enum {
|
||||
SEQ_STREAM_HEADER,
|
||||
SEQ_BLOCK_START,
|
||||
SEQ_BLOCK_HEADER,
|
||||
SEQ_BLOCK_UNCOMPRESS,
|
||||
SEQ_BLOCK_PADDING,
|
||||
SEQ_BLOCK_CHECK,
|
||||
SEQ_INDEX,
|
||||
SEQ_INDEX_PADDING,
|
||||
SEQ_INDEX_CRC32,
|
||||
SEQ_STREAM_FOOTER
|
||||
} sequence;
|
||||
|
||||
/* Position in variable-length integers and Check fields */
|
||||
uint32_t pos;
|
||||
|
||||
/* Variable-length integer decoded by dec_vli() */
|
||||
vli_type vli;
|
||||
|
||||
/* Saved in_pos and out_pos */
|
||||
size_t in_start;
|
||||
size_t out_start;
|
||||
|
||||
/* CRC32 value in Block or Index */
|
||||
uint32_t crc32;
|
||||
|
||||
/* Type of the integrity check calculated from uncompressed data */
|
||||
enum xz_check check_type;
|
||||
|
||||
/* Operation mode */
|
||||
enum xz_mode mode;
|
||||
|
||||
/*
|
||||
* True if the next call to xz_dec_run() is allowed to return
|
||||
* XZ_BUF_ERROR.
|
||||
*/
|
||||
bool allow_buf_error;
|
||||
|
||||
/* Information stored in Block Header */
|
||||
struct {
|
||||
/*
|
||||
* Value stored in the Compressed Size field, or
|
||||
* VLI_UNKNOWN if Compressed Size is not present.
|
||||
*/
|
||||
vli_type compressed;
|
||||
|
||||
/*
|
||||
* Value stored in the Uncompressed Size field, or
|
||||
* VLI_UNKNOWN if Uncompressed Size is not present.
|
||||
*/
|
||||
vli_type uncompressed;
|
||||
|
||||
/* Size of the Block Header field */
|
||||
uint32_t size;
|
||||
} block_header;
|
||||
|
||||
/* Information collected when decoding Blocks */
|
||||
struct {
|
||||
/* Observed compressed size of the current Block */
|
||||
vli_type compressed;
|
||||
|
||||
/* Observed uncompressed size of the current Block */
|
||||
vli_type uncompressed;
|
||||
|
||||
/* Number of Blocks decoded so far */
|
||||
vli_type count;
|
||||
|
||||
/*
|
||||
* Hash calculated from the Block sizes. This is used to
|
||||
* validate the Index field.
|
||||
*/
|
||||
struct xz_dec_hash hash;
|
||||
} block;
|
||||
|
||||
/* Variables needed when verifying the Index field */
|
||||
struct {
|
||||
/* Position in dec_index() */
|
||||
enum {
|
||||
SEQ_INDEX_COUNT,
|
||||
SEQ_INDEX_UNPADDED,
|
||||
SEQ_INDEX_UNCOMPRESSED
|
||||
} sequence;
|
||||
|
||||
/* Size of the Index in bytes */
|
||||
vli_type size;
|
||||
|
||||
/* Number of Records (matches block.count in valid files) */
|
||||
vli_type count;
|
||||
|
||||
/*
|
||||
* Hash calculated from the Records (matches block.hash in
|
||||
* valid files).
|
||||
*/
|
||||
struct xz_dec_hash hash;
|
||||
} index;
|
||||
|
||||
/*
|
||||
* Temporary buffer needed to hold Stream Header, Block Header,
|
||||
* and Stream Footer. The Block Header is the biggest (1 KiB)
|
||||
* so we reserve space according to that. buf[] has to be aligned
|
||||
* to a multiple of four bytes; the size_t variables before it
|
||||
* should guarantee this.
|
||||
*/
|
||||
struct {
|
||||
size_t pos;
|
||||
size_t size;
|
||||
uint8_t buf[1024];
|
||||
} temp;
|
||||
|
||||
struct xz_dec_lzma2 *lzma2;
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
struct xz_dec_bcj *bcj;
|
||||
bool bcj_active;
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
/* Sizes of the Check field with different Check IDs */
|
||||
static const uint8_t check_sizes[16] = {
|
||||
0,
|
||||
4, 4, 4,
|
||||
8, 8, 8,
|
||||
16, 16, 16,
|
||||
32, 32, 32,
|
||||
64, 64, 64
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
|
||||
* must have set s->temp.pos to indicate how much data we are supposed
|
||||
* to copy into s->temp.buf. Return true once s->temp.pos has reached
|
||||
* s->temp.size.
|
||||
*/
|
||||
static bool fill_temp(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
size_t copy_size = min_t(size_t,
|
||||
b->in_size - b->in_pos, s->temp.size - s->temp.pos);
|
||||
|
||||
memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
|
||||
b->in_pos += copy_size;
|
||||
s->temp.pos += copy_size;
|
||||
|
||||
if (s->temp.pos == s->temp.size) {
|
||||
s->temp.pos = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Decode a variable-length integer (little-endian base-128 encoding) */
|
||||
static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
|
||||
size_t *in_pos, size_t in_size)
|
||||
{
|
||||
uint8_t byte;
|
||||
|
||||
if (s->pos == 0)
|
||||
s->vli = 0;
|
||||
|
||||
while (*in_pos < in_size) {
|
||||
byte = in[*in_pos];
|
||||
++*in_pos;
|
||||
|
||||
s->vli |= (vli_type)(byte & 0x7F) << s->pos;
|
||||
|
||||
if ((byte & 0x80) == 0) {
|
||||
/* Don't allow non-minimal encodings. */
|
||||
if (byte == 0 && s->pos != 0)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->pos = 0;
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
s->pos += 7;
|
||||
if (s->pos == 7 * VLI_BYTES_MAX)
|
||||
return XZ_DATA_ERROR;
|
||||
}
|
||||
|
||||
return XZ_OK;
|
||||
}
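For reference, the inverse of dec_vli() is straightforward: each byte carries seven payload bits, least-significant group first, with the high bit set on every byte except the last. The sketch below is a hypothetical encoder, shown only to illustrate the format this decoder expects (at most VLI_BYTES_MAX = 9 bytes for a 63-bit value, and no non-minimal trailing 0x00 bytes).

#include <stdint.h>
#include <stddef.h>

/* Encode v in the little-endian base-128 form that dec_vli() parses. */
static size_t vli_encode(uint64_t v, uint8_t out[9])
{
        size_t n = 0;

        while (v >= 0x80) {
                out[n++] = (uint8_t)v | 0x80;   /* 7 payload bits, continuation bit set */
                v >>= 7;
        }
        out[n++] = (uint8_t)v;                  /* final byte: continuation bit clear */

        return n;                               /* e.g. 300 encodes as AC 02 (2 bytes) */
}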
|
||||
|
||||
/*
|
||||
* Decode the Compressed Data field from a Block. Update and validate
|
||||
* the observed compressed and uncompressed sizes of the Block so that
|
||||
* they don't exceed the values possibly stored in the Block Header
|
||||
* (validation assumes that no integer overflow occurs, since vli_type
|
||||
* is normally uint64_t). Update the CRC32 if presence of the CRC32
|
||||
* field was indicated in Stream Header.
|
||||
*
|
||||
* Once the decoding is finished, validate that the observed sizes match
|
||||
* the sizes possibly stored in the Block Header. Update the hash and
|
||||
* Block count, which are later used to validate the Index field.
|
||||
*/
|
||||
static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
enum xz_ret ret;
|
||||
|
||||
s->in_start = b->in_pos;
|
||||
s->out_start = b->out_pos;
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
if (s->bcj_active)
|
||||
ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
|
||||
else
|
||||
#endif
|
||||
ret = xz_dec_lzma2_run(s->lzma2, b);
|
||||
|
||||
s->block.compressed += b->in_pos - s->in_start;
|
||||
s->block.uncompressed += b->out_pos - s->out_start;
|
||||
|
||||
/*
|
||||
* There is no need to separately check for VLI_UNKNOWN, since
|
||||
* the observed sizes are always smaller than VLI_UNKNOWN.
|
||||
*/
|
||||
if (s->block.compressed > s->block_header.compressed
|
||||
|| s->block.uncompressed
|
||||
> s->block_header.uncompressed)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (s->check_type == XZ_CHECK_CRC32)
|
||||
s->crc32 = xz_crc32(b->out + s->out_start,
|
||||
b->out_pos - s->out_start, s->crc32);
|
||||
|
||||
if (ret == XZ_STREAM_END) {
|
||||
if (s->block_header.compressed != VLI_UNKNOWN
|
||||
&& s->block_header.compressed
|
||||
!= s->block.compressed)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (s->block_header.uncompressed != VLI_UNKNOWN
|
||||
&& s->block_header.uncompressed
|
||||
!= s->block.uncompressed)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->block.hash.unpadded += s->block_header.size
|
||||
+ s->block.compressed;
|
||||
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
s->block.hash.unpadded += check_sizes[s->check_type];
|
||||
#else
|
||||
if (s->check_type == XZ_CHECK_CRC32)
|
||||
s->block.hash.unpadded += 4;
|
||||
#endif
|
||||
|
||||
s->block.hash.uncompressed += s->block.uncompressed;
|
||||
s->block.hash.crc32 = xz_crc32(
|
||||
(const uint8_t *)&s->block.hash,
|
||||
sizeof(s->block.hash), s->block.hash.crc32);
|
||||
|
||||
++s->block.count;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Update the Index size and the CRC32 value. */
|
||||
static void index_update(struct xz_dec *s, const struct xz_buf *b)
|
||||
{
|
||||
size_t in_used = b->in_pos - s->in_start;
|
||||
s->index.size += in_used;
|
||||
s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode the Number of Records, Unpadded Size, and Uncompressed Size
|
||||
* fields from the Index field. That is, Index Padding and CRC32 are not
|
||||
* decoded by this function.
|
||||
*
|
||||
* This can return XZ_OK (more input needed), XZ_STREAM_END (everything
|
||||
* successfully decoded), or XZ_DATA_ERROR (input is corrupt).
|
||||
*/
|
||||
static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
enum xz_ret ret;
|
||||
|
||||
do {
|
||||
ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
|
||||
if (ret != XZ_STREAM_END) {
|
||||
index_update(s, b);
|
||||
return ret;
|
||||
}
|
||||
|
||||
switch (s->index.sequence) {
|
||||
case SEQ_INDEX_COUNT:
|
||||
s->index.count = s->vli;
|
||||
|
||||
/*
|
||||
* Validate that the Number of Records field
|
||||
* indicates the same number of Records as
|
||||
* there were Blocks in the Stream.
|
||||
*/
|
||||
if (s->index.count != s->block.count)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->index.sequence = SEQ_INDEX_UNPADDED;
|
||||
break;
|
||||
|
||||
case SEQ_INDEX_UNPADDED:
|
||||
s->index.hash.unpadded += s->vli;
|
||||
s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
|
||||
break;
|
||||
|
||||
case SEQ_INDEX_UNCOMPRESSED:
|
||||
s->index.hash.uncompressed += s->vli;
|
||||
s->index.hash.crc32 = xz_crc32(
|
||||
(const uint8_t *)&s->index.hash,
|
||||
sizeof(s->index.hash),
|
||||
s->index.hash.crc32);
|
||||
--s->index.count;
|
||||
s->index.sequence = SEQ_INDEX_UNPADDED;
|
||||
break;
|
||||
}
|
||||
} while (s->index.count > 0);
|
||||
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate that the next four input bytes match the value of s->crc32.
|
||||
* s->pos must be zero when starting to validate the first byte.
|
||||
*/
|
||||
static enum xz_ret crc32_validate(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
do {
|
||||
if (b->in_pos == b->in_size)
|
||||
return XZ_OK;
|
||||
|
||||
if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++])
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->pos += 8;
|
||||
|
||||
} while (s->pos < 32);
|
||||
|
||||
s->crc32 = 0;
|
||||
s->pos = 0;
|
||||
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
/*
|
||||
* Skip over the Check field when the Check ID is not supported.
|
||||
* Returns true once the whole Check field has been skipped over.
|
||||
*/
|
||||
static bool check_skip(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
while (s->pos < check_sizes[s->check_type]) {
|
||||
if (b->in_pos == b->in_size)
|
||||
return false;
|
||||
|
||||
++b->in_pos;
|
||||
++s->pos;
|
||||
}
|
||||
|
||||
s->pos = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
|
||||
static enum xz_ret dec_stream_header(struct xz_dec *s)
|
||||
{
|
||||
if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
|
||||
return XZ_FORMAT_ERROR;
|
||||
|
||||
if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
|
||||
!= get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
/*
|
||||
* Of integrity checks, we support only none (Check ID = 0) and
|
||||
* CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
|
||||
* we will accept other check types too, but then the check won't
|
||||
* be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
|
||||
*/
|
||||
s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
|
||||
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
if (s->check_type > XZ_CHECK_MAX)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
if (s->check_type > XZ_CHECK_CRC32)
|
||||
return XZ_UNSUPPORTED_CHECK;
|
||||
#else
|
||||
if (s->check_type > XZ_CHECK_CRC32)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
#endif
|
||||
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
|
||||
static enum xz_ret dec_stream_footer(struct xz_dec *s)
|
||||
{
|
||||
if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
/*
|
||||
* Validate Backward Size. Note that we never added the size of the
|
||||
* Index CRC32 field to s->index.size, thus we use s->index.size / 4
|
||||
* instead of s->index.size / 4 - 1.
|
||||
*/
|
||||
if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
/*
|
||||
* Use XZ_STREAM_END instead of XZ_OK to be more convenient
|
||||
* for the caller.
|
||||
*/
|
||||
return XZ_STREAM_END;
|
||||
}
|
||||
|
||||
/* Decode the Block Header and initialize the filter chain. */
|
||||
static enum xz_ret dec_block_header(struct xz_dec *s)
|
||||
{
|
||||
enum xz_ret ret;
|
||||
|
||||
/*
|
||||
* Validate the CRC32. We know that the temp buffer is at least
|
||||
* eight bytes so this is safe.
|
||||
*/
|
||||
s->temp.size -= 4;
|
||||
if (xz_crc32(s->temp.buf, s->temp.size, 0)
|
||||
!= get_le32(s->temp.buf + s->temp.size))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->temp.pos = 2;
|
||||
|
||||
/*
|
||||
* Catch unsupported Block Flags. We support only one or two filters
|
||||
* in the chain, so we catch that with the same test.
|
||||
*/
|
||||
#ifdef XZ_DEC_BCJ
|
||||
if (s->temp.buf[1] & 0x3E)
|
||||
#else
|
||||
if (s->temp.buf[1] & 0x3F)
|
||||
#endif
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
/* Compressed Size */
|
||||
if (s->temp.buf[1] & 0x40) {
|
||||
if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
|
||||
!= XZ_STREAM_END)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->block_header.compressed = s->vli;
|
||||
} else {
|
||||
s->block_header.compressed = VLI_UNKNOWN;
|
||||
}
|
||||
|
||||
/* Uncompressed Size */
|
||||
if (s->temp.buf[1] & 0x80) {
|
||||
if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
|
||||
!= XZ_STREAM_END)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->block_header.uncompressed = s->vli;
|
||||
} else {
|
||||
s->block_header.uncompressed = VLI_UNKNOWN;
|
||||
}
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
/* If there are two filters, the first one must be a BCJ filter. */
|
||||
s->bcj_active = s->temp.buf[1] & 0x01;
|
||||
if (s->bcj_active) {
|
||||
if (s->temp.size - s->temp.pos < 2)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
|
||||
if (ret != XZ_OK)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* We don't support custom start offset,
|
||||
* so Size of Properties must be zero.
|
||||
*/
|
||||
if (s->temp.buf[s->temp.pos++] != 0x00)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Valid Filter Flags always take at least two bytes. */
|
||||
if (s->temp.size - s->temp.pos < 2)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
/* Filter ID = LZMA2 */
|
||||
if (s->temp.buf[s->temp.pos++] != 0x21)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
/* Size of Properties = 1-byte Filter Properties */
|
||||
if (s->temp.buf[s->temp.pos++] != 0x01)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
/* Filter Properties contains LZMA2 dictionary size. */
|
||||
if (s->temp.size - s->temp.pos < 1)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
|
||||
if (ret != XZ_OK)
|
||||
return ret;
|
||||
|
||||
/* The rest must be Header Padding. */
|
||||
while (s->temp.pos < s->temp.size)
|
||||
if (s->temp.buf[s->temp.pos++] != 0x00)
|
||||
return XZ_OPTIONS_ERROR;
|
||||
|
||||
s->temp.pos = 0;
|
||||
s->block.compressed = 0;
|
||||
s->block.uncompressed = 0;
|
||||
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
enum xz_ret ret;
|
||||
|
||||
/*
|
||||
* Store the start position for the case when we are in the middle
|
||||
* of the Index field.
|
||||
*/
|
||||
s->in_start = b->in_pos;
|
||||
|
||||
while (true) {
|
||||
switch (s->sequence) {
|
||||
case SEQ_STREAM_HEADER:
|
||||
/*
|
||||
* Stream Header is copied to s->temp, and then
|
||||
* decoded from there. This way if the caller
|
||||
* gives us only little input at a time, we can
|
||||
* still keep the Stream Header decoding code
|
||||
* simple. A similar approach is used in many places
|
||||
* in this file.
|
||||
*/
|
||||
if (!fill_temp(s, b))
|
||||
return XZ_OK;
|
||||
|
||||
/*
|
||||
* If dec_stream_header() returns
|
||||
* XZ_UNSUPPORTED_CHECK, it is still possible
|
||||
* to continue decoding if working in multi-call
|
||||
* mode. Thus, update s->sequence before calling
|
||||
* dec_stream_header().
|
||||
*/
|
||||
s->sequence = SEQ_BLOCK_START;
|
||||
|
||||
ret = dec_stream_header(s);
|
||||
if (ret != XZ_OK)
|
||||
return ret;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_START:
|
||||
/* We need one byte of input to continue. */
|
||||
if (b->in_pos == b->in_size)
|
||||
return XZ_OK;
|
||||
|
||||
/* See if this is the beginning of the Index field. */
|
||||
if (b->in[b->in_pos] == 0) {
|
||||
s->in_start = b->in_pos++;
|
||||
s->sequence = SEQ_INDEX;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the size of the Block Header and
|
||||
* prepare to decode it.
|
||||
*/
|
||||
s->block_header.size
|
||||
= ((uint32_t)b->in[b->in_pos] + 1) * 4;
|
||||
|
||||
s->temp.size = s->block_header.size;
|
||||
s->temp.pos = 0;
|
||||
s->sequence = SEQ_BLOCK_HEADER;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_HEADER:
|
||||
if (!fill_temp(s, b))
|
||||
return XZ_OK;
|
||||
|
||||
ret = dec_block_header(s);
|
||||
if (ret != XZ_OK)
|
||||
return ret;
|
||||
|
||||
s->sequence = SEQ_BLOCK_UNCOMPRESS;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_UNCOMPRESS:
|
||||
ret = dec_block(s, b);
|
||||
if (ret != XZ_STREAM_END)
|
||||
return ret;
|
||||
|
||||
s->sequence = SEQ_BLOCK_PADDING;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_PADDING:
|
||||
/*
|
||||
* Size of Compressed Data + Block Padding
|
||||
* must be a multiple of four. We don't need
|
||||
* s->block.compressed for anything else
|
||||
* anymore, so we use it here to test the size
|
||||
* of the Block Padding field.
|
||||
*/
|
||||
while (s->block.compressed & 3) {
|
||||
if (b->in_pos == b->in_size)
|
||||
return XZ_OK;
|
||||
|
||||
if (b->in[b->in_pos++] != 0)
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
++s->block.compressed;
|
||||
}
|
||||
|
||||
s->sequence = SEQ_BLOCK_CHECK;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_BLOCK_CHECK:
|
||||
if (s->check_type == XZ_CHECK_CRC32) {
|
||||
ret = crc32_validate(s, b);
|
||||
if (ret != XZ_STREAM_END)
|
||||
return ret;
|
||||
}
|
||||
#ifdef XZ_DEC_ANY_CHECK
|
||||
else if (!check_skip(s, b)) {
|
||||
return XZ_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
s->sequence = SEQ_BLOCK_START;
|
||||
break;
|
||||
|
||||
case SEQ_INDEX:
|
||||
ret = dec_index(s, b);
|
||||
if (ret != XZ_STREAM_END)
|
||||
return ret;
|
||||
|
||||
s->sequence = SEQ_INDEX_PADDING;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_INDEX_PADDING:
|
||||
while ((s->index.size + (b->in_pos - s->in_start))
|
||||
& 3) {
|
||||
if (b->in_pos == b->in_size) {
|
||||
index_update(s, b);
|
||||
return XZ_OK;
|
||||
}
|
||||
|
||||
if (b->in[b->in_pos++] != 0)
|
||||
return XZ_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Finish the CRC32 value and Index size. */
|
||||
index_update(s, b);
|
||||
|
||||
/* Compare the hashes to validate the Index field. */
|
||||
if (!memeq(&s->block.hash, &s->index.hash,
|
||||
sizeof(s->block.hash)))
|
||||
return XZ_DATA_ERROR;
|
||||
|
||||
s->sequence = SEQ_INDEX_CRC32;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_INDEX_CRC32:
|
||||
ret = crc32_validate(s, b);
|
||||
if (ret != XZ_STREAM_END)
|
||||
return ret;
|
||||
|
||||
s->temp.size = STREAM_HEADER_SIZE;
|
||||
s->sequence = SEQ_STREAM_FOOTER;
|
||||
|
||||
/* fallthrough */
|
||||
case SEQ_STREAM_FOOTER:
|
||||
if (!fill_temp(s, b))
|
||||
return XZ_OK;
|
||||
|
||||
return dec_stream_footer(s);
|
||||
}
|
||||
}
|
||||
|
||||
/* Never reached */
|
||||
}
|
||||
|
||||
/*
|
||||
* xz_dec_run() is a wrapper for dec_main() to handle some special cases in
|
||||
* multi-call and single-call decoding.
|
||||
*
|
||||
* In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
|
||||
* are not going to make any progress anymore. This is to prevent the caller
|
||||
* from calling us infinitely when the input file is truncated or otherwise
|
||||
* corrupt. Since the zlib-style API allows the caller to fill the input buffer
|
||||
* only when the decoder doesn't produce any new output, we have to be careful
|
||||
* to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
|
||||
* after the second consecutive call to xz_dec_run() that makes no progress.
|
||||
*
|
||||
* In single-call mode, if we couldn't decode everything and no error
|
||||
* occurred, either the input is truncated or the output buffer is too small.
|
||||
* Since we know that the last input byte never produces any output, we know
|
||||
* that if all the input was consumed and decoding wasn't finished, the file
|
||||
* must be corrupt. Otherwise the output buffer has to be too small or the
|
||||
* file is corrupt in a way that decoding it produces too big output.
|
||||
*
|
||||
* If single-call decoding fails, we reset b->in_pos and b->out_pos back to
|
||||
* their original values. This is because with some filter chains there won't
|
||||
* be any valid uncompressed data in the output buffer unless the decoding
|
||||
* actually succeeds (that's the price to pay for using the output buffer as
|
||||
* the workspace).
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
|
||||
{
|
||||
size_t in_start;
|
||||
size_t out_start;
|
||||
enum xz_ret ret;
|
||||
|
||||
if (DEC_IS_SINGLE(s->mode))
|
||||
xz_dec_reset(s);
|
||||
|
||||
in_start = b->in_pos;
|
||||
out_start = b->out_pos;
|
||||
ret = dec_main(s, b);
|
||||
|
||||
if (DEC_IS_SINGLE(s->mode)) {
|
||||
if (ret == XZ_OK)
|
||||
ret = b->in_pos == b->in_size
|
||||
? XZ_DATA_ERROR : XZ_BUF_ERROR;
|
||||
|
||||
if (ret != XZ_STREAM_END) {
|
||||
b->in_pos = in_start;
|
||||
b->out_pos = out_start;
|
||||
}
|
||||
|
||||
} else if (ret == XZ_OK && in_start == b->in_pos
|
||||
&& out_start == b->out_pos) {
|
||||
if (s->allow_buf_error)
|
||||
ret = XZ_BUF_ERROR;
|
||||
|
||||
s->allow_buf_error = true;
|
||||
} else {
|
||||
s->allow_buf_error = false;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
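For context on how the entry points above fit together, here is a minimal usage sketch in single-call mode. It assumes a hosted userspace build where xz.h declares xz_dec_init(), xz_dec_run(), xz_dec_end(), xz_crc32_init() and struct xz_buf as used throughout this file; error handling is abbreviated.

#include <stdint.h>
#include <stddef.h>
#include "xz.h"     /* public API of this mini-decoder (assumed userspace build) */

/* Decode a whole in-memory .xz stream in one shot using XZ_SINGLE mode. */
static int decode_buffer(const uint8_t *in, size_t in_size,
                         uint8_t *out, size_t out_size, size_t *out_used)
{
        struct xz_buf b;
        struct xz_dec *s;
        enum xz_ret ret;

        xz_crc32_init();                /* needed when the bundled CRC32 is used */

        s = xz_dec_init(XZ_SINGLE, 0);  /* dict_max is not used in single-call mode */
        if (s == NULL)
                return -1;

        b.in = in;
        b.in_pos = 0;
        b.in_size = in_size;
        b.out = out;
        b.out_pos = 0;
        b.out_size = out_size;

        ret = xz_dec_run(s, &b);        /* XZ_STREAM_END means complete success */
        xz_dec_end(s);

        if (ret != XZ_STREAM_END)
                return -1;

        *out_used = b.out_pos;
        return 0;
}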
|
||||
|
||||
XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max)
|
||||
{
|
||||
struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
if (s == NULL)
|
||||
return NULL;
|
||||
|
||||
s->mode = mode;
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
|
||||
if (s->bcj == NULL)
|
||||
goto error_bcj;
|
||||
#endif
|
||||
|
||||
s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
|
||||
if (s->lzma2 == NULL)
|
||||
goto error_lzma2;
|
||||
|
||||
xz_dec_reset(s);
|
||||
return s;
|
||||
|
||||
error_lzma2:
|
||||
#ifdef XZ_DEC_BCJ
|
||||
xz_dec_bcj_end(s->bcj);
|
||||
error_bcj:
|
||||
#endif
|
||||
kfree(s);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
XZ_EXTERN void xz_dec_reset(struct xz_dec *s)
|
||||
{
|
||||
s->sequence = SEQ_STREAM_HEADER;
|
||||
s->allow_buf_error = false;
|
||||
s->pos = 0;
|
||||
s->crc32 = 0;
|
||||
memzero(&s->block, sizeof(s->block));
|
||||
memzero(&s->index, sizeof(s->index));
|
||||
s->temp.pos = 0;
|
||||
s->temp.size = STREAM_HEADER_SIZE;
|
||||
}
|
||||
|
||||
XZ_EXTERN void xz_dec_end(struct xz_dec *s)
|
||||
{
|
||||
if (s != NULL) {
|
||||
xz_dec_lzma2_end(s->lzma2);
|
||||
#ifdef XZ_DEC_BCJ
|
||||
xz_dec_bcj_end(s->bcj);
|
||||
#endif
|
||||
kfree(s);
|
||||
}
|
||||
}
|
||||
204
src/common/unix/xzminidec/src/xz_lzma2.h
Normal file
@@ -0,0 +1,204 @@
|
||||
/*
|
||||
* LZMA2 definitions
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#ifndef XZ_LZMA2_H
|
||||
#define XZ_LZMA2_H
|
||||
|
||||
/* Range coder constants */
|
||||
#define RC_SHIFT_BITS 8
|
||||
#define RC_TOP_BITS 24
|
||||
#define RC_TOP_VALUE (1 << RC_TOP_BITS)
|
||||
#define RC_BIT_MODEL_TOTAL_BITS 11
|
||||
#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
|
||||
#define RC_MOVE_BITS 5
|
||||
|
||||
/*
|
||||
* Maximum number of position states. A position state is the lowest pb
|
||||
* number of bits of the current uncompressed offset. In some places there
|
||||
* are different sets of probabilities for different position states.
|
||||
*/
|
||||
#define POS_STATES_MAX (1 << 4)
|
||||
|
||||
/*
|
||||
* This enum is used to track which LZMA symbols have occurred most recently
|
||||
* and in which order. This information is used to predict the next symbol.
|
||||
*
|
||||
* Symbols:
|
||||
* - Literal: One 8-bit byte
|
||||
* - Match: Repeat a chunk of data at some distance
|
||||
* - Long repeat: Multi-byte match at a recently seen distance
|
||||
* - Short repeat: One-byte repeat at a recently seen distance
|
||||
*
|
||||
* The symbol names are in the form STATE_oldest_older_previous. REP means
|
||||
* either short or long repeated match, and NONLIT means any non-literal.
|
||||
*/
|
||||
enum lzma_state {
|
||||
STATE_LIT_LIT,
|
||||
STATE_MATCH_LIT_LIT,
|
||||
STATE_REP_LIT_LIT,
|
||||
STATE_SHORTREP_LIT_LIT,
|
||||
STATE_MATCH_LIT,
|
||||
STATE_REP_LIT,
|
||||
STATE_SHORTREP_LIT,
|
||||
STATE_LIT_MATCH,
|
||||
STATE_LIT_LONGREP,
|
||||
STATE_LIT_SHORTREP,
|
||||
STATE_NONLIT_MATCH,
|
||||
STATE_NONLIT_REP
|
||||
};
|
||||
|
||||
/* Total number of states */
|
||||
#define STATES 12
|
||||
|
||||
/* The lowest 7 states indicate that the previous state was a literal. */
|
||||
#define LIT_STATES 7
|
||||
|
||||
/* Indicate that the latest symbol was a literal. */
|
||||
static inline void lzma_state_literal(enum lzma_state *state)
|
||||
{
|
||||
if (*state <= STATE_SHORTREP_LIT_LIT)
|
||||
*state = STATE_LIT_LIT;
|
||||
else if (*state <= STATE_LIT_SHORTREP)
|
||||
*state -= 3;
|
||||
else
|
||||
*state -= 6;
|
||||
}
|
||||
|
||||
/* Indicate that the latest symbol was a match. */
|
||||
static inline void lzma_state_match(enum lzma_state *state)
|
||||
{
|
||||
*state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
|
||||
}
|
||||
|
||||
/* Indicate that the latest state was a long repeated match. */
|
||||
static inline void lzma_state_long_rep(enum lzma_state *state)
|
||||
{
|
||||
*state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
|
||||
}
|
||||
|
||||
/* Indicate that the latest symbol was a short match. */
|
||||
static inline void lzma_state_short_rep(enum lzma_state *state)
|
||||
{
|
||||
*state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
|
||||
}
|
||||
|
||||
/* Test if the previous symbol was a literal. */
|
||||
static inline bool lzma_state_is_literal(enum lzma_state state)
|
||||
{
|
||||
return state < LIT_STATES;
|
||||
}
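To see how the helpers above move through the state machine, here is a tiny trace (illustrative only, not part of this header) for the symbol sequence literal, match, literal, starting from STATE_LIT_LIT. It assumes the header is usable from a hosted build.

#include <assert.h>
#include <stdint.h>
#include <stdbool.h>
#include "xz_lzma2.h"   /* assumed to compile standalone with the includes above */

static void trace_states(void)
{
        enum lzma_state st = STATE_LIT_LIT;

        lzma_state_match(&st);          /* literal history, then a match */
        assert(st == STATE_LIT_MATCH);

        lzma_state_literal(&st);        /* match history, then a literal */
        assert(st == STATE_MATCH_LIT);
        assert(lzma_state_is_literal(st));
}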
|
||||
|
||||
/* Each literal coder is divided into three sections:
|
||||
* - 0x001-0x0FF: Without match byte
|
||||
* - 0x101-0x1FF: With match byte; match bit is 0
|
||||
* - 0x201-0x2FF: With match byte; match bit is 1
|
||||
*
|
||||
* Match byte is used when the previous LZMA symbol was something other than
|
||||
* a literal (that is, it was some kind of match).
|
||||
*/
|
||||
#define LITERAL_CODER_SIZE 0x300
|
||||
|
||||
/* Maximum number of literal coders */
|
||||
#define LITERAL_CODERS_MAX (1 << 4)
|
||||
|
||||
/* Minimum length of a match is two bytes. */
|
||||
#define MATCH_LEN_MIN 2
|
||||
|
||||
/* Match length is encoded with 4, 5, or 10 bits.
|
||||
*
|
||||
* Length Bits
|
||||
* 2-9 4 = Choice=0 + 3 bits
|
||||
* 10-17 5 = Choice=1 + Choice2=0 + 3 bits
|
||||
* 18-273 10 = Choice=1 + Choice2=1 + 8 bits
|
||||
*/
|
||||
#define LEN_LOW_BITS 3
|
||||
#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
|
||||
#define LEN_MID_BITS 3
|
||||
#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
|
||||
#define LEN_HIGH_BITS 8
|
||||
#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
|
||||
#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
|
||||
|
||||
/*
|
||||
* Maximum length of a match is 273 which is a result of the encoding
|
||||
* described above.
|
||||
*/
|
||||
#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
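A small helper, shown here only as an illustration of the 4/5/10-bit scheme described above (it is not part of this header), reports how many bits a given match length costs using the Choice/Choice2 prefixes plus the low, mid, and high length groups.

#include <stdint.h>
#include <stdbool.h>
#include "xz_lzma2.h"   /* assumed to be usable from a hosted build */

/* Bits needed to code a match length, per the table above. */
static unsigned match_len_bits(unsigned len)
{
        if (len < MATCH_LEN_MIN || len > MATCH_LEN_MAX)
                return 0;                       /* out of range */

        if (len < MATCH_LEN_MIN + LEN_LOW_SYMBOLS)
                return 1 + LEN_LOW_BITS;        /* Choice=0            -> 2..9,   4 bits  */

        if (len < MATCH_LEN_MIN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS)
                return 2 + LEN_MID_BITS;        /* Choice=1, Choice2=0 -> 10..17, 5 bits  */

        return 2 + LEN_HIGH_BITS;               /* Choice=1, Choice2=1 -> 18..273, 10 bits */
}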
|
||||
|
||||
/*
|
||||
* Different sets of probabilities are used for match distances that have
|
||||
* very short match length: Lengths of 2, 3, and 4 bytes have a separate
|
||||
* set of probabilities for each length. The matches with longer length
|
||||
* use a shared set of probabilities.
|
||||
*/
|
||||
#define DIST_STATES 4
|
||||
|
||||
/*
|
||||
* Get the index of the appropriate probability array for decoding
|
||||
* the distance slot.
|
||||
*/
|
||||
static inline uint32_t lzma_get_dist_state(uint32_t len)
|
||||
{
|
||||
return len < DIST_STATES + MATCH_LEN_MIN
|
||||
? len - MATCH_LEN_MIN : DIST_STATES - 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The highest two bits of a 32-bit match distance are encoded using six bits.
|
||||
* This six-bit value is called a distance slot. This way encoding a 32-bit
|
||||
* value takes 6-36 bits, larger values taking more bits.
|
||||
*/
|
||||
#define DIST_SLOT_BITS 6
|
||||
#define DIST_SLOTS (1 << DIST_SLOT_BITS)
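The decoder never needs to compute a slot from a distance, but stating the mapping may help when reading the definitions that follow. The sketch below is an encoder-side illustration only (it assumes the GCC/Clang __builtin_clz builtin): the slot is built from the position of the highest set bit and the bit just below it.

#include <stdint.h>

/* Distance -> six-bit distance slot (encoder-side view of the scheme above). */
static uint32_t dist_to_slot(uint32_t dist)
{
        uint32_t n;

        if (dist < 4)
                return dist;                    /* slots 0-3 are the distance itself */

        n = 31 - (uint32_t)__builtin_clz(dist); /* index of the highest set bit */

        /* Highest bit selects the slot pair, the bit below it picks within it. */
        return (n << 1) | ((dist >> (n - 1)) & 1);
        /* e.g. distances 128..191 all map to slot 14 */
}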
|
||||
|
||||
/* Match distances up to 127 are fully encoded using probabilities. Since
|
||||
* the highest two bits (distance slot) are always encoded using six bits,
|
||||
* the distances 0-3 don't need any additional bits to encode, since the
|
||||
* distance slot itself is the same as the actual distance. DIST_MODEL_START
|
||||
* indicates the first distance slot where at least one additional bit is
|
||||
* needed.
|
||||
*/
|
||||
#define DIST_MODEL_START 4
|
||||
|
||||
/*
|
||||
* Match distances greater than 127 are encoded in three pieces:
|
||||
* - distance slot: the highest two bits
|
||||
* - direct bits: 2-26 bits below the highest two bits
|
||||
* - alignment bits: four lowest bits
|
||||
*
|
||||
* Direct bits don't use any probabilities.
|
||||
*
|
||||
* The distance slot value of 14 is for distances 128-191.
|
||||
*/
|
||||
#define DIST_MODEL_END 14
|
||||
|
||||
/* Distance slots that indicate a distance <= 127. */
|
||||
#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
|
||||
#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
|
||||
|
||||
/*
|
||||
* For match distances greater than 127, only the highest two bits and the
|
||||
* lowest four bits (alignment) are encoded using probabilities.
|
||||
*/
|
||||
#define ALIGN_BITS 4
|
||||
#define ALIGN_SIZE (1 << ALIGN_BITS)
|
||||
#define ALIGN_MASK (ALIGN_SIZE - 1)
|
||||
|
||||
/* Total number of all probability variables */
|
||||
#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
|
||||
|
||||
/*
|
||||
* LZMA remembers the four most recent match distances. Reusing these
|
||||
* distances tends to take less space than re-encoding the actual
|
||||
* distance value.
|
||||
*/
|
||||
#define REPS 4
|
||||
|
||||
#endif
|
||||
156
src/common/unix/xzminidec/src/xz_private.h
Normal file
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
* Private includes and definitions
|
||||
*
|
||||
* Author: Lasse Collin <lasse.collin@tukaani.org>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
#ifndef XZ_PRIVATE_H
|
||||
#define XZ_PRIVATE_H
|
||||
|
||||
#ifdef __KERNEL__
|
||||
# include <linux/xz.h>
|
||||
# include <linux/kernel.h>
|
||||
# include <asm/unaligned.h>
|
||||
/* XZ_PREBOOT may be defined only via decompress_unxz.c. */
|
||||
# ifndef XZ_PREBOOT
|
||||
# include <linux/slab.h>
|
||||
# include <linux/vmalloc.h>
|
||||
# include <linux/string.h>
|
||||
# ifdef CONFIG_XZ_DEC_X86
|
||||
# define XZ_DEC_X86
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_POWERPC
|
||||
# define XZ_DEC_POWERPC
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_IA64
|
||||
# define XZ_DEC_IA64
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_ARM
|
||||
# define XZ_DEC_ARM
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_ARMTHUMB
|
||||
# define XZ_DEC_ARMTHUMB
|
||||
# endif
|
||||
# ifdef CONFIG_XZ_DEC_SPARC
|
||||
# define XZ_DEC_SPARC
|
||||
# endif
|
||||
# define memeq(a, b, size) (memcmp(a, b, size) == 0)
|
||||
# define memzero(buf, size) memset(buf, 0, size)
|
||||
# endif
|
||||
# define get_le32(p) le32_to_cpup((const uint32_t *)(p))
|
||||
#else
|
||||
/*
|
||||
* For userspace builds, use a separate header to define the required
|
||||
* macros and functions. This makes it easier to adapt the code into
|
||||
* different environments and avoids clutter in the Linux kernel tree.
|
||||
*/
|
||||
# include "xz_config.h"
|
||||
#endif
|
||||
|
||||
/* If no specific decoding mode is requested, enable support for all modes. */
|
||||
#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \
|
||||
&& !defined(XZ_DEC_DYNALLOC)
|
||||
# define XZ_DEC_SINGLE
|
||||
# define XZ_DEC_PREALLOC
|
||||
# define XZ_DEC_DYNALLOC
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The DEC_IS_foo(mode) macros are used in "if" statements. If only some
|
||||
* of the supported modes are enabled, these macros will evaluate to true or
|
||||
* false at compile time and thus allow the compiler to omit unneeded code.
|
||||
*/
|
||||
#ifdef XZ_DEC_SINGLE
|
||||
# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
|
||||
#else
|
||||
# define DEC_IS_SINGLE(mode) (false)
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_PREALLOC
|
||||
# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
|
||||
#else
|
||||
# define DEC_IS_PREALLOC(mode) (false)
|
||||
#endif
|
||||
|
||||
#ifdef XZ_DEC_DYNALLOC
|
||||
# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
|
||||
#else
|
||||
# define DEC_IS_DYNALLOC(mode) (false)
|
||||
#endif
|
||||
|
||||
#if !defined(XZ_DEC_SINGLE)
|
||||
# define DEC_IS_MULTI(mode) (true)
|
||||
#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC)
|
||||
# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
|
||||
#else
|
||||
# define DEC_IS_MULTI(mode) (false)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
|
||||
* XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
|
||||
*/
|
||||
#ifndef XZ_DEC_BCJ
|
||||
# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
|| defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
|| defined(XZ_DEC_ARMTHUMB) \
|| defined(XZ_DEC_SPARC)
|
||||
# define XZ_DEC_BCJ
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
|
||||
* before calling xz_dec_lzma2_run().
|
||||
*/
|
||||
XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
|
||||
uint32_t dict_max);
|
||||
|
||||
/*
|
||||
* Decode the LZMA2 properties (one byte) and reset the decoder. Return
|
||||
* XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
|
||||
* big enough, and XZ_OPTIONS_ERROR if props indicates something that this
|
||||
* decoder doesn't support.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
|
||||
uint8_t props);
|
||||
|
||||
/* Decode raw LZMA2 stream from b->in to b->out. */
|
||||
XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
|
||||
struct xz_buf *b);
|
||||
|
||||
/* Free the memory allocated for the LZMA2 decoder. */
|
||||
XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
|
||||
|
||||
#ifdef XZ_DEC_BCJ
|
||||
/*
|
||||
* Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
|
||||
* calling xz_dec_bcj_run().
|
||||
*/
|
||||
XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call);
|
||||
|
||||
/*
|
||||
* Decode the Filter ID of a BCJ filter. This implementation doesn't
|
||||
* support custom start offsets, so no decoding of Filter Properties
|
||||
* is needed. Returns XZ_OK if the given Filter ID is supported.
|
||||
* Otherwise XZ_OPTIONS_ERROR is returned.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
|
||||
|
||||
/*
|
||||
* Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
|
||||
* a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
|
||||
* must be called directly.
|
||||
*/
|
||||
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
|
||||
struct xz_dec_lzma2 *lzma2,
|
||||
struct xz_buf *b);
|
||||
|
||||
/* Free the memory allocated for the BCJ filters. */
|
||||
#define xz_dec_bcj_end(s) kfree(s)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
62
src/common/unix/xzminidec/src/xz_stream.h
Normal file
@@ -0,0 +1,62 @@
/*
 * Definitions for handling the .xz file format
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#ifndef XZ_STREAM_H
#define XZ_STREAM_H

#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
#       include <linux/crc32.h>
#       undef crc32
#       define xz_crc32(buf, size, crc) \
                (~crc32_le(~(uint32_t)(crc), buf, size))
#endif

/*
 * See the .xz file format specification at
 * http://tukaani.org/xz/xz-file-format.txt
 * to understand the container format.
 */

#define STREAM_HEADER_SIZE 12

#define HEADER_MAGIC "\3757zXZ"
#define HEADER_MAGIC_SIZE 6

#define FOOTER_MAGIC "YZ"
#define FOOTER_MAGIC_SIZE 2

/*
 * Variable-length integer can hold a 63-bit unsigned integer or a special
 * value indicating that the value is unknown.
 *
 * Experimental: vli_type can be defined to uint32_t to save a few bytes
 * in code size (no effect on speed). Doing so limits the uncompressed and
 * compressed size of the file to less than 256 MiB and may also weaken
 * error detection slightly.
 */
typedef uint64_t vli_type;

#define VLI_MAX ((vli_type)-1 / 2)
#define VLI_UNKNOWN ((vli_type)-1)

/* Maximum encoded size of a VLI */
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)

/* Integrity Check types */
enum xz_check {
        XZ_CHECK_NONE = 0,
        XZ_CHECK_CRC32 = 1,
        XZ_CHECK_CRC64 = 4,
        XZ_CHECK_SHA256 = 10
};

/* Maximum possible Check ID */
#define XZ_CHECK_MAX 15

#endif