515.43.04

Andy Ritger
2022-05-09 13:18:59 -07:00
commit 1739a20efc
2519 changed files with 1060036 additions and 0 deletions

@@ -0,0 +1,29 @@
/*******************************************************************************
Copyright (c) 2013 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __cla06fsubch_h__
#define __cla06fsubch_h__
#define NVA06F_SUBCHANNEL_COPY_ENGINE 4
#endif // {__cla06fsubch_h__}

@@ -0,0 +1,155 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _cla16f_h_
#define _cla16f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
#define KEPLER_CHANNEL_GPFIFO_B (0x0000A16F)
#define NVA16F_SET_OBJECT (0x00000000)
#define NVA16F_NOP (0x00000008)
#define NVA16F_NOP_HANDLE 31:0
#define NVA16F_SEMAPHOREA (0x00000010)
#define NVA16F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVA16F_SEMAPHOREB (0x00000014)
#define NVA16F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVA16F_SEMAPHOREC (0x00000018)
#define NVA16F_SEMAPHOREC_PAYLOAD 31:0
#define NVA16F_SEMAPHORED (0x0000001C)
#define NVA16F_SEMAPHORED_OPERATION 4:0
#define NVA16F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVA16F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVA16F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVA16F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVA16F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVA16F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVA16F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVA16F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVA16F_SEMAPHORED_RELEASE_WFI 20:20
#define NVA16F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVA16F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVA16F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVA16F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVA16F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVA16F_SEMAPHORED_REDUCTION 30:27
#define NVA16F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVA16F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVA16F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVA16F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVA16F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVA16F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVA16F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVA16F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVA16F_SEMAPHORED_FORMAT 31:31
#define NVA16F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVA16F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVA16F_NON_STALL_INTERRUPT (0x00000020)
#define NVA16F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVA16F_FB_FLUSH (0x00000024)
#define NVA16F_FB_FLUSH_HANDLE 31:0
#define NVA16F_MEM_OP_A (0x00000028)
#define NVA16F_MEM_OP_A_OPERAND_LOW 31:2
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_ADDR 29:2
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_TARGET 31:30
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_TARGET_VID_MEM 0x00000000
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_TARGET_SYS_MEM_COHERENT 0x00000002
#define NVA16F_MEM_OP_A_TLB_INVALIDATE_TARGET_SYS_MEM_NONCOHERENT 0x00000003
#define NVA16F_MEM_OP_B (0x0000002c)
#define NVA16F_MEM_OP_B_OPERAND_HIGH 7:0
#define NVA16F_MEM_OP_B_OPERATION 31:27
#define NVA16F_MEM_OP_B_OPERATION_SYSMEMBAR_FLUSH 0x00000005
#define NVA16F_MEM_OP_B_OPERATION_SOFT_FLUSH 0x00000006
#define NVA16F_MEM_OP_B_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVA16F_MEM_OP_B_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVA16F_MEM_OP_B_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
#define NVA16F_MEM_OP_B_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVA16F_MEM_OP_B_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_PDB 0:0
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_PDB_ALL 0x00000001
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_GPC 1:1
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVA16F_MEM_OP_B_MMU_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVA16F_SET_REFERENCE (0x00000050)
#define NVA16F_SET_REFERENCE_COUNT 31:0
#define NVA16F_WFI (0x00000078)
#define NVA16F_WFI_HANDLE 31:0
/* GPFIFO entry format */
#define NVA16F_GP_ENTRY__SIZE 8
#define NVA16F_GP_ENTRY0_FETCH 0:0
#define NVA16F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVA16F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVA16F_GP_ENTRY0_GET 31:2
#define NVA16F_GP_ENTRY0_OPERAND 31:0
#define NVA16F_GP_ENTRY1_GET_HI 7:0
#define NVA16F_GP_ENTRY1_PRIV 8:8
#define NVA16F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVA16F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVA16F_GP_ENTRY1_LEVEL 9:9
#define NVA16F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVA16F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVA16F_GP_ENTRY1_LENGTH 30:10
/* dma method formats */
#define NVA16F_DMA_METHOD_ADDRESS 11:0
#define NVA16F_DMA_METHOD_SUBCHANNEL 15:13
#define NVA16F_DMA_METHOD_COUNT 28:16
#define NVA16F_DMA_SEC_OP 31:29
#define NVA16F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVA16F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
/* dma incrementing method format */
#define NVA16F_DMA_INCR_ADDRESS 11:0
#define NVA16F_DMA_INCR_SUBCHANNEL 15:13
#define NVA16F_DMA_INCR_COUNT 28:16
#define NVA16F_DMA_INCR_OPCODE 31:29
#define NVA16F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVA16F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVA16F_DMA_NONINCR_ADDRESS 11:0
#define NVA16F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVA16F_DMA_NONINCR_COUNT 28:16
#define NVA16F_DMA_NONINCR_OPCODE 31:29
#define NVA16F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVA16F_DMA_NONINCR_DATA 31:0
/* dma immediate-data format */
#define NVA16F_DMA_IMMD_ADDRESS 11:0
#define NVA16F_DMA_IMMD_SUBCHANNEL 15:13
#define NVA16F_DMA_IMMD_DATA 28:16
#define NVA16F_DMA_IMMD_OPCODE 31:29
#define NVA16F_DMA_IMMD_OPCODE_VALUE (0x00000004)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _cla16f_h_ */
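
The HIGH:LOW pairs above are bit-field specifications; the driver combines them with its DRF-style shift/mask macros from nvmisc.h. Below is a minimal sketch of how the GPFIFO entry and incrementing-method formats defined in this header could be packed. The FLD_* helpers and the two function names are this example's own, not part of the released headers.

/* Minimal field helpers in the spirit of the driver's DRF macros (sketch).
 * A field spec such as 28:16 yields its high and low bit via the ?: trick. */
#define FLD_HI(f)      ((1) ? f)
#define FLD_LO(f)      ((0) ? f)
#define FLD_MASK(f)    ((0xFFFFFFFFu >> (31 - FLD_HI(f) + FLD_LO(f))) << FLD_LO(f))
#define FLD_NUM(f, n)  ((((NvU32)(n)) << FLD_LO(f)) & FLD_MASK(f))

/* Pack one GPFIFO entry pointing at a pushbuffer segment: 'gpu_va' is the
 * segment's GPU virtual address, 'length_dwords' the number of method words
 * it holds. */
static void gpfifo_entry_pack(NvU32 entry[2], NvU64 gpu_va, NvU32 length_dwords)
{
    entry[0] = FLD_NUM(NVA16F_GP_ENTRY0_GET,    (NvU32)(gpu_va >> 2));
    entry[1] = FLD_NUM(NVA16F_GP_ENTRY1_GET_HI, (NvU32)(gpu_va >> 32)) |
               FLD_NUM(NVA16F_GP_ENTRY1_LENGTH, length_dwords);
}

/* Header word for an incrementing method: 'count' data words follow and land
 * on consecutive method addresses starting at 'method'.  For example,
 * method_incr_header(NVA06F_SUBCHANNEL_COPY_ENGINE, NVA16F_SET_OBJECT, 1)
 * followed by an object handle binds that object to the copy-engine
 * subchannel from the previous header. */
static NvU32 method_incr_header(NvU32 subchannel, NvU32 method, NvU32 count)
{
    return FLD_NUM(NVA16F_DMA_INCR_OPCODE,     NVA16F_DMA_INCR_OPCODE_VALUE) |
           FLD_NUM(NVA16F_DMA_INCR_COUNT,      count)                        |
           FLD_NUM(NVA16F_DMA_INCR_SUBCHANNEL, subchannel)                   |
           FLD_NUM(NVA16F_DMA_INCR_ADDRESS,    method >> 2);
}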

@@ -0,0 +1,62 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clb069_h_
#define _clb069_h_
#ifdef __cplusplus
extern "C" {
#endif
#define MAXWELL_FAULT_BUFFER_A (0xb069)
#define NVB069_FAULT_BUF_ENTRY 0x0000001f:0x00000000
#define NVB069_FAULT_BUF_SIZE 32
#define NVB069_FAULT_BUF_ENTRY_INST_APERTURE MW((9+0*32):(0*32+8))
#define NVB069_FAULT_BUF_ENTRY_INST_APERTURE_VID_MEM 0x00000000
#define NVB069_FAULT_BUF_ENTRY_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVB069_FAULT_BUF_ENTRY_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVB069_FAULT_BUF_ENTRY_INST_LO MW((31+0*32):(0*32+12))
#define NVB069_FAULT_BUF_ENTRY_INST_HI MW((31+1*32):(1*32+0))
#define NVB069_FAULT_BUF_ENTRY_INST MW((31+1*32):(0*32+12))
#define NVB069_FAULT_BUF_ENTRY_ADDR_LO MW((31+2*32):(2*32+0))
#define NVB069_FAULT_BUF_ENTRY_ADDR_HI MW((31+3*32):(3*32+0))
#define NVB069_FAULT_BUF_ENTRY_ADDR MW((31+3*32):(2*32+0))
#define NVB069_FAULT_BUF_ENTRY_TIMESTAMP_LO MW((31+4*32):(4*32+0))
#define NVB069_FAULT_BUF_ENTRY_TIMESTAMP_HI MW((31+5*32):(5*32+0))
#define NVB069_FAULT_BUF_ENTRY_TIMESTAMP MW((31+5*32):(4*32+0))
#define NVB069_FAULT_BUF_ENTRY_RESERVED MW((31+6*32):(6*32+0))
#define NVB069_FAULT_BUF_ENTRY_FAULT_TYPE MW((4+7*32):(7*32+0))
#define NVB069_FAULT_BUF_ENTRY_CLIENT MW((14+7*32):(7*32+8))
#define NVB069_FAULT_BUF_ENTRY_ACCESS_TYPE MW((18+7*32):(7*32+16))
#define NVB069_FAULT_BUF_ENTRY_MMU_CLIENT_TYPE MW((20+7*32):(7*32+20))
#define NVB069_FAULT_BUF_ENTRY_GPC_ID MW((28+7*32):(7*32+24))
#define NVB069_FAULT_BUF_ENTRY_VALID MW((31+7*32):(7*32+31))
#define NVB069_FAULT_BUF_ENTRY_VALID_FALSE 0x00000000
#define NVB069_FAULT_BUF_ENTRY_VALID_TRUE 0x00000001
#define NVB069_NOTIFIERS_REPLAYABLE_FAULT (0)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clb069_h_ */
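
Each replayable-fault record above is a 32-byte entry whose fields are given as bit positions within the whole entry, wrapped in MW(). A sketch of reading such a field back on the CPU follows; the identity definition of MW, the helper names, and the use of NvU32 from nvtypes.h are assumptions of this example, and the helper only handles fields that fit in a single 32-bit word (aperture, fault type, client, access type, valid), not ones spanning words such as ADDR or TIMESTAMP.

/* Treat MW() as the identity so the ?: trick can recover the entry-relative
 * high/low bit numbers from the field specs above (assumption of this sketch). */
#define MW(field)     field
#define MWFLD_HI(f)   ((1) ? f)
#define MWFLD_LO(f)   ((0) ? f)

/* Read an entry-relative bit field out of one 32-byte (8-dword) fault buffer
 * entry.  Only fields contained within a single 32-bit word are supported. */
static NvU32 fault_entry_field(const NvU32 entry[NVB069_FAULT_BUF_SIZE / 4],
                               NvU32 hi, NvU32 lo)
{
    NvU32 word  = lo / 32;
    NvU32 shift = lo % 32;
    NvU32 width = hi - lo + 1;
    NvU32 mask  = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);

    return (entry[word] >> shift) & mask;
}

/* Usage: test whether entry 'e' is valid.
 *   fault_entry_field(e, MWFLD_HI(NVB069_FAULT_BUF_ENTRY_VALID),
 *                        MWFLD_LO(NVB069_FAULT_BUF_ENTRY_VALID))  */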

@@ -0,0 +1,140 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clB06f_h_
#define _clB06f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
#define MAXWELL_CHANNEL_GPFIFO_A (0x0000B06F)
/* class MAXWELL_CHANNEL_GPFIFO */
#define NVB06F_SET_OBJECT (0x00000000)
#define NVB06F_NOP (0x00000008)
#define NVB06F_NOP_HANDLE 31:0
#define NVB06F_SEMAPHOREA (0x00000010)
#define NVB06F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVB06F_SEMAPHOREB (0x00000014)
#define NVB06F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVB06F_SEMAPHOREC (0x00000018)
#define NVB06F_SEMAPHOREC_PAYLOAD 31:0
#define NVB06F_SEMAPHORED (0x0000001C)
#define NVB06F_SEMAPHORED_OPERATION 4:0
#define NVB06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVB06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVB06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVB06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVB06F_SEMAPHORED_RELEASE_WFI 20:20
#define NVB06F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVB06F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVB06F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVB06F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVB06F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVB06F_NON_STALL_INTERRUPT (0x00000020)
// NOTE - MEM_OP_A and MEM_OP_B have been removed for gm20x to make room for
// possible future MEM_OP features. MEM_OP_C/D have identical functionality
// to the previous MEM_OP_A/B methods.
#define NVB06F_MEM_OP_C (0x00000030)
#define NVB06F_MEM_OP_C_OPERAND_LOW 31:2
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET 11:10
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET_VID_MEM 0x00000000
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET_SYS_MEM_COHERENT 0x00000002
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET_SYS_MEM_NONCOHERENT 0x00000003
#define NVB06F_MEM_OP_C_TLB_INVALIDATE_ADDR_LO 31:12
#define NVB06F_MEM_OP_D (0x00000034)
#define NVB06F_MEM_OP_D_OPERAND_HIGH 7:0
#define NVB06F_MEM_OP_D_OPERATION 31:27
#define NVB06F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVB06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVB06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVB06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
#define NVB06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVB06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVB06F_MEM_OP_D_TLB_INVALIDATE_ADDR_HI 7:0
#define NVB06F_WFI (0x00000078)
/* GPFIFO entry format */
#define NVB06F_GP_ENTRY__SIZE 8
#define NVB06F_GP_ENTRY0_GET 31:2
#define NVB06F_GP_ENTRY0_OPERAND 31:0
#define NVB06F_GP_ENTRY1_GET_HI 7:0
#define NVB06F_GP_ENTRY1_PRIV 8:8
#define NVB06F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVB06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVB06F_GP_ENTRY1_LEVEL 9:9
#define NVB06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVB06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVB06F_GP_ENTRY1_LENGTH 30:10
/* dma method formats */
#define NVB06F_DMA_SEC_OP 31:29
#define NVB06F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVB06F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
/* dma incrementing method format */
#define NVB06F_DMA_INCR_ADDRESS 11:0
#define NVB06F_DMA_INCR_SUBCHANNEL 15:13
#define NVB06F_DMA_INCR_COUNT 28:16
#define NVB06F_DMA_INCR_OPCODE 31:29
#define NVB06F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVB06F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVB06F_DMA_NONINCR_ADDRESS 11:0
#define NVB06F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVB06F_DMA_NONINCR_COUNT 28:16
#define NVB06F_DMA_NONINCR_OPCODE 31:29
#define NVB06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVB06F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVB06F_DMA_ONEINCR_ADDRESS 11:0
#define NVB06F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVB06F_DMA_ONEINCR_COUNT 28:16
#define NVB06F_DMA_ONEINCR_OPCODE 31:29
#define NVB06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVB06F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVB06F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVB06F_DMA_IMMD_ADDRESS 11:0
#define NVB06F_DMA_IMMD_SUBCHANNEL 15:13
#define NVB06F_DMA_IMMD_DATA 28:16
#define NVB06F_DMA_IMMD_OPCODE 31:29
#define NVB06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clB06f_h_ */
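
The note above says MEM_OP_C/D took over the old MEM_OP_A/B functionality on gm20x. Here is a sketch of a full (all-PDB) MMU TLB invalidate issued through them; it reuses the FLD_NUM() and method_incr_header() helpers sketched after the KEPLER_CHANNEL_GPFIFO_B header, since the B06F method-header encoding is the same, and 'pb' (pushbuffer cursor) and 'subch' are assumptions of the example.

/* MEM_OP_C selects what to invalidate, MEM_OP_D carries the operation; the
 * two methods are consecutive, so one incrementing header covers both. */
static NvU32 *push_tlb_invalidate_all(NvU32 *pb, NvU32 subch)
{
    *pb++ = method_incr_header(subch, NVB06F_MEM_OP_C, 2);
    *pb++ = FLD_NUM(NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB,
                    NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL) |
            FLD_NUM(NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC,
                    NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE);
    *pb++ = FLD_NUM(NVB06F_MEM_OP_D_OPERATION,
                    NVB06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE);
    return pb;
}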

@@ -0,0 +1,191 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clb0b5_h_
#define _clb0b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define MAXWELL_DMA_COPY_A (0x0000B0B5)
#define NVB0B5_SET_SEMAPHORE_A (0x00000240)
#define NVB0B5_SET_SEMAPHORE_A_UPPER 7:0
#define NVB0B5_SET_SEMAPHORE_B (0x00000244)
#define NVB0B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVB0B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVB0B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVB0B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVB0B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVB0B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVB0B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVB0B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVB0B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVB0B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVB0B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVB0B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVB0B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVB0B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVB0B5_SET_DST_PHYS_MODE (0x00000264)
#define NVB0B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVB0B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVB0B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVB0B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVB0B5_LAUNCH_DMA (0x00000300)
#define NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVB0B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVB0B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVB0B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVB0B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVB0B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVB0B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVB0B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVB0B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVB0B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVB0B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVB0B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVB0B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVB0B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVB0B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVB0B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVB0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVB0B5_LAUNCH_DMA_BYPASS_L2 20:20
#define NVB0B5_LAUNCH_DMA_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVB0B5_LAUNCH_DMA_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVB0B5_OFFSET_IN_UPPER (0x00000400)
#define NVB0B5_OFFSET_IN_UPPER_UPPER 7:0
#define NVB0B5_OFFSET_IN_LOWER (0x00000404)
#define NVB0B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVB0B5_OFFSET_OUT_UPPER (0x00000408)
#define NVB0B5_OFFSET_OUT_UPPER_UPPER 7:0
#define NVB0B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVB0B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVB0B5_PITCH_IN (0x00000410)
#define NVB0B5_PITCH_IN_VALUE 31:0
#define NVB0B5_PITCH_OUT (0x00000414)
#define NVB0B5_PITCH_OUT_VALUE 31:0
#define NVB0B5_LINE_LENGTH_IN (0x00000418)
#define NVB0B5_LINE_LENGTH_IN_VALUE 31:0
#define NVB0B5_LINE_COUNT (0x0000041C)
#define NVB0B5_LINE_COUNT_VALUE 31:0
#define NVB0B5_SET_REMAP_CONST_A (0x00000700)
#define NVB0B5_SET_REMAP_CONST_A_V 31:0
#define NVB0B5_SET_REMAP_CONST_B (0x00000704)
#define NVB0B5_SET_REMAP_CONST_B_V 31:0
#define NVB0B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVB0B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVB0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clb0b5_h
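
As a concrete use of the LAUNCH_DMA fields above, here is a sketch of a single pitch-linear, virtual-to-virtual copy of 'size' bytes pushed to MAXWELL_DMA_COPY_A. It reuses the FLD_NUM() and method_incr_header() helpers sketched earlier; as before, 'pb' and 'subch' are assumptions of the example.

static NvU32 *push_memcopy_b0b5(NvU32 *pb, NvU32 subch,
                                NvU64 dst_va, NvU64 src_va, NvU32 size)
{
    /* OFFSET_IN_UPPER through OFFSET_OUT_LOWER are consecutive methods. */
    *pb++ = method_incr_header(subch, NVB0B5_OFFSET_IN_UPPER, 4);
    *pb++ = FLD_NUM(NVB0B5_OFFSET_IN_UPPER_UPPER,  (NvU32)(src_va >> 32));
    *pb++ = (NvU32)src_va;                      /* OFFSET_IN_LOWER  */
    *pb++ = FLD_NUM(NVB0B5_OFFSET_OUT_UPPER_UPPER, (NvU32)(dst_va >> 32));
    *pb++ = (NvU32)dst_va;                      /* OFFSET_OUT_LOWER */

    /* Single line: with MULTI_LINE_ENABLE left FALSE only LINE_LENGTH_IN is used. */
    *pb++ = method_incr_header(subch, NVB0B5_LINE_LENGTH_IN, 1);
    *pb++ = size;

    *pb++ = method_incr_header(subch, NVB0B5_LAUNCH_DMA, 1);
    *pb++ = FLD_NUM(NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE,
                    NVB0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED) |
            FLD_NUM(NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT,
                    NVB0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH)          |
            FLD_NUM(NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT,
                    NVB0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH)          |
            FLD_NUM(NVB0B5_LAUNCH_DMA_FLUSH_ENABLE,
                    NVB0B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE);
    return pb;
}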

@@ -0,0 +1,173 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc06f_h_
#define _clc06f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
#define PASCAL_CHANNEL_GPFIFO_A (0x0000C06F)
/* class PASCAL_CHANNEL_GPFIFO_A */
#define NVC06F_SET_OBJECT (0x00000000)
#define NVC06F_NOP (0x00000008)
#define NVC06F_NOP_HANDLE 31:0
#define NVC06F_SEMAPHOREA (0x00000010)
#define NVC06F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC06F_SEMAPHOREB (0x00000014)
#define NVC06F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC06F_SEMAPHOREC (0x00000018)
#define NVC06F_SEMAPHOREC_PAYLOAD 31:0
#define NVC06F_SEMAPHORED (0x0000001C)
#define NVC06F_SEMAPHORED_OPERATION 4:0
#define NVC06F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC06F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC06F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC06F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
/* GPFIFO entry format */
#define NVC06F_GP_ENTRY__SIZE 8
#define NVC06F_GP_ENTRY0_GET 31:2
#define NVC06F_GP_ENTRY0_OPERAND 31:0
#define NVC06F_GP_ENTRY1_GET_HI 7:0
#define NVC06F_GP_ENTRY1_PRIV 8:8
#define NVC06F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVC06F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVC06F_GP_ENTRY1_LEVEL 9:9
#define NVC06F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC06F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC06F_GP_ENTRY1_LENGTH 30:10
/* dma incrementing method format */
#define NVC06F_DMA_INCR_ADDRESS 11:0
#define NVC06F_DMA_INCR_SUBCHANNEL 15:13
#define NVC06F_DMA_INCR_COUNT 28:16
#define NVC06F_DMA_INCR_OPCODE 31:29
#define NVC06F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC06F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC06F_DMA_NONINCR_ADDRESS 11:0
#define NVC06F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC06F_DMA_NONINCR_COUNT 28:16
#define NVC06F_DMA_NONINCR_OPCODE 31:29
#define NVC06F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC06F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC06F_DMA_ONEINCR_ADDRESS 11:0
#define NVC06F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC06F_DMA_ONEINCR_COUNT 28:16
#define NVC06F_DMA_ONEINCR_OPCODE 31:29
#define NVC06F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC06F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC06F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC06F_DMA_IMMD_ADDRESS 11:0
#define NVC06F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC06F_DMA_IMMD_DATA 28:16
#define NVC06F_DMA_IMMD_OPCODE 31:29
#define NVC06F_DMA_IMMD_OPCODE_VALUE (0x00000004)
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC06F_MEM_OP_A (0x00000028)
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
#define NVC06F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
#define NVC06F_MEM_OP_B (0x0000002c)
#define NVC06F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC06F_MEM_OP_C (0x00000030)
#define NVC06F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC06F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC06F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC06F_MEM_OP_D (0x00000034)
#define NVC06F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC06F_MEM_OP_D_OPERATION 31:27
#define NVC06F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC06F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC06F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC06F_SET_REFERENCE (0x00000050)
#define NVC06F_SET_REFERENCE_COUNT 31:0
#define NVC06F_WFI (0x00000078)
#define NVC06F_WFI_SCOPE 0:0
#define NVC06F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC06F_WFI_SCOPE_ALL 0x00000001
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc06f_h_ */
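
Putting the four MEM_OP methods together: a sketch of a targeted TLB invalidate of one 4K-aligned virtual address 'va' within a single page directory whose physical address is 'pdb_pa' (video memory here). MEM_OP_A..D are consecutive, so one incrementing header with a count of 4 also satisfies the "MEM_OP_D MUST be preceded by MEM_OPs A-C" rule noted above. Helpers and the 'pb'/'subch' parameters carry over from the earlier sketches.

static NvU32 *push_tlb_invalidate_va(NvU32 *pb, NvU32 subch, NvU64 va, NvU64 pdb_pa)
{
    *pb++ = method_incr_header(subch, NVC06F_MEM_OP_A, 4);
    *pb++ = FLD_NUM(NVC06F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO, (NvU32)(va >> 12)) |
            FLD_NUM(NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR,
                    NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS);
    *pb++ = (NvU32)(va >> 32);                  /* MEM_OP_B: TARGET_ADDR_HI */
    *pb++ = FLD_NUM(NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB,
                    NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE)              |
            FLD_NUM(NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE,
                    NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM) |
            FLD_NUM(NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO, (NvU32)(pdb_pa >> 12));
    *pb++ = FLD_NUM(NVC06F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI, (NvU32)(pdb_pa >> 32)) |
            FLD_NUM(NVC06F_MEM_OP_D_OPERATION,
                    NVC06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED);
    return pb;
}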

@@ -0,0 +1,82 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _clc076_h_
#define _clc076_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
#define GP100_UVM_SW (0x0000c076)
#define NVC076_SET_OBJECT (0x00000000)
#define NVC076_NO_OPERATION (0x00000100)
/* Method data fields to support gpu fault cancel. These are pushed in order by UVM */
#define NVC076_FAULT_CANCEL_A (0x00000104)
#define NVC076_FAULT_CANCEL_A_INST_APERTURE 1:0
#define NVC076_FAULT_CANCEL_A_INST_APERTURE_VID_MEM 0x00000000
#define NVC076_FAULT_CANCEL_A_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC076_FAULT_CANCEL_A_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
/* instance pointer is 4k aligned so those bits are reused to store the aperture */
#define NVC076_FAULT_CANCEL_A_INST_LOW 31:12
#define NVC076_FAULT_CANCEL_B (0x00000108)
#define NVC076_FAULT_CANCEL_B_INST_HI 31:0
#define NVC076_FAULT_CANCEL_C (0x0000010c)
#define NVC076_FAULT_CANCEL_C_CLIENT_ID 5:0
#define NVC076_FAULT_CANCEL_C_GPC_ID 10:6
#define NVC076_FAULT_CANCEL_C_MODE 31:30
#define NVC076_FAULT_CANCEL_C_MODE_TARGETED 0x00000000
#define NVC076_FAULT_CANCEL_C_MODE_GLOBAL 0x00000001
/* Method data fields to support clearing faulted bit. These are pushed in order by UVM */
#define NVC076_CLEAR_FAULTED_A (0x00000110)
#define NVC076_CLEAR_FAULTED_A_INST_APERTURE 1:0
#define NVC076_CLEAR_FAULTED_A_INST_APERTURE_VID_MEM 0x00000000
#define NVC076_CLEAR_FAULTED_A_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC076_CLEAR_FAULTED_A_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC076_CLEAR_FAULTED_A_TYPE 2:2
#define NVC076_CLEAR_FAULTED_A_TYPE_PBDMA_FAULTED 0x00000000
#define NVC076_CLEAR_FAULTED_A_TYPE_ENG_FAULTED 0x00000001
/* instance pointer is 4k aligned */
#define NVC076_CLEAR_FAULTED_A_INST_LOW 31:12
#define NVC076_CLEAR_FAULTED_B (0x00000114)
#define NVC076_CLEAR_FAULTED_B_INST_HI 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc076_h_ */
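
GP100_UVM_SW is a software class: the methods above are handled in the driver rather than by an engine pipeline, and, as the comments note, UVM pushes them in order. A sketch of pushing the clear-faulted pair for a channel whose instance block sits at physical address 'inst_pa' in video memory, reusing the earlier helpers:

static NvU32 *push_clear_faulted(NvU32 *pb, NvU32 subch, NvU64 inst_pa)
{
    /* CLEAR_FAULTED_A and CLEAR_FAULTED_B are consecutive methods. */
    *pb++ = method_incr_header(subch, NVC076_CLEAR_FAULTED_A, 2);
    *pb++ = FLD_NUM(NVC076_CLEAR_FAULTED_A_INST_LOW,  (NvU32)(inst_pa >> 12)) |
            FLD_NUM(NVC076_CLEAR_FAULTED_A_INST_APERTURE,
                    NVC076_CLEAR_FAULTED_A_INST_APERTURE_VID_MEM)             |
            FLD_NUM(NVC076_CLEAR_FAULTED_A_TYPE,
                    NVC076_CLEAR_FAULTED_A_TYPE_ENG_FAULTED);
    *pb++ = (NvU32)(inst_pa >> 32);             /* CLEAR_FAULTED_B: INST_HI */
    return pb;
}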

@@ -0,0 +1,191 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc0b5_h_
#define _clc0b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define PASCAL_DMA_COPY_A (0x0000C0B5)
#define NVC0B5_SET_SEMAPHORE_A (0x00000240)
#define NVC0B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC0B5_SET_SEMAPHORE_B (0x00000244)
#define NVC0B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC0B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC0B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC0B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC0B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC0B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC0B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC0B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC0B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC0B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC0B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC0B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC0B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC0B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC0B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC0B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC0B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC0B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC0B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC0B5_LAUNCH_DMA (0x00000300)
#define NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC0B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC0B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC0B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC0B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC0B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC0B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC0B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC0B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC0B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC0B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC0B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC0B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC0B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC0B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC0B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC0B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC0B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC0B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC0B5_OFFSET_IN_UPPER (0x00000400)
#define NVC0B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC0B5_OFFSET_IN_LOWER (0x00000404)
#define NVC0B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC0B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC0B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC0B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC0B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC0B5_PITCH_IN (0x00000410)
#define NVC0B5_PITCH_IN_VALUE 31:0
#define NVC0B5_PITCH_OUT (0x00000414)
#define NVC0B5_PITCH_OUT_VALUE 31:0
#define NVC0B5_LINE_LENGTH_IN (0x00000418)
#define NVC0B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC0B5_LINE_COUNT (0x0000041C)
#define NVC0B5_LINE_COUNT_VALUE 31:0
#define NVC0B5_SET_REMAP_CONST_A (0x00000700)
#define NVC0B5_SET_REMAP_CONST_A_V 31:0
#define NVC0B5_SET_REMAP_CONST_B (0x00000704)
#define NVC0B5_SET_REMAP_CONST_B_V 31:0
#define NVC0B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC0B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc0b5_h
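
The remap machinery above also gives the copy engine a memset path: with REMAP_ENABLE set and the destination X component taken from CONST_A, every destination element is written from SET_REMAP_CONST_A and no source is read. A sketch for a 4-byte pattern fill ('size' assumed to be a multiple of 4 here), again reusing the earlier helpers and the 'pb'/'subch' assumptions:

static NvU32 *push_memset_c0b5(NvU32 *pb, NvU32 subch,
                               NvU64 dst_va, NvU32 pattern, NvU32 size)
{
    *pb++ = method_incr_header(subch, NVC0B5_SET_REMAP_CONST_A, 1);
    *pb++ = pattern;

    *pb++ = method_incr_header(subch, NVC0B5_SET_REMAP_COMPONENTS, 1);
    *pb++ = FLD_NUM(NVC0B5_SET_REMAP_COMPONENTS_DST_X,
                    NVC0B5_SET_REMAP_COMPONENTS_DST_X_CONST_A)          |
            FLD_NUM(NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE,
                    NVC0B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR)    |
            FLD_NUM(NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS,
                    NVC0B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE);

    *pb++ = method_incr_header(subch, NVC0B5_OFFSET_OUT_UPPER, 2);
    *pb++ = FLD_NUM(NVC0B5_OFFSET_OUT_UPPER_UPPER, (NvU32)(dst_va >> 32));
    *pb++ = (NvU32)dst_va;                      /* OFFSET_OUT_LOWER */

    /* With REMAP enabled the line length counts elements, not bytes. */
    *pb++ = method_incr_header(subch, NVC0B5_LINE_LENGTH_IN, 1);
    *pb++ = size / 4;

    *pb++ = method_incr_header(subch, NVC0B5_LAUNCH_DMA, 1);
    *pb++ = FLD_NUM(NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE,
                    NVC0B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED) |
            FLD_NUM(NVC0B5_LAUNCH_DMA_REMAP_ENABLE,
                    NVC0B5_LAUNCH_DMA_REMAP_ENABLE_TRUE)                |
            FLD_NUM(NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT,
                    NVC0B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH)          |
            FLD_NUM(NVC0B5_LAUNCH_DMA_FLUSH_ENABLE,
                    NVC0B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE);
    return pb;
}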

@@ -0,0 +1,191 @@
/*******************************************************************************
Copyright (c) 2014 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc1b5_h_
#define _clc1b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define PASCAL_DMA_COPY_B (0x0000C1B5)
#define NVC1B5_SET_SEMAPHORE_A (0x00000240)
#define NVC1B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC1B5_SET_SEMAPHORE_B (0x00000244)
#define NVC1B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC1B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC1B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC1B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC1B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC1B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC1B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC1B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC1B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC1B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC1B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC1B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC1B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC1B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC1B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC1B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC1B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC1B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC1B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC1B5_LAUNCH_DMA (0x00000300)
#define NVC1B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC1B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC1B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC1B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC1B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC1B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC1B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC1B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC1B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC1B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC1B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC1B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC1B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC1B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC1B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC1B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC1B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC1B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC1B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC1B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC1B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC1B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC1B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC1B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC1B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC1B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC1B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC1B5_OFFSET_IN_UPPER (0x00000400)
#define NVC1B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC1B5_OFFSET_IN_LOWER (0x00000404)
#define NVC1B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC1B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC1B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC1B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC1B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC1B5_PITCH_IN (0x00000410)
#define NVC1B5_PITCH_IN_VALUE 31:0
#define NVC1B5_PITCH_OUT (0x00000414)
#define NVC1B5_PITCH_OUT_VALUE 31:0
#define NVC1B5_LINE_LENGTH_IN (0x00000418)
#define NVC1B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC1B5_LINE_COUNT (0x0000041C)
#define NVC1B5_LINE_COUNT_VALUE 31:0
#define NVC1B5_SET_REMAP_CONST_A (0x00000700)
#define NVC1B5_SET_REMAP_CONST_A_V 31:0
#define NVC1B5_SET_REMAP_CONST_B (0x00000704)
#define NVC1B5_SET_REMAP_CONST_B_V 31:0
#define NVC1B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC1B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
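/*
 * Usage sketch (illustrative only, not part of the class definition): the
 * remap block can synthesize the destination from a constant, giving a
 * memset-style fill.  With the field positions above, a 4-byte fill pattern
 * could be programmed roughly as:
 *
 *     NvU32 remap = (NVC1B5_SET_REMAP_COMPONENTS_DST_X_CONST_A          << 0)
 *                 | (NVC1B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR    << 16)
 *                 | (NVC1B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE << 24);
 *
 * SET_REMAP_CONST_A holds the 32-bit pattern, LINE_LENGTH_IN is then given
 * in components (bytes / 4 here), and the launch word must have
 * REMAP_ENABLE_TRUE set.  The shift amounts mirror the low bits of the 2:0,
 * 17:16 and 25:24 ranges above; how the methods reach the pushbuffer is
 * host-class specific and outside this header.
 */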
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc1b5_h

View File

@@ -0,0 +1,93 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
// Command: ../../../bin/manuals/refhdr2class.pl clc365.h c365 ACCESS_COUNTER_NOTIFY_BUFFER --search_str=NV_ACCESS_COUNTER --input_file=nv_ref_dev_access_counter.h
#ifndef _clc365_h_
#define _clc365_h_
#ifdef __cplusplus
extern "C" {
#endif
#define ACCESS_COUNTER_NOTIFY_BUFFER (0xc365)
#define NVC365_NOTIFY_BUF
#define NVC365_NOTIFY_BUF_ENTRY 0x0000001f:0x00000000
#define NVC365_NOTIFY_BUF_SIZE 32
#define NVC365_NOTIFY_BUF_ENTRY_TYPE MW((0+0*32):(0*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_TYPE_GPU 0x00000001
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE MW((1+0*32):(0*32+1))
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GPA 0x00000001
#define NVC365_NOTIFY_BUF_ENTRY_BANK MW((5+0*32):(0*32+2))
#define NVC365_NOTIFY_BUF_ENTRY_BANK_0 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_BANK_1 0x00000001
#define NVC365_NOTIFY_BUF_ENTRY_BANK_2 0x00000002
#define NVC365_NOTIFY_BUF_ENTRY_BANK_3 0x00000003
#define NVC365_NOTIFY_BUF_ENTRY_BANK_4 0x00000004
#define NVC365_NOTIFY_BUF_ENTRY_BANK_5 0x00000005
#define NVC365_NOTIFY_BUF_ENTRY_BANK_6 0x00000006
#define NVC365_NOTIFY_BUF_ENTRY_BANK_7 0x00000007
#define NVC365_NOTIFY_BUF_ENTRY_BANK_8 0x00000008
#define NVC365_NOTIFY_BUF_ENTRY_BANK_9 0x00000009
#define NVC365_NOTIFY_BUF_ENTRY_BANK_10 0x0000000a
#define NVC365_NOTIFY_BUF_ENTRY_BANK_11 0x0000000b
#define NVC365_NOTIFY_BUF_ENTRY_BANK_12 0x0000000c
#define NVC365_NOTIFY_BUF_ENTRY_BANK_13 0x0000000d
#define NVC365_NOTIFY_BUF_ENTRY_BANK_14 0x0000000e
#define NVC365_NOTIFY_BUF_ENTRY_BANK_15 0x0000000f
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE MW((9+0*32):(0*32+8))
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE_PEER_MEM 0x00000001
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC365_NOTIFY_BUF_ENTRY_INST_APERTURE MW((11+0*32):(0*32+10))
#define NVC365_NOTIFY_BUF_ENTRY_INST_APERTURE_VID_MEM 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC365_NOTIFY_BUF_ENTRY_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC365_NOTIFY_BUF_ENTRY_INST_LO MW((31+0*32):(0*32+12))
#define NVC365_NOTIFY_BUF_ENTRY_INST_HI MW((31+1*32):(1*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_INST MW((31+1*32):(0*32+12))
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_LO MW((31+2*32):(2*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_ADDR_HI MW((31+3*32):(3*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_ADDR MW((31+3*32):(2*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_SUB_GRANULARITY MW((31+4*32):(4*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_NOTIFY_TAG MW((19+5*32):(5*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_COUNTER_VAL MW((15+6*32):(6*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_PEER_ID MW((2+7*32):(7*32+0))
#define NVC365_NOTIFY_BUF_ENTRY_MMU_ENGINE_ID MW((28+7*32):(7*32+20))
#define NVC365_NOTIFY_BUF_ENTRY_VALID MW((31+7*32):(7*32+31))
#define NVC365_NOTIFY_BUF_ENTRY_VALID_FALSE 0x00000000
#define NVC365_NOTIFY_BUF_ENTRY_VALID_TRUE 0x00000001
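/*
 * Usage sketch (illustrative only): each notification record is
 * NVC365_NOTIFY_BUF_SIZE (32) bytes, i.e. eight 32-bit words, and the MW()
 * ranges above are bit positions within that 256-bit record.  Reading the
 * address and validity from a record mapped as NvU32 buf[8] could look
 * roughly like:
 *
 *     NvBool valid = (buf[7] >> 31) & 0x1;            // ENTRY_VALID, word 7 bit 31
 *     NvU64  addr  = ((NvU64)buf[3] << 32) | buf[2];  // ENTRY_ADDR, words 2..3
 *
 * The word and bit positions follow directly from the
 * MW((31+7*32):(7*32+31)) and MW((31+3*32):(2*32+0)) ranges above; byte
 * order and any required read barriers are outside the scope of this header.
 */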
#define NVC365_NOTIFIERS_ACCESS_COUNTER (0)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc365_h_ */

View File

@@ -0,0 +1,82 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
// Command: ../../../bin/manuals/refhdr2class.pl clc369.h c369 MMU_FAULT_BUFFER --search_str=NV_MMU_FAULT --input_file=nv_ref_dev_mmu_fault.h
#ifndef _clc369_h_
#define _clc369_h_
#ifdef __cplusplus
extern "C" {
#endif
#define MMU_FAULT_BUFFER (0xc369)
#define NVC369_BUF
#define NVC369_BUF_ENTRY 0x0000001f:0x00000000
#define NVC369_BUF_SIZE 32
#define NVC369_BUF_ENTRY_INST_APERTURE MW((9+0*32):(0*32+8))
#define NVC369_BUF_ENTRY_INST_APERTURE_VID_MEM 0x00000000
#define NVC369_BUF_ENTRY_INST_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC369_BUF_ENTRY_INST_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC369_BUF_ENTRY_INST_LO MW((31+0*32):(0*32+12))
#define NVC369_BUF_ENTRY_INST_HI MW((31+1*32):(1*32+0))
#define NVC369_BUF_ENTRY_INST MW((31+1*32):(0*32+12))
#define NVC369_BUF_ENTRY_ADDR_PHYS_APERTURE MW((1+2*32):(2*32+0))
#define NVC369_BUF_ENTRY_ADDR_LO MW((31+2*32):(2*32+12))
#define NVC369_BUF_ENTRY_ADDR_HI MW((31+3*32):(3*32+0))
#define NVC369_BUF_ENTRY_ADDR MW((31+3*32):(2*32+12))
#define NVC369_BUF_ENTRY_TIMESTAMP_LO MW((31+4*32):(4*32+0))
#define NVC369_BUF_ENTRY_TIMESTAMP_HI MW((31+5*32):(5*32+0))
#define NVC369_BUF_ENTRY_TIMESTAMP MW((31+5*32):(4*32+0))
#define NVC369_BUF_ENTRY_ENGINE_ID MW((8+6*32):(6*32+0))
#define NVC369_BUF_ENTRY_FAULT_TYPE MW((4+7*32):(7*32+0))
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT MW((7+7*32):(7*32+7))
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_FALSE 0x00000000
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_TRUE 0x00000001
#define NVC369_BUF_ENTRY_CLIENT MW((14+7*32):(7*32+8))
#define NVC369_BUF_ENTRY_ACCESS_TYPE MW((19+7*32):(7*32+16))
#define NVC369_BUF_ENTRY_MMU_CLIENT_TYPE MW((20+7*32):(7*32+20))
#define NVC369_BUF_ENTRY_GPC_ID MW((28+7*32):(7*32+24))
#define NVC369_BUF_ENTRY_PROTECTED_MODE MW((29+7*32):(7*32+29))
#define NVC369_BUF_ENTRY_PROTECTED_MODE_FALSE 0x00000000
#define NVC369_BUF_ENTRY_PROTECTED_MODE_TRUE 0x00000001
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_EN MW((30+7*32):(7*32+30))
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_EN_FALSE 0x00000000
#define NVC369_BUF_ENTRY_REPLAYABLE_FAULT_EN_TRUE 0x00000001
#define NVC369_BUF_ENTRY_VALID MW((31+7*32):(7*32+31))
#define NVC369_BUF_ENTRY_VALID_FALSE 0x00000000
#define NVC369_BUF_ENTRY_VALID_TRUE 0x00000001
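/*
 * Usage sketch (illustrative only): as with the access counter buffer, each
 * fault record is eight 32-bit words.  With the record mapped as
 * NvU32 buf[NVC369_BUF_SIZE / 4], the commonly inspected bits decode as:
 *
 *     NvBool valid      = (buf[7] >> 31) & 0x1;            // ENTRY_VALID
 *     NvBool replayable = (buf[7] >> 7)  & 0x1;            // REPLAYABLE_FAULT
 *     NvU64  timestamp  = ((NvU64)buf[5] << 32) | buf[4];  // ENTRY_TIMESTAMP
 *
 * Clearing VALID after a record has been consumed is the caller's
 * responsibility; the ordering requirements are documented with the fault
 * buffer registers, not in this header.
 */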
#define NVC369_NOTIFIER_MMU_FAULT_NON_REPLAYABLE 0
#define NVC369_NOTIFIER_MMU_FAULT_REPLAYABLE 1
#define NVC369_NOTIFIER_MMU_FAULT_ERROR 2
#define NVC369_NOTIFIER_MMU_FAULT_NON_REPLAYABLE_IN_PRIV 3
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc369_h_ */

View File

@@ -0,0 +1,368 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc36f_h_
#define _clc36f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
/* class VOLTA_CHANNEL_GPFIFO */
/*
* Documentation for VOLTA_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
* Note there is no .mfs file for this class.
*/
#define VOLTA_CHANNEL_GPFIFO_A (0x0000C36F)
#define NVC36F_TYPEDEF VOLTA_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc36fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored05[0x5c]; /* 0090-01ff*/
} Nvc36fControl, VoltaAControlGPFifo;
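/*
 * Usage sketch (illustrative only): this structure overlays the channel's
 * USERD region.  A submission loop typically appends an 8-byte GPFIFO entry
 * to the GPFIFO ring and then advances GPPut, roughly:
 *
 *     volatile Nvc36fControl *ctrl = userd;      // hypothetical USERD mapping
 *     NvU64 *gpfifo = gpfifo_cpu_va;             // ring of 8-byte GP entries
 *     NvU32  put    = ctrl->GPPut;
 *
 *     gpfifo[put] = ((NvU64)entry1 << 32) | entry0;   // see GP_ENTRY0/1 below
 *     ctrl->GPPut = (put + 1) % num_gpfifo_entries;
 *
 * GPGet is read-only from the CPU and is advanced by the PBDMA as entries
 * are fetched; production code must also handle ring wrap and whatever
 * doorbell or work-submission mechanism the platform requires.
 */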
/* fields and values */
#define NVC36F_NUMBER_OF_SUBCHANNELS (8)
#define NVC36F_SET_OBJECT (0x00000000)
#define NVC36F_SET_OBJECT_NVCLASS 15:0
#define NVC36F_SET_OBJECT_ENGINE 20:16
#define NVC36F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC36F_ILLEGAL (0x00000004)
#define NVC36F_ILLEGAL_HANDLE 31:0
#define NVC36F_NOP (0x00000008)
#define NVC36F_NOP_HANDLE 31:0
#define NVC36F_SEMAPHOREA (0x00000010)
#define NVC36F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC36F_SEMAPHOREB (0x00000014)
#define NVC36F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC36F_SEMAPHOREC (0x00000018)
#define NVC36F_SEMAPHOREC_PAYLOAD 31:0
#define NVC36F_SEMAPHORED (0x0000001C)
#define NVC36F_SEMAPHORED_OPERATION 4:0
#define NVC36F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC36F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC36F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC36F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC36F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC36F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC36F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC36F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC36F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC36F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC36F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC36F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC36F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC36F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC36F_SEMAPHORED_REDUCTION 30:27
#define NVC36F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC36F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC36F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC36F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC36F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC36F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC36F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC36F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC36F_SEMAPHORED_FORMAT 31:31
#define NVC36F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC36F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC36F_NON_STALL_INTERRUPT (0x00000020)
#define NVC36F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC36F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC36F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC36F_MEM_OP_A (0x00000028)
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 6:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
#define NVC36F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
#define NVC36F_MEM_OP_B (0x0000002c)
#define NVC36F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC36F_MEM_OP_C (0x00000030)
#define NVC36F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC36F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC36F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC36F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC36F_MEM_OP_D (0x00000034)
#define NVC36F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC36F_MEM_OP_D_OPERATION 31:27
#define NVC36F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC36F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC36F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC36F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC36F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC36F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC36F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC36F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
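/*
 * Usage sketch (illustrative only): a full, non-targeted TLB invalidate is
 * expressed as the MEM_OP_A..D method quartet, with A/B carrying the target
 * address (unused here) and C/D carrying the controls.  Built by hand from
 * the field positions above, the data words would be roughly:
 *
 *     NvU32 mem_op_a = 0;                              // no target addr, SYSMEMBAR_DIS
 *     NvU32 mem_op_b = 0;
 *     NvU32 mem_op_c = NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL << 0;         // bit 0:0
 *     NvU32 mem_op_d = NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE << 27;  // bits 31:27
 *
 * GPC, REPLAY, ACK_TYPE and PAGE_TABLE_LEVEL all take their zero-valued
 * (ENABLE/NONE/NONE/ALL) encodings here, so they contribute no set bits.
 * The four words must be sent as consecutive methods, MEM_OP_D last.
 */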
#define NVC36F_SET_REFERENCE (0x00000050)
#define NVC36F_SET_REFERENCE_COUNT 31:0
#define NVC36F_SEM_ADDR_LO (0x0000005c)
#define NVC36F_SEM_ADDR_LO_OFFSET 31:2
#define NVC36F_SEM_ADDR_HI (0x00000060)
#define NVC36F_SEM_ADDR_HI_OFFSET 7:0
#define NVC36F_SEM_PAYLOAD_LO (0x00000064)
#define NVC36F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC36F_SEM_PAYLOAD_HI (0x00000068)
#define NVC36F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC36F_SEM_EXECUTE (0x0000006c)
#define NVC36F_SEM_EXECUTE_OPERATION 2:0
#define NVC36F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC36F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC36F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC36F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC36F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC36F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC36F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC36F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC36F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC36F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC36F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC36F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC36F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC36F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC36F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC36F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC36F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC36F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC36F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC36F_SEM_EXECUTE_REDUCTION 30:27
#define NVC36F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC36F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC36F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC36F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC36F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC36F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC36F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC36F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC36F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC36F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC36F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
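/*
 * Usage sketch (illustrative only): a 64-bit semaphore release programs the
 * five SEM_* methods above in order; the final SEM_EXECUTE word, assembled
 * by hand from the field positions, would be roughly:
 *
 *     NvU32 execute = NVC36F_SEM_EXECUTE_OPERATION_RELEASE               // bits 2:0
 *                   | (NVC36F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT     << 24)
 *                   | (NVC36F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS  << 25)
 *                   | (NVC36F_SEM_EXECUTE_RELEASE_WFI_DIS        << 20);
 *
 * SEM_ADDR_LO/HI take the low and high parts of the (4-byte aligned)
 * semaphore address and SEM_PAYLOAD_LO/HI the 64-bit payload.  Whether WFI
 * or a timestamp is wanted depends entirely on the caller.
 */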
#define NVC36F_WFI (0x00000078)
#define NVC36F_WFI_SCOPE 0:0
#define NVC36F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC36F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC36F_WFI_SCOPE_ALL 0x00000001
#define NVC36F_CRC_CHECK (0x0000007c)
#define NVC36F_CRC_CHECK_VALUE 31:0
#define NVC36F_YIELD (0x00000080)
#define NVC36F_YIELD_OP 1:0
#define NVC36F_YIELD_OP_NOP 0x00000000
#define NVC36F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
#define NVC36F_YIELD_OP_TSG 0x00000003
#define NVC36F_CLEAR_FAULTED (0x00000084)
#define NVC36F_CLEAR_FAULTED_CHID 11:0
#define NVC36F_CLEAR_FAULTED_TYPE 31:31
#define NVC36F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC36F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC36F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */
#define NVC36F_GP_ENTRY__SIZE 8
#define NVC36F_GP_ENTRY0_FETCH 0:0
#define NVC36F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVC36F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVC36F_GP_ENTRY0_GET 31:2
#define NVC36F_GP_ENTRY0_OPERAND 31:0
#define NVC36F_GP_ENTRY1_GET_HI 7:0
#define NVC36F_GP_ENTRY1_PRIV 8:8
#define NVC36F_GP_ENTRY1_PRIV_USER 0x00000000
#define NVC36F_GP_ENTRY1_PRIV_KERNEL 0x00000001
#define NVC36F_GP_ENTRY1_LEVEL 9:9
#define NVC36F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC36F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC36F_GP_ENTRY1_LENGTH 30:10
#define NVC36F_GP_ENTRY1_SYNC 31:31
#define NVC36F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVC36F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVC36F_GP_ENTRY1_OPCODE 7:0
#define NVC36F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVC36F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVC36F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVC36F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
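/*
 * Usage sketch (illustrative only): each GPFIFO entry is two 32-bit words
 * (NVC36F_GP_ENTRY__SIZE bytes) pointing at a pushbuffer segment.  For a
 * segment at 40-bit GPU VA pb_va containing num_words 32-bit method words,
 * the entry could be packed roughly as:
 *
 *     NvU32 entry0 = (NvU32)pb_va & ~0x3u;        // GET, bits 31:2
 *     NvU32 entry1 = (NvU32)(pb_va >> 32)         // GET_HI, bits 7:0
 *                  | (num_words << 10);           // LENGTH, bits 30:10
 *
 * FETCH, PRIV, LEVEL and SYNC keep their zero (UNCONDITIONAL/USER/MAIN/
 * PROCEED) encodings in this simple case.
 */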
/* dma method formats */
#define NVC36F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC36F_DMA_METHOD_ADDRESS 11:0
#define NVC36F_DMA_SUBDEVICE_MASK 15:4
#define NVC36F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC36F_DMA_TERT_OP 17:16
#define NVC36F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC36F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC36F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC36F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC36F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC36F_DMA_METHOD_COUNT_OLD 28:18
#define NVC36F_DMA_METHOD_COUNT 28:16
#define NVC36F_DMA_IMMD_DATA 28:16
#define NVC36F_DMA_SEC_OP 31:29
#define NVC36F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC36F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC36F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC36F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC36F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC36F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC36F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC36F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC36F_DMA_INCR_ADDRESS 11:0
#define NVC36F_DMA_INCR_SUBCHANNEL 15:13
#define NVC36F_DMA_INCR_COUNT 28:16
#define NVC36F_DMA_INCR_OPCODE 31:29
#define NVC36F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC36F_DMA_INCR_DATA 31:0
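/*
 * Usage sketch (illustrative only): an incrementing method header for
 * `count` data words directed at byte offset `method` on subchannel `subch`
 * packs as:
 *
 *     NvU32 header = (method >> 2)                          // ADDRESS, bits 11:0
 *                  | (subch << 13)                          // SUBCHANNEL, bits 15:13
 *                  | (count << 16)                          // COUNT, bits 28:16
 *                  | (NVC36F_DMA_INCR_OPCODE_VALUE << 29);  // OPCODE, bits 31:29
 *
 * The `count` data words follow the header in the pushbuffer, and the
 * method address advances by 4 bytes for each successive word.
 */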
/* dma non-incrementing method format */
#define NVC36F_DMA_NONINCR_ADDRESS 11:0
#define NVC36F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC36F_DMA_NONINCR_COUNT 28:16
#define NVC36F_DMA_NONINCR_OPCODE 31:29
#define NVC36F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC36F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC36F_DMA_ONEINCR_ADDRESS 11:0
#define NVC36F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC36F_DMA_ONEINCR_COUNT 28:16
#define NVC36F_DMA_ONEINCR_OPCODE 31:29
#define NVC36F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC36F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC36F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC36F_DMA_IMMD_ADDRESS 11:0
#define NVC36F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC36F_DMA_IMMD_DATA 28:16
#define NVC36F_DMA_IMMD_OPCODE 31:29
#define NVC36F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC36F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC36F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC36F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC36F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC36F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC36F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC36F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC36F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC36F_DMA_ENDSEG_OPCODE 31:29
#define NVC36F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC36F_DMA_ADDRESS 12:2
#define NVC36F_DMA_SUBCH 15:13
#define NVC36F_DMA_OPCODE3 17:16
#define NVC36F_DMA_OPCODE3_NONE (0x00000000)
#define NVC36F_DMA_COUNT 28:18
#define NVC36F_DMA_OPCODE 31:29
#define NVC36F_DMA_OPCODE_METHOD (0x00000000)
#define NVC36F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC36F_DMA_DATA 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc36f_h_ */

View File

@@ -0,0 +1,203 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc3b5_h_
#define _clc3b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define VOLTA_DMA_COPY_A (0x0000C3B5)
#define NVC3B5_SET_SEMAPHORE_A (0x00000240)
#define NVC3B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC3B5_SET_SEMAPHORE_B (0x00000244)
#define NVC3B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC3B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC3B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC3B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC3B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC3B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC3B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC3B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC3B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC3B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC3B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC3B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC3B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC3B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC3B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC3B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC3B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC3B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC3B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC3B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC3B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC3B5_LAUNCH_DMA (0x00000300)
#define NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC3B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC3B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC3B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC3B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC3B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC3B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC3B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC3B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC3B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC3B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC3B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC3B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC3B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC3B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC3B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC3B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC3B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC3B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC3B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC3B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC3B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC3B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
#define NVC3B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC3B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC3B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC3B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC3B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC3B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC3B5_OFFSET_IN_UPPER (0x00000400)
#define NVC3B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC3B5_OFFSET_IN_LOWER (0x00000404)
#define NVC3B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC3B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC3B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC3B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC3B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC3B5_PITCH_IN (0x00000410)
#define NVC3B5_PITCH_IN_VALUE 31:0
#define NVC3B5_PITCH_OUT (0x00000414)
#define NVC3B5_PITCH_OUT_VALUE 31:0
#define NVC3B5_LINE_LENGTH_IN (0x00000418)
#define NVC3B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC3B5_LINE_COUNT (0x0000041C)
#define NVC3B5_LINE_COUNT_VALUE 31:0
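/*
 * Usage sketch (illustrative only): a simple 1D virtual-to-virtual copy of
 * `size` bytes programs OFFSET_IN_* with the source VA, OFFSET_OUT_* with
 * the destination VA and LINE_LENGTH_IN with `size`, then launches with a
 * value built from the field positions above, roughly:
 *
 *     NvU32 launch = NVC3B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED   // bits 1:0
 *                  | (NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE       << 2)
 *                  | (NVC3B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH << 7)
 *                  | (NVC3B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH << 8);
 *
 * SRC_TYPE/DST_TYPE default to their zero VIRTUAL encodings and
 * MULTI_LINE_ENABLE stays FALSE, so LINE_COUNT and the pitch methods are
 * not needed for this single-line case.
 */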
#define NVC3B5_SET_REMAP_CONST_A (0x00000700)
#define NVC3B5_SET_REMAP_CONST_A_V 31:0
#define NVC3B5_SET_REMAP_CONST_B (0x00000704)
#define NVC3B5_SET_REMAP_CONST_B_V 31:0
#define NVC3B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC3B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC3B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc3b5_h

View File

@@ -0,0 +1,367 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc46f_h_
#define _clc46f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
/* class TURING_CHANNEL_GPFIFO */
/*
* Documentation for TURING_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
* Note there is no .mfs file for this class.
*/
#define TURING_CHANNEL_GPFIFO_A (0x0000C46F)
#define NVC46F_TYPEDEF TURING_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc46fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored05[0x5c]; /* 0090-01ff*/
} Nvc46fControl, TuringAControlGPFifo;
/* fields and values */
#define NVC46F_NUMBER_OF_SUBCHANNELS (8)
#define NVC46F_SET_OBJECT (0x00000000)
#define NVC46F_SET_OBJECT_NVCLASS 15:0
#define NVC46F_SET_OBJECT_ENGINE 20:16
#define NVC46F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC46F_ILLEGAL (0x00000004)
#define NVC46F_ILLEGAL_HANDLE 31:0
#define NVC46F_NOP (0x00000008)
#define NVC46F_NOP_HANDLE 31:0
#define NVC46F_SEMAPHOREA (0x00000010)
#define NVC46F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC46F_SEMAPHOREB (0x00000014)
#define NVC46F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC46F_SEMAPHOREC (0x00000018)
#define NVC46F_SEMAPHOREC_PAYLOAD 31:0
#define NVC46F_SEMAPHORED (0x0000001C)
#define NVC46F_SEMAPHORED_OPERATION 4:0
#define NVC46F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC46F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC46F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC46F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC46F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC46F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC46F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC46F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC46F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC46F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC46F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC46F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC46F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC46F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC46F_SEMAPHORED_REDUCTION 30:27
#define NVC46F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC46F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC46F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC46F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC46F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC46F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC46F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC46F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC46F_SEMAPHORED_FORMAT 31:31
#define NVC46F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC46F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC46F_NON_STALL_INTERRUPT (0x00000020)
#define NVC46F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC46F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC46F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC46F_MEM_OP_A (0x00000028)
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 6:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
#define NVC46F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
#define NVC46F_MEM_OP_B (0x0000002c)
#define NVC46F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC46F_MEM_OP_C (0x00000030)
#define NVC46F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC46F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC46F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC46F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC46F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC46F_MEM_OP_D (0x00000034)
#define NVC46F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC46F_MEM_OP_D_OPERATION 31:27
#define NVC46F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC46F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC46F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC46F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC46F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC46F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC46F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC46F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC46F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC46F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC46F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
#define NVC46F_SET_REFERENCE (0x00000050)
#define NVC46F_SET_REFERENCE_COUNT 31:0
#define NVC46F_SEM_ADDR_LO (0x0000005c)
#define NVC46F_SEM_ADDR_LO_OFFSET 31:2
#define NVC46F_SEM_ADDR_HI (0x00000060)
#define NVC46F_SEM_ADDR_HI_OFFSET 7:0
#define NVC46F_SEM_PAYLOAD_LO (0x00000064)
#define NVC46F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC46F_SEM_PAYLOAD_HI (0x00000068)
#define NVC46F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC46F_SEM_EXECUTE (0x0000006c)
#define NVC46F_SEM_EXECUTE_OPERATION 2:0
#define NVC46F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC46F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC46F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC46F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC46F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC46F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC46F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC46F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC46F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC46F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC46F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC46F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC46F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC46F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC46F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC46F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC46F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC46F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC46F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC46F_SEM_EXECUTE_REDUCTION 30:27
#define NVC46F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC46F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC46F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC46F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC46F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC46F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC46F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC46F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC46F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC46F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC46F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
#define NVC46F_WFI (0x00000078)
#define NVC46F_WFI_SCOPE 0:0
#define NVC46F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC46F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC46F_WFI_SCOPE_ALL 0x00000001
#define NVC46F_CRC_CHECK (0x0000007c)
#define NVC46F_CRC_CHECK_VALUE 31:0
#define NVC46F_YIELD (0x00000080)
#define NVC46F_YIELD_OP 1:0
#define NVC46F_YIELD_OP_NOP 0x00000000
#define NVC46F_YIELD_OP_RUNLIST_TIMESLICE 0x00000002
#define NVC46F_YIELD_OP_TSG 0x00000003
#define NVC46F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC46F_CLEAR_FAULTED_HANDLE 30:0
#define NVC46F_CLEAR_FAULTED_TYPE 31:31
#define NVC46F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC46F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC46F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */
#define NVC46F_GP_ENTRY__SIZE 8
#define NVC46F_GP_ENTRY0_FETCH 0:0
#define NVC46F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVC46F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVC46F_GP_ENTRY0_GET 31:2
#define NVC46F_GP_ENTRY0_OPERAND 31:0
#define NVC46F_GP_ENTRY1_GET_HI 7:0
#define NVC46F_GP_ENTRY1_LEVEL 9:9
#define NVC46F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC46F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC46F_GP_ENTRY1_LENGTH 30:10
#define NVC46F_GP_ENTRY1_SYNC 31:31
#define NVC46F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVC46F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVC46F_GP_ENTRY1_OPCODE 7:0
#define NVC46F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVC46F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVC46F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVC46F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
/* dma method formats */
#define NVC46F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC46F_DMA_METHOD_ADDRESS 11:0
#define NVC46F_DMA_SUBDEVICE_MASK 15:4
#define NVC46F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC46F_DMA_TERT_OP 17:16
#define NVC46F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC46F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC46F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC46F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC46F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC46F_DMA_METHOD_COUNT_OLD 28:18
#define NVC46F_DMA_METHOD_COUNT 28:16
#define NVC46F_DMA_IMMD_DATA 28:16
#define NVC46F_DMA_SEC_OP 31:29
#define NVC46F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC46F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC46F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC46F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC46F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC46F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC46F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC46F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC46F_DMA_INCR_ADDRESS 11:0
#define NVC46F_DMA_INCR_SUBCHANNEL 15:13
#define NVC46F_DMA_INCR_COUNT 28:16
#define NVC46F_DMA_INCR_OPCODE 31:29
#define NVC46F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC46F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC46F_DMA_NONINCR_ADDRESS 11:0
#define NVC46F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC46F_DMA_NONINCR_COUNT 28:16
#define NVC46F_DMA_NONINCR_OPCODE 31:29
#define NVC46F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC46F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC46F_DMA_ONEINCR_ADDRESS 11:0
#define NVC46F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC46F_DMA_ONEINCR_COUNT 28:16
#define NVC46F_DMA_ONEINCR_OPCODE 31:29
#define NVC46F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC46F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC46F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC46F_DMA_IMMD_ADDRESS 11:0
#define NVC46F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC46F_DMA_IMMD_DATA 28:16
#define NVC46F_DMA_IMMD_OPCODE 31:29
#define NVC46F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC46F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC46F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC46F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC46F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC46F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC46F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC46F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC46F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC46F_DMA_ENDSEG_OPCODE 31:29
#define NVC46F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC46F_DMA_ADDRESS 12:2
#define NVC46F_DMA_SUBCH 15:13
#define NVC46F_DMA_OPCODE3 17:16
#define NVC46F_DMA_OPCODE3_NONE (0x00000000)
#define NVC46F_DMA_COUNT 28:18
#define NVC46F_DMA_OPCODE 31:29
#define NVC46F_DMA_OPCODE_METHOD (0x00000000)
#define NVC46F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC46F_DMA_DATA 31:0
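/*
 * Illustrative sketch (not part of the class definition): packing an
 * incrementing-method header word from the NVC46F_DMA_INCR_* fields above.
 * Assumes "method" is the byte offset of the first method register and
 * "count" is the number of data dwords that follow the header; the helper
 * name is illustrative only.
 */
static inline NvU32 nvc46f_dma_incr_header(NvU32 method, NvU32 subch, NvU32 count)
{
    return (NVC46F_DMA_INCR_OPCODE_VALUE << 29) |   /* SEC_OP: incrementing, bits 31:29 */
           ((count & 0x1FFF) << 16) |               /* METHOD_COUNT, bits 28:16 */
           ((subch & 0x7)   << 13) |                /* subchannel, bits 15:13 */
           ((method >> 2) & 0xFFF);                 /* method address in dwords, bits 11:0 */
}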
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc46f_h_ */

View File

@@ -0,0 +1,369 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc56f_h_
#define _clc56f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
/* class AMPERE_CHANNEL_GPFIFO */
/*
* Documentation for AMPERE_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
* Note there is no .mfs file for this class.
*/
#define AMPERE_CHANNEL_GPFIFO_A (0x0000C56F)
#define NVC56F_TYPEDEF AMPERE_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc56fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
 NvU32 Ignored05[0x5c];                            /* 0090-01ff*/
} Nvc56fControl, AmpereAControlGPFifo;
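/*
 * Illustrative sketch (not part of this header): one GPFIFO submission step
 * using the control structure above, assuming "ctrl" points to the channel's
 * mapped USERD region and "num_gpfifo_entries" is a power of two.  The helper
 * name and the wrap handling are illustrative only; on this class a doorbell
 * write normally follows the GPPut update.
 */
static inline int nvc56f_try_advance_gpput(AmpereAControlGPFifo *ctrl,
                                           NvU32 num_gpfifo_entries)
{
    NvU32 put  = ctrl->GPPut;
    NvU32 next = (put + 1) & (num_gpfifo_entries - 1);
    if (next == ctrl->GPGet)
        return 0;              /* GPFIFO full: wait for the GPU to consume entries */
    ctrl->GPPut = next;        /* publish the newly written GPFIFO entry */
    return 1;
}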
/* fields and values */
#define NVC56F_NUMBER_OF_SUBCHANNELS (8)
#define NVC56F_SET_OBJECT (0x00000000)
#define NVC56F_SET_OBJECT_NVCLASS 15:0
#define NVC56F_SET_OBJECT_ENGINE 20:16
#define NVC56F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC56F_ILLEGAL (0x00000004)
#define NVC56F_ILLEGAL_HANDLE 31:0
#define NVC56F_NOP (0x00000008)
#define NVC56F_NOP_HANDLE 31:0
#define NVC56F_SEMAPHOREA (0x00000010)
#define NVC56F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC56F_SEMAPHOREB (0x00000014)
#define NVC56F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC56F_SEMAPHOREC (0x00000018)
#define NVC56F_SEMAPHOREC_PAYLOAD 31:0
#define NVC56F_SEMAPHORED (0x0000001C)
#define NVC56F_SEMAPHORED_OPERATION 4:0
#define NVC56F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC56F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC56F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC56F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC56F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC56F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC56F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC56F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC56F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC56F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC56F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC56F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC56F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC56F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC56F_SEMAPHORED_REDUCTION 30:27
#define NVC56F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC56F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC56F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC56F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC56F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC56F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC56F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC56F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC56F_SEMAPHORED_FORMAT 31:31
#define NVC56F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC56F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC56F_NON_STALL_INTERRUPT (0x00000020)
#define NVC56F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC56F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC56F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC56F_MEM_OP_A (0x00000028)
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 6:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
#define NVC56F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
#define NVC56F_MEM_OP_B (0x0000002c)
#define NVC56F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC56F_MEM_OP_C (0x00000030)
#define NVC56F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC56F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC56F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC56F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC56F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC56F_MEM_OP_D (0x00000034)
#define NVC56F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC56F_MEM_OP_D_OPERATION 31:27
#define NVC56F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC56F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC56F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC56F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC56F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC56F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC56F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC56F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC56F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC56F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC56F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
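/*
 * Illustrative sketch (not part of this header): data dwords for a
 * MEM_OP_A..MEM_OP_D sequence that issues a system-scope membar.  The values
 * are meant to be sent to NVC56F_MEM_OP_A through NVC56F_MEM_OP_D in order;
 * only the payloads are shown here, not the method headers.
 */
static const NvU32 nvc56f_membar_sys_payload[4] = {
    0,                                            /* MEM_OP_A: unused for MEMBAR */
    0,                                            /* MEM_OP_B: unused for MEMBAR */
    NVC56F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR,       /* MEM_OP_C: membar type, bits 2:0 */
    NVC56F_MEM_OP_D_OPERATION_MEMBAR << 27        /* MEM_OP_D: OPERATION, bits 31:27 */
};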
#define NVC56F_SET_REFERENCE (0x00000050)
#define NVC56F_SET_REFERENCE_COUNT 31:0
#define NVC56F_SEM_ADDR_LO (0x0000005c)
#define NVC56F_SEM_ADDR_LO_OFFSET 31:2
#define NVC56F_SEM_ADDR_HI (0x00000060)
#define NVC56F_SEM_ADDR_HI_OFFSET 7:0
#define NVC56F_SEM_PAYLOAD_LO (0x00000064)
#define NVC56F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC56F_SEM_PAYLOAD_HI (0x00000068)
#define NVC56F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC56F_SEM_EXECUTE (0x0000006c)
#define NVC56F_SEM_EXECUTE_OPERATION 2:0
#define NVC56F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC56F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC56F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC56F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC56F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC56F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC56F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC56F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC56F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC56F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC56F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC56F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC56F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC56F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC56F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC56F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC56F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC56F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC56F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC56F_SEM_EXECUTE_REDUCTION 30:27
#define NVC56F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC56F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC56F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC56F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC56F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC56F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC56F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC56F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC56F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC56F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC56F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
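/*
 * Illustrative sketch (not part of this header): payload dwords for a 32-bit
 * semaphore release, sent as one incrementing method run starting at
 * NVC56F_SEM_ADDR_LO.  "gpu_va" is a 4-byte-aligned GPU virtual address; the
 * helper name is illustrative only.
 */
static inline void nvc56f_fill_sem_release(NvU64 gpu_va, NvU32 payload, NvU32 data[5])
{
    data[0] = (NvU32)(gpu_va & 0xFFFFFFFC);          /* SEM_ADDR_LO: offset bits 31:2 */
    data[1] = (NvU32)(gpu_va >> 32) & 0xFF;          /* SEM_ADDR_HI: offset bits 39:32 */
    data[2] = payload;                               /* SEM_PAYLOAD_LO */
    data[3] = 0;                                     /* SEM_PAYLOAD_HI: unused for 32-bit payload */
    data[4] = NVC56F_SEM_EXECUTE_OPERATION_RELEASE;  /* SEM_EXECUTE: 32-bit release, no WFI */
}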
#define NVC56F_WFI (0x00000078)
#define NVC56F_WFI_SCOPE 0:0
#define NVC56F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC56F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC56F_WFI_SCOPE_ALL 0x00000001
#define NVC56F_YIELD (0x00000080)
#define NVC56F_YIELD_OP 1:0
#define NVC56F_YIELD_OP_NOP 0x00000000
#define NVC56F_YIELD_OP_TSG 0x00000003
#define NVC56F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC56F_CLEAR_FAULTED_HANDLE 30:0
#define NVC56F_CLEAR_FAULTED_TYPE 31:31
#define NVC56F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC56F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC56F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */
#define NVC56F_GP_ENTRY__SIZE 8
#define NVC56F_GP_ENTRY0_FETCH 0:0
#define NVC56F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVC56F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVC56F_GP_ENTRY0_GET 31:2
#define NVC56F_GP_ENTRY0_OPERAND 31:0
#define NVC56F_GP_ENTRY1_GET_HI 7:0
#define NVC56F_GP_ENTRY1_LEVEL 9:9
#define NVC56F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC56F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC56F_GP_ENTRY1_LENGTH 30:10
#define NVC56F_GP_ENTRY1_SYNC 31:31
#define NVC56F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVC56F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVC56F_GP_ENTRY1_OPCODE 7:0
#define NVC56F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVC56F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVC56F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVC56F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
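/*
 * Illustrative sketch (not part of this header): packing one 8-byte GPFIFO
 * entry from the GP_ENTRY0/GP_ENTRY1 fields above.  "pushbuf_va" is the GPU
 * virtual address of the pushbuffer segment and "length_bytes" its size in
 * bytes; the helper name is illustrative only.
 */
static inline void nvc56f_pack_gp_entry(NvU64 pushbuf_va, NvU32 length_bytes, NvU32 entry[2])
{
    entry[0] = (NvU32)(pushbuf_va & 0xFFFFFFFC);           /* GET, bits 31:2 */
    entry[1] = ((NvU32)(pushbuf_va >> 32) & 0xFF) |        /* GET_HI, bits 39:32 */
               (((length_bytes / 4) & 0x1FFFFF) << 10);    /* LENGTH in dwords, bits 30:10 */
}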
/* dma method formats */
#define NVC56F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC56F_DMA_METHOD_ADDRESS 11:0
#define NVC56F_DMA_SUBDEVICE_MASK 15:4
#define NVC56F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC56F_DMA_TERT_OP 17:16
#define NVC56F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC56F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC56F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC56F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC56F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC56F_DMA_METHOD_COUNT_OLD 28:18
#define NVC56F_DMA_METHOD_COUNT 28:16
#define NVC56F_DMA_IMMD_DATA 28:16
#define NVC56F_DMA_SEC_OP 31:29
#define NVC56F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC56F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC56F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC56F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC56F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC56F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC56F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC56F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC56F_DMA_INCR_ADDRESS 11:0
#define NVC56F_DMA_INCR_SUBCHANNEL 15:13
#define NVC56F_DMA_INCR_COUNT 28:16
#define NVC56F_DMA_INCR_OPCODE 31:29
#define NVC56F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC56F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC56F_DMA_NONINCR_ADDRESS 11:0
#define NVC56F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC56F_DMA_NONINCR_COUNT 28:16
#define NVC56F_DMA_NONINCR_OPCODE 31:29
#define NVC56F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC56F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC56F_DMA_ONEINCR_ADDRESS 11:0
#define NVC56F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC56F_DMA_ONEINCR_COUNT 28:16
#define NVC56F_DMA_ONEINCR_OPCODE 31:29
#define NVC56F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC56F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC56F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC56F_DMA_IMMD_ADDRESS 11:0
#define NVC56F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC56F_DMA_IMMD_DATA 28:16
#define NVC56F_DMA_IMMD_OPCODE 31:29
#define NVC56F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC56F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC56F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC56F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC56F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC56F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC56F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC56F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC56F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC56F_DMA_ENDSEG_OPCODE 31:29
#define NVC56F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC56F_DMA_ADDRESS 12:2
#define NVC56F_DMA_SUBCH 15:13
#define NVC56F_DMA_OPCODE3 17:16
#define NVC56F_DMA_OPCODE3_NONE (0x00000000)
#define NVC56F_DMA_COUNT 28:18
#define NVC56F_DMA_OPCODE 31:29
#define NVC56F_DMA_OPCODE_METHOD (0x00000000)
#define NVC56F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC56F_DMA_DATA 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc56f_h_ */

View File

@@ -0,0 +1,352 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc5b5_h_
#define _clc5b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define TURING_DMA_COPY_A (0x0000C5B5)
typedef volatile struct _clc5b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x3F];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 Reserved03[0x2];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved04[0x6];
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
NvV32 Reserved05[0x1C];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0xB8];
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved08[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved09[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved10[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved11[0x3BA];
} turing_dma_copy_aControlPio;
#define NVC5B5_NOP (0x00000100)
#define NVC5B5_NOP_PARAMETER 31:0
#define NVC5B5_PM_TRIGGER (0x00000140)
#define NVC5B5_PM_TRIGGER_V 31:0
#define NVC5B5_SET_SEMAPHORE_A (0x00000240)
#define NVC5B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC5B5_SET_SEMAPHORE_B (0x00000244)
#define NVC5B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC5B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC5B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC5B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC5B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC5B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC5B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC5B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC5B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC5B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC5B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC5B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC5B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC5B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC5B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC5B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC5B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC5B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
#define NVC5B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
#define NVC5B5_SET_PAGEOUT_START_PAUPPER_V 4:0
#define NVC5B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
#define NVC5B5_SET_PAGEOUT_START_PALOWER_V 31:0
#define NVC5B5_LAUNCH_DMA (0x00000300)
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC5B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC5B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC5B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC5B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC5B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC5B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC5B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC5B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC5B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC5B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC5B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC5B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC5B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC5B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC5B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC5B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC5B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC5B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC5B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC5B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC5B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC5B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC5B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_SRC_BYPASS_L2 20:20
#define NVC5B5_LAUNCH_DMA_SRC_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC5B5_LAUNCH_DMA_SRC_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC5B5_LAUNCH_DMA_DST_BYPASS_L2 21:21
#define NVC5B5_LAUNCH_DMA_DST_BYPASS_L2_USE_PTE_SETTING (0x00000000)
#define NVC5B5_LAUNCH_DMA_DST_BYPASS_L2_FORCE_VOLATILE (0x00000001)
#define NVC5B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC5B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC5B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC5B5_OFFSET_IN_UPPER (0x00000400)
#define NVC5B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC5B5_OFFSET_IN_LOWER (0x00000404)
#define NVC5B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC5B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC5B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC5B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC5B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC5B5_PITCH_IN (0x00000410)
#define NVC5B5_PITCH_IN_VALUE 31:0
#define NVC5B5_PITCH_OUT (0x00000414)
#define NVC5B5_PITCH_OUT_VALUE 31:0
#define NVC5B5_LINE_LENGTH_IN (0x00000418)
#define NVC5B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC5B5_LINE_COUNT (0x0000041C)
#define NVC5B5_LINE_COUNT_VALUE 31:0
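/*
 * Illustrative sketch (not part of this header): method/data pairs for a
 * simple 1D virtual-to-virtual copy of "size" bytes on TURING_DMA_COPY_A.
 * Pitch, block-size and remap state are left at their defaults, and the
 * struct and helper names are illustrative only.
 */
struct nvc5b5_method { NvU32 method; NvU32 data; };
static inline void nvc5b5_fill_linear_copy(NvU64 src_va, NvU64 dst_va, NvU32 size,
                                           struct nvc5b5_method m[6])
{
    m[0] = (struct nvc5b5_method){ NVC5B5_OFFSET_IN_UPPER,  (NvU32)(src_va >> 32) };
    m[1] = (struct nvc5b5_method){ NVC5B5_OFFSET_IN_LOWER,  (NvU32)src_va };
    m[2] = (struct nvc5b5_method){ NVC5B5_OFFSET_OUT_UPPER, (NvU32)(dst_va >> 32) };
    m[3] = (struct nvc5b5_method){ NVC5B5_OFFSET_OUT_LOWER, (NvU32)dst_va };
    m[4] = (struct nvc5b5_method){ NVC5B5_LINE_LENGTH_IN,   size };
    m[5] = (struct nvc5b5_method){ NVC5B5_LAUNCH_DMA,
            (NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED << 0) |   /* bits 1:0 */
            (NVC5B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE                << 2) |   /* bit 2 */
            (NVC5B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH          << 7) |   /* bit 7 */
            (NVC5B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH          << 8) };  /* bit 8 */
}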
#define NVC5B5_SET_REMAP_CONST_A (0x00000700)
#define NVC5B5_SET_REMAP_CONST_A_V 31:0
#define NVC5B5_SET_REMAP_CONST_B (0x00000704)
#define NVC5B5_SET_REMAP_CONST_B_V 31:0
#define NVC5B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC5B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
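/*
 * Illustrative sketch (not part of this header): a SET_REMAP_COMPONENTS value
 * that writes REMAP_CONST_A into every 4-byte destination element, i.e. a
 * copy-engine fill.  LAUNCH_DMA must then carry REMAP_ENABLE_TRUE, and the
 * transfer length is counted in elements rather than bytes; this encoding is
 * an assumption based on the field definitions above, and the macro name is
 * illustrative only.
 */
#define NVC5B5_EXAMPLE_REMAP_FILL_DWORD                                     \
    ((NVC5B5_SET_REMAP_COMPONENTS_DST_X_CONST_A          << 0)  |           \
     (NVC5B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR    << 16) |           \
     (NVC5B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE << 20) |           \
     (NVC5B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE << 24))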
#define NVC5B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC5B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC5B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC5B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC5B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC5B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC5B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC5B5_SET_DST_WIDTH (0x00000710)
#define NVC5B5_SET_DST_WIDTH_V 31:0
#define NVC5B5_SET_DST_HEIGHT (0x00000714)
#define NVC5B5_SET_DST_HEIGHT_V 31:0
#define NVC5B5_SET_DST_DEPTH (0x00000718)
#define NVC5B5_SET_DST_DEPTH_V 31:0
#define NVC5B5_SET_DST_LAYER (0x0000071C)
#define NVC5B5_SET_DST_LAYER_V 31:0
#define NVC5B5_SET_DST_ORIGIN (0x00000720)
#define NVC5B5_SET_DST_ORIGIN_X 15:0
#define NVC5B5_SET_DST_ORIGIN_Y 31:16
#define NVC5B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC5B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC5B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC5B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC5B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC5B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC5B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC5B5_SET_SRC_WIDTH (0x0000072C)
#define NVC5B5_SET_SRC_WIDTH_V 31:0
#define NVC5B5_SET_SRC_HEIGHT (0x00000730)
#define NVC5B5_SET_SRC_HEIGHT_V 31:0
#define NVC5B5_SET_SRC_DEPTH (0x00000734)
#define NVC5B5_SET_SRC_DEPTH_V 31:0
#define NVC5B5_SET_SRC_LAYER (0x00000738)
#define NVC5B5_SET_SRC_LAYER_V 31:0
#define NVC5B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC5B5_SET_SRC_ORIGIN_X 15:0
#define NVC5B5_SET_SRC_ORIGIN_Y 31:16
#define NVC5B5_SRC_ORIGIN_X (0x00000744)
#define NVC5B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC5B5_SRC_ORIGIN_Y (0x00000748)
#define NVC5B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC5B5_DST_ORIGIN_X (0x0000074C)
#define NVC5B5_DST_ORIGIN_X_VALUE 31:0
#define NVC5B5_DST_ORIGIN_Y (0x00000750)
#define NVC5B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC5B5_PM_TRIGGER_END (0x00001114)
#define NVC5B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc5b5_h_

View File

@@ -0,0 +1,352 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc6b5_h_
#define _clc6b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define AMPERE_DMA_COPY_A (0x0000C6B5)
typedef volatile struct _clc6b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x3F];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 Reserved03[0x2];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved04[0x6];
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
NvV32 Reserved05[0x1C];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0xB8];
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved08[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved09[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved10[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved11[0x3BA];
} ampere_dma_copy_aControlPio;
#define NVC6B5_NOP (0x00000100)
#define NVC6B5_NOP_PARAMETER 31:0
#define NVC6B5_PM_TRIGGER (0x00000140)
#define NVC6B5_PM_TRIGGER_V 31:0
#define NVC6B5_SET_SEMAPHORE_A (0x00000240)
#define NVC6B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC6B5_SET_SEMAPHORE_B (0x00000244)
#define NVC6B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC6B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC6B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC6B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC6B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC6B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC6B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC6B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC6B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC6B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC6B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC6B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC6B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC6B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC6B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC6B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC6B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC6B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC6B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC6B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC6B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC6B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC6B5_SET_DST_PHYS_MODE_FLA 9:9
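/*
 * Illustrative sketch (not part of this header): a SET_SRC_PHYS_MODE value for
 * a physical copy whose source lives in another GPU's framebuffer, reached
 * through peer aperture "peer_id".  LAUNCH_DMA_SRC_TYPE_PHYSICAL must be set
 * for this mode to take effect; the helper name is illustrative only.
 */
static inline NvU32 nvc6b5_src_phys_mode_peer(NvU32 peer_id)
{
    return NVC6B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM |   /* TARGET, bits 1:0 */
           ((peer_id & 0x7) << 6);                     /* PEER_ID, bits 8:6 */
}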
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
#define NVC6B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
#define NVC6B5_SET_PAGEOUT_START_PAUPPER_V 4:0
#define NVC6B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
#define NVC6B5_SET_PAGEOUT_START_PALOWER_V 31:0
#define NVC6B5_LAUNCH_DMA (0x00000300)
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC6B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC6B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC6B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC6B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC6B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC6B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC6B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC6B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC6B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC6B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC6B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC6B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC6B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC6B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC6B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC6B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC6B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC6B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC6B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC6B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC6B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC6B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC6B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC6B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC6B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC6B5_OFFSET_IN_UPPER (0x00000400)
#define NVC6B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC6B5_OFFSET_IN_LOWER (0x00000404)
#define NVC6B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC6B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC6B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC6B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC6B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC6B5_PITCH_IN (0x00000410)
#define NVC6B5_PITCH_IN_VALUE 31:0
#define NVC6B5_PITCH_OUT (0x00000414)
#define NVC6B5_PITCH_OUT_VALUE 31:0
#define NVC6B5_LINE_LENGTH_IN (0x00000418)
#define NVC6B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC6B5_LINE_COUNT (0x0000041C)
#define NVC6B5_LINE_COUNT_VALUE 31:0
#define NVC6B5_SET_REMAP_CONST_A (0x00000700)
#define NVC6B5_SET_REMAP_CONST_A_V 31:0
#define NVC6B5_SET_REMAP_CONST_B (0x00000704)
#define NVC6B5_SET_REMAP_CONST_B_V 31:0
#define NVC6B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC6B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC6B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC6B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC6B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC6B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC6B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC6B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC6B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC6B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC6B5_SET_DST_WIDTH (0x00000710)
#define NVC6B5_SET_DST_WIDTH_V 31:0
#define NVC6B5_SET_DST_HEIGHT (0x00000714)
#define NVC6B5_SET_DST_HEIGHT_V 31:0
#define NVC6B5_SET_DST_DEPTH (0x00000718)
#define NVC6B5_SET_DST_DEPTH_V 31:0
#define NVC6B5_SET_DST_LAYER (0x0000071C)
#define NVC6B5_SET_DST_LAYER_V 31:0
#define NVC6B5_SET_DST_ORIGIN (0x00000720)
#define NVC6B5_SET_DST_ORIGIN_X 15:0
#define NVC6B5_SET_DST_ORIGIN_Y 31:16
#define NVC6B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC6B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC6B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC6B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC6B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC6B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC6B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC6B5_SET_SRC_WIDTH (0x0000072C)
#define NVC6B5_SET_SRC_WIDTH_V 31:0
#define NVC6B5_SET_SRC_HEIGHT (0x00000730)
#define NVC6B5_SET_SRC_HEIGHT_V 31:0
#define NVC6B5_SET_SRC_DEPTH (0x00000734)
#define NVC6B5_SET_SRC_DEPTH_V 31:0
#define NVC6B5_SET_SRC_LAYER (0x00000738)
#define NVC6B5_SET_SRC_LAYER_V 31:0
#define NVC6B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC6B5_SET_SRC_ORIGIN_X 15:0
#define NVC6B5_SET_SRC_ORIGIN_Y 31:16
#define NVC6B5_SRC_ORIGIN_X (0x00000744)
#define NVC6B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC6B5_SRC_ORIGIN_Y (0x00000748)
#define NVC6B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC6B5_DST_ORIGIN_X (0x0000074C)
#define NVC6B5_DST_ORIGIN_X_VALUE 31:0
#define NVC6B5_DST_ORIGIN_Y (0x00000750)
#define NVC6B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC6B5_PM_TRIGGER_END (0x00001114)
#define NVC6B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc6b5_h_

View File

@@ -0,0 +1,379 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc7b5_h_
#define _clc7b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define AMPERE_DMA_COPY_B (0x0000C7B5)
typedef volatile struct _clc7b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x6];
NvV32 SetGlobalCounterUpper; // 0x00000280 - 0x00000283
NvV32 SetGlobalCounterLower; // 0x00000284 - 0x00000287
NvV32 SetPageoutStartPAUpper; // 0x00000288 - 0x0000028B
NvV32 SetPageoutStartPALower; // 0x0000028C - 0x0000028F
NvV32 Reserved06[0x1C];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved07[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved08[0xB8];
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved09[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved10[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved11[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved12[0x3BA];
} ampere_dma_copy_bControlPio;
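/*
 * Illustrative sketch, an addition rather than part of the generated header:
 * the Reserved padding arrays above are sized so that each named field lands
 * at the byte offset shown in its trailing comment, matching the NVC7B5_*
 * method offsets defined below.  Assuming C11 and <stddef.h>, one such
 * correspondence can be checked at compile time:
 */
#include <stddef.h>
_Static_assert(offsetof(ampere_dma_copy_bControlPio, LaunchDma) == 0x300,
               "LaunchDma should sit at the NVC7B5_LAUNCH_DMA method offset");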
#define NVC7B5_NOP (0x00000100)
#define NVC7B5_NOP_PARAMETER 31:0
#define NVC7B5_PM_TRIGGER (0x00000140)
#define NVC7B5_PM_TRIGGER_V 31:0
#define NVC7B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC7B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC7B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC7B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC7B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC7B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 16:0
#define NVC7B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC7B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define NVC7B5_SET_SEMAPHORE_A (0x00000240)
#define NVC7B5_SET_SEMAPHORE_A_UPPER 16:0
#define NVC7B5_SET_SEMAPHORE_B (0x00000244)
#define NVC7B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC7B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC7B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC7B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC7B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC7B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC7B5_SET_RENDER_ENABLE_A_UPPER 7:0
#define NVC7B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC7B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC7B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC7B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC7B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC7B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC7B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC7B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC7B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC7B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC7B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC7B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC7B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC7B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC7B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC7B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC7B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC7B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER (0x00000280)
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER_V 31:0
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER (0x00000284)
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER_V 31:0
#define NVC7B5_SET_PAGEOUT_START_PAUPPER (0x00000288)
#define NVC7B5_SET_PAGEOUT_START_PAUPPER_V 4:0
#define NVC7B5_SET_PAGEOUT_START_PALOWER (0x0000028C)
#define NVC7B5_SET_PAGEOUT_START_PALOWER_V 31:0
#define NVC7B5_LAUNCH_DMA (0x00000300)
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC7B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC7B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC7B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC7B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC7B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC7B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC7B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC7B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC7B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC7B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC7B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC7B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC7B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC7B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC7B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC7B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC7B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC7B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC7B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC7B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC7B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC7B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC7B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_VPRMODE 23:22
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS (0x00000002)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID (0x00000003)
#define NVC7B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
#define NVC7B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
#define NVC7B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC7B5_OFFSET_IN_UPPER (0x00000400)
#define NVC7B5_OFFSET_IN_UPPER_UPPER 16:0
#define NVC7B5_OFFSET_IN_LOWER (0x00000404)
#define NVC7B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC7B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC7B5_OFFSET_OUT_UPPER_UPPER 16:0
#define NVC7B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC7B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC7B5_PITCH_IN (0x00000410)
#define NVC7B5_PITCH_IN_VALUE 31:0
#define NVC7B5_PITCH_OUT (0x00000414)
#define NVC7B5_PITCH_OUT_VALUE 31:0
#define NVC7B5_LINE_LENGTH_IN (0x00000418)
#define NVC7B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC7B5_LINE_COUNT (0x0000041C)
#define NVC7B5_LINE_COUNT_VALUE 31:0
#define NVC7B5_SET_REMAP_CONST_A (0x00000700)
#define NVC7B5_SET_REMAP_CONST_A_V 31:0
#define NVC7B5_SET_REMAP_CONST_B (0x00000704)
#define NVC7B5_SET_REMAP_CONST_B_V 31:0
#define NVC7B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC7B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC7B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC7B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC7B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC7B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC7B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC7B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC7B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC7B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC7B5_SET_DST_WIDTH (0x00000710)
#define NVC7B5_SET_DST_WIDTH_V 31:0
#define NVC7B5_SET_DST_HEIGHT (0x00000714)
#define NVC7B5_SET_DST_HEIGHT_V 31:0
#define NVC7B5_SET_DST_DEPTH (0x00000718)
#define NVC7B5_SET_DST_DEPTH_V 31:0
#define NVC7B5_SET_DST_LAYER (0x0000071C)
#define NVC7B5_SET_DST_LAYER_V 31:0
#define NVC7B5_SET_DST_ORIGIN (0x00000720)
#define NVC7B5_SET_DST_ORIGIN_X 15:0
#define NVC7B5_SET_DST_ORIGIN_Y 31:16
#define NVC7B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC7B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC7B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC7B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC7B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC7B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC7B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC7B5_SET_SRC_WIDTH (0x0000072C)
#define NVC7B5_SET_SRC_WIDTH_V 31:0
#define NVC7B5_SET_SRC_HEIGHT (0x00000730)
#define NVC7B5_SET_SRC_HEIGHT_V 31:0
#define NVC7B5_SET_SRC_DEPTH (0x00000734)
#define NVC7B5_SET_SRC_DEPTH_V 31:0
#define NVC7B5_SET_SRC_LAYER (0x00000738)
#define NVC7B5_SET_SRC_LAYER_V 31:0
#define NVC7B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC7B5_SET_SRC_ORIGIN_X 15:0
#define NVC7B5_SET_SRC_ORIGIN_Y 31:16
#define NVC7B5_SRC_ORIGIN_X (0x00000744)
#define NVC7B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC7B5_SRC_ORIGIN_Y (0x00000748)
#define NVC7B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC7B5_DST_ORIGIN_X (0x0000074C)
#define NVC7B5_DST_ORIGIN_X_VALUE 31:0
#define NVC7B5_DST_ORIGIN_Y (0x00000750)
#define NVC7B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC7B5_PM_TRIGGER_END (0x00001114)
#define NVC7B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc7b5_h
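/*
 * Illustrative sketch, not part of the original header: the NVC7B5_LAUNCH_DMA
 * field macros above give the bit range of each field, so a launch word can
 * be assembled by shifting each enumerant to its field's low bit.  Assuming
 * plain shifts rather than the driver's field-packing helpers, a minimal
 * pitch-to-pitch, non-pipelined copy might use (helper name is hypothetical):
 */
static inline NvV32 nvc7b5_launch_dma_pitch_copy(void)
{
    NvV32 v = 0;
    v |= NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED << 0;  /* bits 1:0 */
    v |= NVC7B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE                << 2;  /* bit  2   */
    v |= NVC7B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH          << 7;  /* bit  7   */
    v |= NVC7B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH          << 8;  /* bit  8   */
    v |= NVC7B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL                 << 12; /* bit  12  */
    v |= NVC7B5_LAUNCH_DMA_DST_TYPE_VIRTUAL                 << 13; /* bit  13  */
    return v;
}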

View File

@@ -0,0 +1,51 @@
/*******************************************************************************
Copyright (c) 2013-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _ctrl2080mc_h_
#define _ctrl2080mc_h_
/* valid architecture values */
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_T13X (0xE0000013)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000 (0x00000110)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200 (0x00000120)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100 (0x00000130)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100 (0x00000140)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100 (0x00000160)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100 (0x00000170)
/* valid ARCHITECTURE_GP10x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100 (0x00000000)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP000 (0x00000001)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100 (0x00000000)
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000 (0x00000001)
#endif /* _ctrl2080mc_h_ */
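/*
 * Illustrative sketch, not part of the control header: callers that have
 * obtained the architecture value from an arch-info query can compare it
 * against the constants above.  The helper below is hypothetical and assumes
 * a plain unsigned 32-bit architecture value:
 */
static inline int nv2080_arch_is_ampere(unsigned int arch)
{
    return arch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100;
}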

View File

@@ -0,0 +1,480 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __ga100_dev_fault_h__
#define __ga100_dev_fault_h__
/* This file is autogenerated. Do not edit */
#define NV_PFAULT /* ----G */
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 64 /* */
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
#define NV_PFAULT_MMU_ENG_ID_IFB 9 /* */
#define NV_PFAULT_MMU_ENG_ID_FLA 4 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1 128 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2 192 /* */
#define NV_PFAULT_MMU_ENG_ID_SEC 14 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF 8 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC0 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC1 26 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC2 27 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC3 28 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC4 29 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG0 30 /* */
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE0 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE1 16 /* */
#define NV_PFAULT_MMU_ENG_ID_CE2 17 /* */
#define NV_PFAULT_MMU_ENG_ID_CE3 18 /* */
#define NV_PFAULT_MMU_ENG_ID_CE4 19 /* */
#define NV_PFAULT_MMU_ENG_ID_CE5 20 /* */
#define NV_PFAULT_MMU_ENG_ID_CE6 21 /* */
#define NV_PFAULT_MMU_ENG_ID_CE7 22 /* */
#define NV_PFAULT_MMU_ENG_ID_CE8 23 /* */
#define NV_PFAULT_MMU_ENG_ID_CE9 24 /* */
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 6 /* */
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC0 11 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC1 12 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC2 13 /* */
#define NV_PFAULT_MMU_ENG_ID_OFA0 10 /* */
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 31 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST0 32 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST1 33 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST2 34 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST3 35 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST4 36 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST5 37 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST6 38 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST7 39 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST8 40 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST9 41 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST10 42 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST11 43 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST12 44 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST13 45 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST14 46 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST15 47 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST16 48 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST17 49 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST18 50 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST19 51 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST20 52 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST21 53 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST22 54 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST23 55 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST24 56 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST25 57 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST26 58 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST27 59 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST28 60 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST29 61 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST30 62 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST31 63 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN0 128 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN1 129 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN2 130 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN3 131 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN4 132 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN5 133 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN6 134 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN7 135 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN8 136 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN9 137 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN10 138 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN11 139 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN12 140 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN13 141 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN14 142 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN15 143 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN16 144 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN17 145 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN18 146 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN19 147 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN20 148 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN21 149 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN22 150 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN23 151 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN24 152 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN25 153 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN26 154 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN27 155 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN28 156 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN29 157 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN30 158 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN31 159 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN32 160 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN33 161 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN34 162 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN35 163 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN36 164 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN37 165 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN38 166 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN39 167 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN40 168 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN41 169 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN42 170 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN43 171 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN44 172 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN45 173 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN46 174 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN47 175 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN48 176 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN49 177 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN50 178 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN51 179 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN52 180 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN53 181 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN54 182 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN55 183 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN56 184 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN57 185 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN58 186 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN59 187 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN60 188 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN61 189 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN62 190 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN63 191 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN0 192 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN1 193 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN2 194 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN3 195 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN4 196 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN5 197 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN6 198 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN7 199 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN8 200 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN9 201 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN10 202 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN11 203 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN12 204 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN13 205 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN14 206 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN15 207 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN16 208 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN17 209 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN18 210 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN19 211 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN20 212 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN21 213 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN22 214 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN23 215 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN24 216 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN25 217 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN26 218 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN27 219 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN28 220 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN29 221 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN30 222 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN31 223 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN32 224 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN33 225 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN34 226 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN35 227 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN36 228 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN37 229 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN38 230 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN39 231 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN40 232 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN41 233 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN42 234 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN43 235 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN44 236 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN45 237 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN46 238 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN47 239 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN48 240 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN49 241 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN50 242 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN51 243 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN52 244 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN53 245 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN54 246 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN55 247 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN56 248 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN57 249 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN58 250 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN59 251 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN60 252 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN61 253 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN62 254 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN63 255 /* */
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
#define NV_PFAULT_CLIENT 14:8 /* */
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_0 0x00000070 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_1 0x00000071 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_2 0x00000072 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_3 0x00000073 /* */
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_0 0x00000018 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_1 0x00000019 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_2 0x0000001A /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_3 0x0000001B /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_4 0x0000001C /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_5 0x0000001D /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_6 0x0000001E /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_7 0x0000001F /* */
#define NV_PFAULT_CLIENT_GPC_RGG_UTLB 0x00000020 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_8 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_9 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_10 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_11 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_12 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_13 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_14 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_15 0x00000038 /* */
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_DISPNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_FE0 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FECS0 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC0 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_ACTRS 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
#define NV_PFAULT_CLIENT_HUB_PERF0 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
#define NV_PFAULT_CLIENT_HUB_SKED0 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
#define NV_PFAULT_CLIENT_HUB_DWBIF 0x00000036 /* */
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC1 0x0000003A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC2 0x0000003B /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG0 0x0000003C /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC3 0x0000003D /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC4 0x0000003E /* */
#define NV_PFAULT_CLIENT_HUB_OFA0 0x0000003F /* */
#define NV_PFAULT_CLIENT_HUB_HSCE10 0x00000040 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE11 0x00000041 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE12 0x00000042 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE13 0x00000043 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE14 0x00000044 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE15 0x00000045 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X8 0x00000046 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X9 0x00000047 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X10 0x00000048 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X11 0x00000049 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X12 0x0000004A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X13 0x0000004B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X14 0x0000004C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X15 0x0000004D /* */
#define NV_PFAULT_CLIENT_HUB_FE1 0x0000004E /* */
#define NV_PFAULT_CLIENT_HUB_FE2 0x0000004F /* */
#define NV_PFAULT_CLIENT_HUB_FE3 0x00000050 /* */
#define NV_PFAULT_CLIENT_HUB_FE4 0x00000051 /* */
#define NV_PFAULT_CLIENT_HUB_FE5 0x00000052 /* */
#define NV_PFAULT_CLIENT_HUB_FE6 0x00000053 /* */
#define NV_PFAULT_CLIENT_HUB_FE7 0x00000054 /* */
#define NV_PFAULT_CLIENT_HUB_FECS1 0x00000055 /* */
#define NV_PFAULT_CLIENT_HUB_FECS2 0x00000056 /* */
#define NV_PFAULT_CLIENT_HUB_FECS3 0x00000057 /* */
#define NV_PFAULT_CLIENT_HUB_FECS4 0x00000058 /* */
#define NV_PFAULT_CLIENT_HUB_FECS5 0x00000059 /* */
#define NV_PFAULT_CLIENT_HUB_FECS6 0x0000005A /* */
#define NV_PFAULT_CLIENT_HUB_FECS7 0x0000005B /* */
#define NV_PFAULT_CLIENT_HUB_SKED1 0x0000005C /* */
#define NV_PFAULT_CLIENT_HUB_SKED2 0x0000005D /* */
#define NV_PFAULT_CLIENT_HUB_SKED3 0x0000005E /* */
#define NV_PFAULT_CLIENT_HUB_SKED4 0x0000005F /* */
#define NV_PFAULT_CLIENT_HUB_SKED5 0x00000060 /* */
#define NV_PFAULT_CLIENT_HUB_SKED6 0x00000061 /* */
#define NV_PFAULT_CLIENT_HUB_SKED7 0x00000062 /* */
#define NV_PFAULT_CLIENT_HUB_ESC 0x00000063 /* */
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
#define NV_PFAULT_GPC_ID 28:24 /* */
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
#define NV_PFAULT_REPLAYABLE_FAULT_EN 30:30 /* */
#define NV_PFAULT_VALID 31:31 /* */
#endif // __ga100_dev_fault_h__
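/*
 * Illustrative sketch, not part of the generated header: the NV_PFAULT_*
 * range macros above describe fields packed into a 32-bit fault-information
 * word.  Assuming plain shifts and masks (the helper names are hypothetical),
 * the most commonly consulted fields decode as:
 */
static inline unsigned int nv_pfault_fault_type(unsigned int info)
{
    return info & 0x1Fu;              /* NV_PFAULT_FAULT_TYPE, bits 4:0    */
}
static inline unsigned int nv_pfault_client(unsigned int info)
{
    return (info >> 8) & 0x7Fu;       /* NV_PFAULT_CLIENT, bits 14:8       */
}
static inline unsigned int nv_pfault_access_type(unsigned int info)
{
    return (info >> 16) & 0xFu;       /* NV_PFAULT_ACCESS_TYPE, bits 19:16 */
}
static inline unsigned int nv_pfault_valid(unsigned int info)
{
    return (info >> 31) & 0x1u;       /* NV_PFAULT_VALID, bit 31           */
}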

View File

@@ -0,0 +1,782 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __ga100_dev_runlist_h__
#define __ga100_dev_runlist_h__
/* This file is autogenerated. Do not edit */
#define NV_RUNLIST 0x000003ff:0x00000000 /* RW--D */
#define NV_CHRAM 0x00001fff:0x00000000 /* RW--D */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK 0x040 /* RW-4R */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 8 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
#define NV_RUNLIST_INT_CYA_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE 0x044 /* RW-4R */
#define NV_RUNLIST_INT_CYA_SPARE__PRIV_LEVEL_MASK 0x040 /* */
#define NV_RUNLIST_INT_CYA_SPARE_DATA 31:0 /* RWIUF */
#define NV_RUNLIST_INT_CYA_SPARE_DATA_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE 0:0 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL 1:1 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_1MTHD 0x00000000 /* */
#define NV_RUNLIST_INT_CYA_SPARE_FORCE_FE_MTHD_THROTTLE_VAL_2MTHD 0x00000001 /* */
#define NV_RUNLIST_CONFIG 0x000 /* RW-4R */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH 0:0 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_WEAK 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_SEM_ACQ_STRENGTH_STRONG 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH 4:4 /* RWIVF */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_WEAK 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SEM_REL_STRENGTH_STRONG 0x00000001 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT 9:8 /* RWIVF */
#define NV_RUNLIST_CONFIG_L2_EVICT_FIRST 0x00000000 /* RWI-V */
#define NV_RUNLIST_CONFIG_L2_EVICT_NORMAL 0x00000001 /* RW--V */
#define NV_RUNLIST_CONFIG_L2_EVICT_LAST 0x00000002 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4 10:10 /* RWXVF */
#define NV_RUNLIST_CONFIG_SUBCH4_INACTIVE 0x00000000 /* RW--V */
#define NV_RUNLIST_CONFIG_SUBCH4_ACTIVE 0x00000001 /* RW--V */
#define NV_RUNLIST_CHANNEL_CONFIG 0x004 /* R--4R */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2 3:0 /* C--UF */
#define NV_RUNLIST_CHANNEL_CONFIG_NUM_CHANNELS_LOG2_2K 11 /* C---V */
#define NV_RUNLIST_CHANNEL_CONFIG_CHRAM_BAR0_OFFSET 31:4 /* R-XVF */
#define NV_RUNLIST_DOORBELL_CONFIG 0x008 /* R--4R */
#define NV_RUNLIST_DOORBELL_CONFIG_ID 31:16 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG 0x00C /* R--4R */
#define NV_RUNLIST_FB_CONFIG_FB_THREAD_ID 7:0 /* R-XVF */
#define NV_RUNLIST_FB_CONFIG_ESC_ID 15:8 /* R-XVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG(i) (0x300+(i)*4) /* RW-4A */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG__SIZE_1 64 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK 11:0 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW 10:0 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_MASK_HW_INIT 2047 /* RWI-V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET 27:16 /* */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW 26:16 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_SET_HW_INIT 0x0 /* RWI-V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE 31:31 /* RWIVF */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_TRUE 1 /* RW--V */
#define NV_RUNLIST_VIRTUAL_CHANNEL_CFG_PENDING_ENABLE_FALSE 0 /* RWI-V */
#define NV_RUNLIST_PBDMA_CONFIG(i) (0x010+(i)*4) /* R--4A */
#define NV_RUNLIST_PBDMA_CONFIG__SIZE_1 2 /* */
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_ID 7:0 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_PBDMA_BAR0_OFFSET 25:10 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_VALID 31:31 /* R-XUF */
#define NV_RUNLIST_PBDMA_CONFIG_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_PBDMA_CONFIG_VALID_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_ACQ_PRETEST 0x020 /* RW-4R */
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT 7:0 /* RWIUF */
#define NV_RUNLIST_ACQ_PRETEST_TIMEOUT_8 0x00000008 /* RWI-V */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE 15:12 /* RWIUF */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_0 0x00000000 /* RWI-V */
#define NV_RUNLIST_ACQ_PRETEST_TIMESCALE_10 0x0000000a /* RW--V */
#define NV_RUNLIST_IDLE_FILTER 0x024 /* RW-4R */
#define NV_RUNLIST_IDLE_FILTER_PERIOD 7:0 /* RWIUF */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_INIT 0x00000050 /* RWI-V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD__PROD 0x00000064 /* RW--V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_8 0x00000008 /* RW--V */
#define NV_RUNLIST_IDLE_FILTER_PERIOD_32 0x00000020 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK 0x028 /* RW-4R */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER 7:0 /* RWIUF */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_DISABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_SHORT 0x00000003 /* RW--V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMER_100US 0x00000064 /* RWI-V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE 15:12 /* RWIUF */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_0 0x00000000 /* RWI-V */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_SHORT 0x00000000 /* */
#define NV_RUNLIST_USERD_WRITEBACK_TIMESCALE_100US 0x00000000 /* */
#define NV_RUNLIST_ESCHED_CONFIG 0x02c /* C--4R */
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID 15:0 /* C--UF */
#define NV_RUNLIST_ESCHED_CONFIG_ESCHED_CLASS_ID_VALUE 50543 /* C---V */
#define NV_CHRAM_CHANNEL(i) (0x000+(i)*4) /* RW-4A */
#define NV_CHRAM_CHANNEL__SIZE_1 2048 /* */
#define NV_CHRAM_CHANNEL_WRITE_CONTROL 0:0 /* -WIVF */
#define NV_CHRAM_CHANNEL_WRITE_CONTROL_ONES_SET_BITS 0x00000000 /* -WI-V */
#define NV_CHRAM_CHANNEL_WRITE_CONTROL_ONES_CLEAR_BITS 0x00000001 /* -W--V */
#define NV_CHRAM_CHANNEL_ENABLE 1:1 /* RWIVF */
#define NV_CHRAM_CHANNEL_ENABLE_NOT_IN_USE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_ENABLE_IN_USE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_NEXT 2:2 /* RWIVF */
#define NV_CHRAM_CHANNEL_NEXT_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_NEXT_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_BUSY 3:3 /* R-IVF */
#define NV_CHRAM_CHANNEL_BUSY_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_BUSY_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_PBDMA_FAULTED 4:4 /* RWIVF */
#define NV_CHRAM_CHANNEL_PBDMA_FAULTED_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_PBDMA_FAULTED_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_ENG_FAULTED 5:5 /* RWIVF */
#define NV_CHRAM_CHANNEL_ENG_FAULTED_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_ENG_FAULTED_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_ON_PBDMA 6:6 /* R-IVF */
#define NV_CHRAM_CHANNEL_ON_PBDMA_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_ON_PBDMA_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_ON_ENG 7:7 /* R-IVF */
#define NV_CHRAM_CHANNEL_ON_ENG_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_ON_ENG_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_PENDING 8:8 /* RWIVF */
#define NV_CHRAM_CHANNEL_PENDING_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_PENDING_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_CTX_RELOAD 9:9 /* RWIVF */
#define NV_CHRAM_CHANNEL_CTX_RELOAD_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_CTX_RELOAD_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_PBDMA_BUSY 10:10 /* R-IVF */
#define NV_CHRAM_CHANNEL_PBDMA_BUSY_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_PBDMA_BUSY_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_ENG_BUSY 11:11 /* R-IVF */
#define NV_CHRAM_CHANNEL_ENG_BUSY_FALSE 0x00000000 /* R-I-V */
#define NV_CHRAM_CHANNEL_ENG_BUSY_TRUE 0x00000001 /* R---V */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL 12:12 /* RWIVF */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_FALSE 0x00000000 /* RWI-V */
#define NV_CHRAM_CHANNEL_ACQUIRE_FAIL_TRUE 0x00000001 /* RW--V */
#define NV_CHRAM_CHANNEL_STATUS 12:8 /* */
#define NV_CHRAM_CHANNEL_STATUS_IDLE 0x00000000 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING 0x00000001 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_CTX_RELOAD 0x00000003 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL 0x00000011 /* */
#define NV_CHRAM_CHANNEL_STATUS_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x00000013 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY 0x00000004 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_AND_ENG_BUSY 0x0000000C /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY 0x00000008 /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL 0x00000019 /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING 0x00000009 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_CTX_RELOAD 0x00000006 /* */
#define NV_CHRAM_CHANNEL_STATUS_PBDMA_BUSY_ENG_BUSY_CTX_RELOAD 0x0000000E /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_CTX_RELOAD 0x0000000A /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_CTX_RELOAD 0x0000000B /* */
#define NV_CHRAM_CHANNEL_STATUS_ENG_BUSY_PENDING_ACQUIRE_FAIL_CTX_RELOAD 0x0000001B /* */
#define NV_CHRAM_CHANNEL_UPDATE 31:0 /* */
#define NV_CHRAM_CHANNEL_UPDATE_ENABLE_CHANNEL 0x00000002 /* */
#define NV_CHRAM_CHANNEL_UPDATE_DISABLE_CHANNEL 0x00000003 /* */
#define NV_CHRAM_CHANNEL_UPDATE_FORCE_CTX_RELOAD 0x00000200 /* */
#define NV_CHRAM_CHANNEL_UPDATE_RESET_PBDMA_FAULTED 0x00000011 /* */
#define NV_CHRAM_CHANNEL_UPDATE_RESET_ENG_FAULTED 0x00000021 /* */
#define NV_CHRAM_CHANNEL_UPDATE_CLEAR_CHANNEL 0xFFFFFFFF /* */
#define NV_RUNLIST_SUBMIT_BASE_LO 0x080 /* RW-4R */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO 31:12 /* RWIUF */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_LO_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET 1:0 /* RWIVF */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_VID_MEM 0x0 /* RWI-V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_COHERENT 0x2 /* RW--V */
#define NV_RUNLIST_SUBMIT_BASE_LO_TARGET_SYS_MEM_NONCOHERENT 0x3 /* RW--V */
#define NV_RUNLIST_SUBMIT_BASE_LO_PTR_ALIGN_SHIFT 12 /* */
#define NV_RUNLIST_SUBMIT_BASE_HI 0x084 /* RW-4R */
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI 7:0 /* RWIUF */
#define NV_RUNLIST_SUBMIT_BASE_HI_PTR_HI_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT 0x088 /* RW-4R */
#define NV_RUNLIST_SUBMIT_LENGTH 15:0 /* RWIUF */
#define NV_RUNLIST_SUBMIT_LENGTH_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_LENGTH_MAX 0x0000ffff /* RW--V */
#define NV_RUNLIST_SUBMIT_OFFSET 31:16 /* RWIVF */
#define NV_RUNLIST_SUBMIT_OFFSET_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_SUBMIT_INFO 0x08C /* R--4R */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID 13:0 /* */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW 10:0 /* R-IUF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_HW_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID 14:14 /* R-IUF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_TSGID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_SUBMIT_INFO_PENDING 15:15 /* R-IVF */
#define NV_RUNLIST_SUBMIT_INFO_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_SUBMIT_INFO_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET 31:16 /* R-IVF */
#define NV_RUNLIST_SUBMIT_INFO_PREEMPTED_OFFSET_ZERO 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK(i) (0x190+(i)*4) /* RW-4A */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ALL_LEVELS_ENABLED 0x0000000F /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_ONLY_LEVEL3_ENABLED 0x00000008 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1 1:1 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2 2:2 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3 3:3 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION 7:4 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_DEFAULT_PRIV_LEVEL 15 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ALL_LEVELS_ENABLED 0x0F /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_ONLY_LEVEL3_ENABLED 0x08 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0 4:4 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL0_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1 5:5 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL1_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2 6:6 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL2_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3 7:7 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_ENABLE 0x00000001 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_PROTECTION_LEVEL3_DISABLE 0x00000000 /* */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_READ_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_REPORT_ERROR 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_WRITE_VIOLATION_SOLDIER_ON 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL 10:10 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_READ_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL 11:11 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_BLOCKED 0x00000001 /* RWI-V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_WRITE_CONTROL_LOWERED 0x00000000 /* RW--V */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE 31:12 /* RWIVF */
#define NV_RUNLIST_INTR_PRIV_LEVEL_MASK_SOURCE_ENABLE_ALL_SOURCES_ENABLED 0x000FFFFF /* RWI-V */
#define NV_RUNLIST_INTR_VECTORID(i) (0x160+(i)*4) /* RW-4A */
#define NV_RUNLIST_INTR_VECTORID__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_VECTORID__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_VECTORID_VECTOR 11:0 /* RWXUF */
#define NV_RUNLIST_INTR_VECTORID_GSP 30:30 /* RWIUF */
#define NV_RUNLIST_INTR_VECTORID_GSP_DISABLE 0 /* RW--V */
#define NV_RUNLIST_INTR_VECTORID_GSP_ENABLE 1 /* RWI-V */
#define NV_RUNLIST_INTR_VECTORID_CPU 31:31 /* RWIUF */
#define NV_RUNLIST_INTR_VECTORID_CPU_DISABLE 0 /* RW--V */
#define NV_RUNLIST_INTR_VECTORID_CPU_ENABLE 1 /* RWI-V */
#define NV_RUNLIST_INTR_RETRIGGER(i) (0x180+(i)*4) /* -W-4A */
#define NV_RUNLIST_INTR_RETRIGGER__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_RETRIGGER__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER 0:0 /* -W-VF */
#define NV_RUNLIST_INTR_RETRIGGER_TRIGGER_TRUE 1 /* -W--V */
#define NV_RUNLIST_INTR_0 0x100 /* RW-4R */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG0_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG1_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG2_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_ENG_RESET 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_CTXSW_TIMEOUT_GRCE_RESET 0x00000001 /* */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE 5:5 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE 6:6 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWXVF */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_ACQUIRE_AND_ENG_IDLE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_BAD_TSG_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_BAD_TSG_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_BAD_TSG_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_TSG_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0 16:16 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0 17:17 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_0_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1 18:18 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA0_INTR_TREE_1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1 19:19 /* R-XVF */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_NOT_PENDING 0x00000000 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMA1_INTR_TREE_1_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_NOT_PENDING 0x00000000 /* */
#define NV_RUNLIST_INTR_0_PBDMAi_INTR_TREE_j_PENDING 0x00000001 /* */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_PENDING 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_0_RUNLIST_PREEMPT_COMPLETE_RESET 0x00000001 /* -W--V */
#define NV_RUNLIST_INTR_0_RUNLIST_EVENT 9:9 /* */
#define NV_RUNLIST_INTR_0_MASK_SET 0x110 /* RW-4R */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_MASK_SET_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_MASK_CLEAR 0x118 /* RW-4R */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_MASK_CLEAR_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE(i) (0x120+(i)*8) /* RW-4A */
#define NV_RUNLIST_INTR_0_EN_SET_TREE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_SET_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE(i) (0x140+(i)*8) /* RW-4A */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE__PRIV_LEVEL_MASK "NV_RUNLIST_INTR_PRIV_LEVEL_MASK" /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0 0:0 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1 1:1 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2 2:2 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG2_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG(i) (i):(i) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG__SIZE_1 3 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_ENG_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE(i) ((i)+1):((i)+1) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_CTXSW_TIMEOUT_GRCE_ENABLED 0x00000001 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE 4:4 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE 5:5 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE 6:6 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE 7:7 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_ACQUIRE_AND_ENG_IDLE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE 8:8 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_TSG_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE 9:9 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_RUNLIST_PREEMPT_COMPLETE_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG 12:12 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_BAD_TSG_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0 16:16 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0 17:17 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_0_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1 18:18 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA0_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1 19:19 /* RWIVF */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMA1_INTR_TREE_1_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j(i,j) (16+(i)+(j)*2):(16+(i)+(j)*2) /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_1 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j__SIZE_2 2 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_DISABLED 0x00000000 /* */
#define NV_RUNLIST_INTR_0_EN_CLEAR_TREE_PBDMAi_INTR_TREE_j_ENABLED 0x00000001 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO(i) (0x224+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID 13:0 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_PREV_TSGID_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE 15:14 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_LOAD 0x00000001 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SAVE 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_CTXSW_STATE_SWITCH 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID 29:16 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_NEXT_TSGID_DEFAULT 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS 31:30 /* R-IUF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_AWAITING_ACK 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ENG_WAS_RESET 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_ACK_RECEIVED 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_INFO_STATUS_DROPPED_TIMEOUT 0x00000003 /* R---V */
#define NV_RUNLIST_INFO 0x108 /* R--4R */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM 0:0 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM 1:1 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM 4:4 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM 5:5 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_UNARMED 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_ACQUIRE_AND_ENG_IDLE_INTR_ARM_ARMED 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_ENG_IDLE 8:8 /* R-IUF */
#define NV_RUNLIST_INFO_ENG_IDLE_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_INFO_ENG_IDLE_TRUE 0x00000001 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE 9:9 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_FALSE 0x00000000 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_IDLE_TRUE 0x00000001 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS 10:10 /* R-IVF */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_IDLE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_STATUS_BUSY 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING 12:12 /* R-IUF */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_ACQUIRE_STILL_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED 13:13 /* R-IUF */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_INFO_RUNLIST_FETCH_NACKED_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG 0x174 /* R--4R */
#define NV_RUNLIST_INTR_BAD_TSG_CODE 3:0 /* R-IVF */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_NO_ERROR 0x00000000 /* R-I-V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_ZERO_LENGTH_TSG 0x00000001 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_MAX_LENGTH_EXCEEDED 0x00000002 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_RUNLIST_OVERFLOW 0x00000003 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_CHID_ENTRY 0x00000004 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_EXPECTED_A_TSG_HEADER 0x00000005 /* R---V */
#define NV_RUNLIST_INTR_BAD_TSG_CODE_INVALID_RUNQUEUE 0x00000006 /* R---V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG(i) (0x220+(i)*64) /* RW-4A */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD 30:0 /* RWIVF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_INIT 0x003fffff /* RWI-V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_PERIOD_MAX 0x7fffffff /* RW--V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION 31:31 /* RWIVF */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_DISABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG_DETECTION_ENABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG 0x050 /* RW-4R */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT 5:0 /* RWIVF */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_IDLE_CG_DLY_CNT__PROD 0x00000002 /* RW--V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN 6:6 /* RWIVF */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_IDLE_CG_EN__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_STATE_CG_EN 7:7 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STATE_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT 13:8 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT_INIT 0x00000000 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_DLY_CNT__PROD 0x00000002 /* */
#define NV_RUNLIST_BLKCG_STALL_CG_EN 14:14 /* RWIVF */
#define NV_RUNLIST_BLKCG_STALL_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_STALL_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_STALL_CG_EN__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN 15:15 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_QUIESCENT_CG_EN__PROD 0x00000001 /* */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT 19:16 /* RWIVF */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT_INIT 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG_WAKEUP_DLY_CNT__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT 23:20 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_INIT 0x0000000f /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT_FULLSPEED 0x0000000f /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_CNT__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL 27:24 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL_INIT 0x00000000 /* */
#define NV_RUNLIST_BLKCG_DI_DT_SKEW_VAL__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN 28:28 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER 29:29 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_EN 0x00000001 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER_DIS 0x00000000 /* */
#define NV_RUNLIST_BLKCG_THROT_CLK_SW_OVER__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN 30:30 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_PAUSE_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN 31:31 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN_ENABLED 0x00000001 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN_DISABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG_HALT_CG_EN__PROD 0x00000000 /* */
#define NV_RUNLIST_BLKCG1 0x054 /* RW-4R */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN 0:0 /* RWIVF */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_ENABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG1_MONITOR_CG_EN_DISABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG 16:1 /* */
#define NV_RUNLIST_BLKCG1_SLCG_ENABLED 0x00000000 /* */
#define NV_RUNLIST_BLKCG1_SLCG_DISABLED 0x0000FFFF /* */
#define NV_RUNLIST_BLKCG1_SLCG__PROD 0x00000001 /* */
#define NV_RUNLIST_BLKCG1_SLCG_RLP 1:1 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_RLP_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_RLP_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_RLP__PROD 0x00000001 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH 3:3 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_EVH_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_EVH__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM 7:7 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_EISM_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_EISM__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_LB 8:8 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_LB_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_LB_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_LB__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL 9:9 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_CTL__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP 10:10 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_GP__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB 11:11 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PBDMA_PB__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI 13:13 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_PRI_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_PRI__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW 14:14 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_CHSW__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR 15:15 /* RWIVF */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_ENABLED 0x00000000 /* RW--V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR_DISABLED 0x00000001 /* RWI-V */
#define NV_RUNLIST_BLKCG1_SLCG_XBAR__PROD 0x00000000 /* RW--V */
#define NV_RUNLIST_SLCG_MISC 0x05C /* RW-4R */
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS 3:0 /* RWIVF */
#define NV_RUNLIST_SLCG_MISC_EXTRA_BUSY_CLKS_ZERO 0x00000000 /* RWI-V */
#define NV_RUNLIST_INTERNAL_DOORBELL 0x090 /* -W-4R */
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID 11:0 /* */
#define NV_RUNLIST_INTERNAL_DOORBELL_CHID_HW 10:0 /* -WXUF */
#define NV_RUNLIST_INTERNAL_DOORBELL_GFID 21:16 /* -WXUF */
#define NV_RUNLIST_SCHED_DISABLE 0x094 /* RW-4R */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST 0:0 /* RWIVF */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_ENABLED 0x00000000 /* RWI-V */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_DISABLED 0x00000001 /* RW--V */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_FALSE 0x00000000 /* */
#define NV_RUNLIST_SCHED_DISABLE_RUNLIST_TRUE 0x00000001 /* */
#define NV_RUNLIST_PREEMPT 0x098 /* RW-4R */
#define NV_RUNLIST_PREEMPT_ID 11:0 /* */
#define NV_RUNLIST_PREEMPT_ID_HW 10:0 /* RWIUF */
#define NV_RUNLIST_PREEMPT_ID_HW_NULL 0x00000000 /* RWI-V */
#define NV_RUNLIST_PREEMPT_TSG_PREEMPT_PENDING 20:20 /* R-IVF */
#define NV_RUNLIST_PREEMPT_TSG_PREEMPT_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_PREEMPT_TSG_PREEMPT_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_PREEMPT_RUNLIST_PREEMPT_PENDING 21:21 /* R-IVF */
#define NV_RUNLIST_PREEMPT_RUNLIST_PREEMPT_PENDING_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_PREEMPT_RUNLIST_PREEMPT_PENDING_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_PREEMPT_TYPE 25:24 /* RWIVF */
#define NV_RUNLIST_PREEMPT_TYPE_RUNLIST 0x00000000 /* RWI-V */
#define NV_RUNLIST_PREEMPT_TYPE_TSG 0x00000001 /* RW--V */
#define NV_RUNLIST_ENGINE_STATUS0(i) (0x200+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS0__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS0_TSGID 11:0 /* */
#define NV_RUNLIST_ENGINE_STATUS0_TSGID_HW 10:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS 15:13 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_INVALID 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_VALID 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SAVE 0x00000005 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_LOAD 0x00000006 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_STATUS_CTXSW_SWITCH 0x00000007 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_CTX 13:13 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_INVALID 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTX_VALID 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD 14:14 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_NOT_IN_PROGRESS 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXLOAD_IN_PROGRESS 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW 15:15 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_NOT_IN_PROGRESS 0x00000000 /* */
#define NV_RUNLIST_ENGINE_STATUS0_CTXSW_IN_PROGRESS 0x00000001 /* */
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID 27:16 /* */
#define NV_RUNLIST_ENGINE_STATUS0_NEXT_TSGID_HW 26:16 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD 29:29 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_ENG_RELOAD_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED 30:30 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_FAULTED_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_IDLE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS0_ENGINE_BUSY 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS1(i) (0x204+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS1__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS1_GFID 5:0 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID 13:8 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_INTR_ID 20:16 /* R-XVF */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID 30:30 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS1_GFID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS1_NEXT_GFID_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL(i,j) (0x208+(i)*64+(j)*4) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL__SIZE_2 2 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID 11:0 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_CHID_HW 10:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID 15:15 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID 27:16 /* */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_CHID_HW 26:16 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID 31:31 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_CHANNEL_NEXT_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG(i) (0x228+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN 0:0 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_DISABLED 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_IF_EN_ENABLED 0x00000001 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS 8:8 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_NO_CREDITS_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI 12:12 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE0_WFI_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS 16:16 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_NO_CREDITS_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI 20:20 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_PIPE1_WFI_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_DEBUG_ENGINE_ID 29:24 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST(i) (0x210+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_INST__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET 1:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_VID_MEM 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID 11:11 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_INST_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_INST_PTR_LO 31:12 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI(i) (0x214+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI 31:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST(i) (0x218+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET 1:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_VID_MEM 0x00000000 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_TARGET_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID 11:11 /* R-IVF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_FALSE 0x00000000 /* R-I-V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_VALID_TRUE 0x00000001 /* R---V */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_PTR_LO 31:12 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI(i) (0x21C+(i)*64) /* R--4A */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI__SIZE_1 3 /* */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI 31:0 /* R-XUF */
#define NV_RUNLIST_ENGINE_STATUS_NEXT_INST_HI_PTR_HI_ZERO 0x00000000 /* R---V */
#endif // __ga100_dev_runlist_h__
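
The field macros above use the HIGH:LOW bit-range convention (for example, NV_RUNLIST_SUBMIT_LENGTH is bits 15:0 of the NV_RUNLIST_SUBMIT register at 0x088). A minimal sketch of how such ranges can be turned into shifts and masks and read out of a 32-bit register value; the FIELD_* helper names below are illustrative assumptions, not the driver's own macros, though they rely on the same "1 ? H:L" / "0 ? H:L" conditional-expression trick that works with these defines:

/* Illustrative helpers (assumed, not part of this header): split a HIGH:LOW
 * range into high bit, low bit, mask, and extract a field from a register value. */
#define FIELD_HI(range)     (1 ? range)                 /* (1 ? 15:0) evaluates to 15 */
#define FIELD_LO(range)     (0 ? range)                 /* (0 ? 15:0) evaluates to 0  */
#define FIELD_MASK(range)   (0xFFFFFFFFU >> (31 - FIELD_HI(range) + FIELD_LO(range)))
#define FIELD_VAL(range, v) (((v) >> FIELD_LO(range)) & FIELD_MASK(range))

/* Example: pull the runlist length out of a value read from NV_RUNLIST_SUBMIT. */
static unsigned int runlist_submit_length(unsigned int submit_reg)
{
    return FIELD_VAL(NV_RUNLIST_SUBMIT_LENGTH, submit_reg); /* bits 15:0 */
}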


@@ -0,0 +1,339 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gm107_dev_mmu_h__
#define __gm107_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+28):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0xff /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x01 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2C 0x02 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2C 0x03 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2C 0x04 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2C 0x05 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2C 0x06 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2Z 0x07 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2Z 0x08 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2Z 0x09 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2Z 0x0a /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2Z 0x0b /* R---V */
#define NV_MMU_PTE_KIND_Z16_4CZ 0x0c /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_4CZ 0x0d /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_4CZ 0x0e /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_4CZ 0x0f /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_4CZ 0x10 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x11 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_1Z 0x12 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_1Z 0x13 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_1Z 0x14 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_1Z 0x15 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_1Z 0x16 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CZ 0x17 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CZ 0x18 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CZ 0x19 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CZ 0x1a /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CZ 0x1b /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CS 0x1c /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CS 0x1d /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CS 0x1e /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CS 0x1f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CS 0x20 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_4CSZV 0x21 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_4CSZV 0x22 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_4CSZV 0x23 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_4CSZV 0x24 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_4CSZV 0x25 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12 0x26 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4 0x27 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8 0x28 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24 0x29 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_1ZV 0x2e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_1ZV 0x2f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_1ZV 0x30 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_1ZV 0x31 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CS 0x32 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CS 0x33 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CS 0x34 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CS 0x35 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CZV 0x3a /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CZV 0x3b /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CZV 0x3c /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CZV 0x3d /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2ZV 0x3e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2ZV 0x3f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2ZV 0x40 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2ZV 0x41 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_4CSZV 0x42 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_4CSZV 0x43 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_4CSZV 0x44 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_4CSZV 0x45 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x46 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_1Z 0x47 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_1Z 0x48 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_1Z 0x49 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_1Z 0x4a /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_1Z 0x4b /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CS 0x4c /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CS 0x4d /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CS 0x4e /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CS 0x4f /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CS 0x50 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CZ 0x51 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CZ 0x52 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CZ 0x53 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CZ 0x54 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CZ 0x55 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_4CSZV 0x56 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_4CSZV 0x57 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_4CSZV 0x58 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_4CSZV 0x59 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_4CSZV 0x5a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12 0x5b /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4 0x5c /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8 0x5d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24 0x5e /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_1ZV 0x63 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_1ZV 0x64 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_1ZV 0x65 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_1ZV 0x66 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CS 0x67 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CS 0x68 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CS 0x69 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CS 0x6a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CZV 0x6f /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CZV 0x70 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CZV 0x71 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CZV 0x72 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2ZV 0x73 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2ZV 0x74 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2ZV 0x75 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2ZV 0x76 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_4CSZV 0x77 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_4CSZV 0x78 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_4CSZV 0x79 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_4CSZV 0x7a /* R---V */
#define NV_MMU_PTE_KIND_ZF32 0x7b /* R---V */
#define NV_MMU_PTE_KIND_ZF32_1Z 0x7c /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_1Z 0x7d /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_1Z 0x7e /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_1Z 0x7f /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_1Z 0x80 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CS 0x81 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CS 0x82 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CS 0x83 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CS 0x84 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CS 0x85 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CZ 0x86 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CZ 0x87 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CZ 0x88 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CZ 0x89 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CZ 0x8a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12 0x8b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4 0x8c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8 0x8d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24 0x8e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CS 0x8f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CS 0x90 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CS 0x91 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CS 0x92 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1ZV 0x97 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1ZV 0x98 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1ZV 0x99 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1ZV 0x9a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CZV 0x9b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CZV 0x9c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CZV 0x9d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CZV 0x9e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CS 0x9f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CS 0xa0 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CS 0xa1 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CS 0xa2 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CSZV 0xa3 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CSZV 0xa4 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CSZV 0xa5 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CSZV 0xa6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12 0xa7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4 0xa8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8 0xa9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24 0xaa /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CS 0xab /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CS 0xac /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CS 0xad /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CS 0xae /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1ZV 0xb3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1ZV 0xb4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1ZV 0xb5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1ZV 0xb6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CZV 0xb7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CZV 0xb8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CZV 0xb9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CZV 0xba /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CS 0xbb /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CS 0xbc /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CS 0xbd /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CS 0xbe /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CSZV 0xbf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CSZV 0xc0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CSZV 0xc1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CSZV 0xc2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0xc3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_1CS 0xc4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_1CS 0xc5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_1CS 0xc6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_1CS 0xc7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_1CS 0xc8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CSZV 0xce /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CSZV 0xcf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CSZV 0xd0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CSZV 0xd1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CSZV 0xd2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CS 0xd3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CS 0xd4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CS 0xd5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CS 0xd6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CS 0xd7 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_16BX2 0xfe /* R---V */
#define NV_MMU_PTE_KIND_C32_2C 0xd8 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBR 0xd9 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBA 0xda /* R---V */
#define NV_MMU_PTE_KIND_C32_2CRA 0xdb /* R---V */
#define NV_MMU_PTE_KIND_C32_2BRA 0xdc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2C 0xdd /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CBR 0xde /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CRA 0xcc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2C 0xdf /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBR 0xe0 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBA 0xe1 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CRA 0xe2 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2BRA 0xe3 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2C 0xe4 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2CRA 0xe5 /* R---V */
#define NV_MMU_PTE_KIND_C64_2C 0xe6 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBR 0xe7 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBA 0xe8 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CRA 0xe9 /* R---V */
#define NV_MMU_PTE_KIND_C64_2BRA 0xea /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2C 0xeb /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CBR 0xec /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CRA 0xcd /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2C 0xed /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBR 0xee /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBA 0xef /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CRA 0xf0 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2BRA 0xf1 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2C 0xf2 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2CRA 0xf3 /* R---V */
#define NV_MMU_PTE_KIND_C128_2C 0xf4 /* R---V */
#define NV_MMU_PTE_KIND_C128_2CR 0xf5 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2C 0xf6 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2CR 0xf7 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2C 0xf8 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2CR 0xf9 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2C 0xfa /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2CR 0xfb /* R---V */
#define NV_MMU_PTE_KIND_X8C24 0xfc /* R---V */
#define NV_MMU_PTE_KIND_PITCH_NO_SWIZZLE 0xfd /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xca /* R---V */
#define NV_MMU_PTE_KIND_SMHOST_MESSAGE 0xcb /* R---V */
#endif // __gm107_dev_mmu_h__
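The PTE kind values above are opaque codes; when debugging it can help to map a few of them back to names. Below is a minimal sketch in C that covers only a handful of the kinds listed in this header — the helper name and the selection of kinds are illustrative, not part of the header itself.

#include <stdint.h>

/* Illustrative only: map a few of the gm107 PTE kind codes listed above to
 * printable names. The numeric values are taken verbatim from this header. */
static const char *pte_kind_name(uint8_t kind)
{
    switch (kind) {
    case 0xfe: return "GENERIC_16BX2";
    case 0xfd: return "PITCH_NO_SWIZZLE";
    case 0xfc: return "X8C24";
    case 0xca: return "SMSKED_MESSAGE";
    case 0xcb: return "SMHOST_MESSAGE";
    case 0xc3: return "ZF32_X24S8";
    default:   return "other/unknown";
    }
}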

View File

@@ -0,0 +1,203 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gp100_dev_fault_h__
#define __gp100_dev_fault_h__
/* This file is autogenerated. Do not edit */
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 0 /* */
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
#define NV_PFAULT_MMU_ENG_ID_IFB 3 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1 4 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2 5 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST0 6 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST1 7 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST2 8 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST3 9 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST4 10 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST5 11 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST6 12 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST7 13 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST8 14 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST9 15 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST10 16 /* */
#define NV_PFAULT_MMU_ENG_ID_SEC 18 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF 19 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC 2 /* */
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 27 /* */
#define NV_PFAULT_MMU_ENG_ID_CE0 21 /* */
#define NV_PFAULT_MMU_ENG_ID_CE1 22 /* */
#define NV_PFAULT_MMU_ENG_ID_CE2 27 /* */
#define NV_PFAULT_MMU_ENG_ID_CE3 28 /* */
#define NV_PFAULT_MMU_ENG_ID_CE4 29 /* */
#define NV_PFAULT_MMU_ENG_ID_CE5 30 /* */
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 23 /* */
#define NV_PFAULT_MMU_ENG_ID_PTP 24 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC0 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC1 17 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC2 20 /* */
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 31 /* */
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
#define NV_PFAULT_CLIENT 14:8 /* */
#define NV_PFAULT_CLIENT_GPC_L1_0 0x00000000 /* */
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000001 /* */
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000002 /* */
#define NV_PFAULT_CLIENT_GPC_L1_1 0x00000003 /* */
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000004 /* */
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000005 /* */
#define NV_PFAULT_CLIENT_GPC_L1_2 0x00000006 /* */
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000007 /* */
#define NV_PFAULT_CLIENT_GPC_PE_2 0x00000008 /* */
#define NV_PFAULT_CLIENT_GPC_L1_3 0x00000009 /* */
#define NV_PFAULT_CLIENT_GPC_T1_3 0x0000000A /* */
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
#define NV_PFAULT_CLIENT_GPC_RAST 0x0000000C /* */
#define NV_PFAULT_CLIENT_GPC_GCC 0x0000000D /* */
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x0000000E /* */
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x0000000F /* */
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000010 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000011 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000012 /* */
#define NV_PFAULT_CLIENT_GPC_L1_4 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_PE_4 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_L1_5 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000018 /* */
#define NV_PFAULT_CLIENT_GPC_PE_5 0x00000019 /* */
#define NV_PFAULT_CLIENT_GPC_L1_6 0x0000001A /* */
#define NV_PFAULT_CLIENT_GPC_T1_6 0x0000001B /* */
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000001C /* */
#define NV_PFAULT_CLIENT_GPC_L1_7 0x0000001D /* */
#define NV_PFAULT_CLIENT_GPC_T1_7 0x0000001E /* */
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000001F /* */
#define NV_PFAULT_CLIENT_GPC_L1_8 0x00000020 /* */
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000022 /* */
#define NV_PFAULT_CLIENT_GPC_L1_9 0x00000023 /* */
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000024 /* */
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000025 /* */
#define NV_PFAULT_CLIENT_GPC_L1_10 0x00000026 /* */
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000027 /* */
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000028 /* */
#define NV_PFAULT_CLIENT_GPC_L1_11 0x00000029 /* */
#define NV_PFAULT_CLIENT_GPC_T1_11 0x0000002A /* */
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000002B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000030 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000038 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000039 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003B /* */
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000013 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_0 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_1 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_2 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_3 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_RGG_UTLB 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
#define NV_PFAULT_ACCESS_TYPE 18:16 /* */
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
#define NV_PFAULT_GPC_ID 28:24 /* */
#endif // __gp100_dev_fault_h__
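The HIGH:LOW field macros above (for example NV_PFAULT_FAULT_TYPE 4:0) are normally consumed through DRF-style helper macros rather than used directly. A minimal standalone sketch of such helpers, and of decoding a fault-info word with them, is shown below; the FLD_* helper names and the example info word are hypothetical, and the field definitions are repeated here only so the sketch compiles on its own.

#include <stdio.h>
#include <stdint.h>

/* Minimal DRF-style helpers for HIGH:LOW field macros (sketch only).
 * "0 ? hi:lo" picks the low bit index, "1 ? hi:lo" picks the high one. */
#define FLD_LO(f)        ((unsigned)(0 ? f))
#define FLD_HI(f)        ((unsigned)(1 ? f))
#define FLD_MASK(f)      (0xFFFFFFFFu >> (31u - FLD_HI(f) + FLD_LO(f)))
#define FLD_VAL(f, reg)  (((reg) >> FLD_LO(f)) & FLD_MASK(f))

/* Field positions repeated from the gp100 dev_fault.h definitions above,
 * so this sketch builds standalone; a real build would include that header. */
#define NV_PFAULT_FAULT_TYPE       4:0
#define NV_PFAULT_CLIENT           14:8
#define NV_PFAULT_ACCESS_TYPE      18:16
#define NV_PFAULT_MMU_CLIENT_TYPE  20:20
#define NV_PFAULT_GPC_ID           28:24

int main(void)
{
    uint32_t info = 0x01050302u;  /* hypothetical packed fault-info word */

    printf("fault type  : 0x%x\n", FLD_VAL(NV_PFAULT_FAULT_TYPE, info));
    printf("client      : 0x%x\n", FLD_VAL(NV_PFAULT_CLIENT, info));
    printf("access type : 0x%x\n", FLD_VAL(NV_PFAULT_ACCESS_TYPE, info));
    printf("client type : 0x%x\n", FLD_VAL(NV_PFAULT_MMU_CLIENT_TYPE, info));
    printf("gpc id      : 0x%x\n", FLD_VAL(NV_PFAULT_GPC_ID, info));
    return 0;
}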

View File

@@ -0,0 +1,71 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// Excerpt of gp100/dev_fb.h
#ifndef __dev_fb_h__
#define __dev_fb_h__
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_VA 0:0 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_VA_FALSE 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_VA_TRUE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_PDB 1:1 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_PDB_FALSE 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ALL_PDB_TRUE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY 5:3 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_NONE 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_START 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_REPLAY_CANCEL 0x00000004 /* */
#define NV_PFB_PRI_MMU_INVALIDATE_SYS_MEMBAR 6:6 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_SYS_MEMBAR_FALSE 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_SYS_MEMBAR_TRUE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ACK 8:7 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_ACK_NONE_REQUIRED 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ACK_INTRANODE 0x00000002 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_ACK_GLOBALLY 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_CLIENT_ID 14:9 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_GPC_ID 19:15 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_CLIENT_TYPE 20:20 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_CLIENT_TYPE_GPC 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CANCEL_CLIENT_TYPE_HUB 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL 26:24 /* RWXVF */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_ALL 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_PTE_ONLY 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE0 0x00000002 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE1 0x00000003 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE2 0x00000004 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE3 0x00000005 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE4 0x00000006 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_CACHE_LEVEL_UP_TO_PDE5 0x00000007 /* RW--V */
#define NV_PFB_PRI_MMU_INVALIDATE_TRIGGER 31:31 /* -WEVF */
#define NV_PFB_PRI_MMU_INVALIDATE_TRIGGER_FALSE 0x00000000 /* -WE-V */
#define NV_PFB_PRI_MMU_INVALIDATE_TRIGGER_TRUE 0x00000001 /* -W--T */
#define NV_PFB_PRI_MMU_PAGE_FAULT_CTRL_PRF_FILTER 1:0 /* RWEVF */
#define NV_PFB_PRI_MMU_PAGE_FAULT_CTRL_PRF_FILTER_SEND_ALL 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_PAGE_FAULT_CTRL_PRF_FILTER_SEND_NONE 0x00000003 /* RW--V */
#endif // __dev_fb_h__
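As a sketch of how the invalidate fields above combine, the function below composes a "full invalidate, no replay, system membar, no ack" value using the documented bit positions (ALL_VA 0:0, ALL_PDB 1:1, REPLAY 5:3, SYS_MEMBAR 6:6, ACK 8:7, TRIGGER 31:31). It is illustrative only; how the value reaches the hardware register is out of scope here.

#include <stdint.h>

/* Sketch: build an NV_PFB_PRI_MMU_INVALIDATE-style value from the field
 * positions documented above. Shift amounts mirror the HIGH:LOW macros. */
static inline uint32_t mmu_invalidate_all(void)
{
    uint32_t v = 0;
    v |= 1u << 0;    /* ..._ALL_VA_TRUE        */
    v |= 1u << 1;    /* ..._ALL_PDB_TRUE       */
    v |= 0u << 3;    /* ..._REPLAY_NONE        */
    v |= 1u << 6;    /* ..._SYS_MEMBAR_TRUE    */
    v |= 0u << 7;    /* ..._ACK_NONE_REQUIRED  */
    v |= 1u << 31;   /* ..._TRIGGER_TRUE       */
    return v;
}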

View File

@@ -0,0 +1,625 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gp100_dev_mmu_h__
#define __gp100_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+18+11):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0xff /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x01 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2C 0x02 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2C 0x03 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2C 0x04 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2C 0x05 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2C 0x06 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2Z 0x07 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2Z 0x08 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2Z 0x09 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2Z 0x0a /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2Z 0x0b /* R---V */
#define NV_MMU_PTE_KIND_Z16_2CZ 0x36 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2CZ 0x37 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2CZ 0x38 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2CZ 0x39 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2CZ 0x5f /* R---V */
#define NV_MMU_PTE_KIND_Z16_4CZ 0x0c /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_4CZ 0x0d /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_4CZ 0x0e /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_4CZ 0x0f /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_4CZ 0x10 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x11 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_1Z 0x12 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_1Z 0x13 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_1Z 0x14 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_1Z 0x15 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_1Z 0x16 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CZ 0x17 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CZ 0x18 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CZ 0x19 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CZ 0x1a /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CZ 0x1b /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CS 0x1c /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CS 0x1d /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CS 0x1e /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CS 0x1f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CS 0x20 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_4CSZV 0x21 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_4CSZV 0x22 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_4CSZV 0x23 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_4CSZV 0x24 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_4CSZV 0x25 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12 0x26 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4 0x27 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8 0x28 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24 0x29 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_1ZV 0x2e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_1ZV 0x2f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_1ZV 0x30 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_1ZV 0x31 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CS 0x32 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CS 0x33 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CS 0x34 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CS 0x35 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CZV 0x3a /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CZV 0x3b /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CZV 0x3c /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CZV 0x3d /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2ZV 0x3e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2ZV 0x3f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2ZV 0x40 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2ZV 0x41 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_4CSZV 0x42 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_4CSZV 0x43 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_4CSZV 0x44 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_4CSZV 0x45 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x46 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_1Z 0x47 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_1Z 0x48 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_1Z 0x49 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_1Z 0x4a /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_1Z 0x4b /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CS 0x4c /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CS 0x4d /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CS 0x4e /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CS 0x4f /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CS 0x50 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CZ 0x51 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CZ 0x52 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CZ 0x53 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CZ 0x54 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CZ 0x55 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_4CSZV 0x56 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_4CSZV 0x57 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_4CSZV 0x58 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_4CSZV 0x59 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_4CSZV 0x5a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12 0x5b /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4 0x5c /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8 0x5d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24 0x5e /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_1ZV 0x63 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_1ZV 0x64 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_1ZV 0x65 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_1ZV 0x66 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CS 0x67 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CS 0x68 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CS 0x69 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CS 0x6a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CZV 0x6f /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CZV 0x70 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CZV 0x71 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CZV 0x72 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2ZV 0x73 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2ZV 0x74 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2ZV 0x75 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2ZV 0x76 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_4CSZV 0x77 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_4CSZV 0x78 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_4CSZV 0x79 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_4CSZV 0x7a /* R---V */
#define NV_MMU_PTE_KIND_ZF32 0x7b /* R---V */
#define NV_MMU_PTE_KIND_ZF32_1Z 0x7c /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_1Z 0x7d /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_1Z 0x7e /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_1Z 0x7f /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_1Z 0x80 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CS 0x81 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CS 0x82 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CS 0x83 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CS 0x84 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CS 0x85 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CZ 0x86 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CZ 0x87 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CZ 0x88 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CZ 0x89 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CZ 0x8a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12 0x8b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4 0x8c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8 0x8d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24 0x8e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CS 0x8f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CS 0x90 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CS 0x91 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CS 0x92 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1ZV 0x97 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1ZV 0x98 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1ZV 0x99 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1ZV 0x9a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CZV 0x9b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CZV 0x9c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CZV 0x9d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CZV 0x9e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CS 0x9f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CS 0xa0 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CS 0xa1 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CS 0xa2 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CSZV 0xa3 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CSZV 0xa4 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CSZV 0xa5 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CSZV 0xa6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12 0xa7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4 0xa8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8 0xa9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24 0xaa /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CS 0xab /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CS 0xac /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CS 0xad /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CS 0xae /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1ZV 0xb3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1ZV 0xb4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1ZV 0xb5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1ZV 0xb6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CZV 0xb7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CZV 0xb8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CZV 0xb9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CZV 0xba /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CS 0xbb /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CS 0xbc /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CS 0xbd /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CS 0xbe /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CSZV 0xbf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CSZV 0xc0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CSZV 0xc1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CSZV 0xc2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0xc3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_1CS 0xc4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_1CS 0xc5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_1CS 0xc6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_1CS 0xc7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_1CS 0xc8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CSZV 0xce /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CSZV 0xcf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CSZV 0xd0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CSZV 0xd1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CSZV 0xd2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CS 0xd3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CS 0xd4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CS 0xd5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CS 0xd6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CS 0xd7 /* R---V */
#define NV_MMU_PTE_KIND_S8 0x2a /* R---V */
#define NV_MMU_PTE_KIND_S8_2S 0x2b /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_16BX2 0xfe /* R---V */
#define NV_MMU_PTE_KIND_C32_2C 0xd8 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBR 0xd9 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBA 0xda /* R---V */
#define NV_MMU_PTE_KIND_C32_2CRA 0xdb /* R---V */
#define NV_MMU_PTE_KIND_C32_2BRA 0xdc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2C 0xdd /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CBR 0xde /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CRA 0xcc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2C 0xdf /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBR 0xe0 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBA 0xe1 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CRA 0xe2 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2BRA 0xe3 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_4CBRA 0x2c /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2C 0xe4 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2CRA 0xe5 /* R---V */
#define NV_MMU_PTE_KIND_C64_2C 0xe6 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBR 0xe7 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBA 0xe8 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CRA 0xe9 /* R---V */
#define NV_MMU_PTE_KIND_C64_2BRA 0xea /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2C 0xeb /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CBR 0xec /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CRA 0xcd /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2C 0xed /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBR 0xee /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBA 0xef /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CRA 0xf0 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2BRA 0xf1 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_4CBRA 0x2d /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2C 0xf2 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2CRA 0xf3 /* R---V */
#define NV_MMU_PTE_KIND_C128_2C 0xf4 /* R---V */
#define NV_MMU_PTE_KIND_C128_2CR 0xf5 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2C 0xf6 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2CR 0xf7 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2C 0xf8 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2CR 0xf9 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2C 0xfa /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2CR 0xfb /* R---V */
#define NV_MMU_PTE_KIND_X8C24 0xfc /* R---V */
#define NV_MMU_PTE_KIND_PITCH_NO_SWIZZLE 0xfd /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xca /* R---V */
#define NV_MMU_PTE_KIND_SMHOST_MESSAGE 0xcb /* R---V */
#define NV_MMU_VER1_PDE /* ----G */
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PDE__SIZE 8
#define NV_MMU_VER1_PTE /* ----G */
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+18+11):(1*32+12) /* RWXVF */
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PTE__SIZE 8
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_NEW_PDE /* ----G */
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PDE__SIZE 8
#define NV_MMU_NEW_DUAL_PDE /* ----G */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
#define NV_MMU_NEW_PTE /* ----G */
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_NEW_PTE_COMPTAGLINE (18+35):36 /* RWXVF */
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PTE__SIZE 8
#define NV_MMU_VER2_PDE /* ----G */
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PDE__SIZE 8
#define NV_MMU_VER2_DUAL_PDE /* ----G */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
#define NV_MMU_VER2_PTE /* ----G */
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER2_PTE_COMPTAGLINE (18+35):36 /* RWXVF */
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PTE__SIZE 8
#endif // __gp100_dev_mmu_h__
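To illustrate the 64-bit VER2 PTE layout defined above (VALID 0:0, APERTURE 2:1, VOL 3:3, ADDRESS_SYS 53:8, KIND 63:56, with ADDRESS_SHIFT 0xc), here is a small packing sketch for a 4 KiB system-memory page. The function name is hypothetical and the sketch assumes ADDRESS_SYS holds the physical address right-shifted by NV_MMU_VER2_PTE_ADDRESS_SHIFT.

#include <stdint.h>

/* Sketch: pack a VER2 PTE for a coherent system-memory 4 KiB page using the
 * bit positions documented above. Illustrative only. */
static inline uint64_t pack_ver2_pte_sysmem(uint64_t pa, uint8_t kind)
{
    uint64_t pte = 0;
    pte |= 1ull << 0;                                /* VALID_TRUE                      */
    pte |= 2ull << 1;                                /* APERTURE_SYSTEM_COHERENT_MEMORY */
    pte |= 1ull << 3;                                /* VOL_TRUE                        */
    pte |= ((pa >> 12) & ((1ull << 46) - 1)) << 8;   /* ADDRESS_SYS in bits 53:8        */
    pte |= (uint64_t)kind << 56;                     /* KIND in bits 63:56              */
    return pte;
}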

View File

@@ -0,0 +1,400 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __tu102_dev_fault_h__
#define __tu102_dev_fault_h__
/* This file is autogenerated. Do not edit */
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 64 /* */
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
#define NV_PFAULT_MMU_ENG_ID_IFB 9 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1 128 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2 192 /* */
#define NV_PFAULT_MMU_ENG_ID_SEC 14 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF 8 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC 10 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC0 10 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC1 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC2 26 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG0 24 /* */
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE0 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE1 16 /* */
#define NV_PFAULT_MMU_ENG_ID_CE2 17 /* */
#define NV_PFAULT_MMU_ENG_ID_CE3 18 /* */
#define NV_PFAULT_MMU_ENG_ID_CE4 19 /* */
#define NV_PFAULT_MMU_ENG_ID_CE5 20 /* */
#define NV_PFAULT_MMU_ENG_ID_CE6 21 /* */
#define NV_PFAULT_MMU_ENG_ID_CE7 22 /* */
#define NV_PFAULT_MMU_ENG_ID_CE8 23 /* */
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 6 /* */
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC0 11 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC1 12 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC2 13 /* */
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 31 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST0 32 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST1 33 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST2 34 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST3 35 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST4 36 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST5 37 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST6 38 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST7 39 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST8 40 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST9 41 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST10 42 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST11 43 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST12 44 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST13 45 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST14 46 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN0 128 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN1 129 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN2 130 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN3 131 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN4 132 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN5 133 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN6 134 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN7 135 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN8 136 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN9 137 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN10 138 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN11 139 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN12 140 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN13 141 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN14 142 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN15 143 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN16 144 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN17 145 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN18 146 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN19 147 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN20 148 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN21 149 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN22 150 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN23 151 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN24 152 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN25 153 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN26 154 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN27 155 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN28 156 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN29 157 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN30 158 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN31 159 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN32 160 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN33 161 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN34 162 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN35 163 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN36 164 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN37 165 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN38 166 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN39 167 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN40 168 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN41 169 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN42 170 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN43 171 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN44 172 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN45 173 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN46 174 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN47 175 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN48 176 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN49 177 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN50 178 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN51 179 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN52 180 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN53 181 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN54 182 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN55 183 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN56 184 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN57 185 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN58 186 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN59 187 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN60 188 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN61 189 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN62 190 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN63 191 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN0 192 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN1 193 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN2 194 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN3 195 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN4 196 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN5 197 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN6 198 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN7 199 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN8 200 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN9 201 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN10 202 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN11 203 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN12 204 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN13 205 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN14 206 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN15 207 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN16 208 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN17 209 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN18 210 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN19 211 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN20 212 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN21 213 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN22 214 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN23 215 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN24 216 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN25 217 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN26 218 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN27 219 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN28 220 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN29 221 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN30 222 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN31 223 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN32 224 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN33 225 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN34 226 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN35 227 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN36 228 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN37 229 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN38 230 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN39 231 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN40 232 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN41 233 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN42 234 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN43 235 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN44 236 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN45 237 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN46 238 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN47 239 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN48 240 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN49 241 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN50 242 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN51 243 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN52 244 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN53 245 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN54 246 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN55 247 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN56 248 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN57 249 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN58 250 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN59 251 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN60 252 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN61 253 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN62 254 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN63 255 /* */
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
#define NV_PFAULT_CLIENT 14:8 /* */
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_0 0x00000018 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_1 0x00000019 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_2 0x0000001A /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_3 0x0000001B /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_4 0x0000001C /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_5 0x0000001D /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_6 0x0000001E /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_7 0x0000001F /* */
#define NV_PFAULT_CLIENT_GPC_RGG_UTLB 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC0 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC1 0x0000003A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC2 0x0000003B /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG0 0x0000003C /* */
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
#define NV_PFAULT_CLIENT_HUB_DWBIF 0x00000036 /* */
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
#define NV_PFAULT_GPC_ID 28:24 /* */
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
#define NV_PFAULT_REPLAYABLE_FAULT_EN 30:30 /* */
#define NV_PFAULT_VALID 31:31 /* */
#endif // __tu102_dev_fault_h__
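
The NV_PFAULT_* definitions above describe bit fields of a packed fault-information word: fault type in 4:0, client in 14:8, access type in 19:16, GPC/HUB client type in 20:20, and valid in 31:31. As a hedged illustration only (the FIELD_GET helper and the sample value are assumptions, not part of this header), a decode might look like:

#include <stdint.h>
#include <stdio.h>

/* Illustrative helpers: build a mask for bits [hi:lo] and extract that range. */
#define FIELD_MASK(hi, lo)      ((((uint32_t)1 << ((hi) - (lo) + 1)) - 1) << (lo))
#define FIELD_GET(val, hi, lo)  (((val) & FIELD_MASK(hi, lo)) >> (lo))

int main(void)
{
    uint32_t info = 0x80110002u;  /* arbitrary example value, for illustration only */

    unsigned fault_type  = FIELD_GET(info, 4, 0);    /* NV_PFAULT_FAULT_TYPE        4:0  */
    unsigned client      = FIELD_GET(info, 14, 8);   /* NV_PFAULT_CLIENT           14:8  */
    unsigned access_type = FIELD_GET(info, 19, 16);  /* NV_PFAULT_ACCESS_TYPE     19:16  */
    unsigned client_type = FIELD_GET(info, 20, 20);  /* NV_PFAULT_MMU_CLIENT_TYPE 20:20  */
    unsigned valid       = FIELD_GET(info, 31, 31);  /* NV_PFAULT_VALID           31:31  */

    if (valid && fault_type == 0x2 /* NV_PFAULT_FAULT_TYPE_PTE */)
        printf("PTE fault: client=0x%02x access=%u client_type=%u\n",
               client, access_type, client_type);
    return 0;
}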


@@ -0,0 +1,649 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __tu102_dev_mmu_h__
#define __tu102_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0x07 /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY 0x06 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x01 /* R---V */
#define NV_MMU_PTE_KIND_S8 0x02 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x03 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0x04 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x05 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE 0x08 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC 0x09 /* R---V */
#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC 0x0A /* R---V */
#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC 0x0B /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC 0x0C /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC 0x0D /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC 0x0E /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0x0F /* R---V */
#define NV_MMU_PTE_KIND_Z16_2C 0x2a /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2C 0x11 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2C 0xC3 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2C 0x46 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2C 0x6c /* R---V */
#define NV_MMU_PTE_KIND_Z16_2Z 0x6b /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2Z 0x10 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2Z 0x60 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2Z 0x61 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2Z 0x62 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2CZ 0x36 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2CZ 0x37 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2CZ 0x38 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2CZ 0x39 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2CZ 0x5f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_1Z 0x12 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_1Z 0x13 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_1Z 0x14 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_1Z 0x15 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_1Z 0x16 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CZ 0x17 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CZ 0x18 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CZ 0x19 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CZ 0x1a /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CZ 0x1b /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CS 0x1c /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CS 0x1d /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CS 0x1e /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CS 0x1f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CS 0x20 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_4CSZV 0x21 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_4CSZV 0x22 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_4CSZV 0x23 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_4CSZV 0x24 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_4CSZV 0x25 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12 0x26 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4 0x27 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8 0x28 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24 0x29 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_1ZV 0x2e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_1ZV 0x2f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_1ZV 0x30 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_1ZV 0x31 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CS 0x32 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CS 0x33 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CS 0x34 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CS 0x35 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CZV 0x3a /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CZV 0x3b /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CZV 0x3c /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CZV 0x3d /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2ZV 0x3e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2ZV 0x3f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2ZV 0x40 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2ZV 0x41 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_4CSZV 0x42 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_4CSZV 0x43 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_4CSZV 0x44 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_4CSZV 0x45 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_1Z 0x47 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_1Z 0x48 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_1Z 0x49 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_1Z 0x4a /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_1Z 0x4b /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CS 0x4c /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CS 0x4d /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CS 0x4e /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CS 0x4f /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CS 0x50 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CZ 0x51 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CZ 0x52 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CZ 0x53 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CZ 0x54 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CZ 0x55 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_4CSZV 0x56 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_4CSZV 0x57 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_4CSZV 0x58 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_4CSZV 0x59 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_4CSZV 0x5a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12 0x5b /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4 0x5c /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8 0x5d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24 0x5e /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_1ZV 0x63 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_1ZV 0x64 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_1ZV 0x65 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_1ZV 0x66 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CS 0x67 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CS 0x68 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CS 0x69 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CS 0x6a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CZV 0x6f /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CZV 0x70 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CZV 0x71 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CZV 0x72 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2ZV 0x73 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2ZV 0x74 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2ZV 0x75 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2ZV 0x76 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_4CSZV 0x77 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_4CSZV 0x78 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_4CSZV 0x79 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_4CSZV 0x7a /* R---V */
#define NV_MMU_PTE_KIND_ZF32 0x7b /* R---V */
#define NV_MMU_PTE_KIND_ZF32_1Z 0x7c /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_1Z 0x7d /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_1Z 0x7e /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_1Z 0x7f /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_1Z 0x80 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CS 0x81 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CS 0x82 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CS 0x83 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CS 0x84 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CS 0x85 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CZ 0x86 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CZ 0x87 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CZ 0x88 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CZ 0x89 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CZ 0x8a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12 0x8b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4 0x8c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8 0x8d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24 0x8e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CS 0x8f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CS 0x90 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CS 0x91 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CS 0x92 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1ZV 0x97 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1ZV 0x98 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1ZV 0x99 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1ZV 0x9a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CZV 0x9b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CZV 0x9c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CZV 0x9d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CZV 0x9e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CS 0x9f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CS 0xa0 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CS 0xa1 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CS 0xa2 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CSZV 0xa3 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CSZV 0xa4 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CSZV 0xa5 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CSZV 0xa6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12 0xa7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4 0xa8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8 0xa9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24 0xaa /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CS 0xab /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CS 0xac /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CS 0xad /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CS 0xae /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1ZV 0xb3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1ZV 0xb4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1ZV 0xb5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1ZV 0xb6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CZV 0xb7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CZV 0xb8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CZV 0xb9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CZV 0xba /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CS 0xbb /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CS 0xbc /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CS 0xbd /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CS 0xbe /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CSZV 0xbf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CSZV 0xc0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CSZV 0xc1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CSZV 0xc2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_1CS 0xc4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_1CS 0xc5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_1CS 0xc6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_1CS 0xc7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_1CS 0xc8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CSZV 0xce /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CSZV 0xcf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CSZV 0xd0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CSZV 0xd1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CSZV 0xd2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CS 0xd3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CS 0xd4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CS 0xd5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CS 0xd6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CS 0xd7 /* R---V */
#define NV_MMU_PTE_KIND_S8_2S 0x2b /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_16BX2 0xfe /* R---V */
#define NV_MMU_PTE_KIND_C32_2C 0xd8 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBR 0xd9 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBA 0xda /* R---V */
#define NV_MMU_PTE_KIND_C32_2CRA 0xdb /* R---V */
#define NV_MMU_PTE_KIND_C32_2BRA 0xdc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2C 0xdd /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CBR 0xde /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_4CBRA 0xcc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2C 0xdf /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBR 0xe0 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBA 0xe1 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CRA 0xe2 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2BRA 0xe3 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_4CBRA 0x2c /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2C 0xe4 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2CRA 0xe5 /* R---V */
#define NV_MMU_PTE_KIND_C64_2C 0xe6 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBR 0xe7 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBA 0xe8 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CRA 0xe9 /* R---V */
#define NV_MMU_PTE_KIND_C64_2BRA 0xea /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2C 0xeb /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CBR 0xec /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_4CBRA 0xcd /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2C 0xed /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBR 0xee /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBA 0xef /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CRA 0xf0 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2BRA 0xf1 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_4CBRA 0x2d /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2C 0xf2 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2CRA 0xf3 /* R---V */
#define NV_MMU_PTE_KIND_C128_2C 0xf4 /* R---V */
#define NV_MMU_PTE_KIND_C128_2CR 0xf5 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2C 0xf6 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2CR 0xf7 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2C 0xf8 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2CR 0xf9 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2C 0xfa /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2CR 0xfb /* R---V */
#define NV_MMU_PTE_KIND_X8C24 0xfc /* R---V */
#define NV_MMU_PTE_KIND_PITCH_NO_SWIZZLE 0xfd /* R---V */
#define NV_MMU_PTE_KIND_SMHOST_MESSAGE 0xcb /* R---V */
#define NV_MMU_VER1_PDE /* ----G */
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PDE__SIZE 8
#define NV_MMU_VER1_PTE /* ----G */
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PTE__SIZE 8
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_NEW_PDE /* ----G */
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PDE__SIZE 8
#define NV_MMU_NEW_DUAL_PDE /* ----G */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
#define NV_MMU_NEW_PTE /* ----G */
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_NEW_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PTE__SIZE 8
#define NV_MMU_VER2_PDE /* ----G */
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PDE__SIZE 8
#define NV_MMU_VER2_DUAL_PDE /* ----G */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
#define NV_MMU_VER2_PTE /* ----G */
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER2_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PTE__SIZE 8
#define NV_MMU_CLIENT /* ----G */
#define NV_MMU_CLIENT_KIND 2:0 /* RWXVF */
#define NV_MMU_CLIENT_KIND_Z16 0x1 /* R---V */
#define NV_MMU_CLIENT_KIND_S8 0x2 /* R---V */
#define NV_MMU_CLIENT_KIND_S8Z24 0x3 /* R---V */
#define NV_MMU_CLIENT_KIND_ZF32_X24S8 0x4 /* R---V */
#define NV_MMU_CLIENT_KIND_Z24S8 0x5 /* R---V */
#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY 0x6 /* R---V */
#define NV_MMU_CLIENT_KIND_INVALID 0x7 /* R---V */
#endif // __tu102_dev_mmu_h__
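
To make the VER2 PTE layout above concrete, here is a hedged sketch (not driver code; the PTE_FIELD helper, function name, and parameter choices are assumptions) that packs an 8-byte PTE — NV_MMU_VER2_PTE__SIZE — for a coherent system-memory mapping, placing the 4 KiB-aligned address in ADDRESS_SYS (bits 53:8) after applying ADDRESS_SHIFT (0xc) and the page kind in KIND (bits 63:56):

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Illustrative helper: place `val` into bits [hi:lo] of a 64-bit word. */
#define PTE_FIELD(val, hi, lo) \
    (((uint64_t)(val) & ((1ULL << ((hi) - (lo) + 1)) - 1)) << (lo))

uint64_t make_ver2_sysmem_pte(uint64_t phys_addr, uint8_t kind, int read_only)
{
    uint64_t pte = 0;

    pte |= PTE_FIELD(0x1, 0, 0);                 /* VALID_TRUE                            */
    pte |= PTE_FIELD(0x2, 2, 1);                 /* APERTURE_SYSTEM_COHERENT_MEMORY       */
    pte |= PTE_FIELD(read_only ? 1 : 0, 6, 6);   /* READ_ONLY                             */
    pte |= PTE_FIELD(phys_addr >> 0x0c, 53, 8);  /* ADDRESS_SYS, address >> ADDRESS_SHIFT */
    pte |= PTE_FIELD(kind, 63, 56);              /* KIND (page kind value, field 63:56)   */

    return pte;
}

int main(void)
{
    /* Example: map a page at 0x1234567000 with an arbitrary kind value, read-only. */
    printf("pte = 0x%016" PRIx64 "\n", make_ver2_sysmem_pte(0x1234567000ULL, 0x06, 1));
    return 0;
}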


@@ -0,0 +1,263 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gv100_dev_fault_h__
#define __gv100_dev_fault_h__
/* This file is autogenerated. Do not edit */
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 64 /* */
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
#define NV_PFAULT_MMU_ENG_ID_IFB 8 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1 4 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2 5 /* */
#define NV_PFAULT_MMU_ENG_ID_SEC 14 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF 9 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC 10 /* */
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE0 15 /* */
#define NV_PFAULT_MMU_ENG_ID_CE1 16 /* */
#define NV_PFAULT_MMU_ENG_ID_CE2 17 /* */
#define NV_PFAULT_MMU_ENG_ID_CE3 18 /* */
#define NV_PFAULT_MMU_ENG_ID_CE4 19 /* */
#define NV_PFAULT_MMU_ENG_ID_CE5 20 /* */
#define NV_PFAULT_MMU_ENG_ID_CE6 21 /* */
#define NV_PFAULT_MMU_ENG_ID_CE7 22 /* */
#define NV_PFAULT_MMU_ENG_ID_CE8 23 /* */
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 6 /* */
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC0 11 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC1 12 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC2 13 /* */
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 31 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST0 32 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST1 33 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST2 34 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST3 35 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST4 36 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST5 37 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST6 38 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST7 39 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST8 40 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST9 41 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST10 42 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST11 43 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST12 44 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST13 45 /* */
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
#define NV_PFAULT_CLIENT 14:8 /* */
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_0 0x00000018 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_1 0x00000019 /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_2 0x0000001A /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_3 0x0000001B /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_4 0x0000001C /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_5 0x0000001D /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_6 0x0000001E /* */
#define NV_PFAULT_CLIENT_GPC_LTP_UTLB_7 0x0000001F /* */
#define NV_PFAULT_CLIENT_GPC_RGG_UTLB 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
#define NV_PFAULT_CLIENT_HUB_DWBIF 0x00000036 /* */
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
#define NV_PFAULT_GPC_ID 28:24 /* */
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
#define NV_PFAULT_ATS_FAULT 30:30 /* */
#define NV_PFAULT_VALID 31:31 /* */
#endif // __gv100_dev_fault_h__
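/*
 * Illustrative decode sketch (not part of the header above): the macros slice a
 * packed fault-information word, with FAULT_TYPE in bits 4:0, CLIENT in 14:8,
 * ACCESS_TYPE in 19:16, MMU_CLIENT_TYPE in bit 20 and VALID in bit 31. Assuming
 * the word has been read into a plain 32-bit value, the fields can be pulled
 * apart as follows (struct and function names are placeholders, not driver APIs):
 */
struct nv_fault_info_fields {
    unsigned int fault_type;   /* NV_PFAULT_FAULT_TYPE       4:0   */
    unsigned int client;       /* NV_PFAULT_CLIENT           14:8  */
    unsigned int access_type;  /* NV_PFAULT_ACCESS_TYPE      19:16 */
    unsigned int client_type;  /* NV_PFAULT_MMU_CLIENT_TYPE  20:20 */
    unsigned int valid;        /* NV_PFAULT_VALID            31:31 */
};

static inline void nv_fault_info_unpack(unsigned int info,
                                        struct nv_fault_info_fields *out)
{
    out->fault_type  = (info >> 0)  & 0x1f;
    out->client      = (info >> 8)  & 0x7f;
    out->access_type = (info >> 16) & 0xf;
    out->client_type = (info >> 20) & 0x1;
    out->valid       = (info >> 31) & 0x1;
}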

View File

@@ -0,0 +1,103 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// Excerpt of gv100/dev_fb.h
#ifndef __dev_fb_h__
#define __dev_fb_h__
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO(i) (0x00100E24+(i)*20) /* RW-4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_ADDR_MODE 0:0 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_ADDR_MODE_VIRTUAL 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_ADDR_MODE_PHYSICAL 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_APERTURE 2:1 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_APERTURE_LOCAL 0x00000000 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_APERTURE_SYS_COH 0x00000002 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_APERTURE_SYS_NCOH 0x00000003 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_PHYS_VOL 3:3 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_LO_ADDR 31:12 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_HI(i) (0x00100E28+(i)*20) /* RW-4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_HI__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_HI_ADDR 31:0 /* RW-VF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET(i) (0x00100E2C+(i)*20) /* RW-4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_PTR 19:0 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_PTR_RESET 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_GETPTR_CORRUPTED 30:30 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_GETPTR_CORRUPTED_NO 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_GETPTR_CORRUPTED_YES 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_GETPTR_CORRUPTED_CLEAR 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_OVERFLOW 31:31 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_OVERFLOW_NO 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_OVERFLOW_YES 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_GET_OVERFLOW_CLEAR 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT(i) (0x00100E30+(i)*20) /* R--4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_PTR 19:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_PTR_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED 30:30 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED_NO 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED_YES 0x00000001 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_OVERFLOW 31:31 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_OVERFLOW_NO 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_OVERFLOW_YES 0x00000001 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE(i) (0x00100E34+(i)*20) /* RW-4A */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE__SIZE_1 2 /* */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_VAL 19:0 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_VAL_RESET 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_OVERFLOW_INTR 29:29 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_OVERFLOW_INTR_DISABLE 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_OVERFLOW_INTR_ENABLE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_SET_DEFAULT 30:30 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_SET_DEFAULT_NO 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_SET_DEFAULT_YES 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_ENABLE 31:31 /* RWEVF */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_ENABLE_FALSE 0x00000000 /* RWE-V */
#define NV_PFB_PRI_MMU_FAULT_BUFFER_SIZE_ENABLE_TRUE 0x00000001 /* RW--V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO 0x00100E4C /* R--4R */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE 1:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE_LOCAL 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE_PEER 0x00000001 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE_SYS_COH 0x00000002 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_PHYS_APERTURE_SYS_NCOH 0x00000003 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_ADDR 31:12 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_ADDR_LO_ADDR_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_ADDR_HI 0x00100E50 /* R--4R */
#define NV_PFB_PRI_MMU_FAULT_ADDR_HI_ADDR 31:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_ADDR_HI_ADDR_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO 0x00100E54 /* R--4R */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_ENGINE_ID 8:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_ENGINE_ID_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE 11:10 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE_VID_MEM 0x00000000 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE_SYS_MEM_COHERENT 0x00000002 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE_SYS_MEM_NONCOHERENT 0x00000003 /* R---V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_APERTURE_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_ADDR 31:12 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_INST_LO_ADDR_RESET 0x00000000 /* R-E-V */
#define NV_PFB_PRI_MMU_FAULT_INST_HI 0x00100E58 /* R--4R */
#define NV_PFB_PRI_MMU_FAULT_INST_HI_ADDR 31:0 /* R-EVF */
#define NV_PFB_PRI_MMU_FAULT_INST_HI_ADDR_RESET 0x00000000 /* R-E-V */
#endif
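/*
 * Illustrative sketch (not part of the excerpt above): the GET/PUT registers
 * describe each fault buffer as a ring, with software advancing GET_PTR as it
 * consumes entries and hardware advancing PUT_PTR as it writes them, both
 * bounded by SIZE_VAL. Assuming conventional ring semantics and a hypothetical
 * MMIO read helper (read_reg32 is a placeholder, not a driver API), a consumer
 * can tell whether entries are pending like this:
 */
extern unsigned int read_reg32(unsigned int offset);   /* hypothetical BAR0 read */

static inline int fault_buffer_has_entries(unsigned int i)
{
    unsigned int get = read_reg32(0x00100E2C + i * 20) & 0xFFFFF; /* GET(i), PTR 19:0 */
    unsigned int put = read_reg32(0x00100E30 + i * 20) & 0xFFFFF; /* PUT(i), PTR 19:0 */
    return get != put;   /* GET equal to PUT means the buffer has been drained */
}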

View File

@@ -0,0 +1,661 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gv100_dev_mmu_h__
#define __gv100_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+18+11):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0xff /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x01 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2C 0x02 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2C 0x03 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2C 0x04 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2C 0x05 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2C 0x06 /* R---V */
#define NV_MMU_PTE_KIND_Z16_2Z 0x07 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2Z 0x08 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2Z 0x09 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2Z 0x0a /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2Z 0x0b /* R---V */
#define NV_MMU_PTE_KIND_Z16_2CZ 0x36 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_2CZ 0x37 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_2CZ 0x38 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_2CZ 0x39 /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_2CZ 0x5f /* R---V */
#define NV_MMU_PTE_KIND_Z16_4CZ 0x0c /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS2_4CZ 0x0d /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS4_4CZ 0x0e /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS8_4CZ 0x0f /* R---V */
#define NV_MMU_PTE_KIND_Z16_MS16_4CZ 0x10 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x11 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_1Z 0x12 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_1Z 0x13 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_1Z 0x14 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_1Z 0x15 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_1Z 0x16 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CZ 0x17 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CZ 0x18 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CZ 0x19 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CZ 0x1a /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CZ 0x1b /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_2CS 0x1c /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_2CS 0x1d /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_2CS 0x1e /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_2CS 0x1f /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_2CS 0x20 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_4CSZV 0x21 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS2_4CSZV 0x22 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS4_4CSZV 0x23 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS8_4CSZV 0x24 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_MS16_4CSZV 0x25 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12 0x26 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4 0x27 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8 0x28 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24 0x29 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_1ZV 0x2e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_1ZV 0x2f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_1ZV 0x30 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_1ZV 0x31 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CS 0x32 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CS 0x33 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CS 0x34 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CS 0x35 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2CZV 0x3a /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2CZV 0x3b /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2CZV 0x3c /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2CZV 0x3d /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_2ZV 0x3e /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_2ZV 0x3f /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_2ZV 0x40 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_2ZV 0x41 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC12_4CSZV 0x42 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS4_VC4_4CSZV 0x43 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC8_4CSZV 0x44 /* R---V */
#define NV_MMU_PTE_KIND_V8Z24_MS8_VC24_4CSZV 0x45 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x46 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_1Z 0x47 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_1Z 0x48 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_1Z 0x49 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_1Z 0x4a /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_1Z 0x4b /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CS 0x4c /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CS 0x4d /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CS 0x4e /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CS 0x4f /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CS 0x50 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_2CZ 0x51 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_2CZ 0x52 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_2CZ 0x53 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_2CZ 0x54 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_2CZ 0x55 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_4CSZV 0x56 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS2_4CSZV 0x57 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS4_4CSZV 0x58 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS8_4CSZV 0x59 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_MS16_4CSZV 0x5a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12 0x5b /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4 0x5c /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8 0x5d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24 0x5e /* R---V */
#define NV_MMU_PTE_KIND_YUV_B8C1_2Y 0x60 /* R---V */
#define NV_MMU_PTE_KIND_YUV_B8C2_2Y 0x61 /* R---V */
#define NV_MMU_PTE_KIND_YUV_B10C1_2Y 0x62 /* R---V */
#define NV_MMU_PTE_KIND_YUV_B10C2_2Y 0x6b /* R---V */
#define NV_MMU_PTE_KIND_YUV_B12C1_2Y 0x6c /* R---V */
#define NV_MMU_PTE_KIND_YUV_B12C2_2Y 0x6d /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_1ZV 0x63 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_1ZV 0x64 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_1ZV 0x65 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_1ZV 0x66 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CS 0x67 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CS 0x68 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CS 0x69 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CS 0x6a /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2CZV 0x6f /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2CZV 0x70 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2CZV 0x71 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2CZV 0x72 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_2ZV 0x73 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_2ZV 0x74 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_2ZV 0x75 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_2ZV 0x76 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC12_4CSZV 0x77 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS4_VC4_4CSZV 0x78 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC8_4CSZV 0x79 /* R---V */
#define NV_MMU_PTE_KIND_Z24V8_MS8_VC24_4CSZV 0x7a /* R---V */
#define NV_MMU_PTE_KIND_ZF32 0x7b /* R---V */
#define NV_MMU_PTE_KIND_ZF32_1Z 0x7c /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_1Z 0x7d /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_1Z 0x7e /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_1Z 0x7f /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_1Z 0x80 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CS 0x81 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CS 0x82 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CS 0x83 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CS 0x84 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CS 0x85 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_2CZ 0x86 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS2_2CZ 0x87 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS4_2CZ 0x88 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS8_2CZ 0x89 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_MS16_2CZ 0x8a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12 0x8b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4 0x8c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8 0x8d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24 0x8e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CS 0x8f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CS 0x90 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CS 0x91 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CS 0x92 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1ZV 0x97 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1ZV 0x98 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1ZV 0x99 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1ZV 0x9a /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_1CZV 0x9b /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_1CZV 0x9c /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_1CZV 0x9d /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_1CZV 0x9e /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CS 0x9f /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CS 0xa0 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CS 0xa1 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CS 0xa2 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC12_2CSZV 0xa3 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS4_VC4_2CSZV 0xa4 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC8_2CSZV 0xa5 /* R---V */
#define NV_MMU_PTE_KIND_X8Z24_X16V8S8_MS8_VC24_2CSZV 0xa6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12 0xa7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4 0xa8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8 0xa9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24 0xaa /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CS 0xab /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CS 0xac /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CS 0xad /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CS 0xae /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1ZV 0xb3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1ZV 0xb4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1ZV 0xb5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1ZV 0xb6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_1CZV 0xb7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_1CZV 0xb8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_1CZV 0xb9 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_1CZV 0xba /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CS 0xbb /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CS 0xbc /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CS 0xbd /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CS 0xbe /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC12_2CSZV 0xbf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS4_VC4_2CSZV 0xc0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC8_2CSZV 0xc1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X16V8S8_MS8_VC24_2CSZV 0xc2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0xc3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_1CS 0xc4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_1CS 0xc5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_1CS 0xc6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_1CS 0xc7 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_1CS 0xc8 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CSZV 0xce /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CSZV 0xcf /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CSZV 0xd0 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CSZV 0xd1 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CSZV 0xd2 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_2CS 0xd3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS2_2CS 0xd4 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS4_2CS 0xd5 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS8_2CS 0xd6 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_MS16_2CS 0xd7 /* R---V */
#define NV_MMU_PTE_KIND_S8 0x2a /* R---V */
#define NV_MMU_PTE_KIND_S8_2S 0x2b /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_16BX2 0xfe /* R---V */
#define NV_MMU_PTE_KIND_C32_2C 0xd8 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBR 0xd9 /* R---V */
#define NV_MMU_PTE_KIND_C32_2CBA 0xda /* R---V */
#define NV_MMU_PTE_KIND_C32_2CRA 0xdb /* R---V */
#define NV_MMU_PTE_KIND_C32_2BRA 0xdc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2C 0xdd /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_2CBR 0xde /* R---V */
#define NV_MMU_PTE_KIND_C32_MS2_4CBRA 0xcc /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2C 0xdf /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBR 0xe0 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CBA 0xe1 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2CRA 0xe2 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_2BRA 0xe3 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS4_4CBRA 0x2c /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2C 0xe4 /* R---V */
#define NV_MMU_PTE_KIND_C32_MS8_MS16_2CRA 0xe5 /* R---V */
#define NV_MMU_PTE_KIND_C64_2C 0xe6 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBR 0xe7 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CBA 0xe8 /* R---V */
#define NV_MMU_PTE_KIND_C64_2CRA 0xe9 /* R---V */
#define NV_MMU_PTE_KIND_C64_2BRA 0xea /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2C 0xeb /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_2CBR 0xec /* R---V */
#define NV_MMU_PTE_KIND_C64_MS2_4CBRA 0xcd /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2C 0xed /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBR 0xee /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CBA 0xef /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2CRA 0xf0 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_2BRA 0xf1 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS4_4CBRA 0x2d /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2C 0xf2 /* R---V */
#define NV_MMU_PTE_KIND_C64_MS8_MS16_2CRA 0xf3 /* R---V */
#define NV_MMU_PTE_KIND_C128_2C 0xf4 /* R---V */
#define NV_MMU_PTE_KIND_C128_2CR 0xf5 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2C 0xf6 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS2_2CR 0xf7 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2C 0xf8 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS4_2CR 0xf9 /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2C 0xfa /* R---V */
#define NV_MMU_PTE_KIND_C128_MS8_MS16_2CR 0xfb /* R---V */
#define NV_MMU_PTE_KIND_X8C24 0xfc /* R---V */
#define NV_MMU_PTE_KIND_PITCH_NO_SWIZZLE 0xfd /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xca /* R---V */
#define NV_MMU_PTE_KIND_SMHOST_MESSAGE 0xcb /* R---V */
#define NV_MMU_VER1_PDE /* ----G */
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PDE__SIZE 8
#define NV_MMU_VER1_PTE /* ----G */
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+18+11):(1*32+12) /* RWXVF */
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PTE__SIZE 8
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_NEW_PDE /* ----G */
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PDE__SIZE 8
#define NV_MMU_NEW_DUAL_PDE /* ----G */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
#define NV_MMU_NEW_PTE /* ----G */
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_NEW_PTE_COMPTAGLINE (18+35):36 /* RWXVF */
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PTE__SIZE 8
#define NV_MMU_VER2_PDE /* ----G */
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PDE__SIZE 8
#define NV_MMU_VER2_DUAL_PDE /* ----G */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
#define NV_MMU_VER2_PTE /* ----G */
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER2_PTE_COMPTAGLINE (18+35):36 /* RWXVF */
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PTE__SIZE 8
#define NV_MMU_BASIC /* ----G */
#define NV_MMU_BASIC_KIND 3:0 /* RWXVF */
#define NV_MMU_BASIC_KIND_TRANSPARENT 0x0 /* R---V */
#define NV_MMU_BASIC_KIND_GENERIC 0x1 /* R---V */
#define NV_MMU_BASIC_KIND_S8 0x2 /* R---V */
#define NV_MMU_BASIC_KIND_Z16 0x3 /* R---V */
#define NV_MMU_BASIC_KIND_Z24S8 0x4 /* R---V */
#define NV_MMU_BASIC_KIND_ZF32 0x5 /* R---V */
#define NV_MMU_BASIC_KIND_ZF32_X24S8 0x6 /* R---V */
#define NV_MMU_BASIC_KIND_RSVRD0 0x7 /* R---V */
#define NV_MMU_BASIC_KIND_PITCH 0x8 /* R---V */
#define NV_MMU_BASIC_KIND_GENERIC_C 0x9 /* R---V */
#define NV_MMU_BASIC_KIND_S8_C 0xa /* R---V */
#define NV_MMU_BASIC_KIND_Z16_C 0xb /* R---V */
#define NV_MMU_BASIC_KIND_Z24S8_C 0xc /* R---V */
#define NV_MMU_BASIC_KIND_ZF32_C 0xd /* R---V */
#define NV_MMU_BASIC_KIND_ZF32_X24S8_C 0xe /* R---V */
#define NV_MMU_BASIC_KIND_INVALID 0xf /* R---V */
#endif // __gv100_dev_mmu_h__

View File

@@ -0,0 +1,577 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nv-kthread-q.h"
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include <linux/string.h>
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/mm.h>
// If NV_BUILD_MODULE_INSTANCES is not defined, define it here in order to
// avoid build warnings/errors when including nv-linux.h, as it expects the
// definition to be present.
#ifndef NV_BUILD_MODULE_INSTANCES
#define NV_BUILD_MODULE_INSTANCES 1
#endif
#include "nv-linux.h"
// Below are just a few lines of printing and test assertion support.
// It is important to avoid dependencies on other modules, because nv-kthread-q
// and its self-test are supposed to only require:
//
// -- Linux kernel functions and macros
//
// In order to avoid external dependencies (specifically, NV_STATUS codes), all
// functions in this file return non-zero upon failure, and zero upon success.
#ifndef NVIDIA_PRETTY_PRINTING_PREFIX
#define NVIDIA_PRETTY_PRINTING_PREFIX "nvidia: "
#endif
// This prints even on release builds:
#define NVQ_TEST_PRINT(fmt, ...) \
printk(KERN_INFO NVIDIA_PRETTY_PRINTING_PREFIX "%s:%u[pid:%d] " fmt, \
__FUNCTION__, \
__LINE__, \
current->pid, \
##__VA_ARGS__)
// Caution: This macro will return out of the current scope
#define TEST_CHECK_RET(cond) \
do { \
if (unlikely(!(cond))) { \
NVQ_TEST_PRINT("Test check failed, condition '%s' not true\n", \
#cond); \
on_nvq_assert(); \
return -1; \
} \
} while(0)
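// For illustration (hypothetical line number and pid), a failed
// TEST_CHECK_RET(result == 0) prints a line of the form:
//
//     nvidia: _basic_start_stop_test:123[pid:4567] Test check failed, condition 'result == 0' not true
//
// where the function name, line number, and pid come from NVQ_TEST_PRINT above.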
// Most test failures will do things such as just hang or crash. However, in
// order to detect bugs that are less fatal, simply count how many queue items
// actually ran.
#define NUM_Q_ITEMS_IN_BASIC_TEST 6
#define NUM_RESCHEDULE_CALLBACKS 10
#define NUM_TEST_Q_ITEMS (100 * 1000)
#define NUM_TEST_KTHREADS 8
#define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS)
// This exists in order to have a function to place a breakpoint on:
void on_nvq_assert(void)
{
(void)NULL;
}
////////////////////////////////////////////////////////////////////////////////
// Basic start-stop test
typedef struct basic_start_stop_args
{
int value_to_write;
int *where_to_write;
} basic_start_stop_args_t;
static void _basic_start_stop_callback(void *args)
{
basic_start_stop_args_t *start_stop_args = (basic_start_stop_args_t*)args;
*start_stop_args->where_to_write = start_stop_args->value_to_write;
}
static int _basic_start_stop_test(void)
{
int i, was_scheduled;
int result = 0;
nv_kthread_q_item_t q_item[NUM_Q_ITEMS_IN_BASIC_TEST];
int callback_values_written[NUM_Q_ITEMS_IN_BASIC_TEST];
basic_start_stop_args_t start_stop_args[NUM_Q_ITEMS_IN_BASIC_TEST];
nv_kthread_q_t local_q;
// Do a redundant stop to ensure that stop is supported on zero-initialized
// memory. No crash should occur.
memset(&local_q, 0, sizeof(nv_kthread_q_t));
nv_kthread_q_stop(&local_q);
// Do a quick start-stop cycle first:
result = nv_kthread_q_init(&local_q, "q_to_stop");
TEST_CHECK_RET(result == 0);
nv_kthread_q_stop(&local_q);
// call another q_stop and it shouldn't crash and should return fine
nv_kthread_q_stop(&local_q);
memset(&start_stop_args, 0, sizeof(start_stop_args));
memset(callback_values_written, 0, sizeof(callback_values_written));
// All the callback arguments point to the same nv_kthread_q:
for (i = 0; i < NUM_Q_ITEMS_IN_BASIC_TEST; ++i) {
start_stop_args[i].value_to_write = i;
start_stop_args[i].where_to_write = &callback_values_written[i];
}
result = nv_kthread_q_init(&local_q, "basic_q");
TEST_CHECK_RET(result == 0);
// Launch 3 items, then flush the queue.
//
// Each iteration sends a different instance of args to the callback
// function.
for (i = 0; i < 3; ++i) {
nv_kthread_q_item_init(&q_item[i],
_basic_start_stop_callback,
&start_stop_args[i]);
was_scheduled = nv_kthread_q_schedule_q_item(&local_q, &q_item[i]);
result |= (!was_scheduled);
}
// It is legal to flush more than once, so flush twice in a row:
nv_kthread_q_flush(&local_q);
nv_kthread_q_flush(&local_q);
// Launch the remaining items, then stop (which flushes) the queue:
for (i = 3; i < NUM_Q_ITEMS_IN_BASIC_TEST; ++i) {
nv_kthread_q_item_init(&q_item[i],
_basic_start_stop_callback,
&start_stop_args[i]);
was_scheduled = nv_kthread_q_schedule_q_item(&local_q, &q_item[i]);
result |= (!was_scheduled);
}
nv_kthread_q_stop(&local_q);
// Verify that all the callbacks ran and wrote their values:
for (i = 0; i < NUM_Q_ITEMS_IN_BASIC_TEST; ++i)
TEST_CHECK_RET(callback_values_written[i] == i);
return result;
}
////////////////////////////////////////////////////////////////////////////////
// Multithreaded test
typedef struct multithread_args
{
nv_kthread_q_t *test_q;
atomic_t *test_wide_accumulator;
atomic_t per_thread_accumulator;
} multithread_args_t;
static void _multithread_callback(void *args)
{
multithread_args_t *multithread_args = (multithread_args_t*)(args);
atomic_inc(multithread_args->test_wide_accumulator);
atomic_inc(&multithread_args->per_thread_accumulator);
}
//
// Return values:
// 0: Success
// -ENOMEM: vmalloc failed
// -EINVAL: test failed due to mismatched accumulator counts
//
static int _multithreaded_q_kthread_function(void *args)
{
int i, was_scheduled;
int result = 0;
int per_thread_count;
int test_wide_count;
multithread_args_t *multithread_args = (multithread_args_t*)args;
nv_kthread_q_item_t *q_items;
size_t alloc_size = NUM_TEST_Q_ITEMS * sizeof(nv_kthread_q_item_t);
q_items = vmalloc(alloc_size);
if (!q_items) {
result = -ENOMEM;
goto done;
}
memset(q_items, 0, alloc_size);
for (i = 0; i < NUM_TEST_Q_ITEMS; ++i) {
nv_kthread_q_item_init(&q_items[i],
_multithread_callback,
multithread_args);
was_scheduled = nv_kthread_q_schedule_q_item(multithread_args->test_q,
&q_items[i]);
result |= (!was_scheduled);
}
nv_kthread_q_flush(multithread_args->test_q);
per_thread_count = atomic_read(&multithread_args->per_thread_accumulator);
if (per_thread_count != NUM_TEST_Q_ITEMS) {
NVQ_TEST_PRINT("per_thread_count: Expected: %d, actual: %d\n",
NUM_TEST_Q_ITEMS, per_thread_count);
goto done;
}
test_wide_count = atomic_read(multithread_args->test_wide_accumulator);
if (test_wide_count < NUM_TEST_Q_ITEMS) {
NVQ_TEST_PRINT("test_wide_count: Expected: >= %d, actual: %d\n",
NUM_TEST_Q_ITEMS, test_wide_count);
goto done;
}
done:
if (q_items)
vfree(q_items);
while (!kthread_should_stop())
schedule();
return result;
}
static int _multithreaded_q_test(void)
{
int i, j;
int result = 0;
struct task_struct *kthreads[NUM_TEST_KTHREADS];
multithread_args_t multithread_args[NUM_TEST_KTHREADS];
nv_kthread_q_t local_q;
atomic_t local_accumulator;
memset(multithread_args, 0, sizeof(multithread_args));
memset(kthreads, 0, sizeof(kthreads));
atomic_set(&local_accumulator, 0);
result = nv_kthread_q_init(&local_q, "multithread_test_q");
TEST_CHECK_RET(result == 0);
for (i = 0; i < NUM_TEST_KTHREADS; ++i) {
multithread_args[i].test_q = &local_q;
multithread_args[i].test_wide_accumulator = &local_accumulator;
kthreads[i] = kthread_run(_multithreaded_q_kthread_function,
&multithread_args[i],
"nvq_test_kthread");
if (IS_ERR(kthreads[i]))
goto failed;
}
// Stop all of the test kthreads, then stop the queue. Collect any
// non-zero (failure) return values from the kthreads, and use those
// later to report a test failure.
for (i = 0; i < NUM_TEST_KTHREADS; ++i) {
result |= kthread_stop(kthreads[i]);
}
nv_kthread_q_stop(&local_q);
TEST_CHECK_RET(atomic_read(&local_accumulator) ==
NUM_Q_ITEMS_IN_MULTITHREAD_TEST);
return result;
failed:
NVQ_TEST_PRINT("kthread_run[%d] failed: errno: %ld\n",
i, PTR_ERR(kthreads[i]));
// Stop any threads that had successfully started:
for (j = 0; j < i; ++j)
kthread_stop(kthreads[j]);
nv_kthread_q_stop(&local_q);
return -1;
}
////////////////////////////////////////////////////////////////////////////////
// Self-rescheduling test
typedef struct resched_args
{
nv_kthread_q_t test_q;
nv_kthread_q_item_t q_item;
atomic_t accumulator;
atomic_t stop_rescheduling_callbacks;
int test_failure;
} resched_args_t;
static void _reschedule_callback(void *args)
{
int was_scheduled;
resched_args_t *resched_args = (resched_args_t*)args;
// This test promises to add one to accumulator, for each time through.
atomic_inc(&resched_args->accumulator);
if (atomic_read(&resched_args->stop_rescheduling_callbacks) == 0) {
nv_kthread_q_item_init(&resched_args->q_item,
_reschedule_callback,
resched_args);
was_scheduled = nv_kthread_q_schedule_q_item(&resched_args->test_q,
&resched_args->q_item);
if (!was_scheduled) {
resched_args->test_failure = 1;
}
}
// Ensure the thread relinquishes control; otherwise we hang in single-core
// environments.
schedule();
}
// Verify that re-scheduling the same q_item, from within its own
// callback, works.
static int _reschedule_same_item_from_its_own_callback_test(void)
{
int was_scheduled;
int result = 0;
resched_args_t resched_args;
memset(&resched_args, 0, sizeof(resched_args));
result = nv_kthread_q_init(&resched_args.test_q, "resched_test_q");
TEST_CHECK_RET(result == 0);
nv_kthread_q_item_init(&resched_args.q_item,
_reschedule_callback,
&resched_args);
was_scheduled = nv_kthread_q_schedule_q_item(&resched_args.test_q,
&resched_args.q_item);
result |= (!was_scheduled);
// Wait for a few callback items to run
while(atomic_read(&resched_args.accumulator) < NUM_RESCHEDULE_CALLBACKS)
schedule();
// Stop the callbacks from rescheduling themselves. This requires two
// levels of flushing: one flush to wait for any callbacks that missed
// the .stop_rescheduling_callbacks change, and another for any pending
// callbacks that were scheduled from within the callback.
atomic_set(&resched_args.stop_rescheduling_callbacks, 1);
// Stop the queue. This is guaranteed to do a (double) flush, and that
// flush takes care of any pending callbacks that we rescheduled from
// within the callback function.
nv_kthread_q_stop(&resched_args.test_q);
return (result || resched_args.test_failure);
}
////////////////////////////////////////////////////////////////////////////////
// Rescheduling the exact same q_item test
typedef struct same_q_item_args
{
atomic_t test_accumulator;
} same_q_item_args_t;
static void _same_q_item_callback(void *args)
{
same_q_item_args_t *same_q_item_args = (same_q_item_args_t*)(args);
atomic_inc(&same_q_item_args->test_accumulator);
}
static int _same_q_item_test(void)
{
int result, i;
int num_scheduled = 0;
same_q_item_args_t same_q_item_args;
nv_kthread_q_t local_q;
nv_kthread_q_item_t q_item;
memset(&same_q_item_args, 0, sizeof(same_q_item_args));
result = nv_kthread_q_init(&local_q, "same_q_item_test_q");
TEST_CHECK_RET(result == 0);
nv_kthread_q_item_init(&q_item,
_same_q_item_callback,
&same_q_item_args);
// Attempt to queue up many copies of the same q_item, then stop the queue.
// This is an attempt to launch enough q_items that at least some of them
// end up being pending in the queue, and exercise the "if already pending"
// logic.
//
// Some manual testing indicates that launching 1000 q_items in a tight loop
// causes between 1 and 20 copies to run. Obviously this is extremely
// dependent on the particular test machine and kernel and more, but it
// shows that 1000 is not unreasonable.
for (i = 0; i < 1000; ++i)
num_scheduled += nv_kthread_q_schedule_q_item(&local_q, &q_item);
nv_kthread_q_stop(&local_q);
// At least one item will have run, but not necessarily any more than that.
TEST_CHECK_RET(atomic_read(&same_q_item_args.test_accumulator) >= 1);
TEST_CHECK_RET(atomic_read(&same_q_item_args.test_accumulator) == num_scheduled);
return 0;
}
// Returns true if any of the stack pages are not resident on the indicated node.
static bool stack_mismatch(const struct task_struct *thread, int preferred_node)
{
unsigned num_stack_pages, i;
char* stack = (char*) thread->stack;
// If the stack has not been allocated using vmalloc, the physical pages
// are all on the same node, so just check the first page
if (!is_vmalloc_addr(stack)) {
struct page *page = virt_to_page(stack);
int node = page_to_nid(page);
return node != preferred_node;
}
num_stack_pages = THREAD_SIZE >> PAGE_SHIFT;
// The physical pages backing the stack may be discontiguous, so check them
// all.
for (i = 0; i < num_stack_pages; i++) {
char *curr_stack_page = stack + i * PAGE_SIZE;
struct page *page = vmalloc_to_page(curr_stack_page);
int node = page_to_nid(page);
if (node != preferred_node)
return true;
}
return false;
}
static void _check_cpu_affinity_callback(void *args)
{
struct task_struct *thread = get_current();
int *preferred_node = (int *) args;
int *ret = preferred_node + 1;
*ret = stack_mismatch(thread, *preferred_node);
}
// Verify that the stack of the kernel thread created by
// nv_kthread_q_init_on_node is resident on the specified NUMA node.
//
// nv_kthread_q_init_on_node does not guarantee that the thread's stack
// will be resident on the passed node, but in practice the preference is mostly
// honored so we invoke the function multiple times and allow a percentage of
// failures per node.
static int _check_cpu_affinity_test(void)
{
int result, node;
nv_kthread_q_t local_q;
// If the API does not support CPU affinity, check whether the correct
// error code is returned.
// Non-affinitized queue allocation has been verified by a previous test,
// so just ensure that the affinitized version also works.
if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY()) {
result = nv_kthread_q_init_on_node(&local_q, "should_fail", 0);
TEST_CHECK_RET(result == -ENOTSUPP);
return 0;
}
for_each_online_node(node) {
unsigned i;
const unsigned max_i = 100;
unsigned stack_mismatches = 0;
// Allow up to 20% of the stacks to be resident on a node different from
// the one requested.
const int alloc_mismatch_percentage = 20;
// Only test on CPU nodes which have memory
if (!nv_numa_node_has_memory(node) || !node_state(node, N_CPU))
continue;
for (i = 0; i < max_i; i++) {
unsigned j;
int thread_args[2];
nv_kthread_q_item_t q_item;
char q_name[64];
nv_kthread_q_item_init(&q_item, _check_cpu_affinity_callback, thread_args);
snprintf(q_name, sizeof(q_name), "test_q_%d", node);
result = nv_kthread_q_init_on_node(&local_q, q_name, node);
TEST_CHECK_RET(result == 0);
// The second entry contains the value returned by the callback:
// 0 if no mismatch found, and 1 otherwise.
thread_args[0] = node;
thread_args[1] = 0;
// Run several iterations to ensure that the thread's stack does
// not migrate after initialization.
for (j = 0; j < 25; j++) {
result = nv_kthread_q_schedule_q_item(&local_q, &q_item);
// nv_kthread_q_schedule_q_item() returns non-zero value if the
// item was successfully scheduled.
if (result == 0) {
nv_kthread_q_stop(&local_q);
TEST_CHECK_RET(false);
}
nv_kthread_q_flush(&local_q);
// Count it as a failure if any of the stack pages is resident on
// another node on any iteration.
if (thread_args[1] == 1) {
stack_mismatches++;
break;
}
}
nv_kthread_q_stop(&local_q);
if ((100 * stack_mismatches / max_i) > alloc_mismatch_percentage)
TEST_CHECK_RET(false);
}
}
return 0;
}
////////////////////////////////////////////////////////////////////////////////
// Top-level test entry point
int nv_kthread_q_run_self_test(void)
{
int result;
result = _basic_start_stop_test();
TEST_CHECK_RET(result == 0);
result = _reschedule_same_item_from_its_own_callback_test();
TEST_CHECK_RET(result == 0);
result = _multithreaded_q_test();
TEST_CHECK_RET(result == 0);
result = _same_q_item_test();
TEST_CHECK_RET(result == 0);
result = _check_cpu_affinity_test();
TEST_CHECK_RET(result == 0);
return 0;
}

View File

@@ -0,0 +1,335 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nv-kthread-q.h"
#include "nv-list-helpers.h"
#include <linux/kthread.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/mm.h>
#if defined(NV_LINUX_BUG_H_PRESENT)
#include <linux/bug.h>
#else
#include <asm/bug.h>
#endif
// Today's implementation is a little simpler and more limited than the
// API description allows for in nv-kthread-q.h. Details include:
//
// 1. Each nv_kthread_q instance is a first-in, first-out queue.
//
// 2. Each nv_kthread_q instance is serviced by exactly one kthread.
//
// You can create any number of queues, each of which gets its own
// named kernel thread (kthread). You can then insert arbitrary functions
// into the queue, and those functions will be run in the context of the
// queue's kthread.
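//
// A minimal usage sketch (illustrative only, not part of this module; the
// names example_increment_callback and example_usage are hypothetical, and
// the block is guarded out so it has no effect on the build). It assumes a
// trivial callback that increments an atomic counter, and follows the same
// calling sequence used by the self-test in nv-kthread-q-selftest.c:
#if 0
static void example_increment_callback(void *args)
{
    atomic_inc((atomic_t *)args);
}

static int example_usage(void)
{
    int ret;
    atomic_t counter;
    nv_kthread_q_t q;
    nv_kthread_q_item_t item;

    atomic_set(&counter, 0);

    // Create the queue and its dedicated kthread:
    ret = nv_kthread_q_init(&q, "example_q");
    if (ret != 0)
        return ret;

    // Bind a callback and its argument to a q_item, then schedule it:
    nv_kthread_q_item_init(&item, example_increment_callback, &counter);
    nv_kthread_q_schedule_q_item(&q, &item);

    // Wait for everything scheduled so far to run:
    nv_kthread_q_flush(&q);

    // Stop the queue (this also flushes) and its kthread:
    nv_kthread_q_stop(&q);

    return (atomic_read(&counter) == 1) ? 0 : -1;
}
#endif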
#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif
#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \
WARN(1, "nv_kthread_q: [in interrupt]: " fmt, \
##__VA_ARGS__); \
} \
else { \
WARN(1, "nv_kthread_q: task: %s: " fmt, \
current->comm, \
##__VA_ARGS__); \
} \
} while (0)
static int _main_loop(void *args)
{
nv_kthread_q_t *q = (nv_kthread_q_t *)args;
nv_kthread_q_item_t *q_item = NULL;
unsigned long flags;
while (1) {
// Normally this thread is never interrupted. However,
// down_interruptible (instead of down) is called here
// in order to avoid being classified as a potentially
// hung task by the kernel watchdog.
while (down_interruptible(&q->q_sem))
NVQ_WARN("Interrupted during semaphore wait\n");
if (atomic_read(&q->main_loop_should_exit))
break;
spin_lock_irqsave(&q->q_lock, flags);
// The q_sem semaphore prevents us from getting here unless there is
// at least one item in the list, so an empty list indicates a bug.
if (unlikely(list_empty(&q->q_list_head))) {
spin_unlock_irqrestore(&q->q_lock, flags);
NVQ_WARN("_main_loop: Empty queue: q: 0x%p\n", q);
continue;
}
// Consume one item from the queue
q_item = list_first_entry(&q->q_list_head,
nv_kthread_q_item_t,
q_list_node);
list_del_init(&q_item->q_list_node);
spin_unlock_irqrestore(&q->q_lock, flags);
// Run the item
q_item->function_to_run(q_item->function_args);
// Make debugging a little simpler by clearing this between runs:
q_item = NULL;
}
while (!kthread_should_stop())
schedule();
return 0;
}
void nv_kthread_q_stop(nv_kthread_q_t *q)
{
// check if queue has been properly initialized
if (unlikely(!q->q_kthread))
return;
nv_kthread_q_flush(q);
// If this assertion fires, then a caller likely either broke the API rules
// by adding items after calling nv_kthread_q_stop, or failed to adequately
// flush self-rescheduling q_items.
if (unlikely(!list_empty(&q->q_list_head)))
NVQ_WARN("list not empty after flushing\n");
if (likely(!atomic_read(&q->main_loop_should_exit))) {
atomic_set(&q->main_loop_should_exit, 1);
// Wake up the kthread so that it can see that it needs to stop:
up(&q->q_sem);
kthread_stop(q->q_kthread);
q->q_kthread = NULL;
}
}
// When CONFIG_VMAP_STACK is defined, the kernel thread stack allocator used by
// kthread_create_on_node relies on a 2 entry, per-core cache to minimize
// vmalloc invocations. The cache is NUMA-unaware, so when there is a hit, the
// stack location ends up being a function of the core assigned to the current
// thread, instead of being a function of the specified NUMA node. The cache was
// added to the kernel in commit ac496bf48d97f2503eaa353996a4dd5e4383eaf0
// ("fork: Optimize task creation by caching two thread stacks per CPU if
// CONFIG_VMAP_STACK=y")
//
// To work around the problematic cache, we create up to three kernel threads:
// - If the first thread's stack is resident on the preferred node, return this
//   thread.
// - Otherwise, create a second thread. If its stack is resident on the
//   preferred node, stop the first thread and return this one.
// - Otherwise, create a third thread. The stack allocator does not find a
//   cached stack, and so falls back to vmalloc, which takes the NUMA hint into
//   consideration. The first two threads are then stopped.
//
// When CONFIG_VMAP_STACK is not defined, the first kernel thread is returned.
//
// This function is never invoked when there is no NUMA preference (preferred
// node is NUMA_NO_NODE).
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
nv_kthread_q_t *q,
int preferred_node,
const char *q_name)
{
unsigned i, j;
const static unsigned attempts = 3;
struct task_struct *thread[3];
for (i = 0;; i++) {
struct page *stack;
thread[i] = kthread_create_on_node(threadfn, q, preferred_node, q_name);
if (unlikely(IS_ERR(thread[i]))) {
// Instead of failing, pick the previous thread, even if its
// stack is not allocated on the preferred node.
if (i > 0)
i--;
break;
}
// vmalloc is not used to allocate the stack, so simply return the
// thread, even if its stack may not be allocated on the preferred node
if (!is_vmalloc_addr(thread[i]->stack))
break;
// Ran out of attempts - return thread even if its stack may not be
// allocated on the preferred node
if ((i == (attempts - 1)))
break;
// Get the NUMA node where the first page of the stack is resident. If
// it is the preferred node, select this thread.
stack = vmalloc_to_page(thread[i]->stack);
if (page_to_nid(stack) == preferred_node)
break;
}
for (j = i; j > 0; j--)
kthread_stop(thread[j - 1]);
return thread[i];
}
#endif
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
{
memset(q, 0, sizeof(*q));
INIT_LIST_HEAD(&q->q_list_head);
spin_lock_init(&q->q_lock);
sema_init(&q->q_sem, 0);
if (preferred_node == NV_KTHREAD_NO_NODE) {
q->q_kthread = kthread_create(_main_loop, q, q_name);
}
else {
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
#else
return -ENOTSUPP;
#endif
}
if (IS_ERR(q->q_kthread)) {
int err = PTR_ERR(q->q_kthread);
// Clear q_kthread before returning so that nv_kthread_q_stop() can be
// safely called on it making error handling easier.
q->q_kthread = NULL;
return err;
}
wake_up_process(q->q_kthread);
return 0;
}
// Returns true (non-zero) if the item was actually scheduled, and false if the
// item was already pending in a queue.
static int _raw_q_schedule(nv_kthread_q_t *q, nv_kthread_q_item_t *q_item)
{
unsigned long flags;
int ret = 1;
spin_lock_irqsave(&q->q_lock, flags);
if (likely(list_empty(&q_item->q_list_node)))
list_add_tail(&q_item->q_list_node, &q->q_list_head);
else
ret = 0;
spin_unlock_irqrestore(&q->q_lock, flags);
if (likely(ret))
up(&q->q_sem);
return ret;
}
void nv_kthread_q_item_init(nv_kthread_q_item_t *q_item,
nv_q_func_t function_to_run,
void *function_args)
{
INIT_LIST_HEAD(&q_item->q_list_node);
q_item->function_to_run = function_to_run;
q_item->function_args = function_args;
}
// Returns true (non-zero) if the q_item got scheduled, false otherwise.
int nv_kthread_q_schedule_q_item(nv_kthread_q_t *q,
nv_kthread_q_item_t *q_item)
{
if (unlikely(atomic_read(&q->main_loop_should_exit))) {
NVQ_WARN("Not allowed: nv_kthread_q_schedule_q_item was "
"called with a non-alive q: 0x%p\n", q);
return 0;
}
return _raw_q_schedule(q, q_item);
}
static void _q_flush_function(void *args)
{
struct completion *completion = (struct completion *)args;
complete(completion);
}
static void _raw_q_flush(nv_kthread_q_t *q)
{
nv_kthread_q_item_t q_item;
DECLARE_COMPLETION(completion);
nv_kthread_q_item_init(&q_item, _q_flush_function, &completion);
_raw_q_schedule(q, &q_item);
// Wait for the flush item to run. Once it has run, then all of the
// previously queued items in front of it will have run, so that means
// the flush is complete.
wait_for_completion(&completion);
}
void nv_kthread_q_flush(nv_kthread_q_t *q)
{
if (unlikely(atomic_read(&q->main_loop_should_exit))) {
NVQ_WARN("Not allowed: nv_kthread_q_flush was called after "
"nv_kthread_q_stop. q: 0x%p\n", q);
return;
}
// This 2x flush is not a typing mistake. The queue really does have to be
// flushed twice, in order to take care of the case of a q_item that
// reschedules itself.
_raw_q_flush(q);
_raw_q_flush(q);
}

View File

@@ -0,0 +1,34 @@
/*******************************************************************************
Copyright (c) 2015-2018 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#include "nvCpuUuid.h"
const NvProcessorUuid NV_PROCESSOR_UUID_CPU_DEFAULT =
{
{
// Produced via uuidgen(1): 73772a14-2c41-4750-a27b-d4d74e0f5ea6:
0xa6, 0x5e, 0x0f, 0x4e, 0xd7, 0xd4, 0x7b, 0xa2,
0x50, 0x47, 0x41, 0x2c, 0x14, 0x2a, 0x77, 0x73
}
};

View File

@@ -0,0 +1,113 @@
NVIDIA_UVM_SOURCES ?=
NVIDIA_UVM_SOURCES_CXX ?=
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvstatus.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvCpuUuid.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nv-kthread-q.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nv-kthread-q-selftest.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tools.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_global.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_isr.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_procfs.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_space.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_space_mm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_channel.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_lock.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hal.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_range.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_policy.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_replayable_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_non_replayable_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_access_counters.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_events.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_module.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pte_batch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tlb_batch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_push.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pushbuffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tracker.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pmm_sysmem.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pmm_gpu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_migrate.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_populate_pageable.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_migrate_pageable.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_map_external.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_user_channel.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_heuristics.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_thrashing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_prefetch.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm_sanity_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_page_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tracker_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_push_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_channel_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ce_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_host_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_lock_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pmm_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pmm_sysmem_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_events_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_module_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_get_rm_ptes_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_fault_buffer_flush_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_peer_identity_mappings_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c

View File

@@ -0,0 +1,112 @@
###########################################################################
# Kbuild fragment for nvidia-uvm.ko
###########################################################################
UVM_BUILD_TYPE = release
#
# Define NVIDIA_UVM_{SOURCES,OBJECTS}
#
NVIDIA_UVM_OBJECTS =
include $(src)/nvidia-uvm/nvidia-uvm-sources.Kbuild
NVIDIA_UVM_OBJECTS += $(patsubst %.c,%.o,$(NVIDIA_UVM_SOURCES))
# Some Linux kernel functions rely on being built with optimizations on, so to
# work around this we put wrappers for them in a separate file that's built
# with optimizations on in debug builds and skipped in other builds.
# Notably, gcc 4.4 supports per-function optimization attributes that would be
# easier to use, but it is too recent to rely on for now.
NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE := nvidia-uvm/uvm_debug_optimized.c
NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT := $(patsubst %.c,%.o,$(NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE))
ifneq ($(UVM_BUILD_TYPE),debug)
# Only build the wrappers on debug builds
NVIDIA_UVM_OBJECTS := $(filter-out $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_OBJECTS))
endif
obj-m += nvidia-uvm.o
nvidia-uvm-y := $(NVIDIA_UVM_OBJECTS)
NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
#
# Define nvidia-uvm.ko-specific CFLAGS.
#
ifeq ($(UVM_BUILD_TYPE),debug)
NVIDIA_UVM_CFLAGS += -DDEBUG $(call cc-option,-Og,-O0) -g
else
ifeq ($(UVM_BUILD_TYPE),develop)
# -DDEBUG is required, in order to allow pr_devel() print statements to
# work:
NVIDIA_UVM_CFLAGS += -DDEBUG
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
endif
NVIDIA_UVM_CFLAGS += -O2
endif
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_ENABLED
NVIDIA_UVM_CFLAGS += -DNVIDIA_UNDEF_LEGACY_BIT_MACROS
NVIDIA_UVM_CFLAGS += -DLinux
NVIDIA_UVM_CFLAGS += -D__linux__
NVIDIA_UVM_CFLAGS += -I$(src)/nvidia-uvm
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_OBJECTS), $(NVIDIA_UVM_CFLAGS))
ifeq ($(UVM_BUILD_TYPE),debug)
# Force optimizations on for the wrappers
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_CFLAGS) -O2)
endif
#
# Register the conftests needed by nvidia-uvm.ko
#
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
NV_CONFTEST_FUNCTION_COMPILE_TESTS += address_space_init_once
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vzalloc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
NV_CONFTEST_FUNCTION_COMPILE_TESTS += bitmap_clear
NV_CONFTEST_FUNCTION_COMPILE_TESTS += usleep_range
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_bus_address
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
NV_CONFTEST_TYPE_COMPILE_TESTS += address_space
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += make_device_exclusive_range
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg

View File

@@ -0,0 +1,82 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvstatus.h"
#if !defined(NV_PRINTF_STRING_SECTION)
#if defined(NVRM) && NVCPU_IS_RISCV64
#define NV_PRINTF_STRING_SECTION __attribute__ ((section (".logging")))
#else // defined(NVRM) && NVCPU_IS_RISCV64
#define NV_PRINTF_STRING_SECTION
#endif // defined(NVRM) && NVCPU_IS_RISCV64
#endif // !defined(NV_PRINTF_STRING_SECTION)
/*
 * Include nvstatuscodes.h twice: once to create constant strings in the
 * NV_PRINTF_STRING_SECTION section of the executable, and once to build
* the g_StatusCodeList table.
*/
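/*
 * For illustration only (a hypothetical entry, not taken from
 * nvstatuscodes.h): an entry such as
 *
 *     NV_STATUS_CODE(NV_ERR_EXAMPLE, 0x00001234, "Example failure")
 *
 * expands on the first include to
 *
 *     static NV_PRINTF_STRING_SECTION
 *     const char rm_pvt_NV_ERR_EXAMPLE_str[] = "Example failure" " [" "NV_ERR_EXAMPLE" "]";
 *
 * and on the second include to the table entry
 *
 *     { NV_ERR_EXAMPLE, rm_pvt_NV_ERR_EXAMPLE_str },
 */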
#undef NV_STATUS_CODE
#undef SDK_NVSTATUSCODES_H
#define NV_STATUS_CODE( name, code, string ) static NV_PRINTF_STRING_SECTION \
const char rm_pvt_##name##_str[] = string " [" #name "]";
#include "nvstatuscodes.h"
#undef NV_STATUS_CODE
#undef SDK_NVSTATUSCODES_H
#define NV_STATUS_CODE( name, code, string ) { name, rm_pvt_##name##_str },
static struct NvStatusCodeString
{
NV_STATUS statusCode;
const char *statusString;
} g_StatusCodeList[] = {
#include "nvstatuscodes.h"
{ 0xffffffff, "Unknown error code!" } // Some compilers don't like the trailing ','
};
#undef NV_STATUS_CODE
/*!
* @brief Given an NV_STATUS code, returns the corresponding status string.
*
* @param[in] nvStatusIn NV_STATUS code for which the string is required
*
* @returns Corresponding status string from the nvstatuscodes.h
*
* TODO: Bug 200025711: convert this to an array-indexed lookup, instead of a linear search
*
*/
const char *nvstatusToString(NV_STATUS nvStatusIn)
{
static NV_PRINTF_STRING_SECTION const char rm_pvt_UNKNOWN_str[] = "Unknown error code!";
NvU32 i;
NvU32 n = ((NvU32)(sizeof(g_StatusCodeList))/(NvU32)(sizeof(g_StatusCodeList[0])));
for (i = 0; i < n; i++)
{
if (g_StatusCodeList[i].statusCode == nvStatusIn)
{
return g_StatusCodeList[i].statusString;
}
}
return rm_pvt_UNKNOWN_str;
}

kernel-open/nvidia-uvm/uvm.c (new file, 1149 additions): diff suppressed because it is too large.

kernel-open/nvidia-uvm/uvm.h (new file, 3902 additions): diff suppressed because it is too large.

View File

@@ -0,0 +1,102 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_mem.h"
#include "uvm_ampere_fault_buffer.h"
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->tlb_batch.va_invalidate_supported = true;
parent_gpu->tlb_batch.va_range_invalidate_supported = true;
// TODO: Bug 1767241: Run benchmarks to figure out a good number
parent_gpu->tlb_batch.max_ranges = 8;
parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ampere covers 128 TB and that's the minimum
// size that can be used.
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
// See uvm_mmu.h for mapping placement
parent_gpu->flat_vidmem_va_base = 132ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
// Not all units on Ampere support 49-bit addressing, including those which
// access channel buffers.
parent_gpu->max_channel_va = 1ULL << 40;
parent_gpu->max_host_va = 1ULL << 40;
// Ampere can map sysmem with any page size
parent_gpu->can_map_sysmem_with_large_pages = true;
// Prefetch instructions will generate faults
parent_gpu->prefetch_fault_supported = true;
// Ampere can place GPFIFO in vidmem
parent_gpu->gpfifo_in_vidmem_supported = true;
parent_gpu->replayable_faults_supported = true;
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
parent_gpu->has_clear_faulted_channel_sw_method = true;
parent_gpu->has_clear_faulted_channel_method = false;
parent_gpu->smc.supported = true;
parent_gpu->sparse_mappings_supported = true;
UVM_ASSERT(parent_gpu->rm_info.gpuArch == NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100);
if (parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA100 ||
parent_gpu->rm_info.gpuImplementation == NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GA000)
parent_gpu->map_remap_larger_page_promotion = true;
else
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
}

View File

@@ -0,0 +1,230 @@
/*******************************************************************************
Copyright (c) 2018-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_hal_types.h"
#include "clc6b5.h"
#include "clc7b5.h"
#include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
if (!uvm_channel_is_proxy(push->channel))
return true;
switch (method_address) {
case NVC56F_SET_OBJECT:
case NVC6B5_SET_SEMAPHORE_A:
case NVC6B5_SET_SEMAPHORE_B:
case NVC6B5_SET_SEMAPHORE_PAYLOAD:
case NVC6B5_SET_SRC_PHYS_MODE:
case NVC6B5_SET_DST_PHYS_MODE:
case NVC6B5_LAUNCH_DMA:
case NVC6B5_OFFSET_IN_UPPER:
case NVC6B5_OFFSET_IN_LOWER:
case NVC6B5_OFFSET_OUT_UPPER:
case NVC6B5_OFFSET_OUT_LOWER:
case NVC6B5_LINE_LENGTH_IN:
case NVC6B5_SET_REMAP_CONST_A:
case NVC6B5_SET_REMAP_CONST_B:
case NVC6B5_SET_REMAP_COMPONENTS:
return true;
}
UVM_ERR_PRINT("Unsupported CE method 0x%x\n", method_address);
return false;
}
static NvU32 ce_aperture(uvm_aperture_t aperture)
{
BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) !=
HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, PEERMEM));
if (aperture == UVM_APERTURE_SYS) {
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
}
else if (aperture == UVM_APERTURE_VID) {
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
else {
return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
HWVALUE(C6B5, SET_SRC_PHYS_MODE, FLA, 0) |
HWVALUE(C6B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
}
}
// Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE
// flags
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU32 launch_dma_src_dst_type = 0;
if (src.is_virtual)
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
if (dst.is_virtual)
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
if (!src.is_virtual && !dst.is_virtual) {
NV_PUSH_2U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture),
SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
else if (!src.is_virtual) {
NV_PUSH_1U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture));
}
else if (!dst.is_virtual) {
NV_PUSH_1U(C6B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
return launch_dma_src_dst_type;
}
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
{
return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
}
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU64 push_begin_gpu_va;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
return true;
if (uvm_channel_is_proxy(push->channel)) {
if (dst.is_virtual) {
UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
return false;
}
if (dst.aperture != UVM_APERTURE_VID) {
UVM_ERR_PRINT("Destination address of memcopy must be in vidmem\n");
return false;
}
// The source address is irrelevant, since it is a pushbuffer offset
if (!IS_ALIGNED(dst.address, 8)) {
UVM_ERR_PRINT("Destination address of memcopy is not 8-byte aligned\n");
return false;
}
if (!src.is_virtual) {
UVM_ERR_PRINT("Source address of memcopy must be virtual\n");
return false;
}
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
return false;
}
}
else {
// TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a
// UVM internal channel cannot use peer physical addresses.
if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) {
UVM_ERR_PRINT("Destination address of memcopy must be virtual, not physical (aperture: %s)\n",
uvm_gpu_address_aperture_string(dst));
return false;
}
if (!src.is_virtual && !uvm_aperture_is_peer(src.aperture)) {
UVM_ERR_PRINT("Source address of memcopy must be virtual, not physical (aperture: %s)\n",
uvm_gpu_address_aperture_string(src));
return false;
}
}
return true;
}
// In SR-IOV heavy (GA100 only), the UVM driver is expected to push a patched
// version of an inlined memcopy to the proxy channels. The patching consists in
// passing the offset of the inlined data within the push as the source virtual
// address, instead of passing its GPU VA.
//
// Copies pushed to internal channels use the GPU VA of the inlined data,
// irrespective of the virtualization mode.
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
{
if (!uvm_channel_is_proxy(push->channel))
return;
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
}
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
return true;
if (uvm_channel_is_proxy(push->channel)) {
if (dst.is_virtual) {
UVM_ERR_PRINT("Destination address of memset must be physical, not virtual\n");
return false;
}
if (dst.aperture != UVM_APERTURE_VID) {
UVM_ERR_PRINT("Destination address of memset must be in vidmem\n");
return false;
}
if (!IS_ALIGNED(dst.address, 8)) {
UVM_ERR_PRINT("Destination address of memset is not 8-byte aligned\n");
return false;
}
// Disallow memsets that don't match the page table/directory entry
// size. PDE0 entries are 16 bytes wide, but those are written using a
// memcopy.
//
// The memset size is not checked to be a multiple of the element size
// because that check is not specific to SR-IOV heavy, and it is already
// present in the uvm_hal_*_memset_* functions.
if (element_size != 8) {
UVM_ERR_PRINT("Memset data must be 8 bytes wide, but found %zu instead\n", element_size);
return false;
}
}
// TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a
// UVM internal channel cannot use peer physical addresses.
else if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) {
UVM_ERR_PRINT("Destination address of memset must be virtual, not physical (aperture: %s)\n",
uvm_gpu_address_aperture_string(dst));
return false;
}
return true;
}

View File

@@ -0,0 +1,88 @@
/*******************************************************************************
Copyright (c) 2018-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_HAL_AMPERE_FAULT_BUFFER_H__
#define __UVM_HAL_AMPERE_FAULT_BUFFER_H__
#include "nvtypes.h"
#include "uvm_common.h"
#include "uvm_gpu.h"
// There are up to 8 TPCs per GPC on Ampere, and there are 2 LTP uTLBs per TPC.
// In addition, there is one RGG uTLB per GPC. Each TPC has a number of clients
// that can make requests to its uTLBs: 1xTPCCS, 1xPE, 2xT1. Requests from
// these units are routed as follows to the 2 LTP uTLBs:
//
// -------- ---------
// | T1_0 | -----------------> | uTLB0 |
// -------- ---------
//
// -------- ---------
// | T1_1 | -----------------> | uTLB1 |
// -------- --------> ---------
// | ^
// ------- | |
// | PE | ----------- |
// ------- |
// |
// --------- |
// | TPCCS | -----------------------
// ---------
//
//
// The client ids are local to their GPC and the id mapping is linear across
// TPCs: TPC_n has TPCCS_n, PE_n, T1_p, and T1_q, where p=2*n and q=p+1.
//
// NV_PFAULT_CLIENT_GPC_LTP_UTLB_n and NV_PFAULT_CLIENT_GPC_RGG_UTLB enums can
// be ignored. These will never be reported in a fault message, and should
// never be used in an invalidate. Therefore, we define our own values.
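//
// As a concrete check of the arithmetic: a fully populated Ampere GPC with
// 8 TPCs has 8 * 2 = 16 LTP uTLBs plus 1 RGG uTLB, i.e. 17 uTLBs per GPC,
// which matches UVM_AMPERE_GPC_UTLB_COUNT below.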
typedef enum {
UVM_AMPERE_GPC_UTLB_ID_RGG = 0,
UVM_AMPERE_GPC_UTLB_ID_LTP0 = 1,
UVM_AMPERE_GPC_UTLB_ID_LTP1 = 2,
UVM_AMPERE_GPC_UTLB_ID_LTP2 = 3,
UVM_AMPERE_GPC_UTLB_ID_LTP3 = 4,
UVM_AMPERE_GPC_UTLB_ID_LTP4 = 5,
UVM_AMPERE_GPC_UTLB_ID_LTP5 = 6,
UVM_AMPERE_GPC_UTLB_ID_LTP6 = 7,
UVM_AMPERE_GPC_UTLB_ID_LTP7 = 8,
UVM_AMPERE_GPC_UTLB_ID_LTP8 = 9,
UVM_AMPERE_GPC_UTLB_ID_LTP9 = 10,
UVM_AMPERE_GPC_UTLB_ID_LTP10 = 11,
UVM_AMPERE_GPC_UTLB_ID_LTP11 = 12,
UVM_AMPERE_GPC_UTLB_ID_LTP12 = 13,
UVM_AMPERE_GPC_UTLB_ID_LTP13 = 14,
UVM_AMPERE_GPC_UTLB_ID_LTP14 = 15,
UVM_AMPERE_GPC_UTLB_ID_LTP15 = 16,
UVM_AMPERE_GPC_UTLB_COUNT,
} uvm_ampere_gpc_utlb_id_t;
static NvU32 uvm_ampere_get_utlbs_per_gpc(uvm_parent_gpu_t *parent_gpu)
{
NvU32 utlbs = parent_gpu->rm_info.maxTpcPerGpcCount * 2 + 1;
UVM_ASSERT(utlbs <= UVM_AMPERE_GPC_UTLB_COUNT);
return utlbs;
}
#endif

View File

@@ -0,0 +1,435 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_global.h"
#include "uvm_user_channel.h"
#include "uvm_push_macros.h"
#include "hwref/ampere/ga100/dev_runlist.h"
#include "clc56f.h"
#include "clc076.h"
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
return true;
if (uvm_channel_is_privileged(push->channel)) {
switch (method_address) {
case NVC56F_SET_OBJECT:
case NVC56F_NON_STALL_INTERRUPT:
case NVC56F_MEM_OP_A:
case NVC56F_MEM_OP_B:
case NVC56F_MEM_OP_C:
case NVC56F_MEM_OP_D:
case NVC56F_SEM_ADDR_LO:
case NVC56F_SEM_ADDR_HI:
case NVC56F_SEM_PAYLOAD_LO:
case NVC56F_SEM_PAYLOAD_HI:
case NVC56F_SEM_EXECUTE:
case NVC56F_WFI:
case NVC56F_NOP:
return true;
}
UVM_ERR_PRINT("Unsupported Host method 0x%x\n", method_address);
return false;
}
else if (method_address == NVC56F_MEM_OP_D) {
NvU32 operation = READ_HWVALUE(method_data, C56F, MEM_OP_D, OPERATION);
// Prohibit privileged operations from being pushed to non-privileged
// channels.
// TLB invalidations.
if ((operation == NVC56F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE) ||
(operation == NVC56F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED)) {
UVM_ERR_PRINT("Pushed privileged operation 0x%x to non-privileged channel\n", operation);
return false;
}
// Access counter clearing is a privileged operation. But access
// counters are not supported on SR-IOV heavy, so the presence of the
// operation indicates a missing check for access counters support.
if (operation == NVC56F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR) {
UVM_ERR_PRINT("Pushed access counters operation 0x%x, but access counters are not supported\n", operation);
return false;
}
}
return true;
}
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
if (!uvm_channel_is_proxy(push->channel))
return true;
switch (method_address) {
case NVC076_SET_OBJECT:
case NVC076_CLEAR_FAULTED_A:
case NVC076_CLEAR_FAULTED_B:
case NVC076_FAULT_CANCEL_A:
case NVC076_FAULT_CANCEL_B:
case NVC076_FAULT_CANCEL_C:
return true;
}
UVM_ERR_PRINT("Unsupported SW method 0x%x\n", method_address);
return false;
}
void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault)
{
uvm_spin_loop_t spin;
NvU32 channel_faulted_mask = 0;
NvU32 clear_type_value = 0;
UVM_ASSERT(!user_channel->gpu->parent->has_clear_faulted_channel_method);
if (fault->fault_source.mmu_engine_type == UVM_MMU_ENGINE_TYPE_HOST) {
clear_type_value = NV_CHRAM_CHANNEL_UPDATE_RESET_PBDMA_FAULTED;
channel_faulted_mask = HWCONST(_CHRAM, CHANNEL, PBDMA_FAULTED, TRUE);
}
else if (fault->fault_source.mmu_engine_type == UVM_MMU_ENGINE_TYPE_CE) {
clear_type_value = NV_CHRAM_CHANNEL_UPDATE_RESET_ENG_FAULTED;
channel_faulted_mask = HWCONST(_CHRAM, CHANNEL, ENG_FAULTED, TRUE);
}
else {
UVM_ASSERT_MSG(false, "Unsupported MMU engine type %s\n",
uvm_mmu_engine_type_string(fault->fault_source.mmu_engine_type));
}
// Wait for the channel to have the FAULTED bit set as this can race with
// interrupt notification
UVM_SPIN_WHILE(!(UVM_GPU_READ_ONCE(*user_channel->chram_channel_register) & channel_faulted_mask), &spin);
UVM_GPU_WRITE_ONCE(*user_channel->chram_channel_register, clear_type_value);
wmb();
UVM_GPU_WRITE_ONCE(*user_channel->work_submission_offset, user_channel->work_submission_token);
}
static NvU32 instance_ptr_aperture_type_to_hw_value(uvm_aperture_t aperture)
{
switch (aperture) {
case UVM_APERTURE_SYS:
return HWCONST(C076, CLEAR_FAULTED_A, INST_APERTURE, SYS_MEM_COHERENT);
case UVM_APERTURE_VID:
return HWCONST(C076, CLEAR_FAULTED_A, INST_APERTURE, VID_MEM);
default:
UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture);
}
return 0;
}
static void instance_ptr_address_to_hw_values(NvU64 instance_ptr_address,
NvU32 *instance_ptr_lo,
NvU32 *instance_ptr_hi)
{
// instance_ptr must be 4K aligned
UVM_ASSERT_MSG(IS_ALIGNED(instance_ptr_address, 1 << 12), "instance_ptr 0x%llx\n", instance_ptr_address);
instance_ptr_address >>= 12;
*instance_ptr_lo = instance_ptr_address & HWMASK(C076, CLEAR_FAULTED_A, INST_LOW);
*instance_ptr_hi = instance_ptr_address >> HWSIZE(C076, CLEAR_FAULTED_A, INST_LOW);
}
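// Worked example (illustrative only; the real INST_LOW field width comes from
// the clc076.h hardware headers): assuming a 20-bit INST_LOW field and a
// 4K-aligned instance pointer of 0x123456000:
//
//     instance_ptr_address >> 12            == 0x123456
//     *instance_ptr_lo = 0x123456 & 0xfffff == 0x23456
//     *instance_ptr_hi = 0x123456 >> 20     == 0x1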
static NvU32 mmu_engine_type_to_hw_value(uvm_mmu_engine_type_t mmu_engine_type)
{
switch (mmu_engine_type) {
case UVM_MMU_ENGINE_TYPE_HOST:
return HWCONST(C076, CLEAR_FAULTED_A, TYPE, PBDMA_FAULTED);
case UVM_MMU_ENGINE_TYPE_CE:
return HWCONST(C076, CLEAR_FAULTED_A, TYPE, ENG_FAULTED);
default:
UVM_ASSERT_MSG(false, "Unsupported MMU engine type %s\n",
uvm_mmu_engine_type_string(mmu_engine_type));
}
return 0;
}
void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault)
{
NvU32 clear_type_value;
NvU32 aperture_type_value;
NvU32 instance_ptr_lo, instance_ptr_hi;
uvm_gpu_phys_address_t instance_ptr = user_channel->instance_ptr.addr;
UVM_ASSERT(user_channel->gpu->parent->has_clear_faulted_channel_sw_method);
clear_type_value = mmu_engine_type_to_hw_value(fault->fault_source.mmu_engine_type);
aperture_type_value = instance_ptr_aperture_type_to_hw_value(instance_ptr.aperture);
instance_ptr_address_to_hw_values(instance_ptr.address, &instance_ptr_lo, &instance_ptr_hi);
NV_PUSH_2U(C076, CLEAR_FAULTED_A, HWVALUE(C076, CLEAR_FAULTED_A, INST_LOW, instance_ptr_lo) |
aperture_type_value |
clear_type_value,
CLEAR_FAULTED_B, HWVALUE(C076, CLEAR_FAULTED_B, INST_HI, instance_ptr_hi));
}
// Copied from the Pascal implementation; this version additionally sets
// TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
    // PDE3 is the highest level on Pascal-Ampere, see the comment in
    // uvm_pascal_mmu.c for details.
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
if (membar != UVM_MEMBAR_NONE) {
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
}
// Copied from the Volta implementation; this version additionally sets
// TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 va_lo;
NvU32 va_hi;
NvU64 end;
NvU64 actual_base;
NvU64 actual_size;
NvU64 actual_end;
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
// The invalidation size must be a power-of-two number of pages containing
// the passed interval
end = base + size - 1;
log2_invalidation_size = __fls((unsigned long)(end ^ base)) + 1;
if (log2_invalidation_size == 64) {
// Invalidate everything
gpu->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
return;
}
// The hardware aligns the target address down to the invalidation size.
actual_size = 1ULL << log2_invalidation_size;
actual_base = UVM_ALIGN_DOWN(base, actual_size);
actual_end = actual_base + actual_size - 1;
UVM_ASSERT(actual_end >= end);
// The invalidation size field expects log2(invalidation size in 4K), not
// log2(invalidation size in bytes)
log2_invalidation_size -= 12;
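    // Worked example (illustrative): base = 0x200000 and size = 0x3000 give
    // end = 0x202fff and (end ^ base) = 0x2fff, so __fls() + 1 = 14 and the
    // invalidation covers 2^14 = 16K bytes. actual_base aligns down to
    // 0x200000 and actual_end = 0x203fff, which still covers the requested
    // interval. After the conversion above, log2_invalidation_size is
    // 14 - 12 = 2, i.e. log2 of 4 pages of 4K.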
// Address to invalidate, as a multiple of 4K.
base >>= 12;
va_lo = base & HWMASK(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
va_hi = base >> HWSIZE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
    // PDE3 is the highest level on Pascal-Ampere, see the comment in
    // uvm_pascal_mmu.c for details.
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
if (membar != UVM_MEMBAR_NONE) {
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
NV_PUSH_4U(C56F, MEM_OP_A, HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
}
// Copied from the Pascal implementation; this version additionally sets
// TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 ack_value = 0;
NvU32 invalidate_gpc_value = 0;
NvU32 aperture_value = 0;
NvU32 pdb_lo = 0;
NvU32 pdb_hi = 0;
NvU32 page_table_level = 0;
uvm_membar_t membar;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
        // PDE3 is the highest level on Pascal-Ampere, see the comment in
        // uvm_pascal_mmu.c for details.
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde3, params->page_table_level) - 1;
}
if (params->membar != UvmInvalidateTlbMemBarNone) {
// If a GPU or SYS membar is needed, ack_value needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
else
invalidate_gpc_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE);
if (params->target_va_mode == UvmTargetVaModeTargeted) {
NvU64 va = params->va >> 12;
NvU32 va_lo = va & HWMASK(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NvU32 va_hi = va >> HWSIZE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
invalidate_gpc_value |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
else {
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
invalidate_gpc_value |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
if (params->membar == UvmInvalidateTlbMemBarSys)
membar = UVM_MEMBAR_SYS;
else if (params->membar == UvmInvalidateTlbMemBarLocal)
membar = UVM_MEMBAR_GPU;
else
membar = UVM_MEMBAR_NONE;
uvm_hal_tlb_invalidate_membar(push, membar);
}

View File

@@ -0,0 +1,162 @@
/*******************************************************************************
Copyright (c) 2018-2020 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// For Ampere, UVM page tree 'depth' maps to hardware as follows:
//
// UVM depth HW level VA bits
// 0 PDE3 48:47
// 1 PDE2 46:38
// 2 PDE1 (or 512M PTE) 37:29
// 3 PDE0 (dual 64k/4k PDE, or 2M PTE) 28:21
// 4 PTE_64K / PTE_4K 20:16 / 20:12
#include "uvm_types.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_ampere_fault_buffer.h"
#include "hwref/ampere/ga100/dev_fault.h"
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id)
{
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST31)
return UVM_MMU_ENGINE_TYPE_HOST;
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9)
return UVM_MMU_ENGINE_TYPE_CE;
// We shouldn't be servicing faults from any other engines
UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS && mmu_engine_id < NV_PFAULT_MMU_ENG_ID_BAR1,
"Unexpected engine ID: 0x%x\n", mmu_engine_id);
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
}
static NvU32 page_table_depth_ampere(NvU32 page_size)
{
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
if (page_size == UVM_PAGE_SIZE_2M)
return 3;
else if (page_size == UVM_PAGE_SIZE_512M)
return 2;
else
return 4;
}
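// Example values, consistent with the depth table at the top of this file:
//
//     page_table_depth_ampere(UVM_PAGE_SIZE_2M)   == 3  // 2M PTE at PDE0
//     page_table_depth_ampere(UVM_PAGE_SIZE_512M) == 2  // 512M PTE at PDE1
//     page_table_depth_ampere(UVM_PAGE_SIZE_64K)  == 4  // leaf PTE level
//     page_table_depth_ampere(UVM_PAGE_SIZE_4K)   == 4  // leaf PTE level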
static NvU32 page_sizes_ampere(void)
{
return UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}
static uvm_mmu_mode_hal_t ampere_mmu_mode_hal;
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size)
{
static bool initialized = false;
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
// TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
// 128K big page size for Pascal+ GPUs
if (big_page_size == UVM_PAGE_SIZE_128K)
return NULL;
if (!initialized) {
uvm_mmu_mode_hal_t *turing_mmu_mode_hal = uvm_hal_mmu_mode_turing(big_page_size);
UVM_ASSERT(turing_mmu_mode_hal);
// The assumption made is that arch_hal->mmu_mode_hal() will be
// called under the global lock the first time, so check it here.
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
ampere_mmu_mode_hal = *turing_mmu_mode_hal;
ampere_mmu_mode_hal.page_table_depth = page_table_depth_ampere;
ampere_mmu_mode_hal.page_sizes = page_sizes_ampere;
initialized = true;
}
return &ampere_mmu_mode_hal;
}
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id)
{
switch (client_id) {
case NV_PFAULT_CLIENT_GPC_RAST:
case NV_PFAULT_CLIENT_GPC_GCC:
case NV_PFAULT_CLIENT_GPC_GPCCS:
return UVM_AMPERE_GPC_UTLB_ID_RGG;
case NV_PFAULT_CLIENT_GPC_T1_0:
return UVM_AMPERE_GPC_UTLB_ID_LTP0;
case NV_PFAULT_CLIENT_GPC_T1_1:
case NV_PFAULT_CLIENT_GPC_PE_0:
case NV_PFAULT_CLIENT_GPC_TPCCS_0:
return UVM_AMPERE_GPC_UTLB_ID_LTP1;
case NV_PFAULT_CLIENT_GPC_T1_2:
return UVM_AMPERE_GPC_UTLB_ID_LTP2;
case NV_PFAULT_CLIENT_GPC_T1_3:
case NV_PFAULT_CLIENT_GPC_PE_1:
case NV_PFAULT_CLIENT_GPC_TPCCS_1:
return UVM_AMPERE_GPC_UTLB_ID_LTP3;
case NV_PFAULT_CLIENT_GPC_T1_4:
return UVM_AMPERE_GPC_UTLB_ID_LTP4;
case NV_PFAULT_CLIENT_GPC_T1_5:
case NV_PFAULT_CLIENT_GPC_PE_2:
case NV_PFAULT_CLIENT_GPC_TPCCS_2:
return UVM_AMPERE_GPC_UTLB_ID_LTP5;
case NV_PFAULT_CLIENT_GPC_T1_6:
return UVM_AMPERE_GPC_UTLB_ID_LTP6;
case NV_PFAULT_CLIENT_GPC_T1_7:
case NV_PFAULT_CLIENT_GPC_PE_3:
case NV_PFAULT_CLIENT_GPC_TPCCS_3:
return UVM_AMPERE_GPC_UTLB_ID_LTP7;
case NV_PFAULT_CLIENT_GPC_T1_8:
return UVM_AMPERE_GPC_UTLB_ID_LTP8;
case NV_PFAULT_CLIENT_GPC_T1_9:
case NV_PFAULT_CLIENT_GPC_PE_4:
case NV_PFAULT_CLIENT_GPC_TPCCS_4:
return UVM_AMPERE_GPC_UTLB_ID_LTP9;
case NV_PFAULT_CLIENT_GPC_T1_10:
return UVM_AMPERE_GPC_UTLB_ID_LTP10;
case NV_PFAULT_CLIENT_GPC_T1_11:
case NV_PFAULT_CLIENT_GPC_PE_5:
case NV_PFAULT_CLIENT_GPC_TPCCS_5:
return UVM_AMPERE_GPC_UTLB_ID_LTP11;
case NV_PFAULT_CLIENT_GPC_T1_12:
return UVM_AMPERE_GPC_UTLB_ID_LTP12;
case NV_PFAULT_CLIENT_GPC_T1_13:
case NV_PFAULT_CLIENT_GPC_PE_6:
case NV_PFAULT_CLIENT_GPC_TPCCS_6:
return UVM_AMPERE_GPC_UTLB_ID_LTP13;
case NV_PFAULT_CLIENT_GPC_T1_14:
return UVM_AMPERE_GPC_UTLB_ID_LTP14;
case NV_PFAULT_CLIENT_GPC_T1_15:
case NV_PFAULT_CLIENT_GPC_PE_7:
case NV_PFAULT_CLIENT_GPC_TPCCS_7:
return UVM_AMPERE_GPC_UTLB_ID_LTP15;
default:
UVM_ASSERT_MSG(false, "Invalid client value: 0x%x\n", client_id);
}
return 0;
}

View File

@@ -0,0 +1,256 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_API_H__
#define __UVM_API_H__
#include "uvm_types.h"
#include "uvm_ioctl.h"
#include "uvm_linux.h"
#include "uvm_lock.h"
#include "uvm_thread_context.h"
#include "uvm_kvmalloc.h"
#include "uvm_va_space.h"
#include "nv_uvm_types.h"
// This weird number comes from UVM_PREVENT_MIGRATION_RANGE_GROUPS_PARAMS. That
// ioctl is called frequently so we don't want to allocate a copy every time.
// It's a little over 256 bytes in size.
#define UVM_MAX_IOCTL_PARAM_STACK_SIZE 288
// The UVM_ROUTE_CMD_* macros are only intended for use in the ioctl routines.
// If the BUILD_BUG_ON fires, use __UVM_ROUTE_CMD_ALLOC instead.
#define __UVM_ROUTE_CMD_STACK(cmd, params_type, function_name, do_init_check) \
case cmd: \
{ \
params_type params; \
BUILD_BUG_ON(sizeof(params) > UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(&params, (void __user*)arg, sizeof(params))) \
return -EFAULT; \
\
params.rmStatus = uvm_global_get_status(); \
if (params.rmStatus == NV_OK) { \
if (do_init_check) \
params.rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
if (likely(params.rmStatus == NV_OK)) \
params.rmStatus = function_name(&params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, &params, sizeof(params))) \
return -EFAULT; \
\
return 0; \
}
// We need to concatenate cmd##_PARAMS here to avoid the preprocessor's argument
// prescan. Attempting concatenation in the lower-level macro will fail because
// it will have been expanded to a literal by then.
#define UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(cmd, function_name) \
__UVM_ROUTE_CMD_STACK(cmd, cmd##_PARAMS, function_name, false)
#define UVM_ROUTE_CMD_STACK_INIT_CHECK(cmd, function_name) \
__UVM_ROUTE_CMD_STACK(cmd, cmd##_PARAMS, function_name, true)
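// For example (illustrative), a routing entry such as
//
//     UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_MIGRATE, uvm_api_migrate);
//
// pastes UVM_MIGRATE_PARAMS together at this level, so the params type name
// survives even though the UVM_MIGRATE argument itself is macro-expanded to
// its ioctl number during prescan.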
// If the BUILD_BUG_ON fires, use __UVM_ROUTE_CMD_STACK instead
#define __UVM_ROUTE_CMD_ALLOC(cmd, params_type, function_name, do_init_check) \
case cmd: \
{ \
int ret = 0; \
params_type *params = uvm_kvmalloc(sizeof(*params)); \
if (!params) \
return -ENOMEM; \
BUILD_BUG_ON(sizeof(*params) <= UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
if (nv_copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
uvm_kvfree(params); \
return -EFAULT; \
} \
\
params->rmStatus = uvm_global_get_status(); \
if (params->rmStatus == NV_OK) { \
if (do_init_check) \
params->rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
if (likely(params->rmStatus == NV_OK)) \
params->rmStatus = function_name(params, filp); \
} \
\
if (nv_copy_to_user((void __user*)arg, params, sizeof(*params))) \
ret = -EFAULT; \
\
uvm_kvfree(params); \
return ret; \
}
#define UVM_ROUTE_CMD_ALLOC_NO_INIT_CHECK(cmd, function_name) \
__UVM_ROUTE_CMD_ALLOC(cmd, cmd##_PARAMS, function_name, false)
#define UVM_ROUTE_CMD_ALLOC_INIT_CHECK(cmd, function_name) \
__UVM_ROUTE_CMD_ALLOC(cmd, cmd##_PARAMS, function_name, true)
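// Illustrative usage sketch (the actual dispatch lives in the driver's ioctl
// handler, not in this header): small parameter structs are routed on the
// stack and large ones through the heap-allocating variant, roughly as
//
//     switch (cmd) {
//         UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_MIGRATE, uvm_api_migrate);
//         UVM_ROUTE_CMD_ALLOC_INIT_CHECK(UVM_MAP_EXTERNAL_ALLOCATION,
//                                        uvm_api_map_external_allocation);
//     }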
// Wrap an entry point into the UVM module.
//
// An entry function with signature
//
// return_type foo(...);
//
// is required to have a counterpart of the form
//
// return_type foo_entry(...) {
// UVM_ENTRY_RET(foo(...));
// }
//
// An entry function with signature
//
// void foo(...);
//
// is required to have a counterpart of the form
//
// void foo_entry(...) {
// UVM_ENTRY_VOID(foo(...));
// }
//
// Invocations of foo must be replaced by invocations of foo_entry at the entry
// points.
#define UVM_ENTRY_WRAP(line) \
do { \
bool added; \
\
if (in_interrupt()) { \
line; \
} \
else if (uvm_thread_context_wrapper_is_used()) { \
uvm_thread_context_wrapper_t thread_context_wrapper; \
\
added = uvm_thread_context_add(&thread_context_wrapper.context); \
line; \
if (added) \
uvm_thread_context_remove(&thread_context_wrapper.context); \
} \
else { \
uvm_thread_context_t thread_context; \
\
added = uvm_thread_context_add(&thread_context); \
line; \
if (added) \
uvm_thread_context_remove(&thread_context); \
} \
    } while (0)
// Wrapper for non-void functions
#define UVM_ENTRY_RET(func_call) \
do { \
typeof(func_call) ret; \
UVM_ENTRY_WRAP((ret = (func_call))); \
return ret; \
    } while (0)
// Wrapper for void functions
#define UVM_ENTRY_VOID UVM_ENTRY_WRAP
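// Hypothetical instance of the convention above (names are illustrative; the
// real entry points live in the driver's top-level sources):
//
//     static long uvm_unlocked_ioctl_entry(struct file *filp, unsigned int cmd, unsigned long arg)
//     {
//         UVM_ENTRY_RET(uvm_unlocked_ioctl(filp, cmd, arg));
//     }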
// Validate input ranges from the user with specific alignment requirement
static bool uvm_api_range_invalid_aligned(NvU64 base, NvU64 length, NvU64 alignment)
{
return !IS_ALIGNED(base, alignment) ||
!IS_ALIGNED(length, alignment) ||
base == 0 ||
length == 0 ||
base + length < base; // Overflow
}
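// Examples, assuming a 4K alignment: base = 0x10000, length = 0x20000 is a
// valid range. base = 0, length = 0, an unaligned base or length, or a range
// such as base = 0xfffffffffffff000, length = 0x2000 (which wraps past the top
// of the address space) are all rejected.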
// Most APIs require PAGE_SIZE alignment
static bool uvm_api_range_invalid(NvU64 base, NvU64 length)
{
return uvm_api_range_invalid_aligned(base, length, PAGE_SIZE);
}
// Some APIs can only enforce 4K alignment as it's the smallest GPU page size
// even when the smallest host page is larger (e.g. 64K on ppc64le).
static bool uvm_api_range_invalid_4k(NvU64 base, NvU64 length)
{
return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_4K);
}
// Verify alignment on a 64K boundary.
static bool uvm_api_range_invalid_64k(NvU64 base, NvU64 length)
{
return uvm_api_range_invalid_aligned(base, length, UVM_PAGE_SIZE_64K);
}
// Returns true if the interval [start, start + length -1] is entirely covered
// by vmas.
//
// LOCKING: mm->mmap_lock must be held in at least read mode.
bool uvm_is_valid_vma_range(struct mm_struct *mm, NvU64 start, NvU64 length);
// Check that the interval [base, base + length) is fully covered by UVM
// managed ranges (NV_OK is returned), or (if ATS is enabled and mm != NULL)
// fully covered by valid vmas (NV_WARN_NOTHING_TO_DO is returned), or (if HMM
// is enabled and mm != NULL) fully covered by valid vmas (NV_OK is returned).
// Any other input results in a return status of NV_ERR_INVALID_ADDRESS.
//
// LOCKING: va_space->lock must be held in at least read mode. If mm != NULL,
// mm->mmap_lock must also be held in at least read mode.
NV_STATUS uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length);
NV_STATUS uvm_api_pageable_mem_access_on_gpu(UVM_PAGEABLE_MEM_ACCESS_ON_GPU_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unregister_gpu(UVM_UNREGISTER_GPU_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_create_range_group(UVM_CREATE_RANGE_GROUP_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_destroy_range_group(UVM_DESTROY_RANGE_GROUP_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_enable_peer_access(UVM_ENABLE_PEER_ACCESS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_disable_peer_access(UVM_DISABLE_PEER_ACCESS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_set_range_group(UVM_SET_RANGE_GROUP_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_create_external_range(UVM_CREATE_EXTERNAL_RANGE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_map_external_allocation(UVM_MAP_EXTERNAL_ALLOCATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_map_external_sparse(UVM_MAP_EXTERNAL_SPARSE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_free(UVM_FREE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_prevent_migration_range_groups(UVM_PREVENT_MIGRATION_RANGE_GROUPS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_allow_migration_range_groups(UVM_ALLOW_MIGRATION_RANGE_GROUPS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_set_preferred_location(const UVM_SET_PREFERRED_LOCATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unset_preferred_location(const UVM_UNSET_PREFERRED_LOCATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_set_accessed_by(const UVM_SET_ACCESSED_BY_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unset_accessed_by(const UVM_UNSET_ACCESSED_BY_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_register_gpu_va_space(UVM_REGISTER_GPU_VASPACE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unregister_gpu_va_space(UVM_UNREGISTER_GPU_VASPACE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_register_channel(UVM_REGISTER_CHANNEL_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unregister_channel(UVM_UNREGISTER_CHANNEL_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_enable_read_duplication(const UVM_ENABLE_READ_DUPLICATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_disable_read_duplication(const UVM_DISABLE_READ_DUPLICATION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_enable_system_wide_atomics(UVM_ENABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_disable_system_wide_atomics(UVM_DISABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_enable_counters(UVM_TOOLS_ENABLE_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_disable_counters(UVM_TOOLS_DISABLE_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_read_process_memory(UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_tools_write_process_memory(UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_map_dynamic_parallelism_region(UVM_MAP_DYNAMIC_PARALLELISM_REGION_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_unmap_external(UVM_UNMAP_EXTERNAL_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params, struct file *filp);
NV_STATUS uvm_api_populate_pageable(const UVM_POPULATE_PAGEABLE_PARAMS *params, struct file *filp);
#endif // __UVM_API_H__

View File

@@ -0,0 +1,193 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_va_space.h"
#include "uvm_ats.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
static int uvm_ats_mode = 1;
module_param(uvm_ats_mode, int, S_IRUGO);
MODULE_PARM_DESC(uvm_ats_mode, "Set to 0 to disable ATS (Address Translation Services). "
"Any other value is ignored. Has no effect unless the "
"platform supports ATS.");
void uvm_ats_init(const UvmPlatformInfo *platform_info)
{
g_uvm_global.ats.supported = platform_info->atsSupported;
g_uvm_global.ats.enabled = uvm_ats_mode &&
g_uvm_global.ats.supported &&
UVM_ATS_SUPPORTED() &&
uvm_va_space_mm_enabled_system();
}
void uvm_ats_init_va_space(uvm_va_space_t *va_space)
{
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_init_va_space(va_space);
}
NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ats_ibm_add_gpu() needs to be called even if ATS is disabled
        // since it sets parent_gpu->npu. Not setting parent_gpu->npu will
        // result in incorrect NVLink addresses. See dma_addr_to_gpu_addr().
return uvm_ats_ibm_add_gpu(parent_gpu);
}
return NV_OK;
}
void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_IBM_SUPPORTED()) {
        // uvm_ats_ibm_remove_gpu() needs to be called even if ATS is disabled
        // since uvm_ats_ibm_add_gpu() is called even in that case, and
        // uvm_ats_ibm_remove_gpu() needs to undo the work done by
        // uvm_ats_ibm_add_gpu() (the NPU's num_retained_gpus etc.).
uvm_ats_ibm_remove_gpu(parent_gpu);
}
}
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
NV_STATUS status = NV_OK;
UVM_ASSERT(gpu_va_space);
if (!gpu_va_space->ats.enabled)
return status;
uvm_assert_lockable_order(UVM_LOCK_ORDER_MMAP_LOCK);
uvm_assert_lockable_order(UVM_LOCK_ORDER_VA_SPACE);
if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_bind_gpu(gpu_va_space);
return status;
}
void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
UVM_ASSERT(gpu_va_space);
if (!gpu_va_space->ats.enabled)
return;
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unbind_gpu(gpu_va_space);
}
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
NV_STATUS status = NV_OK;
uvm_va_space_t *va_space;
uvm_gpu_id_t gpu_id;
UVM_ASSERT(gpu_va_space);
if (!gpu_va_space->ats.enabled)
return status;
va_space = gpu_va_space->va_space;
UVM_ASSERT(va_space);
uvm_assert_rwsem_locked_write(&va_space->lock);
gpu_id = gpu_va_space->gpu->id;
// Prevent multiple registrations of the same gpu_va_space for ATS access.
if (uvm_processor_mask_test(&va_space->ats.registered_gpu_va_spaces, gpu_id))
return NV_ERR_INVALID_DEVICE;
if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
if (status == NV_OK)
uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
return status;
}
void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_gpu_id_t gpu_id;
uvm_va_space_t *va_space;
UVM_ASSERT(gpu_va_space);
if (!gpu_va_space->ats.enabled)
return;
va_space = gpu_va_space->va_space;
gpu_id = gpu_va_space->gpu->id;
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
uvm_va_space_down_write(va_space);
uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
uvm_va_space_up_write(va_space);
}
void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
// We can only reach here from the mmu_notifier callbacks and these callbacks
// wouldn't have been registered if ATS wasn't enabled.
UVM_ASSERT(g_uvm_global.ats.enabled);
if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_invalidate(va_space, start, end);
}

View File

@@ -0,0 +1,152 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_ATS_H__
#define __UVM_ATS_H__
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED())
typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
// indexed by gpu->id. This mask is protected by the VA space lock.
uvm_processor_mask_t registered_gpu_va_spaces;
union
{
uvm_ibm_va_space_t ibm;
};
} uvm_ats_va_space_t;
typedef struct
{
// Each GPU VA space can have ATS enabled or disabled in its hardware
// state. This is controlled by user space when it allocates that GPU VA
// space object from RM. This flag indicates the mode user space
// requested when allocating this GPU VA space.
bool enabled;
NvU32 pasid;
union
{
uvm_ibm_gpu_va_space_t ibm;
};
} uvm_ats_gpu_va_space_t;
// Initializes driver-wide ATS state
//
// LOCKING: None
void uvm_ats_init(const UvmPlatformInfo *platform_info);
// Initializes ATS specific GPU state
//
// LOCKING: None
void uvm_ats_init_va_space(uvm_va_space_t *va_space);
// Enables ATS feature on the GPU.
//
// LOCKING: g_uvm_global.global lock mutex must be held.
NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu);
// Disables ATS feature on the GPU. The caller is responsible for ensuring
// that the GPU won't issue ATS requests anymore prior to calling this function.
//
// LOCKING: g_uvm_global.global lock mutex must be held.
void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu);
// Creates a binding on the GPU for the mm associated with the VA space
// (va_space_mm). Multiple calls to this function are tracked and refcounted for
// the specific {gpu, mm} pair. A successful uvm_ats_add_gpu() must precede a
// call to this function.
//
// LOCKING: mmap_lock must be lockable.
// VA space lock must be lockable.
// gpu_va_space->gpu must be retained.
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
// Decrements the refcount on the {gpu, mm} pair. Removes the binding from the
// mm (va_space_mm) to this GPU when the refcount reaches zero.
//
// LOCKING: None
void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space);
// Enables ATS access on the GPU for the mm_struct associated with the VA space
// (va_space_mm) and assigns a PASID. A successful uvm_ats_bind_gpu() must
// precede a call to this function. Returns NV_ERR_INVALID_DEVICE if the
// gpu_va_space is already registered for ATS access.
//
// LOCKING: The VA space lock must be held in write mode.
// mm has to be retained prior to calling this function.
// current->mm->mmap_lock must be held in write mode iff
// UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Disables ATS access for the gpu_va_space. Prior to calling this function,
// the caller must guarantee that the GPU will no longer make any ATS
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and will acquire the VA space
// lock, so neither lock must be held.
void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
// interrupts disabled.
void uvm_ats_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
#endif // __UVM_ATS_H__

View File

@@ -0,0 +1,232 @@
/*******************************************************************************
Copyright (c) 2018 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_tools.h"
#include "uvm_va_range.h"
#include "uvm_ats_faults.h"
#include "uvm_migrate_pageable.h"
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
NvU64 fault_addr,
uvm_fault_access_type_t access_type)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
NV_STATUS status;
NvU64 start;
NvU64 length;
// Request uvm_migrate_pageable() to touch the corresponding page after
// population.
// Under virtualization ATS provides two translations:
// 1) guest virtual -> guest physical
// 2) guest physical -> host physical
//
// The overall ATS translation will fault if either of those translations is
    // invalid. The population performed by uvm_migrate_pageable() below
    // handles translation #1, but not #2. We don't know if we're running as a
    // guest, but in case we are we can
// force that translation to be valid by touching the guest physical address
// from the CPU. If the translation is not valid then the access will cause
// a hypervisor fault. Note that dma_map_page() can't establish mappings
// used by GPU ATS SVA translations. GPU accesses to host physical addresses
    // obtained as a result of the address translation request use the CPU
// address space instead of the IOMMU address space since the translated
// host physical address isn't necessarily an IOMMU address. The only way to
// establish guest physical to host physical mapping in the CPU address
// space is to touch the page from the CPU.
//
// We assume that the hypervisor mappings are all VM_PFNMAP, VM_SHARED, and
// VM_WRITE, meaning that the mappings are all granted write access on any
// fault and that the kernel will never revoke them.
// drivers/vfio/pci/vfio_pci_nvlink2.c enforces this. Thus we can assume
// that a read fault is always sufficient to also enable write access on the
// guest translation.
uvm_migrate_args_t uvm_migrate_args =
{
.va_space = va_space,
.mm = mm,
.start = fault_addr,
.length = PAGE_SIZE,
.dst_id = gpu_va_space->gpu->parent->id,
.dst_node_id = -1,
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
.touch = true,
.skip_mapped = true,
.user_space_start = &start,
.user_space_length = &length,
};
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
// TODO: Bug 2103669: Service more than a single fault at a time
//
// We are trying to use migrate_vma API in the kernel (if it exists) to
// populate and map the faulting region on the GPU. We want to do this only
// on the first touch. That is, pages which are not already mapped. So, we
// set skip_mapped to true. For pages already mapped, this will only handle
// PTE upgrades if needed.
status = uvm_migrate_pageable(&uvm_migrate_args);
if (status == NV_WARN_NOTHING_TO_DO)
status = NV_OK;
UVM_ASSERT(status != NV_ERR_MORE_PROCESSING_REQUIRED);
return status;
}
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_buffer_entry_t *current_entry,
uvm_ats_fault_invalidate_t *ats_invalidate)
{
NvU64 gmmu_region_base;
bool in_gmmu_region;
NV_STATUS status = NV_OK;
uvm_fault_access_type_t service_access_type;
UVM_ASSERT(g_uvm_global.ats.enabled);
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
UVM_ASSERT(current_entry->fault_access_type ==
uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
service_access_type = current_entry->fault_access_type;
// ATS lookups are disabled on all addresses within the same
// UVM_GMMU_ATS_GRANULARITY as existing GMMU mappings (see documentation in
// uvm_mmu.h). User mode is supposed to reserve VAs as appropriate to
// prevent any system memory allocations from falling within the NO_ATS
// range of other GMMU mappings, so this shouldn't happen during normal
// operation. However, since this scenario may lead to infinite fault loops,
// we handle it by canceling the fault.
//
// TODO: Bug 2103669: Remove redundant VA range lookups
gmmu_region_base = UVM_ALIGN_DOWN(current_entry->fault_address, UVM_GMMU_ATS_GRANULARITY);
in_gmmu_region = !uvm_va_space_range_empty(current_entry->va_space,
gmmu_region_base,
gmmu_region_base + UVM_GMMU_ATS_GRANULARITY - 1);
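    // Illustrative example: if UVM_GMMU_ATS_GRANULARITY were 512M (see
    // uvm_mmu.h for the real value), a fault at 0x7f1234567000 would check the
    // VA space for any range overlapping [0x7f1220000000, 0x7f123fffffff] and,
    // if one exists, cancel the fault instead of servicing it through ATS.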
if (in_gmmu_region) {
status = NV_ERR_INVALID_ADDRESS;
}
else {
// TODO: Bug 2103669: Service more than a single fault at a time
status = uvm_ats_service_fault(gpu_va_space, current_entry->fault_address, service_access_type);
}
// Do not flag prefetch faults as fatal unless something fatal happened
if (status == NV_ERR_INVALID_ADDRESS) {
if (current_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH) {
current_entry->is_fatal = true;
current_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
// Compute cancel mode for replayable faults
if (current_entry->is_replayable) {
if (service_access_type == UVM_FAULT_ACCESS_TYPE_READ || in_gmmu_region)
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
else
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC;
// If there are pending read accesses on the same page, we have to
// service them before we can cancel the write/atomic faults. So we
// retry with read fault access type.
if (!in_gmmu_region &&
current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ &&
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
status = uvm_ats_service_fault(gpu_va_space,
current_entry->fault_address,
UVM_FAULT_ACCESS_TYPE_READ);
// If read accesses are also invalid, cancel the fault. If a
// different error code is returned, exit
if (status == NV_ERR_INVALID_ADDRESS)
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
else if (status != NV_OK)
return status;
}
}
}
else {
current_entry->is_invalid_prefetch = true;
}
// Do not fail overall fault servicing due to logical errors
status = NV_OK;
}
// The Linux kernel never invalidates TLB entries on mapping permission
// upgrade. This is a problem if the GPU has cached entries with the old
// permission. The GPU will re-fetch the entry if the PTE is invalid and
// page size is not 4K (this is the case on P9). However, if a page gets
// upgraded from R/O to R/W and GPU has the PTEs cached with R/O
// permissions we will enter an infinite loop because we just forward the
// fault to the Linux kernel and it will see that the permissions in the
// page table are correct. Therefore, we flush TLB entries on ATS write
// faults.
if (!current_entry->is_fatal && current_entry->fault_access_type > UVM_FAULT_ACCESS_TYPE_READ) {
if (!ats_invalidate->write_faults_in_batch) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
ats_invalidate->write_faults_in_batch = true;
}
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch,
current_entry->fault_address,
PAGE_SIZE,
PAGE_SIZE,
UVM_MEMBAR_NONE);
}
return status;
}
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_tracker_t *out_tracker)
{
NV_STATUS status;
uvm_push_t push;
if (!ats_invalidate->write_faults_in_batch)
return NV_OK;
UVM_ASSERT(gpu_va_space);
UVM_ASSERT(gpu_va_space->ats.enabled);
status = uvm_push_begin(gpu_va_space->gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"Invalidate ATS entries");
if (status == NV_OK) {
uvm_tlb_batch_end(&ats_invalidate->write_faults_tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_push_end(&push);
// Add this push to the GPU's tracker so that fault replays/clears can
// wait on it
status = uvm_tracker_add_push_safe(out_tracker, &push);
}
ats_invalidate->write_faults_in_batch = false;
return status;
}

View File

@@ -0,0 +1,47 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_va_space.h"
NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_buffer_entry_t *current_entry,
uvm_ats_fault_invalidate_t *ats_invalidate);
// This function performs pending TLB invalidations for ATS and clears the
// ats_invalidate->write_faults_in_batch flag
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_tracker_t *out_tracker);
static bool uvm_ats_can_service_faults(uvm_gpu_va_space_t *gpu_va_space, struct mm_struct *mm)
{
if (mm)
uvm_assert_mmap_lock_locked(mm);
if (gpu_va_space->ats.enabled)
UVM_ASSERT(g_uvm_global.ats.enabled);
return gpu_va_space->ats.enabled && mm;
}

View File

@@ -0,0 +1,715 @@
/*******************************************************************************
Copyright (c) 2018-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_lock.h"
#include "uvm_kvmalloc.h"
#include "uvm_global.h"
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"
#include "uvm_ats_ibm.h"
#include "uvm_common.h"
#include <linux/pci.h>
#if UVM_IBM_NPU_SUPPORTED()
#include <linux/of.h>
#include <linux/sizes.h>
#include <asm/pci-bridge.h>
#include <asm/io.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#define NPU_ATSD_REG_MAP_SIZE 32
// There are three 8-byte registers in each ATSD mapping:
#define NPU_ATSD_REG_LAUNCH 0
#define NPU_ATSD_REG_AVA 1
#define NPU_ATSD_REG_STAT 2
// Fields within the NPU_ATSD_REG_LAUNCH register:
// "PRS" (process-scoped) bit. 1 means to limit invalidates to the specified
// PASID.
#define NPU_ATSD_REG_LAUNCH_PASID_ENABLE 13
// "PID" field. This specifies the PASID target of the invalidate.
#define NPU_ATSD_REG_LAUNCH_PASID_VAL 38
// "IS" bit. 0 means the specified virtual address range will be invalidated. 1
// means all entries will be invalidated.
#define NPU_ATSD_REG_LAUNCH_INVAL_ALL 12
// "AP" field. This encodes the size of a range-based invalidate.
#define NPU_ATSD_REG_LAUNCH_INVAL_SIZE 17
// "No flush" bit. 0 will trigger a flush (membar) from the GPU following the
// invalidate, 1 will not.
#define NPU_ATSD_REG_LAUNCH_FLUSH_DISABLE 39
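// Illustrative sketch (not the driver's actual encoding; see the ATSD
// invalidate helpers later in this file): on POWER9 the launch value is
// typically assembled with the MSB0-numbered PPC_BIT()/PPC_BITLSHIFT()
// helpers, conceptually along the lines of
//
//     launch = PPC_BIT(NPU_ATSD_REG_LAUNCH_PASID_ENABLE) |
//              ((NvU64)pasid << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_PASID_VAL)) |
//              ... invalidate size / flush fields ...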
// Helper to iterate over the active NPUs in the given VA space (all NPUs with
// GPUs that have GPU VA spaces registered in this VA space).
#define for_each_npu_index_in_va_space(npu_index, va_space) \
for (({uvm_assert_rwlock_locked(&(va_space)->ats.ibm.rwlock); \
(npu_index) = find_first_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS);}); \
(npu_index) < NV_MAX_NPUS; \
(npu_index) = find_next_bit((va_space)->ats.ibm.npu_active_mask, NV_MAX_NPUS, (npu_index) + 1))
// An invalidate requires operating on one set of registers in each NPU. This
// struct tracks which register set (id) is in use per NPU for a given
// operation.
typedef struct
{
NvU8 ids[NV_MAX_NPUS];
} uvm_atsd_regs_t;
// Get the index of the input npu pointer within UVM's global npus array
static size_t uvm_ibm_npu_index(uvm_ibm_npu_t *npu)
{
size_t npu_index = npu - &g_uvm_global.npus[0];
UVM_ASSERT(npu_index < ARRAY_SIZE(g_uvm_global.npus));
return npu_index;
}
// Find an existing NPU matching pci_domain, or return an empty NPU slot if none
// is found. Returns NULL if no slots are available.
static uvm_ibm_npu_t *uvm_ibm_npu_find(int pci_domain)
{
size_t i;
uvm_ibm_npu_t *npu, *first_free = NULL;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for (i = 0; i < ARRAY_SIZE(g_uvm_global.npus); i++) {
npu = &g_uvm_global.npus[i];
if (npu->num_retained_gpus == 0) {
if (!first_free)
first_free = npu;
}
else if (npu->pci_domain == pci_domain) {
return npu;
}
}
return first_free;
}
static void uvm_ibm_npu_destroy(uvm_ibm_npu_t *npu)
{
size_t i;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(npu->num_retained_gpus == 0);
UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));
for (i = 0; i < npu->atsd_regs.count; i++) {
UVM_ASSERT(npu->atsd_regs.io_addrs[i]);
iounmap(npu->atsd_regs.io_addrs[i]);
}
memset(npu, 0, sizeof(*npu));
}
static NV_STATUS uvm_ibm_npu_init(uvm_ibm_npu_t *npu, struct pci_dev *npu_dev)
{
struct pci_controller *hose;
size_t i, reg_count, reg_size = sizeof(npu->atsd_regs.io_addrs[0]);
int ret;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(npu->num_retained_gpus == 0);
UVM_ASSERT(bitmap_empty(npu->atsd_regs.locks, UVM_MAX_ATSD_REGS));
npu->pci_domain = pci_domain_nr(npu_dev->bus);
if (!UVM_ATS_IBM_SUPPORTED_IN_DRIVER())
return NV_OK;
hose = pci_bus_to_host(npu_dev->bus);
ret = of_property_count_elems_of_size(hose->dn, "ibm,mmio-atsd", reg_size);
if (ret < 0) {
UVM_ERR_PRINT("Failed to query NPU %d ATSD register count: %d\n", npu->pci_domain, ret);
return errno_to_nv_status(ret);
}
// For ATS to be enabled globally, we must have NPU ATSD registers
reg_count = ret;
if (reg_count == 0 || reg_count > UVM_MAX_ATSD_REGS) {
UVM_ERR_PRINT("NPU %d has invalid ATSD register count: %zu\n", npu->pci_domain, reg_count);
return NV_ERR_INVALID_STATE;
}
// Map the ATSD registers
for (i = 0; i < reg_count; i++) {
u64 phys_addr;
__be64 __iomem *io_addr;
ret = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", i, &phys_addr);
UVM_ASSERT(ret == 0);
io_addr = ioremap(phys_addr, NPU_ATSD_REG_MAP_SIZE);
if (!io_addr) {
uvm_ibm_npu_destroy(npu);
return NV_ERR_NO_MEMORY;
}
npu->atsd_regs.io_addrs[npu->atsd_regs.count++] = io_addr;
}
return NV_OK;
}
NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
struct pci_dev *npu_dev = pnv_pci_get_npu_dev(parent_gpu->pci_dev, 0);
uvm_ibm_npu_t *npu;
NV_STATUS status;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (!npu_dev)
return NV_OK;
npu = uvm_ibm_npu_find(pci_domain_nr(npu_dev->bus));
if (!npu) {
// If this happens then we can't support the system configuration until
// NV_MAX_NPUS is updated. Return the same error as when the number of
// GPUs exceeds UVM_MAX_GPUS.
UVM_ERR_PRINT("No more NPU slots available, update NV_MAX_NPUS\n");
return NV_ERR_INSUFFICIENT_RESOURCES;
}
if (npu->num_retained_gpus == 0) {
status = uvm_ibm_npu_init(npu, npu_dev);
if (status != NV_OK)
return status;
}
// This npu field could be read concurrently by a thread in the ATSD
// invalidate path. We don't need to provide ordering with those threads
// because those invalidates won't apply to the GPU being added until a GPU
// VA space on this GPU is registered.
npu->atsd_regs.num_membars = max(npu->atsd_regs.num_membars, parent_gpu->num_hshub_tlb_invalidate_membars);
parent_gpu->npu = npu;
++npu->num_retained_gpus;
return NV_OK;
}
void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
uvm_ibm_npu_t *npu = parent_gpu->npu;
uvm_parent_gpu_t *other_parent_gpu;
NvU32 num_membars_new = 0;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (!npu)
return;
UVM_ASSERT(npu->num_retained_gpus > 0);
if (--npu->num_retained_gpus == 0) {
uvm_ibm_npu_destroy(npu);
}
else {
// Re-calculate the membar count
for_each_parent_gpu(other_parent_gpu) {
// The current GPU being removed should've already been removed from
// the global list.
UVM_ASSERT(other_parent_gpu != parent_gpu);
if (other_parent_gpu->npu == npu)
num_membars_new = max(num_membars_new, other_parent_gpu->num_hshub_tlb_invalidate_membars);
}
UVM_ASSERT(num_membars_new > 0);
npu->atsd_regs.num_membars = num_membars_new;
}
}
#if UVM_ATS_IBM_SUPPORTED()
void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{
uvm_ibm_va_space_t *ibm_va_space;
UVM_ASSERT(va_space);
ibm_va_space = &va_space->ats.ibm;
uvm_rwlock_irqsave_init(&ibm_va_space->rwlock, UVM_LOCK_ORDER_LEAF);
}
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
static void npu_release_dummy(struct npu_context *npu_context, void *va_mm)
{
// See the comment on the call to pnv_npu2_init_context()
}
static NV_STATUS uvm_ats_ibm_register_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
struct npu_context *npu_context;
// pnv_npu2_init_context() registers current->mm with
// mmu_notifier_register(). We need that to match the mm we passed to our
// own mmu_notifier_register() for this VA space.
if (current->mm != va_space->va_space_mm.mm)
return NV_ERR_NOT_SUPPORTED;
uvm_assert_mmap_lock_locked_write(current->mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
// pnv_npu2_init_context() doesn't handle being called multiple times for
// the same GPU under the same mm, which could happen if multiple VA spaces
// are created in this process. To handle that we pass the VA space pointer
// as the callback parameter: the callback values are shared by all devices
// under this mm, so pnv_npu2_init_context() enforces that the values match
// the ones already registered to the mm.
//
// Otherwise we don't use the callback, since we have our own callback
// registered under the va_space_mm that will be called at the same point
// (mmu_notifier release).
npu_context = pnv_npu2_init_context(gpu_va_space->gpu->parent->pci_dev,
(MSR_DR | MSR_PR | MSR_HV),
npu_release_dummy,
va_space);
if (IS_ERR(npu_context)) {
int err = PTR_ERR(npu_context);
// We'll get -EINVAL if the callback value (va_space) differs from the
// one already registered to the npu_context associated with this mm.
// That can only happen when multiple VA spaces attempt registration
// within the same process, which is disallowed and should return
// NV_ERR_NOT_SUPPORTED.
if (err == -EINVAL)
return NV_ERR_NOT_SUPPORTED;
return errno_to_nv_status(err);
}
ibm_gpu_va_space->npu_context = npu_context;
return NV_OK;
}
static void uvm_ats_ibm_unregister_gpu_va_space_kernel(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_gpu_va_space_state_t state;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ibm_va_space_t *ibm_va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
if (!ibm_gpu_va_space->npu_context)
return;
// va_space is guaranteed to not be NULL if ibm_gpu_va_space->npu_context is
// not NULL.
UVM_ASSERT(va_space);
state = uvm_gpu_va_space_state(gpu_va_space);
UVM_ASSERT(state == UVM_GPU_VA_SPACE_STATE_INIT || state == UVM_GPU_VA_SPACE_STATE_DEAD);
ibm_va_space = &va_space->ats.ibm;
// pnv_npu2_destroy_context() may in turn call mmu_notifier_unregister().
// If uvm_va_space_mm_shutdown() is concurrently executing in another
// thread, mmu_notifier_unregister() will wait for
// uvm_va_space_mm_shutdown() to finish. uvm_va_space_mm_shutdown() takes
// mmap_lock and the VA space lock, so we can't be holding those locks on
// this path.
uvm_assert_unlocked_order(UVM_LOCK_ORDER_MMAP_LOCK);
uvm_assert_unlocked_order(UVM_LOCK_ORDER_VA_SPACE);
pnv_npu2_destroy_context(ibm_gpu_va_space->npu_context, gpu_va_space->gpu->parent->pci_dev);
ibm_gpu_va_space->npu_context = NULL;
}
#else
static void uvm_ats_ibm_register_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
uvm_gpu_t *gpu = gpu_va_space->gpu;
size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
uvm_ibm_va_space_t *ibm_va_space;
UVM_ASSERT(va_space);
ibm_va_space = &va_space->ats.ibm;
uvm_assert_rwsem_locked_write(&va_space->lock);
uvm_write_lock_irqsave(&ibm_va_space->rwlock);
// If this is the first GPU VA space to use this NPU in the VA space, mark
// the NPU as active so invalidates are issued to it.
if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
// If this is the first active NPU in the entire VA space, we have to
// tell the kernel to send TLB invalidations to the IOMMU. See kernel
// commit 03b8abedf4f4965e7e9e0d4f92877c42c07ce19f for background.
//
// This is safe to do without holding mm_users high or mmap_lock.
if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
mm_context_add_copro(va_space->va_space_mm.mm);
UVM_ASSERT(!test_bit(npu_index, ibm_va_space->npu_active_mask));
__set_bit(npu_index, ibm_va_space->npu_active_mask);
}
else {
UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));
}
++ibm_va_space->npu_ref_counts[npu_index];
// As soon as this lock is dropped, invalidates on this VA space's mm may
// begin issuing ATSDs to this NPU.
uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);
ibm_gpu_va_space->did_ibm_driver_init = true;
}
static void uvm_ats_ibm_unregister_gpu_va_space_driver(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_gpu_t *gpu = gpu_va_space->gpu;
size_t npu_index = uvm_ibm_npu_index(gpu->parent->npu);
bool do_remove = false;
uvm_ibm_va_space_t *ibm_va_space;
uvm_ibm_gpu_va_space_t *ibm_gpu_va_space = &gpu_va_space->ats.ibm;
if (!ibm_gpu_va_space->did_ibm_driver_init)
return;
UVM_ASSERT(va_space);
ibm_va_space = &va_space->ats.ibm;
// Note that we aren't holding the VA space lock here, so another thread
// could be in uvm_ats_ibm_register_gpu_va_space() for this same GPU right
// now. The write lock and ref counts below will handle that case.
// Once we return from this function with a bit cleared in the
// npu_active_mask, we have to guarantee that this VA space no longer
// accesses that NPU's ATSD registers. This is needed in case GPU unregister
// needs to unmap those registers. We use the reader/writer lock to
// guarantee this, which means that invalidations must not access the ATSD
// registers outside of the lock.
//
// Future work: if we could synchronize_srcu() on the mmu_notifier SRCU we
// might do that here instead to flush out all invalidates. That would allow
// us to avoid taking a read lock in the invalidate path, though we'd have
// to be careful when clearing the mask bit relative to the synchronize, and
// we'd have to be careful in cases where this thread doesn't hold a
// reference to mm_users.
uvm_write_lock_irqsave(&ibm_va_space->rwlock);
UVM_ASSERT(ibm_va_space->npu_ref_counts[npu_index] > 0);
UVM_ASSERT(test_bit(npu_index, ibm_va_space->npu_active_mask));
--ibm_va_space->npu_ref_counts[npu_index];
if (ibm_va_space->npu_ref_counts[npu_index] == 0) {
__clear_bit(npu_index, ibm_va_space->npu_active_mask);
if (bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS))
do_remove = true;
}
uvm_write_unlock_irqrestore(&ibm_va_space->rwlock);
if (do_remove) {
// mm_context_remove_copro() must be called outside of the spinlock
// because it may issue invalidates across CPUs in this mm. The
// coprocessor count is atomically refcounted by that function, so it's
// safe to call here even if another thread jumps in with a register and
// calls mm_context_add_copro() between this thread's unlock and this
// call.
UVM_ASSERT(va_space->va_space_mm.mm);
mm_context_remove_copro(va_space->va_space_mm.mm);
}
}
#endif // UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
static mm_context_id_t va_space_pasid(uvm_va_space_t *va_space)
{
struct mm_struct *mm = va_space->va_space_mm.mm;
UVM_ASSERT(mm);
return mm->context.id;
}
NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
NV_STATUS status = NV_OK;
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_INIT);
UVM_ASSERT(va_space->va_space_mm.mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
status = uvm_ats_ibm_register_gpu_va_space_kernel(gpu_va_space);
#else
uvm_ats_ibm_register_gpu_va_space_driver(gpu_va_space);
#endif
gpu_va_space->ats.pasid = (NvU32) va_space_pasid(gpu_va_space->va_space);
return status;
}
void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
uvm_ats_ibm_unregister_gpu_va_space_kernel(gpu_va_space);
#else
uvm_ats_ibm_unregister_gpu_va_space_driver(gpu_va_space);
#endif
gpu_va_space->ats.pasid = -1U;
}
#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
// Find any available ATSD register set in this NPU and return that index. This
// will busy wait until a register set is free.
static NvU8 atsd_reg_acquire(uvm_ibm_npu_t *npu)
{
uvm_spin_loop_t spin;
size_t i;
bool first = true;
while (1) {
// Using for_each_clear_bit is racy, since the bits could change at any
// point. That's ok since we'll either just retry or use a real atomic
// to lock the bit. Checking for clear bits first avoids spamming
// atomics in the contended case.
for_each_clear_bit(i, npu->atsd_regs.locks, npu->atsd_regs.count) {
if (!test_and_set_bit_lock(i, npu->atsd_regs.locks))
return (NvU8)i;
}
// Back off and try again, avoiding the overhead of initializing the
// tracking timers unless we need them.
if (first) {
uvm_spin_loop_init(&spin);
first = false;
}
else {
UVM_SPIN_LOOP(&spin);
}
}
}
static void atsd_reg_release(uvm_ibm_npu_t *npu, NvU8 reg)
{
UVM_ASSERT(reg < npu->atsd_regs.count);
UVM_ASSERT(test_bit(reg, npu->atsd_regs.locks));
clear_bit_unlock(reg, npu->atsd_regs.locks);
}
static __be64 atsd_reg_read(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset)
{
__be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
UVM_ASSERT(reg < npu->atsd_regs.count);
return __raw_readq(io_addr);
}
static void atsd_reg_write(uvm_ibm_npu_t *npu, NvU8 reg, size_t offset, NvU64 val)
{
__be64 __iomem *io_addr = npu->atsd_regs.io_addrs[reg] + offset;
UVM_ASSERT(reg < npu->atsd_regs.count);
__raw_writeq_be(val, io_addr);
}
// Acquire a set of registers in each NPU which is active in va_space
static void atsd_regs_acquire(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
size_t i;
for_each_npu_index_in_va_space(i, va_space)
regs->ids[i] = atsd_reg_acquire(&g_uvm_global.npus[i]);
}
static void atsd_regs_release(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
size_t i;
for_each_npu_index_in_va_space(i, va_space)
atsd_reg_release(&g_uvm_global.npus[i], regs->ids[i]);
}
// Write the provided value to each NPU active in va_space at the provided
// register offset.
static void atsd_regs_write(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, size_t offset, NvU64 val)
{
size_t i;
for_each_npu_index_in_va_space(i, va_space)
atsd_reg_write(&g_uvm_global.npus[i], regs->ids[i], offset, val);
}
// Wait for all prior operations issued to active NPUs in va_space on the given
// registers to finish.
static void atsd_regs_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
uvm_spin_loop_t spin;
size_t i;
for_each_npu_index_in_va_space(i, va_space) {
UVM_SPIN_WHILE(atsd_reg_read(&g_uvm_global.npus[i], regs->ids[i], NPU_ATSD_REG_STAT), &spin)
;
}
}
// Encode an invalidate targeting the given pasid and the given size for the
// NPU_ATSD_REG_LAUNCH register. The target address is encoded separately.
//
// psize must be one of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A
// psize of MMU_PAGE_COUNT means to invalidate the entire address space.
static NvU64 atsd_get_launch_val(mm_context_id_t pasid, int psize)
{
NvU64 val = 0;
val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_PASID_ENABLE);
val |= pasid << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_PASID_VAL);
if (psize == MMU_PAGE_COUNT) {
val |= PPC_BIT(NPU_ATSD_REG_LAUNCH_INVAL_ALL);
}
else {
// The NPU registers do not support arbitrary sizes
UVM_ASSERT(psize == MMU_PAGE_64K || psize == MMU_PAGE_2M || psize == MMU_PAGE_1G);
val |= (NvU64)mmu_get_ap(psize) << PPC_BITLSHIFT(NPU_ATSD_REG_LAUNCH_INVAL_SIZE);
}
return val;
}
// Return the encoded size to use for an ATSD targeting the given range, in one
// of the MMU_PAGE_* values defined in powerpc's asm/mmu.h. A return value of
// MMU_PAGE_COUNT means the entire address space must be invalidated.
//
// start is an in/out parameter. On return start will be set to the aligned
// starting address to use for the ATSD. end is inclusive.
static int atsd_calc_size(NvU64 *start, NvU64 end)
{
// ATSDs have high latency, so we prefer to over-invalidate rather than
// issue multiple precise invalidates. Supported sizes are only 64K, 2M, and
// 1G.
*start = UVM_ALIGN_DOWN(*start, SZ_64K);
end = UVM_ALIGN_DOWN(end, SZ_64K);
if (*start == end)
return MMU_PAGE_64K;
*start = UVM_ALIGN_DOWN(*start, SZ_2M);
end = UVM_ALIGN_DOWN(end, SZ_2M);
if (*start == end)
return MMU_PAGE_2M;
*start = UVM_ALIGN_DOWN(*start, SZ_1G);
end = UVM_ALIGN_DOWN(end, SZ_1G);
if (*start == end)
return MMU_PAGE_1G;
return MMU_PAGE_COUNT;
}
// Issue an ATSD to all NPUs and wait for completion
static void atsd_launch_wait(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 val)
{
atsd_regs_write(va_space, regs, NPU_ATSD_REG_LAUNCH, val);
atsd_regs_wait(va_space, regs);
}
// Issue and wait for the required membars following an invalidate
static void atsd_issue_membars(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
size_t i;
NvU32 num_membars = 0;
// These membars are issued using ATSDs which target a reserved PASID of 0.
// That PASID is kept valid on the GPU so the membar takes effect, but the
// kernel will never use PASID 0 for an actual address space, so the ATSD
// won't actually invalidate any entries.
NvU64 val = atsd_get_launch_val(0, MMU_PAGE_COUNT);
for_each_npu_index_in_va_space(i, va_space) {
uvm_ibm_npu_t *npu = &g_uvm_global.npus[i];
num_membars = max(num_membars, npu->atsd_regs.num_membars);
}
for (i = 0; i < num_membars; i++)
atsd_launch_wait(va_space, regs, val);
}
static void uvm_ats_ibm_invalidate_all(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs)
{
NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), MMU_PAGE_COUNT);
atsd_launch_wait(va_space, regs, val);
atsd_issue_membars(va_space, regs);
}
static void uvm_ats_ibm_invalidate_range(uvm_va_space_t *va_space, uvm_atsd_regs_t *regs, NvU64 start, int psize)
{
NvU64 val = atsd_get_launch_val(va_space_pasid(va_space), psize);
// Barriers are expensive, so write all address registers first then do a
// single barrier for all of them.
atsd_regs_write(va_space, regs, NPU_ATSD_REG_AVA, start);
eieio();
atsd_launch_wait(va_space, regs, val);
atsd_issue_membars(va_space, regs);
}
#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
#if UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
unsigned long irq_flags;
uvm_atsd_regs_t regs;
NvU64 atsd_start = start;
int psize = atsd_calc_size(&atsd_start, end);
uvm_ibm_va_space_t *ibm_va_space = &va_space->ats.ibm;
BUILD_BUG_ON(order_base_2(UVM_MAX_ATSD_REGS) > 8*sizeof(regs.ids[0]));
// We must hold this lock in at least read mode when accessing NPU
// registers. See the comment in uvm_ats_ibm_unregister_gpu_va_space_driver.
uvm_read_lock_irqsave(&ibm_va_space->rwlock, irq_flags);
if (!bitmap_empty(ibm_va_space->npu_active_mask, NV_MAX_NPUS)) {
atsd_regs_acquire(va_space, &regs);
if (psize == MMU_PAGE_COUNT)
uvm_ats_ibm_invalidate_all(va_space, &regs);
else
uvm_ats_ibm_invalidate_range(va_space, &regs, atsd_start, psize);
atsd_regs_release(va_space, &regs);
}
uvm_read_unlock_irqrestore(&ibm_va_space->rwlock, irq_flags);
#else
UVM_ASSERT_MSG(0, "This function should not be called on this kernel version\n");
#endif // UVM_ATS_IBM_SUPPORTED_IN_DRIVER()
}
#endif // UVM_ATS_IBM_SUPPORTED
#endif // UVM_IBM_NPU_SUPPORTED

View File

@@ -0,0 +1,266 @@
/*******************************************************************************
Copyright (c) 2018-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_ATS_IBM_H__
#define __UVM_ATS_IBM_H__
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"
#if defined(NVCPU_PPC64LE) && defined(NV_PNV_PCI_GET_NPU_DEV_PRESENT)
#include <asm/mmu.h>
#if defined(NV_MAX_NPUS)
#define UVM_IBM_NPU_SUPPORTED() 1
#else
#define UVM_IBM_NPU_SUPPORTED() 0
#endif
#else
#define UVM_IBM_NPU_SUPPORTED() 0
#endif
#if defined(NV_ASM_OPAL_API_H_PRESENT)
// For OPAL_NPU_INIT_CONTEXT
#include <asm/opal-api.h>
#endif
// Timeline of kernel changes:
//
// 0) Before 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c
// - No NPU-ATS code existed, nor did the OPAL_NPU_INIT_CONTEXT firmware
// call.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Not defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Not defined
// - OPAL_NPU_INIT_CONTEXT Not defined
// - ATS support type None
//
// 1) NPU ATS code added: 1ab66d1fbadad86b1f4a9c7857e193af0ee0022c, v4.12
// (2017-04-03)
// - This commit added initial support for NPU ATS, including the necessary
// OPAL firmware calls. This support was developmental and required
// several bug fixes before it could be used in production.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Not defined
// - OPAL_NPU_INIT_CONTEXT Defined
// - ATS support type None
//
// 2) NPU ATS code fixed: a1409adac748f0db655e096521bbe6904aadeb98, v4.17
// (2018-04-11)
// - This commit changed the function signature for pnv_npu2_init_context's
// callback parameter. Since all required bug fixes went in prior to this
// change, we can use the callback signature as a flag to indicate
// whether the PPC arch layer in the kernel supports ATS in production.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Defined
// - OPAL_NPU_INIT_CONTEXT Defined
// - ATS support type Kernel
//
// 3) NPU ATS code removed: 7eb3cf761927b2687164e182efa675e6c09cfe44, v5.3
// (2019-06-25)
// - This commit removed NPU-ATS support from the PPC arch layer, so the
// driver needs to handle things instead. pnv_npu2_init_context is no
// longer present, so we use OPAL_NPU_INIT_CONTEXT to differentiate
// between this state and scenario #0.
// - NV_PNV_NPU2_INIT_CONTEXT_PRESENT Not defined
// - NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID Not defined
// - OPAL_NPU_INIT_CONTEXT Defined
// - ATS support type Driver
//
#if defined(NV_PNV_NPU2_INIT_CONTEXT_CALLBACK_RETURNS_VOID)
#define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 1
#define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#elif !defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT) && defined(OPAL_NPU_INIT_CONTEXT) && UVM_CAN_USE_MMU_NOTIFIERS()
#define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
#define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 1
#else
#define UVM_ATS_IBM_SUPPORTED_IN_KERNEL() 0
#define UVM_ATS_IBM_SUPPORTED_IN_DRIVER() 0
#endif
#define UVM_ATS_IBM_SUPPORTED() (UVM_ATS_IBM_SUPPORTED_IN_KERNEL() || UVM_ATS_IBM_SUPPORTED_IN_DRIVER())
// Maximum number of parallel ATSD register sets per NPU
#define UVM_MAX_ATSD_REGS 16
typedef struct
{
#if UVM_IBM_NPU_SUPPORTED()
// These are the active NPUs in this VA space, that is, all NPUs with
// GPUs that have GPU VA spaces registered in this VA space.
//
// If a bit is clear in npu_active_mask then the corresponding entry of
// npu_ref_counts is 0. If a bit is set then the corresponding entry of
// npu_ref_counts is greater than 0.
NvU32 npu_ref_counts[NV_MAX_NPUS];
DECLARE_BITMAP(npu_active_mask, NV_MAX_NPUS);
#endif
// Lock protecting npu_ref_counts and npu_active_mask. Invalidations
// take this lock for read. GPU VA space register and unregister take
// this lock for write. Since all invalidations take the lock for read
// for the duration of the invalidate, taking the lock for write also
// flushes all invalidates.
//
// This is a spinlock because the invalidation code paths may be called
// with interrupts disabled, so those paths can't take the VA space
// lock. We could use a normal exclusive spinlock instead, but a reader/
// writer lock is preferred to allow concurrent invalidates in the same
// VA space.
uvm_rwlock_irqsave_t rwlock;
} uvm_ibm_va_space_t;
typedef struct
{
#if UVM_ATS_IBM_SUPPORTED_IN_KERNEL()
struct npu_context *npu_context;
#endif
// Used on the teardown path to know what to clean up. npu_context acts
// as the equivalent flag for kernel-provided support.
bool did_ibm_driver_init;
} uvm_ibm_gpu_va_space_t;
struct uvm_ibm_npu_struct
{
// Number of retained GPUs under this NPU. The other fields in this struct
// are only valid if this is non-zero.
unsigned int num_retained_gpus;
// PCI domain containing this NPU. This acts as a unique system-wide ID for
// this UVM NPU.
int pci_domain;
// The ATS-related fields are only valid when ATS support is enabled and
// UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1.
struct
{
// Mapped addresses of the ATSD trigger registers. There may be more
// than one set of identical registers per NPU to enable concurrent
// invalidates.
//
// These will not be accessed unless there is a GPU VA space registered
// on a GPU under this NPU. They are protected by bit locks in the locks
// field.
__be64 __iomem *io_addrs[UVM_MAX_ATSD_REGS];
// Actual number of registers in the io_addrs array
size_t count;
// Bitmask for allocation and locking of the registers. Bit index n
// corresponds to io_addrs[n]. A set bit means that index is in use
// (locked).
DECLARE_BITMAP(locks, UVM_MAX_ATSD_REGS);
// Max value of any uvm_parent_gpu_t::num_hshub_tlb_invalidate_membars
// for all retained GPUs under this NPU.
NvU32 num_membars;
} atsd_regs;
};
#if UVM_IBM_NPU_SUPPORTED()
NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu);
void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu);
#else
static NV_STATUS uvm_ats_ibm_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
return NV_OK;
}
static void uvm_ats_ibm_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
}
#endif // UVM_IBM_NPU_SUPPORTED
#if UVM_ATS_IBM_SUPPORTED()
// Initializes IBM specific GPU state.
//
// LOCKING: None
void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space);
// Enables ATS access for the gpu_va_space on the mm_struct associated with
// the VA space (va_space_mm).
//
// If UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1, NV_ERR_NOT_SUPPORTED is
// returned if current->mm does not match va_space_mm.mm or if a GPU VA
// space within another VA space has already called this function on the
// same mm.
//
// If UVM_ATS_IBM_SUPPORTED_IN_DRIVER() is 1 there are no such restrictions.
//
// LOCKING: The VA space lock must be held in write mode.
// current->mm->mmap_lock must be held in write mode iff
// UVM_ATS_IBM_SUPPORTED_IN_KERNEL() is 1.
NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Disables ATS access for the gpu_va_space. Prior to calling this function,
// the caller must guarantee that the GPU will no longer make any ATS
// accesses in this GPU VA space, and that no ATS fault handling for this
// GPU will be attempted.
//
// LOCKING: This function may block on mmap_lock and the VA space lock, so
// neither must be held.
void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Synchronously invalidate ATS translations cached by GPU TLBs. The
// invalidate applies to all GPUs with active GPU VA spaces in va_space, and
// covers all pages touching any part of the given range. end is inclusive.
//
// GMMU translations in the given range are not guaranteed to be
// invalidated.
//
// LOCKING: No locks are required, but this function may be called with
// interrupts disabled.
void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end);
#else
static void uvm_ats_ibm_init_va_space(uvm_va_space_t *va_space)
{
}
static NV_STATUS uvm_ats_ibm_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
return NV_OK;
}
static void uvm_ats_ibm_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
{
}
static void uvm_ats_ibm_invalidate(uvm_va_space_t *va_space, NvU64 start, NvU64 end)
{
}
#endif // UVM_ATS_IBM_SUPPORTED
static NV_STATUS uvm_ats_ibm_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
return NV_OK;
}
static void uvm_ats_ibm_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
{
}
#endif // __UVM_ATS_IBM_H__

View File

@@ -0,0 +1,680 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_channel.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_tracker.h"
#include "uvm_va_space.h"
#include "uvm_rm_mem.h"
#include "uvm_mem.h"
#define CE_TEST_MEM_SIZE (2 * 1024 * 1024)
#define CE_TEST_MEM_END_SIZE 32
#define CE_TEST_MEM_BEGIN_SIZE 32
#define CE_TEST_MEM_MIDDLE_SIZE (CE_TEST_MEM_SIZE - CE_TEST_MEM_BEGIN_SIZE - CE_TEST_MEM_END_SIZE)
#define CE_TEST_MEM_MIDDLE_OFFSET (CE_TEST_MEM_BEGIN_SIZE)
#define CE_TEST_MEM_END_OFFSET (CE_TEST_MEM_SIZE - CE_TEST_MEM_END_SIZE)
#define CE_TEST_MEM_COUNT 5
static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
{
NvU32 i;
NV_STATUS status;
uvm_rm_mem_t *mem[CE_TEST_MEM_COUNT] = { NULL };
uvm_rm_mem_t *host_mem = NULL;
NvU32 *host_ptr;
NvU64 host_mem_gpu_va, mem_gpu_va;
NvU64 dst_va;
NvU64 src_va;
uvm_push_t push;
bool is_proxy;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, &host_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
memset(host_ptr, 0, CE_TEST_MEM_SIZE);
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, &mem[i]);
TEST_CHECK_GOTO(status == NV_OK, done);
}
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "Non-pipelined test");
TEST_CHECK_GOTO(status == NV_OK, done);
is_proxy = uvm_channel_is_proxy(push.channel);
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, is_proxy);
// All of the following CE transfers are done from a single (L)CE and
// disabling pipelining is enough to order them when needed. Only push_end
// needs a MEMBAR SYS to order everything with the CPU.
// Initialize to a bad value
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1337 + i, CE_TEST_MEM_SIZE);
}
// Set the first buffer to 1
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
mem_gpu_va = uvm_rm_mem_get_gpu_va(mem[0], gpu, is_proxy);
gpu->parent->ce_hal->memset_v_4(&push, mem_gpu_va, 1, CE_TEST_MEM_SIZE);
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
NvU32 dst = i + 1;
if (dst == CE_TEST_MEM_COUNT)
dst_va = host_mem_gpu_va;
else
dst_va = uvm_rm_mem_get_gpu_va(mem[dst], gpu, is_proxy);
src_va = uvm_rm_mem_get_gpu_va(mem[i], gpu, is_proxy);
// The first memcpy needs to be non-pipelined as otherwise the previous
// memset/memcpy to the source may not be done yet.
// Alternate the order of copying the beginning and the end
if (i % 2 == 0) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memcopy_v_to_v(&push, dst_va + CE_TEST_MEM_END_OFFSET, src_va + CE_TEST_MEM_END_OFFSET, CE_TEST_MEM_END_SIZE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(&push,
dst_va + CE_TEST_MEM_MIDDLE_OFFSET,
src_va + CE_TEST_MEM_MIDDLE_OFFSET,
CE_TEST_MEM_MIDDLE_SIZE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(&push, dst_va, src_va, CE_TEST_MEM_BEGIN_SIZE);
}
else {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memcopy_v_to_v(&push, dst_va, src_va, CE_TEST_MEM_BEGIN_SIZE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(&push,
dst_va + CE_TEST_MEM_MIDDLE_OFFSET,
src_va + CE_TEST_MEM_MIDDLE_OFFSET,
CE_TEST_MEM_MIDDLE_SIZE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(&push,
dst_va + CE_TEST_MEM_END_OFFSET,
src_va + CE_TEST_MEM_END_OFFSET,
CE_TEST_MEM_END_SIZE);
}
}
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
for (i = 0; i < CE_TEST_MEM_SIZE / sizeof(NvU32); ++i) {
if (host_ptr[i] != 1) {
UVM_TEST_PRINT("host_ptr[%u] = %u instead of 1\n", i, host_ptr[i]);
status = NV_ERR_INVALID_STATE;
goto done;
}
}
done:
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
uvm_rm_mem_free(mem[i]);
}
uvm_rm_mem_free(host_mem);
return status;
}
#define REDUCTIONS 32
static NV_STATUS test_membar(uvm_gpu_t *gpu)
{
NvU32 i;
NV_STATUS status;
uvm_rm_mem_t *host_mem = NULL;
NvU32 *host_ptr;
NvU64 host_mem_gpu_va;
uvm_push_t push;
NvU32 value;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), &host_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
*host_ptr = 0;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Membar test");
TEST_CHECK_GOTO(status == NV_OK, done);
host_mem_gpu_va = uvm_rm_mem_get_gpu_va(host_mem, gpu, uvm_channel_is_proxy(push.channel));
for (i = 0; i < REDUCTIONS; ++i) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS + 1);
}
// Without a sys membar the channel tracking semaphore can and does complete
// before all the reductions.
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
value = *host_ptr;
if (value != REDUCTIONS) {
UVM_TEST_PRINT("Value = %u instead of %u, GPU %s\n", value, REDUCTIONS, uvm_gpu_name(gpu));
status = NV_ERR_INVALID_STATE;
goto done;
}
done:
uvm_rm_mem_free(host_mem);
return status;
}
static void push_memset(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t element_size, size_t size)
{
switch (element_size) {
case 1:
uvm_push_get_gpu(push)->parent->ce_hal->memset_1(push, dst, (NvU8)value, size);
break;
case 4:
uvm_push_get_gpu(push)->parent->ce_hal->memset_4(push, dst, (NvU32)value, size);
break;
case 8:
uvm_push_get_gpu(push)->parent->ce_hal->memset_8(push, dst, value, size);
break;
default:
UVM_ASSERT(0);
}
}
static NV_STATUS test_unaligned_memset(uvm_gpu_t *gpu,
uvm_gpu_address_t gpu_verif_addr,
NvU8 *cpu_verif_addr,
size_t size,
size_t element_size,
size_t offset)
{
uvm_push_t push;
NV_STATUS status;
size_t i;
NvU64 value64 = (offset + 2) * (1ull << 32) + (offset + 1);
NvU64 test_value, expected_value = 0;
uvm_gpu_address_t dst;
// Memset a single element at an unaligned position and make sure it doesn't
// clobber anything else
TEST_CHECK_RET(gpu_verif_addr.address % element_size == 0);
TEST_CHECK_RET(offset + element_size <= size);
dst = gpu_verif_addr;
dst.address += offset;
memset(cpu_verif_addr, (NvU8)(~value64), size);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push,
"memset_%zu offset %zu",
element_size, offset);
TEST_CHECK_RET(status == NV_OK);
push_memset(&push, dst, value64, element_size, element_size);
status = uvm_push_end_and_wait(&push);
TEST_CHECK_RET(status == NV_OK);
// Make sure all bytes of element are present
test_value = 0;
memcpy(&test_value, cpu_verif_addr + offset, element_size);
switch (element_size) {
case 1:
expected_value = (NvU8)value64;
break;
case 4:
expected_value = (NvU32)value64;
break;
case 8:
expected_value = value64;
break;
default:
UVM_ASSERT(0);
}
if (test_value != expected_value) {
UVM_TEST_PRINT("memset_%zu offset %zu failed, written value is 0x%llx instead of 0x%llx\n",
element_size, offset, test_value, expected_value);
return NV_ERR_INVALID_STATE;
}
// Make sure all other bytes are unchanged
for (i = 0; i < size; i++) {
if (i >= offset && i < offset + element_size)
continue;
if (cpu_verif_addr[i] != (NvU8)(~value64)) {
UVM_TEST_PRINT("memset_%zu offset %zu failed, immutable byte %zu changed value from 0x%x to 0x%x\n",
element_size, offset, i, (NvU8)(~value64),
cpu_verif_addr[i]);
return NV_ERR_INVALID_STATE;
}
}
return NV_OK;
}
static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
uvm_gpu_address_t dst,
uvm_gpu_address_t src,
size_t size,
size_t element_size,
uvm_gpu_address_t gpu_verif_addr,
void *cpu_verif_addr,
int test_iteration)
{
uvm_push_t push;
size_t i;
const char *src_type = src.is_virtual ? "virtual" : "physical";
const char *src_loc = src.aperture == UVM_APERTURE_SYS ? "sysmem" : "vidmem";
const char *dst_type = dst.is_virtual ? "virtual" : "physical";
const char *dst_loc = dst.aperture == UVM_APERTURE_SYS ? "sysmem" : "vidmem";
NvU64 value64 = (test_iteration + 2) * (1ull << 32) + (test_iteration + 1);
NvU64 test_value = 0, expected_value = 0;
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
&push,
"Memset %s %s (0x%llx) and memcopy to %s %s (0x%llx), iter %d",
src_type,
src_loc,
src.address,
dst_type,
dst_loc,
dst.address,
test_iteration));
// Waive if any of the input addresses is physical but the channel does not
// support physical addressing
if (!uvm_channel_is_privileged(push.channel) && (!dst.is_virtual || !src.is_virtual)) {
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
return NV_OK;
}
// The input virtual addresses exist in UVM's internal address space, not
// the proxy address space
if (uvm_channel_is_proxy(push.channel)) {
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
return NV_ERR_INVALID_STATE;
}
// Memset src with the appropriate element size, then memcpy to dst and from
// dst to the verif location (physical sysmem).
push_memset(&push, src, value64, element_size, size);
gpu->parent->ce_hal->memcopy(&push, dst, src, size);
gpu->parent->ce_hal->memcopy(&push, gpu_verif_addr, dst, size);
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
for (i = 0; i < size / element_size; i++) {
switch (element_size) {
case 1:
expected_value = (NvU8)value64;
test_value = ((NvU8 *)cpu_verif_addr)[i];
break;
case 4:
expected_value = (NvU32)value64;
test_value = ((NvU32 *)cpu_verif_addr)[i];
break;
case 8:
expected_value = value64;
test_value = ((NvU64 *)cpu_verif_addr)[i];
break;
default:
UVM_ASSERT(0);
}
if (test_value != expected_value) {
UVM_TEST_PRINT("memset_%zu of %s %s and memcpy into %s %s failed, value[%zu] = 0x%llx instead of 0x%llx\n",
element_size, src_type, src_loc, dst_type, dst_loc,
i, test_value, expected_value);
return NV_ERR_INVALID_STATE;
}
}
return NV_OK;
}
static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
bool is_proxy_va_space;
uvm_gpu_address_t gpu_verif_addr;
void *cpu_verif_addr;
uvm_mem_t *verif_mem = NULL;
uvm_mem_t *sys_uvm_mem = NULL;
uvm_mem_t *gpu_uvm_mem = NULL;
uvm_rm_mem_t *sys_rm_mem = NULL;
uvm_rm_mem_t *gpu_rm_mem = NULL;
uvm_gpu_address_t gpu_addresses[4];
NvU64 gpu_va;
size_t size;
static const size_t element_sizes[] = {1, 4, 8};
const size_t iterations = 4;
size_t i, j, k, s;
uvm_mem_alloc_params_t mem_params = {0};
size = gpu->big_page.internal_size;
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, gpu), done);
gpu_verif_addr = uvm_mem_gpu_address_virtual_kernel(verif_mem, gpu);
cpu_verif_addr = uvm_mem_get_cpu_addr_kernel(verif_mem);
for (i = 0; i < iterations; ++i) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_unaligned_memset(gpu,
gpu_verif_addr,
cpu_verif_addr,
size,
element_sizes[s],
i),
done);
}
}
// Using a page size equal to the allocation size ensures that the UVM
// memories about to be allocated are physically contiguous. And since the
// size is a valid GPU page size, the memories can be virtually mapped on
// the GPU if needed.
mem_params.size = size;
mem_params.page_size = size;
mem_params.mm = current->mm;
// Physical address in sysmem
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
gpu_addresses[0] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
// Physical address in vidmem
mem_params.backing_gpu = gpu;
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
// Virtual address (in UVM's internal address space) backed by vidmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, &gpu_rm_mem), done);
is_proxy_va_space = false;
gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va);
// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, &sys_rm_mem), done);
gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va);
for (i = 0; i < iterations; ++i) {
for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[k],
gpu_addresses[j],
size,
element_sizes[s],
gpu_verif_addr,
cpu_verif_addr,
i),
done);
}
}
}
}
done:
uvm_rm_mem_free(sys_rm_mem);
uvm_rm_mem_free(gpu_rm_mem);
uvm_mem_free(gpu_uvm_mem);
uvm_mem_free(sys_uvm_mem);
uvm_mem_free(verif_mem);
return status;
}
static NV_STATUS test_semaphore_alloc_sem(uvm_gpu_t *gpu, size_t size, uvm_mem_t **mem_out)
{
NvU64 gpu_va;
NV_STATUS status = NV_OK;
uvm_mem_t *mem = NULL;
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(mem, gpu), error);
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
// This semaphore resides in the uvm_mem region, i.e., it has the GPU VA
// MSbit set. The intent is to validate semaphore operations when the
// semaphore's VA is in the high-end of the GPU effective virtual address
// space spectrum, i.e., its VA upper-bit is set.
TEST_CHECK_GOTO(gpu_va & (1ULL << (gpu->address_space_tree.hal->num_va_bits() - 1)), error);
*mem_out = mem;
return NV_OK;
error:
uvm_mem_free(mem);
return status;
}
// test_semaphore_reduction_inc is similar in concept to test_membar(). It uses
// uvm_mem (instead of uvm_rm_mem) as the semaphore, i.e., it assumes that the
// CE HAL has been validated, since uvm_mem needs the CE memset/memcopy to be
// operational as a pre-requisite for GPU PTE writes. The purpose of
// test_semaphore_reduction_inc is to validate the reduction inc operation on
// semaphores with their VA's upper-bit set.
static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_push_t push;
uvm_mem_t *mem;
NvU64 gpu_va;
NvU32 i;
NvU32 *host_ptr = NULL;
NvU32 value;
// Semaphore reduction needs 1 word (4 bytes).
const size_t size = sizeof(NvU32);
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_CHECK_RET(status == NV_OK);
// Initialize the counter of reductions.
host_ptr = uvm_mem_get_cpu_addr_kernel(mem);
TEST_CHECK_GOTO(host_ptr != NULL, done);
*host_ptr = 0;
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_reduction_inc test");
TEST_CHECK_GOTO(status == NV_OK, done);
for (i = 0; i < REDUCTIONS; i++) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, i+1);
}
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
value = *host_ptr;
if (value != REDUCTIONS) {
UVM_TEST_PRINT("Value = %u instead of %u, GPU %s\n", value, REDUCTIONS, uvm_gpu_name(gpu));
status = NV_ERR_INVALID_STATE;
goto done;
}
done:
uvm_mem_free(mem);
return status;
}
static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_push_t push;
uvm_mem_t *mem;
NvU64 gpu_va;
NvU32 value;
NvU32 *host_ptr = NULL;
NvU32 payload = 0xA5A55A5A;
// Semaphore release needs 1 word (4 bytes).
const size_t size = sizeof(NvU32);
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_CHECK_RET(status == NV_OK);
// Initialize the payload.
host_ptr = uvm_mem_get_cpu_addr_kernel(mem);
TEST_CHECK_GOTO(host_ptr != NULL, done);
*host_ptr = 0;
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_release test");
TEST_CHECK_GOTO(status == NV_OK, done);
gpu->parent->ce_hal->semaphore_release(&push, gpu_va, payload);
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
value = *host_ptr;
if (value != payload) {
UVM_TEST_PRINT("Semaphore payload = %u instead of %u, GPU %s\n", value, payload, uvm_gpu_name(gpu));
status = NV_ERR_INVALID_STATE;
goto done;
}
done:
uvm_mem_free(mem);
return status;
}
static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_push_t push;
uvm_mem_t *mem;
NvU64 gpu_va;
NvU32 i;
NvU64 *timestamp;
NvU64 last_timestamp = 0;
// 2 iterations:
// 1: compare retrieved timestamp with 0;
// 2: compare retrieved timestamp with previous timestamp (obtained in 1).
const NvU32 iterations = 2;
// The semaphore is 4 words long (16 bytes).
const size_t size = 16;
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_CHECK_RET(status == NV_OK);
timestamp = uvm_mem_get_cpu_addr_kernel(mem);
TEST_CHECK_GOTO(timestamp != NULL, done);
memset(timestamp, 0, size);
// Shift the timestamp pointer to where the semaphore timestamp info is.
timestamp += 1;
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
for (i = 0; i < iterations; i++) {
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
&push,
"semaphore_timestamp test, iter: %u",
i);
TEST_CHECK_GOTO(status == NV_OK, done);
gpu->parent->ce_hal->semaphore_timestamp(&push, gpu_va);
status = uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
TEST_CHECK_GOTO(*timestamp != 0, done);
TEST_CHECK_GOTO(*timestamp >= last_timestamp, done);
last_timestamp = *timestamp;
}
done:
uvm_mem_free(mem);
return status;
}
static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
{
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
TEST_NV_CHECK_RET(test_non_pipelined(gpu));
TEST_NV_CHECK_RET(test_membar(gpu));
TEST_NV_CHECK_RET(test_memcpy_and_memset(gpu));
TEST_NV_CHECK_RET(test_semaphore_reduction_inc(gpu));
TEST_NV_CHECK_RET(test_semaphore_release(gpu));
if (!skipTimestampTest)
TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
}
return NV_OK;
}
NV_STATUS uvm_test_ce_sanity(UVM_TEST_CE_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_va_space_down_read_rm(va_space);
status = test_ce(va_space, params->skipTimestampTest);
if (status != NV_OK)
goto done;
done:
uvm_va_space_up_read_rm(va_space);
return status;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,487 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_CHANNEL_H__
#define __UVM_CHANNEL_H__
#include "nv_uvm_types.h"
#include "uvm_forward_decl.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_pushbuffer.h"
#include "uvm_tracker.h"
//
// UVM channels
//
// A channel manager is created as part of the GPU addition. This involves
// creating channels for each of the supported types (uvm_channel_type_t) in
// separate channel pools possibly using different CE instances in the HW. Each
// channel has a uvm_gpu_tracking_semaphore_t and a set of uvm_gpfifo_entry_t
// (one per each HW GPFIFO entry) allowing to track completion of pushes on the
// channel.
//
// Beginning a push on a channel implies reserving a GPFIFO entry in that
// channel and hence there can only be as many on-going pushes per channel as
// there are free GPFIFO entries. This ensures that ending a push won't have to
// wait for a GPFIFO entry to free up.
//
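//
// As a rough usage sketch (illustrative and simplified from the CE sanity
// test; error handling trimmed, and "dst_gpu_address"/"size" are
// hypothetical): a caller begins a push on a channel of the desired type,
// records GPU work through the HAL, and ends the push, optionally waiting
// for the channel's tracking semaphore.
//
//     uvm_push_t push;
//     NV_STATUS status = uvm_push_begin(gpu->channel_manager,
//                                       UVM_CHANNEL_TYPE_GPU_INTERNAL,
//                                       &push,
//                                       "example memset");
//     if (status == NV_OK) {
//         gpu->parent->ce_hal->memset_4(&push, dst_gpu_address, 0, size);
//         status = uvm_push_end_and_wait(&push);
//     }
//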
// Channel types
typedef enum
{
// CPU to GPU copies
UVM_CHANNEL_TYPE_CPU_TO_GPU,
// GPU to CPU copies
UVM_CHANNEL_TYPE_GPU_TO_CPU,
// Memsets and copies within the GPU
UVM_CHANNEL_TYPE_GPU_INTERNAL,
// Memops and small memsets/copies for writing PTEs
UVM_CHANNEL_TYPE_MEMOPS,
// GPU to GPU peer copies
UVM_CHANNEL_TYPE_GPU_TO_GPU,
UVM_CHANNEL_TYPE_CE_COUNT,
// ^^^^^^
// Channel types backed by a CE.
UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT,
} uvm_channel_type_t;
typedef enum
{
// A pool that contains CE channels owned by UVM.
UVM_CHANNEL_POOL_TYPE_CE = (1 << 0),
// A proxy pool contains only proxy channels, so it only exists in SR-IOV
// heavy. The pool is only used for UVM_CHANNEL_TYPE_MEMOPS pushes.
//
// A proxy channel is a privileged CE channel owned by the vGPU plugin. A
// proxy channel cannot be manipulated directly by the UVM driver, who
// instead can only submit work to it by invoking an RM API.
//
// There is a single proxy pool and channel per GPU.
UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1),
UVM_CHANNEL_POOL_TYPE_COUNT = 2,
// A mask used to select pools of any type.
UVM_CHANNEL_POOL_TYPE_MASK = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1)
} uvm_channel_pool_type_t;
struct uvm_gpfifo_entry_struct
{
// Offset of the pushbuffer in the pushbuffer allocation used by this entry
NvU32 pushbuffer_offset;
// Size of the pushbuffer used for this entry
NvU32 pushbuffer_size;
// List node used by the pushbuffer tracking
struct list_head pending_list_node;
// Channel tracking semaphore value that indicates completion of this entry
NvU64 tracking_semaphore_value;
// Push info for the pending push that used this GPFIFO entry
uvm_push_info_t *push_info;
};
// A channel pool is a set of channels that use the same engine. For example,
// all channels in a CE pool share the same (logical) Copy Engine.
typedef struct
{
// Owning channel manager
uvm_channel_manager_t *manager;
// Channels in this pool
uvm_channel_t *channels;
// Number of elements in the channel array
NvU32 num_channels;
// Index of the engine associated with the pool (index is an offset from the
// first engine of the same engine type.)
unsigned engine_index;
// Pool type: Refer to the uvm_channel_pool_type_t enum.
uvm_channel_pool_type_t pool_type;
// Lock protecting the state of channels in the pool
uvm_spinlock_t lock;
} uvm_channel_pool_t;
struct uvm_channel_struct
{
// Owning pool
uvm_channel_pool_t *pool;
// The channel name contains the CE index, and (for UVM internal channels)
// the HW runlist and channel IDs.
char name[64];
// Array of gpfifo entries, one per each HW GPFIFO
uvm_gpfifo_entry_t *gpfifo_entries;
// Number of GPFIFO entries in gpfifo_entries
NvU32 num_gpfifo_entries;
// Latest GPFIFO entry submitted to the GPU
// Updated when new pushes are submitted to the GPU in
// uvm_channel_end_push().
NvU32 cpu_put;
// Latest GPFIFO entry completed by the GPU
// Updated by uvm_channel_update_progress() after checking pending GPFIFOs
// for completion.
NvU32 gpu_get;
// Number of currently on-going pushes on this channel
// A new push is only allowed to begin on the channel if there is a free
// GPFIFO entry for it.
NvU32 current_pushes_count;
// Array of uvm_push_info_t for all pending pushes on the channel
uvm_push_info_t *push_infos;
// Array of uvm_push_acquire_info_t for all pending pushes on the channel.
// Each entry corresponds to the push_infos entry with the same index.
uvm_push_acquire_info_t *push_acquire_infos;
// List of uvm_push_info_t entries that are currently available. A push info
// entry is not available if it has been assigned to a push
// (uvm_push_begin), and the GPFIFO entry associated with the push has not
// been marked as completed.
struct list_head available_push_infos;
// GPU tracking semaphore tracking the work in the channel
// Each push on the channel increments the semaphore, see
// uvm_channel_end_push().
uvm_gpu_tracking_semaphore_t tracking_sem;
// RM channel information
union
{
// UVM internal channels
struct
{
// UVM-RM interface handle
uvmGpuChannelHandle handle;
// Channel state populated by RM. Includes the GPFIFO, error
// notifier, work submission information etc.
UvmGpuChannelInfo channel_info;
};
// Proxy channels (SR-IOV heavy only)
struct
{
// UVM-RM interface handle
UvmGpuPagingChannelHandle handle;
// Channel state populated by RM. Includes the error notifier.
UvmGpuPagingChannelInfo channel_info;
} proxy;
};
struct
{
struct proc_dir_entry *dir;
struct proc_dir_entry *info;
struct proc_dir_entry *pushes;
} procfs;
// Information managed by the tools event notification mechanism. Mainly
// used to keep a list of channels with pending events, which is needed
// to collect the timestamps of asynchronous operations.
struct
{
struct list_head channel_list_node;
NvU32 pending_event_count;
} tools;
};
struct uvm_channel_manager_struct
{
// The owning GPU
uvm_gpu_t *gpu;
// The pushbuffer used for all pushes done with this channel manager
uvm_pushbuffer_t *pushbuffer;
// Array of channel pools.
uvm_channel_pool_t *channel_pools;
// Number of elements in the pool array
unsigned num_channel_pools;
// Mask containing the indexes of the usable Copy Engines. Each usable CE
// has a pool associated with it, see channel_manager_ce_pool
DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
struct
{
// Pools to be used by each channel type by default.
//
// Transfers of a given type may use a pool different from that in
// default_for_type[type]. For example, transfers to NvLink GPU
// peers may instead use the more optimal pool stored in the gpu_to_gpu
// array
uvm_channel_pool_t *default_for_type[UVM_CHANNEL_TYPE_COUNT];
// Optimal pools to use when writing from the owning GPU to its NvLink
// peers.
// If there is no optimal pool (the entry is NULL), use default pool
// default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] instead.
uvm_channel_pool_t *gpu_to_gpu[UVM_ID_MAX_GPUS];
} pool_to_use;
struct
{
struct proc_dir_entry *channels_dir;
struct proc_dir_entry *pending_pushes;
} procfs;
struct
{
NvU32 num_gpfifo_entries;
UVM_BUFFER_LOCATION gpfifo_loc;
UVM_BUFFER_LOCATION gpput_loc;
UVM_BUFFER_LOCATION pushbuffer_loc;
} conf;
};
// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
static bool uvm_channel_is_proxy(uvm_channel_t *channel)
{
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
return channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
}
static bool uvm_channel_is_ce(uvm_channel_t *channel)
{
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel);
}
// Proxy channels are used to push page tree related methods, so their channel
// type is UVM_CHANNEL_TYPE_MEMOPS.
static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
{
return UVM_CHANNEL_TYPE_MEMOPS;
}
// Privileged channels support all the Host and engine methods, while
// non-privileged channels don't support privileged methods.
//
// A major limitation of non-privileged CE channels is lack of physical
// addressing support.
bool uvm_channel_is_privileged(uvm_channel_t *channel);
// Destroy the channel manager
void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager);
// Get the current status of the channel
// Returns NV_OK if the channel is in a good state and NV_ERR_RC_ERROR
// otherwise. Notably this never sets the global fatal error.
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel);
// Check for channel errors
// Checks for channel errors by calling uvm_channel_get_status(). If an error
// occurred, sets the global fatal error and prints errors.
NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel);
// Check errors on all channels in the channel manager
// Also includes uvm_global_get_status
NV_STATUS uvm_channel_manager_check_errors(uvm_channel_manager_t *channel_manager);
// Retrieve the GPFIFO entry that caused a channel error
// The channel has to be in error state prior to calling this function.
uvm_gpfifo_entry_t *uvm_channel_get_fatal_entry(uvm_channel_t *channel);
// Update progress of a specific channel
// Returns the number of still pending GPFIFO entries for that channel.
// Notably, some of the pending GPFIFO entries might already be completed, but
// the update early-outs after completing a fixed number of them to spread the
// cost of the updates across calls.
NvU32 uvm_channel_update_progress(uvm_channel_t *channel);
// Update progress of all channels
// Returns the number of still pending GPFIFO entries for all channels.
// Notably, some of the pending GPFIFO entries might already be completed, but
// the update early-outs after completing a fixed number of them to spread the
// cost of the updates across calls.
NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager);
// Wait for all channels to idle
// It waits for anything that is running, but doesn't prevent new work from
// beginning.
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
// associated with access_channel.
//
// The channels can belong to different GPUs, the same GPU, or even be
// identical, in which case uvm_channel_tracking_semaphore_get_gpu_va can be
// used instead.
NvU64 uvm_channel_tracking_semaphore_get_gpu_va_in_channel(uvm_channel_t *semaphore_channel,
uvm_channel_t *access_channel);
// See above.
static NvU64 uvm_channel_tracking_semaphore_get_gpu_va(uvm_channel_t *channel)
{
return uvm_channel_tracking_semaphore_get_gpu_va_in_channel(channel, channel);
}
// Check whether the channel completed a value
bool uvm_channel_is_value_completed(uvm_channel_t *channel, NvU64 value);
// Update and get the latest completed value by the channel
NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel);
// Select and reserve a channel with the specified type for a push
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager,
uvm_channel_type_t type,
uvm_channel_t **channel_out);
// Select and reserve a channel for a transfer from channel_manager->gpu to
// dst_gpu.
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *channel_manager,
uvm_gpu_t *dst_gpu,
uvm_channel_t **channel_out);
// Reserve a specific channel for a push
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel);
// Set optimal CE for P2P transfers between manager->gpu and peer
void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce);
// Begin a push on a previously reserved channel
// Should be used by uvm_push_*() only.
NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push);
// End a push
// Should be used by uvm_push_end() only.
void uvm_channel_end_push(uvm_push_t *push);
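// Illustrative sketch only (not part of the driver API contract): how the
// reserve/begin/end declarations above fit together, assuming a valid
// uvm_channel_manager_t *manager. Client code goes through the uvm_push_*()
// helpers instead of calling these directly.
//
//     uvm_channel_t *channel;
//     uvm_push_t push;
//     NV_STATUS status = uvm_channel_reserve_type(manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &channel);
//     if (status == NV_OK) {
//         status = uvm_channel_begin_push(channel, &push);
//         // ... push HW methods ...
//         uvm_channel_end_push(&push);
//     }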
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);
void uvm_channel_print_pending_pushes(uvm_channel_t *channel);
static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
{
return channel->pool->manager->gpu;
}
// Index of a channel within the owning pool
static unsigned uvm_channel_index_in_pool(const uvm_channel_t *channel)
{
return channel - channel->pool->channels;
}
NvU32 uvm_channel_update_progress_all(uvm_channel_t *channel);
// Return an arbitrary channel of the given type(s)
uvm_channel_t *uvm_channel_any_of_type(uvm_channel_manager_t *manager, NvU32 pool_type_mask);
// Return an arbitrary channel of any type
static uvm_channel_t *uvm_channel_any(uvm_channel_manager_t *manager)
{
return uvm_channel_any_of_type(manager, UVM_CHANNEL_POOL_TYPE_MASK);
}
// Helper to iterate over all the channels in a pool.
#define uvm_for_each_channel_in_pool(channel, pool) \
for (({UVM_ASSERT(pool->channels); \
channel = pool->channels;}); \
channel != pool->channels + pool->num_channels; \
channel++)
uvm_channel_pool_t *uvm_channel_pool_first(uvm_channel_manager_t *manager, NvU32 pool_type_mask);
uvm_channel_pool_t *uvm_channel_pool_next(uvm_channel_manager_t *manager,
uvm_channel_pool_t *curr_pool,
NvU32 pool_type_mask);
// Helper to iterate over all the channel pools of the given type(s) in a GPU.
// The pool mask must not be zero.
#define uvm_for_each_pool_of_type(pool, manager, pool_type_mask) \
for (pool = uvm_channel_pool_first(manager, pool_type_mask); \
pool != NULL; \
pool = uvm_channel_pool_next(manager, pool, pool_type_mask))
#define uvm_for_each_pool(pool, manager) uvm_for_each_pool_of_type(pool, manager, UVM_CHANNEL_POOL_TYPE_MASK)
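// Illustrative sketch of the iterators above (assuming a valid
// uvm_channel_manager_t *manager), mirroring the pattern used by the channel
// tests:
//
//     uvm_channel_pool_t *pool;
//     uvm_for_each_pool(pool, manager) {
//         uvm_channel_t *channel;
//         uvm_for_each_channel_in_pool(channel, pool) {
//             // ... operate on channel ...
//         }
//     }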
#endif // __UVM_CHANNEL_H__

View File

@@ -0,0 +1,844 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_channel.h"
#include "uvm_hal.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_test_rng.h"
#include "uvm_va_space.h"
#include "uvm_tracker.h"
#include "uvm_thread_context.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_kvmalloc.h"
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU 1024
#define TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU 64
// Schedule pushes one after another on all GPUs and channel types; each push
// copies and increments a counter into an adjacent memory location in a
// buffer. Then verify on the CPU that all the values are correct.
static NV_STATUS test_ordering(uvm_va_space_t *va_space)
{
NV_STATUS status;
uvm_gpu_t *gpu;
bool exclude_proxy_channel_type;
NvU32 i, j;
uvm_rm_mem_t *mem = NULL;
NvU32 *host_mem;
uvm_push_t push;
NvU64 gpu_va;
uvm_tracker_t tracker = UVM_TRACKER_INIT();
NvU32 value = 0;
const NvU32 iters_per_channel_type_per_gpu = g_uvm_global.num_simulated_devices > 0 ?
TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU_EMU :
TEST_ORDERING_ITERS_PER_CHANNEL_TYPE_PER_GPU;
const NvU32 values_count = iters_per_channel_type_per_gpu;
const size_t buffer_size = sizeof(NvU32) * values_count;
gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL);
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, &mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_mem = (NvU32*)uvm_rm_mem_get_cpu_va(mem);
memset(host_mem, 0, buffer_size);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Initial memset");
TEST_CHECK_GOTO(status == NV_OK, done);
gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
// Semaphore release as part of uvm_push_end() will do the membar
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memset_v_4(&push, gpu_va, 0, buffer_size);
uvm_push_end(&push);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
exclude_proxy_channel_type = uvm_gpu_uses_proxy_channel_pool(gpu);
for (i = 0; i < iters_per_channel_type_per_gpu; ++i) {
for (j = 0; j < UVM_CHANNEL_TYPE_CE_COUNT; ++j) {
uvm_channel_type_t channel_type = j;
// Proxy channels don't support the virtual memcopies that are about
// to be pushed, so don't test the proxy channel type on any of the
// GPUs.
if (exclude_proxy_channel_type && (channel_type == uvm_channel_proxy_channel_type()))
continue;
for_each_va_space_gpu(gpu, va_space) {
NvU64 gpu_va_base;
NvU64 gpu_va_src;
NvU64 gpu_va_dst;
status = uvm_push_begin_acquire(gpu->channel_manager,
channel_type,
&tracker,
&push,
"memcpy and inc to %u",
value + 1);
TEST_CHECK_GOTO(status == NV_OK, done);
gpu_va_base = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(push.channel));
gpu_va_src = gpu_va_base + (value % values_count) * sizeof(NvU32);
gpu_va_dst = gpu_va_base + ((value + 1) % values_count) * sizeof(NvU32);
// The semaphore reduction will do a membar before the reduction
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->memcopy_v_to_v(&push, gpu_va_dst, gpu_va_src, sizeof(NvU32));
// The following reduction is done from the same GPU, but the
// previous memcpy is to uncached sysmem and that bypasses L2
// and hence requires a SYSMEMBAR to be ordered.
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va_dst, ++value);
uvm_push_end(&push);
uvm_tracker_clear(&tracker);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
}
}
}
status = uvm_tracker_wait(&tracker);
TEST_CHECK_GOTO(status == NV_OK, done);
// At this moment, this should hold:
// mem[value % values_count] == value
// mem[(value + 1) % values_count] == value + 1 - values_count
// And in general, for i=[0, values_count):
// mem[(value + 1 + i) % values_count] == value + 1 - values_count + i
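// For example (illustrative), with values_count == 4 and a final value of 10:
//   mem[2] == 10, mem[3] == 7, mem[0] == 8, mem[1] == 9
// i.e. the buffer holds {8, 9, 10, 7}.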
// Verify that
for (i = 0; i < values_count; ++i) {
NvU32 index = (value + 1 + i) % values_count;
NvU32 expected = (value + 1 + i) - values_count;
if (host_mem[index] != expected) {
UVM_TEST_PRINT("Bad value at host_mem[%u] = %u instead of %u\n", index, host_mem[index], expected);
status = NV_ERR_INVALID_STATE;
goto done;
}
}
done:
uvm_tracker_wait(&tracker);
uvm_rm_mem_free(mem);
return status;
}
static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
{
uvm_push_t push;
uvm_channel_pool_t *pool;
uvm_gpfifo_entry_t *fatal_entry;
uvm_push_info_t *push_info;
int fatal_line;
uvm_tracker_entry_t tracker_entry;
NV_STATUS status;
uvm_tracker_t tracker = UVM_TRACKER_INIT();
uvm_channel_manager_t *manager = gpu->channel_manager;
// Submit a bunch of successful pushes on each channel first so that the
// fatal one is behind a bunch of work (notably more than
// uvm_channel_update_progress() completes by default).
uvm_for_each_pool(pool, manager) {
uvm_channel_t *channel;
uvm_for_each_channel_in_pool(channel, pool) {
NvU32 i;
for (i = 0; i < 512; ++i) {
status = uvm_push_begin_on_channel(channel, &push, "Non-faulting push");
TEST_CHECK_RET(status == NV_OK);
uvm_push_end(&push);
}
}
}
// Check RC on a proxy channel (SR-IOV heavy) or internal channel (any other
// mode). It is not allowed to use a virtual address in a memset pushed to
// a proxy channel, so we use a physical address instead.
if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
uvm_gpu_address_t dst_address;
// Save the line number the push that's supposed to fail was started on
fatal_line = __LINE__ + 1;
TEST_NV_CHECK_RET(uvm_push_begin(manager, uvm_channel_proxy_channel_type(), &push, "Fatal push 0x%X", 0xBAD));
// Memset targeting a physical address beyond the vidmem size. The
// passed physical address is not the vidmem size reported by RM
// because the reported size can be smaller than the actual physical
// size, such that accessing a GPA at the reported size may be allowed
// by VMMU.
//
// GA100 GPUs have far less than UVM_GPU_MAX_PHYS_MEM of vidmem, so using
// that value as a physical address should result in an error.
dst_address = uvm_gpu_address_physical(UVM_APERTURE_VID, UVM_GPU_MAX_PHYS_MEM - 8);
gpu->parent->ce_hal->memset_8(&push, dst_address, 0, 8);
}
else {
fatal_line = __LINE__ + 1;
TEST_NV_CHECK_RET(uvm_push_begin(manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Fatal push 0x%X", 0xBAD));
// Memset that should fault on 0xFFFFFFFF
gpu->parent->ce_hal->memset_v_4(&push, 0xFFFFFFFF, 0, 4);
}
uvm_push_end(&push);
uvm_push_get_tracker_entry(&push, &tracker_entry);
uvm_tracker_overwrite_with_push(&tracker, &push);
status = uvm_channel_manager_wait(manager);
TEST_CHECK_RET(status == NV_ERR_RC_ERROR);
TEST_CHECK_RET(uvm_channel_get_status(push.channel) == NV_ERR_RC_ERROR);
fatal_entry = uvm_channel_get_fatal_entry(push.channel);
TEST_CHECK_RET(fatal_entry != NULL);
push_info = fatal_entry->push_info;
TEST_CHECK_RET(push_info != NULL);
TEST_CHECK_RET(push_info->line == fatal_line);
TEST_CHECK_RET(strcmp(push_info->function, __FUNCTION__) == 0);
TEST_CHECK_RET(strcmp(push_info->filename, kbasename(__FILE__)) == 0);
if (uvm_push_info_is_tracking_descriptions())
TEST_CHECK_RET(strcmp(push_info->description, "Fatal push 0xBAD") == 0);
TEST_CHECK_RET(uvm_global_get_status() == NV_ERR_RC_ERROR);
// Check that waiting for an entry after a global fatal error makes the
// entry completed.
TEST_CHECK_RET(!uvm_tracker_is_entry_completed(&tracker_entry));
TEST_CHECK_RET(uvm_tracker_wait_for_entry(&tracker_entry) == NV_ERR_RC_ERROR);
TEST_CHECK_RET(uvm_tracker_is_entry_completed(&tracker_entry));
// Check that waiting for a tracker after a global fatal error clears all
// the entries from the tracker.
TEST_CHECK_RET(!uvm_tracker_is_empty(&tracker));
TEST_CHECK_RET(uvm_tracker_wait(&tracker) == NV_ERR_RC_ERROR);
TEST_CHECK_RET(uvm_tracker_is_empty(&tracker));
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_RC_ERROR);
return NV_OK;
}
static NV_STATUS test_rc(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_va_space_gpu(gpu, va_space) {
NV_STATUS test_status, create_status;
// The GPU channel manager is destroyed and then re-created after
// testing RC, so this test requires exclusive access to the GPU.
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
g_uvm_global.disable_fatal_error_assert = true;
test_status = uvm_test_rc_for_gpu(gpu);
g_uvm_global.disable_fatal_error_assert = false;
uvm_channel_manager_destroy(gpu->channel_manager);
create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);
TEST_NV_CHECK_RET(test_status);
TEST_NV_CHECK_RET(create_status);
}
return NV_OK;
}
typedef struct
{
uvm_push_t push;
uvm_tracker_t tracker;
uvm_gpu_semaphore_t semaphore;
NvU32 queued_counter_value;
NvU32 queued_counter_repeat;
uvm_rm_mem_t *counter_mem;
uvm_rm_mem_t *counter_snapshots_mem;
uvm_rm_mem_t *other_stream_counter_snapshots_mem;
NvU32 *counter_snapshots;
NvU32 *other_stream_counter_snapshots;
NvU32 *other_stream_counter_expected;
} uvm_test_stream_t;
#define MAX_COUNTER_REPEAT_COUNT 10 * 1024
// For each iteration, snapshot the first and last counter values
#define TEST_SNAPSHOT_SIZE(it) (2 * it * sizeof(NvU32))
static void snapshot_counter(uvm_push_t *push,
uvm_rm_mem_t *counter_mem,
uvm_rm_mem_t *snapshot_mem,
NvU32 index,
NvU32 counters_count)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU64 counter_gpu_va;
NvU64 snapshot_gpu_va;
bool is_proxy_channel;
NvU32 last_counter_offset = (counters_count - 1) * sizeof(NvU32);
if (counters_count == 0)
return;
is_proxy_channel = uvm_channel_is_proxy(push->channel);
counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
snapshot_gpu_va = uvm_rm_mem_get_gpu_va(snapshot_mem, gpu, is_proxy_channel) + index * 2 * sizeof(NvU32);
// Copy the last and first counter to a snapshot for later verification.
// Membar will be done by uvm_push_end()
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(push,
snapshot_gpu_va + sizeof(NvU32),
counter_gpu_va + last_counter_offset,
sizeof(NvU32));
// Membar will be done by uvm_push_end()
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
gpu->parent->ce_hal->memcopy_v_to_v(push, snapshot_gpu_va, counter_gpu_va, sizeof(NvU32));
}
static void set_counter(uvm_push_t *push, uvm_rm_mem_t *counter_mem, NvU32 value, NvU32 count)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU64 counter_gpu_va;
bool is_proxy_channel;
is_proxy_channel = uvm_channel_is_proxy(push->channel);
counter_gpu_va = uvm_rm_mem_get_gpu_va(counter_mem, gpu, is_proxy_channel);
gpu->parent->ce_hal->memset_v_4(push, counter_gpu_va, value, count * sizeof(NvU32));
}
static uvm_channel_type_t random_ce_channel_type(uvm_test_rng_t *rng)
{
return (uvm_channel_type_t)uvm_test_rng_range_32(rng, 0, UVM_CHANNEL_TYPE_CE_COUNT - 1);
}
static uvm_channel_type_t random_ce_channel_type_except(uvm_test_rng_t *rng, uvm_channel_type_t exception)
{
uvm_channel_type_t channel_type;
UVM_ASSERT(exception < UVM_CHANNEL_TYPE_CE_COUNT);
channel_type = (uvm_channel_type_t)uvm_test_rng_range_32(rng, 0, UVM_CHANNEL_TYPE_CE_COUNT - 2);
if (channel_type >= exception)
channel_type++;
UVM_ASSERT(channel_type < UVM_CHANNEL_TYPE_CE_COUNT);
return channel_type;
}
static uvm_channel_type_t gpu_random_internal_ce_channel_type(uvm_gpu_t *gpu, uvm_test_rng_t *rng)
{
if (uvm_gpu_uses_proxy_channel_pool(gpu))
return random_ce_channel_type_except(rng, uvm_channel_proxy_channel_type());
return random_ce_channel_type(rng);
}
static uvm_gpu_t *random_va_space_gpu(uvm_test_rng_t *rng, uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
NvU32 gpu_count = uvm_processor_mask_get_gpu_count(&va_space->registered_gpus);
NvU32 gpu_index = uvm_test_rng_range_32(rng, 0, gpu_count - 1);
UVM_ASSERT(gpu_count > 0);
for_each_va_space_gpu(gpu, va_space) {
if (gpu_index-- == 0)
return gpu;
}
UVM_ASSERT(0);
return NULL;
}
static void test_memset_rm_mem(uvm_push_t *push, uvm_rm_mem_t *rm_mem, NvU32 value)
{
uvm_gpu_t *gpu;
NvU64 gpu_va;
UVM_ASSERT(rm_mem->size % 4 == 0);
gpu = uvm_push_get_gpu(push);
gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_channel_is_proxy(push->channel));
gpu->parent->ce_hal->memset_v_4(push, gpu_va, value, rm_mem->size);
}
// This test schedules a randomly sized memset on a random channel and GPU in a
// "stream" that has operations ordered by acquiring the tracker of the previous
// operation. It also snapshots the memset done by the previous operation in the
// stream to verify it later on the CPU. Each iteration also optionally acquires
// a different stream and snapshots its memset.
// The test ioctl is expected to be called at the same time from multiple
// threads and contains some schedule() calls to help get as many threads
// through the init phase before other threads continue. It also has a random
// schedule() call in the main loop scheduling GPU work.
static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
NvU32 num_streams,
NvU32 iterations_per_stream,
NvU32 seed,
NvU32 verbose)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu;
NvU32 i, j;
uvm_test_stream_t *streams;
uvm_test_rng_t rng;
uvm_test_rng_init(&rng, seed);
gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL);
streams = uvm_kvmalloc_zero(sizeof(*streams) * num_streams);
TEST_CHECK_RET(streams != NULL);
// Initialize all the trackers first so that clean up on error can always
// wait for them.
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
uvm_tracker_init(&stream->tracker);
}
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
status = uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &stream->semaphore);
if (status != NV_OK)
goto done;
stream->queued_counter_value = 0;
status = uvm_rm_mem_alloc_and_map_all(gpu,
UVM_RM_MEM_TYPE_SYS,
MAX_COUNTER_REPEAT_COUNT * sizeof(NvU32),
&stream->counter_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
status = uvm_rm_mem_alloc_and_map_all(gpu,
UVM_RM_MEM_TYPE_SYS,
TEST_SNAPSHOT_SIZE(iterations_per_stream),
&stream->counter_snapshots_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
stream->counter_snapshots = (NvU32*)uvm_rm_mem_get_cpu_va(stream->counter_snapshots_mem);
status = uvm_rm_mem_alloc_and_map_all(gpu,
UVM_RM_MEM_TYPE_SYS,
TEST_SNAPSHOT_SIZE(iterations_per_stream),
&stream->other_stream_counter_snapshots_mem);
TEST_CHECK_GOTO(status == NV_OK, done);
stream->other_stream_counter_snapshots = (NvU32*)uvm_rm_mem_get_cpu_va(stream->other_stream_counter_snapshots_mem);
stream->other_stream_counter_expected = uvm_kvmalloc_zero(sizeof(NvU32) * iterations_per_stream);
if (stream->other_stream_counter_expected == NULL) {
status = NV_ERR_NO_MEMORY;
goto done;
}
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, &stream->push, "stream %u init", i);
TEST_CHECK_GOTO(status == NV_OK, done);
test_memset_rm_mem(&stream->push, stream->counter_mem, 0);
test_memset_rm_mem(&stream->push, stream->counter_snapshots_mem, 0);
test_memset_rm_mem(&stream->push, stream->other_stream_counter_snapshots_mem, 0);
status = uvm_push_end_and_wait(&stream->push);
TEST_CHECK_GOTO(status == NV_OK, done);
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
goto done;
}
// Let other threads run
schedule();
}
if (verbose > 0) {
UVM_TEST_PRINT("Init done, seed %u, GPUs:\n", seed);
for_each_va_space_gpu(gpu, va_space) {
UVM_TEST_PRINT(" GPU %s\n", uvm_gpu_name(gpu));
}
}
for (i = 0; i < iterations_per_stream; ++i) {
for (j = 0; j < num_streams; ++j) {
uvm_test_stream_t *stream = &streams[j];
uvm_channel_type_t channel_type;
gpu = random_va_space_gpu(&rng, va_space);
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
goto done;
}
// Select a random channel type. In SR-IOV heavy the selection has
// to exclude the type associated with proxy channels, because they
// do not support the virtual memcopies/memsets pushed by
// snapshot_counter and set_counter
channel_type = gpu_random_internal_ce_channel_type(gpu, &rng);
status = uvm_push_begin_acquire(gpu->channel_manager,
channel_type,
&stream->tracker,
&stream->push,
"stream %u payload %u gid %u channel_type %u",
j,
stream->queued_counter_value,
uvm_id_value(gpu->id),
channel_type);
TEST_CHECK_GOTO(status == NV_OK, done);
snapshot_counter(&stream->push,
stream->counter_mem,
stream->counter_snapshots_mem,
i,
stream->queued_counter_repeat);
// Set a random number [2, MAX_COUNTER_REPEAT_COUNT] of counters
stream->queued_counter_repeat = uvm_test_rng_range_32(&rng, 2, MAX_COUNTER_REPEAT_COUNT);
set_counter(&stream->push,
stream->counter_mem,
++stream->queued_counter_value,
stream->queued_counter_repeat);
if (uvm_test_rng_range_32(&rng, 0, 1) == 0) {
NvU32 random_stream_index = uvm_test_rng_range_32(&rng, 0, num_streams - 1);
uvm_test_stream_t *random_stream = &streams[random_stream_index];
uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
snapshot_counter(&stream->push,
random_stream->counter_mem,
stream->other_stream_counter_snapshots_mem,
i,
random_stream->queued_counter_repeat);
}
uvm_push_end(&stream->push);
uvm_tracker_clear(&stream->tracker);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&stream->tracker, &stream->push), done);
}
// Randomly schedule other threads
if (uvm_test_rng_range_32(&rng, 0, 9) == 0)
schedule();
}
if (verbose > 0)
UVM_TEST_PRINT("All work scheduled\n");
// Let other threads run
schedule();
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
status = uvm_tracker_wait(&stream->tracker);
if (status != NV_OK) {
UVM_TEST_PRINT("Failed to wait for the tracker for stream %u: %s\n", i, nvstatusToString(status));
goto done;
}
for (j = 0; j < iterations_per_stream; ++j) {
NvU32 snapshot_last = stream->counter_snapshots[j * 2];
NvU32 snapshot_first = stream->counter_snapshots[j * 2 + 1];
if (snapshot_last != j || snapshot_first != j) {
UVM_TEST_PRINT("Stream %u counter snapshot[%u] = %u,%u instead of %u,%u\n",
i,
j,
snapshot_last,
snapshot_first,
j,
j);
status = NV_ERR_INVALID_STATE;
goto done;
}
}
for (j = 0; j < iterations_per_stream; ++j) {
NvU32 snapshot_last = stream->other_stream_counter_snapshots[j * 2];
NvU32 snapshot_first = stream->other_stream_counter_snapshots[j * 2 + 1];
NvU32 expected = stream->other_stream_counter_expected[j];
if (snapshot_last < expected || snapshot_first < expected) {
UVM_TEST_PRINT("Stream %u other_counter snapshot[%u] = %u,%u which is < of %u,%u\n",
i,
j,
snapshot_last,
snapshot_first,
expected,
expected);
status = NV_ERR_INVALID_STATE;
goto done;
}
}
}
if (verbose > 0)
UVM_TEST_PRINT("Verification done\n");
schedule();
done:
// Wait for all the trackers first before freeing up memory, as streams
// reference each other's buffers.
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
uvm_tracker_wait(&stream->tracker);
}
for (i = 0; i < num_streams; ++i) {
uvm_test_stream_t *stream = &streams[i];
uvm_gpu_semaphore_free(&stream->semaphore);
uvm_rm_mem_free(stream->other_stream_counter_snapshots_mem);
uvm_rm_mem_free(stream->counter_snapshots_mem);
uvm_rm_mem_free(stream->counter_mem);
uvm_tracker_deinit(&stream->tracker);
uvm_kvfree(stream->other_stream_counter_expected);
}
uvm_kvfree(streams);
if (verbose > 0)
UVM_TEST_PRINT("Cleanup done\n");
return status;
}
NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_read_rm(va_space);
status = test_ordering(va_space);
if (status != NV_OK)
goto done;
if (g_uvm_global.num_simulated_devices == 0) {
status = test_rc(va_space);
if (status != NV_OK)
goto done;
}
done:
uvm_va_space_up_read_rm(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
NV_STATUS status;
if (params->iterations == 0 || params->num_streams == 0)
return NV_ERR_INVALID_PARAMETER;
// TODO: Bug 1764963: Rework the test to not rely on the global lock as that
// serializes all the threads calling this at the same time.
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_read_rm(va_space);
status = stress_test_all_gpus_in_va(va_space,
params->num_streams,
params->iterations,
params->seed,
params->verbose);
if (status != NV_OK)
goto done;
done:
uvm_va_space_up_read_rm(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS uvm_test_channel_stress_update_channels(uvm_va_space_t *va_space,
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
NV_STATUS status = NV_OK;
uvm_test_rng_t rng;
NvU32 i;
uvm_test_rng_init(&rng, params->seed);
uvm_va_space_down_read(va_space);
for (i = 0; i < params->iterations; ++i) {
uvm_gpu_t *gpu = random_va_space_gpu(&rng, va_space);
uvm_channel_manager_update_progress(gpu->channel_manager);
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
goto done;
}
}
done:
uvm_va_space_up_read(va_space);
return status;
}
static NV_STATUS uvm_test_channel_noop_push(uvm_va_space_t *va_space,
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
{
NV_STATUS status = NV_OK;
uvm_push_t push;
uvm_test_rng_t rng;
uvm_gpu_t *gpu;
NvU32 i;
uvm_test_rng_init(&rng, params->seed);
uvm_va_space_down_read(va_space);
for (i = 0; i < params->iterations; ++i) {
uvm_channel_type_t channel_type = random_ce_channel_type(&rng);
gpu = random_va_space_gpu(&rng, va_space);
status = uvm_push_begin(gpu->channel_manager, channel_type, &push, "noop push");
if (status != NV_OK)
goto done;
// Push an actual noop method so that the push doesn't get optimized
// away if we ever detect empty pushes.
gpu->parent->host_hal->noop(&push, UVM_METHOD_SIZE);
uvm_push_end(&push);
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
goto done;
}
}
if (params->verbose > 0)
UVM_TEST_PRINT("Noop pushes: completed %u pushes seed: %u\n", i, params->seed);
for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
NV_STATUS wait_status = uvm_channel_manager_wait(gpu->channel_manager);
if (status == NV_OK)
status = wait_status;
}
done:
uvm_va_space_up_read(va_space);
return status;
}
NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
switch (params->mode) {
case UVM_TEST_CHANNEL_STRESS_MODE_STREAM:
return uvm_test_channel_stress_stream(va_space, params);
case UVM_TEST_CHANNEL_STRESS_MODE_UPDATE_CHANNELS:
return uvm_test_channel_stress_update_channels(va_space, params);
case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
return uvm_test_channel_noop_push(va_space, params);
default:
return NV_ERR_INVALID_PARAMETER;
}
}

View File

@@ -0,0 +1,322 @@
/*******************************************************************************
Copyright (c) 2013-2021 NVIDIA Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
// TODO: Bug 1710855: Tweak this number through benchmarks
#define UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS (10*1000ULL)
#define UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC 30ULL
// Default to debug prints being enabled for debug and develop builds and
// disabled for release builds.
static int uvm_debug_prints = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
// Make the module param writable so that prints can be enabled or disabled at
// any time by modifying the module parameter.
module_param(uvm_debug_prints, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_debug_prints, "Enable uvm debug prints.");
bool uvm_debug_prints_enabled()
{
return uvm_debug_prints != 0;
}
// This parameter allows a user-mode program to call the kernel tests defined
// in this module. It must only be enabled for testing, since enabling it
// otherwise breaks security. By default, and for safety, it is set to false.
int uvm_enable_builtin_tests __read_mostly = 0;
module_param(uvm_enable_builtin_tests, int, S_IRUGO);
MODULE_PARM_DESC(uvm_enable_builtin_tests,
"Enable the UVM built-in tests. (This is a security risk)");
//
// Convert kernel errno codes to corresponding NV_STATUS
//
NV_STATUS errno_to_nv_status(int errnoCode)
{
if (errnoCode < 0)
errnoCode = -errnoCode;
switch (errnoCode)
{
case 0:
return NV_OK;
case E2BIG:
case EINVAL:
return NV_ERR_INVALID_ARGUMENT;
case EACCES:
return NV_ERR_INVALID_ACCESS_TYPE;
case EADDRINUSE:
case EADDRNOTAVAIL:
return NV_ERR_UVM_ADDRESS_IN_USE;
case EFAULT:
return NV_ERR_INVALID_ADDRESS;
case EOVERFLOW:
return NV_ERR_OUT_OF_RANGE;
case EINTR:
case EBUSY:
case EAGAIN:
return NV_ERR_BUSY_RETRY;
case ENXIO:
case ENODEV:
return NV_ERR_MODULE_LOAD_FAILED;
case ENOMEM:
return NV_ERR_NO_MEMORY;
case EPERM:
return NV_ERR_INSUFFICIENT_PERMISSIONS;
case ESRCH:
return NV_ERR_PID_NOT_FOUND;
case ETIMEDOUT:
return NV_ERR_TIMEOUT;
case EEXIST:
return NV_ERR_IN_USE;
case ENOSYS:
case EOPNOTSUPP:
return NV_ERR_NOT_SUPPORTED;
case ENOENT:
return NV_ERR_NO_VALID_PATH;
case EIO:
return NV_ERR_RC_ERROR;
case ENODATA:
return NV_ERR_OBJECT_NOT_FOUND;
default:
return NV_ERR_GENERIC;
};
}
// Returns POSITIVE errno
int nv_status_to_errno(NV_STATUS status)
{
switch (status) {
case NV_OK:
return 0;
case NV_ERR_BUSY_RETRY:
return EAGAIN;
case NV_ERR_INSUFFICIENT_PERMISSIONS:
return EPERM;
case NV_ERR_GPU_UUID_NOT_FOUND:
return ENODEV;
case NV_ERR_INSUFFICIENT_RESOURCES:
case NV_ERR_NO_MEMORY:
return ENOMEM;
case NV_ERR_INVALID_ACCESS_TYPE:
return EACCES;
case NV_ERR_INVALID_ADDRESS:
return EFAULT;
case NV_ERR_INVALID_ARGUMENT:
case NV_ERR_INVALID_DEVICE:
case NV_ERR_INVALID_PARAMETER:
case NV_ERR_INVALID_REQUEST:
case NV_ERR_INVALID_STATE:
return EINVAL;
case NV_ERR_NOT_SUPPORTED:
return ENOSYS;
case NV_ERR_OBJECT_NOT_FOUND:
return ENODATA;
case NV_ERR_MODULE_LOAD_FAILED:
return ENXIO;
case NV_ERR_OVERLAPPING_UVM_COMMIT:
case NV_ERR_UVM_ADDRESS_IN_USE:
return EADDRINUSE;
case NV_ERR_PID_NOT_FOUND:
return ESRCH;
case NV_ERR_TIMEOUT:
case NV_ERR_TIMEOUT_RETRY:
return ETIMEDOUT;
case NV_ERR_IN_USE:
return EEXIST;
case NV_ERR_NO_VALID_PATH:
return ENOENT;
case NV_ERR_RC_ERROR:
case NV_ERR_ECC_ERROR:
return EIO;
case NV_ERR_OUT_OF_RANGE:
return EOVERFLOW;
default:
UVM_ASSERT_MSG(0, "No errno conversion set up for NV_STATUS %s\n", nvstatusToString(status));
return EINVAL;
}
}
//
// This routine retrieves the process ID of current, but makes no attempt to
// refcount or lock the pid in place.
//
unsigned uvm_get_stale_process_id(void)
{
return (unsigned)task_tgid_vnr(current);
}
unsigned uvm_get_stale_thread_id(void)
{
return (unsigned)task_pid_vnr(current);
}
//
// A simple security rule for allowing access to UVM user space memory: if you
// are the same user as the owner of the memory, or if you are root, then you
// are granted access. The idea is to allow debuggers and profilers to work, but
// without opening up any security holes.
//
NvBool uvm_user_id_security_check(uid_t euidTarget)
{
return (NV_CURRENT_EUID() == euidTarget) ||
(UVM_ROOT_UID == euidTarget);
}
void on_uvm_test_fail(void)
{
(void)NULL;
}
void on_uvm_assert(void)
{
(void)NULL;
#ifdef __COVERITY__
__coverity_panic__()
#endif
}
NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin)
{
NvU64 curr = NV_GETTIME();
// This schedule() is required for functionality, not just system
// performance. It allows RM to run and unblock the UVM driver:
//
// - UVM must service faults in order for RM to idle/preempt a context
// - RM must service interrupts which stall UVM (SW methods, stalling CE
// interrupts, etc) in order for UVM to service faults
//
// Even though UVM's bottom half is preemptable, we have encountered cases
// in which a user thread running in RM won't preempt the UVM driver's
// thread unless the UVM driver thread gives up its timeslice. This is also
// theoretically possible if the RM thread has a low nice priority.
//
// TODO: Bug 1710855: Look into proper prioritization of these threads as a longer-term
// solution.
if (curr - spin->start_time_ns >= UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS && NV_MAY_SLEEP()) {
schedule();
curr = NV_GETTIME();
}
cpu_relax();
// TODO: Bug 1710855: Also check fatal_signal_pending() here if the caller can handle it.
if (curr - spin->print_time_ns >= 1000*1000*1000*UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC) {
spin->print_time_ns = curr;
return NV_ERR_TIMEOUT_RETRY;
}
return NV_OK;
}
// This formats a GPU UUID in a UVM-friendly way. That is, nearly the same as
// what nvidia-smi reports. It will always prefix the UUID with UVM-GPU so
// that we know that we have a real, binary formatted UUID that will work in
// the UVM APIs.
//
// It comes out like this:
//
// UVM-GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
//
// This routine will always null-terminate the string for you. This is true
// even if the buffer was too small!
//
// Return value is the number of non-null characters written.
//
// Note that if you were to let the NV2080_CTRL_CMD_GPU_GET_GID_INFO command
// return its default format, which is ASCII, not binary, then you would get
// this back:
//
// GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
//
// ...which is actually a character string, and won't work for UVM API calls.
// So it's very important to be able to see the difference.
//
static char uvm_digit_to_hex(unsigned value)
{
if (value >= 10)
return value - 10 + 'a';
else
return value + '0';
}
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pUuidStruct)
{
char *str = buffer+8;
unsigned i;
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
// Check the size before writing the prefix so an undersized buffer is never
// overrun.
if (bufferLength < (8 /*prefix*/ + 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
return *buffer = 0;
memcpy(buffer, "UVM-GPU-", 8);
for (i = 0; i < 16; i++) {
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
if (dashMask & (1 << (i+1)))
*str++ = '-';
}
*str = 0;
return (int)(str-buffer);
}

View File

@@ -0,0 +1,357 @@
/*******************************************************************************
Copyright (c) 2013-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _UVM_COMMON_H
#define _UVM_COMMON_H
#ifdef DEBUG
#define UVM_IS_DEBUG() 1
#else
#define UVM_IS_DEBUG() 0
#endif
// NVIDIA_UVM_DEVELOP implies DEBUG, but not vice-versa
// TODO Bug 1773100: Figure out the right distinction between develop and debug
// builds.
#ifdef NVIDIA_UVM_DEVELOP
#define UVM_IS_DEVELOP() 1
#else
#define UVM_IS_DEVELOP() 0
#endif
#include "uvm_types.h"
#include "uvm_linux.h"
enum {
NVIDIA_UVM_PRIMARY_MINOR_NUMBER = 0,
NVIDIA_UVM_TOOLS_MINOR_NUMBER = 1,
// to ensure backward compatibility and correct counting, please insert any
// new minor devices just above the following field:
NVIDIA_UVM_NUM_MINOR_DEVICES
};
#define UVM_GPU_UUID_TEXT_BUFFER_LENGTH (8+16*2+4+1)
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pGpuUuid);
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
func(prefix "%s:%u %s[pid:%d]" fmt, \
kbasename(__FILE__), \
__LINE__, \
__FUNCTION__, \
current->pid, \
##__VA_ARGS__)
#define UVM_PRINT_FUNC(func, fmt, ...) \
UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)
// Check whether UVM_{ERR,DBG,INFO}_PRINT* should be enabled
bool uvm_debug_prints_enabled(void);
// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
// uvm_debug_prints_enabled() returns true.
#define UVM_PRINT_FUNC_PREFIX_CHECK(func, prefix, fmt, ...) \
do { \
if (uvm_debug_prints_enabled()) { \
UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ##__VA_ARGS__); \
} \
} while (0)
#define UVM_ASSERT_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_DBG_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_DBG_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_INFO_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
//
// Please see the documentation of format_uuid_to_buffer, for details on what
// this routine prints for you.
//
#define UVM_DBG_PRINT_UUID(msg, uuidPtr) \
do { \
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
UVM_DBG_PRINT("%s: %s\n", msg, uuidBuffer); \
} while (0)
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)
#define UVM_ERR_PRINT_UUID(msg, uuidPtr, ...) \
do { \
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", uuidBuffer, ##__VA_ARGS__); \
} while (0)
#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
#define UVM_PANIC_MSG(fmt, ...) UVM_PRINT_FUNC(panic, ": " fmt, ##__VA_ARGS__)
#define UVM_PANIC_ON_MSG(cond, fmt, ...) \
do { \
if (unlikely(cond)) \
UVM_PANIC_MSG(fmt, ##__VA_ARGS__); \
} while (0)
#define UVM_PANIC_ON(cond) UVM_PANIC_ON_MSG(cond, "failed cond %s\n", #cond)
// expr may include function calls. Use sizeof to prevent it from being
// evaluated while also preventing unused variable warnings. sizeof() can't be
// used on a bitfield however, so use ! to force the expression to evaluate as
// an int.
#define UVM_IGNORE_EXPR(expr) ((void)sizeof(!(expr)))
#define UVM_IGNORE_EXPR2(expr1, expr2) \
do { \
UVM_IGNORE_EXPR(expr1); \
UVM_IGNORE_EXPR(expr2); \
} while (0)
// NO-OP function to break on_uvm_test_fail - that is just to set a breakpoint
void on_uvm_test_fail(void);
// NO-OP function to break on_uvm_assert - that is just to set a breakpoint
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);
// UVM_ASSERT_RELEASE and UVM_ASSERT_MSG_RELEASE are always enabled, even on
// release builds.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n")
// Prevent function calls in expr and the print argument list from being
// evaluated.
#define UVM_ASSERT_MSG_IGNORE(expr, fmt, ...) \
do { \
UVM_IGNORE_EXPR(expr); \
UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
} while (0)
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG UVM_ASSERT_MSG_RELEASE
#define UVM_ASSERT UVM_ASSERT_RELEASE
#else
#define UVM_ASSERT_MSG(expr, fmt, ...) UVM_ASSERT_MSG_IGNORE(expr, fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) UVM_ASSERT_MSG_IGNORE(expr, "\n")
#endif
// Provide a short form of UUIDs, typically for use in debug printing:
#define ABBREV_UUID(uuid) (unsigned)(uuid)
static inline NvBool uvm_uuid_is_cpu(const NvProcessorUuid *uuid)
{
return memcmp(uuid, &NV_PROCESSOR_UUID_CPU_DEFAULT, sizeof(*uuid)) == 0;
}
#define UVM_ALIGN_DOWN(x, a) ({ \
typeof(x) _a = a; \
UVM_ASSERT(is_power_of_2(_a)); \
(x) & ~(_a - 1); \
})
#define UVM_ALIGN_UP(x, a) ({ \
typeof(x) _a = a; \
UVM_ASSERT(is_power_of_2(_a)); \
((x) + _a - 1) & ~(_a - 1); \
})
#define UVM_PAGE_ALIGN_UP(value) UVM_ALIGN_UP(value, PAGE_SIZE)
#define UVM_PAGE_ALIGN_DOWN(value) UVM_ALIGN_DOWN(value, PAGE_SIZE)
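// Worked examples (illustrative): UVM_ALIGN_UP(0x1234, 0x1000) == 0x2000,
// UVM_ALIGN_DOWN(0x1234, 0x1000) == 0x1000, and with a 4K PAGE_SIZE
// UVM_PAGE_ALIGN_UP(1) == 4096.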
// These macros provide a convenient way to string-ify enum values.
#define UVM_ENUM_STRING_CASE(value) case value: return #value
#define UVM_ENUM_STRING_DEFAULT() default: return "UNKNOWN"
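// Illustrative sketch of how these macros are typically used (the enum type
// and its values here are hypothetical):
//
//     static const char *example_type_to_string(example_type_t type)
//     {
//         switch (type) {
//             UVM_ENUM_STRING_CASE(EXAMPLE_TYPE_FOO);
//             UVM_ENUM_STRING_CASE(EXAMPLE_TYPE_BAR);
//             UVM_ENUM_STRING_DEFAULT();
//         }
//     }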
// Divide by a dynamic value known at runtime to be a power of 2. ilog2 is
// optimized as a single instruction in many processors, whereas integer
// division is always slow.
static inline NvU32 uvm_div_pow2_32(NvU32 numerator, NvU32 denominator_pow2)
{
UVM_ASSERT(is_power_of_2(denominator_pow2));
UVM_ASSERT(denominator_pow2);
return numerator >> ilog2(denominator_pow2);
}
static inline NvU64 uvm_div_pow2_64(NvU64 numerator, NvU64 denominator_pow2)
{
UVM_ASSERT(is_power_of_2(denominator_pow2));
UVM_ASSERT(denominator_pow2);
return numerator >> ilog2(denominator_pow2);
}
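// Worked example (illustrative): uvm_div_pow2_32(4096, 256) ==
// 4096 >> ilog2(256) == 4096 >> 8 == 16.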
#define SUM_FROM_0_TO_N(n) (((n) * ((n) + 1)) / 2)
// Start and end are inclusive
static inline NvBool uvm_ranges_overlap(NvU64 a_start, NvU64 a_end, NvU64 b_start, NvU64 b_end)
{
// De Morgan's of: !(a_end < b_start || b_end < a_start)
return a_end >= b_start && b_end >= a_start;
}
static int debug_mode(void)
{
#ifdef DEBUG
return 1;
#else
return 0;
#endif
}
static inline void kmem_cache_destroy_safe(struct kmem_cache **ppCache)
{
if (ppCache)
{
if (*ppCache)
kmem_cache_destroy(*ppCache);
*ppCache = NULL;
}
}
static const uid_t UVM_ROOT_UID = 0;
typedef struct
{
NvU64 start_time_ns;
NvU64 print_time_ns;
} uvm_spin_loop_t;
static inline void uvm_spin_loop_init(uvm_spin_loop_t *spin)
{
NvU64 curr = NV_GETTIME();
spin->start_time_ns = curr;
spin->print_time_ns = curr;
}
// Periodically yields the CPU when not called from interrupt context. Returns
// NV_ERR_TIMEOUT_RETRY if the caller should print a warning that we've been
// waiting too long, and NV_OK otherwise.
NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin);
static NvU64 uvm_spin_loop_elapsed(const uvm_spin_loop_t *spin)
{
NvU64 curr = NV_GETTIME();
return curr - spin->start_time_ns;
}
#define UVM_SPIN_LOOP(__spin) ({ \
NV_STATUS __status = uvm_spin_loop(__spin); \
if (__status == NV_ERR_TIMEOUT_RETRY) { \
UVM_DBG_PRINT("Warning: stuck waiting for %llus\n", \
uvm_spin_loop_elapsed(__spin) / (1000*1000*1000)); \
\
if (uvm_debug_prints_enabled()) \
dump_stack(); \
} \
__status; \
})
// Execute the loop code while cond is true. Invokes UVM_SPIN_LOOP() at the
// end of each iteration.
#define UVM_SPIN_WHILE(cond, spin) \
if (cond) \
for (uvm_spin_loop_init(spin); (cond); UVM_SPIN_LOOP(spin))
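// Illustrative usage sketch (some_condition() is a placeholder for whatever
// completion check the caller polls on):
//
//     uvm_spin_loop_t spin;
//     UVM_SPIN_WHILE(!some_condition(), &spin) {
//         // Body runs while waiting; UVM_SPIN_LOOP() executes at the end of
//         // every iteration and periodically yields the CPU.
//     }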
//
// Documentation for the internal routines listed below may be found in the
// implementation file(s).
//
NV_STATUS errno_to_nv_status(int errnoCode);
int nv_status_to_errno(NV_STATUS status);
unsigned uvm_get_stale_process_id(void);
unsigned uvm_get_stale_thread_id(void);
NvBool uvm_user_id_security_check(uid_t euidTarget);
extern int uvm_enable_builtin_tests;
static inline void uvm_init_character_device(struct cdev *cdev, const struct file_operations *fops)
{
cdev_init(cdev, fops);
cdev->owner = THIS_MODULE;
}
typedef struct
{
int rm_control_fd;
NvHandle user_client;
NvHandle user_object;
} uvm_rm_user_object_t;
// Macro used to compare two values of a type that supports the less-than
// operator. It returns -1 if a < b, 1 if a > b, and 0 if a == b.
#define UVM_CMP_DEFAULT(a,b) \
({ \
typeof(a) _a = a; \
typeof(b) _b = b; \
int __ret; \
BUILD_BUG_ON(sizeof(a) != sizeof(b)); \
if (_a < _b) \
__ret = -1; \
else if (_b < _a) \
__ret = 1; \
else \
__ret = 0; \
\
__ret; \
})
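// Worked examples (illustrative): UVM_CMP_DEFAULT(3ULL, 5ULL) == -1,
// UVM_CMP_DEFAULT(5ULL, 3ULL) == 1, UVM_CMP_DEFAULT(4ULL, 4ULL) == 0.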
// Returns whether the input file was opened against the UVM character device
// file. A NULL input returns false.
bool uvm_file_is_nvidia_uvm(struct file *filp);
// Reads the first word in the supplied struct page.
static inline void uvm_touch_page(struct page *page)
{
char *mapping;
UVM_ASSERT(page);
mapping = (char *) kmap(page);
(void)UVM_READ_ONCE(*mapping);
kunmap(page);
}
#endif /* _UVM_COMMON_H */

View File

@@ -0,0 +1,53 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// This file provides simple wrappers that are always built with optimizations
// turned on to WAR issues with functions that don't build correctly otherwise.
#include "uvm_linux.h"
int nv_atomic_xchg(atomic_t *val, int new)
{
return atomic_xchg(val, new);
}
int nv_atomic_cmpxchg(atomic_t *val, int old, int new)
{
return atomic_cmpxchg(val, old, new);
}
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new)
{
return atomic_long_cmpxchg(val, old, new);
}
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n)
{
return copy_from_user(to, from, n);
}
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n)
{
return copy_to_user(to, from, n);
}

View File

@@ -0,0 +1,38 @@
/*******************************************************************************
Copyright (c) 2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_EXTERN_DECL_H__
#define __UVM_EXTERN_DECL_H__
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
extern int uvm_enable_debug_procfs;
extern unsigned uvm_perf_map_remote_on_native_atomics_fault;
extern uvm_global_t g_uvm_global;
extern bool uvm_global_is_suspended(void);
#endif //__UVM_EXTERN_DECL_H__

View File

@@ -0,0 +1,69 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_gpu_replayable_faults.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params, struct file *filp)
{
NV_STATUS status = NV_OK;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_gpu_t *gpu;
uvm_global_processor_mask_t retained_gpus;
NvU64 i;
uvm_global_processor_mask_zero(&retained_gpus);
uvm_va_space_down_read(va_space);
for_each_va_space_gpu(gpu, va_space) {
if (gpu->parent->replayable_faults_supported)
uvm_global_processor_mask_set(&retained_gpus, gpu->global_id);
}
uvm_global_mask_retain(&retained_gpus);
uvm_va_space_up_read(va_space);
if (uvm_global_processor_mask_empty(&retained_gpus))
return NV_ERR_INVALID_DEVICE;
for (i = 0; i < params->iterations; i++) {
if (fatal_signal_pending(current)) {
status = NV_ERR_SIGNAL_PENDING;
break;
}
for_each_global_gpu_in_mask(gpu, &retained_gpus)
TEST_CHECK_GOTO(uvm_gpu_fault_buffer_flush(gpu) == NV_OK, out);
}
out:
uvm_global_mask_release(&retained_gpus);
return status;
}

View File

@@ -0,0 +1,98 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_FORWARD_DECL_H__
#define __UVM_FORWARD_DECL_H__
typedef struct uvm_global_struct uvm_global_t;
typedef struct uvm_gpu_struct uvm_gpu_t;
typedef struct uvm_parent_gpu_struct uvm_parent_gpu_t;
typedef struct uvm_rm_mem_struct uvm_rm_mem_t;
typedef struct uvm_mem_struct uvm_mem_t;
typedef struct uvm_host_hal_struct uvm_host_hal_t;
typedef struct uvm_ce_hal_struct uvm_ce_hal_t;
typedef struct uvm_arch_hal_struct uvm_arch_hal_t;
typedef struct uvm_fault_buffer_hal_struct uvm_fault_buffer_hal_t;
typedef struct uvm_access_counter_buffer_hal_struct uvm_access_counter_buffer_hal_t;
typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;
typedef struct uvm_gpu_semaphore_pool_page_struct uvm_gpu_semaphore_pool_page_t;
typedef struct uvm_gpu_peer_struct uvm_gpu_peer_t;
typedef struct uvm_mmu_mode_hal_struct uvm_mmu_mode_hal_t;
typedef struct uvm_channel_manager_struct uvm_channel_manager_t;
typedef struct uvm_channel_struct uvm_channel_t;
typedef struct uvm_user_channel_struct uvm_user_channel_t;
typedef struct uvm_push_struct uvm_push_t;
typedef struct uvm_push_info_struct uvm_push_info_t;
typedef struct uvm_push_acquire_info_struct uvm_push_acquire_info_t;
typedef struct uvm_pushbuffer_struct uvm_pushbuffer_t;
typedef struct uvm_gpfifo_entry_struct uvm_gpfifo_entry_t;
typedef struct uvm_va_policy_struct uvm_va_policy_t;
typedef struct uvm_va_range_struct uvm_va_range_t;
typedef struct uvm_va_block_struct uvm_va_block_t;
typedef struct uvm_va_block_test_struct uvm_va_block_test_t;
typedef struct uvm_va_block_wrapper_struct uvm_va_block_wrapper_t;
typedef struct uvm_va_space_struct uvm_va_space_t;
typedef struct uvm_va_space_mm_struct uvm_va_space_mm_t;
typedef struct uvm_make_resident_context_struct uvm_make_resident_context_t;
typedef struct uvm_gpu_va_space_struct uvm_gpu_va_space_t;
typedef struct uvm_thread_context_lock_struct uvm_thread_context_lock_t;
typedef struct uvm_thread_context_struct uvm_thread_context_t;
typedef struct uvm_thread_context_wrapper_struct uvm_thread_context_wrapper_t;
typedef struct uvm_perf_module_struct uvm_perf_module_t;
typedef struct uvm_page_table_range_vec_struct uvm_page_table_range_vec_t;
typedef struct uvm_page_table_range_struct uvm_page_table_range_t;
typedef struct uvm_page_tree_struct uvm_page_tree_t;
typedef struct uvm_fault_buffer_entry_struct uvm_fault_buffer_entry_t;
typedef struct uvm_pte_batch_struct uvm_pte_batch_t;
typedef struct uvm_tlb_batch_struct uvm_tlb_batch_t;
typedef struct uvm_fault_service_batch_context_struct uvm_fault_service_batch_context_t;
typedef struct uvm_service_block_context_struct uvm_service_block_context_t;
typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;
typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;
typedef struct uvm_ibm_npu_struct uvm_ibm_npu_t;
#endif //__UVM_FORWARD_DECL_H__

View File

@@ -0,0 +1,352 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_test.h"
#include "uvm_test_ioctl.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_va_space.h"
#include "uvm_mmu.h"
#include "nv_uvm_types.h"
#include "nv_uvm_interface.h"
#include "uvm_common.h"
#define get_rm_ptes(offset, size, ext_map_info) \
uvm_rm_locked_call( \
nvUvmInterfaceGetExternalAllocPtes(gpu_va_space, \
duped_memory, \
offset, \
size, \
ext_map_info))
static uvm_aperture_t get_aperture(uvm_va_space_t *va_space,
uvm_gpu_t *memory_owning_gpu,
uvm_gpu_t *memory_mapping_gpu,
UvmGpuMemoryInfo *memory_info,
bool sli_supported)
{
if (memory_info->sysmem) {
return UVM_APERTURE_SYS;
}
else {
if (memory_mapping_gpu != memory_owning_gpu && !sli_supported)
return uvm_gpu_peer_aperture(memory_mapping_gpu, memory_owning_gpu);
return UVM_APERTURE_VID;
}
}
static bool is_cacheable(UvmGpuExternalMappingInfo *ext_mapping_info, uvm_aperture_t aperture)
{
if (ext_mapping_info->cachingType == UvmRmGpuCachingTypeForceCached)
return true;
else if (ext_mapping_info->cachingType == UvmRmGpuCachingTypeForceUncached)
return false;
else if (aperture == UVM_APERTURE_VID)
return true;
return false;
}
static NvU32 get_protection(UvmGpuExternalMappingInfo *ext_mapping_info)
{
if (ext_mapping_info->mappingType == UvmRmGpuMappingTypeReadWriteAtomic ||
ext_mapping_info->mappingType == UvmRmGpuMappingTypeDefault)
return UVM_PROT_READ_WRITE_ATOMIC;
else if (ext_mapping_info->mappingType == UvmRmGpuMappingTypeReadWrite)
return UVM_PROT_READ_WRITE;
else
return UVM_PROT_READ_ONLY;
}
static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
uvm_gpu_t *memory_mapping_gpu,
NvU64 mapping_offset,
NvU64 mapping_size,
UvmGpuExternalMappingInfo *ext_mapping_info,
UvmGpuMemoryInfo *memory_info,
bool sli_supported)
{
NvU32 index = 0, total_pte_count = 0, skip = 0, page_size = 0;
uvm_aperture_t aperture = 0;
NvU32 prot;
NvU64 phys_offset, pte;
uvm_mmu_mode_hal_t *hal;
NvU64 pte_flags = UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED;
uvm_gpu_t *memory_owning_gpu = NULL;
TEST_CHECK_RET(memory_info->contig);
hal = uvm_gpu_va_space_get(va_space, memory_mapping_gpu)->page_tables.hal;
page_size = memory_info->pageSize;
// Verify that make_pte supports this page size
TEST_CHECK_RET(page_size & hal->page_sizes());
total_pte_count = mapping_size ? (mapping_size / page_size) : (memory_info->size / page_size);
TEST_CHECK_RET(total_pte_count);
TEST_CHECK_RET(ext_mapping_info->numWrittenPtes <= total_pte_count);
TEST_CHECK_RET(ext_mapping_info->numRemainingPtes == (total_pte_count - ext_mapping_info->numWrittenPtes));
skip = ext_mapping_info->pteSize / sizeof(NvU64);
TEST_CHECK_RET(skip);
memory_owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &memory_info->uuid);
if (memory_owning_gpu == NULL)
return NV_ERR_INVALID_DEVICE;
// TODO: Bug 1903234: Once RM supports indirect peer mappings, we'll need to
// update this test since the aperture will be SYS. Depending on how
// RM implements things, we might not be able to compare the physical
// addresses either.
aperture = get_aperture(va_space, memory_owning_gpu, memory_mapping_gpu, memory_info, sli_supported);
if (is_cacheable(ext_mapping_info, aperture))
pte_flags |= UVM_MMU_PTE_FLAGS_CACHED;
prot = get_protection(ext_mapping_info);
phys_offset = mapping_offset;
// Add the physical offset for nvswitch connected peer mappings
if (uvm_aperture_is_peer(aperture) && uvm_gpus_are_nvswitch_connected(memory_mapping_gpu, memory_owning_gpu))
phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
for (index = 0; index < ext_mapping_info->numWrittenPtes; index++) {
pte = hal->make_pte(aperture,
memory_info->physAddr + phys_offset,
prot,
pte_flags);
TEST_CHECK_RET(pte == ext_mapping_info->pteBuffer[index * skip]);
phys_offset += page_size;
}
return NV_OK;
}
static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
{
NV_STATUS status = NV_OK;
NV_STATUS free_status;
uvm_gpu_t *memory_mapping_gpu;
NvHandle duped_memory;
UvmGpuExternalMappingInfo ext_mapping_info;
UvmGpuMemoryInfo memory_info;
NvU64 pte_buffer[16] = {0};
NvU32 size = 0;
uvmGpuAddressSpaceHandle gpu_va_space;
uvmGpuDeviceHandle rm_device;
NvHandle client, memory;
client = params->hClient;
memory = params->hMemory;
// Note: This check is safe as the single-GPU test does not run on SLI-enabled devices.
memory_mapping_gpu = uvm_va_space_get_gpu_by_uuid_with_gpu_va_space(va_space, &params->gpu_uuid);
if (!memory_mapping_gpu)
return NV_ERR_INVALID_DEVICE;
gpu_va_space = memory_mapping_gpu->rm_address_space;
rm_device = uvm_gpu_device_handle(memory_mapping_gpu);
status = uvm_rm_locked_call(nvUvmInterfaceDupMemory(rm_device, client, memory, &duped_memory, &memory_info));
if (status != NV_OK)
return status;
TEST_CHECK_GOTO(uvm_processor_uuid_eq(&memory_info.uuid, &params->gpu_uuid), done);
TEST_CHECK_GOTO((memory_info.size == params->size), done);
size = params->size;
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
ext_mapping_info.pteBuffer = pte_buffer;
ext_mapping_info.pteBufferSize = 1;
TEST_CHECK_GOTO((get_rm_ptes(size + 1, 0, &ext_mapping_info) == NV_ERR_INVALID_BASE), done);
TEST_CHECK_GOTO((get_rm_ptes(0, size + 1, &ext_mapping_info) == NV_ERR_INVALID_LIMIT), done);
TEST_CHECK_GOTO((get_rm_ptes(1, 0, &ext_mapping_info) == NV_ERR_INVALID_ARGUMENT), done);
TEST_CHECK_GOTO((get_rm_ptes(0, size - 1, &ext_mapping_info) == NV_ERR_INVALID_ARGUMENT), done);
TEST_CHECK_GOTO((get_rm_ptes(0, 0, &ext_mapping_info) == NV_ERR_BUFFER_TOO_SMALL), done);
ext_mapping_info.pteBufferSize = sizeof(pte_buffer);
TEST_CHECK_GOTO(get_rm_ptes(0, 0, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
0,
0,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
TEST_CHECK_GOTO(get_rm_ptes(memory_info.pageSize, 0, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
memory_info.pageSize,
0,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
TEST_CHECK_GOTO(get_rm_ptes(0, size - memory_info.pageSize, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
0,
size - memory_info.pageSize,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
ext_mapping_info.mappingType = UvmRmGpuMappingTypeReadWrite;
ext_mapping_info.cachingType = UvmRmGpuCachingTypeForceCached;
TEST_CHECK_GOTO(get_rm_ptes(memory_info.pageSize, size - memory_info.pageSize, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
memory_info.pageSize,
size - memory_info.pageSize,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
ext_mapping_info.mappingType = UvmRmGpuMappingTypeReadOnly;
ext_mapping_info.cachingType = UvmRmGpuCachingTypeForceUncached;
TEST_CHECK_GOTO(get_rm_ptes(size - memory_info.pageSize, memory_info.pageSize, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
size - memory_info.pageSize,
memory_info.pageSize,
&ext_mapping_info,
&memory_info,
false) == NV_OK, done);
done:
free_status = uvm_rm_locked_call(nvUvmInterfaceFreeDupedHandle(rm_device, duped_memory));
if (status == NV_OK)
status = free_status;
return status;
}
static NV_STATUS test_get_rm_ptes_multi_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
{
NV_STATUS status = NV_OK;
NV_STATUS free_status;
uvm_gpu_t *memory_mapping_gpu;
NvHandle duped_memory;
UvmGpuExternalMappingInfo ext_mapping_info;
UvmGpuMemoryInfo memory_info;
uvmGpuDeviceHandle rm_device;
NvU64 pte_buffer[16] = {0};
uvmGpuAddressSpaceHandle gpu_va_space;
memory_mapping_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!memory_mapping_gpu)
return NV_ERR_INVALID_DEVICE;
gpu_va_space = memory_mapping_gpu->rm_address_space;
rm_device = uvm_gpu_device_handle(memory_mapping_gpu);
status = uvm_rm_locked_call(nvUvmInterfaceDupMemory(rm_device,
params->hClient,
params->hMemory,
&duped_memory,
&memory_info));
if (status != NV_OK)
return status;
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
memset(pte_buffer, 0, sizeof(pte_buffer));
ext_mapping_info.pteBuffer = pte_buffer;
ext_mapping_info.pteBufferSize = sizeof(pte_buffer);
switch (params->test_mode) {
case UVM_TEST_GET_RM_PTES_MULTI_GPU_SUPPORTED:
case UVM_TEST_GET_RM_PTES_MULTI_GPU_SLI_SUPPORTED:
TEST_CHECK_GOTO(get_rm_ptes(0, 0, &ext_mapping_info) == NV_OK, done);
TEST_CHECK_GOTO(verify_mapping_info(va_space,
memory_mapping_gpu,
0,
0,
&ext_mapping_info,
&memory_info,
(params->test_mode ==
UVM_TEST_GET_RM_PTES_MULTI_GPU_SLI_SUPPORTED)) == NV_OK, done);
break;
case UVM_TEST_GET_RM_PTES_MULTI_GPU_NOT_SUPPORTED:
TEST_CHECK_GOTO(get_rm_ptes(0, 0, &ext_mapping_info) == NV_ERR_NOT_SUPPORTED, done);
break;
default:
status = NV_ERR_INVALID_PARAMETER;
}
done:
free_status = uvm_rm_locked_call(nvUvmInterfaceFreeDupedHandle(rm_device, duped_memory));
if (status == NV_OK)
status = free_status;
return status;
}
NV_STATUS uvm_test_get_rm_ptes(UVM_TEST_GET_RM_PTES_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_va_space_down_read_rm(va_space);
switch (params->test_mode) {
case UVM_TEST_GET_RM_PTES_SINGLE_GPU:
status = test_get_rm_ptes_single_gpu(va_space, params);
break;
case UVM_TEST_GET_RM_PTES_MULTI_GPU_SUPPORTED:
case UVM_TEST_GET_RM_PTES_MULTI_GPU_SLI_SUPPORTED:
case UVM_TEST_GET_RM_PTES_MULTI_GPU_NOT_SUPPORTED:
status = test_get_rm_ptes_multi_gpu(va_space, params);
break;
default:
status = NV_ERR_INVALID_PARAMETER;
}
uvm_va_space_up_read_rm(va_space);
return status;
}

View File

@@ -0,0 +1,473 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_ats.h"
#include "uvm_global.h"
#include "uvm_gpu_replayable_faults.h"
#include "uvm_mem.h"
#include "uvm_perf_events.h"
#include "uvm_procfs.h"
#include "uvm_thread_context.h"
#include "uvm_va_range.h"
#include "uvm_kvmalloc.h"
#include "uvm_mmu.h"
#include "uvm_perf_heuristics.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_migrate.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_va_space_mm.h"
#include "nv_uvm_interface.h"
uvm_global_t g_uvm_global;
static struct UvmOpsUvmEvents g_exported_uvm_ops;
static bool g_ops_registered = false;
static NV_STATUS uvm_register_callbacks(void)
{
NV_STATUS status = NV_OK;
g_exported_uvm_ops.suspend = uvm_suspend_entry;
g_exported_uvm_ops.resume = uvm_resume_entry;
g_exported_uvm_ops.startDevice = NULL;
g_exported_uvm_ops.stopDevice = NULL;
g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;
// Register the UVM callbacks with the main GPU driver:
status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
if (status != NV_OK)
return status;
g_ops_registered = true;
return NV_OK;
}
// Calling this function more than once is harmless:
static void uvm_unregister_callbacks(void)
{
if (g_ops_registered) {
uvm_rm_locked_call_void(nvUvmInterfaceDeRegisterUvmOps());
g_ops_registered = false;
}
}
NV_STATUS uvm_global_init(void)
{
NV_STATUS status;
UvmPlatformInfo platform_info;
// Initialization of thread contexts happened already, during registration
// (addition) of the thread context associated with the UVM module entry
// point that is calling this function.
UVM_ASSERT(uvm_thread_context_global_initialized());
uvm_mutex_init(&g_uvm_global.global_lock, UVM_LOCK_ORDER_GLOBAL);
uvm_init_rwsem(&g_uvm_global.pm.lock, UVM_LOCK_ORDER_GLOBAL_PM);
uvm_spin_lock_irqsave_init(&g_uvm_global.gpu_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_mutex_init(&g_uvm_global.va_spaces.lock, UVM_LOCK_ORDER_VA_SPACES_LIST);
INIT_LIST_HEAD(&g_uvm_global.va_spaces.list);
status = uvm_kvmalloc_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_kvmalloc_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.global_q, "UVM global queue"));
if (status != NV_OK) {
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
if (status != NV_OK) {
UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_procfs_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_rm_locked_call(nvUvmInterfaceSessionCreate(&g_uvm_global.rm_session_handle, &platform_info));
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceSessionCreate() failed: %s\n", nvstatusToString(status));
return status;
}
uvm_ats_init(&platform_info);
g_uvm_global.num_simulated_devices = 0;
status = uvm_gpu_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_gpu_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_pmm_sysmem_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_pmm_sysmem_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_mmu_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_mmu_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_mem_global_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_mem_gloal_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_va_policy_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_va_policy_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_va_range_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_va_range_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_range_group_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_range_group_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_migrate_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_migrate_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_perf_events_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_perf_events_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_perf_heuristics_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_perf_heuristics_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_service_block_context_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_service_block_context_init failed: %s\n", nvstatusToString(status));
goto error;
}
// This sets up the ISR (interrupt service routine) by hooking into RM's top-half ISR callback. As soon as this
// call completes, GPU interrupts will start arriving, so it's important to be prepared to receive interrupts before
// this point:
status = uvm_register_callbacks();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
goto error;
}
return NV_OK;
error:
uvm_global_exit();
return status;
}
void uvm_global_exit(void)
{
uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);
// Guarantee completion of any release callbacks scheduled after the flush
// in uvm_resume().
nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
uvm_unregister_callbacks();
uvm_service_block_context_exit();
uvm_perf_heuristics_exit();
uvm_perf_events_exit();
uvm_migrate_exit();
uvm_range_group_exit();
uvm_va_range_exit();
uvm_va_policy_exit();
uvm_mem_global_exit();
uvm_pmm_sysmem_exit();
uvm_gpu_exit();
if (g_uvm_global.rm_session_handle != 0)
uvm_rm_locked_call_void(nvUvmInterfaceSessionDestroy(g_uvm_global.rm_session_handle));
uvm_procfs_exit();
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
nv_kthread_q_stop(&g_uvm_global.global_q);
uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
UVM_ASSERT(list_empty(&g_uvm_global.va_spaces.list));
uvm_thread_context_global_exit();
uvm_kvmalloc_exit();
}
// Signal to the top-half ISR whether calls from the RM's top-half ISR are to
// be completed without processing.
static void uvm_gpu_set_isr_suspended(uvm_gpu_t *gpu, bool is_suspended)
{
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
gpu->parent->isr.is_suspended = is_suspended;
uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
}
static NV_STATUS uvm_suspend(void)
{
uvm_va_space_t *va_space = NULL;
uvm_global_gpu_id_t gpu_id;
uvm_gpu_t *gpu;
// Upon entry into this function, the following is true:
// * GPU interrupts are enabled
// * Any number of fault or access counter notifications could
// be pending
// * No new fault notifications will appear, but new access
// counter notifications could
// * Any of the bottom halves could be running
// * New bottom halves of all types could be scheduled as GPU
// interrupts are handled
// Due to this, the sequence of suspend operations for each GPU is the
// following:
// * Flush the fault buffer to prevent fault interrupts when
// the top-half ISR is suspended
// * Suspend access counter processing
// * Suspend the top-half ISR
// * Flush relevant kthread queues (bottom half, etc.)
// Some locks acquired by this function, such as pm.lock, are released
// by uvm_resume(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
// Take the global power management lock in write mode to lock out
// most user-facing entry points.
uvm_down_write(&g_uvm_global.pm.lock);
nv_kthread_q_flush(&g_uvm_global.global_q);
// Though global_lock isn't held here, pm.lock indirectly prevents the
// addition and removal of GPUs, since these operations can currently
// only occur in response to ioctl() calls.
for_each_global_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
gpu = uvm_gpu_get(gpu_id);
// Since fault buffer state may be lost across sleep cycles, UVM must
// ensure any outstanding replayable faults are dismissed. The RM
// guarantees that all user channels have been preempted before
// uvm_suspend() is called, which implies that no user channels can be
// stalled on faults when this point is reached.
if (gpu->parent->replayable_faults_supported)
uvm_gpu_fault_buffer_flush(gpu);
// TODO: Bug 2535118: flush the non-replayable fault buffer
// Stop access counter interrupt processing for the duration of this
// sleep cycle to defend against potential interrupt storms in
// the suspend path: if rate limiting is applied to access counter
// interrupts in the bottom half in the future, the bottom half flush
// below will no longer be able to guarantee that all outstanding
// notifications have been handled.
uvm_gpu_access_counters_set_ignore(gpu, true);
uvm_gpu_set_isr_suspended(gpu, true);
nv_kthread_q_flush(&gpu->parent->isr.bottom_half_q);
if (gpu->parent->isr.non_replayable_faults.handling)
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
}
// Acquire each VA space's lock in write mode to lock out VMA open and
// release callbacks. These entry points do not have feasible early exit
// options, and so aren't suitable for synchronization with pm.lock.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
list_for_each_entry(va_space, &g_uvm_global.va_spaces.list, list_node)
uvm_va_space_down_write(va_space);
uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);
uvm_thread_context_lock_enable_tracking();
g_uvm_global.pm.is_suspended = true;
return NV_OK;
}
NV_STATUS uvm_suspend_entry(void)
{
UVM_ENTRY_RET(uvm_suspend());
}
static NV_STATUS uvm_resume(void)
{
uvm_va_space_t *va_space = NULL;
uvm_global_gpu_id_t gpu_id;
uvm_gpu_t *gpu;
g_uvm_global.pm.is_suspended = false;
// Some locks released by this function, such as pm.lock, were acquired
// by uvm_suspend(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
// Release each VA space's lock.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
list_for_each_entry(va_space, &g_uvm_global.va_spaces.list, list_node)
uvm_va_space_up_write(va_space);
uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);
// pm.lock is held in lieu of global_lock to prevent GPU addition/removal
for_each_global_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
gpu = uvm_gpu_get(gpu_id);
// Bring the fault buffer software state back in sync with the
// hardware state.
uvm_gpu_fault_buffer_resume(gpu->parent);
uvm_gpu_set_isr_suspended(gpu, false);
// Reenable access counter interrupt processing unless notifications
// have been set to be suppressed.
uvm_gpu_access_counters_set_ignore(gpu, false);
}
uvm_up_write(&g_uvm_global.pm.lock);
uvm_thread_context_lock_enable_tracking();
// Force completion of any release callbacks successfully queued for
// deferred completion while suspended. The deferred release
// queue is not guaranteed to remain empty following this flush since
// some threads that failed to acquire pm.lock in uvm_release() may
// not have scheduled their handlers yet.
nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
return NV_OK;
}
NV_STATUS uvm_resume_entry(void)
{
UVM_ENTRY_RET(uvm_resume());
}
bool uvm_global_is_suspended(void)
{
return g_uvm_global.pm.is_suspended;
}
void uvm_global_set_fatal_error_impl(NV_STATUS error)
{
NV_STATUS previous_error;
UVM_ASSERT(error != NV_OK);
previous_error = nv_atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
if (previous_error == NV_OK) {
UVM_ERR_PRINT("Encountered a global fatal error: %s\n", nvstatusToString(error));
}
else {
UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
nvstatusToString(error), nvstatusToString(previous_error));
}
}
NV_STATUS uvm_global_reset_fatal_error(void)
{
if (!uvm_enable_builtin_tests) {
UVM_ASSERT_MSG(0, "Resetting global fatal error without tests being enabled\n");
return NV_ERR_INVALID_STATE;
}
return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
}
void uvm_global_mask_retain(const uvm_global_processor_mask_t *mask)
{
uvm_gpu_t *gpu;
for_each_global_gpu_in_mask(gpu, mask)
uvm_gpu_retain(gpu);
}
void uvm_global_mask_release(const uvm_global_processor_mask_t *mask)
{
uvm_global_gpu_id_t gpu_id;
if (uvm_global_processor_mask_empty(mask))
return;
uvm_mutex_lock(&g_uvm_global.global_lock);
// Do not use for_each_global_gpu_in_mask() here as it reads GPU state which
// might already be getting destroyed
for_each_global_gpu_id_in_mask(gpu_id, mask)
uvm_gpu_release_locked(uvm_gpu_get(gpu_id));
uvm_mutex_unlock(&g_uvm_global.global_lock);
}
NV_STATUS uvm_global_mask_check_ecc_error(uvm_global_processor_mask_t *gpus)
{
uvm_gpu_t *gpu;
for_each_global_gpu_in_mask(gpu, gpus) {
NV_STATUS status = uvm_gpu_check_ecc_error(gpu);
if (status != NV_OK)
return status;
}
return NV_OK;
}

View File

@@ -0,0 +1,416 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GLOBAL_H__
#define __UVM_GLOBAL_H__
#include "nv_uvm_types.h"
#include "uvm_extern_decl.h"
#include "uvm_linux.h"
#include "uvm_common.h"
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"
// Global state of the uvm driver
struct uvm_global_struct
{
// Mask of retained GPUs.
// Note that GPUs are added to this mask as the last step of add_gpu() and
// removed from it as the first step of remove_gpu() implying that a GPU
// that's being initialized or deinitialized will not be in it.
uvm_global_processor_mask_t retained_gpus;
// Array of the parent GPUs registered with UVM. Note that GPUs will have
// ids offset by 1 to accommodate the UVM_GLOBAL_ID_CPU so, e.g.,
// parent_gpus[0] will have GPU id = 1. A GPU entry is unused iff it does
// not exist (is a NULL pointer) in this table.
uvm_parent_gpu_t *parent_gpus[UVM_MAX_GPUS];
// A global RM session (RM client)
// Created on module load and destroyed on module unload
uvmGpuSessionHandle rm_session_handle;
// peer-to-peer table
// peer info is added and removed from this table when usermode
// driver calls UvmEnablePeerAccess and UvmDisablePeerAccess
// respectively.
uvm_gpu_peer_t peers[UVM_MAX_UNIQUE_GPU_PAIRS];
// peer-to-peer copy mode
// Pascal+ GPUs support virtual addresses in p2p copies.
// Ampere+ GPUs add support for physical addresses in p2p copies.
uvm_gpu_peer_copy_mode_t peer_copy_mode;
// Stores an NV_STATUS. Once it becomes != NV_OK, the driver should refuse to
// do anything other than try to clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
// GPUs.
atomic_t fatal_error;
// A flag to disable the assert on fatal error
// To be used by tests and only consulted if tests are enabled.
bool disable_fatal_error_assert;
// Lock protecting the global state
uvm_mutex_t global_lock;
struct
{
// Lock synchronizing user threads with power management activity
uvm_rw_semaphore_t lock;
// Power management state flag; tested by UVM_GPU_WRITE_ONCE()
// and UVM_GPU_READ_ONCE() to detect accesses to GPUs when
// UVM is suspended.
bool is_suspended;
} pm;
// This lock synchronizes addition and removal of GPUs from UVM's global
// table. It must be held whenever g_uvm_global.parent_gpus[] is written. In
// order to read from this table, you must hold either the gpu_table_lock,
// or the global_lock.
//
// This is a leaf lock.
uvm_spinlock_irqsave_t gpu_table_lock;
// Number of simulated/emulated devices that have registered with UVM
unsigned num_simulated_devices;
// A single queue for deferred work that is non-GPU-specific.
nv_kthread_q_t global_q;
// A single queue for deferred f_ops->release() handling. Items scheduled to
// run on it may block for the duration of system sleep cycles, stalling
// the queue and preventing any other items from running.
nv_kthread_q_t deferred_release_q;
struct
{
// Indicates whether the system HW supports ATS. This field is set once
// during global initialization (uvm_global_init), and can be read
// afterwards without acquiring any locks.
bool supported;
// On top of HW platform support, ATS support can be overridden using
// the module parameter uvm_ats_mode. This field is set once during
// global initialization (uvm_global_init), and can be read afterwards
// without acquiring any locks.
bool enabled;
} ats;
#if UVM_IBM_NPU_SUPPORTED()
// On IBM systems this array tracks the active NPUs (the NPUs which are
// attached to retained GPUs).
uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif
// List of all active VA spaces
struct
{
uvm_mutex_t lock;
struct list_head list;
} va_spaces;
// Notify a registered process about the driver state after it's unloaded.
// The intent is to systematically report any error during the driver
// teardown. unload_state is used for testing only.
struct
{
// ptr points to a 8-byte buffer within page.
NvU64 *ptr;
struct page *page;
} unload_state;
};
// Initialize global uvm state
NV_STATUS uvm_global_init(void);
// Deinitialize global state (called from module exit)
void uvm_global_exit(void);
// Prepare for entry into a system sleep state
NV_STATUS uvm_suspend_entry(void);
// Recover after exit from a system sleep state
NV_STATUS uvm_resume_entry(void);
// Add parent GPU to the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(!g_uvm_global.parent_gpus[gpu_index]);
g_uvm_global.parent_gpus[gpu_index] = parent_gpu;
}
// Remove parent GPU from the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index]);
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
g_uvm_global.parent_gpus[gpu_index] = NULL;
}
// Get a gpu by its global id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
static uvm_gpu_t *uvm_gpu_get(uvm_global_gpu_id_t global_gpu_id)
{
uvm_parent_gpu_t *parent_gpu;
parent_gpu = g_uvm_global.parent_gpus[uvm_id_gpu_index_from_global_gpu_id(global_gpu_id)];
if (!parent_gpu)
return NULL;
return parent_gpu->gpus[uvm_global_id_sub_processor_index(global_gpu_id)];
}
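// Illustrative usage sketch (not part of the original sources): looking up
// GPUs by global id while holding the global lock, mirroring
// uvm_global_mask_release() in uvm_global.c:
//
//     uvm_mutex_lock(&g_uvm_global.global_lock);
//     for_each_global_gpu_id_in_mask(gpu_id, mask)
//         uvm_gpu_release_locked(uvm_gpu_get(gpu_id));
//     uvm_mutex_unlock(&g_uvm_global.global_lock);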
// Get a gpu by its processor id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
static uvm_gpu_t *uvm_gpu_get_by_processor_id(uvm_processor_id_t id)
{
uvm_global_gpu_id_t global_id = uvm_global_gpu_id_from_gpu_id(id);
uvm_gpu_t *gpu = uvm_gpu_get(global_id);
if (gpu)
UVM_ASSERT(!gpu->parent->smc.enabled);
return gpu;
}
static uvmGpuSessionHandle uvm_gpu_session_handle(uvm_gpu_t *gpu)
{
if (gpu->parent->smc.enabled)
return gpu->smc.rm_session_handle;
return g_uvm_global.rm_session_handle;
}
// Use these READ_ONCE()/WRITE_ONCE() wrappers when accessing GPU resources
// in BAR0/BAR1 to detect cases in which GPUs are accessed when UVM is
// suspended.
#define UVM_GPU_WRITE_ONCE(x, val) do { \
UVM_ASSERT(!uvm_global_is_suspended()); \
UVM_WRITE_ONCE(x, val); \
} while (0)
#define UVM_GPU_READ_ONCE(x) ({ \
UVM_ASSERT(!uvm_global_is_suspended()); \
UVM_READ_ONCE(x); \
})
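// Illustrative usage sketch (not part of the original sources; the register
// pointer names are hypothetical): wrapping BAR0/BAR1 accesses so that a touch
// of GPU resources while UVM is suspended trips the assert above.
//
//     NvU32 put = UVM_GPU_READ_ONCE(*fault_buffer_put_reg);  // flags suspended reads
//     UVM_GPU_WRITE_ONCE(*fault_buffer_get_reg, new_get);    // flags suspended writes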
static bool global_is_fatal_error_assert_disabled(void)
{
// Only allow the assert to be disabled if tests are enabled
if (!uvm_enable_builtin_tests)
return false;
return g_uvm_global.disable_fatal_error_assert;
}
// Set a global fatal error
// Once that happens, the driver should refuse to do anything other than try
// to clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
// GPUs.
// Use a macro so that the assert below provides precise file and line info and
// a backtrace.
#define uvm_global_set_fatal_error(error) \
do { \
if (!global_is_fatal_error_assert_disabled()) \
UVM_ASSERT_MSG(0, "Fatal error: %s\n", nvstatusToString(error)); \
uvm_global_set_fatal_error_impl(error); \
} while (0)
void uvm_global_set_fatal_error_impl(NV_STATUS error);
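// Illustrative usage sketch (not part of the original sources): a caller that
// hits an unrecoverable condition records it and bails out, e.g.
//
//     status = uvm_gpu_check_ecc_error(gpu);
//     if (status != NV_OK) {
//         uvm_global_set_fatal_error(status);
//         return status;
//     }
//
// Later entry points can then consult uvm_global_get_status() and refuse to do
// anything beyond cleanup.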
// Get the global status
static NV_STATUS uvm_global_get_status(void)
{
return atomic_read(&g_uvm_global.fatal_error);
}
// Reset global fatal error
// This is to be used by tests triggering the global error on purpose only.
// Returns the value of the global error field that existed just before this
// reset call was made.
NV_STATUS uvm_global_reset_fatal_error(void);
static uvm_gpu_t *uvm_global_processor_mask_find_first_gpu(const uvm_global_processor_mask_t *global_gpus)
{
uvm_gpu_t *gpu;
uvm_global_gpu_id_t gpu_id = uvm_global_processor_mask_find_first_gpu_id(global_gpus);
if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
return NULL;
gpu = uvm_gpu_get(gpu_id);
// If there is a valid GPU id in the mask, assert that the corresponding
// uvm_gpu_t is present. Otherwise it would stop a
// for_each_global_gpu_in_mask() loop prematurely. Today, this could only
// happen in remove_gpu() because the GPU being removed is deleted from the
// global table very early.
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
return gpu;
}
static uvm_gpu_t *__uvm_global_processor_mask_find_next_gpu(const uvm_global_processor_mask_t *global_gpus, uvm_gpu_t *gpu)
{
uvm_global_gpu_id_t gpu_id;
UVM_ASSERT(gpu);
gpu_id = uvm_global_processor_mask_find_next_id(global_gpus, uvm_global_gpu_id_next(gpu->global_id));
if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
return NULL;
gpu = uvm_gpu_get(gpu_id);
// See comment in uvm_global_processor_mask_find_first_gpu().
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
return gpu;
}
// Helper to iterate over all GPUs in the input mask
#define for_each_global_gpu_in_mask(gpu, global_mask) \
for (gpu = uvm_global_processor_mask_find_first_gpu(global_mask); \
gpu != NULL; \
gpu = __uvm_global_processor_mask_find_next_gpu(global_mask, gpu))
// Helper to iterate over all GPUs retained by the UVM driver (across all va spaces)
#define for_each_global_gpu(gpu) \
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
gpu != NULL; \
gpu = __uvm_global_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
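// Illustrative usage sketch (not part of the original sources): visiting every
// GPU in a retained mask, mirroring uvm_global_mask_check_ecc_error() in
// uvm_global.c:
//
//     uvm_gpu_t *gpu;
//     for_each_global_gpu_in_mask(gpu, &retained_gpus) {
//         NV_STATUS status = uvm_gpu_check_ecc_error(gpu);
//         if (status != NV_OK)
//             return status;
//     }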
// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
NvU32 i;
if (parent_gpu) {
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
i = gpu_index + 1;
}
else {
i = 0;
}
parent_gpu = NULL;
while (i < UVM_MAX_GPUS) {
if (g_uvm_global.parent_gpus[i]) {
parent_gpu = g_uvm_global.parent_gpus[i];
break;
}
i++;
}
return parent_gpu;
}
// LOCKING: Must hold the global_lock
static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent_gpu, uvm_gpu_t *cur_gpu)
{
uvm_gpu_t *gpu = NULL;
uvm_global_gpu_id_t global_gpu_id;
NvU32 sub_processor_index;
NvU32 cur_sub_processor_index;
UVM_ASSERT(parent_gpu);
global_gpu_id = uvm_global_gpu_id_from_gpu_id(parent_gpu->id);
cur_sub_processor_index = cur_gpu ? uvm_global_id_sub_processor_index(cur_gpu->global_id) : -1;
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
gpu = uvm_gpu_get(uvm_global_id_from_value(uvm_global_id_value(global_gpu_id) + sub_processor_index));
UVM_ASSERT(gpu != NULL);
}
return gpu;
}
// LOCKING: Must hold either the global_lock or the gpu_table_lock
#define for_each_parent_gpu(parent_gpu) \
for ((parent_gpu) = uvm_global_find_next_parent_gpu(NULL); \
(parent_gpu) != NULL; \
(parent_gpu) = uvm_global_find_next_parent_gpu((parent_gpu)))
// LOCKING: Must hold the global_lock
#define for_each_gpu_in_parent(parent_gpu, gpu) \
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), NULL);}); \
(gpu) != NULL; \
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), (gpu)))
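// Illustrative usage sketch (not part of the original sources): walking every
// registered parent GPU and, under the global lock, each of its valid
// sub-processor GPUs:
//
//     uvm_parent_gpu_t *parent_gpu;
//     uvm_gpu_t *gpu;
//     uvm_mutex_lock(&g_uvm_global.global_lock);
//     for_each_parent_gpu(parent_gpu) {
//         for_each_gpu_in_parent(parent_gpu, gpu)
//             UVM_DBG_PRINT("GPU id %u\n", uvm_global_id_value(gpu->global_id));
//     }
//     uvm_mutex_unlock(&g_uvm_global.global_lock);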
// Helper which calls uvm_gpu_retain on each GPU in mask
void uvm_global_mask_retain(const uvm_global_processor_mask_t *mask);
// Helper which calls uvm_gpu_release_locked on each GPU in mask.
//
// LOCKING: this function takes and releases the global lock if the input mask
// is not empty
void uvm_global_mask_release(const uvm_global_processor_mask_t *mask);
// Check for ECC errors for all GPUs in a mask
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_mask_check_ecc_error(uvm_global_processor_mask_t *gpus);
// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);
// Release fault service contexts if any exist.
void uvm_service_block_context_exit(void);
#endif // __UVM_GLOBAL_H__

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,88 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_ACCESS_COUNTERS_H__
#define __UVM_GPU_ACCESS_COUNTERS_H__
#include "uvm_common.h"
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu);
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
// Ignore or unignore access counters notifications. Ignoring means that the
// bottom half is a no-op which just leaves notifications in the HW buffer
// without being serviced and without inspecting any SW state.
//
// To avoid interrupt storms, access counter interrupts will be disabled while
// ignored. Access counter bottom halves may still be scheduled in the top half
// when other interrupts arrive and the top half sees that there are also
// pending access counter notifications.
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore);
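// Illustrative usage sketch (not part of the original sources): the suspend
// path in uvm_global.c ignores notifications before parking the top-half ISR,
// and the resume path unignores them afterwards:
//
//     uvm_gpu_access_counters_set_ignore(gpu, true);   // suspend: leave notifications buffered
//     ...
//     uvm_gpu_access_counters_set_ignore(gpu, false);  // resume: re-evaluate and service them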
// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);
// Global perf initialization/cleanup functions
NV_STATUS uvm_perf_access_counters_init(void);
void uvm_perf_access_counters_exit(void);
// VA space initialization/cleanup functions. See comments in
// uvm_perf_heuristics.h
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
// Check whether access counters should be enabled when the given GPU is
// registered on any VA space.
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
// Functions used to enable/disable access counters on a GPU in the given VA
// space.
//
// A per-GPU reference counter tracks the number of VA spaces in which access
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_gpu_access_counters_disable
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
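// Illustrative usage sketch (not part of the original sources; error handling
// elided): enabling access counters when a GPU is registered in a VA space and
// disabling them on unregistration. The per-GPU reference count described
// above makes the calls safe across multiple VA spaces.
//
//     status = uvm_gpu_access_counters_enable(gpu, va_space);
//     if (status != NV_OK)
//         return status;
//     ...
//     uvm_gpu_access_counters_disable(gpu, va_space);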
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
#endif // __UVM_GPU_ACCESS_COUNTERS_H__

View File

@@ -0,0 +1,774 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_api.h"
#include "uvm_global.h"
#include "uvm_gpu_isr.h"
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_gpu_non_replayable_faults.h"
#include "uvm_thread_context.h"
// Level-based vs pulse-based interrupts
// =====================================
// Turing switches to pulse-based interrupts for replayable/non-replayable
// faults and access counter notifications. Prior GPUs use level-based
// interrupts.
//
// Level-based interrupts are rearmed automatically as long as the interrupt
// condition is set. Pulse-based interrupts, on the other hand, are
// re-triggered by clearing their interrupt line and forcing the interrupt
// condition to be re-evaluated. However, RM re-triggers all top-level
// interrupts when exiting its top half. Thus, both level-based and pulse-based
// interrupts need to be disabled at interrupt handling boundaries, in order to
// avoid interrupt storms.
//
// Moreover, in order to make sure that pulse-based interrupts are not missed,
// we need to clear the interrupt bit and force an interrupt condition
// re-evaluation after interrupts are re-enabled. In the case of replayable
// faults and access counter notifications the interrupt condition is
// re-evaluated by writing to GET. Non-replayable faults work the same way, but
// they are currently owned by RM, so UVM doesn't have to do anything.
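// A condensed sketch of the resulting handling sequence (not part of the
// original sources):
//
//     1) Top half: disable the interrupt at the handling boundary
//     2) Bottom half: service the pending notifications
//     3) Re-enable the interrupt
//     4) Write GET so the pulse-based condition is re-evaluated and any
//        notification that arrived in the meantime re-triggers the interrupt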
// For use by the nv_kthread_q that is servicing the replayable fault bottom
// half, only.
static void replayable_faults_isr_bottom_half_entry(void *args);
// For use by the nv_kthread_q that is servicing the non-replayable fault
// bottom half, only.
static void non_replayable_faults_isr_bottom_half_entry(void *args);
// For use by the nv_kthread_q that is servicing the access counter bottom
// half, only.
static void access_counters_isr_bottom_half_entry(void *args);
// Increments the reference count tracking whether replayable page fault
// interrupts should be disabled. The caller is guaranteed that replayable page
// faults are disabled upon return. Interrupts might already be disabled prior
// to making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_gpu_replayable_faults_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu);
// Decrements the reference count tracking whether replayable page fault
// interrupts should be disabled. Only once the count reaches 0 are the HW
// interrupts actually enabled, so this call does not guarantee that the
// interrupts have been re-enabled upon return.
//
// uvm_gpu_replayable_faults_intr_disable() must have been called prior to
// calling this function.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu);
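// Illustrative usage sketch (not part of the original sources): the top half
// disables replayable fault interrupts under the interrupts_lock before
// scheduling the bottom half (see schedule_replayable_faults_handler() below);
// a matching enable, also under the interrupts_lock, is expected once the
// bottom half has drained the fault buffer:
//
//     uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
//     uvm_gpu_replayable_faults_intr_disable(parent_gpu);   // ref count goes up
//     ...schedule the bottom half...
//     uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
//
//     uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
//     uvm_gpu_replayable_faults_intr_enable(parent_gpu);    // ref count goes down
//     uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);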
static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
{
// handling gets set to false for all handlers during removal, so quit if
// the GPU is in the process of being removed.
if (parent_gpu->isr.replayable_faults.handling) {
// Use raw call instead of UVM helper. Ownership will be recorded in the
// bottom half. See comment replayable_faults_isr_bottom_half().
if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) == 0) {
if (uvm_gpu_replayable_faults_pending(parent_gpu)) {
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled here to avoid an interrupt
// storm
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
// Schedule a bottom half, but do *not* release the GPU ISR
// lock. The bottom half releases the GPU ISR lock as part of
// its cleanup.
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.replayable_faults.bottom_half_q_item);
return 1;
}
else {
up(&parent_gpu->isr.replayable_faults.service_lock.sem);
}
}
}
return 0;
}
static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
{
// handling gets set to false for all handlers during removal, so quit if
// the GPU is in the process of being removed.
if (parent_gpu->isr.non_replayable_faults.handling) {
// Non-replayable faults are stored in a synchronized circular queue
// shared by RM/UVM, so we can query the number of pending
// faults. These faults are not replayed, and since RM advances
// GET to PUT when copying the fault packets to the queue, no further
// interrupts will be triggered by the GPU and faults may stay
// unserviced. Therefore, if there is a fault in the queue, we schedule
// a bottom half unconditionally.
if (uvm_gpu_non_replayable_faults_pending(parent_gpu)) {
bool scheduled;
nv_kref_get(&parent_gpu->gpu_kref);
scheduled = nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.non_replayable_faults.bottom_half_q_item) != 0;
// If the q_item did not get scheduled because it was already
// queued, that instance will handle the pending faults. Just
// drop the GPU kref.
if (!scheduled)
uvm_parent_gpu_kref_put(parent_gpu);
return 1;
}
}
return 0;
}
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
if (!parent_gpu->isr.access_counters.handling_ref_count)
return 0;
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem))
return 0;
if (!uvm_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
return 0;
}
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled to avoid an interrupt storm
uvm_gpu_access_counters_intr_disable(parent_gpu);
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters.bottom_half_q_item);
return 1;
}
// This is called from RM's top-half ISR (see the nvidia_isr() function), and UVM is given a
// chance to handle the interrupt before most of the RM processing. UVM communicates what it
// did back to RM via the return code:
//
// NV_OK:
// UVM handled an interrupt.
//
// NV_WARN_MORE_PROCESSING_REQUIRED:
// UVM did not schedule a bottom half, because it was unable to get the locks it
// needed, but there is still UVM work to be done. RM will return "not handled" to the
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
// processing continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining (GET != PUT in
// the fault buffer).
//
// NV_ERR_NO_INTR_PENDING:
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
// available for the future. RM's interrupt handling tends to evolve as new chips and
// new interrupts get created.
static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
{
uvm_parent_gpu_t *parent_gpu;
unsigned num_handlers_scheduled = 0;
NV_STATUS status;
if (!in_interrupt() && in_atomic()) {
// Early-out if we're not in interrupt context, but memory allocations
// require GFP_ATOMIC. This happens with CONFIG_DEBUG_SHIRQ enabled,
// where the interrupt handler is called as part of its removal to make
// sure it's prepared for being called even when it's being freed.
// This breaks the assumption that the UVM driver is called in atomic
// context only in the interrupt context, which the thread context
// management relies on.
return NV_OK;
}
if (!gpu_uuid) {
// This can happen early in the main GPU driver initialization, because
// that involves testing interrupts before the GPU is fully set up.
return NV_ERR_NO_INTR_PENDING;
}
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
parent_gpu = uvm_parent_gpu_get_by_uuid_locked(gpu_uuid);
if (parent_gpu == NULL) {
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return NV_ERR_NO_INTR_PENDING;
}
// We take a reference during the top half, and an additional reference for
// each scheduled bottom half. References are dropped at the end of the bottom
// halves.
nv_kref_get(&parent_gpu->gpu_kref);
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
++parent_gpu->isr.interrupt_count;
if (parent_gpu->isr.is_suspended) {
status = NV_ERR_NO_INTR_PENDING;
}
else {
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
if (num_handlers_scheduled == 0)
status = NV_WARN_MORE_PROCESSING_REQUIRED;
else
status = NV_OK;
}
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_kref_put(parent_gpu);
return status;
}
NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid)
{
UVM_ENTRY_RET(uvm_isr_top_half(gpu_uuid));
}
static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int node)
{
#if UVM_THREAD_AFFINITY_SUPPORTED()
if (node != -1 && !cpumask_empty(uvm_cpumask_of_node(node))) {
NV_STATUS status;
status = errno_to_nv_status(nv_kthread_q_init_on_node(queue, name, node));
if (status != NV_OK)
return status;
return errno_to_nv_status(set_cpus_allowed_ptr(queue->q_kthread, uvm_cpumask_of_node(node)));
}
#endif
return errno_to_nv_status(nv_kthread_q_init(queue, name));
}
NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
char kthread_name[TASK_COMM_LEN + 1];
if (parent_gpu->replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU fault buffer: %s, GPU: %s\n",
nvstatusToString(status),
parent_gpu->name);
return status;
}
nv_kthread_q_item_init(&parent_gpu->isr.replayable_faults.bottom_half_q_item,
replayable_faults_isr_bottom_half_entry,
parent_gpu);
parent_gpu->isr.replayable_faults.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.replayable_faults.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.replayable_faults.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.replayable_faults.handling = true;
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u BH", uvm_id_value(parent_gpu->id));
status = init_queue_on_node(&parent_gpu->isr.bottom_half_q, kthread_name, parent_gpu->closest_cpu_numa_node);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed in nv_kthread_q_init for bottom_half_q: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);
return status;
}
if (parent_gpu->non_replayable_faults_supported) {
nv_kthread_q_item_init(&parent_gpu->isr.non_replayable_faults.bottom_half_q_item,
non_replayable_faults_isr_bottom_half_entry,
parent_gpu);
parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count) *
num_possible_cpus());
if (!parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.non_replayable_faults.handling = true;
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u KC", uvm_id_value(parent_gpu->id));
status = init_queue_on_node(&parent_gpu->isr.kill_channel_q,
kthread_name,
parent_gpu->closest_cpu_numa_node);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed in nv_kthread_q_init for kill_channel_q: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);
return status;
}
}
if (parent_gpu->access_counters_supported) {
status = uvm_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
parent_gpu->name);
return status;
}
nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
access_counters_isr_bottom_half_entry,
parent_gpu);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters.handling_ref_count = 0;
parent_gpu->isr.access_counters.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
}
}
return NV_OK;
}
void uvm_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
{
nv_kthread_q_flush(&parent_gpu->isr.bottom_half_q);
nv_kthread_q_flush(&parent_gpu->isr.kill_channel_q);
}
void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
// Now that the GPU is safely out of the global table, lock the GPU and mark
// it as no longer handling interrupts so the top half knows not to schedule
// any more bottom halves.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
parent_gpu->isr.replayable_faults.was_handling = parent_gpu->isr.replayable_faults.handling;
parent_gpu->isr.non_replayable_faults.was_handling = parent_gpu->isr.non_replayable_faults.handling;
parent_gpu->isr.replayable_faults.handling = false;
parent_gpu->isr.non_replayable_faults.handling = false;
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
// Flush all bottom half ISR work items and stop the nv_kthread_q that is
// servicing this GPU's bottom halves. Note that this requires that the
// bottom half never take the global lock, since we're holding it here.
//
// Note that it's safe to call nv_kthread_q_stop() even if
// nv_kthread_q_init() failed in uvm_gpu_init_isr().
nv_kthread_q_stop(&parent_gpu->isr.bottom_half_q);
nv_kthread_q_stop(&parent_gpu->isr.kill_channel_q);
}
void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
{
// Return ownership to RM:
if (parent_gpu->isr.replayable_faults.was_handling) {
// No user threads could have anything left on
// replayable_faults.disable_intr_ref_count since they must retain the
// GPU across uvm_gpu_replayable_faults_isr_lock/
// uvm_gpu_replayable_faults_isr_unlock. This means the
// uvm_gpu_replayable_faults_intr_disable above could only have raced
// with bottom halves.
//
// If we cleared replayable_faults.handling before the bottom half got
// to its uvm_gpu_replayable_faults_isr_unlock, when it eventually
// reached uvm_gpu_replayable_faults_isr_unlock it would have skipped
// the interrupt re-enable, leaving us with extra ref counts here.
//
// In any case we're guaranteed that replayable faults interrupts are
// disabled and can't get re-enabled, so we can safely ignore the ref
// count value and just clean things up.
UVM_ASSERT_MSG(parent_gpu->isr.replayable_faults.disable_intr_ref_count > 0,
"%s replayable_faults.disable_intr_ref_count: %llu\n",
parent_gpu->name,
parent_gpu->isr.replayable_faults.disable_intr_ref_count);
uvm_gpu_fault_buffer_deinit(parent_gpu);
}
if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_gpu_deinit_access_counters(parent_gpu);
}
uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
}
static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
{
uvm_global_gpu_id_t global_gpu_id = uvm_global_gpu_id_from_gpu_id(parent_gpu->id);
uvm_gpu_t *gpu;
// When SMC is enabled, there's no longer a 1:1 relationship between the
// parent and the partitions. But because all relevant interrupt paths
// are shared, as is the fault reporting logic, it's sufficient here
// to proceed with any valid uvm_gpu_t, even if the corresponding partition
// didn't cause all, or even any of the interrupts.
// The bottom half handlers will later find the appropriate partitions by
// attributing the notifications to VA spaces as necessary.
if (parent_gpu->smc.enabled) {
NvU32 sub_processor_index;
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
sub_processor_index = find_first_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
gpu = uvm_gpu_get(uvm_global_id_from_value(uvm_global_id_value(global_gpu_id) + sub_processor_index));
UVM_ASSERT(gpu != NULL);
}
else {
gpu = NULL;
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
}
else {
gpu = uvm_gpu_get(global_gpu_id);
UVM_ASSERT(gpu != NULL);
}
return gpu;
}
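// Illustrative sketch (not part of the driver): the "any valid partition will
// do" selection above reduces to scanning a bitmap for the first set bit, as
// find_first_bit() does over parent_gpu->valid_gpus. A minimal stand-alone
// version, with hypothetical names:
#include <stdio.h>

#define MAX_SUB_PROCESSORS 8

static int find_first_valid(unsigned valid_mask)
{
    int i;

    for (i = 0; i < MAX_SUB_PROCESSORS; i++) {
        if (valid_mask & (1u << i))
            return i;   // any valid partition is good enough here
    }
    return -1;          // no partition registered
}

int main(void)
{
    printf("%d\n", find_first_valid(0x0));  // prints -1
    printf("%d\n", find_first_valid(0x6));  // prints 1
    return 0;
}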
static void replayable_faults_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_gpu_t *gpu;
unsigned int cpu;
gpu = find_first_valid_gpu(parent_gpu);
if (gpu == NULL)
goto put_kref;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
// Record the lock ownership
// The service_lock semaphore is taken in the top half using a raw
// semaphore call (down_trylock()). Here, the lock "ownership" is recorded,
// using a direct call to uvm_record_lock(). This pair of raw calls results
// in an ownership "transfer" between the top and bottom halves.
// Due to this ownership transfer, other usages of the service_lock can
// use the UVM (un)lock helpers to handle lock ownership and record keeping.
uvm_record_lock(&parent_gpu->isr.replayable_faults.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
// Multiple bottom halves for replayable faults can be running
// concurrently, but only one can be running this function for a given GPU
// since we enter with the replayable_faults.service_lock held.
cpu = get_cpu();
++parent_gpu->isr.replayable_faults.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.replayable_faults.stats.cpus_used_mask);
++parent_gpu->isr.replayable_faults.stats.cpu_exec_count[cpu];
put_cpu();
uvm_gpu_service_replayable_faults(gpu);
uvm_gpu_replayable_faults_isr_unlock(parent_gpu);
put_kref:
uvm_parent_gpu_kref_put(parent_gpu);
}
static void replayable_faults_isr_bottom_half_entry(void *args)
{
UVM_ENTRY_VOID(replayable_faults_isr_bottom_half(args));
}
static void non_replayable_faults_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_gpu_t *gpu;
unsigned int cpu;
gpu = find_first_valid_gpu(parent_gpu);
if (gpu == NULL)
goto put_kref;
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
uvm_gpu_non_replayable_faults_isr_lock(parent_gpu);
// Multiple bottom halves for non-replayable faults can be running
// concurrently, but only one can enter this section for a given GPU
// since we acquired the non_replayable_faults.service_lock
cpu = get_cpu();
++parent_gpu->isr.non_replayable_faults.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.non_replayable_faults.stats.cpus_used_mask);
++parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count[cpu];
put_cpu();
uvm_gpu_service_non_replayable_fault_buffer(gpu);
uvm_gpu_non_replayable_faults_isr_unlock(parent_gpu);
put_kref:
uvm_parent_gpu_kref_put(parent_gpu);
}
static void non_replayable_faults_isr_bottom_half_entry(void *args)
{
UVM_ENTRY_VOID(non_replayable_faults_isr_bottom_half(args));
}
static void access_counters_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_gpu_t *gpu;
unsigned int cpu;
gpu = find_first_valid_gpu(parent_gpu);
if (gpu == NULL)
goto put_kref;
UVM_ASSERT(parent_gpu->access_counters_supported);
uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
// Multiple bottom halves for counter notifications can be running
// concurrently, but only one can be running this function for a given GPU
// since we enter with the access_counters_isr_lock held.
cpu = get_cpu();
++parent_gpu->isr.access_counters.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
put_cpu();
uvm_gpu_service_access_counters(gpu);
uvm_gpu_access_counters_isr_unlock(parent_gpu);
put_kref:
uvm_parent_gpu_kref_put(parent_gpu);
}
static void access_counters_isr_bottom_half_entry(void *args)
{
UVM_ENTRY_VOID(access_counters_isr_bottom_half(args));
}
void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
// Bump the disable ref count. This guarantees that the bottom half or
// another thread trying to take the replayable_faults.service_lock won't
// inadvertently re-enable interrupts during this locking sequence.
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
// Now that we know replayable fault interrupts can't get enabled, take the
// lock.
uvm_down(&parent_gpu->isr.replayable_faults.service_lock);
}
void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
// The following sequence is delicate:
//
// 1) Enable replayable page fault interrupts
// 2) Rearm pulse based interrupts
// 3) Unlock GPU isr.replayable_faults.service_lock (mutex)
// 4) Unlock isr.interrupts_lock (spin lock)
//
// ...because the moment that page fault interrupts are reenabled, a top
// half might start receiving them. A top-half cannot run on the core
// executing this code as interrupts are disabled as long as the
// interrupts_lock is held. If it runs on a different core, it's going to
// spin waiting for the interrupts_lock to be released by this core before
// attempting to acquire the service_lock mutex. Hence there is no risk of
// the top-half missing interrupts after they are reenabled, but before the
// service_lock mutex is released.
if (parent_gpu->isr.replayable_faults.handling) {
// Turn page fault interrupts back on, unless remove_gpu() has already removed
// this GPU from the GPU table. remove_gpu() indicates that situation by
// setting parent_gpu->isr.replayable_faults.handling to false.
//
// This path can only be taken from the bottom half. User threads
// calling this function must have previously retained the GPU, so they
// can't race with remove_gpu.
//
// TODO: Bug 1766600: Assert that we're in a bottom half thread, once
// that's tracked by the lock assertion code.
//
// Note that if we're in the bottom half and the GPU was removed before
// we checked replayable_faults.handling, we won't drop our interrupt
// disable ref count from the corresponding top-half call to
// uvm_gpu_replayable_faults_intr_disable. That's ok because remove_gpu
// ignores the refcount after waiting for the bottom half to finish.
uvm_gpu_replayable_faults_intr_enable(parent_gpu);
// Rearm pulse interrupts. This guarantees that the state of the pending
// interrupt is current and the top level rearm performed by RM is only
// going to trigger it if necessary. This avoids both of the possible
// bad cases:
// 1) GET != PUT but interrupt state is not pending
// This could lead to the interrupt being lost.
// 2) GET == PUT but interrupt state is pending
// This could lead to an interrupt storm as the top-half would see
// no work to be done, but the interrupt would get constantly
// retriggered by RM's top level rearm.
// clear_replayable_faults is a no-op for architectures that don't
// support pulse-based interrupts.
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
}
// This unlock call has to be an out-of-order unlock because interrupts_lock
// is still held. Otherwise, it would be reported as a lock order violation.
uvm_up_out_of_order(&parent_gpu->isr.replayable_faults.service_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
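// Illustrative sketch (not part of the driver): a user-space model of the
// unlock ordering described above, with two pthread mutexes standing in for
// the interrupts_lock spin lock and the service_lock semaphore. The point is
// only the ordering: interrupts are re-enabled and pulse interrupts rearmed
// while interrupts_lock is still held, service_lock is released out of order,
// and interrupts_lock is released last. All names are hypothetical.
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t interrupts_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t service_lock    = PTHREAD_MUTEX_INITIALIZER;

static void isr_unlock_model(void)
{
    pthread_mutex_lock(&interrupts_lock);
    puts("1) re-enable replayable fault interrupts");
    puts("2) rearm pulse-based interrupts (no-op where unsupported)");
    pthread_mutex_unlock(&service_lock);     // 3) out-of-order release
    pthread_mutex_unlock(&interrupts_lock);  // 4) released last
}

int main(void)
{
    pthread_mutex_lock(&service_lock);  // taken earlier by the isr_lock path
    isr_unlock_model();
    return 0;
}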
void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_down(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
{
// See comments in uvm_gpu_replayable_faults_isr_lock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_gpu_access_counters_intr_disable(parent_gpu);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
uvm_down(&parent_gpu->isr.access_counters.service_lock);
}
void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
// See comments in uvm_gpu_replayable_faults_isr_unlock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_gpu_access_counters_intr_enable(parent_gpu);
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
parent_gpu->access_counter_buffer_info.cached_get);
}
// This unlock call has to be an out-of-order unlock because interrupts_lock
// is still held. Otherwise, it would be reported as a lock order violation.
uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
if (parent_gpu->isr.replayable_faults.handling && parent_gpu->isr.replayable_faults.disable_intr_ref_count == 0)
parent_gpu->fault_buffer_hal->disable_replayable_faults(parent_gpu);
++parent_gpu->isr.replayable_faults.disable_intr_ref_count;
}
static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(parent_gpu->isr.replayable_faults.disable_intr_ref_count > 0);
--parent_gpu->isr.replayable_faults.disable_intr_ref_count;
if (parent_gpu->isr.replayable_faults.handling && parent_gpu->isr.replayable_faults.disable_intr_ref_count == 0)
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
}
void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
// The read of handling_ref_count could race with a write from
// gpu_access_counters_enable/disable, since here we may not hold the
// ISR lock. But those functions are invoked with the interrupt disabled
// (disable_intr_ref_count > 0), so the check always returns false when the
// race occurs
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
}
++parent_gpu->isr.access_counters.disable_intr_ref_count;
}
void uvm_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
--parent_gpu->isr.access_counters.disable_intr_ref_count;
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
}
}
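// Illustrative sketch (not part of the driver): a self-contained model of the
// ref-counted disable/enable pattern implemented by the functions above. The
// hardware is only touched on the 0 -> 1 transition of the disable count
// (disable) and on the 1 -> 0 transition (re-enable); nested callers just move
// the counter. The "handling" checks in the real code are omitted for brevity
// and all names are hypothetical.
#include <assert.h>
#include <stdio.h>

static unsigned disable_intr_ref_count;

static void hw_disable(void) { puts("HW: notifications disabled"); }
static void hw_enable(void)  { puts("HW: notifications enabled");  }

static void intr_disable(void)
{
    if (disable_intr_ref_count++ == 0)
        hw_disable();
}

static void intr_enable(void)
{
    assert(disable_intr_ref_count > 0);
    if (--disable_intr_ref_count == 0)
        hw_enable();
}

int main(void)
{
    intr_disable();  // 0 -> 1: touches HW
    intr_disable();  // nested: counter only
    intr_enable();   // counter only
    intr_enable();   // 1 -> 0: touches HW
    return 0;
}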
View File
@@ -0,0 +1,196 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_ISR_H__
#define __UVM_GPU_ISR_H__
#include "nv-kthread-q.h"
#include "uvm_common.h"
#include "uvm_lock.h"
#include "uvm_forward_decl.h"
// ISR handling state for a specific interrupt type
typedef struct
{
// Protects against changes to the GPU data structures used by the handling
// routines of this interrupt type.
uvm_semaphore_t service_lock;
// Bottom-half to be executed for this interrupt. There is one bottom-half
// per interrupt type.
nv_kthread_q_item_t bottom_half_q_item;
union
{
// Used for replayable and non-replayable faults.
struct
{
// This is set to true during add_gpu(), if the GPU supports the
// interrupt. It is set back to false during remove_gpu().
// interrupts_lock must be held in order to write this variable.
bool handling;
// Variable set in uvm_gpu_disable_isr() during remove_gpu() to
// indicate if this type of interrupt was being handled by the
// driver.
bool was_handling;
};
// Used for access counters.
//
// If the GPU does not support access counters, the ref count is always
// zero. Otherwise, the refcount is incremented when the GPU is
// registered in a VA space for the first time, and decremented when
// unregistered or the VA space is destroyed.
//
// Locking: protected by the GPU access counters ISR lock. Naked
// accesses are allowed during GPU addition and removal.
NvU64 handling_ref_count;
};
struct
{
// Number of the bottom-half invocations for this interrupt on a GPU over
// its lifetime
NvU64 bottom_half_count;
// A bitmask of the CPUs on which the bottom half has executed. The
// corresponding bit gets set once the bottom half executes on that
// CPU.
// This mask is useful when testing that the bottom half is getting
// executed on the correct set of CPUs.
struct cpumask cpus_used_mask;
// An array (one per possible CPU), which holds the number of times the
// bottom half has executed on that CPU.
NvU64 *cpu_exec_count;
} stats;
// This is the number of times the function that disables this type of
// interrupt has been called without a corresponding call to the function
// that enables it. If this is > 0, interrupts are disabled. This field is
// protected by interrupts_lock. This field is only valid for interrupts
// directly owned by UVM:
// - replayable_faults
// - access_counters
NvU64 disable_intr_ref_count;
} uvm_intr_handler_t;
// State for all ISR handling in UVM
typedef struct
{
// This is set by uvm_suspend() and uvm_resume() to indicate whether
// top-half ISR processing is suspended for power management. Calls from
// the RM's top-half are to be completed without processing when this
// flag is set to true.
bool is_suspended;
// There is exactly one nv_kthread_q per GPU. It is used for the ISR bottom
// halves. So N CPUs will be servicing M GPUs, in general. There is one
// bottom-half per interrupt type.
nv_kthread_q_t bottom_half_q;
// Protects the state of interrupts (enabled/disabled) and whether the GPU is
// currently handling them. Taken in both interrupt and process context.
uvm_spinlock_irqsave_t interrupts_lock;
uvm_intr_handler_t replayable_faults;
uvm_intr_handler_t non_replayable_faults;
uvm_intr_handler_t access_counters;
// Kernel thread used to kill channels on fatal non-replayable faults.
// This is needed because we cannot call into RM from the bottom-half to
// avoid deadlocks.
nv_kthread_q_t kill_channel_q;
// Number of top-half ISRs called for this GPU over its lifetime
NvU64 interrupt_count;
} uvm_isr_info_t;
// Entry point for interrupt handling. This is called from RM's top half
NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
// Prevent new bottom halves from being scheduled. This is called during parent
// GPU removal.
void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
// Destroy ISR handling state and return interrupt ownership to RM. This is
// called during parent GPU removal
void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
// supported by the GPU) because the top half attempts to take this lock, and we
// would cause an interrupt storm if we didn't disable them first.
//
// At least one GPU under the parent must have been previously retained.
void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_gpu_replayable_faults_isr_lock(), which should only be called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_gpu_replayable_faults_isr_lock, no GPUs under
// the parent need to have been previously retained.
void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// See uvm_gpu_replayable_faults_isr_lock/unlock
void uvm_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_gpu_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_gpu_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_ISR_H__
View File
@@ -0,0 +1,683 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nv_uvm_interface.h"
#include "uvm_common.h"
#include "uvm_api.h"
#include "uvm_gpu_non_replayable_faults.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_lock.h"
#include "uvm_tools.h"
#include "uvm_user_channel.h"
#include "uvm_va_space_mm.h"
#include "uvm_va_block.h"
#include "uvm_va_range.h"
#include "uvm_kvmalloc.h"
#include "uvm_ats_faults.h"
// In the context of a CUDA application using Unified Memory, it is sometimes
// assumed that there is a single type of fault, originated by a memory
// load/store in an SM (Graphics Engine), which itself can be traced back to a
// memory access in a CUDA kernel written by a developer. In reality, faults can
// also be triggered by other parts of the GPU, i.e. by other engines, as the
// result of developer-facing APIs, or operations initiated by a user-mode
// driver. The Graphics Engine faults are called replayable faults, while the
// rest are called non-replayable. The differences between the two types of
// faults go well beyond the engine originating the fault.
//
// A non-replayable fault originates in an engine other than Graphics. UVM
// services non-replayable faults from the Copy and PBDMA (Host/ESCHED) Engines.
// Non-replayable faults originated in other engines are considered fatal, and
// do not reach the UVM driver. While UVM can distinguish between faults
// originated in the Copy Engine and faults originated in the PBDMA Engine, in
// practice they are all processed in the same way. Replayable fault support in
// Graphics was introduced in Pascal, and non-replayable fault support in CE and
// PBDMA Engines was introduced in Volta; all non-replayable faults were fatal
// before Volta.
//
// An example of a Copy Engine non-replayable fault is a memory copy between two
// virtual addresses on a GPU, in which either the source or destination
// pointers are not currently mapped to a physical address in the page tables of
// the GPU. An example of a PBDMA non-replayable fault is a semaphore acquire in
// which the semaphore virtual address passed as argument is currently not
// mapped to any physical address.
//
// Non-replayable faults originated in the CE and PBDMA Engines result in HW
// preempting the channel associated with the fault, a mechanism called "fault
// and switch". More precisely, the switching out affects not only the channel
// that caused the fault, but all the channels in the same Time Slice Group
// (TSG). SW intervention is required so all the channels in the TSG can be
// scheduled again, but channels in other TSGs can be scheduled and resume their
// normal execution. In the case of the non-replayable faults serviced by UVM,
// the driver clears a channel's faulted bit upon successful servicing, but it
// is only when the servicing has completed for all the channels in the TSG that
// they are all allowed to be switched in. Non-replayable faults originated in
// engines other than CE and PBDMA are fatal because these other units lack
// hardware support for the "fault and switch" and restart mechanisms just
// described.
// On the other hand, replayable faults block preemption of the channel until
// software (UVM) services the fault. This is sometimes known as "fault and
// stall". Note that replayable faults prevent the execution of other channels,
// which are stalled until the fault is serviced.
//
// The "non-replayable" naming alludes to the fact that, historically, these
// faults indicated a fatal condition so there was no recovery ("replay")
// process, and SW could not ignore or drop the fault. As discussed before, this
// is no longer the case and while at times the hardware documentation uses the
// "fault and replay" expression for CE and PBDMA faults, we reserve that
// expression for Graphics faults and favor the term "fault and reschedule"
// instead. Replaying a fault does not necessarily imply that UVM has serviced
// it. For example, the UVM driver may choose to ignore the replayable faults
// associated with a GPU for some period of time if it detects that there is
// thrashing going on, and the GPU needs to be throttled. The fault entries
// corresponding to the ignored faults are never saved by UVM, but new entries
// (and new interrupts) will be generated by hardware each time after UVM issues
// a replay.
//
// While replayable faults are always the responsibility of UVM, the servicing
// of non-replayable faults is split between RM and UVM. In the case of
// replayable faults, UVM has sole SW ownership of the hardware buffer
// containing the faults, and it is responsible for updating the GET pointer to
// signal the hardware that a number of faults have been read. UVM also reads
// the PUT pointer value written by hardware. But in the case of non-replayable
// faults, UVM reads the fault entries out of a regular CPU buffer, shared with
// RM, called "shadow buffer". RM is responsible for accessing the actual
// non-replayable hardware buffer, reading the PUT pointer, updating the GET
// pointer, and moving CE and PBDMA faults from the hardware buffer to the
// shadow buffer. Because the Resource Manager owns the HW buffer, UVM needs to
// call RM when servicing a non-replayable fault, first to figure out if there
// is a pending fault, and then to read entries from the shadow buffer.
//
// Once UVM has parsed a non-replayable fault entry corresponding to managed
// memory, and identified the VA block associated with it, the servicing logic
// for that block is identical to that of a replayable fault, see
// uvm_va_block_service_locked. Another similarity between the two types of
// faults is that they use the same entry format, uvm_fault_buffer_entry_t.
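// Illustrative sketch (not part of the driver): a user-space model of the
// GET/PUT convention referenced above. The producer (standing in for the
// hardware or RM) advances PUT as it writes fault entries; the consumer sees
// work pending while GET != PUT, copies entries into a local "shadow" array
// and advances GET to signal that the entries were consumed. All names are
// hypothetical and the RM/UVM ownership split is not modeled.
#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE 8

static unsigned ring[RING_SIZE];  // stand-in for the HW fault buffer
static unsigned get, put;         // ring indices

static bool faults_pending(void)
{
    return get != put;
}

static void produce_fault(unsigned fault_addr)
{
    ring[put % RING_SIZE] = fault_addr;
    put++;
}

static unsigned fetch_into_shadow(unsigned *shadow, unsigned max)
{
    unsigned n = 0;

    while (faults_pending() && n < max) {
        shadow[n++] = ring[get % RING_SIZE];
        get++;  // tells the producer this entry has been consumed
    }
    return n;
}

int main(void)
{
    unsigned shadow[RING_SIZE];
    unsigned i, n;

    produce_fault(0x1000);
    produce_fault(0x2000);
    n = fetch_into_shadow(shadow, RING_SIZE);
    for (i = 0; i < n; i++)
        printf("servicing fault at 0x%x\n", shadow[i]);
    printf("pending after servicing: %d\n", faults_pending());
    return 0;
}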
// There is no error handling in this function. The caller is in charge of
// calling uvm_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
non_replayable_faults->shadow_buffer_copy = NULL;
non_replayable_faults->fault_cache = NULL;
non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
non_replayable_faults->shadow_buffer_copy =
uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
if (!non_replayable_faults->shadow_buffer_copy)
return NV_ERR_NO_MEMORY;
non_replayable_faults->fault_cache = uvm_kvmalloc_zero(non_replayable_faults->max_faults *
sizeof(*non_replayable_faults->fault_cache));
if (!non_replayable_faults->fault_cache)
return NV_ERR_NO_MEMORY;
uvm_tracker_init(&non_replayable_faults->clear_faulted_tracker);
uvm_tracker_init(&non_replayable_faults->fault_service_tracker);
return NV_OK;
}
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
if (non_replayable_faults->fault_cache) {
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
uvm_tracker_deinit(&non_replayable_faults->clear_faulted_tracker);
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->fault_service_tracker));
uvm_tracker_deinit(&non_replayable_faults->fault_service_tracker);
}
uvm_kvfree(non_replayable_faults->shadow_buffer_copy);
uvm_kvfree(non_replayable_faults->fault_cache);
non_replayable_faults->shadow_buffer_copy = NULL;
non_replayable_faults->fault_cache = NULL;
}
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvBool has_pending_faults;
UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
&has_pending_faults);
UVM_ASSERT(status == NV_OK);
return has_pending_faults == NV_TRUE;
}
static NvU32 fetch_non_replayable_fault_buffer_entries(uvm_gpu_t *gpu)
{
NV_STATUS status;
NvU32 i = 0;
NvU32 cached_faults = 0;
uvm_fault_buffer_entry_t *fault_cache;
NvU32 entry_size = gpu->parent->fault_buffer_hal->entry_size(gpu->parent);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
fault_cache = non_replayable_faults->fault_cache;
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.non_replayable_faults.service_lock));
UVM_ASSERT(gpu->parent->non_replayable_faults_supported);
status = nvUvmInterfaceGetNonReplayableFaults(&gpu->parent->fault_buffer_info.rm_info,
non_replayable_faults->shadow_buffer_copy,
&cached_faults);
UVM_ASSERT(status == NV_OK);
// Parse all faults
for (i = 0; i < cached_faults; ++i) {
uvm_fault_buffer_entry_t *fault_entry = &non_replayable_faults->fault_cache[i];
gpu->parent->fault_buffer_hal->parse_non_replayable_entry(gpu->parent, current_hw_entry, fault_entry);
// The GPU aligns the fault addresses to 4k, but all of our tracking is
// done in PAGE_SIZE chunks which might be larger.
fault_entry->fault_address = UVM_PAGE_ALIGN_DOWN(fault_entry->fault_address);
// Make sure that all fields in the entry are properly initialized
fault_entry->va_space = NULL;
fault_entry->is_fatal = (fault_entry->fault_type >= UVM_FAULT_TYPE_FATAL);
fault_entry->filtered = false;
fault_entry->num_instances = 1;
fault_entry->access_type_mask = uvm_fault_access_type_mask_bit(fault_entry->fault_access_type);
INIT_LIST_HEAD(&fault_entry->merged_instances_list);
fault_entry->non_replayable.buffer_index = i;
if (fault_entry->is_fatal) {
// Record the fatal fault event later as we need the va_space locked
fault_entry->fatal_reason = UvmEventFatalReasonInvalidFaultType;
}
else {
fault_entry->fatal_reason = UvmEventFatalReasonInvalid;
}
current_hw_entry += entry_size;
}
return cached_faults;
}
// In SRIOV, the UVM (guest) driver does not have access to the privileged
// registers used to clear the faulted bit. Instead, UVM requests host RM to do
// the clearing on its behalf, using a SW method.
static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
{
if (uvm_gpu_is_virt_mode_sriov(gpu)) {
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
return true;
}
return false;
}
static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault_entry,
NvU32 batch_id,
uvm_tracker_t *tracker)
{
NV_STATUS status;
uvm_push_t push;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
UVM_ASSERT(!fault_entry->is_fatal);
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
tracker,
&push,
"Clearing set bit for address 0x%llx",
fault_entry->fault_address);
if (status != NV_OK) {
UVM_ERR_PRINT("Error acquiring tracker before clearing faulted: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
return status;
}
if (use_clear_faulted_channel_sw_method(gpu))
gpu->parent->host_hal->clear_faulted_channel_sw_method(&push, user_channel, fault_entry);
else
gpu->parent->host_hal->clear_faulted_channel_method(&push, user_channel, fault_entry);
uvm_tools_broadcast_replay(gpu, &push, batch_id, fault_entry->fault_source.client_type);
uvm_push_end(&push);
// Add this push to the GPU's clear_faulted_tracker so GPU removal can wait
// on it.
status = uvm_tracker_add_push_safe(&non_replayable_faults->clear_faulted_tracker, &push);
// Add this push to the channel's clear_faulted_tracker so user channel
// removal can wait on it instead of using the per-GPU tracker, which would
// require a lock.
if (status == NV_OK)
status = uvm_tracker_add_push_safe(&user_channel->clear_faulted_tracker, &push);
return status;
}
static NV_STATUS clear_faulted_register_on_gpu(uvm_gpu_t *gpu,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault_entry,
NvU32 batch_id,
uvm_tracker_t *tracker)
{
NV_STATUS status;
UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
// We need to wait for all pending work before writing to the channel
// register
status = uvm_tracker_wait(tracker);
if (status != NV_OK)
return status;
gpu->parent->host_hal->clear_faulted_channel_register(user_channel, fault_entry);
uvm_tools_broadcast_replay_sync(gpu, batch_id, fault_entry->fault_source.client_type);
return NV_OK;
}
static NV_STATUS clear_faulted_on_gpu(uvm_gpu_t *gpu,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *fault_entry,
NvU32 batch_id,
uvm_tracker_t *tracker)
{
if (gpu->parent->has_clear_faulted_channel_method || use_clear_faulted_channel_sw_method(gpu))
return clear_faulted_method_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);
return clear_faulted_register_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);
}
static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_fault_buffer_entry_t *fault_entry,
uvm_service_block_context_t *service_context)
{
NV_STATUS status = NV_OK;
uvm_page_index_t page_index;
uvm_perf_thrashing_hint_t thrashing_hint;
uvm_processor_id_t new_residency;
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_va_range_t *va_range = va_block->va_range;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
UVM_ASSERT(!fault_entry->is_fatal);
uvm_assert_rwsem_locked(&va_space->lock);
UVM_ASSERT(fault_entry->va_space == va_space);
UVM_ASSERT(fault_entry->fault_address >= va_block->start);
UVM_ASSERT(fault_entry->fault_address <= va_block->end);
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
if (service_context->num_retries == 0) {
// notify event to tools/performance heuristics. For now we use a
// unique batch id per fault, since we clear the faulted channel for
// each fault.
uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
va_block,
gpu->id,
service_context->block_context.policy->preferred_location,
fault_entry,
++non_replayable_faults->batch_id,
false);
}
// Check logical permissions
status = uvm_va_range_check_logical_permissions(va_range,
gpu->id,
fault_entry->fault_access_type,
uvm_range_group_address_migratable(va_space,
fault_entry->fault_address));
if (status != NV_OK) {
fault_entry->is_fatal = true;
fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
return NV_OK;
}
// TODO: Bug 1880194: Revisit thrashing detection
thrashing_hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;
service_context->read_duplicate_count = 0;
service_context->thrashing_pin_count = 0;
page_index = uvm_va_block_cpu_page_index(va_block, fault_entry->fault_address);
// Compute new residency and update the masks
new_residency = uvm_va_block_select_residency(va_block,
page_index,
gpu->id,
fault_entry->access_type_mask,
service_context->block_context.policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
&read_duplicate);
// Initialize the minimum necessary state in the fault service context
uvm_processor_mask_zero(&service_context->resident_processors);
// Set new residency and update the masks
uvm_processor_mask_set(&service_context->resident_processors, new_residency);
// The masks need to be fully zeroed as the fault region may grow due to prefetching
uvm_page_mask_zero(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency);
uvm_page_mask_set(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency, page_index);
if (read_duplicate) {
uvm_page_mask_zero(&service_context->read_duplicate_mask);
uvm_page_mask_set(&service_context->read_duplicate_mask, page_index);
service_context->read_duplicate_count = 1;
}
service_context->access_type[page_index] = fault_entry->fault_access_type;
service_context->region = uvm_va_block_region_for_page(page_index);
status = uvm_va_block_service_locked(gpu->id, va_block, va_block_retry, service_context);
++service_context->num_retries;
return status;
}
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
struct mm_struct *mm,
uvm_va_block_t *va_block,
uvm_fault_buffer_entry_t *fault_entry)
{
NV_STATUS status, tracker_status;
uvm_va_block_retry_t va_block_retry;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;
service_context->block_context.mm = mm;
uvm_mutex_lock(&va_block->lock);
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
service_managed_fault_in_block_locked(gpu,
va_block,
&va_block_retry,
fault_entry,
service_context));
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
&va_block->tracker);
uvm_mutex_unlock(&va_block->lock);
return status == NV_OK ? tracker_status : status;
}
// See uvm_unregister_channel for comments on the channel destruction
// sequence.
static void kill_channel_delayed(void *_user_channel)
{
uvm_user_channel_t *user_channel = (uvm_user_channel_t *)_user_channel;
uvm_va_space_t *va_space = user_channel->kill_channel.va_space;
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
uvm_va_space_down_read_rm(va_space);
if (user_channel->gpu_va_space) {
// RM handles the fault, which will do the correct fault reporting in the
// kernel logs and will initiate channel teardown
NV_STATUS status = nvUvmInterfaceReportNonReplayableFault(uvm_gpu_device_handle(user_channel->gpu),
user_channel->kill_channel.fault_packet);
UVM_ASSERT(status == NV_OK);
}
uvm_va_space_up_read_rm(va_space);
uvm_user_channel_release(user_channel);
}
static void kill_channel_delayed_entry(void *user_channel)
{
UVM_ENTRY_VOID(kill_channel_delayed(user_channel));
}
static void schedule_kill_channel(uvm_gpu_t *gpu,
uvm_fault_buffer_entry_t *fault_entry,
uvm_user_channel_t *user_channel)
{
uvm_va_space_t *va_space = fault_entry->va_space;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
(fault_entry->non_replayable.buffer_index * gpu->parent->fault_buffer_hal->entry_size(gpu->parent));
UVM_ASSERT(gpu);
UVM_ASSERT(va_space);
UVM_ASSERT(user_channel);
if (user_channel->kill_channel.scheduled)
return;
user_channel->kill_channel.scheduled = true;
user_channel->kill_channel.va_space = va_space;
// Save the packet to be handled by RM in the channel structure
memcpy(user_channel->kill_channel.fault_packet, packet, gpu->parent->fault_buffer_hal->entry_size(gpu->parent));
// Retain the channel here so it is not prematurely destroyed. It will be
// released after forwarding the fault to RM in kill_channel_delayed.
uvm_user_channel_retain(user_channel);
// Schedule a work item to kill the channel
nv_kthread_q_item_init(&user_channel->kill_channel.kill_channel_q_item,
kill_channel_delayed_entry,
user_channel);
nv_kthread_q_schedule_q_item(&gpu->parent->isr.kill_channel_q,
&user_channel->kill_channel.kill_channel_q_item);
}
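// Illustrative sketch (not part of the driver): a user-space model of the
// deferral pattern above, where work that cannot run in the bottom half
// (here, reporting the fatal fault to RM) is handed to a dedicated worker
// thread. The queue is reduced to a single slot for brevity; all names are
// hypothetical.
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  q_cond = PTHREAD_COND_INITIALIZER;
static int pending_packet = -1;  // -1 means "no work queued"

static void *kill_channel_worker(void *unused)
{
    (void)unused;
    pthread_mutex_lock(&q_lock);
    while (pending_packet < 0)
        pthread_cond_wait(&q_cond, &q_lock);
    printf("worker: reporting fault packet %d to RM\n", pending_packet);
    pthread_mutex_unlock(&q_lock);
    return NULL;
}

// Called from "bottom half" context: only queues the work and returns.
static void schedule_kill(int packet)
{
    pthread_mutex_lock(&q_lock);
    pending_packet = packet;
    pthread_cond_signal(&q_cond);
    pthread_mutex_unlock(&q_lock);
}

int main(void)
{
    pthread_t worker;

    pthread_create(&worker, NULL, kill_channel_worker, NULL);
    schedule_kill(42);
    pthread_join(worker, NULL);
    return 0;
}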
static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_fault_buffer_entry_t *fault_entry,
NV_STATUS lookup_status)
{
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
NV_STATUS status = lookup_status;
UVM_ASSERT(!fault_entry->is_fatal);
// Avoid dropping fault events when the VA block is not found or cannot be created
uvm_perf_event_notify_gpu_fault(&fault_entry->va_space->perf_events,
NULL,
gpu->id,
UVM_ID_INVALID,
fault_entry,
++non_replayable_faults->batch_id,
false);
if (status != NV_ERR_INVALID_ADDRESS)
return status;
if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
ats_invalidate->write_faults_in_batch = false;
// The VA isn't managed. See if ATS knows about it.
status = uvm_ats_service_fault_entry(gpu_va_space, fault_entry, ats_invalidate);
// Invalidate ATS TLB entries if needed
if (status == NV_OK) {
status = uvm_ats_invalidate_tlbs(gpu_va_space,
ats_invalidate,
&non_replayable_faults->fault_service_tracker);
}
}
else {
UVM_ASSERT(fault_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH);
fault_entry->is_fatal = true;
fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
// Do not return error due to logical errors in the application
status = NV_OK;
}
return status;
}
static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry)
{
NV_STATUS status;
uvm_user_channel_t *user_channel;
uvm_va_block_t *va_block;
uvm_va_space_t *va_space = NULL;
struct mm_struct *mm;
uvm_gpu_va_space_t *gpu_va_space;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_va_block_context_t *va_block_context =
&gpu->parent->fault_buffer_info.non_replayable.block_service_context.block_context;
status = uvm_gpu_fault_entry_to_va_space(gpu, fault_entry, &va_space);
if (status != NV_OK) {
// The VA space lookup will fail if we're running concurrently with
// removal of the channel from the VA space (channel unregister, GPU VA
// space unregister, VA space destroy, etc). The other thread will stop
// the channel and remove the channel from the table, so the faulting
// condition will be gone. In the case of replayable faults we need to
// flush the buffer, but here we can just ignore the entry and proceed
// on.
//
// Note that we can't have any subcontext issues here, since non-
// replayable faults only use the address space of their channel.
UVM_ASSERT(status == NV_ERR_INVALID_CHANNEL);
UVM_ASSERT(!va_space);
return NV_OK;
}
UVM_ASSERT(va_space);
// If an mm is registered with the VA space, we have to retain it
// in order to lock it before locking the VA space. It is guaranteed
// to remain valid until we release. If no mm is registered, we
// can only service managed faults, not ATS/HMM faults.
mm = uvm_va_space_mm_retain_lock(va_space);
uvm_va_space_down_read(va_space);
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
if (!gpu_va_space) {
// The va_space might have gone away. See the comment above.
status = NV_OK;
goto exit_no_channel;
}
fault_entry->va_space = va_space;
user_channel = uvm_gpu_va_space_get_user_channel(gpu_va_space, fault_entry->instance_ptr);
if (!user_channel) {
// The channel might have gone away. See the comment above.
status = NV_OK;
goto exit_no_channel;
}
fault_entry->fault_source.channel_id = user_channel->hw_channel_id;
if (!fault_entry->is_fatal) {
status = uvm_va_block_find_create(fault_entry->va_space,
mm,
fault_entry->fault_address,
va_block_context,
&va_block);
if (status == NV_OK)
status = service_managed_fault_in_block(gpu_va_space->gpu, mm, va_block, fault_entry);
else
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
// We are done; clear the faulted bit on the channel so that it can be
// scheduled again.
if (status == NV_OK && !fault_entry->is_fatal) {
status = clear_faulted_on_gpu(gpu,
user_channel,
fault_entry,
non_replayable_faults->batch_id,
&non_replayable_faults->fault_service_tracker);
uvm_tracker_clear(&non_replayable_faults->fault_service_tracker);
}
}
if (fault_entry->is_fatal)
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, fault_entry->va_space, fault_entry, fault_entry->fatal_reason);
if (status != NV_OK || fault_entry->is_fatal)
schedule_kill_channel(gpu, fault_entry, user_channel);
exit_no_channel:
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
return status;
}
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
NvU32 cached_faults;
// If this handler is modified to handle fewer than all of the outstanding
// faults, then special handling will need to be added to uvm_suspend()
// to guarantee that fault processing has completed before control is
// returned to the RM.
while ((cached_faults = fetch_non_replayable_fault_buffer_entries(gpu)) > 0) {
NvU32 i;
// Unlike replayable faults, we do not batch up and preprocess
// non-replayable faults, since getting multiple faults on the same
// memory region is unlikely.
for (i = 0; i < cached_faults; ++i) {
status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);
if (status != NV_OK)
break;
}
}
if (status != NV_OK)
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
}
View File
@@ -0,0 +1,37 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_NON_REPLAYABLE_FAULTS_H__
#define __UVM_GPU_NON_REPLAYABLE_FAULTS_H__
#include <nvstatus.h>
#include "uvm_forward_decl.h"
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu);
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_NON_REPLAYABLE_FAULTS_H__
File diff suppressed because it is too large
View File
@@ -0,0 +1,78 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_PAGE_FAULT_H__
#define __UVM_GPU_PAGE_FAULT_H__
#include "nvtypes.h"
#include "uvm_types.h"
#include "uvm_hal_types.h"
#include "uvm_tracker.h"
typedef enum
{
// Issue a fault replay after all faults for a block within a batch have been serviced
UVM_PERF_FAULT_REPLAY_POLICY_BLOCK = 0,
// Issue a fault replay after each fault batch has been serviced
UVM_PERF_FAULT_REPLAY_POLICY_BATCH,
// Like UVM_PERF_FAULT_REPLAY_POLICY_BATCH but only one batch of faults is serviced. The fault buffer is flushed
// before issuing the replay. The potential benefit is that we can resume execution of some SMs earlier, if SMs
// are faulting on different sets of pages.
UVM_PERF_FAULT_REPLAY_POLICY_BATCH_FLUSH,
// Issue a fault replay after all faults in the buffer have been serviced
UVM_PERF_FAULT_REPLAY_POLICY_ONCE,
// TODO: Bug 1768226: Implement uTLB-aware fault replay policy
UVM_PERF_FAULT_REPLAY_POLICY_MAX,
} uvm_perf_fault_replay_policy_t;
const char *uvm_perf_fault_replay_policy_string(uvm_perf_fault_replay_policy_t fault_replay);
NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
// Clear valid bit for all remaining unserviced faults in the buffer, set GET to
// PUT, and push a fault replay of type UVM_FAULT_REPLAY_TYPE_START. It does not
// wait for the replay to complete before returning. The pushed replay is added
// to the GPU's replay_tracker.
//
// LOCKING: Takes gpu->isr_lock
NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu);
// Enable/disable HW support for prefetch-initiated faults
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
// Service pending replayable faults on the given GPU. This function must
// only be called from the ISR bottom half.
void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu);
#endif // __UVM_GPU_PAGE_FAULT_H__

View File

@@ -0,0 +1,551 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_gpu_semaphore.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
#define UVM_SEMAPHORE_COUNT_PER_PAGE (PAGE_SIZE / UVM_SEMAPHORE_SIZE)
// The top nibble of the canary base is intentionally 0. The rest of the value
// is arbitrary. See the comments below on make_canary.
#define UVM_SEMAPHORE_CANARY_BASE 0x0badc0de
#define UVM_SEMAPHORE_CANARY_MASK 0xf0000000
struct uvm_gpu_semaphore_pool_struct
{
// The GPU owning the pool
uvm_gpu_t *gpu;
// List of all the semaphore pages belonging to the pool
struct list_head pages;
// Count of free semaphores among all the pages
NvU32 free_semaphores_count;
// Lock protecting the state of the pool
uvm_mutex_t mutex;
};
struct uvm_gpu_semaphore_pool_page_struct
{
// Allocation backing the page
uvm_rm_mem_t *memory;
// Pool the page is part of
uvm_gpu_semaphore_pool_t *pool;
// Node in the list of all pages in a semaphore pool
struct list_head all_pages_node;
// Mask indicating free semaphore indices within the page
DECLARE_BITMAP(free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
};
static NvU32 get_index(uvm_gpu_semaphore_t *semaphore)
{
NvU32 offset;
NvU32 index;
UVM_ASSERT(semaphore->payload != NULL);
UVM_ASSERT(semaphore->page != NULL);
offset = (char*)semaphore->payload - (char*)uvm_rm_mem_get_cpu_va(semaphore->page->memory);
UVM_ASSERT(offset % UVM_SEMAPHORE_SIZE == 0);
index = offset / UVM_SEMAPHORE_SIZE;
UVM_ASSERT(index < UVM_SEMAPHORE_COUNT_PER_PAGE);
return index;
}
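// For illustration (the numbers are hypothetical, not from a real allocation):
// with UVM_SEMAPHORE_SIZE of 4, a payload pointer 40 bytes past the start of
// the page's CPU mapping gives offset 40 and index 10, i.e. the 11th semaphore
// slot in the page.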
// Use canary values on debug builds to catch semaphore use-after-free. We can
// catch release-after-free by simply setting the payload to a known value at
// free then checking it on alloc or pool free, but catching acquire-after-free
// is a little trickier.
//
// In order to make still-pending GEQ acquires stall indefinitely we need to
// reduce the current payload as much as we can, subject to two restrictions:
//
// 1) The pending acquires could be comparing against values much less than and
// much greater than the current payload, so we have to set the payload to a
// value reasonably less than the acquires which we might expect to be
// pending.
//
// 2) Going over halfway past a pending acquire on the 32-bit number wheel will
// cause Host to wrap and think the acquire succeeded. So we shouldn't reduce
// by more than 2^31.
//
// To handle these restrictions we'll deal with quadrants of 2^32, under the
// assumption that it's unlikely for a payload to outpace a pending acquire by
// more than 2^30.
//
// We also need for the base value to have some 0s in the upper significant
// bits, otherwise those bits might carry us past the quadrant boundary when we
// OR them in.
static NvU32 make_canary(NvU32 payload)
{
NvU32 prev_quadrant = payload - (1 << 30);
return (prev_quadrant & UVM_SEMAPHORE_CANARY_MASK) | UVM_SEMAPHORE_CANARY_BASE;
}
static bool is_canary(NvU32 val)
{
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}
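// Worked example of the scheme above (the payload value is hypothetical): for
// a current payload of 0x80000123, make_canary() computes prev_quadrant =
// 0x80000123 - 0x40000000 = 0x40000123, keeps only its top nibble (0x40000000)
// and ORs in the base, producing 0x4badc0de. Pending GEQ acquires near the old
// payload now see a value roughly one quadrant lower and stall, while
// is_canary(0x4badc0de) still recognizes the slot as freed because the low 28
// bits match UVM_SEMAPHORE_CANARY_BASE.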
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
NV_STATUS status;
uvm_gpu_semaphore_pool_page_t *pool_page;
NvU32 *payloads;
size_t i;
uvm_assert_mutex_locked(&pool->mutex);
pool_page = uvm_kvmalloc_zero(sizeof(*pool_page));
if (!pool_page)
return NV_ERR_NO_MEMORY;
pool_page->pool = pool;
status = uvm_rm_mem_alloc_and_map_all(pool->gpu, UVM_RM_MEM_TYPE_SYS, UVM_SEMAPHORE_PAGE_SIZE, &pool_page->memory);
if (status != NV_OK)
goto error;
// All semaphores are initially free
bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
list_add(&pool_page->all_pages_node, &pool->pages);
pool->free_semaphores_count += UVM_SEMAPHORE_COUNT_PER_PAGE;
// Initialize the semaphore payloads to known values
if (UVM_IS_DEBUG()) {
payloads = uvm_rm_mem_get_cpu_va(pool_page->memory);
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
payloads[i] = make_canary(0);
}
return NV_OK;
error:
uvm_kvfree(pool_page);
return status;
}
static void pool_free_page(uvm_gpu_semaphore_pool_page_t *page)
{
uvm_gpu_semaphore_pool_t *pool;
NvU32 *payloads;
size_t i;
UVM_ASSERT(page);
pool = page->pool;
uvm_assert_mutex_locked(&pool->mutex);
// Assert that no semaphores are still allocated
UVM_ASSERT(bitmap_full(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE));
UVM_ASSERT_MSG(pool->free_semaphores_count >= UVM_SEMAPHORE_COUNT_PER_PAGE,
"count: %u\n",
pool->free_semaphores_count);
// Check for semaphore release-after-free
if (UVM_IS_DEBUG()) {
payloads = uvm_rm_mem_get_cpu_va(page->memory);
for (i = 0; i < UVM_SEMAPHORE_COUNT_PER_PAGE; i++)
UVM_ASSERT(is_canary(payloads[i]));
}
pool->free_semaphores_count -= UVM_SEMAPHORE_COUNT_PER_PAGE;
list_del(&page->all_pages_node);
uvm_rm_mem_free(page->memory);
uvm_kvfree(page);
}
NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore)
{
NV_STATUS status = NV_OK;
uvm_gpu_semaphore_pool_page_t *page;
memset(semaphore, 0, sizeof(*semaphore));
uvm_mutex_lock(&pool->mutex);
if (pool->free_semaphores_count == 0)
status = pool_alloc_page(pool);
if (status != NV_OK)
goto done;
list_for_each_entry(page, &pool->pages, all_pages_node) {
NvU32 semaphore_index = find_first_bit(page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
if (semaphore_index == UVM_SEMAPHORE_COUNT_PER_PAGE)
continue;
semaphore->payload = (NvU32*)((char*)uvm_rm_mem_get_cpu_va(page->memory) + semaphore_index * UVM_SEMAPHORE_SIZE);
semaphore->page = page;
// Check for semaphore release-after-free
UVM_ASSERT(is_canary(uvm_gpu_semaphore_get_payload(semaphore)));
uvm_gpu_semaphore_set_payload(semaphore, 0);
__clear_bit(semaphore_index, page->free_semaphores);
--pool->free_semaphores_count;
goto done;
}
UVM_ASSERT_MSG(0, "Failed to find a semaphore after allocating a new page\n");
status = NV_ERR_GENERIC;
done:
uvm_mutex_unlock(&pool->mutex);
return status;
}
void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore)
{
uvm_gpu_semaphore_pool_page_t *page;
uvm_gpu_semaphore_pool_t *pool;
NvU32 index;
UVM_ASSERT(semaphore);
// uvm_gpu_semaphore_t is meant to be embedded in other structures, so it should
// always be accessible, but it may not have been initialized on error paths.
// Early out if page is NULL, which indicates the semaphore was never
// successfully allocated.
page = semaphore->page;
if (page == NULL)
return;
pool = page->pool;
index = get_index(semaphore);
// Write a known value lower than the current payload in an attempt to catch
// release-after-free and acquire-after-free.
if (UVM_IS_DEBUG())
uvm_gpu_semaphore_set_payload(semaphore, make_canary(uvm_gpu_semaphore_get_payload(semaphore)));
uvm_mutex_lock(&pool->mutex);
semaphore->page = NULL;
semaphore->payload = NULL;
++pool->free_semaphores_count;
__set_bit(index, page->free_semaphores);
uvm_mutex_unlock(&pool->mutex);
}
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
uvm_gpu_semaphore_pool_t *pool;
pool = uvm_kvmalloc_zero(sizeof(*pool));
if (!pool)
return NV_ERR_NO_MEMORY;
uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);
INIT_LIST_HEAD(&pool->pages);
pool->free_semaphores_count = 0;
pool->gpu = gpu;
*pool_out = pool;
return NV_OK;
}
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool)
{
uvm_gpu_semaphore_pool_page_t *page;
uvm_gpu_semaphore_pool_page_t *next_page;
if (!pool)
return;
// No other thread should be touching the pool once it's being destroyed
uvm_assert_mutex_unlocked(&pool->mutex);
// Keep pool_free_page happy
uvm_mutex_lock(&pool->mutex);
list_for_each_entry_safe(page, next_page, &pool->pages, all_pages_node)
pool_free_page(page);
UVM_ASSERT_MSG(pool->free_semaphores_count == 0, "unused: %u", pool->free_semaphores_count);
UVM_ASSERT(list_empty(&pool->pages));
uvm_mutex_unlock(&pool->mutex);
uvm_kvfree(pool);
}
NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
uvm_gpu_semaphore_pool_page_t *page;
UVM_ASSERT(pool);
UVM_ASSERT(gpu);
uvm_mutex_lock(&pool->mutex);
list_for_each_entry(page, &pool->pages, all_pages_node) {
status = uvm_rm_mem_map_gpu(page->memory, gpu);
if (status != NV_OK)
goto done;
}
done:
uvm_mutex_unlock(&pool->mutex);
return status;
}
void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu)
{
uvm_gpu_semaphore_pool_page_t *page;
UVM_ASSERT(pool);
UVM_ASSERT(gpu);
uvm_mutex_lock(&pool->mutex);
list_for_each_entry(page, &pool->pages, all_pages_node)
uvm_rm_mem_unmap_gpu(page->memory, gpu);
uvm_mutex_unlock(&pool->mutex);
}
NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, false);
}
NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu)
{
return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, true);
}
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space)
{
NvU32 index = get_index(semaphore);
NvU64 base_va = uvm_rm_mem_get_gpu_va(semaphore->page->memory, gpu, is_proxy_va_space);
return base_va + UVM_SEMAPHORE_SIZE * index;
}
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
{
return UVM_GPU_READ_ONCE(*semaphore->payload);
}
void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload)
{
// Provide a guarantee that all memory accesses prior to setting the payload
// won't be moved past it.
// Use a big hammer mb() as set_payload() is not used in any performance path
// today.
// This could likely be optimized to either an smp_store_release() or an
// smp_mb__before_atomic() barrier. The former is a fairly recent addition to
// the kernel, though, and it's not clear whether combining the latter with a
// regular 32-bit store is well defined in all cases. Both also seem to risk
// being optimized out on non-SMP configs (we need them for interacting with
// the GPU correctly even on non-SMP).
mb();
UVM_GPU_WRITE_ONCE(*semaphore->payload, payload);
}
// This function is intended to catch channels which have been left dangling in
// trackers after their owning GPUs have been destroyed.
static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
uvm_gpu_t *gpu = tracking_sem->semaphore.page->pool->gpu;
uvm_gpu_t *table_gpu;
UVM_ASSERT_MSG(gpu->magic == UVM_GPU_MAGIC_VALUE, "Corruption detected: magic number is 0x%llx\n", gpu->magic);
// It's ok for the GPU to not be in the global table, since add_gpu operates
// on trackers before adding the GPU to the table, and remove_gpu operates
// on trackers after removing the GPU. We rely on the magic value to catch
// those cases.
//
// But if a pointer is in the table it must match.
table_gpu = uvm_gpu_get(gpu->global_id);
if (table_gpu)
UVM_ASSERT(table_gpu == gpu);
// Return a boolean so this function can be used in assertions for
// conditional compilation
return true;
}
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
NV_STATUS status;
memset(tracking_sem, 0, sizeof(*tracking_sem));
status = uvm_gpu_semaphore_alloc(pool, &tracking_sem->semaphore);
if (status != NV_OK)
return status;
UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);
uvm_spin_lock_init(&tracking_sem->lock, UVM_LOCK_ORDER_LEAF);
atomic64_set(&tracking_sem->completed_value, 0);
tracking_sem->queued_value = 0;
return NV_OK;
}
void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
uvm_gpu_semaphore_free(&tracking_sem->semaphore);
}
static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
NvU64 old_value = atomic64_read(&tracking_semaphore->completed_value);
// The semaphore value is the bottom 32 bits of completed_value
NvU32 old_sem_value = (NvU32)old_value;
NvU32 new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
NvU64 new_value;
uvm_assert_spinlock_locked(&tracking_semaphore->lock);
// The following logic to update the completed value is very subtle, it
// helps to read https://www.kernel.org/doc/Documentation/memory-barriers.txt
// before going through this code.
if (old_sem_value == new_sem_value) {
// No progress since the last update.
// No additional memory barrier required in this case as completed_value
// is always updated under the spinlock that this thread just acquired.
// That guarantees full ordering with all the accesses the thread that
// updated completed_value did under the lock including the GPU
// semaphore read.
return old_value;
}
// Replace the bottom 32-bits with the new semaphore value
new_value = (old_value & 0xFFFFFFFF00000000ull) | new_sem_value;
// If we've wrapped around, add 2^32 to the value
// Notably the user of the GPU tracking semaphore needs to guarantee that
// the value is updated often enough to notice the wrap around each time it
// happens. In case of a channel tracking semaphore that's released for each
// push, it's easily guaranteed because of the small number of GPFIFO
// entries available per channel (there could be at most as many pending
// pushes as GPFIFO entries).
if (new_sem_value < old_sem_value)
new_value += 1ULL << 32;
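// Worked example with assumed values: if completed_value was 0x1fffffff0
// (old_sem_value 0xfffffff0) and the GPU payload now reads 0x10, new_value
// starts out as 0x100000010; since 0x10 < 0xfffffff0 the payload wrapped, so
// 2^32 is added, giving 0x200000010 and keeping the 64-bit count monotonic.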
// Use an atomic write even though the spinlock is held so that the value can
// be (carefully) read atomically outside of the lock.
//
// atomic64_set() on its own doesn't imply any memory barriers and we need
// prior memory accesses (in particular the read of the GPU semaphore
// payload) by this thread to be visible to other threads that see the newly
// set completed_value. smp_mb__before_atomic() provides that ordering.
//
// Also see the comment and matching smp_mb__after_atomic() barrier in
// uvm_gpu_tracking_semaphore_is_value_completed().
//
// Notably as of 4.3, atomic64_set_release() and atomic64_read_acquire()
// have been added that are exactly what we need and could be slightly
// faster on arm and powerpc than the implementation below. But at least in
// 4.3 the implementation looks broken for arm32 (it maps directly to
// smp_load_acquire() and that doesn't support 64-bit reads on 32-bit
// architectures) so instead of dealing with that just use a slightly bigger
// hammer.
smp_mb__before_atomic();
atomic64_set(&tracking_semaphore->completed_value, new_value);
// For this thread, we don't want any later accesses to be ordered above the
// GPU semaphore read. This could be accomplished by using a
// smp_load_acquire() for reading it, but given that it's also a pretty
// recent addition to the kernel, just leverage smp_mb__after_atomic() that
// guarantees that no accesses will be ordered above the atomic (and hence
// the GPU semaphore read).
//
// Notably the soon following uvm_spin_unlock() is a release barrier that
// allows later memory accesses to be reordered above it and hence doesn't
// provide the necessary ordering with the GPU semaphore read.
//
// Also notably this would still need to be handled if we ever switch to
// atomic64_set_release() and atomic64_read_acquire() for accessing
// completed_value.
smp_mb__after_atomic();
return new_value;
}
NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
NvU64 completed;
// Check that the GPU which owns the semaphore is still present
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
uvm_spin_lock(&tracking_semaphore->lock);
completed = update_completed_value_locked(tracking_semaphore);
uvm_spin_unlock(&tracking_semaphore->lock);
return completed;
}
bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value)
{
NvU64 completed = atomic64_read(&tracking_sem->completed_value);
// Check that the GPU which owns the semaphore is still present
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_sem));
if (completed >= value) {
// atomic64_read() doesn't imply any memory barriers and we need all
// subsequent memory accesses in this thread to be ordered after the
// atomic read of the completed value above as that will also order them
// with any accesses (in particular the GPU semaphore read) performed by
// the other thread prior to it setting the completed_value we read.
// smp_mb__after_atomic() provides that ordering.
//
// Also see the comment in update_completed_value_locked().
smp_mb__after_atomic();
return true;
}
return uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) >= value;
}

View File

@@ -0,0 +1,181 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GPU_SEMAPHORE_H__
#define __UVM_GPU_SEMAPHORE_H__
#include "uvm_forward_decl.h"
#include "uvm_lock.h"
#include "uvm_rm_mem.h"
#include "uvm_linux.h"
// A GPU semaphore is a memory location accessible by the GPUs and the CPU
// that's used for synchronization among them.
// The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
// locations. The same memory can be accessed by multiple GPUs and the CPU
// allowing for different synchronization schemes.
//
// The UVM driver maintains a per-GPU semaphore pool that grows on demand as
// semaphores are allocated out of it.
//
// TODO: Bug 200194638: Add support for timestamps (the GPU also supports
// releasing 16-byte semaphores that include an 8-byte timestamp).
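//
// Illustrative flow (a sketch under assumed usage, not a prescribed sequence):
// a pushbuffer might end with a semaphore release of value 7; another channel
// then acquires GEQ 7 before starting dependent work, and the CPU can observe
// the same progress by polling uvm_gpu_semaphore_get_payload().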
struct uvm_gpu_semaphore_struct
{
// The semaphore pool page the semaphore came from
uvm_gpu_semaphore_pool_page_t *page;
// Pointer to the memory location
NvU32 *payload;
};
// A primitive used for tracking progress of the GPU.
// Whenever a stream of GPU operations needs to be synchronized, it increments
// the semaphore's payload as the last step so that other processors can
// acquire (wait for) it.
// The primitive maintains a 64-bit counter on top of the 32-bit GPU semaphore
// to support 2^64 synchronization points instead of just 2^32. The logic relies
// on being able to notice every time the 32-bit counter wraps around (see
// update_completed_value()).
struct uvm_gpu_tracking_semaphore_struct
{
uvm_gpu_semaphore_t semaphore;
// Last completed value
// The bottom 32-bits will always match the latest semaphore payload seen in
// update_completed_value_locked().
atomic64_t completed_value;
// Lock protecting updates to the completed_value
uvm_spinlock_t lock;
// Last queued value
// All accesses to the queued value should be handled by the user of the GPU
// tracking semaphore.
NvU64 queued_value;
};
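// Typical usage sketch (simplified; see the GPU semaphore sanity test for a
// concrete example): allocate with uvm_gpu_tracking_semaphore_alloc(), bump
// queued_value once per piece of submitted work, have the GPU (or the CPU via
// uvm_gpu_semaphore_set_payload()) release the low 32 bits of that value, and
// later poll uvm_gpu_tracking_semaphore_is_completed() or
// uvm_gpu_tracking_semaphore_is_value_completed() to learn how far execution
// has progressed.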
// Create a semaphore pool for a GPU.
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);
// Destroy a semaphore pool
// Locking:
// - Global lock needs to be held in read mode (for unmapping from all GPUs)
// - Internally acquires:
// - GPU semaphore pool lock
// - RM API lock
// - RM GPUs lock
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool);
// Allocate a semaphore from the pool.
// The semaphore will be mapped on all GPUs currently registered with the UVM
// driver, and on all new GPUs which will be registered in the future.
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
// to the proxy address space.
//
// The semaphore's payload will be initially set to 0.
//
// Locking:
// - Global lock needs to be held in read mode (for mapping on all GPUs)
// - Internally synchronized and hence safe to be called from multiple threads
// - Internally acquires:
// - GPU semaphore pool lock
// - RM API lock
// - RM GPUs lock
NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore);
// Free a semaphore
// Locking:
// - Internally synchronized and hence safe to be called from multiple threads
void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore);
// Map all the semaphores from the pool on a GPU
//
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
// to the proxy address space.
NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu);
// Unmap all the semaphores from the pool from a GPU
//
// The unmapping affects all the VA spaces where the semaphores are currently
// mapped.
void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu);
// Get the GPU VA of a semaphore in UVM's internal address space.
NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu);
// Get the GPU VA of a semaphore in the proxy address space.
NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu);
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space);
// Read the 32-bit payload of the semaphore
// Notably doesn't provide any memory ordering guarantees and needs to be used with
// care. For an example of what needs to be considered see
// uvm_gpu_tracking_semaphore_update_completed_value().
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore);
// Set the 32-bit payload of the semaphore
// Guarantees that all memory accesses preceding setting the payload won't be
// moved past it.
void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload);
// Allocate a GPU tracking semaphore from the pool
// Locking same as uvm_gpu_semaphore_alloc()
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem);
// Free a GPU tracking semaphore
// Locking same as uvm_gpu_semaphore_free()
void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem);
// Check whether a specific value has been completed
//
// If true is returned, it is guaranteed that all operations ordered prior to a
// processor (commonly a GPU) completing the specific value will be visible to
// the caller.
//
// If a GPU is supposed to complete a value, care needs to be taken for all GPU
// operations to be ordered correctly with the semaphore release that sets the
// value. If it's the CPU completing the value, uvm_gpu_semaphore_set_payload()
// should be used, as it provides the necessary ordering guarantees.
//
// Locking: this operation is internally synchronized and hence safe to be
// called from multiple threads.
bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value);
// Update and return the completed value
//
// Provides the same guarantees as if uvm_gpu_tracking_semaphore_is_value_completed()
// returned true for the returned completed value.
//
// Locking: this operation is internally synchronized and hence safe to be
// called from multiple threads.
NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_sem);
// See the comments for uvm_gpu_tracking_semaphore_is_value_completed
static bool uvm_gpu_tracking_semaphore_is_completed(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
return uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, tracking_sem->queued_value);
}
#endif // __UVM_GPU_SEMAPHORE_H__

View File

@@ -0,0 +1,165 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
#include "uvm_kvmalloc.h"
static NV_STATUS add_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU32 increment_by)
{
NvU64 new_value;
NvU64 completed = uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem);
new_value = completed + increment_by;
tracking_sem->queued_value = new_value;
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == completed);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, 0));
if (completed > 0)
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed - 1));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed + 1));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_completed(tracking_sem));
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)new_value);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == new_value);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value - 1));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value + 1));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_completed(tracking_sem));
return NV_OK;
}
static NV_STATUS test_tracking(uvm_va_space_t *va_space)
{
NV_STATUS status;
uvm_gpu_tracking_semaphore_t tracking_sem;
int i;
uvm_gpu_t *gpu = uvm_va_space_find_first_gpu(va_space);
if (gpu == NULL)
return NV_ERR_INVALID_STATE;
status = uvm_gpu_tracking_semaphore_alloc(gpu->semaphore_pool, &tracking_sem);
if (status != NV_OK)
return status;
status = add_and_test(&tracking_sem, 1);
if (status != NV_OK)
goto done;
for (i = 0; i < 100; ++i) {
status = add_and_test(&tracking_sem, UINT_MAX - 1);
if (status != NV_OK)
goto done;
}
done:
uvm_gpu_tracking_semaphore_free(&tracking_sem);
return status;
}
#define NUM_SEMAPHORES_PER_GPU 4096
static NV_STATUS test_alloc(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu;
uvm_gpu_semaphore_t *semaphores;
int i;
NvU32 semaphore_count;
NvU32 gpu_count = uvm_processor_mask_get_gpu_count(&va_space->registered_gpus);
NvU32 current_semaphore = 0;
if (gpu_count == 0)
return NV_ERR_INVALID_STATE;
semaphore_count = gpu_count * NUM_SEMAPHORES_PER_GPU;
semaphores = uvm_kvmalloc_zero(semaphore_count * sizeof(*semaphores));
if (semaphores == NULL)
return NV_ERR_NO_MEMORY;
for (i = 0; i < NUM_SEMAPHORES_PER_GPU; ++i) {
for_each_va_space_gpu(gpu, va_space) {
status = uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &semaphores[current_semaphore++]);
if (status != NV_OK)
goto done;
}
}
for (i = 0; i < current_semaphore; ++i) {
for_each_va_space_gpu(gpu, va_space) {
NvU64 gpu_va;
gpu_va = uvm_gpu_semaphore_get_gpu_uvm_va(&semaphores[i], gpu);
TEST_CHECK_GOTO(gpu_va != 0, done);
// In SR-IOV heavy, there should be a mapping in the proxy VA space
// too.
if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
gpu_va = uvm_gpu_semaphore_get_gpu_proxy_va(&semaphores[i], gpu);
TEST_CHECK_GOTO(gpu_va != 0, done);
}
uvm_gpu_semaphore_set_payload(&semaphores[i], 1);
TEST_CHECK_GOTO(uvm_gpu_semaphore_get_payload(&semaphores[i]) == 1, done);
}
}
done:
for (i = 0; i < current_semaphore; ++i)
uvm_gpu_semaphore_free(&semaphores[i]);
uvm_kvfree(semaphores);
return status;
}
NV_STATUS uvm_test_gpu_semaphore_sanity(UVM_TEST_GPU_SEMAPHORE_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_read_rm(va_space);
status = test_alloc(va_space);
if (status != NV_OK)
goto done;
status = test_tracking(va_space);
if (status != NV_OK)
goto done;
done:
uvm_va_space_up_read_rm(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}

View File

@@ -0,0 +1,991 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_kvmalloc.h"
#include "cla16f.h"
#include "clb069.h"
#include "clb06f.h"
#include "clb0b5.h"
#include "clc06f.h"
#include "clc0b5.h"
#include "clc1b5.h"
#include "ctrl2080mc.h"
#include "clc3b5.h"
#include "clc36f.h"
#include "clc369.h"
#include "clc365.h"
#include "clc46f.h"
#include "clc5b5.h"
#include "clc6b5.h"
#include "clc56f.h"
#include "clc7b5.h"
#define CE_OP_COUNT (sizeof(uvm_ce_hal_t) / sizeof(void *))
#define HOST_OP_COUNT (sizeof(uvm_host_hal_t) / sizeof(void *))
#define ARCH_OP_COUNT (sizeof(uvm_arch_hal_t) / sizeof(void *))
#define FAULT_BUFFER_OP_COUNT (sizeof(uvm_fault_buffer_hal_t) / sizeof(void *))
#define ACCESS_COUNTER_BUFFER_OP_COUNT (sizeof(uvm_access_counter_buffer_hal_t) / sizeof(void *))
// Table for copy engine functions.
// Each entry is associated with a copy engine class through the 'id' field.
// By setting the 'parent_id' field, a class inherits the parent class's
// functions for any ops left NULL when uvm_hal_init_table() runs at module
// load. The parent class must appear earlier in the array than the child.
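// For example, PASCAL_DMA_COPY_B below supplies an empty ce_ops struct and
// names PASCAL_DMA_COPY_A as its parent, so after uvm_hal_init_table() it ends
// up with Pascal's semaphore and offset methods and, transitively, Maxwell's
// memcopy/memset methods.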
static uvm_hal_class_ops_t ce_table[] =
{
{
.id = MAXWELL_DMA_COPY_A,
.u.ce_ops = {
.init = uvm_hal_maxwell_ce_init,
.method_validate = uvm_hal_method_validate_stub,
.semaphore_release = uvm_hal_maxwell_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_maxwell_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_maxwell_ce_semaphore_reduction_inc,
.offset_out = uvm_hal_maxwell_ce_offset_out,
.offset_in_out = uvm_hal_maxwell_ce_offset_in_out,
.phys_mode = uvm_hal_maxwell_ce_phys_mode,
.plc_mode = uvm_hal_maxwell_ce_plc_mode,
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
.memcopy = uvm_hal_maxwell_ce_memcopy,
.memcopy_v_to_v = uvm_hal_maxwell_ce_memcopy_v_to_v,
.memset_validate = uvm_hal_ce_memset_validate_stub,
.memset_1 = uvm_hal_maxwell_ce_memset_1,
.memset_4 = uvm_hal_maxwell_ce_memset_4,
.memset_8 = uvm_hal_maxwell_ce_memset_8,
.memset_v_4 = uvm_hal_maxwell_ce_memset_v_4,
}
},
{
.id = PASCAL_DMA_COPY_A,
.parent_id = MAXWELL_DMA_COPY_A,
.u.ce_ops = {
.semaphore_release = uvm_hal_pascal_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_pascal_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_pascal_ce_semaphore_reduction_inc,
.offset_out = uvm_hal_pascal_ce_offset_out,
.offset_in_out = uvm_hal_pascal_ce_offset_in_out,
}
},
{
.id = PASCAL_DMA_COPY_B,
.parent_id = PASCAL_DMA_COPY_A,
.u.ce_ops = {}
},
{
.id = VOLTA_DMA_COPY_A,
.parent_id = PASCAL_DMA_COPY_B,
.u.ce_ops = {},
},
{
.id = TURING_DMA_COPY_A,
.parent_id = VOLTA_DMA_COPY_A,
.u.ce_ops = {},
},
{
.id = AMPERE_DMA_COPY_A,
.parent_id = TURING_DMA_COPY_A,
.u.ce_ops = {
.method_validate = uvm_hal_ampere_ce_method_validate_c6b5,
.phys_mode = uvm_hal_ampere_ce_phys_mode,
.memcopy_validate = uvm_hal_ampere_ce_memcopy_validate_c6b5,
.memcopy_patch_src = uvm_hal_ampere_ce_memcopy_patch_src_c6b5,
.memset_validate = uvm_hal_ampere_ce_memset_validate_c6b5,
},
},
{
.id = AMPERE_DMA_COPY_B,
.parent_id = AMPERE_DMA_COPY_A,
.u.ce_ops = {
.method_validate = uvm_hal_method_validate_stub,
.plc_mode = uvm_hal_ampere_ce_plc_mode_c7b5,
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
.memset_validate = uvm_hal_ce_memset_validate_stub,
},
},
};
// Table for GPFIFO functions. Same idea as the copy engine table.
static uvm_hal_class_ops_t host_table[] =
{
{
// This host class is reported for GM10x
.id = KEPLER_CHANNEL_GPFIFO_B,
.u.host_ops = {
.init = uvm_hal_maxwell_host_init_noop,
.method_validate = uvm_hal_method_validate_stub,
.sw_method_validate = uvm_hal_method_validate_stub,
.wait_for_idle = uvm_hal_maxwell_host_wait_for_idle,
.membar_sys = uvm_hal_maxwell_host_membar_sys,
// No MEMBAR GPU until Pascal, just do a MEMBAR SYS.
.membar_gpu = uvm_hal_maxwell_host_membar_sys,
.noop = uvm_hal_maxwell_host_noop,
.interrupt = uvm_hal_maxwell_host_interrupt,
.semaphore_acquire = uvm_hal_maxwell_host_semaphore_acquire,
.semaphore_release = uvm_hal_maxwell_host_semaphore_release,
.semaphore_timestamp = uvm_hal_maxwell_host_semaphore_timestamp,
.set_gpfifo_entry = uvm_hal_maxwell_host_set_gpfifo_entry,
.write_gpu_put = uvm_hal_maxwell_host_write_gpu_put,
.tlb_invalidate_all = uvm_hal_maxwell_host_tlb_invalidate_all_a16f,
.tlb_invalidate_va = uvm_hal_maxwell_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_maxwell_host_tlb_invalidate_test,
.replay_faults = uvm_hal_maxwell_replay_faults_unsupported,
.cancel_faults_global = uvm_hal_maxwell_cancel_faults_global_unsupported,
.cancel_faults_targeted = uvm_hal_maxwell_cancel_faults_targeted_unsupported,
.cancel_faults_va = uvm_hal_maxwell_cancel_faults_va_unsupported,
.clear_faulted_channel_sw_method = uvm_hal_maxwell_host_clear_faulted_channel_sw_method_unsupported,
.clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
.clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
.access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
.get_time = uvm_hal_maxwell_get_time,
}
},
{
// This host class is reported for GM20x
.id = MAXWELL_CHANNEL_GPFIFO_A,
.parent_id = KEPLER_CHANNEL_GPFIFO_B,
.u.host_ops = {
.tlb_invalidate_all = uvm_hal_maxwell_host_tlb_invalidate_all_b06f,
}
},
{
.id = PASCAL_CHANNEL_GPFIFO_A,
.parent_id = MAXWELL_CHANNEL_GPFIFO_A,
.u.host_ops = {
.init = uvm_hal_pascal_host_init,
.membar_sys = uvm_hal_pascal_host_membar_sys,
.membar_gpu = uvm_hal_pascal_host_membar_gpu,
.tlb_invalidate_all = uvm_hal_pascal_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_pascal_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_pascal_host_tlb_invalidate_test,
.replay_faults = uvm_hal_pascal_replay_faults,
.cancel_faults_global = uvm_hal_pascal_cancel_faults_global,
.cancel_faults_targeted = uvm_hal_pascal_cancel_faults_targeted,
}
},
{
.id = VOLTA_CHANNEL_GPFIFO_A,
.parent_id = PASCAL_CHANNEL_GPFIFO_A,
.u.host_ops = {
.write_gpu_put = uvm_hal_volta_host_write_gpu_put,
.tlb_invalidate_va = uvm_hal_volta_host_tlb_invalidate_va,
.replay_faults = uvm_hal_volta_replay_faults,
.cancel_faults_va = uvm_hal_volta_cancel_faults_va,
.clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
.access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
.access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
.access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
.semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
}
},
{
.id = TURING_CHANNEL_GPFIFO_A,
.parent_id = VOLTA_CHANNEL_GPFIFO_A,
.u.host_ops = {
.semaphore_acquire = uvm_hal_turing_host_semaphore_acquire,
.semaphore_release = uvm_hal_turing_host_semaphore_release,
.clear_faulted_channel_method = uvm_hal_turing_host_clear_faulted_channel_method,
.set_gpfifo_entry = uvm_hal_turing_host_set_gpfifo_entry,
}
},
{
.id = AMPERE_CHANNEL_GPFIFO_A,
.parent_id = TURING_CHANNEL_GPFIFO_A,
.u.host_ops = {
.method_validate = uvm_hal_ampere_host_method_validate,
.sw_method_validate = uvm_hal_ampere_host_sw_method_validate,
.clear_faulted_channel_sw_method = uvm_hal_ampere_host_clear_faulted_channel_sw_method,
.clear_faulted_channel_register = uvm_hal_ampere_host_clear_faulted_channel_register,
.tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_ampere_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test,
}
},
};
static uvm_hal_class_ops_t arch_table[] =
{
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.arch_ops = {
.init_properties = uvm_hal_maxwell_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_maxwell,
.enable_prefetch_faults = uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported,
.disable_prefetch_faults = uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported,
.mmu_engine_id_to_type = uvm_hal_maxwell_mmu_engine_id_to_type_unsupported,
.mmu_client_id_to_utlb_id = uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.arch_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.u.arch_ops = {
.init_properties = uvm_hal_pascal_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_pascal,
.enable_prefetch_faults = uvm_hal_pascal_mmu_enable_prefetch_faults,
.disable_prefetch_faults = uvm_hal_pascal_mmu_disable_prefetch_faults,
.mmu_client_id_to_utlb_id = uvm_hal_pascal_mmu_client_id_to_utlb_id,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.arch_ops = {
.init_properties = uvm_hal_volta_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_volta,
.mmu_engine_id_to_type = uvm_hal_volta_mmu_engine_id_to_type,
.mmu_client_id_to_utlb_id = uvm_hal_volta_mmu_client_id_to_utlb_id,
},
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.arch_ops = {
.init_properties = uvm_hal_turing_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_turing,
.mmu_engine_id_to_type = uvm_hal_turing_mmu_engine_id_to_type,
},
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.u.arch_ops = {
.init_properties = uvm_hal_ampere_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_ampere,
.mmu_engine_id_to_type = uvm_hal_ampere_mmu_engine_id_to_type,
.mmu_client_id_to_utlb_id = uvm_hal_ampere_mmu_client_id_to_utlb_id,
},
},
};
static uvm_hal_class_ops_t fault_buffer_table[] =
{
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.fault_buffer_ops = {
.enable_replayable_faults = uvm_hal_maxwell_enable_replayable_faults_unsupported,
.disable_replayable_faults = uvm_hal_maxwell_disable_replayable_faults_unsupported,
.clear_replayable_faults = uvm_hal_maxwell_clear_replayable_faults_unsupported,
.read_put = uvm_hal_maxwell_fault_buffer_read_put_unsupported,
.read_get = uvm_hal_maxwell_fault_buffer_read_get_unsupported,
.write_get = uvm_hal_maxwell_fault_buffer_write_get_unsupported,
.get_ve_id = uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported,
.parse_entry = uvm_hal_maxwell_fault_buffer_parse_entry_unsupported,
.entry_is_valid = uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported,
.entry_clear_valid = uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported,
.entry_size = uvm_hal_maxwell_fault_buffer_entry_size_unsupported,
.parse_non_replayable_entry = uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.fault_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.u.fault_buffer_ops = {
.enable_replayable_faults = uvm_hal_pascal_enable_replayable_faults,
.disable_replayable_faults = uvm_hal_pascal_disable_replayable_faults,
.clear_replayable_faults = uvm_hal_pascal_clear_replayable_faults,
.read_put = uvm_hal_pascal_fault_buffer_read_put,
.read_get = uvm_hal_pascal_fault_buffer_read_get,
.write_get = uvm_hal_pascal_fault_buffer_write_get,
.parse_entry = uvm_hal_pascal_fault_buffer_parse_entry,
.entry_is_valid = uvm_hal_pascal_fault_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_pascal_fault_buffer_entry_clear_valid,
.entry_size = uvm_hal_pascal_fault_buffer_entry_size,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.fault_buffer_ops = {
.read_put = uvm_hal_volta_fault_buffer_read_put,
.read_get = uvm_hal_volta_fault_buffer_read_get,
.write_get = uvm_hal_volta_fault_buffer_write_get,
.get_ve_id = uvm_hal_volta_fault_buffer_get_ve_id,
.parse_entry = uvm_hal_volta_fault_buffer_parse_entry,
.parse_non_replayable_entry = uvm_hal_volta_fault_buffer_parse_non_replayable_entry,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.fault_buffer_ops = {
.disable_replayable_faults = uvm_hal_turing_disable_replayable_faults,
.clear_replayable_faults = uvm_hal_turing_clear_replayable_faults,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.u.fault_buffer_ops = {}
},
};
static uvm_hal_class_ops_t access_counter_buffer_table[] =
{
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_maxwell_enable_access_counter_notifications_unsupported,
.disable_access_counter_notifications = uvm_hal_maxwell_disable_access_counter_notifications_unsupported,
.clear_access_counter_notifications = uvm_hal_maxwell_clear_access_counter_notifications_unsupported,
.parse_entry = uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported,
.entry_is_valid = uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported,
.entry_clear_valid = uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported,
.entry_size = uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM000,
.u.access_counter_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GM200,
.u.access_counter_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_volta_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
.parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.access_counter_buffer_ops = {
.disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.u.access_counter_buffer_ops = {}
},
};
static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
{
NvLength i;
// Go through the array and match on the class ID.
for (i = 0; i < row_count; i++) {
if (table[i].id == id)
return table + i;
}
return NULL;
}
// Use memcmp to check whether a function pointer has been assigned, in a
// well-defined, general way.
static inline bool op_is_null(uvm_hal_class_ops_t *row, NvLength op_idx, NvLength op_offset)
{
void *temp = NULL;
return memcmp(&temp, (char *)row + op_offset + sizeof(void *) * op_idx, sizeof(void *)) == 0;
}
// Use memcpy to copy function pointers in a well-defined, general way.
static inline void op_copy(uvm_hal_class_ops_t *dst, uvm_hal_class_ops_t *src, NvLength op_idx, NvLength op_offset)
{
void *m_dst = (char *)dst + op_offset + sizeof(void *) * op_idx;
void *m_src = (char *)src + op_offset + sizeof(void *) * op_idx;
memcpy(m_dst, m_src, sizeof(void *));
}
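// Note on the two helpers above: each u.*_ops struct is treated as a flat
// array of function pointers, so op_offset selects the union member (e.g.
// offsetof(uvm_hal_class_ops_t, u.ce_ops), as passed by uvm_hal_init_table()
// below) and op_idx selects one pointer-sized slot within it. This is also why
// the *_OP_COUNT macros divide the size of an ops struct by sizeof(void *).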
static inline NV_STATUS ops_init_from_parent(uvm_hal_class_ops_t *table,
NvU32 row_count,
NvLength op_count,
NvLength op_offset)
{
NvLength i;
for (i = 0; i < row_count; i++) {
NvLength j;
uvm_hal_class_ops_t *parent = NULL;
if (table[i].parent_id != 0) {
parent = ops_find_by_id(table, i, table[i].parent_id);
if (parent == NULL)
return NV_ERR_INVALID_CLASS;
// Go through all the ops and assign from parent's corresponding op
// if NULL
for (j = 0; j < op_count; j++) {
if (op_is_null(table + i, j, op_offset))
op_copy(table + i, parent, j, op_offset);
}
}
// At this point, it is an error to have missing HAL operations
for (j = 0; j < op_count; j++) {
if (op_is_null(table + i, j, op_offset))
return NV_ERR_INVALID_STATE;
}
}
return NV_OK;
}
NV_STATUS uvm_hal_init_table(void)
{
NV_STATUS status;
status = ops_init_from_parent(ce_table, ARRAY_SIZE(ce_table), CE_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.ce_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(ce_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(host_table, ARRAY_SIZE(host_table), HOST_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.host_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(host_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(arch_table, ARRAY_SIZE(arch_table), ARCH_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.arch_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(arch_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(fault_buffer_table,
ARRAY_SIZE(fault_buffer_table),
FAULT_BUFFER_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.fault_buffer_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(fault_buffer_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(access_counter_buffer_table,
ARRAY_SIZE(access_counter_buffer_table),
ACCESS_COUNTER_BUFFER_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.access_counter_buffer_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(access_counter_buffer_table) failed: %s\n", nvstatusToString(status));
return status;
}
return NV_OK;
}
NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
{
const UvmGpuInfo *gpu_info = &parent_gpu->rm_info;
uvm_hal_class_ops_t *class_ops;
class_ops = ops_find_by_id(ce_table, ARRAY_SIZE(ce_table), gpu_info->ceClass);
if (class_ops == NULL) {
UVM_ERR_PRINT("Unsupported ce class: 0x%X, GPU %s\n", gpu_info->ceClass, parent_gpu->name);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->ce_hal = &class_ops->u.ce_ops;
class_ops = ops_find_by_id(host_table, ARRAY_SIZE(host_table), gpu_info->hostClass);
if (class_ops == NULL) {
UVM_ERR_PRINT("Unsupported host class: 0x%X, GPU %s\n", gpu_info->hostClass, parent_gpu->name);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->host_hal = &class_ops->u.host_ops;
class_ops = ops_find_by_id(arch_table, ARRAY_SIZE(arch_table), gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Unsupported GPU architecture: 0x%X, GPU %s\n", gpu_info->gpuArch, parent_gpu->name);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->arch_hal = &class_ops->u.arch_ops;
class_ops = ops_find_by_id(fault_buffer_table, ARRAY_SIZE(fault_buffer_table), gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->fault_buffer_hal = &class_ops->u.fault_buffer_ops;
class_ops = ops_find_by_id(access_counter_buffer_table,
ARRAY_SIZE(access_counter_buffer_table),
gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Access counter HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
return NV_ERR_INVALID_CLASS;
}
parent_gpu->access_counter_buffer_hal = &class_ops->u.access_counter_buffer_ops;
return NV_OK;
}
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->arch_hal->init_properties(parent_gpu);
// Override the HAL when in non-passthrough virtualization
// TODO: Bug 200692962: [UVM] Add support for access counters in UVM on SR-IOV configurations
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
parent_gpu->access_counters_supported = false;
}
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
{
uvm_gpu_t *gpu;
NvU32 i;
if (membar == UVM_MEMBAR_NONE)
return;
gpu = uvm_push_get_gpu(push);
for (i = 0; i < gpu->parent->num_hshub_tlb_invalidate_membars; i++)
gpu->parent->host_hal->membar_gpu(push);
uvm_hal_membar(gpu, push, membar);
}
const char *uvm_aperture_string(uvm_aperture_t aperture)
{
BUILD_BUG_ON(UVM_APERTURE_MAX != 12);
switch (aperture) {
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_0);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_1);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_2);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_3);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_4);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_5);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_6);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_7);
UVM_ENUM_STRING_CASE(UVM_APERTURE_PEER_MAX);
UVM_ENUM_STRING_CASE(UVM_APERTURE_SYS);
UVM_ENUM_STRING_CASE(UVM_APERTURE_VID);
UVM_ENUM_STRING_CASE(UVM_APERTURE_DEFAULT);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_prot_string(uvm_prot_t prot)
{
BUILD_BUG_ON(UVM_PROT_MAX != 4);
switch (prot) {
UVM_ENUM_STRING_CASE(UVM_PROT_NONE);
UVM_ENUM_STRING_CASE(UVM_PROT_READ_ONLY);
UVM_ENUM_STRING_CASE(UVM_PROT_READ_WRITE);
UVM_ENUM_STRING_CASE(UVM_PROT_READ_WRITE_ATOMIC);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_membar_string(uvm_membar_t membar)
{
switch (membar) {
UVM_ENUM_STRING_CASE(UVM_MEMBAR_SYS);
UVM_ENUM_STRING_CASE(UVM_MEMBAR_GPU);
UVM_ENUM_STRING_CASE(UVM_MEMBAR_NONE);
}
return "UNKNOWN";
}
const char *uvm_fault_access_type_string(uvm_fault_access_type_t fault_access_type)
{
BUILD_BUG_ON(UVM_FAULT_ACCESS_TYPE_COUNT != 5);
switch (fault_access_type) {
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG);
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK);
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_WRITE);
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_READ);
UVM_ENUM_STRING_CASE(UVM_FAULT_ACCESS_TYPE_PREFETCH);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_fault_type_string(uvm_fault_type_t fault_type)
{
BUILD_BUG_ON(UVM_FAULT_TYPE_COUNT != 16);
switch (fault_type) {
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_INVALID_PDE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_INVALID_PTE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_ATOMIC);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_WRITE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_READ);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_PDE_SIZE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_VA_LIMIT_VIOLATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_UNBOUND_INST_BLOCK);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_PRIV_VIOLATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_PITCH_MASK_VIOLATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_WORK_CREATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_UNSUPPORTED_APERTURE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_COMPRESSION_FAILURE);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_UNSUPPORTED_KIND);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_REGION_VIOLATION);
UVM_ENUM_STRING_CASE(UVM_FAULT_TYPE_POISONED);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_fault_client_type_string(uvm_fault_client_type_t fault_client_type)
{
BUILD_BUG_ON(UVM_FAULT_CLIENT_TYPE_COUNT != 2);
switch (fault_client_type) {
UVM_ENUM_STRING_CASE(UVM_FAULT_CLIENT_TYPE_GPC);
UVM_ENUM_STRING_CASE(UVM_FAULT_CLIENT_TYPE_HUB);
UVM_ENUM_STRING_DEFAULT();
}
}
const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type)
{
BUILD_BUG_ON(UVM_MMU_ENGINE_TYPE_COUNT != 3);
switch (mmu_engine_type) {
UVM_ENUM_STRING_CASE(UVM_MMU_ENGINE_TYPE_GRAPHICS);
UVM_ENUM_STRING_CASE(UVM_MMU_ENGINE_TYPE_HOST);
UVM_ENUM_STRING_CASE(UVM_MMU_ENGINE_TYPE_CE);
UVM_ENUM_STRING_DEFAULT();
}
}
void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
{
UVM_DBG_PRINT("fault_address: 0x%llx\n", entry->fault_address);
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n", entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" fault_type: %s\n", uvm_fault_type_string(entry->fault_type));
UVM_DBG_PRINT(" fault_access_type: %s\n", uvm_fault_access_type_string(entry->fault_access_type));
UVM_DBG_PRINT(" is_replayable: %s\n", entry->is_replayable? "true": "false");
UVM_DBG_PRINT(" is_virtual: %s\n", entry->is_virtual? "true": "false");
UVM_DBG_PRINT(" in_protected_mode: %s\n", entry->in_protected_mode? "true": "false");
UVM_DBG_PRINT(" fault_source.client_type: %s\n", uvm_fault_client_type_string(entry->fault_source.client_type));
UVM_DBG_PRINT(" fault_source.client_id: %d\n", entry->fault_source.client_id);
UVM_DBG_PRINT(" fault_source.gpc_id: %d\n", entry->fault_source.gpc_id);
UVM_DBG_PRINT(" fault_source.mmu_engine_id: %d\n", entry->fault_source.mmu_engine_id);
UVM_DBG_PRINT(" fault_source.mmu_engine_type: %s\n",
uvm_mmu_engine_type_string(entry->fault_source.mmu_engine_type));
UVM_DBG_PRINT(" timestamp: %llu\n", entry->timestamp);
}
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
{
BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
switch (access_counter_type) {
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
UVM_ENUM_STRING_DEFAULT();
}
}
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
{
if (!entry->address.is_virtual) {
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n", entry->address.address,
uvm_aperture_string(entry->address.aperture));
}
else {
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n", entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);
}
UVM_DBG_PRINT(" is_virtual %u\n", entry->address.is_virtual);
UVM_DBG_PRINT(" counter_type %s\n", uvm_access_counter_type_string(entry->counter_type));
UVM_DBG_PRINT(" counter_value %u\n", entry->counter_value);
UVM_DBG_PRINT(" subgranularity 0x%08x\n", entry->sub_granularity);
UVM_DBG_PRINT(" bank %u\n", entry->bank);
UVM_DBG_PRINT(" tag %x\n", entry->tag);
}
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
return true;
}
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
return true;
}
void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
{
}
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
{
return true;
}

View File

@@ -0,0 +1,818 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_HAL_H__
#define __UVM_HAL_H__
#include "uvm_types.h"
#include "uvm_common.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"
#include "uvm_push.h"
#include "uvm_gpu.h"
#include "uvm_test_ioctl.h"
// A dummy method validation that always returns true; it can be used to skip
// CE/Host/SW method validations for a given architecture
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
typedef void (*uvm_hal_init_t)(uvm_push_t *push);
void uvm_hal_maxwell_ce_init(uvm_push_t *push);
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
void uvm_hal_pascal_host_init(uvm_push_t *push);
// Host method validation
typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
// SW method validation
typedef bool (*uvm_hal_host_sw_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
// Wait for idle
typedef void (*uvm_hal_wait_for_idle_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_wait_for_idle(uvm_push_t *push);
// Membar SYS
typedef void (*uvm_hal_membar_sys_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_membar_sys(uvm_push_t *push);
void uvm_hal_pascal_host_membar_sys(uvm_push_t *push);
// Membar GPU
typedef void (*uvm_hal_membar_gpu_t)(uvm_push_t *push);
void uvm_hal_pascal_host_membar_gpu(uvm_push_t *push);
// Put a noop in the pushbuffer of the given size in bytes.
// The size needs to be a multiple of 4.
typedef void (*uvm_hal_noop_t)(uvm_push_t *push, NvU32 size);
void uvm_hal_maxwell_host_noop(uvm_push_t *push, NvU32 size);
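// Illustrative sketch (added for clarity, not part of the original header):
// padding a push to an 8-byte boundary with the noop method. Push sizes are
// multiples of 4 bytes, so the padding below is always a multiple of 4 as
// required. The uvm_push_get_size() helper is assumed to return the current
// push size in bytes.
//
//     NvU32 push_size = uvm_push_get_size(push);
//     if (push_size % 8 != 0)
//         gpu->parent->host_hal->noop(push, 8 - (push_size % 8));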
// Host-generated interrupt method. This will generate a call to
// uvm_isr_top_half_entry.
//
// This is a non-stalling interrupt, which means that it's fire-and-forget. Host
// will not stall method processing nor stop channel switching, which means that
// we cannot directly identify in software which channel generated the
// interrupt.
//
// We must set up software state before pushing the interrupt, and check any
// possible interrupt condition on receiving an interrupt callback.
typedef void (*uvm_hal_interrupt_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_interrupt(uvm_push_t *push);
// Issue a TLB invalidate applying to all VAs in a PDB.
//
// The PTE caches (TLBs) are always invalidated. The PDE caches for all VAs in
// the PDB are invalidated from the specified depth down to the PTEs. This
// allows for optimizations if the caller isn't writing all levels of the PDEs.
// Depth follows the MMU code convention where depth 0 is the top level and here
// means to invalidate everything. See uvm_pascal_mmu.c for an example of depth
// mapping to HW PDE levels. Notably 2M PTEs are considered PDEs as far as the
// TLBs are concerned and hence on Pascal the depth needs to be at most 3 for
// them to be included in the invalidation.
//
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar is
// performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
typedef void (*uvm_hal_host_tlb_invalidate_all_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_all_a16f(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_all_b06f(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
// Issue a TLB invalidate applying to the specified VA range in a PDB.
//
// The PTE caches (TLBs) for each page size aligned VA within the VA range
// are always invalidated. The PDE caches covering the specified VA
// range in the PDB are invalidated from the specified depth down to the PTEs.
// Specifying the depth allows for optimizations if the caller isn't writing all
// levels of the PDEs. Specifying the page size allows for optimizations if
// the caller can guarantee caches for smaller page sizes don't need to be
// invalidated.
//
// Depth follows the MMU code convention where depth 0 is the top level and here
// means to invalidate all levels. See uvm_pascal_mmu.c for an example of depth
// mapping to HW PDE levels. Notably 2M PTEs are considered PDEs as far as the
// TLBs are concerned and hence on Pascal the depth needs to be at most 3 for
// them to be included in the invalidation.
//
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar is
// performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
//
// Note that this can end up pushing a lot of methods for big ranges so it's
// better not to use it directly. Instead, uvm_tlb_batch* APIs should be used
// that automatically switch between targeted VA invalidates and invalidate all.
typedef void (*uvm_hal_host_tlb_invalidate_va_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
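// Illustrative sketch (added for clarity, not part of the original header):
// invalidating a single 64KB page after its PTE was rewritten, with no membar.
// "pdb", "depth" and "addr" are assumed to be provided by the caller, with
// "depth" set to the deepest PDE level that was actually written:
//
//     gpu->parent->host_hal->tlb_invalidate_va(push, pdb, depth, addr,
//                                              UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_64K,
//                                              UVM_MEMBAR_NONE);
//
// As noted above, production code should normally go through the
// uvm_tlb_batch* helpers rather than calling this entry point directly.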
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
// By default all semaphore release operations include a membar sys before the
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
// uvm_push_set_flag().
typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
// Release a semaphore including a timestamp at the specific GPU VA.
//
// This operation writes 16 bytes of memory and the VA needs to be 16-byte
// aligned. The value of the released payload is unspecified and shouldn't be
// relied on, only the timestamp should be of interest.
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
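// Illustrative sketch (added for clarity, not part of the original header):
// a release/acquire pair can order work between two channels of the same GPU.
// Channel A releases the semaphore once its work is done, and channel B
// acquires the same payload before consuming the result. "sema_gpu_va" is
// assumed to be a semaphore location mapped in both channels' address spaces:
//
//     gpu->parent->host_hal->semaphore_release(push_a, sema_gpu_va, 1);
//     gpu->parent->host_hal->semaphore_acquire(push_b, sema_gpu_va, 1);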
typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_volta_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
// Return the current GPU time in nanoseconds
typedef NvU64 (*uvm_hal_get_time_t)(uvm_gpu_t *gpu);
NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu);
// Internal helpers used by the CE hal
// Used to handle the offset encoding differences between architectures
typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset);
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset);
typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);
// CE method validation
typedef bool (*uvm_hal_ce_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
// Memcopy validation.
// The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
// execution. Use uvm_hal_ce_memcopy_validate_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memcopy_validate)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
// Patching of the memcopy source; if not needed for a given architecture use
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation
typedef void (*uvm_hal_ce_memcopy_patch_src)(uvm_push_t *push, uvm_gpu_address_t *src);
void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src);
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src);
// Memcopy size bytes from src to dst.
//
// By default all CE transfer operations include a membar sys after the
// operation and are not pipelined. This can be affected by using
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
typedef void (*uvm_hal_memcopy_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
// Simple wrapper for uvm_hal_memcopy_t with both addresses being virtual
typedef void (*uvm_hal_memcopy_v_to_v_t)(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
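// Illustrative sketch (added for clarity, not part of the original header):
// copying one page between two GPU virtual addresses on the pushing GPU.
// "dst_va" and "src_va" are assumed to be valid mappings for that GPU:
//
//     gpu->parent->ce_hal->memcopy_v_to_v(push, dst_va, src_va, PAGE_SIZE);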
// Memset validation.
// The validation happens at the start of the memset (uvm_hal_memset_*_t)
// execution. Use uvm_hal_ce_memset_validate_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memset_validate)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
// Memset size bytes at dst to a given N-byte input value.
//
// Size has to be a multiple of the element size. For example, the size passed
// to uvm_hal_memset_4_t must be a multiple of 4 bytes.
//
// By default all CE transfer operations include a membar sys after the
// operation and are not pipelined. This can be affected by using
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
typedef void (*uvm_hal_memset_1_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
typedef void (*uvm_hal_memset_4_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
typedef void (*uvm_hal_memset_8_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
// Simple wrapper for uvm_hal_memset_4_t with the address being virtual.
typedef void (*uvm_hal_memset_v_4_t)(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);
void uvm_hal_maxwell_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);
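// Illustrative sketch (added for clarity, not part of the original header):
// zeroing a 4KB buffer with the 4-byte memset. The size (4096) is a multiple
// of the 4-byte element size, as required:
//
//     gpu->parent->ce_hal->memset_4(push, dst, 0, 4096);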
// Increments the semaphore by 1, or resets to 0 if the incremented value would
// exceed the payload.
//
// By default all CE semaphore operations include a membar sys before the
// semaphore operation. This can be affected by using UVM_PUSH_FLAG_NEXT_CE_*
// flags with uvm_push_set_flag().
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
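// For example (note added for clarity, not in the original source), with a
// payload of 3 successive reduction increments move the semaphore through
// 0 -> 1 -> 2 -> 3 -> 0, wrapping back to zero once an increment would exceed
// the payload.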
// Initialize GPU architecture dependent properties
typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
// Retrieve the page-tree HAL for a given big page size
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
typedef void (*uvm_hal_mmu_enable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_mmu_disable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
// Convert a faulted MMU engine ID to a UVM engine type. Only engines which have
// faults serviced by UVM are handled. On Pascal the only such engine is
// GRAPHICS, so no translation is provided.
typedef uvm_mmu_engine_type_t (*uvm_hal_mmu_engine_id_to_type_t)(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
// Replayable faults
typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef NvU32 (*uvm_hal_fault_buffer_read_put_t)(uvm_parent_gpu_t *parent_gpu);
typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
// Parse the entry at the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_fault_buffer_parse_entry_t)(uvm_parent_gpu_t *gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_fault_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_fault_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef NvU32 (*uvm_hal_fault_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_replay_t)(uvm_push_t *push, uvm_fault_replay_type_t type);
typedef void (*uvm_hal_fault_cancel_global_t)(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
typedef void (*uvm_hal_fault_cancel_targeted_t)(uvm_push_t *push,
uvm_gpu_phys_address_t instance_ptr,
NvU32 gpc_id,
NvU32 client_id);
void uvm_hal_maxwell_enable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_pascal_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry);
NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_maxwell_fault_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
bool uvm_hal_pascal_fault_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_pascal_fault_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_pascal_fault_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_parse_non_replayable_entry_t)(uvm_parent_gpu_t *parent_gpu,
void *fault_packet,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void *fault_packet,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_volta_fault_buffer_parse_non_replayable_entry(uvm_parent_gpu_t *parent_gpu,
void *fault_packet,
uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_cancel_faults_global_unsupported(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
void uvm_hal_pascal_cancel_faults_global(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
// Trigger fault replay on the GPU where the given pushbuffer is located.
void uvm_hal_maxwell_replay_faults_unsupported(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_maxwell_cancel_faults_targeted_unsupported(uvm_push_t *push,
uvm_gpu_phys_address_t instance_ptr,
NvU32 gpc_id,
NvU32 client_id);
void uvm_hal_pascal_replay_faults(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_pascal_cancel_faults_targeted(uvm_push_t *push,
uvm_gpu_phys_address_t instance_ptr,
NvU32 gpc_id,
NvU32 client_id);
typedef void (*uvm_hal_fault_cancel_va_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode);
void uvm_hal_maxwell_cancel_faults_va_unsupported(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode);
void uvm_hal_volta_replay_faults(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_volta_cancel_faults_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode);
typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_volta_host_clear_faulted_channel_method(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_turing_host_clear_faulted_channel_method(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
typedef void (*uvm_hal_host_clear_faulted_channel_register_t)(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
typedef void (*uvm_hal_host_clear_faulted_channel_sw_method_t)(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_sw_method_unsupported(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);
// Access counters
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
// Parse the entry at the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
struct uvm_host_hal_struct
{
uvm_hal_init_t init;
uvm_hal_host_method_validate method_validate;
uvm_hal_host_sw_method_validate sw_method_validate;
uvm_hal_wait_for_idle_t wait_for_idle;
uvm_hal_membar_sys_t membar_sys;
uvm_hal_membar_gpu_t membar_gpu;
uvm_hal_noop_t noop;
uvm_hal_interrupt_t interrupt;
uvm_hal_semaphore_release_t semaphore_release;
uvm_hal_semaphore_acquire_t semaphore_acquire;
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
uvm_hal_host_write_gpu_put_t write_gpu_put;
uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
uvm_hal_host_tlb_invalidate_test_t tlb_invalidate_test;
uvm_hal_fault_buffer_replay_t replay_faults;
uvm_hal_fault_cancel_global_t cancel_faults_global;
uvm_hal_fault_cancel_targeted_t cancel_faults_targeted;
uvm_hal_fault_cancel_va_t cancel_faults_va;
uvm_hal_host_clear_faulted_channel_sw_method_t clear_faulted_channel_sw_method;
uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
uvm_hal_access_counter_clear_all_t access_counter_clear_all;
uvm_hal_access_counter_clear_type_t access_counter_clear_type;
uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
uvm_hal_get_time_t get_time;
};
struct uvm_ce_hal_struct
{
uvm_hal_init_t init;
uvm_hal_ce_method_validate method_validate;
uvm_hal_semaphore_release_t semaphore_release;
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
uvm_hal_ce_offset_out_t offset_out;
uvm_hal_ce_offset_in_out_t offset_in_out;
uvm_hal_ce_phys_mode_t phys_mode;
uvm_hal_ce_plc_mode_t plc_mode;
uvm_hal_ce_memcopy_validate memcopy_validate;
uvm_hal_ce_memcopy_patch_src memcopy_patch_src;
uvm_hal_memcopy_t memcopy;
uvm_hal_memcopy_v_to_v_t memcopy_v_to_v;
uvm_hal_ce_memset_validate memset_validate;
uvm_hal_memset_1_t memset_1;
uvm_hal_memset_4_t memset_4;
uvm_hal_memset_8_t memset_8;
uvm_hal_memset_v_4_t memset_v_4;
uvm_hal_semaphore_reduction_inc_t semaphore_reduction_inc;
};
struct uvm_arch_hal_struct
{
uvm_hal_arch_init_properties_t init_properties;
uvm_hal_lookup_mode_hal_t mmu_mode_hal;
uvm_hal_mmu_enable_prefetch_faults_t enable_prefetch_faults;
uvm_hal_mmu_disable_prefetch_faults_t disable_prefetch_faults;
uvm_hal_mmu_engine_id_to_type_t mmu_engine_id_to_type;
uvm_hal_mmu_client_id_to_utlb_id_t mmu_client_id_to_utlb_id;
};
struct uvm_fault_buffer_hal_struct
{
uvm_hal_enable_replayable_faults_t enable_replayable_faults;
uvm_hal_disable_replayable_faults_t disable_replayable_faults;
uvm_hal_clear_replayable_faults_t clear_replayable_faults;
uvm_hal_fault_buffer_read_put_t read_put;
uvm_hal_fault_buffer_read_get_t read_get;
uvm_hal_fault_buffer_write_get_t write_get;
uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
uvm_hal_fault_buffer_parse_entry_t parse_entry;
uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;
uvm_hal_fault_buffer_entry_size_t entry_size;
uvm_hal_fault_buffer_parse_non_replayable_entry_t parse_non_replayable_entry;
};
struct uvm_access_counter_buffer_hal_struct
{
uvm_hal_enable_access_counter_notifications_t enable_access_counter_notifications;
uvm_hal_disable_access_counter_notifications_t disable_access_counter_notifications;
uvm_hal_clear_access_counter_notifications_t clear_access_counter_notifications;
uvm_hal_access_counter_buffer_parse_entry_t parse_entry;
uvm_hal_access_counter_buffer_entry_is_valid_t entry_is_valid;
uvm_hal_access_counter_buffer_entry_clear_valid_t entry_clear_valid;
uvm_hal_access_counter_buffer_entry_size_t entry_size;
};
typedef struct
{
// id is either a hardware class or GPU architecture
NvU32 id;
NvU32 parent_id;
union
{
// host_ops: id is a hardware class
uvm_host_hal_t host_ops;
// ce_ops: id is a hardware class
uvm_ce_hal_t ce_ops;
// arch_ops: id is an architecture
uvm_arch_hal_t arch_ops;
// fault_buffer_ops: id is an architecture
uvm_fault_buffer_hal_t fault_buffer_ops;
// access_counter_buffer_ops: id is an architecture
uvm_access_counter_buffer_hal_t access_counter_buffer_ops;
} u;
} uvm_hal_class_ops_t;
NV_STATUS uvm_hal_init_table(void);
NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu);
// Helper to push a SYS or GPU membar based on the membar type
//
// Notably this doesn't just get the GPU from the push object to support the
// test mode of the page tree code that doesn't do real pushes.
static void uvm_hal_membar(uvm_gpu_t *gpu, uvm_push_t *push, uvm_membar_t membar)
{
switch (membar) {
case UVM_MEMBAR_SYS:
gpu->parent->host_hal->membar_sys(push);
break;
case UVM_MEMBAR_GPU:
gpu->parent->host_hal->membar_gpu(push);
break;
case UVM_MEMBAR_NONE:
break;
}
}
static void uvm_hal_wfi_membar(uvm_push_t *push, uvm_membar_t membar)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
gpu->parent->host_hal->wait_for_idle(push);
uvm_hal_membar(gpu, push, membar);
}
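// Illustrative helper sketch (added for clarity, not part of the original
// interface): invalidate all translations for a PDB and order the invalidate
// with a system-scope membar, dispatching through the per-GPU HAL like the
// helpers above. Depth 0 invalidates every PDE level down to the PTEs.
static inline void uvm_hal_example_tlb_invalidate_all_sys(uvm_push_t *push, uvm_gpu_phys_address_t pdb)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    gpu->parent->host_hal->tlb_invalidate_all(push, pdb, 0, UVM_MEMBAR_SYS);
}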
// Internal helper used by the TLB invalidate hal functions. This issues the
// appropriate Host membar(s) after a TLB invalidate.
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar);
#endif // __UVM_HAL_H__

View File

@@ -0,0 +1,533 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_HAL_TYPES_H__
#define __UVM_HAL_TYPES_H__
#include "uvm_common.h"
#include "uvm_forward_decl.h"
#include "uvm_processors.h"
#define UVM_GPU_MMU_MAX_FAULT_PACKET_SIZE 32
typedef enum
{
UVM_APERTURE_PEER_0,
UVM_APERTURE_PEER_1,
UVM_APERTURE_PEER_2,
UVM_APERTURE_PEER_3,
UVM_APERTURE_PEER_4,
UVM_APERTURE_PEER_5,
UVM_APERTURE_PEER_6,
UVM_APERTURE_PEER_7,
UVM_APERTURE_PEER_MAX,
UVM_APERTURE_SYS,
UVM_APERTURE_VID,
// DEFAULT is a special value to let MMU pick the location of page tables
UVM_APERTURE_DEFAULT,
UVM_APERTURE_MAX
} uvm_aperture_t;
const char *uvm_aperture_string(uvm_aperture_t aperture);
static bool uvm_aperture_is_peer(uvm_aperture_t aperture)
{
return (aperture >= UVM_APERTURE_PEER_0) && (aperture < UVM_APERTURE_PEER_MAX);
}
static inline NvU32 UVM_APERTURE_PEER_ID(uvm_aperture_t aperture)
{
UVM_ASSERT(uvm_aperture_is_peer(aperture));
return (NvU32)aperture;
}
static inline uvm_aperture_t UVM_APERTURE_PEER(NvU32 id)
{
uvm_aperture_t aperture = (uvm_aperture_t)id;
UVM_ASSERT(UVM_APERTURE_PEER_ID(aperture) == id);
return aperture;
}
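// For example (note added for clarity, not in the original source),
// UVM_APERTURE_PEER(3) yields UVM_APERTURE_PEER_3 and
// UVM_APERTURE_PEER_ID(UVM_APERTURE_PEER_3) returns 3, since the peer
// apertures occupy the first UVM_APERTURE_PEER_MAX entries of the enum.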
// A physical GPU address
typedef struct
{
NvU64 address;
uvm_aperture_t aperture;
} uvm_gpu_phys_address_t;
// Create a physical GPU address
static uvm_gpu_phys_address_t uvm_gpu_phys_address(uvm_aperture_t aperture, NvU64 address)
{
return (uvm_gpu_phys_address_t){ address, aperture };
}
// Compare two gpu physical addresses
static int uvm_gpu_phys_addr_cmp(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b)
{
int result = UVM_CMP_DEFAULT(a.aperture, b.aperture);
if (result != 0)
return result;
return UVM_CMP_DEFAULT(a.address, b.address);
}
// A physical or virtual address directly accessible by a GPU.
// This implies that the address already went through identity mapping and IOMMU
// translations and is only valid for a specific GPU.
typedef struct
{
// Physical or virtual address
// In general, only valid for a specific GPU
NvU64 address;
// Aperture for a physical address
uvm_aperture_t aperture;
// Whether the address is virtual
bool is_virtual;
} uvm_gpu_address_t;
// Create a virtual GPU address
static uvm_gpu_address_t uvm_gpu_address_virtual(NvU64 va)
{
uvm_gpu_address_t address = {0};
address.address = va;
address.aperture = UVM_APERTURE_MAX;
address.is_virtual = true;
return address;
}
// Create a physical GPU address
static uvm_gpu_address_t uvm_gpu_address_physical(uvm_aperture_t aperture, NvU64 pa)
{
uvm_gpu_address_t address = {0};
address.aperture = aperture;
address.address = pa;
return address;
}
// Create a GPU address from a physical GPU address
static uvm_gpu_address_t uvm_gpu_address_from_phys(uvm_gpu_phys_address_t phys_address)
{
return uvm_gpu_address_physical(phys_address.aperture, phys_address.address);
}
static const char *uvm_gpu_address_aperture_string(uvm_gpu_address_t addr)
{
if (addr.is_virtual)
return "VIRTUAL";
return uvm_aperture_string(addr.aperture);
}
// Compare two gpu addresses
static int uvm_gpu_addr_cmp(uvm_gpu_address_t a, uvm_gpu_address_t b)
{
int result = UVM_CMP_DEFAULT(a.is_virtual, b.is_virtual);
if (result != 0)
return result;
if (a.is_virtual) {
return UVM_CMP_DEFAULT(a.address, b.address);
}
else {
uvm_gpu_phys_address_t phys_a = { a.address, a.aperture };
uvm_gpu_phys_address_t phys_b = { b.address, b.aperture };
return uvm_gpu_phys_addr_cmp(phys_a, phys_b);
}
}
// For processors with no concept of an atomic fault (the CPU and pre-Pascal
// GPUs), UVM_PROT_READ_WRITE and UVM_PROT_READ_WRITE_ATOMIC are
// interchangeable.
typedef enum
{
UVM_PROT_NONE,
UVM_PROT_READ_ONLY,
UVM_PROT_READ_WRITE,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_PROT_MAX
} uvm_prot_t;
const char *uvm_prot_string(uvm_prot_t prot);
typedef enum
{
UVM_MEMBAR_NONE,
UVM_MEMBAR_GPU,
UVM_MEMBAR_SYS,
} uvm_membar_t;
const char *uvm_membar_string(uvm_membar_t membar);
// Types of memory accesses that can cause a replayable fault on the GPU. They
// are ordered by access "intrusiveness" to simplify fault preprocessing (e.g.
// to implement fault coalescing)
typedef enum
{
UVM_FAULT_ACCESS_TYPE_PREFETCH = 0,
UVM_FAULT_ACCESS_TYPE_READ,
UVM_FAULT_ACCESS_TYPE_WRITE,
UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK,
UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG,
UVM_FAULT_ACCESS_TYPE_COUNT
} uvm_fault_access_type_t;
const char *uvm_fault_access_type_string(uvm_fault_access_type_t fault_access_type);
static NvU32 uvm_fault_access_type_mask_bit(uvm_fault_access_type_t fault_access_type)
{
BUILD_BUG_ON(UVM_FAULT_ACCESS_TYPE_COUNT >= 32);
UVM_ASSERT(fault_access_type >= 0);
UVM_ASSERT(fault_access_type < UVM_FAULT_ACCESS_TYPE_COUNT);
return (NvU32)1 << fault_access_type;
}
static bool uvm_fault_access_type_mask_test(NvU32 mask, uvm_fault_access_type_t fault_access_type)
{
return uvm_fault_access_type_mask_bit(fault_access_type) & mask;
}
static void uvm_fault_access_type_mask_set(NvU32 *mask, uvm_fault_access_type_t fault_access_type)
{
*mask |= uvm_fault_access_type_mask_bit(fault_access_type);
}
static uvm_fault_access_type_t uvm_fault_access_type_mask_highest(NvU32 mask)
{
int pos;
UVM_ASSERT((1 << UVM_FAULT_ACCESS_TYPE_COUNT) > mask);
UVM_ASSERT(mask != 0);
pos = __fls(mask);
UVM_ASSERT(pos < UVM_FAULT_ACCESS_TYPE_COUNT);
return pos;
}
static uvm_fault_access_type_t uvm_fault_access_type_mask_lowest(NvU32 mask)
{
int pos;
UVM_ASSERT((1 << UVM_FAULT_ACCESS_TYPE_COUNT) > mask);
UVM_ASSERT(mask != 0);
pos = __ffs(mask);
UVM_ASSERT(pos < UVM_FAULT_ACCESS_TYPE_COUNT);
return pos;
}
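// Worked example (added for clarity, not in the original source): if READ and
// WRITE faults to the same page were coalesced,
//
//     NvU32 mask = 0;
//     uvm_fault_access_type_mask_set(&mask, UVM_FAULT_ACCESS_TYPE_READ);
//     uvm_fault_access_type_mask_set(&mask, UVM_FAULT_ACCESS_TYPE_WRITE);
//
// then uvm_fault_access_type_mask_highest(mask) is UVM_FAULT_ACCESS_TYPE_WRITE
// (the most intrusive access) and uvm_fault_access_type_mask_lowest(mask) is
// UVM_FAULT_ACCESS_TYPE_READ.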
typedef enum
{
// Cancel all accesses on the page
UVM_FAULT_CANCEL_VA_MODE_ALL = 0,
// Cancel write and atomic accesses on the page
UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC,
UVM_FAULT_CANCEL_VA_MODE_COUNT,
} uvm_fault_cancel_va_mode_t;
// Types of faults that can show up in the fault buffer. Non-UVM related faults are grouped in FATAL category
// since we don't care about the specific type
typedef enum
{
UVM_FAULT_TYPE_INVALID_PDE = 0,
UVM_FAULT_TYPE_INVALID_PTE,
UVM_FAULT_TYPE_ATOMIC,
// WRITE to READ-ONLY
UVM_FAULT_TYPE_WRITE,
// READ to WRITE-ONLY (ATS)
UVM_FAULT_TYPE_READ,
// The next values are considered fatal and are not handled by the UVM driver
UVM_FAULT_TYPE_FATAL,
// Values required for tools
UVM_FAULT_TYPE_PDE_SIZE = UVM_FAULT_TYPE_FATAL,
UVM_FAULT_TYPE_VA_LIMIT_VIOLATION,
UVM_FAULT_TYPE_UNBOUND_INST_BLOCK,
UVM_FAULT_TYPE_PRIV_VIOLATION,
UVM_FAULT_TYPE_PITCH_MASK_VIOLATION,
UVM_FAULT_TYPE_WORK_CREATION,
UVM_FAULT_TYPE_UNSUPPORTED_APERTURE,
UVM_FAULT_TYPE_COMPRESSION_FAILURE,
UVM_FAULT_TYPE_UNSUPPORTED_KIND,
UVM_FAULT_TYPE_REGION_VIOLATION,
UVM_FAULT_TYPE_POISONED,
UVM_FAULT_TYPE_COUNT
} uvm_fault_type_t;
const char *uvm_fault_type_string(uvm_fault_type_t fault_type);
// Main MMU client type that triggered the fault
typedef enum
{
UVM_FAULT_CLIENT_TYPE_GPC = 0,
UVM_FAULT_CLIENT_TYPE_HUB,
UVM_FAULT_CLIENT_TYPE_COUNT
} uvm_fault_client_type_t;
const char *uvm_fault_client_type_string(uvm_fault_client_type_t fault_client_type);
typedef enum
{
UVM_MMU_ENGINE_TYPE_GRAPHICS = 0,
UVM_MMU_ENGINE_TYPE_HOST,
UVM_MMU_ENGINE_TYPE_CE,
UVM_MMU_ENGINE_TYPE_COUNT,
} uvm_mmu_engine_type_t;
const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type);
// HW unit that triggered the fault. We include the fields required for fault cancelling. Including more information
// might be useful for performance heuristics in the future
typedef struct
{
uvm_fault_client_type_t client_type : order_base_2(UVM_FAULT_CLIENT_TYPE_COUNT) + 1;
uvm_mmu_engine_type_t mmu_engine_type : order_base_2(UVM_MMU_ENGINE_TYPE_COUNT) + 1;
NvU16 client_id;
NvU16 mmu_engine_id;
union
{
struct
{
NvU16 utlb_id;
NvU8 gpc_id;
};
// TODO: Bug 3283289: the channel ID, which is only populated for
// non-replayable faults, is never consumed.
NvU16 channel_id;
};
// Identifier of the subcontext that caused the fault. HW uses it as an
// offset in the instance block to obtain the GPU VA space PDB of the
// faulting process.
NvU8 ve_id;
} uvm_fault_source_t;
struct uvm_fault_buffer_entry_struct
{
//
// The next fields are filled by the fault buffer parsing code
//
// Virtual address of the faulting request aligned to CPU page size
NvU64 fault_address;
// GPU timestamp (in nanoseconds) when the fault was inserted in the fault
// buffer
NvU64 timestamp;
uvm_gpu_phys_address_t instance_ptr;
uvm_fault_source_t fault_source;
uvm_fault_type_t fault_type : order_base_2(UVM_FAULT_TYPE_COUNT) + 1;
uvm_fault_access_type_t fault_access_type : order_base_2(UVM_FAULT_ACCESS_TYPE_COUNT) + 1;
//
// The next fields are managed by the fault handling code
//
uvm_va_space_t *va_space;
// This is set to true when some fault could not be serviced and a
// cancel command needs to be issued
bool is_fatal : 1;
// This is set to true for all GPU faults on a page that is thrashing
bool is_throttled : 1;
// This is set to true if the fault has prefetch access type and the
// address or the access privileges are not valid
bool is_invalid_prefetch : 1;
bool is_replayable : 1;
bool is_virtual : 1;
bool in_protected_mode : 1;
bool filtered : 1;
// Reason for the fault to be fatal
UvmEventFatalReason fatal_reason : order_base_2(UvmEventNumFatalReasons) + 1;
// Mode to be used to cancel faults. This must be set according to the
// fatal fault reason and the fault access types of the merged fault
// instances.
union
{
struct
{
uvm_fault_cancel_va_mode_t cancel_va_mode : order_base_2(UVM_FAULT_CANCEL_VA_MODE_COUNT) + 1;
} replayable;
struct
{
NvU32 buffer_index;
} non_replayable;
};
// List of duplicate fault buffer entries that have been merged into this
// one
struct list_head merged_instances_list;
// Access types to this page for all accesses that have been coalesced at
// fetch time. It must include, at least, fault_access_type
NvU32 access_type_mask;
// Number of faults with the same properties that have been coalesced at
// fetch time
NvU16 num_instances;
};
typedef enum
{
// Completes when all fault replays are in-flight
UVM_FAULT_REPLAY_TYPE_START = 0,
// Completes when all faulting accesses have been correctly translated or faulted again
UVM_FAULT_REPLAY_TYPE_START_ACK_ALL,
UVM_FAULT_REPLAY_TYPE_MAX
} uvm_fault_replay_type_t;
static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
{
BUILD_BUG_ON(UVM_MEMBAR_NONE >= UVM_MEMBAR_GPU);
BUILD_BUG_ON(UVM_MEMBAR_GPU >= UVM_MEMBAR_SYS);
return max(membar_1, membar_2);
}
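// For example, uvm_membar_max(UVM_MEMBAR_GPU, UVM_MEMBAR_SYS) returns
// UVM_MEMBAR_SYS: the enum is ordered NONE < GPU < SYS, as enforced by the
// BUILD_BUG_ONs above (note added for clarity, not in the original source).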
typedef enum
{
UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
UVM_ACCESS_COUNTER_TYPE_MOMC,
UVM_ACCESS_COUNTER_TYPE_MAX,
} uvm_access_counter_type_t;
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
struct uvm_access_counter_buffer_entry_struct
{
// Whether this counter refers to outbound accesses to remote GPUs or
// sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
// GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
uvm_access_counter_type_t counter_type;
// Address of the region for which a notification was sent
uvm_gpu_address_t address;
// These fields are only valid if address.is_virtual is true
union
{
struct
{
// Instance pointer of one of the channels in the TSG that triggered the
// notification
uvm_gpu_phys_address_t instance_ptr;
uvm_mmu_engine_type_t mmu_engine_type;
NvU32 mmu_engine_id;
// Identifier of the subcontext that performed the memory accesses that
// triggered the notification. This value, combined with the instance_ptr,
// is needed to obtain the GPU VA space of the process that triggered the
// notification.
NvU32 ve_id;
// VA space for the address that triggered the notification
uvm_va_space_t *va_space;
} virtual_info;
// These fields are only valid if address.is_virtual is false
struct
{
// Processor id where data is resident
//
// Although this information is not tied to a VA space, we can use
// a regular processor id because P2P is not allowed between
// partitioned GPUs.
uvm_processor_id_t resident_id;
} physical_info;
};
// Number of times the tracked region was accessed since the last time it
// was cleared. Counter values saturate at the maximum value supported by
// the GPU (2^16 - 1 in Volta)
NvU32 counter_value;
// When the granularity of the tracked regions is greater than 64KB, the
// region is split into 32 equal subregions. Each bit in this field
// represents one of those subregions. 1 means that the subregion has been
// accessed
NvU32 sub_granularity;
// Opaque fields provided by HW, required for targeted clear of a counter
NvU32 bank;
NvU32 tag;
};
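// Worked example (added for clarity, not in the original source): with a 2MB
// tracking granularity each of the 32 sub_granularity bits covers a 64KB
// subregion, so bit 0 reports accesses to the first 64KB of the region.
// "granularity" is assumed to be known by the caller:
//
//     NvU64 subregion_size = granularity / 32; // 2MB / 32 == 64KB
//     bool first_subregion_accessed = entry->sub_granularity & 0x1;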
static uvm_prot_t uvm_fault_access_type_to_prot(uvm_fault_access_type_t access_type)
{
switch (access_type) {
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
return UVM_PROT_READ_WRITE_ATOMIC;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_WRITE:
return UVM_PROT_READ_WRITE;
default:
// Prefetch faults, if not ignored, are handled like read faults and require
// a mapping with, at least, READ_ONLY access permission
return UVM_PROT_READ_ONLY;
}
}
#endif // __UVM_HAL_TYPES_H__

View File

@@ -0,0 +1,790 @@
/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hmm.h"
static bool uvm_disable_hmm = false;
module_param(uvm_disable_hmm, bool, 0444);
MODULE_PARM_DESC(uvm_disable_hmm,
"Force-disable HMM functionality in the UVM driver. "
"Default: false (i.e, HMM is potentially enabled). Ignored if "
"HMM is not supported in the driver, or if ATS settings "
"conflict with HMM.");
#if UVM_IS_CONFIG_HMM()
#include <linux/hmm.h>
#include <linux/userfaultfd_k.h>
#include "uvm_common.h"
#include "uvm_gpu.h"
#include "uvm_va_block_types.h"
#include "uvm_va_space_mm.h"
#include "uvm_va_space.h"
#include "uvm_va_range.h"
#include "uvm_range_tree.h"
#include "uvm_lock.h"
#include "uvm_api.h"
#include "uvm_va_policy.h"
bool uvm_hmm_is_enabled_system_wide(void)
{
return !uvm_disable_hmm && !g_uvm_global.ats.enabled && uvm_va_space_mm_enabled_system();
}
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
{
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
return uvm_hmm_is_enabled_system_wide() &&
uvm_va_space_mm_enabled(va_space) &&
!(va_space->initialization_flags & UVM_INIT_FLAGS_DISABLE_HMM) &&
!va_space->hmm.disable;
}
static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
{
if (!node)
return NULL;
return container_of(node, uvm_va_block_t, hmm.node);
}
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
{
struct mm_struct *mm = va_space->va_space_mm.mm;
if (!uvm_hmm_is_enabled(va_space))
return NV_OK;
uvm_assert_mmap_lock_locked_write(mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
// Disable HMM by default for each va_space until enough functionality is
// implemented that this can be enabled by default.
// Note that it can be enabled for testing under controlled circumstances.
va_space->hmm.disable = true;
return NV_OK;
}
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
{
uvm_hmm_va_space_t *hmm_va_space = &va_space->hmm;
struct mm_struct *mm = va_space->va_space_mm.mm;
int ret;
if (!uvm_hmm_is_enabled_system_wide() || !mm)
return NV_WARN_NOTHING_TO_DO;
uvm_assert_mmap_lock_locked_write(mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
// Temporarily enable HMM for testing.
va_space->hmm.disable = false;
// Initialize MMU interval notifiers for this process.
// This allows mmu_interval_notifier_insert() to be called without holding
// the mmap_lock for write.
// Note: there is no __mmu_notifier_unregister(), this call just allocates
// memory which is attached to the mm_struct and freed when the mm_struct
// is freed.
ret = __mmu_notifier_register(NULL, mm);
if (ret)
return errno_to_nv_status(ret);
uvm_range_tree_init(&hmm_va_space->blocks);
uvm_mutex_init(&hmm_va_space->blocks_lock, UVM_LOCK_ORDER_LEAF);
return NV_OK;
}
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
{
uvm_hmm_va_space_t *hmm_va_space = &va_space->hmm;
uvm_range_tree_node_t *node, *next;
uvm_va_block_t *va_block;
if (!uvm_hmm_is_enabled(va_space) || uvm_va_space_initialized(va_space) != NV_OK)
return;
uvm_assert_rwsem_locked_write(&va_space->lock);
// The blocks_lock is not needed when the va_space lock is held for write.
uvm_range_tree_for_each_safe(node, next, &hmm_va_space->blocks) {
va_block = hmm_va_block_from_node(node);
uvm_range_tree_remove(&hmm_va_space->blocks, node);
mmu_interval_notifier_remove(&va_block->hmm.notifier);
uvm_va_block_kill(va_block);
}
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
va_space->hmm.disable = true;
}
static bool hmm_invalidate(uvm_va_block_t *va_block,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
struct mmu_interval_notifier *mni = &va_block->hmm.notifier;
NvU64 start, end;
// The MMU_NOTIFY_RELEASE event isn't really needed since mn_itree_release()
// doesn't remove the interval notifiers from the struct_mm so there will
// be a full range MMU_NOTIFY_UNMAP event after the release from
// unmap_vmas() during exit_mmap().
if (range->event == MMU_NOTIFY_SOFT_DIRTY || range->event == MMU_NOTIFY_RELEASE)
return true;
// Blockable is only set false by
// mmu_notifier_invalidate_range_start_nonblock() which is only called in
// __oom_reap_task_mm().
if (!mmu_notifier_range_blockable(range))
return false;
// Ignore invalidation callbacks for device private pages since the
// invalidation is handled as part of the migration process.
// Note that the va_space pointer won't be NULL if the callback is for
// MMU_NOTIFY_MIGRATE/MMU_NOTIFY_EXCLUSIVE because the va_block lock
// is already held and we have to prevent recursively getting the lock.
if ((range->event == MMU_NOTIFY_MIGRATE || range->event == MMU_NOTIFY_EXCLUSIVE) &&
range->owner == va_block->hmm.va_space)
return true;
uvm_mutex_lock(&va_block->lock);
// Ignore this invalidation callback if the block is dead.
if (uvm_va_block_is_dead(va_block))
goto unlock;
mmu_interval_set_seq(mni, cur_seq);
// Note: unmap_vmas() does MMU_NOTIFY_UNMAP [0, 0xffffffffffffffff]
start = range->start;
end = (range->end == ULONG_MAX) ? range->end : range->end - 1;
if (start < va_block->start)
start = va_block->start;
if (end > va_block->end)
end = va_block->end;
if (range->event == MMU_NOTIFY_UNMAP)
uvm_va_policy_clear(va_block, start, end);
unlock:
uvm_mutex_unlock(&va_block->lock);
return true;
}
static bool uvm_hmm_invalidate_entry(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
uvm_va_block_t *va_block = container_of(mni, uvm_va_block_t, hmm.notifier);
UVM_ENTRY_RET(hmm_invalidate(va_block, range, cur_seq));
}
static const struct mmu_interval_notifier_ops uvm_hmm_notifier_ops =
{
.invalidate = uvm_hmm_invalidate_entry,
};
NV_STATUS uvm_hmm_va_block_find(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **va_block_ptr)
{
uvm_range_tree_node_t *node;
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
uvm_assert_rwsem_locked(&va_space->lock);
uvm_mutex_lock(&va_space->hmm.blocks_lock);
node = uvm_range_tree_find(&va_space->hmm.blocks, addr);
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
if (!node)
return NV_ERR_OBJECT_NOT_FOUND;
*va_block_ptr = hmm_va_block_from_node(node);
return NV_OK;
}
static bool uvm_hmm_vma_is_valid(struct vm_area_struct *vma,
unsigned long addr,
bool allow_unreadable_vma)
{
// UVM doesn't support userfaultfd. hmm_range_fault() doesn't support
// VM_IO, VM_PFNMAP, or VM_MIXEDMAP VMAs. It also doesn't support
// VMAs without VM_READ but we allow those VMAs to have policy set on
// them.
return vma &&
addr >= vma->vm_start &&
!userfaultfd_armed(vma) &&
!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
(allow_unreadable_vma || (vma->vm_flags & VM_READ));
}
static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
bool allow_unreadable_vma,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr)
{
struct mm_struct *mm = va_space->va_space_mm.mm;
struct vm_area_struct *vma;
uvm_va_block_t *va_block;
NvU64 start, end;
NV_STATUS status;
int ret;
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
UVM_ASSERT(mm);
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked(&va_space->lock);
UVM_ASSERT(PAGE_ALIGNED(addr));
// Note that we have to allow PROT_NONE VMAs so that policies can be set.
vma = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(vma, addr, allow_unreadable_vma))
return NV_ERR_INVALID_ADDRESS;
// Since we only hold the va_space read lock, there can be multiple
// parallel va_block insertions.
uvm_mutex_lock(&va_space->hmm.blocks_lock);
va_block = hmm_va_block_from_node(uvm_range_tree_find(&va_space->hmm.blocks, addr));
if (va_block)
goto done;
// The va_block is always created to cover the whole aligned
// UVM_VA_BLOCK_SIZE interval unless there are existing UVM va_ranges or
// HMM va_blocks. In that case, the new HMM va_block size is adjusted so it
// doesn't overlap.
start = UVM_VA_BLOCK_ALIGN_DOWN(addr);
end = start + UVM_VA_BLOCK_SIZE - 1;
// Search for existing UVM va_ranges in the start/end interval and create
// a maximum interval that doesn't overlap any existing UVM va_ranges.
// We know that 'addr' is not within a va_range or
// hmm_va_block_find_create() wouldn't be called.
uvm_range_tree_adjust_interval(&va_space->va_range_tree, addr, &start, &end);
// Search for existing HMM va_blocks in the start/end interval and create
// a maximum interval that doesn't overlap any existing HMM va_blocks.
uvm_range_tree_adjust_interval(&va_space->hmm.blocks, addr, &start, &end);
// Create an HMM va_block with a NULL va_range pointer.
status = uvm_va_block_create(NULL, start, end, &va_block);
if (status != NV_OK)
goto err_unlock;
va_block->hmm.node.start = start;
va_block->hmm.node.end = end;
va_block->hmm.va_space = va_space;
uvm_range_tree_init(&va_block->hmm.va_policy_tree);
ret = mmu_interval_notifier_insert(&va_block->hmm.notifier,
mm,
start,
end - start + 1,
&uvm_hmm_notifier_ops);
if (ret) {
status = errno_to_nv_status(ret);
goto err_release;
}
status = uvm_range_tree_add(&va_space->hmm.blocks, &va_block->hmm.node);
if (status != NV_OK) {
UVM_ASSERT(status != NV_ERR_UVM_ADDRESS_IN_USE);
goto err_unreg;
}
done:
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
if (va_block_context)
va_block_context->hmm.vma = vma;
*va_block_ptr = va_block;
return NV_OK;
err_unreg:
mmu_interval_notifier_remove(&va_block->hmm.notifier);
err_release:
uvm_va_block_release(va_block);
err_unlock:
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
return status;
}
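// Worked example of the interval computation above (illustrative sketch,
// assuming UVM_VA_BLOCK_SIZE is 2MB): for addr == 0x7f0000301000 the initial
// interval is [0x7f0000200000, 0x7f00003fffff]. If an existing UVM va_range
// ends at 0x7f00002fffff, the first uvm_range_tree_adjust_interval() call
// raises 'start' to 0x7f0000300000; if an existing HMM va_block starts at
// 0x7f0000380000, the second call lowers 'end' to 0x7f000037ffff. The new HMM
// va_block then covers [0x7f0000300000, 0x7f000037ffff] and overlaps neither
// neighbor while still containing addr.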
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr)
{
return hmm_va_block_find_create(va_space, addr, false, va_block_context, va_block_ptr);
}
typedef struct {
struct mmu_interval_notifier notifier;
uvm_va_block_t *existing_block;
uvm_va_block_t *new_block;
} hmm_split_invalidate_data_t;
static bool hmm_split_invalidate(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
hmm_split_invalidate_data_t *split_data = container_of(mni, hmm_split_invalidate_data_t, notifier);
uvm_va_block_t *existing_block = split_data->existing_block;
uvm_va_block_t *new_block = split_data->new_block;
if (uvm_ranges_overlap(existing_block->start, existing_block->end, range->start, range->end - 1))
hmm_invalidate(existing_block, range, cur_seq);
if (uvm_ranges_overlap(new_block->start, new_block->end, range->start, range->end - 1))
hmm_invalidate(new_block, range, cur_seq);
return true;
}
static bool hmm_split_invalidate_entry(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
UVM_ENTRY_RET(hmm_split_invalidate(mni, range, cur_seq));
}
static const struct mmu_interval_notifier_ops hmm_notifier_split_ops =
{
.invalidate = hmm_split_invalidate_entry,
};
// Splits existing va_block into two pieces, with new_va_block always after
// va_block. va_block is updated to have new_end. new_end+1 must be page-
// aligned.
//
// Before: [----------- existing ------------]
// After: [---- existing ----][---- new ----]
// ^new_end
//
// On error, va_block is still accessible and is left in its original
// functional state.
static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
NvU64 new_end,
uvm_va_block_t **new_block_ptr)
{
uvm_va_space_t *va_space = va_block->hmm.va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
hmm_split_invalidate_data_t split_data;
uvm_va_block_t *new_va_block;
NV_STATUS status;
int ret;
uvm_assert_rwsem_locked_write(&va_space->lock);
UVM_ASSERT(new_end > va_block->start);
UVM_ASSERT(new_end < va_block->end);
UVM_ASSERT(PAGE_ALIGNED(new_end + 1));
status = uvm_va_block_create(NULL, new_end + 1, va_block->end, &new_va_block);
if (status != NV_OK)
return status;
// Initialize the newly created HMM va_block.
new_va_block->hmm.va_space = va_space;
uvm_range_tree_init(&new_va_block->hmm.va_policy_tree);
// The MMU interval notifier has to be removed in order to resize it.
// That means there would be a window of time where invalidation callbacks
// could be missed. To handle this case, we register a temporary notifier
// to cover the same address range while resizing the old notifier (it is
// OK to have multiple notifiers for the same range; we may simply try to
// invalidate twice).
split_data.existing_block = va_block;
split_data.new_block = new_va_block;
ret = mmu_interval_notifier_insert(&split_data.notifier,
mm,
va_block->start,
new_va_block->end - va_block->start + 1,
&hmm_notifier_split_ops);
uvm_mutex_lock(&va_block->lock);
status = uvm_va_block_split_locked(va_block, new_end, new_va_block, NULL);
if (status != NV_OK)
goto err;
uvm_mutex_unlock(&va_block->lock);
// Since __mmu_notifier_register() was called when the va_space was
// initially created, we know that mm->notifier_subscriptions is valid
// and mmu_interval_notifier_insert() can't return ENOMEM.
// The only error return is for start + length overflowing but we already
// registered the same address range before so there should be no error.
UVM_ASSERT(!ret);
mmu_interval_notifier_remove(&va_block->hmm.notifier);
uvm_range_tree_shrink_node(&va_space->hmm.blocks, &va_block->hmm.node, va_block->start, va_block->end);
// Enable notifications on the old block with the smaller size.
ret = mmu_interval_notifier_insert(&va_block->hmm.notifier,
mm,
va_block->start,
va_block->end - va_block->start + 1,
&uvm_hmm_notifier_ops);
UVM_ASSERT(!ret);
new_va_block->hmm.node.start = new_va_block->start;
new_va_block->hmm.node.end = new_va_block->end;
ret = mmu_interval_notifier_insert(&new_va_block->hmm.notifier,
mm,
new_va_block->start,
new_va_block->end - new_va_block->start + 1,
&uvm_hmm_notifier_ops);
UVM_ASSERT(!ret);
mmu_interval_notifier_remove(&split_data.notifier);
status = uvm_range_tree_add(&va_space->hmm.blocks, &new_va_block->hmm.node);
UVM_ASSERT(status == NV_OK);
if (new_block_ptr)
*new_block_ptr = new_va_block;
return status;
err:
uvm_mutex_unlock(&va_block->lock);
mmu_interval_notifier_remove(&split_data.notifier);
uvm_va_block_release(new_va_block);
return status;
}
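// Notifier coverage during the split above (illustrative summary of the steps
// performed by hmm_split_block()):
//   1. Insert the temporary split notifier over the whole original range.
//   2. Remove the existing block's notifier and re-insert it over the shrunk
//      [va_block->start, new_end] range.
//   3. Insert the new block's notifier over [new_end + 1, original end].
//   4. Remove the temporary notifier.
// At every step at least one notifier covers every address in the original
// range, so no invalidation can be missed; at worst one is delivered to both
// the temporary and the per-block notifiers.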
// Check to see if the HMM va_block would overlap the range start/end and
// split it so it can be removed. That breaks down to the following cases:
// start/end could cover all of the HMM va_block ->
// remove the va_block
// start/end could cover the left part of the HMM va_block ->
// remove the left part
// start/end could cover the right part of the HMM va_block ->
// remove the right part
// or start/end could "punch a hole" in the middle and leave the ends intact.
// In each case, only one HMM va_block is removed so return it in out_va_block.
static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
NvU64 start,
NvU64 end,
uvm_va_block_t **out_va_block)
{
uvm_va_block_t *new;
NV_STATUS status;
if (va_block->start < start) {
status = hmm_split_block(va_block, start - 1, &new);
if (status != NV_OK)
return status;
// Keep the left part, the right part will be deleted.
va_block = new;
}
if (va_block->end > end) {
status = hmm_split_block(va_block, end, NULL);
if (status != NV_OK)
return status;
// Keep the right part, the left part will be deleted.
}
*out_va_block = va_block;
return NV_OK;
}
// Normally, the HMM va_block is destroyed when the va_space is destroyed
// (i.e., when the /dev/nvidia-uvm device is closed). A munmap() call triggers
// a uvm_hmm_invalidate() callback which unmaps the VMA's range from the GPU's
// page tables. However, it doesn't destroy the va_block because that would
// require calling mmu_interval_notifier_remove() which can't be called from
// the invalidate callback due to Linux locking constraints. If a process
// calls mmap()/munmap() for SAM and then creates a UVM managed allocation,
// the same VMA range can be picked and there would be a UVM/HMM va_block
// conflict. Creating a UVM managed allocation (or other va_range) calls this
// function to remove stale HMM va_blocks or split the HMM va_block so there
// is no overlap.
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
NvU64 end)
{
uvm_range_tree_node_t *node, *next;
uvm_va_block_t *va_block;
NV_STATUS status;
if (!uvm_hmm_is_enabled(va_space))
return NV_OK;
if (mm) {
UVM_ASSERT(mm == va_space->va_space_mm.mm);
uvm_assert_mmap_lock_locked(mm);
}
uvm_assert_rwsem_locked_write(&va_space->lock);
// Process each HMM va_block that overlaps the interval [start, end].
// Note that end is inclusive.
// The blocks_lock is not needed when the va_space lock is held for write.
uvm_range_tree_for_each_in_safe(node, next, &va_space->hmm.blocks, start, end) {
va_block = hmm_va_block_from_node(node);
if (mm) {
status = split_block_if_needed(va_block, start, end, &va_block);
if (status != NV_OK)
return status;
}
// Note that this waits for any invalidation callbacks to complete
// so uvm_hmm_invalidate() won't see a block disappear.
// The va_space write lock should prevent uvm_hmm_va_block_find_create()
// from adding it back.
mmu_interval_notifier_remove(&va_block->hmm.notifier);
uvm_range_tree_remove(&va_space->hmm.blocks, &va_block->hmm.node);
uvm_va_block_kill(va_block);
}
UVM_ASSERT(!uvm_range_tree_iter_first(&va_space->hmm.blocks, start, end));
return NV_OK;
}
NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_policy_is_split_needed_t split_needed_cb,
void *data)
{
uvm_va_block_t *va_block;
uvm_va_policy_node_t *node;
NV_STATUS status;
uvm_assert_rwsem_locked_write(&va_space->lock);
// If there is no HMM va_block or the va_block doesn't span the policy
// addr, there is no need to split.
status = uvm_hmm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK || va_block->start == addr)
return NV_OK;
uvm_mutex_lock(&va_block->lock);
node = uvm_va_policy_node_find(va_block, addr);
if (!node)
goto done;
// If the policy range doesn't span addr, we're done.
if (addr == node->node.start)
goto done;
if (split_needed_cb(&node->policy, data))
status = uvm_va_policy_node_split(va_block, node, addr - 1, NULL);
done:
uvm_mutex_unlock(&va_block->lock);
return status;
}
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
NvU64 base,
NvU64 last_address)
{
bool is_default = UVM_ID_IS_INVALID(preferred_location);
uvm_va_block_t *va_block;
NvU64 addr;
NV_STATUS status = NV_OK;
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
uvm_assert_mmap_lock_locked(va_space->va_space_mm.mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
UVM_ASSERT(PAGE_ALIGNED(base));
UVM_ASSERT(PAGE_ALIGNED(last_address + 1));
UVM_ASSERT(base < last_address);
// Update HMM preferred location policy.
for (addr = base; addr < last_address; addr = va_block->end + 1) {
NvU64 end;
status = hmm_va_block_find_create(va_space, addr, true, NULL, &va_block);
if (status != NV_OK)
break;
end = min(last_address, va_block->end);
uvm_mutex_lock(&va_block->lock);
status = uvm_va_policy_set_range(va_block,
addr,
end,
UVM_VA_POLICY_PREFERRED_LOCATION,
is_default,
preferred_location,
UVM_READ_DUPLICATION_MAX);
// TODO: Bug 1750144: unset requires re-evaluating accessed-by mappings
// (see uvm_va_range_set_preferred_location's call of
// uvm_va_block_set_accessed_by), and set requires unmapping remote
// mappings (uvm_va_block_set_preferred_location_locked).
uvm_mutex_unlock(&va_block->lock);
if (status != NV_OK)
break;
}
return status;
}
NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
uvm_processor_id_t processor_id,
bool set_bit,
NvU64 base,
NvU64 last_address)
{
uvm_va_block_t *va_block;
NvU64 addr;
NV_STATUS status = NV_OK;
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
uvm_assert_mmap_lock_locked(va_space->va_space_mm.mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
UVM_ASSERT(PAGE_ALIGNED(base));
UVM_ASSERT(PAGE_ALIGNED(last_address + 1));
UVM_ASSERT(base < last_address);
// Update HMM accessed by policy.
for (addr = base; addr < last_address; addr = va_block->end + 1) {
NvU64 end;
status = hmm_va_block_find_create(va_space, addr, true, NULL, &va_block);
if (status != NV_OK)
break;
end = min(last_address, va_block->end);
uvm_mutex_lock(&va_block->lock);
status = uvm_va_policy_set_range(va_block,
addr,
end,
UVM_VA_POLICY_ACCESSED_BY,
!set_bit,
processor_id,
UVM_READ_DUPLICATION_MAX);
// TODO: Bug 1750144: need to call va_block_set_accessed_by_locked()
// if read duplication isn't enabled.
uvm_mutex_unlock(&va_block->lock);
if (status != NV_OK)
break;
}
return status;
}
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
uvm_va_policy_node_t *node;
NvU64 end = *endp;
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_mutex_locked(&va_block->lock);
if (end > vma->vm_end - 1)
end = vma->vm_end - 1;
node = uvm_va_policy_node_find(va_block, addr);
if (node) {
va_block_context->policy = &node->policy;
if (end > node->node.end)
end = node->node.end;
}
else
va_block_context->policy = &uvm_va_policy_default;
*endp = end;
}
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_page_index_t *outerp)
{
struct vm_area_struct *vma;
unsigned long addr;
NvU64 end = va_block->end;
uvm_page_index_t outer;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
uvm_assert_mmap_lock_locked(va_block_context->mm);
uvm_assert_mutex_locked(&va_block->lock);
addr = uvm_va_block_cpu_page_address(va_block, page_index);
vma = vma_lookup(va_block_context->mm, addr);
if (!vma || !(vma->vm_flags & VM_READ))
return NV_ERR_INVALID_ADDRESS;
va_block_context->hmm.vma = vma;
uvm_hmm_find_policy_end(va_block, va_block_context, addr, &end);
outer = uvm_va_block_cpu_page_index(va_block, end) + 1;
if (*outerp > outer)
*outerp = outer;
return NV_OK;
}
#endif // UVM_IS_CONFIG_HMM()

View File

@@ -0,0 +1,287 @@
/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _UVM_HMM_H_
#define _UVM_HMM_H_
#include "nvtypes.h"
#include "uvm_forward_decl.h"
#include "uvm_va_block_types.h"
#include "uvm_va_policy.h"
#include "uvm_linux.h"
#include "uvm_range_tree.h"
#include "uvm_lock.h"
typedef struct
{
// This stores pointers to uvm_va_block_t for HMM blocks.
uvm_range_tree_t blocks;
uvm_mutex_t blocks_lock;
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
// This flag is set to true by default for each va_space so most processes
// don't see partially implemented UVM-HMM behavior. Test code can clear it
// for a given va_space so that the test process can do some interim testing.
// It needs to be a separate flag instead of modifying uvm_disable_hmm or
// va_space->flags since those are user inputs and are visible/checked by
// test code.
// Remove this when UVM-HMM is fully integrated into chips_a.
bool disable;
} uvm_hmm_va_space_t;
#if UVM_IS_CONFIG_HMM()
// Tells whether HMM is enabled for the given va_space.
// If it is not enabled, all of the functions below are no-ops.
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space);
// Self-explanatory name: reports if HMM is enabled system-wide.
bool uvm_hmm_is_enabled_system_wide(void);
// Initialize HMM for the given va_space.
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
// Initialize HMM for the given va_space for testing.
// Bug 1750144: UVM: Add HMM (Heterogeneous Memory Management) support to
// the UVM driver. Remove this when enough HMM functionality is implemented.
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space);
// Destroy any HMM state for the given va_space.
// Locking: va_space lock must be held in write mode.
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space);
// Find an existing HMM va_block.
// This function can be called without having retained and locked the mm,
// but in that case, the only allowed operations on the returned block are
// locking the block, reading its state, and performing eviction. GPU fault
// handling and user-initiated migrations are not allowed.
// Return values are the same as uvm_va_block_find().
// Locking: This must be called with va_space lock held in at least read
// mode.
NV_STATUS uvm_hmm_va_block_find(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **va_block_ptr);
// Find or create a new HMM va_block.
//
// Return NV_ERR_INVALID_ADDRESS if there is no VMA associated with the
// address 'addr' or the VMA does not have at least PROT_READ permission.
// Locking: This function must be called with mm retained and locked for
// at least read and the va_space lock held for at least read.
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr);
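// A minimal caller sketch (hypothetical, for illustration only; 'fault_addr'
// and 'block_context' are placeholder names, not part of this API):
//
//     uvm_va_block_t *block;
//     NV_STATUS status = uvm_hmm_va_block_find_create(va_space,
//                                                     fault_addr,
//                                                     block_context,
//                                                     &block);
//     if (status == NV_OK) {
//         // 'block' covers fault_addr and block_context->hmm.vma is the VMA
//         // containing it.
//     }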
// Reclaim any HMM va_blocks that overlap the given range.
// Note that 'end' is inclusive.
// An HMM va_block can be reclaimed if it doesn't contain any "valid" VMAs.
// See uvm_hmm_vma_is_valid() for details.
// Return values:
// NV_ERR_NO_MEMORY: Reclaim required a block split, which failed.
// NV_OK: There were no HMM blocks in the range, or all HMM
// blocks in the range were successfully reclaimed.
// Locking: If mm is not NULL, it must equal va_space_mm.mm, the caller
// must hold a reference on it, and it must be locked for at least read
// mode. Also, the va_space lock must be held in write mode.
// TODO: Bug 3372166: add asynchronous va_block reclaim.
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
NvU64 end);
// Find a HMM policy range that needs to be split. The callback function
// 'split_needed_cb' returns true if the policy range needs to be split.
// If a policy range is split, the existing range is truncated to
// [existing_start, addr - 1] and a new policy node with the same policy
// values as the existing node is created covering [addr, existing_end].
// Before: [----------- existing ------------]
// After: [---- existing ----][---- new ----]
// ^addr
// Locking: the va_space must be write locked.
NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_policy_is_split_needed_t split_needed_cb,
void *data);
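// A hypothetical split_needed_cb sketch (illustrative only; assumes the
// callback inspects uvm_va_policy_t::preferred_location): request a split
// whenever the existing policy's preferred location differs from the
// processor being set by the caller.
//
//     static bool preferred_location_is_split_needed(uvm_va_policy_t *policy, void *data)
//     {
//         uvm_processor_id_t id = *(uvm_processor_id_t *)data;
//
//         return !uvm_id_equal(policy->preferred_location, id);
//     }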
// Set the preferred location policy for the given range.
// Note that 'last_address' is inclusive.
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
NvU64 base,
NvU64 last_address);
// Set the accessed by policy for the given range. This also tries to
// map the range. Note that 'last_address' is inclusive.
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
uvm_processor_id_t processor_id,
bool set_bit,
NvU64 base,
NvU64 last_address);
// Set the read duplication policy for the given range.
// Note that 'last_address' is inclusive.
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
// and the va_space lock must be held in write mode.
// TODO: Bug 2046423: need to implement read duplication support in Linux.
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
uvm_read_duplication_policy_t new_policy,
NvU64 base,
NvU64 last_address)
{
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
return NV_OK;
}
// Set va_block_context->policy to the policy covering the given address
// 'addr' and update the ending address '*endp' to the minimum of *endp,
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
// policy range.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
// the va_block lock held.
void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp);
// Find the VMA for the page index 'page_index',
// set va_block_context->policy to the policy covering the given address,
// and update the ending page index '*outerp' to the minimum of *outerp and
// the page index just past the lesser of va_block_context->hmm.vma->vm_end - 1
// and the ending address of the policy range.
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
// the va_block lock held.
NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_page_index_t *outerp);
#else // UVM_IS_CONFIG_HMM()
static bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
{
return false;
}
static bool uvm_hmm_is_enabled_system_wide(void)
{
return false;
}
static NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space)
{
return NV_OK;
}
static NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
{
return NV_WARN_NOTHING_TO_DO;
}
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
{
}
static NV_STATUS uvm_hmm_va_block_find(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **va_block_ptr)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
NvU64 end)
{
return NV_OK;
}
static NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_policy_is_split_needed_t split_needed_cb,
void *data)
{
return NV_OK;
}
static NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
NvU64 base,
NvU64 last_address)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
uvm_processor_id_t processor_id,
bool set_bit,
NvU64 base,
NvU64 last_address)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
uvm_read_duplication_policy_t new_policy,
NvU64 base,
NvU64 last_address)
{
return NV_ERR_INVALID_ADDRESS;
}
static void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
unsigned long addr,
NvU64 *endp)
{
}
static NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_page_index_t *outerp)
{
return NV_OK;
}
#endif // UVM_IS_CONFIG_HMM()
#endif // _UVM_HMM_H_

View File

@@ -0,0 +1,90 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
#include "uvm_va_range.h"
#include "uvm_hmm.h"
NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
struct mm_struct *mm;
uvm_va_block_t *hmm_block = NULL;
NV_STATUS status;
mm = uvm_va_space_mm_retain(va_space);
if (!mm)
return NV_WARN_NOTHING_TO_DO;
uvm_down_write_mmap_lock(mm);
uvm_va_space_down_write(va_space);
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
// By default, HMM is enabled system wide but disabled per va_space.
// This will initialize the va_space for HMM.
status = uvm_hmm_va_space_initialize_test(va_space);
if (status != NV_OK)
goto out;
uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(mm);
uvm_down_read_mmap_lock(mm);
uvm_va_space_down_read(va_space);
// Try to create an HMM va_block at virtual address zero (NULL).
// It should fail: there is no VMA at that address, even though a va_block
// covering the range [0x0, 0x1fffff] would otherwise be possible.
status = uvm_hmm_va_block_find_create(va_space, 0UL, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
// Try to create an HMM va_block which overlaps a UVM managed block.
// It should fail.
status = uvm_hmm_va_block_find_create(va_space, params->uvm_address, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
// Try to create an HMM va_block; it should succeed.
status = uvm_hmm_va_block_find_create(va_space, params->hmm_address, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_OK, done);
// Try to find an existing HMM va_block; it should succeed.
status = uvm_hmm_va_block_find(va_space, params->hmm_address, &hmm_block);
TEST_CHECK_GOTO(status == NV_OK, done);
done:
uvm_va_space_up_read(va_space);
uvm_up_read_mmap_lock(mm);
uvm_va_space_mm_release(va_space);
return status;
out:
uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(mm);
uvm_va_space_mm_release(va_space);
return status;
}

View File

@@ -0,0 +1,296 @@
/*******************************************************************************
Copyright (c) 2020-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_common.h"
#include "uvm_hal.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_va_space.h"
#include "uvm_mem.h"
#include "uvm_rm_mem.h"
typedef struct test_sem_mem_t {
void *cpu_va;
NvU64 gpu_va;
union {
uvm_mem_t *uvm_mem;
uvm_rm_mem_t *rm_mem;
};
} test_sem_mem;
static NV_STATUS test_semaphore_alloc_uvm_rm_mem(uvm_gpu_t *gpu, const size_t size, test_sem_mem *mem_out)
{
NV_STATUS status;
uvm_rm_mem_t *mem = NULL;
NvU64 gpu_va;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, &mem);
TEST_NV_CHECK_RET(status);
gpu_va = uvm_rm_mem_get_gpu_uvm_va(mem, gpu);
TEST_CHECK_GOTO(gpu_va < gpu->parent->max_host_va, error);
mem_out->cpu_va = uvm_rm_mem_get_cpu_va(mem);
mem_out->gpu_va = gpu_va;
mem_out->rm_mem = mem;
return NV_OK;
error:
uvm_rm_mem_free(mem);
return status;
}
static NV_STATUS test_semaphore_alloc_sem(uvm_gpu_t *gpu, const size_t size, test_sem_mem *mem_out)
{
NV_STATUS status = NV_OK;
uvm_mem_t *mem = NULL;
NvU64 gpu_va;
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem(size, current->mm, &mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(mem, gpu), error);
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
// Use an RM allocation when Host cannot address the semaphore.
if (gpu_va >= gpu->parent->max_host_va) {
uvm_mem_free(mem);
return test_semaphore_alloc_uvm_rm_mem(gpu, size, mem_out);
}
// This semaphore resides in the uvm_mem region, i.e., it has the GPU VA
// MSbit set. The intent is to validate semaphore operations when the
// semaphore's VA is at the high end of the GPU's effective virtual address
// space, i.e., when its upper VA bit is set.
TEST_CHECK_GOTO(gpu_va & (1ULL << (gpu->address_space_tree.hal->num_va_bits() - 1)), error);
TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(mem), error);
mem_out->cpu_va = uvm_mem_get_cpu_addr_kernel(mem);
mem_out->gpu_va = gpu_va;
mem_out->uvm_mem = mem;
return NV_OK;
error:
uvm_mem_free(mem);
return status;
}
static void test_semaphore_free_sem(uvm_gpu_t *gpu, test_sem_mem *mem)
{
if (mem->gpu_va >= gpu->parent->uvm_mem_va_base)
uvm_mem_free(mem->uvm_mem);
else
uvm_rm_mem_free(mem->rm_mem);
}
// This test is similar to the test_semaphore_release() test in uvm_ce_test.c,
// except that this one uses host_hal->semaphore_release();
static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
{
NV_STATUS status;
test_sem_mem mem = { 0 };
uvm_push_t push;
NvU32 value;
NvU32 payload = 0xA5A55A5A;
NvU32 *cpu_ptr;
// Semaphore release needs 1 word (4 bytes).
const size_t size = sizeof(NvU32);
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_NV_CHECK_RET(status);
// Initialize the payload.
cpu_ptr = (NvU32 *)mem.cpu_va;
*cpu_ptr = 0;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_release test");
TEST_NV_CHECK_GOTO(status, done);
gpu->parent->host_hal->semaphore_release(&push, mem.gpu_va, payload);
status = uvm_push_end_and_wait(&push);
TEST_NV_CHECK_GOTO(status, done);
value = *cpu_ptr;
if (value != payload) {
UVM_TEST_PRINT("Semaphore payload = %u instead of %u, GPU %s\n", value, payload, uvm_gpu_name(gpu));
status = NV_ERR_INVALID_STATE;
goto done;
}
done:
test_semaphore_free_sem(gpu, &mem);
return status;
}
static NV_STATUS test_semaphore_acquire(uvm_gpu_t *gpu)
{
NV_STATUS status;
test_sem_mem mem = { 0 };
uvm_push_t push;
uvm_spin_loop_t spin;
NvU32 *cpu_ptr, *cpu_sema_A, *cpu_sema_B, *cpu_sema_C;
NvU64 gpu_sema_va_A, gpu_sema_va_B, gpu_sema_va_C;
bool check_sema_C;
// Each semaphore is one word (4 bytes) long; we use three semaphores.
const size_t sema_size = 4;
const size_t size = sema_size * 3;
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_NV_CHECK_RET(status);
gpu_sema_va_A = mem.gpu_va;
gpu_sema_va_B = mem.gpu_va + sema_size;
gpu_sema_va_C = mem.gpu_va + 2 * sema_size;
cpu_ptr = (NvU32 *)mem.cpu_va;
memset(cpu_ptr, 0, size);
cpu_sema_A = cpu_ptr;
cpu_sema_B = cpu_ptr + 1;
cpu_sema_C = cpu_ptr + 2;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "semaphore_acquire test");
TEST_NV_CHECK_GOTO(status, done);
gpu->parent->host_hal->semaphore_release(&push, gpu_sema_va_A, 1);
gpu->parent->host_hal->semaphore_acquire(&push, gpu_sema_va_B, 1);
gpu->parent->host_hal->semaphore_release(&push, gpu_sema_va_C, 1);
uvm_push_end(&push);
// Wait for sema_A release.
UVM_SPIN_WHILE(UVM_READ_ONCE(*cpu_sema_A) != 1, &spin);
// Sleep for 10ms; the GPU waits while sema_B is held by us.
msleep(10);
check_sema_C = UVM_READ_ONCE(*cpu_sema_C) == 0;
// memory fence/barrier, check comment in
// uvm_gpu_semaphore.c:uvm_gpu_semaphore_set_payload() for details.
mb();
// Release sema_B.
UVM_WRITE_ONCE(*cpu_sema_B, 1);
// Wait for the GPU to release sema_C, i.e., the end of the push.
status = uvm_push_wait(&push);
TEST_CHECK_GOTO(status == NV_OK, done);
// check_sema_C is validated only here, after the push has ended, so that a
// failed check cannot be attributed to the push being interrupted in the
// middle.
TEST_CHECK_GOTO(check_sema_C, done);
TEST_CHECK_GOTO(UVM_READ_ONCE(*cpu_sema_C) == 1, done);
done:
test_semaphore_free_sem(gpu, &mem);
return status;
}
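// Ordering summary for the acquire test above (illustrative):
//   GPU push: release(sema_A = 1) -> acquire(sema_B == 1) -> release(sema_C = 1)
//   CPU:      spin until sema_A == 1 -> sleep 10ms -> record that sema_C is
//             still 0 -> write sema_B = 1 -> wait for the push -> expect
//             sema_C == 1
// The 10ms sleep gives the GPU time to run ahead; if the acquire were not
// honored, sema_C would already be 1 when it is sampled.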
// This test is similar to the test_semaphore_timestamp() test in
// uvm_ce_test.c, except that this one uses host_hal->semaphore_timestamp();
static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
{
NV_STATUS status;
test_sem_mem mem = { 0 };
uvm_push_t push;
NvU32 i;
NvU64 *timestamp;
NvU64 last_timestamp = 0;
// 2 iterations:
// 1: compare retrieved timestamp with 0;
// 2: compare retrieved timestamp with previous timestamp (obtained in 1).
const NvU32 iterations = 2;
// The semaphore is 4 words long (16 bytes).
const size_t size = 16;
status = test_semaphore_alloc_sem(gpu, size, &mem);
TEST_NV_CHECK_RET(status);
timestamp = (NvU64 *)mem.cpu_va;
TEST_CHECK_GOTO(timestamp != NULL, done);
memset(timestamp, 0, size);
// Shift the timestamp pointer to where the semaphore timestamp info is.
timestamp += 1;
for (i = 0; i < iterations; i++) {
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_INTERNAL,
&push,
"semaphore_timestamp test, iter: %u",
i);
TEST_NV_CHECK_GOTO(status, done);
gpu->parent->host_hal->semaphore_timestamp(&push, mem.gpu_va);
status = uvm_push_end_and_wait(&push);
TEST_NV_CHECK_GOTO(status, done);
TEST_CHECK_GOTO(*timestamp != 0, done);
TEST_CHECK_GOTO(*timestamp >= last_timestamp, done);
last_timestamp = *timestamp;
}
done:
test_semaphore_free_sem(gpu, &mem);
return status;
}
static NV_STATUS test_host(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
TEST_NV_CHECK_RET(test_semaphore_release(gpu));
TEST_NV_CHECK_RET(test_semaphore_acquire(gpu));
TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
}
return NV_OK;
}
NV_STATUS uvm_test_host_sanity(UVM_TEST_HOST_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_va_space_down_read_rm(va_space);
status = test_host(va_space);
uvm_va_space_up_read_rm(va_space);
return status;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,412 @@
/*******************************************************************************
Copyright (c) 2016-2020 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_rb_tree.h"
// To implement realloc for vmalloc-based allocations we need to track the size
// of the original allocation. We can do that by allocating a header along with
// the allocation itself. Since vmalloc is only used for relatively large
// allocations, this overhead is very small.
//
// We don't need this for kmalloc since we can use ksize().
typedef struct
{
size_t alloc_size;
uint8_t ptr[0];
} uvm_vmalloc_hdr_t;
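// Layout sketch for a vmalloc-backed allocation of N bytes (illustrative):
//
//     +------------+---------------------------------+
//     | alloc_size | caller-visible buffer (N bytes) |
//     +------------+---------------------------------+
//     ^hdr         ^hdr->ptr (pointer returned to the caller)
//
// get_hdr() later recovers 'hdr' from the caller's pointer via container_of().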
typedef struct
{
const char *file;
const char *function;
int line;
uvm_rb_tree_node_t node;
} uvm_kvmalloc_info_t;
typedef enum
{
UVM_KVMALLOC_LEAK_CHECK_NONE = 0,
UVM_KVMALLOC_LEAK_CHECK_BYTES,
UVM_KVMALLOC_LEAK_CHECK_ORIGIN,
UVM_KVMALLOC_LEAK_CHECK_COUNT
} uvm_kvmalloc_leak_check_t;
// This is used just to make sure that the APIs aren't used outside of
// uvm_kvmalloc_init/uvm_kvmalloc_exit. The memory allocation would still work
// fine, but the leak checker would get confused.
static bool g_malloc_initialized = false;
static struct
{
// Current outstanding bytes allocated
atomic_long_t bytes_allocated;
// Number of allocations whose tracking info allocation failed. Used just
// for sanity checks.
atomic_long_t untracked_allocations;
// Use a raw spinlock rather than a uvm_spinlock_t because the kvmalloc
// layer is initialized and torn down before the thread context layer.
spinlock_t lock;
// Table of all outstanding allocations
uvm_rb_tree_t allocation_info;
struct kmem_cache *info_cache;
} g_uvm_leak_checker;
// Default to byte-count-only leak checking for non-release builds. This can
// always be overridden by the module parameter.
static int uvm_leak_checker = (UVM_IS_DEBUG() || UVM_IS_DEVELOP()) ?
UVM_KVMALLOC_LEAK_CHECK_BYTES :
UVM_KVMALLOC_LEAK_CHECK_NONE;
module_param(uvm_leak_checker, int, S_IRUGO);
MODULE_PARM_DESC(uvm_leak_checker,
"Enable uvm memory leak checking. "
"0 = disabled, 1 = count total bytes allocated and freed, 2 = per-allocation origin tracking.");
NV_STATUS uvm_kvmalloc_init(void)
{
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN) {
spin_lock_init(&g_uvm_leak_checker.lock);
uvm_rb_tree_init(&g_uvm_leak_checker.allocation_info);
g_uvm_leak_checker.info_cache = NV_KMEM_CACHE_CREATE("uvm_kvmalloc_info_t", uvm_kvmalloc_info_t);
if (!g_uvm_leak_checker.info_cache)
return NV_ERR_NO_MEMORY;
}
g_malloc_initialized = true;
return NV_OK;
}
void uvm_kvmalloc_exit(void)
{
if (!g_malloc_initialized)
return;
if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
" insmod with uvm_leak_checker=2 for detailed information." :
"");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
if (g_uvm_global.unload_state.ptr)
*g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
}
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN) {
uvm_rb_tree_node_t *node, *next;
uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX " Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
// Free so we don't keep eating up memory while debugging. Note that
// this also removes the entry from the table, frees info, and drops
// the allocated bytes count.
uvm_kvfree((void *)((uintptr_t)info->node.key));
}
if (atomic_long_read(&g_uvm_leak_checker.untracked_allocations) == 0)
UVM_ASSERT(atomic_long_read(&g_uvm_leak_checker.bytes_allocated) == 0);
kmem_cache_destroy_safe(&g_uvm_leak_checker.info_cache);
}
g_malloc_initialized = false;
}
static void insert_info(uvm_kvmalloc_info_t *info)
{
NV_STATUS status;
unsigned long irq_flags;
spin_lock_irqsave(&g_uvm_leak_checker.lock, irq_flags);
status = uvm_rb_tree_insert(&g_uvm_leak_checker.allocation_info, &info->node);
spin_unlock_irqrestore(&g_uvm_leak_checker.lock, irq_flags);
// We shouldn't have duplicates
UVM_ASSERT(status == NV_OK);
}
static uvm_kvmalloc_info_t *remove_info(void *p)
{
uvm_rb_tree_node_t *node;
uvm_kvmalloc_info_t *info = NULL;
unsigned long irq_flags;
spin_lock_irqsave(&g_uvm_leak_checker.lock, irq_flags);
node = uvm_rb_tree_find(&g_uvm_leak_checker.allocation_info, (NvU64)p);
if (node)
uvm_rb_tree_remove(&g_uvm_leak_checker.allocation_info, node);
spin_unlock_irqrestore(&g_uvm_leak_checker.lock, irq_flags);
if (!node) {
UVM_ASSERT(atomic_long_read(&g_uvm_leak_checker.untracked_allocations) > 0);
atomic_long_dec(&g_uvm_leak_checker.untracked_allocations);
}
else {
info = container_of(node, uvm_kvmalloc_info_t, node);
UVM_ASSERT(info->node.key == (NvU64)((uintptr_t)p));
}
return info;
}
static void alloc_tracking_add(void *p, const char *file, int line, const char *function)
{
// Add uvm_kvsize(p) instead of size because uvm_kvsize might be larger (due
// to ksize), and uvm_kvfree only knows about uvm_kvsize.
size_t size = uvm_kvsize(p);
uvm_kvmalloc_info_t *info;
UVM_ASSERT(g_malloc_initialized);
if (ZERO_OR_NULL_PTR(p))
return;
atomic_long_add(size, &g_uvm_leak_checker.bytes_allocated);
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN) {
// Silently ignore OOM errors
info = nv_kmem_cache_zalloc(g_uvm_leak_checker.info_cache, NV_UVM_GFP_FLAGS);
if (!info) {
atomic_long_inc(&g_uvm_leak_checker.untracked_allocations);
return;
}
info->node.key = (NvU64)p;
info->file = file;
info->function = function;
info->line = line;
insert_info(info);
}
}
static void alloc_tracking_remove(void *p)
{
size_t size = uvm_kvsize(p);
uvm_kvmalloc_info_t *info;
UVM_ASSERT(g_malloc_initialized);
if (ZERO_OR_NULL_PTR(p))
return;
atomic_long_sub(size, &g_uvm_leak_checker.bytes_allocated);
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN) {
info = remove_info(p);
if (info)
kmem_cache_free(g_uvm_leak_checker.info_cache, info);
}
}
static uvm_vmalloc_hdr_t *get_hdr(void *p)
{
uvm_vmalloc_hdr_t *hdr;
UVM_ASSERT(is_vmalloc_addr(p));
hdr = container_of(p, uvm_vmalloc_hdr_t, ptr);
UVM_ASSERT(hdr->alloc_size > UVM_KMALLOC_THRESHOLD);
return hdr;
}
static void *alloc_internal(size_t size, bool zero_memory)
{
uvm_vmalloc_hdr_t *hdr;
// Make sure that the allocation pointer is suitably-aligned for a natively-
// sized allocation.
BUILD_BUG_ON(offsetof(uvm_vmalloc_hdr_t, ptr) != sizeof(void *));
// Make sure that ptr[] is the last member and adds no size of its own, so
// (sizeof(*hdr) + size) is exactly the space needed
BUILD_BUG_ON(sizeof(uvm_vmalloc_hdr_t) != offsetof(uvm_vmalloc_hdr_t, ptr));
if (size <= UVM_KMALLOC_THRESHOLD) {
if (zero_memory)
return kzalloc(size, NV_UVM_GFP_FLAGS);
return kmalloc(size, NV_UVM_GFP_FLAGS);
}
if (zero_memory)
hdr = vzalloc(sizeof(*hdr) + size);
else
hdr = vmalloc(sizeof(*hdr) + size);
if (!hdr)
return NULL;
hdr->alloc_size = size;
return hdr->ptr;
}
void *__uvm_kvmalloc(size_t size, const char *file, int line, const char *function)
{
void *p = alloc_internal(size, false);
if (uvm_leak_checker && p)
alloc_tracking_add(p, file, line, function);
return p;
}
void *__uvm_kvmalloc_zero(size_t size, const char *file, int line, const char *function)
{
void *p = alloc_internal(size, true);
if (uvm_leak_checker && p)
alloc_tracking_add(p, file, line, function);
return p;
}
void uvm_kvfree(void *p)
{
if (!p)
return;
if (uvm_leak_checker)
alloc_tracking_remove(p);
if (is_vmalloc_addr(p))
vfree(get_hdr(p));
else
kfree(p);
}
// Handle reallocs of kmalloc-based allocations
static void *realloc_from_kmalloc(void *p, size_t new_size)
{
void *new_p;
// Simple case: kmalloc -> kmalloc
if (new_size <= UVM_KMALLOC_THRESHOLD)
return krealloc(p, new_size, NV_UVM_GFP_FLAGS);
// kmalloc -> vmalloc
new_p = alloc_internal(new_size, false);
if (!new_p)
return NULL;
memcpy(new_p, p, min(ksize(p), new_size));
kfree(p);
return new_p;
}
// Handle reallocs of vmalloc-based allocations
static void *realloc_from_vmalloc(void *p, size_t new_size)
{
uvm_vmalloc_hdr_t *old_hdr = get_hdr(p);
void *new_p;
if (new_size == 0) {
vfree(old_hdr);
return ZERO_SIZE_PTR; // What krealloc returns for this case
}
if (new_size == old_hdr->alloc_size)
return p;
// vmalloc has no realloc functionality so we need to do a separate alloc +
// copy.
new_p = alloc_internal(new_size, false);
if (!new_p)
return NULL;
memcpy(new_p, p, min(new_size, old_hdr->alloc_size));
vfree(old_hdr);
return new_p;
}
void *__uvm_kvrealloc(void *p, size_t new_size, const char *file, int line, const char *function)
{
void *new_p;
uvm_kvmalloc_info_t *info = NULL;
size_t old_size;
if (ZERO_OR_NULL_PTR(p))
return __uvm_kvmalloc(new_size, file, line, function);
old_size = uvm_kvsize(p);
if (uvm_leak_checker) {
// new_size == 0 is a free, so just remove everything
if (new_size == 0) {
alloc_tracking_remove(p);
}
else {
// Remove the old pointer. If the realloc gives us a new pointer
// with the old one still in the tracking table, that pointer could
// be reallocated by another thread before we remove it from the
// table.
atomic_long_sub(old_size, &g_uvm_leak_checker.bytes_allocated);
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN)
info = remove_info(p);
}
}
if (is_vmalloc_addr(p))
new_p = realloc_from_vmalloc(p, new_size);
else
new_p = realloc_from_kmalloc(p, new_size);
if (uvm_leak_checker) {
if (!new_p) {
// The realloc failed, so put the old info back
atomic_long_add(old_size, &g_uvm_leak_checker.bytes_allocated);
if (uvm_leak_checker >= UVM_KVMALLOC_LEAK_CHECK_ORIGIN && info)
insert_info(info);
}
else if (new_size != 0) {
// Drop the old info and insert the new
if (info)
kmem_cache_free(g_uvm_leak_checker.info_cache, info);
alloc_tracking_add(new_p, file, line, function);
}
}
return new_p;
}
size_t uvm_kvsize(void *p)
{
UVM_ASSERT(g_malloc_initialized);
UVM_ASSERT(p);
if (is_vmalloc_addr(p))
return get_hdr(p)->alloc_size;
return ksize(p);
}

View File

@@ -0,0 +1,75 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_KVMALLOC_H__
#define __UVM_KVMALLOC_H__
#include "uvm_linux.h"
#include "uvm_test_ioctl.h"
// kmalloc is faster than vmalloc because it doesn't have to remap kernel
// virtual memory, but for that same reason it requires physically-contiguous
// memory. It also supports a native krealloc function which is missing in
// vmalloc.
//
// Therefore the uvm_kvmalloc APIs use kmalloc when possible, but will fall back
// to vmalloc when the allocation size exceeds this UVM_KMALLOC_THRESHOLD.
//
// This value is somewhat arbitrary. kmalloc can support allocations much larger
// than PAGE_SIZE, but the larger the size the higher the chances of allocation
// failure.
//
// This is in the header so callers can use it to inform their allocation sizes
// if they wish.
#define UVM_KMALLOC_THRESHOLD (4*PAGE_SIZE)
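// For example (illustrative, assuming 4K pages): a 16KB request is served by
// kmalloc, while a 16KB + 1 byte request falls back to vmalloc plus a small
// size-tracking header.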
NV_STATUS uvm_kvmalloc_init(void);
void uvm_kvmalloc_exit(void);
// Allocating a size of 0 with any of these APIs returns ZERO_SIZE_PTR
void *__uvm_kvmalloc(size_t size, const char *file, int line, const char *function);
void *__uvm_kvmalloc_zero(size_t size, const char *file, int line, const char *function);
#define uvm_kvmalloc(__size) __uvm_kvmalloc(__size, __FILE__, __LINE__, __FUNCTION__)
#define uvm_kvmalloc_zero(__size) __uvm_kvmalloc_zero(__size, __FILE__, __LINE__, __FUNCTION__)
void uvm_kvfree(void *p);
// Follows standard realloc semantics:
// - uvm_kvrealloc(NULL, size) and uvm_kvrealloc(ZERO_SIZE_PTR, size) are each
// equivalent to uvm_kvmalloc(size)
// - uvm_kvrealloc(p, 0) is the same as uvm_kvfree(p), and returns ZERO_SIZE_PTR
void *__uvm_kvrealloc(void *p, size_t new_size, const char *file, int line, const char *function);
#define uvm_kvrealloc(__p, __new_size) __uvm_kvrealloc(__p, __new_size, __FILE__, __LINE__, __FUNCTION__)
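// Minimal usage sketch (hypothetical, for illustration only):
//
//     NvU32 *arr = uvm_kvmalloc_zero(count * sizeof(*arr));
//     if (!arr)
//         return NV_ERR_NO_MEMORY;
//
//     // Grow the array. On failure the original allocation is left intact,
//     // following standard realloc semantics.
//     NvU32 *bigger = uvm_kvrealloc(arr, 2 * count * sizeof(*arr));
//     if (!bigger) {
//         uvm_kvfree(arr);
//         return NV_ERR_NO_MEMORY;
//     }
//
//     uvm_kvfree(bigger);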
// Returns the allocation size for a prior allocation from uvm_kvmalloc,
// uvm_kvmalloc_zero, or uvm_kvrealloc. This may be more than the size requested
// in those calls, in which case the extra memory is safe to use.
//
// p must not be NULL.
size_t uvm_kvsize(void *p);
NV_STATUS uvm_test_kvmalloc(UVM_TEST_KVMALLOC_PARAMS *params, struct file *filp);
#endif // __UVM_KVMALLOC_H__

View File

@@ -0,0 +1,184 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_kvmalloc.h"
#include "uvm_test.h"
typedef enum
{
ALLOC_TYPE_MALLOC,
ALLOC_TYPE_ZALLOC,
ALLOC_TYPE_REALLOC_NULL,
ALLOC_TYPE_REALLOC_ZERO,
ALLOC_TYPE_MAX
} alloc_type_t;
static NV_STATUS check_alloc(void *p, size_t size)
{
if (size == 0) {
TEST_CHECK_RET(p == ZERO_SIZE_PTR);
TEST_CHECK_RET(uvm_kvsize(p) == 0);
}
else if (size <= UVM_KMALLOC_THRESHOLD) {
TEST_CHECK_RET(!is_vmalloc_addr(p));
// In theory it's possible to use kmalloc yet have ksize(p) be larger
// than our arbitrary UVM_KMALLOC_THRESHOLD. In practice, as long as
// UVM_KMALLOC_THRESHOLD is a multiple of PAGE_SIZE, that's highly
// unlikely.
TEST_CHECK_RET(uvm_kvsize(p) == ksize(p));
TEST_CHECK_RET(uvm_kvsize(p) >= size);
}
else {
TEST_CHECK_RET(is_vmalloc_addr(p));
TEST_CHECK_RET(uvm_kvsize(p) == size);
}
return NV_OK;
}
static NV_STATUS test_uvm_kvmalloc(void)
{
static const size_t sizes[] = {0, UVM_KMALLOC_THRESHOLD, UVM_KMALLOC_THRESHOLD + 1};
uint8_t *p;
uint8_t expected;
size_t i, j, size;
alloc_type_t alloc_type;
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
size = sizes[i];
for (alloc_type = 0; alloc_type < ALLOC_TYPE_MAX; alloc_type++) {
switch (alloc_type) {
case ALLOC_TYPE_MALLOC:
p = uvm_kvmalloc(size);
break;
case ALLOC_TYPE_ZALLOC:
p = uvm_kvmalloc_zero(size);
break;
case ALLOC_TYPE_REALLOC_NULL:
p = uvm_kvrealloc(NULL, size);
break;
case ALLOC_TYPE_REALLOC_ZERO:
p = uvm_kvrealloc(ZERO_SIZE_PTR, size);
break;
default:
UVM_ASSERT(0);
p = NULL;
}
if (!p)
return NV_ERR_NO_MEMORY;
// On failure, this macro returns and thus leaks the allocation. But
// if the check fails, our allocation state is messed up so we can't
// reasonably free the allocation anyway.
MEM_NV_CHECK_RET(check_alloc(p, size), NV_OK);
// Scribble on the allocation to make sure we don't crash
if (alloc_type == ALLOC_TYPE_ZALLOC) {
expected = 0;
}
else {
expected = (uint8_t)(current->pid + i);
memset(p, expected, size);
}
for (j = 0; j < size; j++) {
if (p[j] != expected) {
UVM_TEST_PRINT("p[%zu] is 0x%x instead of expected value 0x%x\n", j, p[j], expected);
uvm_kvfree(p);
TEST_CHECK_RET(0);
}
}
uvm_kvfree(p);
}
}
return NV_OK;
}
static NV_STATUS test_uvm_kvrealloc(void)
{
size_t i, j, k, old_size, new_size;
uint8_t *old_p, *new_p;
uint8_t expected = (uint8_t)current->pid;
static const size_t sizes[] = {0,
UVM_KMALLOC_THRESHOLD / 2,
UVM_KMALLOC_THRESHOLD,
UVM_KMALLOC_THRESHOLD + 1,
2*UVM_KMALLOC_THRESHOLD};
// uvm_kvrealloc(NULL, size) and uvm_kvrealloc(ZERO_SIZE_PTR, size) are
// tested in test_uvm_kvmalloc() so we don't have to do them here.
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
old_size = sizes[i];
for (j = 0; j < ARRAY_SIZE(sizes); j++) {
new_size = sizes[j];
old_p = uvm_kvmalloc(old_size);
if (!old_p)
return NV_ERR_NO_MEMORY;
MEM_NV_CHECK_RET(check_alloc(old_p, old_size), NV_OK);
++expected;
memset(old_p, expected, old_size);
new_p = uvm_kvrealloc(old_p, new_size);
if (!new_p) {
uvm_kvfree(old_p);
return NV_ERR_NO_MEMORY;
}
// At this point, either new_p == old_p or old_p should have been
// freed. In either case there's no need to free old_p.
MEM_NV_CHECK_RET(check_alloc(new_p, new_size), NV_OK);
// Make sure the data is still present
for (k = 0; k < min(new_size, old_size); k++) {
if (new_p[k] != expected) {
UVM_TEST_PRINT("new_p[%zu] is 0x%x instead of expected value 0x%x\n", k, new_p[k], expected);
uvm_kvfree(new_p);
TEST_CHECK_RET(0);
}
}
// Exercise the free-via-realloc path
TEST_CHECK_RET(uvm_kvrealloc(new_p, 0) == ZERO_SIZE_PTR);
}
}
return NV_OK;
}
NV_STATUS uvm_test_kvmalloc(UVM_TEST_KVMALLOC_PARAMS *params, struct file *filp)
{
NV_STATUS status = test_uvm_kvmalloc();
if (status != NV_OK)
return status;
return test_uvm_kvrealloc();
}

View File

@@ -0,0 +1,81 @@
/*******************************************************************************
Copyright (c) 2013 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_linux.h"
#if UVM_CGROUP_ACCOUNTING_SUPPORTED()
#include <linux/memcontrol.h>
#include <linux/sched/mm.h>
#endif
//
// uvm_linux.c
//
// This file, along with conftest.h and uvm_linux.h, helps to insulate
// the (out-of-tree) UVM driver from changes to the upstream Linux kernel.
//
#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping)
{
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
#if defined(NV_ADDRESS_SPACE_HAS_RWLOCK_TREE_LOCK)
//
// The .tree_lock member variable was changed from type rwlock_t, to
// spinlock_t, on 25 July 2008, by mainline commit
// 19fd6231279be3c3bdd02ed99f9b0eb195978064.
//
rwlock_init(&mapping->tree_lock);
#else
spin_lock_init(&mapping->tree_lock);
#endif
spin_lock_init(&mapping->i_mmap_lock);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
#endif
#if UVM_CGROUP_ACCOUNTING_SUPPORTED()
void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm)
{
memset(context, 0, sizeof(*context));
if (!mm)
return;
context->new_memcg = get_mem_cgroup_from_mm(mm);
context->old_memcg = set_active_memcg(context->new_memcg);
}
void uvm_memcg_context_end(uvm_memcg_context_t *context)
{
if (!context->new_memcg)
return;
set_active_memcg(context->old_memcg);
mem_cgroup_put(context->new_memcg);
}
#endif

View File

@@ -0,0 +1,622 @@
/*******************************************************************************
Copyright (c) 2013-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
//
// uvm_linux.h
//
// This file, along with conftest.h and uvm_linux.c, helps to insulate
// the (out-of-tree) UVM driver from changes to the upstream Linux kernel.
//
//
#ifndef _UVM_LINUX_H
#define _UVM_LINUX_H
#include "nvtypes.h"
#include "nv-time.h"
#define NV_BUILD_MODULE_INSTANCES 0
#include "nv-linux.h"
#if defined(NV_LINUX_LOG2_H_PRESENT)
#include <linux/log2.h>
#endif
#if defined(NV_PRIO_TREE_PRESENT)
#include <linux/prio_tree.h>
#endif
#include <linux/jhash.h>
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#if defined(NV_ASM_BARRIER_H_PRESENT)
#include <asm/barrier.h>
#endif
#if defined(NV_LINUX_ATOMIC_H_PRESENT)
#include <linux/atomic.h>
#endif
#include <asm/current.h>
#include <linux/random.h> /* get_random_bytes() */
#include <linux/radix-tree.h> /* Linux kernel radix tree */
#include <linux/file.h> /* fget() */
#include <linux/percpu.h>
#if defined(NV_LINUX_PRINTK_H_PRESENT)
#include <linux/printk.h>
#endif
#if defined(NV_LINUX_RATELIMIT_H_PRESENT)
#include <linux/ratelimit.h>
#endif
#if defined(NV_PNV_NPU2_INIT_CONTEXT_PRESENT)
#include <asm/powernv.h>
#endif
#if defined(NV_LINUX_SCHED_TASK_STACK_H_PRESENT)
#include <linux/sched/task_stack.h>
#endif
#include <linux/cpumask.h>
#include <linux/topology.h>
#include "nv-kthread-q.h"
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1 && defined(NV_CPUMASK_OF_NODE_PRESENT)
#define UVM_THREAD_AFFINITY_SUPPORTED() 1
#else
#define UVM_THREAD_AFFINITY_SUPPORTED() 0
#endif
// The ARM arch lacks support for cpumask_of_node() until kernel 4.7. It was
// added via commit 1a2db300348b ("arm64, numa: Add NUMA support for arm64
// platforms.") Callers should either check UVM_THREAD_AFFINITY_SUPPORTED()
// prior to calling this function or be prepared to deal with a NULL CPU
// mask.
static inline const struct cpumask *uvm_cpumask_of_node(int node)
{
#ifdef NV_CPUMASK_OF_NODE_PRESENT
return cpumask_of_node(node);
#else
return NULL;
#endif
}
#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MAKE_DEVICE_EXCLUSIVE_RANGE_PRESENT)
#define UVM_IS_CONFIG_HMM() 1
#else
#define UVM_IS_CONFIG_HMM() 0
#endif
// Various issues prevent us from using mmu_notifiers in older kernels. These
// include:
// - ->release being called under RCU instead of SRCU: fixed by commit
// 21a92735f660eaecf69a6f2e777f18463760ec32, v3.7 (2012-10-08).
// - Race conditions between mmu_notifier_release and mmu_notifier_unregister:
// fixed by commit d34883d4e35c0a994e91dd847a82b4c9e0c31d83, v3.10
// (2013-05-24).
//
// Unfortunately these issues aren't conftest-able, so instead we look for the
// presence of the invalidate_range callback in mmu_notifier_ops. This was added
// after all of the above issues were resolved, so we assume the fixes are
// present if we see the callback.
//
// The callback was added in commit 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e,
// v3.19 (2014-11-13).
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
#define UVM_CAN_USE_MMU_NOTIFIERS() 1
#else
#define UVM_CAN_USE_MMU_NOTIFIERS() 0
#endif
// See bug 1707453 for further details about setting the minimum kernel version.
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
# error This driver does not support kernels older than 2.6.32!
#endif
#if !defined(VM_RESERVED)
#define VM_RESERVED 0x00000000
#endif
#if !defined(VM_DONTEXPAND)
#define VM_DONTEXPAND 0x00000000
#endif
#if !defined(VM_DONTDUMP)
#define VM_DONTDUMP 0x00000000
#endif
#if !defined(VM_MIXEDMAP)
#define VM_MIXEDMAP 0x00000000
#endif
//
// printk.h already defined pr_fmt, so we have to redefine it so the pr_*
// routines pick up our version
//
#undef pr_fmt
#define NVIDIA_UVM_PRETTY_PRINTING_PREFIX "nvidia-uvm: "
#define pr_fmt(fmt) NVIDIA_UVM_PRETTY_PRINTING_PREFIX fmt
// Dummy printing function that maintains syntax and format specifier checking
// but doesn't print anything and doesn't evaluate the print parameters. This is
// roughly equivalent to the kernel's no_printk function. We use this instead
// because:
// 1) no_printk was not available until 2.6.36
// 2) Until 4.5 no_printk was implemented as a static function, meaning its
// parameters were always evaluated
#define UVM_NO_PRINT(fmt, ...) \
do { \
if (0) \
printk(fmt, ##__VA_ARGS__); \
} while (0)
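// Illustrative usage sketch (not part of the original file): because the
// "if (0)" branch is never taken, the arguments below are type-checked by the
// compiler but never evaluated, so the hypothetical expensive_debug_count()
// call would not run:
//
//     UVM_NO_PRINT("dropped events: %d\n", expensive_debug_count());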
// printk_ratelimited was added in 2.6.33 via commit
// 8a64f336bc1d4aa203b138d29d5a9c414a9fbb47. If not available, we prefer not
// printing anything since it's supposed to be rate-limited.
#if !defined(printk_ratelimited)
#define printk_ratelimited UVM_NO_PRINT
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
// Just too much compilation trouble with the rate-limiting printk feature
// until about kernel 3.8. Because the non-rate-limited printing will cause
// surprises and problems, just turn it off entirely in this situation.
//
#undef pr_debug_ratelimited
#define pr_debug_ratelimited UVM_NO_PRINT
#endif
#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#if !defined(pmd_large)
#define pmd_large(_pmd) \
((pmd_val(_pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
#endif
#endif /* defined(NVCPU_X86) || defined(NVCPU_X86_64) */
#if !defined(GFP_DMA32)
/*
* GFP_DMA32 is similar to GFP_DMA, but instructs the Linux zone
* allocator to allocate memory from the first 4GB on platforms
* such as Linux/x86-64; the alternative is to use an IOMMU such
* as the one implemented with the K8 GART, if available.
*/
#define GFP_DMA32 0
#endif
#if !defined(__GFP_NOWARN)
#define __GFP_NOWARN 0
#endif
#if !defined(__GFP_NORETRY)
#define __GFP_NORETRY 0
#endif
#define NV_UVM_GFP_FLAGS (GFP_KERNEL | __GFP_NORETRY)
#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping);
#endif
// Develop builds define DEBUG but enable optimization
#if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP)
// Wrappers for functions not building correctly without optimizations on,
// implemented in uvm_debug_optimized.c. Notably the file is only built for
// debug builds, not develop or release builds.
// Unoptimized builds of atomic_xchg() hit a BUILD_BUG() on arm64 as it relies
// on __xchg being completely inlined:
// /usr/src/linux-3.12.19/arch/arm64/include/asm/cmpxchg.h:67:3: note: in expansion of macro 'BUILD_BUG'
//
// PowerPC hits a similar issue, but ends up with an undefined symbol:
// WARNING: "__xchg_called_with_bad_pointer" [...] undefined!
int nv_atomic_xchg(atomic_t *val, int new);
// Same problem as atomic_xchg() on PowerPC:
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!
int nv_atomic_cmpxchg(atomic_t *val, int old, int new);
// Same problem as atomic_xchg() on PowerPC:
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new);
// This Linux kernel commit:
// 2016-08-30 0d025d271e55f3de21f0aaaf54b42d20404d2b23
// leads to build failures on x86_64 when compiling without optimization. Avoid
// that problem by providing our own builds of copy_from_user / copy_to_user
// for debug (non-optimized) UVM builds. Those are accessed via these
// nv_copy_to/from_user wrapper functions.
//
// Bug 1849583 has further details.
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n);
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n);
#else
#define nv_atomic_xchg atomic_xchg
#define nv_atomic_cmpxchg atomic_cmpxchg
#define nv_atomic_long_cmpxchg atomic_long_cmpxchg
#define nv_copy_to_user copy_to_user
#define nv_copy_from_user copy_from_user
#endif
#ifndef NV_ALIGN_DOWN
#define NV_ALIGN_DOWN(v,g) ((v) & ~((g) - 1))
#endif
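// Worked example (illustrative, not from the original source): NV_ALIGN_DOWN
// assumes the granularity g is a power of two, since it masks with ~(g - 1).
// For instance, NV_ALIGN_DOWN(0x12345, 0x1000) yields 0x12000; a
// non-power-of-two granularity would give incorrect results.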
#if defined(NVCPU_X86)
/* Some old IA32 kernels don't have 64/64 division routines;
 * they only support 64/32 division with do_div(). */
static inline uint64_t NV_DIV64(uint64_t dividend, uint64_t divisor, uint64_t *remainder)
{
/* do_div() only accepts a 32-bit divisor */
*remainder = do_div(dividend, (uint32_t)divisor);
/* do_div() modifies the dividend in-place */
return dividend;
}
#else
/* All other 32/64-bit kernels we support (including non-x86 kernels) support
* 64/64 division. */
static inline uint64_t NV_DIV64(uint64_t dividend, uint64_t divisor, uint64_t *remainder)
{
*remainder = dividend % divisor;
return dividend / divisor;
}
#endif
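// Usage sketch (illustrative only; the variable names are assumptions):
//
//     NvU64 remainder;
//     NvU64 pages = NV_DIV64(length_in_bytes, PAGE_SIZE, &remainder);
//
// On 32-bit x86 this routes through do_div(), so the divisor must fit in
// 32 bits; on all other supported configurations it is a plain 64/64 divide.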
#if defined(CLOCK_MONOTONIC_RAW)
/* Return a nanosecond-precise value */
static inline NvU64 NV_GETTIME(void)
{
struct timespec64 tm;
ktime_get_raw_ts64(&tm);
return (NvU64) timespec64_to_ns(&tm);
}
#else
/* We can only return a microsecond-precise value with the
* available non-GPL symbols. */
static inline NvU64 NV_GETTIME(void)
{
struct timespec64 tm;
ktime_get_real_ts64(&tm);
return (NvU64) timespec64_to_ns(&tm);
}
#endif
#if !defined(ilog2)
static inline int NV_ILOG2_U32(u32 n)
{
return fls(n) - 1;
}
static inline int NV_ILOG2_U64(u64 n)
{
return fls64(n) - 1;
}
#define ilog2(n) (sizeof(n) <= 4 ? NV_ILOG2_U32(n) : NV_ILOG2_U64(n))
#endif
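// Worked example (illustrative): with this fallback, ilog2(1) == 0 and
// ilog2(PAGE_SIZE) == PAGE_SHIFT, while ilog2(0) is undefined
// (fls(0) - 1 == -1), matching the semantics of the kernel's own ilog2().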
// for_each_bit added in 2.6.24 via commit 3e037454bcfa4b187e8293d2121bd8c0f5a5c31c
// later renamed in 2.6.34 via commit 984b3f5746ed2cde3d184651dabf26980f2b66e5
#if !defined(for_each_set_bit)
#define for_each_set_bit(bit, addr, size) for_each_bit((bit), (addr), (size))
#endif
// for_each_set_bit_cont was added in 3.2 via 1e2ad28f80b4e155678259238f51edebc19e4014
// It was renamed to for_each_set_bit_from in 3.3 via 307b1cd7ecd7f3dc5ce3d3860957f034f0abe4df
#if !defined(for_each_set_bit_from)
#define for_each_set_bit_from(bit, addr, size) \
for ((bit) = find_next_bit((addr), (size), (bit)); \
(bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
#endif
// for_each_clear_bit and for_each_clear_bit_from were added in 3.10 via
// 03f4a8226c2f9c14361f75848d1e93139bab90c4
#if !defined(for_each_clear_bit)
#define for_each_clear_bit(bit, addr, size) \
for ((bit) = find_first_zero_bit((addr), (size)); \
(bit) < (size); \
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
#endif
#if !defined(for_each_clear_bit_from)
#define for_each_clear_bit_from(bit, addr, size) \
for ((bit) = find_next_zero_bit((addr), (size), (bit)); \
(bit) < (size); \
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
#endif
// bitmap_clear was added in 2.6.33 via commit c1a2a962a2ad103846e7950b4591471fabecece7
#if !defined(NV_BITMAP_CLEAR_PRESENT)
static inline void bitmap_clear(unsigned long *map, unsigned int start, int len)
{
unsigned int index = start;
for_each_set_bit_from(index, map, start + len)
__clear_bit(index, map);
}
static inline void bitmap_set(unsigned long *map, unsigned int start, int len)
{
unsigned int index = start;
for_each_clear_bit_from(index, map, start + len)
__set_bit(index, map);
}
#endif
// Added in 2.6.24
#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#endif
// WRITE_ONCE/READ_ONCE have incompatible definitions across versions, which produces warnings.
// Therefore, we define our own macros
#define UVM_WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
#define UVM_READ_ONCE(x) ACCESS_ONCE(x)
// smp_mb__before_atomic was added in 3.16, provide a fallback
#ifndef smp_mb__before_atomic
#if NVCPU_IS_X86 || NVCPU_IS_X86_64
// That's what the kernel does for x86
#define smp_mb__before_atomic() barrier()
#else
// That's what the kernel does for at least arm32, arm64 and powerpc as of 4.3
#define smp_mb__before_atomic() smp_mb()
#endif
#endif
// smp_mb__after_atomic was added in 3.16, provide a fallback
#ifndef smp_mb__after_atomic
#if NVCPU_IS_X86 || NVCPU_IS_X86_64
// That's what the kernel does for x86
#define smp_mb__after_atomic() barrier()
#else
// That's what the kernel does for at least arm32, arm64 and powerpc as of 4.3
#define smp_mb__after_atomic() smp_mb()
#endif
#endif
// smp_load_acquire and smp_store_release were added in commit
// 47933ad41a86a4a9b50bed7c9b9bd2ba242aac63 ("arch: Introduce
// smp_load_acquire(), smp_store_release()") in v3.14 (2013-11-06).
#ifndef smp_load_acquire
#define smp_load_acquire(p) \
({ \
typeof(*(p)) __v = UVM_READ_ONCE(*(p)); \
smp_mb(); \
__v; \
})
#endif
#ifndef smp_store_release
#define smp_store_release(p, v) \
do { \
smp_mb(); \
UVM_WRITE_ONCE(*(p), v); \
} while (0)
#endif
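// Illustrative pairing sketch (not part of the original header; the "payload"
// and "ready" fields are assumptions): a producer publishes data with
// smp_store_release() and a consumer observes it with smp_load_acquire():
//
//     writer:  obj->payload = value;
//              smp_store_release(&obj->ready, 1);
//     reader:  if (smp_load_acquire(&obj->ready))
//                  use(obj->payload);
//
// The fallback definitions above provide the same ordering via a full
// smp_mb(), which is stronger (and slower) than a native acquire/release.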
// atomic_read_acquire and atomic_set_release were added in commit
// 654672d4ba1a6001c365833be895f9477c4d5eab ("locking/atomics:
// Add _{acquire|release|relaxed}() variants of some atomic operations") in v4.3
// (2015-08-06).
#ifndef atomic_read_acquire
#define atomic_read_acquire(p) smp_load_acquire(&(p)->counter)
#endif
#ifndef atomic_set_release
#define atomic_set_release(p, v) smp_store_release(&(p)->counter, v)
#endif
// Added in 3.11
#ifndef PAGE_ALIGNED
#define PAGE_ALIGNED(addr) (((addr) & (PAGE_SIZE - 1)) == 0)
#endif
// Added in 2.6.37 via commit e1ca7788dec6773b1a2bce51b7141948f2b8bccf
#if !defined(NV_VZALLOC_PRESENT)
static inline void *vzalloc(unsigned long size)
{
void *p = vmalloc(size);
if (p)
memset(p, 0, size);
return p;
}
#endif
// Changed in 3.17 via commit 743162013d40ca612b4cb53d3a200dff2d9ab26e
#if (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 3)
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \
wait_on_bit_lock(word, bit, mode)
#elif (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 4)
static __sched int uvm_bit_wait(void *word)
{
if (signal_pending_state(current->state, current))
return 1;
schedule();
return 0;
}
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \
wait_on_bit_lock(word, bit, uvm_bit_wait, mode)
#else
#error "Unknown number of arguments"
#endif
static void uvm_init_radix_tree_preloadable(struct radix_tree_root *tree)
{
// GFP_NOWAIT, or some combination of flags that avoids setting
// __GFP_DIRECT_RECLAIM (__GFP_WAIT prior to commit
// d0164adc89f6bb374d304ffcc375c6d2652fe67d from Nov 2015), is required for
// using radix_tree_preload() for the tree.
INIT_RADIX_TREE(tree, GFP_NOWAIT);
}
#if !defined(NV_RADIX_TREE_EMPTY_PRESENT)
static bool radix_tree_empty(struct radix_tree_root *tree)
{
void *dummy;
return radix_tree_gang_lookup(tree, &dummy, 0, 1) == 0;
}
#endif
// The radix tree root parameter was added to radix_tree_replace_slot in 4.10.
// That same change moved radix_tree_replace_slot from a header-only
// implementation to a .c file, but the symbol wasn't exported until later so
// we cannot use the function on 4.10. UVM uses this macro to ensure that
// radix_tree_replace_slot is not called when using that kernel.
#ifndef NV_RADIX_TREE_REPLACE_SLOT_PRESENT
#define NV_RADIX_TREE_REPLACE_SLOT(...) \
UVM_ASSERT_MSG(false, "radix_tree_replace_slot cannot be used in 4.10\n");
#else
#if (NV_RADIX_TREE_REPLACE_SLOT_ARGUMENT_COUNT == 2)
#define NV_RADIX_TREE_REPLACE_SLOT(root, slot, entry) \
radix_tree_replace_slot((slot), (entry))
#elif (NV_RADIX_TREE_REPLACE_SLOT_ARGUMENT_COUNT == 3)
#define NV_RADIX_TREE_REPLACE_SLOT(root, slot, entry) \
radix_tree_replace_slot((root), (slot), (entry))
#else
#error "Unknown number of arguments"
#endif
#endif
#if !defined(NV_USLEEP_RANGE_PRESENT)
static void __sched usleep_range(unsigned long min, unsigned long max)
{
unsigned min_msec = min / 1000;
unsigned max_msec = max / 1000;
if (min_msec != 0)
msleep(min_msec);
else if (max_msec != 0)
msleep(max_msec);
else
msleep(1);
}
#endif
typedef struct
{
struct mem_cgroup *new_memcg;
struct mem_cgroup *old_memcg;
} uvm_memcg_context_t;
// cgroup support requires set_active_memcg(). set_active_memcg() is an
// inline function that requires the int_active_memcg per-cpu symbol when called
// from interrupt context. int_active_memcg is only exported by commit
// c74d40e8b5e2a on >= 5.14 kernels.
#if NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg
#define UVM_CGROUP_ACCOUNTING_SUPPORTED() 1
#define NV_UVM_GFP_FLAGS_ACCOUNT (NV_UVM_GFP_FLAGS | __GFP_ACCOUNT)
// Begin a Cgroup accounting context.
// All sysmem page allocations done with NV_UVM_GFP_FLAGS_ACCOUNT will be
// charged to the mm's memory control group.
//
// If mm is NULL, the accounting context will not be switched. Please note
// that in this case, any allocations which include NV_UVM_GFP_FLAGS_ACCOUNT
// will be charged to the currently active context.
//
// Locking: uvm_memcg_context_t does not maintain its own locking. Callers must
// ensure that concurrent calls do not operate on the same context.
void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm);
// End the Cgroup accounting context started with uvm_memcg_context_start().
// After this call, the previously active memory control group will be restored.
//
// Locking: Callers must ensure that concurrent calls do not operate on the same
// context.
void uvm_memcg_context_end(uvm_memcg_context_t *context);
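// Usage sketch (illustrative only; not taken from the original file):
//
//     uvm_memcg_context_t ctx;
//     uvm_memcg_context_start(&ctx, mm);
//     page = alloc_page(NV_UVM_GFP_FLAGS_ACCOUNT);   // charged to mm's memcg
//     uvm_memcg_context_end(&ctx);
//
// Allocations made without __GFP_ACCOUNT inside the context are not charged.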
#else // !NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg
#define UVM_CGROUP_ACCOUNTING_SUPPORTED() 0
#define NV_UVM_GFP_FLAGS_ACCOUNT (NV_UVM_GFP_FLAGS)
static inline void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm)
{
return;
}
static inline void uvm_memcg_context_end(uvm_memcg_context_t *context)
{
return;
}
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg
// Commit 1dff8083a024650c75a9c961c38082473ceae8cf (v4.7).
//
// Archs with CONFIG_MMU should have their own page.h, and can't include
// asm-generic/page.h. However, x86, powerpc, and arm64 don't define the page_to_virt()
// macro in their version of page.h.
#include <linux/mm.h>
#ifndef page_to_virt
#include <asm/page.h>
#define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x)))
#endif
#endif // _UVM_LINUX_H

View File

@@ -0,0 +1,42 @@
/*******************************************************************************
Copyright (c) 2013 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _UVM_LINUX_IOCTL_H
#define _UVM_LINUX_IOCTL_H
#include "uvm_ioctl.h"
// This ioctl must be the first operation performed on the UVM file descriptor
// after opening it. Until this ioctl is made, the UVM file descriptor is
// inoperable: all other ioctls will return NV_ERR_ILLEGAL_ACTION and mmap will
// return EBADFD.
#define UVM_INITIALIZE 0x30000001
typedef struct
{
NvU64 flags NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_INITIALIZE_PARAMS;
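// Illustrative user-space call sequence (a sketch, not part of the original
// header; the device path and the omitted error handling are assumptions):
//
//     int fd = open("/dev/nvidia-uvm", O_RDWR);
//     UVM_INITIALIZE_PARAMS params = {0};
//     ioctl(fd, UVM_INITIALIZE, &params);      // must be the first ioctl
//     // params.rmStatus holds the NV_STATUS result of the initialization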
#define UVM_DEINITIALIZE 0x30000002
#endif // _UVM_LINUX_IOCTL_H

View File

@@ -0,0 +1,380 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_lock.h"
#include "uvm_thread_context.h"
#include "uvm_kvmalloc.h"
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 26);
switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ISR);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_MMAP_LOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACES_LIST);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_READ_ACQUIRE_WRITE_RELEASE_LOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_EXT_RANGE_TREE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_API);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_GPUS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_BLOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_PMA);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_ROOT_CHUNK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHANNEL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_EVENTS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_TOOLS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_SEMA_POOL_TRACKER);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_LEAF);
UVM_ENUM_STRING_DEFAULT();
}
}
bool __uvm_record_lock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
bool correct = true;
uvm_lock_order_t conflicting_order;
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
uvm_lock_flags_t mode_flags = (flags & UVM_LOCK_FLAGS_MODE_MASK);
bool trylock = (flags & UVM_LOCK_FLAGS_TRYLOCK);
UVM_ASSERT(mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE || mode_flags == UVM_LOCK_FLAGS_MODE_SHARED);
if (!uvm_context) {
UVM_ERR_PRINT("Failed to acquire the thread context when recording lock of %s\n",
uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->skip_lock_tracking > 0)
return true;
if (lock_order == UVM_LOCK_ORDER_INVALID) {
UVM_ERR_PRINT("Acquiring a lock (0x%llx) with an invalid lock order\n", (NvU64)lock);
return false;
}
// TODO: Bug 1799173: Hack in special rules for the RM locks so we don't add
// any new invalid uses while we figure out a better way to handle
// these dependencies.
if (lock_order == UVM_LOCK_ORDER_RM_GPUS) {
if (test_bit(UVM_LOCK_ORDER_MMAP_LOCK, uvm_context->acquired_lock_orders)) {
UVM_ERR_PRINT("Acquiring RM GPU lock with mmap_lock held\n");
correct = false;
}
if (test_bit(UVM_LOCK_ORDER_VA_SPACE, uvm_context->exclusive_acquired_lock_orders)) {
UVM_ERR_PRINT("Acquiring RM GPU lock with VA space lock held in write mode\n");
correct = false;
}
else if (test_bit(UVM_LOCK_ORDER_VA_SPACE, uvm_context->acquired_lock_orders) &&
!test_bit(UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS, uvm_context->acquired_lock_orders)) {
UVM_ERR_PRINT("Acquiring RM GPU lock with the VA space lock held in read mode, but without the VA space writer serialization lock held\n");
correct = false;
}
}
conflicting_order = find_next_bit(uvm_context->acquired_lock_orders, UVM_LOCK_ORDER_COUNT, lock_order);
if (conflicting_order != UVM_LOCK_ORDER_COUNT) {
if (trylock) {
// If the lock attempt is a trylock, i.e. non-blocking, then
// out-of-order lock acquisition is acceptable. Record it
// to enable __uvm_record_unlock() to skip enforcing in-order
// lock release for this lock order.
__set_bit(lock_order, uvm_context->out_of_order_acquired_lock_orders);
} else {
correct = false;
// Equivalent order is not necessarily incorrect. However, it is not yet supported,
// and is therefore treated as an error case.
UVM_ERR_PRINT("Already acquired equivalent or deeper lock %s when trying to acquire %s\n",
uvm_lock_order_to_string(conflicting_order),
uvm_lock_order_to_string(lock_order));
}
}
__set_bit(lock_order, uvm_context->acquired_lock_orders);
if (mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
__set_bit(lock_order, uvm_context->exclusive_acquired_lock_orders);
uvm_context->acquired[lock_order] = lock;
return correct;
}
bool __uvm_record_unlock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
bool correct = true;
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
uvm_lock_flags_t mode_flags = (flags & UVM_LOCK_FLAGS_MODE_MASK);
bool exclusive = (mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE);
bool out_of_order = (flags & UVM_LOCK_FLAGS_OUT_OF_ORDER);
UVM_ASSERT(mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE || mode_flags == UVM_LOCK_FLAGS_MODE_SHARED);
if (!uvm_context) {
UVM_ERR_PRINT("Failed to acquire the thread context when recording unlock of %s\n",
uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->skip_lock_tracking > 0)
return true;
if (lock_order == UVM_LOCK_ORDER_INVALID) {
UVM_ERR_PRINT("Releasing a lock (0x%llx) with an invalid lock order\n", (NvU64)lock);
return false;
}
// Releasing a lock out of order is not incorrect, but often points to
// issues. Consider it an error by default, unless the lock was
// legally acquired out-of-order via trylock, in which case out-of-order
// lock release is expected. But also give an option to opt out of
// enforcing in-order lock release, if needed.
if (!__test_and_clear_bit(lock_order, uvm_context->out_of_order_acquired_lock_orders) && !out_of_order) {
uvm_lock_order_t deeper_order = find_next_bit(uvm_context->acquired_lock_orders,
UVM_LOCK_ORDER_COUNT, lock_order + 1);
if (deeper_order != UVM_LOCK_ORDER_COUNT) {
correct = false;
UVM_ERR_PRINT("Releasing lock %s while still holding %s\n",
uvm_lock_order_to_string(lock_order),
uvm_lock_order_to_string(deeper_order));
}
}
if (!__test_and_clear_bit(lock_order, uvm_context->acquired_lock_orders)) {
correct = false;
UVM_ERR_PRINT("Releasing lock %s that's not held\n", uvm_lock_order_to_string(lock_order));
}
else if (uvm_context->acquired[lock_order] != lock) {
correct = false;
UVM_ERR_PRINT("Releasing a different instance of lock %s than held, held 0x%llx releasing 0x%llx\n",
uvm_lock_order_to_string(lock_order),
(NvU64)uvm_context->acquired[lock_order],
(NvU64)lock);
}
else if (!!__test_and_clear_bit(lock_order, uvm_context->exclusive_acquired_lock_orders) != exclusive) {
correct = false;
UVM_ERR_PRINT("Releasing lock %s as %s while it was acquired as %s\n",
uvm_lock_order_to_string(lock_order),
exclusive ? "exclusive" : "shared", exclusive ? "shared" : "exclusive");
}
uvm_context->acquired[lock_order] = NULL;
return correct;
}
bool __uvm_record_downgrade(void *lock, uvm_lock_order_t lock_order)
{
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
if (!uvm_context) {
UVM_ERR_PRINT("Failed to acquire the thread context when recording downgrade of %s\n",
uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->skip_lock_tracking > 0)
return true;
if (!__uvm_check_locked(lock, lock_order, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)) {
UVM_ERR_PRINT("Lock %s is not held in exclusive mode: downgrading failed\n",
uvm_lock_order_to_string(lock_order));
return false;
}
clear_bit(lock_order, uvm_context->exclusive_acquired_lock_orders);
return true;
}
bool __uvm_check_locked(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
uvm_lock_flags_t mode_flags = (flags & UVM_LOCK_FLAGS_MODE_MASK);
bool exclusive = (mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE);
if (!uvm_context) {
UVM_ERR_PRINT("Failed to acquire the thread context when checking that lock %s is locked\n",
uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->skip_lock_tracking > 0)
return true;
if (!test_bit(lock_order, uvm_context->acquired_lock_orders)) {
UVM_ERR_PRINT("No lock with order %s acquired at all\n", uvm_lock_order_to_string(lock_order));
return false;
}
if (uvm_context->acquired[lock_order] != lock) {
UVM_ERR_PRINT("Different instance of lock %s acquired, 0x%llx != 0x%llx\n",
uvm_lock_order_to_string(lock_order),
(NvU64)lock,
(NvU64)uvm_context->acquired[lock_order]);
return false;
}
if (mode_flags != UVM_LOCK_FLAGS_MODE_ANY &&
!!test_bit(lock_order, uvm_context->exclusive_acquired_lock_orders) != exclusive) {
UVM_ERR_PRINT("Lock %s acquired in %s mode instead of %s mode\n",
uvm_lock_order_to_string(lock_order),
exclusive ? "shared" : "exclusive", exclusive ? "exclusive" : "shared");
return false;
}
return true;
}
bool __uvm_locking_initialized(void)
{
return uvm_thread_context_global_initialized();
}
bool __uvm_check_lockable_order(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
uvm_lock_order_t conflicting_order;
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
bool trylock = (flags & UVM_LOCK_FLAGS_TRYLOCK);
if (!uvm_context)
return true;
if (uvm_context->skip_lock_tracking > 0)
return true;
if (lock_order == UVM_LOCK_ORDER_INVALID) {
UVM_ERR_PRINT("Checking for an invalid lock order\n");
return false;
}
if (!trylock) {
conflicting_order = find_next_bit(uvm_context->acquired_lock_orders, UVM_LOCK_ORDER_COUNT, lock_order);
if (conflicting_order != UVM_LOCK_ORDER_COUNT) {
UVM_ERR_PRINT("Acquired equivalent or deeper lock %s when checking that %s is lockable\n",
uvm_lock_order_to_string(conflicting_order),
uvm_lock_order_to_string(lock_order));
return false;
}
}
return true;
}
bool __uvm_check_unlocked_order(uvm_lock_order_t lock_order)
{
uvm_thread_context_lock_t *uvm_context = uvm_thread_context_lock_get();
if (!uvm_context)
return true;
if (uvm_context->skip_lock_tracking > 0)
return true;
if (lock_order == UVM_LOCK_ORDER_INVALID) {
UVM_ERR_PRINT("Checking for an invalid lock order\n");
return false;
}
if (test_bit(lock_order, uvm_context->acquired_lock_orders)) {
UVM_ERR_PRINT("Lock order %s acquired\n", uvm_lock_order_to_string(lock_order));
return false;
}
return true;
}
bool __uvm_check_all_unlocked(uvm_thread_context_lock_t *uvm_context)
{
uvm_lock_order_t lock_order;
NvU32 still_locked_count;
if (!uvm_context)
return true;
still_locked_count = bitmap_weight(uvm_context->acquired_lock_orders, UVM_LOCK_ORDER_COUNT);
if (still_locked_count == 0)
return true;
UVM_ERR_PRINT("Still %u acquired lock(s):\n", still_locked_count);
for_each_set_bit(lock_order, uvm_context->acquired_lock_orders, UVM_LOCK_ORDER_COUNT) {
UVM_ERR_PRINT(" Lock %s, instance 0x%llx\n",
uvm_lock_order_to_string(lock_order),
(NvU64)uvm_context->acquired[lock_order]);
}
return false;
}
bool __uvm_thread_check_all_unlocked()
{
return __uvm_check_all_unlocked(uvm_thread_context_lock_get());
}
NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_order_t lock_order)
{
// TODO: Bug 1772140: Notably bit locks currently do not work on memory
// allocated through vmalloc() (including big allocations created with
// uvm_kvmalloc()). The problem is the bit_waitqueue() helper used by the
// kernel internally that uses virt_to_page().
// To prevent us from using kmalloc() for a huge allocation, warn if the
// allocation size gets bigger than what we are comfortable with for
// kmalloc() in uvm_kvmalloc().
size_t size = sizeof(unsigned long) * BITS_TO_LONGS(count);
WARN_ON_ONCE(size > UVM_KMALLOC_THRESHOLD);
bit_locks->bits = kzalloc(size, NV_UVM_GFP_FLAGS);
if (!bit_locks->bits)
return NV_ERR_NO_MEMORY;
#if UVM_IS_DEBUG()
uvm_locking_assert_initialized();
bit_locks->lock_order = lock_order;
#endif
return NV_OK;
}
void uvm_bit_locks_deinit(uvm_bit_locks_t *bit_locks)
{
kfree(bit_locks->bits);
memset(bit_locks, 0, sizeof(*bit_locks));
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,460 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_test.h"
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_thread_context.h"
#define UVM_LOCK_ORDER_FIRST (UVM_LOCK_ORDER_INVALID + 1)
#define UVM_LOCK_ORDER_SECOND (UVM_LOCK_ORDER_INVALID + 2)
static bool fake_lock(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
// Just use the lock_order as the void * handle for the lock
return __uvm_record_lock((void*)(long)lock_order, lock_order, flags);
}
static bool fake_unlock_common(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
// Just use the lock_order as the void * handle for the lock
return __uvm_record_unlock((void*)(long)lock_order, lock_order, flags);
}
static bool fake_unlock(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
return fake_unlock_common(lock_order, flags);
}
static bool fake_unlock_out_of_order(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
return fake_unlock_common(lock_order, flags | UVM_LOCK_FLAGS_OUT_OF_ORDER);
}
static bool fake_downgrade(uvm_lock_order_t lock_order)
{
// Just use the lock_order as the void * handle for the lock
return __uvm_record_downgrade((void*)(long)lock_order, lock_order);
}
static bool fake_check_locked(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
return __uvm_check_locked((void*)(long)lock_order, lock_order, flags);
}
// TODO: Bug 1799173: The lock asserts verify that the RM GPU lock isn't taken
// with the VA space lock in exclusive mode, and that the RM GPU lock
// isn't taken with mmap_lock held in any mode. Hack around this in the
// test to enable the checks until we figure out something better.
static bool skip_lock(uvm_lock_order_t lock_order, uvm_lock_flags_t flags)
{
uvm_lock_flags_t mode_flags = (flags & UVM_LOCK_FLAGS_MODE_MASK);
if (lock_order == UVM_LOCK_ORDER_RM_GPUS)
return mode_flags == UVM_LOCK_FLAGS_MODE_EXCLUSIVE;
return lock_order == UVM_LOCK_ORDER_MMAP_LOCK;
}
static NV_STATUS test_all_locks_from(uvm_lock_order_t from_lock_order)
{
NvU32 exclusive;
uvm_lock_flags_t flags;
NvU32 out_of_order;
NvU32 lock_order;
TEST_CHECK_RET(from_lock_order != UVM_LOCK_ORDER_INVALID);
for (out_of_order = 0; out_of_order < 2; ++out_of_order) {
for (exclusive = 0; exclusive < 2; ++exclusive) {
flags = exclusive ? UVM_LOCK_FLAGS_MODE_EXCLUSIVE : UVM_LOCK_FLAGS_MODE_SHARED;
if (out_of_order)
flags |= UVM_LOCK_FLAGS_OUT_OF_ORDER;
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
TEST_CHECK_RET(__uvm_check_unlocked_order(lock_order));
TEST_CHECK_RET(__uvm_check_lockable_order(lock_order, flags));
}
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_lock(lock_order, flags));
}
if (!skip_lock(from_lock_order, flags)) {
TEST_CHECK_RET(!__uvm_check_unlocked_order(from_lock_order));
TEST_CHECK_RET(!__uvm_check_lockable_order(from_lock_order, flags));
}
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_check_locked(lock_order, flags));
}
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_check_locked(lock_order, UVM_LOCK_FLAGS_MODE_ANY));
}
if (out_of_order == 0) {
for (lock_order = UVM_LOCK_ORDER_COUNT - 1; lock_order != from_lock_order - 1; --lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_unlock(lock_order, flags));
}
}
else {
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(fake_unlock_out_of_order(lock_order, flags));
}
}
for (lock_order = from_lock_order; lock_order < UVM_LOCK_ORDER_COUNT; ++lock_order) {
if (skip_lock(lock_order, flags))
continue;
TEST_CHECK_RET(__uvm_check_unlocked_order(lock_order));
TEST_CHECK_RET(__uvm_check_lockable_order(lock_order, flags));
}
}
}
return NV_OK;
}
static NV_STATUS test_all_locks(void)
{
TEST_CHECK_RET(test_all_locks_from(UVM_LOCK_ORDER_FIRST) == NV_OK);
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_locking_first_as_shared_then_test_higher_order_locks(void)
{
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(test_all_locks_from(UVM_LOCK_ORDER_FIRST + 1) == NV_OK);
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_locking_second_as_exclusive_then_test_higher_order_locks(void)
{
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(test_all_locks_from(UVM_LOCK_ORDER_SECOND + 1) == NV_OK);
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_without_locking(void)
{
// Unlocking a lock w/o locking any lock at all
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_different_lock_order_than_locked(void)
{
// Unlocking a different lock than locked
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!__uvm_thread_check_all_unlocked());
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_different_lock_instance_than_locked(void)
{
// Unlocking a different instance of a lock than locked
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!__uvm_record_unlock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_with_different_mode_than_locked(void)
{
// Unlocking with different mode
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_unlocking_in_different_order_than_locked(void)
{
// Unlocking in different order than locked
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock_out_of_order(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
// Unlocking in different order than locked (not necessarily incorrect, but
// commonly pointing to issues)
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_locking_out_of_order(void)
{
// Locking in wrong order
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_locking_same_order_twice(void)
{
// Locking the same order twice (lock tracking doesn't support this case although
// it's not necessarily incorrect)
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_locked_when_no_locks_held(void)
{
// Nothing locked
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_ANY));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_exclusive_when_locked_as_shared(void)
{
// Expecting exclusive while locked as shared
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_shared_when_locked_as_exclusive(void)
{
// Expecting shared while locked as exclusive
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_locked_when_different_instance_held(void)
{
// Wrong instance of a lock held
TEST_CHECK_RET(__uvm_record_lock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_record_unlock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_checking_all_unlocked_when_lock_held(void)
{
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!__uvm_thread_check_all_unlocked());
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_downgrading(void)
{
// Lock downgrade
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_ANY));
TEST_CHECK_RET(fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_ANY));
// Can't downgrade twice
TEST_CHECK_RET(!fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(fake_check_locked(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_ANY));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_downgrading_without_locking(void)
{
// Downgrading a lock w/o locking any lock at all
TEST_CHECK_RET(!fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_downgrading_when_different_instance_held(void)
{
// Wrong instance of lock to downgrade
TEST_CHECK_RET(__uvm_record_lock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(__uvm_record_unlock(NULL, UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_downgrading_when_locked_as_shared(void)
{
// Downgrading a lock that was acquired as shared
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(!fake_downgrade(UVM_LOCK_ORDER_FIRST));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_SHARED));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS test_try_locking_out_of_order(void)
{
// Try-locking in wrong order
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK));
TEST_CHECK_RET(fake_lock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(!fake_unlock(UVM_LOCK_ORDER_FIRST, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(fake_unlock(UVM_LOCK_ORDER_SECOND, UVM_LOCK_FLAGS_MODE_EXCLUSIVE));
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
return NV_OK;
}
static NV_STATUS run_all_lock_tests(void)
{
// The test needs all locks to be released initially
TEST_CHECK_RET(__uvm_thread_check_all_unlocked());
TEST_CHECK_RET(test_all_locks() == NV_OK);
TEST_CHECK_RET(test_locking_first_as_shared_then_test_higher_order_locks() == NV_OK);
TEST_CHECK_RET(test_locking_second_as_exclusive_then_test_higher_order_locks() == NV_OK);
TEST_CHECK_RET(test_unlocking_without_locking() == NV_OK);
TEST_CHECK_RET(test_unlocking_different_lock_order_than_locked() == NV_OK);
TEST_CHECK_RET(test_unlocking_different_lock_instance_than_locked() == NV_OK);
TEST_CHECK_RET(test_unlocking_with_different_mode_than_locked() == NV_OK);
TEST_CHECK_RET(test_unlocking_in_different_order_than_locked() == NV_OK);
TEST_CHECK_RET(test_locking_out_of_order() == NV_OK);
TEST_CHECK_RET(test_locking_same_order_twice() == NV_OK);
TEST_CHECK_RET(test_checking_locked_when_no_locks_held() == NV_OK);
TEST_CHECK_RET(test_checking_exclusive_when_locked_as_shared() == NV_OK);
TEST_CHECK_RET(test_checking_shared_when_locked_as_exclusive() == NV_OK);
TEST_CHECK_RET(test_checking_locked_when_different_instance_held() == NV_OK);
TEST_CHECK_RET(test_checking_all_unlocked_when_lock_held() == NV_OK);
TEST_CHECK_RET(test_downgrading() == NV_OK);
TEST_CHECK_RET(test_downgrading_without_locking() == NV_OK);
TEST_CHECK_RET(test_downgrading_when_different_instance_held() == NV_OK);
TEST_CHECK_RET(test_downgrading_when_locked_as_shared() == NV_OK);
TEST_CHECK_RET(test_try_locking_out_of_order() == NV_OK);
return NV_OK;
}
NV_STATUS uvm_test_lock_sanity(UVM_TEST_LOCK_SANITY_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_thread_context_wrapper_t thread_context_wrapper_backup;
// The global PM lock is acquired by the top-level UVM ioctl() entry point
// and still held here, which confuses the (pre-existing) test logic that
// assumes everything is unlocked at the beginning. Clearing the thread
// context data resolves the issue, but the original state needs to be saved
// and restored before exiting the test to avoid problems in the top-level
// code.
uvm_thread_context_save(&thread_context_wrapper_backup.context);
status = run_all_lock_tests();
uvm_thread_context_restore(&thread_context_wrapper_backup.context);
return status;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,151 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_MAP_EXTERNAL_H__
#define __UVM_MAP_EXTERNAL_H__
#include "uvm_forward_decl.h"
#include "uvm_va_range.h"
#include "uvm_tracker.h"
#include "nv_uvm_types.h"
#include "uvm_types.h"
typedef struct
{
NvU64 map_offset;
UvmGpuMappingType mapping_type;
UvmGpuCachingType caching_type;
UvmGpuFormatType format_type;
UvmGpuFormatElementBits element_bits;
UvmGpuCompressionType compression_type;
} uvm_map_rm_params_t;
static uvm_ext_gpu_range_tree_t *uvm_ext_gpu_range_tree(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
{
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL);
return &va_range->external.gpu_ranges[uvm_id_gpu_index(gpu->id)];
}
// Returns the first external map (if any) in the gpu's range tree.
// va_range should be of type UVM_VA_RANGE_TYPE_EXTERNAL.
// The caller must hold the range tree lock.
static uvm_ext_gpu_map_t *uvm_ext_gpu_map_iter_first(uvm_va_range_t *va_range, uvm_gpu_t *gpu, NvU64 start, NvU64 end)
{
uvm_ext_gpu_range_tree_t *range_tree;
uvm_range_tree_node_t *node;
UVM_ASSERT(start >= va_range->node.start);
UVM_ASSERT(end <= va_range->node.end);
range_tree = uvm_ext_gpu_range_tree(va_range, gpu);
node = uvm_range_tree_iter_first(&range_tree->tree, start, end);
return uvm_ext_gpu_map_container(node);
}
// Returns the external map following the provided map (if any) in address order from
// the gpu's range tree. va_range should be of type UVM_VA_RANGE_TYPE_EXTERNAL.
// The caller must hold the range tree lock.
static uvm_ext_gpu_map_t *uvm_ext_gpu_map_iter_next(uvm_va_range_t *va_range, uvm_ext_gpu_map_t *ext_gpu_map, NvU64 end)
{
uvm_ext_gpu_range_tree_t *range_tree;
uvm_range_tree_node_t *node;
if (!ext_gpu_map)
return NULL;
UVM_ASSERT(end <= va_range->node.end);
range_tree = uvm_ext_gpu_range_tree(va_range, ext_gpu_map->gpu);
node = uvm_range_tree_iter_next(&range_tree->tree, &ext_gpu_map->node, end);
return uvm_ext_gpu_map_container(node);
}
// The four iterators below require that the caller hold the gpu's range tree
// lock.
#define uvm_ext_gpu_map_for_each_in(ext_gpu_map, va_range, gpu, start, end) \
for ((ext_gpu_map) = uvm_ext_gpu_map_iter_first((va_range), (gpu), (start), (end)); \
(ext_gpu_map); \
(ext_gpu_map) = uvm_ext_gpu_map_iter_next((va_range), (ext_gpu_map), (end)))
#define uvm_ext_gpu_map_for_each_in_safe(ext_gpu_map, ext_gpu_map_next, va_range, gpu, start, end) \
for ((ext_gpu_map) = uvm_ext_gpu_map_iter_first((va_range), (gpu), (start), (end)), \
(ext_gpu_map_next) = uvm_ext_gpu_map_iter_next((va_range), (ext_gpu_map), (end)); \
(ext_gpu_map); \
(ext_gpu_map) = (ext_gpu_map_next), \
(ext_gpu_map_next) = uvm_ext_gpu_map_iter_next((va_range), (ext_gpu_map), (end)))
#define uvm_ext_gpu_map_for_each(ext_gpu_map, va_range, gpu) \
uvm_ext_gpu_map_for_each_in(ext_gpu_map, va_range, gpu, (va_range)->node.start, (va_range)->node.end)
#define uvm_ext_gpu_map_for_each_safe(ext_gpu_map, ext_gpu_map_next, va_range, gpu) \
uvm_ext_gpu_map_for_each_in_safe(ext_gpu_map, \
ext_gpu_map_next, \
va_range, \
gpu, \
(va_range)->node.start, \
(va_range)->node.end)
// User-facing APIs (uvm_api_map_external_allocation, uvm_api_free) are
// declared in uvm_api.h.
// Queries RM for the PTEs appropriate to the VA range and mem_info, allocates
// page tables for the VA range, and writes the PTEs.
//
// va_range must have type UVM_VA_RANGE_TYPE_EXTERNAL or
// UVM_VA_RANGE_TYPE_CHANNEL. The allocation descriptor given to RM is looked up
// depending on the type of the va_range.
// For va_ranges of type UVM_VA_RANGE_TYPE_CHANNEL, the descriptor is looked up
// from the va_range. In these cases, the ext_gpu_map parameter should be NULL.
// For va_ranges of type UVM_VA_RANGE_TYPE_EXTERNAL, it is looked up from the
// ext_gpu_map parameter.
//
// This does not wait for the PTE writes to complete. The work is added to
// the output tracker.
NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,
uvm_gpu_t *mapping_gpu,
const UvmGpuMemoryInfo *mem_info,
const uvm_map_rm_params_t *map_rm_params,
uvm_ext_gpu_map_t *ext_gpu_map,
uvm_tracker_t *out_tracker);
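//
// Illustrative call sketch (not from the original header): map an RM
// allocation and wait for the PTE writes via the output tracker. The
// UvmGpu*Default enum values, mem_info, and ext_gpu_map are assumptions made
// for the example; uvm_tracker_wait() and uvm_tracker_deinit() come from
// uvm_tracker.h, included above.
//
//     uvm_map_rm_params_t map_rm_params = {
//         .map_offset       = 0,
//         .mapping_type     = UvmGpuMappingTypeDefault,
//         .caching_type     = UvmGpuCachingTypeDefault,
//         .format_type      = UvmGpuFormatTypeDefault,
//         .element_bits     = UvmGpuFormatElementBitsDefault,
//         .compression_type = UvmGpuCompressionTypeDefault,
//     };
//     uvm_tracker_t tracker = UVM_TRACKER_INIT();
//     NV_STATUS status = uvm_va_range_map_rm_allocation(va_range,
//                                                       mapping_gpu,
//                                                       &mem_info,
//                                                       &map_rm_params,
//                                                       ext_gpu_map,
//                                                       &tracker);
//     if (status == NV_OK)
//         status = uvm_tracker_wait(&tracker);
//     uvm_tracker_deinit(&tracker);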
// Removes and frees the external mapping ext_gpu_map, made on its mapping GPU
// within va_range. If deferred_free_list is NULL, the RM handle is freed
// immediately by this function. Otherwise the GPU which owns the allocation (if
// any) is retained and the handle is added to the list for later processing by
// uvm_deferred_free_object_list.
//
// If the mapping is a sparse mapping, the mapping is removed and freed.
// However, since sparse mappings do not have RM handles, nothing is added to
// the deferred_free_list (if not NULL) and the GPU is not retained.
//
// The caller must hold the range tree lock for the mapping gpu and is
// responsible for making sure that the mapping gpu is retained across those
// calls.
void uvm_ext_gpu_map_destroy(uvm_va_range_t *va_range,
uvm_ext_gpu_map_t *ext_gpu_map,
struct list_head *deferred_free_list);
// Deferred free function which frees the RM handle and the object itself.
void uvm_ext_gpu_map_free(uvm_ext_gpu_map_t *ext_gpu_map);
#endif // __UVM_MAP_EXTERNAL_H__

View File

@@ -0,0 +1,72 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_mem.h"
void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->tlb_batch.va_invalidate_supported = false;
// 128 GB should be enough for all current RM allocations and leaves enough
// space for UVM internal mappings.
// A single top level PDE covers 64 or 128 MB on Maxwell so 128 GB is fine to use.
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_base = 768ull * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
// We don't have a compelling use case in UVM-Lite for direct peer
// migrations between GPUs, so don't bother setting them up.
parent_gpu->peer_copy_mode = UVM_GPU_PEER_COPY_MODE_UNSUPPORTED;
parent_gpu->max_channel_va = 1ULL << 40;
parent_gpu->max_host_va = 1ULL << 40;
// Maxwell can only map sysmem with 4K pages
parent_gpu->can_map_sysmem_with_large_pages = false;
// Maxwell cannot place GPFIFO in vidmem
parent_gpu->gpfifo_in_vidmem_supported = false;
parent_gpu->replayable_faults_supported = false;
parent_gpu->non_replayable_faults_supported = false;
parent_gpu->access_counters_supported = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;
parent_gpu->sparse_mappings_supported = false;
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->smc.supported = false;
parent_gpu->plc_supported = false;
}

View File

@@ -0,0 +1,64 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_gpu.h"
#include "uvm_hal.h"
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "enable_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "disable_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
UVM_ASSERT_MSG(false, "clear_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
}
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_size is not supported on GPU: %s.\n", parent_gpu->name);
return 0;
}
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n", parent_gpu->name);
return false;
}
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
}

View File

@@ -0,0 +1,377 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_push.h"
#include "clb06f.h"
#include "clb0b5.h"
void uvm_hal_maxwell_ce_init(uvm_push_t *push)
{
// Notably this sends SET_OBJECT with the CE class on subchannel 0 instead
// of the HW-recommended subchannel 4 (subchannel 4 is recommended to match
// CE usage on GRCE). For the UVM driver, using subchannel 0 has the added
// benefit of verifying that we ended up on the right PBDMA, as SET_OBJECT
// with the CE class on subchannel 0 would fail on GRCE.
NV_PUSH_1U(B06F, SET_OBJECT, uvm_push_get_gpu(push)->parent->rm_info.ceClass);
}
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset_out)
{
NV_PUSH_2U(B0B5, OFFSET_OUT_UPPER, HWVALUE(B0B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),
OFFSET_OUT_LOWER, HWVALUE(B0B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out)
{
NV_PUSH_4U(B0B5, OFFSET_IN_UPPER, HWVALUE(B0B5, OFFSET_IN_UPPER, UPPER, NvOffset_HI32(offset_in)),
OFFSET_IN_LOWER, HWVALUE(B0B5, OFFSET_IN_LOWER, VALUE, NvOffset_LO32(offset_in)),
OFFSET_OUT_UPPER, HWVALUE(B0B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),
OFFSET_OUT_LOWER, HWVALUE(B0B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}
// Perform an appropriate membar before a semaphore operation. Returns whether
// the semaphore operation should include a flush.
static bool maxwell_membar_before_semaphore(uvm_push_t *push)
{
uvm_gpu_t *gpu;
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
// No MEMBAR requested, don't use a flush.
return false;
}
if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
// By default do a MEMBAR SYS and for that we can just use flush on the
// semaphore operation.
return true;
}
// MEMBAR GPU requested, do it on the HOST and skip the CE flush as CE
// doesn't have this capability.
gpu = uvm_push_get_gpu(push);
gpu->parent->host_hal->wait_for_idle(push);
gpu->parent->host_hal->membar_gpu(push);
return false;
}
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 flush_value;
bool use_flush;
use_flush = maxwell_membar_before_semaphore(push);
if (use_flush)
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NV_PUSH_3U(B0B5, SET_SEMAPHORE_A, HWVALUE(B0B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(B0B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, payload);
NV_PUSH_1U(B0B5, LAUNCH_DMA, flush_value |
HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE));
}
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 flush_value;
bool use_flush;
use_flush = maxwell_membar_before_semaphore(push);
if (use_flush)
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NV_PUSH_3U(B0B5, SET_SEMAPHORE_A, HWVALUE(B0B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(B0B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, payload);
NV_PUSH_1U(B0B5, LAUNCH_DMA, flush_value |
HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_REDUCTION, INC) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_SIGN, UNSIGNED) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_ENABLE, TRUE));
}
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
{
NvU32 flush_value;
bool use_flush;
use_flush = maxwell_membar_before_semaphore(push);
if (use_flush)
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NV_PUSH_3U(B0B5, SET_SEMAPHORE_A, HWVALUE(B0B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(B0B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, 0xdeadbeef);
NV_PUSH_1U(B0B5, LAUNCH_DMA, flush_value |
HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(B0B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_FOUR_WORD_SEMAPHORE));
}
static void maxwell_membar_after_transfer(uvm_push_t *push)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
return;
// Flush on transfers only works when paired with a semaphore release. Use a
// host WFI + MEMBAR.
// http://nvbugs/1709888
gpu->parent->host_hal->wait_for_idle(push);
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
gpu->parent->host_hal->membar_gpu(push);
else
gpu->parent->host_hal->membar_sys(push);
}
static NvU32 ce_aperture(uvm_aperture_t aperture)
{
BUILD_BUG_ON(HWCONST(B0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
HWCONST(B0B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
BUILD_BUG_ON(HWCONST(B0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
HWCONST(B0B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
UVM_ASSERT_MSG(aperture == UVM_APERTURE_VID || aperture == UVM_APERTURE_SYS, "aperture 0x%x\n", aperture);
if (aperture == UVM_APERTURE_SYS)
return HWCONST(B0B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
else
return HWCONST(B0B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
// Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE
// flags
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
NvU32 launch_dma_src_dst_type = 0;
if (src.is_virtual)
launch_dma_src_dst_type |= HWCONST(B0B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(B0B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
if (dst.is_virtual)
launch_dma_src_dst_type |= HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
else
launch_dma_src_dst_type |= HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
if (!src.is_virtual && !dst.is_virtual) {
NV_PUSH_2U(B0B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture),
SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
else if (!src.is_virtual) {
NV_PUSH_1U(B0B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture));
}
else if (!dst.is_virtual) {
NV_PUSH_1U(B0B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
}
return launch_dma_src_dst_type;
}
// Noop, since DISABLE_PLC doesn't exist in Maxwell.
NvU32 uvm_hal_maxwell_ce_plc_mode(void)
{
return 0;
}
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size)
{
// If >4GB copies ever become an important use case, this function should
// use multi-line transfers so we don't have to iterate (bug 1766588).
static const size_t max_single_copy_size = 0xFFFFFFFF;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 pipelined_value;
NvU32 launch_dma_src_dst_type;
NvU32 launch_dma_plc_mode;
bool first_operation = true;
UVM_ASSERT_MSG(gpu->parent->ce_hal->memcopy_validate(push, dst, src),
"Memcopy validation failed in channel %s, GPU %s",
push->channel->name,
uvm_gpu_name(gpu));
gpu->parent->ce_hal->memcopy_patch_src(push, &src);
launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
do {
NvU32 copy_this_time = (NvU32)min(size, max_single_copy_size);
if (first_operation && uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
else
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
gpu->parent->ce_hal->offset_in_out(push, src.address, dst.address);
NV_PUSH_1U(B0B5, LINE_LENGTH_IN, copy_this_time);
NV_PUSH_1U(B0B5, LAUNCH_DMA,
HWCONST(B0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
HWCONST(B0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
HWCONST(B0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
HWCONST(B0B5, LAUNCH_DMA, REMAP_ENABLE, FALSE) |
HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
launch_dma_src_dst_type |
launch_dma_plc_mode |
pipelined_value);
dst.address += copy_this_time;
src.address += copy_this_time;
size -= copy_this_time;
first_operation = false;
} while (size > 0);
maxwell_membar_after_transfer(push);
}
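// Worked chunking example (illustrative, not from the original source): for a
// 6 GiB copy, size = 0x180000000 bytes, so the loop above issues two
// transfers: copy #1 with copy_this_time = 0xFFFFFFFF (the only one that can
// honor UVM_PUSH_FLAG_CE_NEXT_PIPELINED), then copy #2 with
// copy_this_time = 0x80000001, followed by a single membar after the loop.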
void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, size_t size)
{
uvm_hal_maxwell_ce_memcopy(push, uvm_gpu_address_virtual(dst_va), uvm_gpu_address_virtual(src_va), size);
}
// Push SET_DST_PHYS mode if needed and return LAUNCH_DMA_DST_TYPE flags
static NvU32 memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst)
{
if (dst.is_virtual)
return HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
NV_PUSH_1U(B0B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
return HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
}
static void memset_common(uvm_push_t *push, uvm_gpu_address_t dst, size_t size, size_t memset_element_size)
{
// If >4GB memsets ever become an important use case, this function should
// use multi-line transfers so we don't have to iterate (bug 1766588).
static const size_t max_single_memset_size = 0xFFFFFFFF;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 pipelined_value;
NvU32 launch_dma_dst_type;
NvU32 launch_dma_plc_mode;
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_validate(push, dst, memset_element_size),
"Memset validation failed in channel %s, GPU %s",
push->channel->name,
uvm_gpu_name(gpu));
launch_dma_dst_type = memset_push_phys_mode(push, dst);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
else
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
do {
NvU32 memset_this_time = (NvU32)min(size, max_single_memset_size);
gpu->parent->ce_hal->offset_out(push, dst.address);
NV_PUSH_1U(B0B5, LINE_LENGTH_IN, memset_this_time);
NV_PUSH_1U(B0B5, LAUNCH_DMA,
HWCONST(B0B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
HWCONST(B0B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
HWCONST(B0B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
HWCONST(B0B5, LAUNCH_DMA, REMAP_ENABLE, TRUE) |
HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
launch_dma_dst_type |
launch_dma_plc_mode |
pipelined_value);
dst.address += memset_this_time * memset_element_size;
size -= memset_this_time;
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
} while (size > 0);
maxwell_membar_after_transfer(push);
}
void uvm_hal_maxwell_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size)
{
NV_PUSH_2U(B0B5, SET_REMAP_CONST_B, (NvU32)value,
SET_REMAP_COMPONENTS,
HWCONST(B0B5, SET_REMAP_COMPONENTS, DST_X, CONST_B) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, ONE) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, ONE));
memset_common(push, dst, size, 1);
}
void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size)
{
UVM_ASSERT_MSG(size % 4 == 0, "size: %zd\n", size);
size /= 4;
NV_PUSH_2U(B0B5, SET_REMAP_CONST_B, value,
SET_REMAP_COMPONENTS,
HWCONST(B0B5, SET_REMAP_COMPONENTS, DST_X, CONST_B) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, ONE));
memset_common(push, dst, size, 4);
}
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size)
{
UVM_ASSERT_MSG(size % 8 == 0, "size: %zd\n", size);
size /= 8;
NV_PUSH_3U(B0B5, SET_REMAP_CONST_A, (NvU32)value,
SET_REMAP_CONST_B, (NvU32)(value >> 32),
SET_REMAP_COMPONENTS,
HWCONST(B0B5, SET_REMAP_COMPONENTS, DST_X, CONST_A) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, DST_Y, CONST_B) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
HWCONST(B0B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, TWO));
memset_common(push, dst, size, 8);
}
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size)
{
uvm_hal_maxwell_ce_memset_4(push, uvm_gpu_address_virtual(dst_va), value, size);
}

View File

@@ -0,0 +1,95 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_gpu.h"
#include "uvm_hal.h"
void uvm_hal_maxwell_enable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "enable_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_disable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "disable_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
}
void uvm_hal_maxwell_clear_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
UVM_ASSERT_MSG(false, "clear_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
}
NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "fault_buffer_read_put is not supported on GPU: %s.\n", parent_gpu->name);
return 0;
}
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "fault_buffer_read_get is not supported on GPU: %s.\n", parent_gpu->name);
return 0;
}
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "fault_buffer_write_get is not supported on GPU: %s.\n", parent_gpu->name);
}
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type)
{
UVM_ASSERT_MSG(false, "fault_buffer_get_ve_id is not supported on Maxwell GPUs.\n");
return 0;
}
void uvm_hal_maxwell_fault_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "fault_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
}
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "fault_buffer_entry_is_valid is not supported on GPU: %s.\n", parent_gpu->name);
return false;
}
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "fault_buffer_entry_clear_valid is not supported on GPU: %s.\n", parent_gpu->name);
}
NvU32 uvm_hal_maxwell_fault_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "fault_buffer_entry_size is not supported on GPU: %s.\n", parent_gpu->name);
return 0;
}
void uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void *fault_packet,
uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "fault_buffer_parse_non_replayable_entry is not supported on GPU: %s.\n", parent_gpu->name);
}

View File

@@ -0,0 +1,326 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_hal_types.h"
#include "uvm_hal.h"
#include "uvm_push.h"
#include "cla16f.h"
#include "clb06f.h"
void uvm_hal_maxwell_host_wait_for_idle(uvm_push_t *push)
{
NV_PUSH_1U(A16F, WFI, 0);
}
void uvm_hal_maxwell_host_membar_sys(uvm_push_t *push)
{
NV_PUSH_1U(A16F, MEM_OP_B,
HWCONST(A16F, MEM_OP_B, OPERATION, SYSMEMBAR_FLUSH));
}
void uvm_hal_maxwell_host_tlb_invalidate_all_a16f(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 target;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
// Only Pascal+ supports invalidating down from a specific depth.
(void)depth;
(void)membar;
if (pdb.aperture == UVM_APERTURE_VID)
target = HWCONST(A16F, MEM_OP_A, TLB_INVALIDATE_TARGET, VID_MEM);
else
target = HWCONST(A16F, MEM_OP_A, TLB_INVALIDATE_TARGET, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
NV_PUSH_2U(A16F, MEM_OP_A, target |
HWVALUE(A16F, MEM_OP_A, TLB_INVALIDATE_ADDR, pdb.address),
MEM_OP_B, HWCONST(A16F, MEM_OP_B, OPERATION, MMU_TLB_INVALIDATE) |
HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_PDB, ONE) |
HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_GPC, ENABLE));
}
void uvm_hal_maxwell_host_tlb_invalidate_all_b06f(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 target;
NvU32 pdb_lo;
NvU32 pdb_hi;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
// Only Pascal+ supports invalidating down from a specific depth.
(void)depth;
(void)membar;
if (pdb.aperture == UVM_APERTURE_VID)
target = HWCONST(B06F, MEM_OP_C, TLB_INVALIDATE_TARGET, VID_MEM);
else
target = HWCONST(B06F, MEM_OP_C, TLB_INVALIDATE_TARGET, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(B06F, MEM_OP_C, TLB_INVALIDATE_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(B06F, MEM_OP_C, TLB_INVALIDATE_ADDR_LO);
NV_PUSH_2U(B06F, MEM_OP_C, target |
HWCONST(B06F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWCONST(B06F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWVALUE(B06F, MEM_OP_C, TLB_INVALIDATE_ADDR_LO, pdb_lo),
MEM_OP_D, HWCONST(B06F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(B06F, MEM_OP_D, TLB_INVALIDATE_ADDR_HI, pdb_hi));
}
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar)
{
// No per VA invalidate on Maxwell, redirect to invalidate all.
uvm_push_get_gpu(push)->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
}
void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 target_pdb = 0;
NvU32 invalidate_gpc_value;
// Only Pascal+ supports invalidating down from a specific depth. We
// invalidate everything instead.
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
target_pdb = HWCONST(A16F, MEM_OP_A, TLB_INVALIDATE_TARGET, VID_MEM);
else
target_pdb = HWCONST(A16F, MEM_OP_A, TLB_INVALIDATE_TARGET, SYS_MEM_COHERENT);
target_pdb |= HWVALUE(A16F, MEM_OP_A, TLB_INVALIDATE_ADDR, pdb.address);
if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_GPC, DISABLE);
else
invalidate_gpc_value = HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_GPC, ENABLE);
NV_PUSH_2U(A16F, MEM_OP_A, target_pdb,
MEM_OP_B, HWCONST(A16F, MEM_OP_B, OPERATION, MMU_TLB_INVALIDATE) |
HWCONST(A16F, MEM_OP_B, MMU_TLB_INVALIDATE_PDB, ONE) |
invalidate_gpc_value);
}
void uvm_hal_maxwell_host_noop(uvm_push_t *push, NvU32 size)
{
UVM_ASSERT_MSG(size % 4 == 0, "size %u\n", size);
if (size == 0)
return;
// size is in bytes so divide by the method size (4 bytes)
size /= 4;
while (size > 0) {
// noop_this_time includes the NOP method itself and hence can be
// up to UVM_METHOD_COUNT_MAX + 1.
NvU32 noop_this_time = min(UVM_METHOD_COUNT_MAX + 1, size);
// -1 for the NOP method itself.
NV_PUSH_NU_NONINC(A16F, NOP, noop_this_time - 1);
size -= noop_this_time;
}
}
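// Worked example (illustrative, not from the original source): for
// size = 24 bytes, the division by 4 leaves 6 methods. A single
// NV_PUSH_NU_NONINC(A16F, NOP, 5) then emits the NOP header plus 5 data
// words, accounting for all 6 methods in one iteration.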
void uvm_hal_maxwell_host_interrupt(uvm_push_t *push)
{
NV_PUSH_1U(A16F, NON_STALL_INTERRUPT, 0);
}
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 sem_lo;
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
uvm_hal_wfi_membar(push, uvm_push_get_and_reset_membar_flag(push));
NV_PUSH_4U(A16F, SEMAPHOREA, HWVALUE(A16F, SEMAPHOREA, OFFSET_UPPER, NvOffset_HI32(gpu_va)),
SEMAPHOREB, HWVALUE(A16F, SEMAPHOREB, OFFSET_LOWER, sem_lo),
SEMAPHOREC, payload,
SEMAPHORED, HWCONST(A16F, SEMAPHORED, OPERATION, RELEASE) |
HWCONST(A16F, SEMAPHORED, RELEASE_SIZE, 4BYTE) |
HWCONST(A16F, SEMAPHORED, RELEASE_WFI, DIS));
}
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 sem_lo;
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
NV_PUSH_4U(A16F, SEMAPHOREA, HWVALUE(A16F, SEMAPHOREA, OFFSET_UPPER, NvOffset_HI32(gpu_va)),
SEMAPHOREB, HWVALUE(A16F, SEMAPHOREB, OFFSET_LOWER, sem_lo),
SEMAPHOREC, payload,
SEMAPHORED, HWCONST(A16F, SEMAPHORED, ACQUIRE_SWITCH, ENABLED) |
HWCONST(A16F, SEMAPHORED, OPERATION, ACQ_GEQ));
}
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
{
NvU32 sem_lo;
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
uvm_hal_wfi_membar(push, uvm_push_get_and_reset_membar_flag(push));
NV_PUSH_4U(A16F, SEMAPHOREA, HWVALUE(A16F, SEMAPHOREA, OFFSET_UPPER, NvOffset_HI32(gpu_va)),
SEMAPHOREB, HWVALUE(A16F, SEMAPHOREB, OFFSET_LOWER, sem_lo),
SEMAPHOREC, 0xdeadbeef,
SEMAPHORED, HWCONST(A16F, SEMAPHORED, OPERATION, RELEASE) |
HWCONST(A16F, SEMAPHORED, RELEASE_SIZE, 16BYTE) |
HWCONST(A16F, SEMAPHORED, RELEASE_WFI, DIS));
}
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length)
{
NvU64 fifo_entry_value;
UVM_ASSERT(!uvm_global_is_suspended());
UVM_ASSERT_MSG(pushbuffer_va % 4 == 0, "pushbuffer va unaligned: %llu\n", pushbuffer_va);
UVM_ASSERT_MSG(pushbuffer_length % 4 == 0, "pushbuffer length unaligned: %u\n", pushbuffer_length);
fifo_entry_value = HWVALUE(A16F, GP_ENTRY0, GET, NvU64_LO32(pushbuffer_va) >> 2);
fifo_entry_value |= (NvU64)(HWVALUE(A16F, GP_ENTRY1, GET_HI, NvU64_HI32(pushbuffer_va)) |
HWVALUE(A16F, GP_ENTRY1, LENGTH, pushbuffer_length >> 2) |
HWCONST(A16F, GP_ENTRY1, PRIV, KERNEL)) << 32;
*fifo_entry = fifo_entry_value;
}
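// Worked encoding example (illustrative, not from the original source): for
// pushbuffer_va = 0x120000100 and pushbuffer_length = 0x80, the code above
// sets GP_ENTRY0 GET = 0x20000100 >> 2, and GP_ENTRY1 gets
// GET_HI = 0x1, LENGTH = 0x80 >> 2 = 0x20 and PRIV = KERNEL.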
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put)
{
UVM_GPU_WRITE_ONCE(*channel->channel_info.gpPut, gpu_put);
}
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push)
{
}
void uvm_hal_maxwell_replay_faults_unsupported(uvm_push_t *push, uvm_fault_replay_type_t type)
{
UVM_ASSERT_MSG(false, "host replay_faults called on Maxwell GPU\n");
}
void uvm_hal_maxwell_cancel_faults_global_unsupported(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr)
{
UVM_ASSERT_MSG(false, "host cancel_faults_global called on Maxwell GPU\n");
}
void uvm_hal_maxwell_cancel_faults_targeted_unsupported(uvm_push_t *push,
uvm_gpu_phys_address_t instance_ptr,
NvU32 gpc_id,
NvU32 client_id)
{
UVM_ASSERT_MSG(false, "host cancel_faults_targeted called on Maxwell GPU\n");
}
void uvm_hal_maxwell_cancel_faults_va_unsupported(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode)
{
UVM_ASSERT_MSG(false, "host cancel_faults_va called on Maxwell GPU\n");
}
void uvm_hal_maxwell_host_clear_faulted_channel_sw_method_unsupported(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "host clear_faulted_channel_sw_method called on Maxwell GPU\n");
}
void uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported(uvm_push_t *push,
uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "host clear_faulted_channel_method called on Maxwell GPU\n");
}
void uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported(uvm_user_channel_t *user_channel,
const uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "host clear_faulted_channel_register called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push)
{
UVM_ASSERT_MSG(false, "host access_counter_clear_all called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type)
{
UVM_ASSERT_MSG(false, "host access_counter_clear_type called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "host access_counter_clear_targeted called on Maxwell GPU\n");
}
NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu)
{
NvU32 time0;
NvU32 time1_first, time1_second;
// When reading the TIME, TIME_1 should be read first, followed by TIME_0,
// then a second reading of TIME_1 should be done. If the two readings of
// TIME_1 do not match, this process should be repeated.
//
// Doing that will catch the 4-second wrap-around of TIME_0.
do {
time1_first = UVM_GPU_READ_ONCE(*gpu->time.time1_register);
rmb();
time0 = UVM_GPU_READ_ONCE(*gpu->time.time0_register);
rmb();
time1_second = UVM_GPU_READ_ONCE(*gpu->time.time1_register);
} while (time1_second != time1_first);
return (((NvU64)time1_first) << 32) + time0;
}

Some files were not shown because too many files have changed in this diff