diff --git a/README.md b/README.md index fea72f437..f1b68c059 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 580.94.06. +version 580.94.10. ## How to Build @@ -17,7 +17,7 @@ as root: Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -580.94.06 driver release. This can be achieved by installing +580.94.10 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -185,7 +185,7 @@ table below). For details on feature support and limitations, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/580.94.06/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/580.94.10/README/kernel_open.html For vGPU support, please refer to the README.vgpu packaged in the vGPU Host Package for more details. @@ -749,6 +749,7 @@ Subsystem Device ID. | NVIDIA A10 | 2236 10DE 1482 | | NVIDIA A10G | 2237 10DE 152F | | NVIDIA A10M | 2238 10DE 1677 | +| NVIDIA H20 NVL16 | 230E 10DE 20DF | | NVIDIA H100 NVL | 2321 10DE 1839 | | NVIDIA H800 PCIe | 2322 10DE 17A4 | | NVIDIA H800 | 2324 10DE 17A6 | @@ -949,9 +950,10 @@ Subsystem Device ID. | NVIDIA GB200 | 2941 10DE 20D5 | | NVIDIA GB200 | 2941 10DE 21C9 | | NVIDIA GB200 | 2941 10DE 21CA | +| NVIDIA DRIVE P2021 | 29BB 10DE 207C | | NVIDIA GeForce RTX 5090 | 2B85 | | NVIDIA GeForce RTX 5090 D | 2B87 | -| NVIDIA GeForce RTX 5090 D v2 | 2B8C 17AA 530C | +| NVIDIA GeForce RTX 5090 D v2 | 2B8C | | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 1028 204B | | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 103C 204B | | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B | @@ -964,6 +966,8 @@ Subsystem Device ID. | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C | | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C | | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 17AA 204C | +| NVIDIA RTX PRO 6000 Blackwell Server Edition | 2BB5 10DE 204E | +| NVIDIA RTX 6000D | 2BB9 10DE 2091 | | NVIDIA GeForce RTX 5080 | 2C02 | | NVIDIA GeForce RTX 5070 Ti | 2C05 | | NVIDIA GeForce RTX 5090 Laptop GPU | 2C18 | @@ -974,6 +978,7 @@ Subsystem Device ID. | NVIDIA RTX PRO 4500 Blackwell | 2C31 17AA 2051 | | NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 1028 2053 | | NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 103C 2053 | +| NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 10DE 2053 | | NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 17AA 2053 | | NVIDIA RTX PRO 4000 Blackwell | 2C34 1028 2052 | | NVIDIA RTX PRO 4000 Blackwell | 2C34 103C 2052 | @@ -983,22 +988,29 @@ Subsystem Device ID. 
| NVIDIA RTX PRO 4000 Blackwell Generation Laptop GPU | 2C39 | | NVIDIA GeForce RTX 5090 Laptop GPU | 2C58 | | NVIDIA GeForce RTX 5080 Laptop GPU | 2C59 | +| NVIDIA RTX PRO 5000 Blackwell Embedded GPU | 2C77 | +| NVIDIA RTX PRO 4000 Blackwell Embedded GPU | 2C79 | | NVIDIA GeForce RTX 5060 Ti | 2D04 | | NVIDIA GeForce RTX 5060 | 2D05 | | NVIDIA GeForce RTX 5070 Laptop GPU | 2D18 | | NVIDIA GeForce RTX 5060 Laptop GPU | 2D19 | | NVIDIA RTX PRO 2000 Blackwell | 2D30 1028 2054 | | NVIDIA RTX PRO 2000 Blackwell | 2D30 103C 2054 | +| NVIDIA RTX PRO 2000 Blackwell | 2D30 10DE 2054 | | NVIDIA RTX PRO 2000 Blackwell | 2D30 17AA 2054 | | NVIDIA RTX PRO 2000 Blackwell Generation Laptop GPU | 2D39 | | NVIDIA GeForce RTX 5070 Laptop GPU | 2D58 | | NVIDIA GeForce RTX 5060 Laptop GPU | 2D59 | -| NVIDIA GeForce RTX 5050 | 2D83 17AA C791 | +| NVIDIA RTX PRO 2000 Blackwell Embedded GPU | 2D79 | +| NVIDIA GeForce RTX 5050 | 2D83 | | NVIDIA GeForce RTX 5050 Laptop GPU | 2D98 | | NVIDIA RTX PRO 1000 Blackwell Generation Laptop GPU | 2DB8 | | NVIDIA RTX PRO 500 Blackwell Generation Laptop GPU | 2DB9 | | NVIDIA GeForce RTX 5050 Laptop GPU | 2DD8 | +| NVIDIA RTX PRO 500 Blackwell Embedded GPU | 2DF9 | | NVIDIA GeForce RTX 5070 | 2F04 | | NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 | | NVIDIA RTX PRO 3000 Blackwell Generation Laptop GPU | 2F38 | | NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F58 | +| NVIDIA B300 SXM6 AC | 3182 10DE 20E6 | +| NVIDIA GB300 | 31C2 10DE 21F1 | diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index 536552e5d..bfb3a3c95 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc ccflags-y += -I$(src) ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args ccflags-y += -D__KERNEL__ -DMODULE -DNVRM -ccflags-y += -DNV_VERSION_STRING=\"580.94.06\" +ccflags-y += -DNV_VERSION_STRING=\"580.94.10\" # Include and link Tegra out-of-tree modules. 
ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),) diff --git a/kernel-open/common/inc/nvstatuscodes.h b/kernel-open/common/inc/nvstatuscodes.h index 98ebb7b47..440434997 100644 --- a/kernel-open/common/inc/nvstatuscodes.h +++ b/kernel-open/common/inc/nvstatuscodes.h @@ -165,6 +165,7 @@ NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC, 0x00000087, "NVLink fabri NV_STATUS_CODE(NV_ERR_BUFFER_FULL, 0x00000088, "Buffer is full") NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY, 0x00000089, "Buffer is empty") NV_STATUS_CODE(NV_ERR_MC_FLA_OFFSET_TABLE_FULL, 0x0000008A, "Multicast FLA offset table has no available slots") +NV_STATUS_CODE(NV_ERR_DMA_XFER_FAILED, 0x0000008B, "DMA transfer failed") // Warnings: NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch") diff --git a/kernel-open/common/inc/os-interface.h b/kernel-open/common/inc/os-interface.h index 523368eaa..dde5c843a 100644 --- a/kernel-open/common/inc/os-interface.h +++ b/kernel-open/common/inc/os-interface.h @@ -62,6 +62,11 @@ struct os_work_queue; /* Each OS defines its own version of this opaque type */ typedef struct os_wait_queue os_wait_queue; +/* Flags needed by os_get_current_process_flags */ +#define OS_CURRENT_PROCESS_FLAG_NONE 0x0 +#define OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD 0x1 +#define OS_CURRENT_PROCESS_FLAG_EXITING 0x2 + /* * --------------------------------------------------------------------------- * @@ -194,6 +199,7 @@ NV_STATUS NV_API_CALL os_open_readonly_file (const char *, void ** NV_STATUS NV_API_CALL os_open_and_read_file (const char *, NvU8 *, NvU64); NvBool NV_API_CALL os_is_nvswitch_present (void); NV_STATUS NV_API_CALL os_get_random_bytes (NvU8 *, NvU16); +NvU32 NV_API_CALL os_get_current_process_flags (void); NV_STATUS NV_API_CALL os_alloc_wait_queue (os_wait_queue **); void NV_API_CALL os_free_wait_queue (os_wait_queue *); void NV_API_CALL os_wait_uninterruptible (os_wait_queue *); diff --git a/kernel-open/nvidia-uvm/uvm_ampere_host.c b/kernel-open/nvidia-uvm/uvm_ampere_host.c index 834bf93b9..0bcbf9cf9 100644 --- a/kernel-open/nvidia-uvm/uvm_ampere_host.c +++ b/kernel-open/nvidia-uvm/uvm_ampere_host.c @@ -461,3 +461,29 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push, if (params->membar == UvmInvalidateTlbMemBarLocal) uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push); } + +void uvm_hal_ampere_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture) +{ + uvm_gpu_t *gpu = uvm_push_get_gpu(push); + NvU32 aperture_value; + + if (aperture == UVM_APERTURE_SYS) { + aperture_value = HWCONST(C56F, MEM_OP_D, OPERATION, L2_SYSMEM_INVALIDATE); + } + else if (uvm_aperture_is_peer(aperture)) { + aperture_value = HWCONST(C56F, MEM_OP_D, OPERATION, L2_PEERMEM_INVALIDATE); + } + else { + UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture); + return; + } + + uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS); + + NV_PUSH_4U(C56F, MEM_OP_A, 0, + MEM_OP_B, 0, + MEM_OP_C, 0, + MEM_OP_D, aperture_value); + + uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS); +} diff --git a/kernel-open/nvidia-uvm/uvm_blackwell_host.c b/kernel-open/nvidia-uvm/uvm_blackwell_host.c index 7552863ce..d829bf445 100644 --- a/kernel-open/nvidia-uvm/uvm_blackwell_host.c +++ b/kernel-open/nvidia-uvm/uvm_blackwell_host.c @@ -347,10 +347,23 @@ uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_g return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED; } -// Host-specific L2 cache invalidate for non-coherent sysmem -void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push) +void
uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture) { uvm_gpu_t *gpu = uvm_push_get_gpu(push); + NvU32 aperture_value; + + if (!gpu->parent->is_integrated_gpu) { + return uvm_hal_ampere_host_l2_invalidate(push, aperture); + } + + switch (aperture) { + case UVM_APERTURE_SYS: + aperture_value = HWCONST(C96F, MEM_OP_D, OPERATION, L2_SYSMEM_NCOH_INVALIDATE); + break; + default: + UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture); + return; + } // First sysmembar uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS); @@ -363,7 +376,7 @@ void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push) NV_PUSH_4U(C96F, MEM_OP_A, 0, MEM_OP_B, 0, MEM_OP_C, 0, - MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_SYSMEM_NCOH_INVALIDATE)); + MEM_OP_D, aperture_value); // Final sysmembar uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS); } diff --git a/kernel-open/nvidia-uvm/uvm_hal.c b/kernel-open/nvidia-uvm/uvm_hal.c index 777d16a97..93f0e70c1 100644 --- a/kernel-open/nvidia-uvm/uvm_hal.c +++ b/kernel-open/nvidia-uvm/uvm_hal.c @@ -221,7 +221,7 @@ static uvm_hal_class_ops_t host_table[] = .access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported, .access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported, .access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported, - .l2_invalidate_noncoh_sysmem = uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported, + .l2_invalidate = uvm_hal_host_l2_invalidate_unsupported, .get_time = uvm_hal_maxwell_get_time, } }, @@ -287,6 +287,7 @@ static uvm_hal_class_ops_t host_table[] = .tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all, .tlb_invalidate_va = uvm_hal_ampere_host_tlb_invalidate_va, .tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test, + .l2_invalidate = uvm_hal_ampere_host_l2_invalidate, } }, { @@ -315,8 +316,8 @@ static uvm_hal_class_ops_t host_table[] = .tlb_invalidate_phys = uvm_hal_blackwell_host_tlb_invalidate_phys, .tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test, .tlb_flush_prefetch = uvm_hal_blackwell_host_tlb_flush_prefetch, - .l2_invalidate_noncoh_sysmem = uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem, .access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100, + .l2_invalidate = uvm_hal_blackwell_host_l2_invalidate, } }, { @@ -1162,10 +1163,11 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src) { } -void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push) +void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture) { uvm_gpu_t *gpu = uvm_push_get_gpu(push); UVM_ERR_PRINT("L2 cache invalidation: Called on unsupported GPU %s (arch: 0x%x, impl: 0x%x)\n", uvm_gpu_name(gpu), gpu->parent->rm_info.gpuArch, gpu->parent->rm_info.gpuImplementation); - UVM_ASSERT_MSG(false, "host l2_invalidate_noncoh_sysmem called on unsupported GPU\n"); + UVM_ASSERT_MSG(false, "L2 invalidate is not supported on %s", + uvm_parent_gpu_name(gpu->parent)); } \ No newline at end of file diff --git a/kernel-open/nvidia-uvm/uvm_hal.h b/kernel-open/nvidia-uvm/uvm_hal.h index 004ba6392..f492f5cc9 100644 --- a/kernel-open/nvidia-uvm/uvm_hal.h +++ b/kernel-open/nvidia-uvm/uvm_hal.h @@ -248,11 +248,12 @@ typedef void (*uvm_hal_host_tlb_flush_prefetch_t)(uvm_push_t *push); void uvm_hal_maxwell_host_tlb_flush_prefetch_unsupported(uvm_push_t *push); void uvm_hal_blackwell_host_tlb_flush_prefetch(uvm_push_t *push); -// L2 cache invalidate 
for non-coherent sysmem for systems with write back cache. -// These are iGPUs as of now. -typedef void (*uvm_hal_host_l2_invalidate_noncoh_sysmem_t)(uvm_push_t *push); -void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push); -void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push); +// Performs L2 cache invalidation for peer or system memory. +typedef void (*uvm_hal_host_l2_invalidate_t)(uvm_push_t *push, uvm_aperture_t aperture); +void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture); + +void uvm_hal_ampere_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture); +void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture); // By default all semaphore release operations include a membar sys before the // operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with @@ -822,7 +823,7 @@ struct uvm_host_hal_struct uvm_hal_host_tlb_invalidate_phys_t tlb_invalidate_phys; uvm_hal_host_tlb_invalidate_test_t tlb_invalidate_test; uvm_hal_host_tlb_flush_prefetch_t tlb_flush_prefetch; - uvm_hal_host_l2_invalidate_noncoh_sysmem_t l2_invalidate_noncoh_sysmem; + uvm_hal_host_l2_invalidate_t l2_invalidate; uvm_hal_fault_buffer_replay_t replay_faults; uvm_hal_fault_cancel_global_t cancel_faults_global; uvm_hal_fault_cancel_targeted_t cancel_faults_targeted; diff --git a/kernel-open/nvidia-uvm/uvm_map_external.c b/kernel-open/nvidia-uvm/uvm_map_external.c index af6b8f5c5..24bfca275 100644 --- a/kernel-open/nvidia-uvm/uvm_map_external.c +++ b/kernel-open/nvidia-uvm/uvm_map_external.c @@ -1276,11 +1276,20 @@ void uvm_ext_gpu_map_destroy(uvm_va_range_external_t *external_range, range_tree = uvm_ext_gpu_range_tree(external_range, mapped_gpu); - // Perform L2 cache invalidation for noncoherent sysmem mappings. - // This is done only on systems with write-back cache which is iGPUs as of now. + // Perform L2 cache invalidation for cached peer and sysmem mappings. if (ext_gpu_map->need_l2_invalidate_at_unmap) { - UVM_ASSERT(ext_gpu_map->gpu->parent->is_integrated_gpu); - status = uvm_mmu_l2_invalidate_noncoh_sysmem(mapped_gpu); + uvm_aperture_t aperture; + + // Peer cache invalidation is not targeted to a specific peer, so we + // just use UVM_APERTURE_PEER(0). + if (ext_gpu_map->is_egm) + aperture = UVM_APERTURE_PEER(0); + else if (ext_gpu_map->is_sysmem) + aperture = UVM_APERTURE_SYS; + else + aperture = UVM_APERTURE_PEER(0); + + status = uvm_mmu_l2_invalidate(mapped_gpu, aperture); UVM_ASSERT(status == NV_OK); } diff --git a/kernel-open/nvidia-uvm/uvm_mmu.c b/kernel-open/nvidia-uvm/uvm_mmu.c index 5a1fd6a44..c6e1ed256 100644 --- a/kernel-open/nvidia-uvm/uvm_mmu.c +++ b/kernel-open/nvidia-uvm/uvm_mmu.c @@ -2974,25 +2974,21 @@ NV_STATUS uvm_mmu_tlb_invalidate_phys(uvm_gpu_t *gpu) return uvm_push_end_and_wait(&push); } -NV_STATUS uvm_mmu_l2_invalidate_noncoh_sysmem(uvm_gpu_t *gpu) +NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture) { uvm_push_t push; NV_STATUS status; - // L2 cache invalidation is only done for systems with write-back - // cache which is iGPUs as of now. 
- UVM_ASSERT(gpu->parent->is_integrated_gpu); - status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, - "L2 cache invalidate for sysmem"); + "L2 cache invalidate"); if (status != NV_OK) { UVM_ERR_PRINT("L2 cache invalidation: Failed to begin push, status: %s\n", nvstatusToString(status)); return status; } - gpu->parent->host_hal->l2_invalidate_noncoh_sysmem(&push); + gpu->parent->host_hal->l2_invalidate(&push, aperture); status = uvm_push_end_and_wait(&push); if (status != NV_OK) diff --git a/kernel-open/nvidia-uvm/uvm_mmu.h b/kernel-open/nvidia-uvm/uvm_mmu.h index a969df3ac..134c73b5e 100644 --- a/kernel-open/nvidia-uvm/uvm_mmu.h +++ b/kernel-open/nvidia-uvm/uvm_mmu.h @@ -722,9 +722,8 @@ uvm_gpu_address_t uvm_mmu_gpu_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phy // dma addresses, IOVAs, and GPAs). See uvm_dma_map_invalidation_t. NV_STATUS uvm_mmu_tlb_invalidate_phys(uvm_gpu_t *gpu); -// Invalidate L2 cache when noncoherent sysmem mappings are unmapped. -// This is done for systems with write-back cache i.e. iGPUs as of now. -NV_STATUS uvm_mmu_l2_invalidate_noncoh_sysmem(uvm_gpu_t *gpu); +// Invalidate L2 cache for peer or system memory. +NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture); NV_STATUS uvm_test_invalidate_tlb(UVM_TEST_INVALIDATE_TLB_PARAMS *params, struct file *filp); diff --git a/kernel-open/nvidia-uvm/uvm_va_range.h b/kernel-open/nvidia-uvm/uvm_va_range.h index 4c12cfdd3..16ff18eb5 100644 --- a/kernel-open/nvidia-uvm/uvm_va_range.h +++ b/kernel-open/nvidia-uvm/uvm_va_range.h @@ -204,8 +204,12 @@ typedef struct uvm_deferred_free_object_t deferred_free; // Flag indicating whether L2 cache invalidation is needed at unmap time. - // This is set by RM during mapping and used during unmap to determine - // if L2 cache invalidation should be performed for non coherent sysmem. + // This is set by RM during mapping and used during unmap to determine if L2 + // cache invalidation should be performed. For GPU cached system memory + // allocations on systems with a write-back cache, this is required for + // correctness. For GPU cached peer and system memory on systems with a + // write-through cache, the invalidation could be done by RM at map time; + // however, this introduces overhead during performance-sensitive sections.
bool need_l2_invalidate_at_unmap; } uvm_ext_gpu_map_t; diff --git a/kernel-open/nvidia/os-interface.c b/kernel-open/nvidia/os-interface.c index a03a3b88c..835e329d9 100644 --- a/kernel-open/nvidia/os-interface.c +++ b/kernel-open/nvidia/os-interface.c @@ -2061,6 +2061,22 @@ NV_STATUS NV_API_CALL os_get_random_bytes return NV_OK; } +NvU32 NV_API_CALL os_get_current_process_flags +( + void +) +{ + NvU32 flags = OS_CURRENT_PROCESS_FLAG_NONE; + + if (current->flags & PF_EXITING) + flags |= OS_CURRENT_PROCESS_FLAG_EXITING; + + if (current->flags & PF_KTHREAD) + flags |= OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD; + + return flags; +} + NV_STATUS NV_API_CALL os_alloc_wait_queue ( os_wait_queue **wq diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h index 844e22126..83ec5b74a 100644 --- a/src/common/inc/nvBldVer.h +++ b/src/common/inc/nvBldVer.h @@ -43,18 +43,18 @@ #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r580/VK580_65-182" -#define NV_BUILD_CHANGELIST_NUM (36741708) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r580/VK580_65-186" +#define NV_BUILD_CHANGELIST_NUM (36888175) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r580/VK580_65-182" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36741708) +#define NV_BUILD_NAME "rel/gpu_drv/r580/VK580_65-186" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36888175) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "VK580_65-9" -#define NV_BUILD_CHANGELIST_NUM (36741708) +#define NV_BUILD_BRANCH_VERSION "VK580_65-12" +#define NV_BUILD_CHANGELIST_NUM (36887028) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "581.71" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36741708) +#define NV_BUILD_NAME "581.90" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36887028) #define NV_BUILD_BRANCH_BASE_VERSION R580 #endif // End buildmeister python edited section diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h index 0f8a7250b..89e34cfb8 100644 --- a/src/common/inc/nvUnixVersion.h +++ b/src/common/inc/nvUnixVersion.h @@ -4,7 +4,7 @@ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "580.94.06" +#define NV_VERSION_STRING "580.94.10" #else diff --git a/src/common/modeset/hdmipacket/nvhdmipkt_C671.c b/src/common/modeset/hdmipacket/nvhdmipkt_C671.c index e25344dde..eb4d3096f 100644 --- a/src/common/modeset/hdmipacket/nvhdmipkt_C671.c +++ b/src/common/modeset/hdmipacket/nvhdmipkt_C671.c @@ -175,6 +175,7 @@ static NVHDMIPKT_RESULT SetFRLLinkRate(NVHDMIPKT_CLASS *pThis, const NvU32 subDevice, const NvU32 displayId, const NvBool bFakeLt, + const NvBool bDoNotSkipLt, const NvBool bLinkAssessmentOnly, const NvU32 frlRate) { @@ -184,6 +185,7 @@ static NVHDMIPKT_RESULT SetFRLLinkRate(NVHDMIPKT_CLASS *pThis, params.displayId = displayId; params.data = frlRate; params.bFakeLt = bFakeLt; + params.bDoNotSkipLt = bDoNotSkipLt; params.bLinkAssessmentOnly = bLinkAssessmentOnly; #if NVHDMIPKT_RM_CALLS_INTERNAL @@ -275,14 +277,16 @@ performLinkTraningToAssessFRLLink(NVHDMIPKT_CLASS *pThis, { // If the display is active and the maximum link rate matches the link // rate required for the current mode timings, avoid marking the set - // link configuration call as an assessment only. This prevents - // re-training after the assessment. + // link configuration call as an assessment only. 
This allows us to + // re-train the existing link now instead of after the assessment. + // In addition, do not allow link training to be skipped to ensure + // we succesfully recover an existing FRL config. const NvBool bLinkAssessmentOnly = bIsDisplayActive ? (nv0073currFRLRate != maxFRLRate) : NV_TRUE; if (SetFRLLinkRate(pThis, subDevice, displayId, - NV_FALSE /* bFakeLt */, bLinkAssessmentOnly, - maxFRLRate) == NVHDMIPKT_SUCCESS) + NV_FALSE /* bFakeLt */, NV_TRUE /* bDoNotSkipLt */, + bLinkAssessmentOnly, maxFRLRate) == NVHDMIPKT_SUCCESS) { break; } @@ -299,11 +303,13 @@ performLinkTraningToAssessFRLLink(NVHDMIPKT_CLASS *pThis, if (SetFRLLinkRate(pThis, subDevice, displayId, bFakeLt, NV_FALSE /* bLinkAssessmentOnly */, + NV_FALSE /* bDoNotSkipLt */, currFRLRate) != NVHDMIPKT_SUCCESS) { if (!bFakeLt) { if (SetFRLLinkRate(pThis, subDevice, displayId, NV_TRUE, NV_FALSE /* bLinkAssessmentOnly */, + NV_FALSE /* bDoNotSkipLt */, currFRLRate) != NVHDMIPKT_SUCCESS) { NvHdmiPkt_Assert(0); } @@ -1130,6 +1136,19 @@ hdmiQueryFRLConfigC671(NVHDMIPKT_CLASS *pThis, NvU32 bppMinX16Itr, bppMaxX16Itr; NvBool bHasPreCalcFRLData = NV_FALSE; + NvBool forceFRLRateDSC = pClientCtrl->forceFRLRate; + HDMI_FRL_DATA_RATE requestedFRLRate = pClientCtrl->frlRate; + +#if defined(NVHDMIPKT_NVKMS) + NvU32 rr = (pVidTransInfo->pTiming->pclk * (NvU64)10000) / + (pVidTransInfo->pTiming->HTotal * (NvU64)pVidTransInfo->pTiming->VTotal); + + if (!pVidTransInfo->pTiming->interlaced && (rr >= 480)) { + forceFRLRateDSC = NV_TRUE; + requestedFRLRate = dscMaxFRLRate; + } +#endif + // DSC_All_bpp = 1: // Lower the compression ratio better the pixel quality, hence a high bppTarget value will be ideal // DSC_All_bpp = 1 allows us the flexibility to use a bppTarget setting different from the primary compressed format @@ -1237,16 +1256,16 @@ hdmiQueryFRLConfigC671(NVHDMIPKT_CLASS *pThis, frlParams.compressionInfo.hSlices = NV_UNSIGNED_DIV_CEIL(pVidTransInfo->pTiming->HVisible, pClientCtrl->sliceWidth); } - if (pClientCtrl->forceFRLRate) + if (forceFRLRateDSC) { - if (pClientCtrl->frlRate > dscMaxFRLRate) + if (requestedFRLRate > dscMaxFRLRate) { result = NVHDMIPKT_FAIL; goto frlQuery_fail; } - minFRLRateItr = pClientCtrl->frlRate; - maxFRLRateItr = pClientCtrl->frlRate; + minFRLRateItr = requestedFRLRate; + maxFRLRateItr = requestedFRLRate; } if (pClientCtrl->forceBppx16) @@ -1419,6 +1438,7 @@ hdmiSetFRLConfigC671(NVHDMIPKT_CLASS *pThis, { return SetFRLLinkRate(pThis, subDevice, displayId, bFakeLt, NV_FALSE /* bLinkAssessmentOnly */, + NV_FALSE /* bDoNotSkipLt */, translateFRLRateToNv0073SetHdmiFrlConfig(pFRLConfig->frlRate)); } @@ -1432,6 +1452,7 @@ hdmiClearFRLConfigC671(NVHDMIPKT_CLASS *pThis, { return SetFRLLinkRate(pThis, subDevice, displayId, NV_FALSE, NV_FALSE /* bLinkAssessmentOnly */, + NV_FALSE /* bDoNotSkipLt */, NV0073_CTRL_HDMI_FRL_DATA_SET_FRL_RATE_NONE); } diff --git a/src/common/nvlink/inband/interface/nvlink_inband_msg.h b/src/common/nvlink/inband/interface/nvlink_inband_msg.h index 727d157ef..bce5d755e 100644 --- a/src/common/nvlink/inband/interface/nvlink_inband_msg.h +++ b/src/common/nvlink/inband/interface/nvlink_inband_msg.h @@ -84,6 +84,7 @@ typedef struct #define NVLINK_INBAND_GPU_PROBE_CAPS_ATS_SUPPORT NVBIT(3) #define NVLINK_INBAND_GPU_PROBE_CAPS_LINK_RETRAIN_SUPPORT NVBIT(4) #define NVLINK_INBAND_GPU_PROBE_CAPS_HEALTH_SUMMARY NVBIT(6) +#define NVLINK_INBAND_GPU_PROBE_CAPS_MC_RETRY NVBIT(8) /* Add more caps as need in the future */ diff --git a/src/common/sdk/nvidia/inc/ctrl/ctrl0073/ctrl0073specific.h 
b/src/common/sdk/nvidia/inc/ctrl/ctrl0073/ctrl0073specific.h index f52a49f39..3bceec0fd 100644 --- a/src/common/sdk/nvidia/inc/ctrl/ctrl0073/ctrl0073specific.h +++ b/src/common/sdk/nvidia/inc/ctrl/ctrl0073/ctrl0073specific.h @@ -1377,6 +1377,7 @@ typedef struct NV0073_CTRL_SPECIFIC_SET_HDMI_FRL_LINK_CONFIG_PARAMS { NvU32 displayId; NvU32 data; NvBool bFakeLt; + NvBool bDoNotSkipLt; NvBool bLtSkipped; NvBool bLinkAssessmentOnly; } NV0073_CTRL_SPECIFIC_SET_HDMI_FRL_LINK_CONFIG_PARAMS; diff --git a/src/common/sdk/nvidia/inc/nverror.h b/src/common/sdk/nvidia/inc/nverror.h index fa52e8bf3..6f9b9544a 100644 --- a/src/common/sdk/nvidia/inc/nverror.h +++ b/src/common/sdk/nvidia/inc/nverror.h @@ -165,7 +165,8 @@ #define ROBUST_CHANNEL_UNUSED_ERROR_170 (170) #define UNCORRECTABLE_DRAM_ERROR (171) #define UNCORRECTABLE_SRAM_ERROR (172) -#define ROBUST_CHANNEL_LAST_ERROR (172) +#define C2C_FATAL_LINK_FAILURE (173) +#define ROBUST_CHANNEL_LAST_ERROR (173) // Indexed CE reference #define ROBUST_CHANNEL_CE_ERROR(x) \ diff --git a/src/common/sdk/nvidia/inc/nvstatuscodes.h b/src/common/sdk/nvidia/inc/nvstatuscodes.h index 98ebb7b47..440434997 100644 --- a/src/common/sdk/nvidia/inc/nvstatuscodes.h +++ b/src/common/sdk/nvidia/inc/nvstatuscodes.h @@ -165,6 +165,7 @@ NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC, 0x00000087, "NVLink fabri NV_STATUS_CODE(NV_ERR_BUFFER_FULL, 0x00000088, "Buffer is full") NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY, 0x00000089, "Buffer is empty") NV_STATUS_CODE(NV_ERR_MC_FLA_OFFSET_TABLE_FULL, 0x0000008A, "Multicast FLA offset table has no available slots") +NV_STATUS_CODE(NV_ERR_DMA_XFER_FAILED, 0x0000008B, "DMA transfer failed") // Warnings: NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch") diff --git a/src/common/shared/inc/g_vgpu_chip_flags.h b/src/common/shared/inc/g_vgpu_chip_flags.h index 6184ed11a..845e8c97d 100644 --- a/src/common/shared/inc/g_vgpu_chip_flags.h +++ b/src/common/shared/inc/g_vgpu_chip_flags.h @@ -621,25 +621,6 @@ ENTRY(0x2238, 0x16B8, 0x10de, "NVIDIA A10M-10C"), ENTRY(0x2238, 0x16B9, 0x10de, "NVIDIA A10M-20C"), ENTRY(0x2238, 0x16E6, 0x10de, "NVIDIA A10M-1"), ENTRY(0x2238, 0x2208, 0x10de, "NVIDIA A10M-3B"), -ENTRY(0x230E, 0x20F5, 0x10de, "NVIDIA H20L-1-15CME"), -ENTRY(0x230E, 0x20F6, 0x10de, "NVIDIA H20L-1-15C"), -ENTRY(0x230E, 0x20F7, 0x10de, "NVIDIA H20L-1-30C"), -ENTRY(0x230E, 0x20F8, 0x10de, "NVIDIA H20L-2-30C"), -ENTRY(0x230E, 0x20F9, 0x10de, "NVIDIA H20L-3-60C"), -ENTRY(0x230E, 0x20FA, 0x10de, "NVIDIA H20L-4-60C"), -ENTRY(0x230E, 0x20FB, 0x10de, "NVIDIA H20L-7-120C"), -ENTRY(0x230E, 0x20FC, 0x10de, "NVIDIA H20L-4C"), -ENTRY(0x230E, 0x20FD, 0x10de, "NVIDIA H20L-5C"), -ENTRY(0x230E, 0x20FE, 0x10de, "NVIDIA H20L-6C"), -ENTRY(0x230E, 0x20FF, 0x10de, "NVIDIA H20L-8C"), -ENTRY(0x230E, 0x2100, 0x10de, "NVIDIA H20L-10C"), -ENTRY(0x230E, 0x2101, 0x10de, "NVIDIA H20L-12C"), -ENTRY(0x230E, 0x2102, 0x10de, "NVIDIA H20L-15C"), -ENTRY(0x230E, 0x2103, 0x10de, "NVIDIA H20L-20C"), -ENTRY(0x230E, 0x2104, 0x10de, "NVIDIA H20L-30C"), -ENTRY(0x230E, 0x2105, 0x10de, "NVIDIA H20L-40C"), -ENTRY(0x230E, 0x2106, 0x10de, "NVIDIA H20L-60C"), -ENTRY(0x230E, 0x2107, 0x10de, "NVIDIA H20L-120C"), ENTRY(0x2321, 0x1853, 0x10de, "NVIDIA H100L-1-12CME"), ENTRY(0x2321, 0x1854, 0x10de, "NVIDIA H100L-1-12C"), ENTRY(0x2321, 0x1855, 0x10de, "NVIDIA H100L-1-24C"), diff --git a/src/common/shared/inc/g_vgpu_resman_specific.h b/src/common/shared/inc/g_vgpu_resman_specific.h index eeca3a11e..ad84bab6b 100644 --- a/src/common/shared/inc/g_vgpu_resman_specific.h +++ 
b/src/common/shared/inc/g_vgpu_resman_specific.h @@ -17,7 +17,6 @@ static inline void _get_chip_id_for_alias_pgpu(NvU32 *dev_id, NvU32 *subdev_id) { 0x20B7, 0x1804, 0x20B7, 0x1532 }, { 0x20B9, 0x157F, 0x20B7, 0x1532 }, { 0x20FD, 0x17F8, 0x20F5, 0x0 }, - { 0x230E, 0x20DF, 0x230E, 0x20DF }, { 0x2324, 0x17A8, 0x2324, 0x17A6 }, { 0x2329, 0x198C, 0x2329, 0x198B }, { 0x232C, 0x2064, 0x232C, 0x2063 }, @@ -122,13 +121,6 @@ static const struct { {0x20F610DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1094}, // GRID A800-4-20C {0x20F610DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1095}, // GRID A800-7-40C {0x20F610DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1091}, // GRID A800-1-10C - {0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1499}, // NVIDIA H20L-1-15CME - {0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1500}, // NVIDIA H20L-1-15C - {0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1501}, // NVIDIA H20L-1-30C - {0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1502}, // NVIDIA H20L-2-30C - {0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1503}, // NVIDIA H20L-3-60C - {0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1504}, // NVIDIA H20L-4-60C - {0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1505}, // NVIDIA H20L-7-120C {0x232110DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1061}, // NVIDIA H100L-1-12CME {0x232110DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1062}, // NVIDIA H100L-1-12C {0x232110DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1063}, // NVIDIA H100L-1-24C diff --git a/src/common/shared/msgq/inc/msgq/msgq.h b/src/common/shared/msgq/inc/msgq/msgq.h index ee0d6f05a..d81bbd235 100644 --- a/src/common/shared/msgq/inc/msgq/msgq.h +++ b/src/common/shared/msgq/inc/msgq/msgq.h @@ -68,8 +68,8 @@ typedef void (*msgqFcnBarrier)(void); // Function to access backend memory (if it's not memory mapped). // Keep in mind that when using it, pointers given by peek can't be trusted // Should return 0 on success. -typedef int (*msgqFcnBackendRw)(void *pDest, const void *pSrc, unsigned size, - unsigned flags, void *pArg); +typedef unsigned (*msgqFcnBackendRw)(void *pDest, const void *pSrc, unsigned size, + unsigned flags, void *pArg); /** * @brief Return size of metadata (that must be allocated) diff --git a/src/common/shared/msgq/msgq.c b/src/common/shared/msgq/msgq.c index 84714b318..76d9dd361 100644 --- a/src/common/shared/msgq/msgq.c +++ b/src/common/shared/msgq/msgq.c @@ -104,35 +104,45 @@ msgqSetBarrier(msgqHandle handle, msgqFcnBarrier fcn) /* * Helper functions to access indirect backend. */ - -sysSHARED_CODE static void +// TODO: Make these functions return NV_STATUS instead of int wherever possible.
+sysSHARED_CODE static int _backendRead32(msgqMetadata *pQueue, volatile const void *pAddr, NvU32 *pVal, unsigned flags) { if (pQueue->fcnBackendRw != NULL) { - pQueue->fcnBackendRw(pVal, (const void *)pAddr, sizeof(*pVal), - flags | FCN_FLAG_BACKEND_ACCESS_READ, - pQueue->fcnBackendRwArg); + int status = pQueue->fcnBackendRw(pVal, (const void *)pAddr, sizeof(*pVal), + flags | FCN_FLAG_BACKEND_ACCESS_READ, + pQueue->fcnBackendRwArg); + if (status != 0) + { + return -1; + } } else { *pVal = *(volatile const NvU32*)pAddr; } + return 0; } -sysSHARED_CODE static void +sysSHARED_CODE static int _backendWrite32(msgqMetadata *pQueue, volatile void *pAddr, NvU32 *pVal, unsigned flags) { if (pQueue->fcnBackendRw != NULL) { - pQueue->fcnBackendRw((void*)pAddr, pVal, sizeof(*pVal), - flags | FCN_FLAG_BACKEND_ACCESS_WRITE, - pQueue->fcnBackendRwArg); + int status = pQueue->fcnBackendRw((void*)pAddr, pVal, sizeof(*pVal), + flags | FCN_FLAG_BACKEND_ACCESS_WRITE, + pQueue->fcnBackendRwArg); + if (status != 0) + { + return -1; + } } else { *(volatile NvU32*)pAddr = *pVal; } + return 0; } /** @@ -142,7 +152,7 @@ _backendWrite32(msgqMetadata *pQueue, volatile void *pAddr, NvU32 *pVal, unsigne sysSHARED_CODE static void msgqRiscvDefaultBarrier(void) { - asm volatile("fence iorw,iorw"); + __asm__ volatile("fence iorw,iorw"); } #endif @@ -188,6 +198,7 @@ msgqTxCreate { msgqMetadata *pQueue = (msgqMetadata*)handle; msgqTxHeader *pTx; + int status; if ((pQueue == NULL) || pQueue->txLinked) { @@ -282,10 +293,15 @@ msgqTxCreate // Indirect access to backend if (pQueue->fcnBackendRw != NULL) { - pQueue->fcnBackendRw(pTx, &pQueue->tx, sizeof *pTx, - FCN_FLAG_BACKEND_ACCESS_WRITE | FCN_FLAG_BACKEND_QUEUE_TX, - pQueue->fcnBackendRwArg); - } else + status = pQueue->fcnBackendRw(pTx, &pQueue->tx, sizeof *pTx, + FCN_FLAG_BACKEND_ACCESS_WRITE | FCN_FLAG_BACKEND_QUEUE_TX, + pQueue->fcnBackendRwArg); + if (status != 0) + { + return -1; + } + } + else { memcpy(pTx, &pQueue->tx, sizeof *pTx); } @@ -315,6 +331,7 @@ sysSHARED_CODE int msgqRxLink(msgqHandle handle, const void *pBackingStore, unsigned size, unsigned msgSize) { msgqMetadata *pQueue = (msgqMetadata*)handle; + int status; if ((pQueue == NULL) || pQueue->rxLinked) { @@ -347,10 +364,14 @@ msgqRxLink(msgqHandle handle, const void *pBackingStore, unsigned size, unsigned // copy their metadata if (pQueue->fcnBackendRw != NULL) { - pQueue->fcnBackendRw(&pQueue->rx, (const void *)pQueue->pTheirTxHdr, - sizeof pQueue->rx, - FCN_FLAG_BACKEND_ACCESS_READ | FCN_FLAG_BACKEND_QUEUE_RX, - pQueue->fcnBackendRwArg); + status = pQueue->fcnBackendRw(&pQueue->rx, (const void *)pQueue->pTheirTxHdr, + sizeof pQueue->rx, + FCN_FLAG_BACKEND_ACCESS_READ | FCN_FLAG_BACKEND_QUEUE_RX, + pQueue->fcnBackendRwArg); + if (status != 0) + { + return -11; + } } else { @@ -413,8 +434,13 @@ msgqRxLink(msgqHandle handle, const void *pBackingStore, unsigned size, unsigned } pQueue->rxReadPtr = 0; - _backendWrite32(pQueue, pQueue->pReadOutgoing, &pQueue->rxReadPtr, - pQueue->rxSwapped ? FCN_FLAG_BACKEND_QUEUE_TX : FCN_FLAG_BACKEND_QUEUE_RX); + status = _backendWrite32(pQueue, pQueue->pReadOutgoing, &pQueue->rxReadPtr, + pQueue->rxSwapped ? FCN_FLAG_BACKEND_QUEUE_TX : FCN_FLAG_BACKEND_QUEUE_RX); + if (status != 0) + { + return -12; + } + if (pQueue->fcnFlush != NULL) { pQueue->fcnFlush(pQueue->pReadOutgoing, sizeof(NvU32)); @@ -451,8 +477,12 @@ msgqTxGetFreeSpace(msgqHandle handle) return 0; } - _backendRead32(pQueue, pQueue->pReadIncoming, &pQueue->txReadPtr, - pQueue->rxSwapped ? 
FCN_FLAG_BACKEND_QUEUE_RX : FCN_FLAG_BACKEND_QUEUE_TX); + if (_backendRead32(pQueue, pQueue->pReadIncoming, &pQueue->txReadPtr, + pQueue->rxSwapped ? FCN_FLAG_BACKEND_QUEUE_RX : FCN_FLAG_BACKEND_QUEUE_TX) != 0) + { + return 0; + } + if (pQueue->txReadPtr >= pQueue->tx.msgCount) { return 0; @@ -505,6 +535,7 @@ sysSHARED_CODE int msgqTxSubmitBuffers(msgqHandle handle, unsigned n) { msgqMetadata *pQueue = (msgqMetadata*)handle; + int status; if ((pQueue == NULL) || !pQueue->txLinked) { @@ -531,8 +562,19 @@ msgqTxSubmitBuffers(msgqHandle handle, unsigned n) pQueue->tx.writePtr -= pQueue->tx.msgCount; } - _backendWrite32(pQueue, pQueue->pWriteOutgoing, - &pQueue->tx.writePtr, FCN_FLAG_BACKEND_QUEUE_TX); + status = _backendWrite32(pQueue, pQueue->pWriteOutgoing, + &pQueue->tx.writePtr, FCN_FLAG_BACKEND_QUEUE_TX); + if (status != 0) + { + // restore write pointer + if (pQueue->tx.writePtr < n) + { + pQueue->tx.writePtr += pQueue->tx.msgCount; + } + + pQueue->tx.writePtr -= n; + return -2; + } // Adjust cached value for number of free elements. pQueue->txFree -= n; @@ -606,7 +648,11 @@ msgqRxGetReadAvailable(msgqHandle handle) return 0; } - _backendRead32(pQueue, pQueue->pWriteIncoming, &pQueue->rx.writePtr, FCN_FLAG_BACKEND_QUEUE_RX); + if (_backendRead32(pQueue, pQueue->pWriteIncoming, &pQueue->rx.writePtr, FCN_FLAG_BACKEND_QUEUE_RX) != 0) + { + return 0; + } + if (pQueue->rx.writePtr >= pQueue->rx.msgCount) { return 0; @@ -659,6 +705,7 @@ sysSHARED_CODE int msgqRxMarkConsumed(msgqHandle handle, unsigned n) { msgqMetadata *pQueue = (msgqMetadata*)handle; + int status; if ((pQueue == NULL) || !pQueue->rxLinked) { @@ -679,8 +726,19 @@ msgqRxMarkConsumed(msgqHandle handle, unsigned n) } // Copy to backend - _backendWrite32(pQueue, pQueue->pReadOutgoing, &pQueue->rxReadPtr, - pQueue->rxSwapped ? FCN_FLAG_BACKEND_QUEUE_TX : FCN_FLAG_BACKEND_QUEUE_RX); + status = _backendWrite32(pQueue, pQueue->pReadOutgoing, &pQueue->rxReadPtr, + pQueue->rxSwapped ? FCN_FLAG_BACKEND_QUEUE_TX : FCN_FLAG_BACKEND_QUEUE_RX); + if (status != 0) + { + // restore read pointer + if (pQueue->rxReadPtr < n) + { + pQueue->rxReadPtr += pQueue->rx.msgCount; + } + + pQueue->rxReadPtr -= n; + return -2; + } // Adjust cached value for number of available elements. pQueue->rxAvail -= n; diff --git a/src/nvidia-modeset/src/nvkms-dpy.c b/src/nvidia-modeset/src/nvkms-dpy.c index dcbc4085a..bbdf4df50 100644 --- a/src/nvidia-modeset/src/nvkms-dpy.c +++ b/src/nvidia-modeset/src/nvkms-dpy.c @@ -909,6 +909,29 @@ void nvDpyProbeMaxPixelClock(NVDpyEvoPtr pDpyEvo) pDpyEvo->maxPixelClockKHz = ((4 * 12 * 1000 * 1000 * 16) / 18); } + } else { + const NVParsedEdidEvoRec *pParsedEdid = &pDpyEvo->parsedEdid; + + if (pParsedEdid->valid) { + const NVT_EDID_INFO *pEdidInfo = &pParsedEdid->info; + /* Default Maximum HDMI TMDS character rate is 165MHz. 
*/ + NvU32 maxTmdsCharRate = 33; + + if (pEdidInfo->ext861.valid.H20_HF_VSDB && + (pEdidInfo->hdmiForumInfo.max_TMDS_char_rate > 0)) { + maxTmdsCharRate = + NV_MIN(pEdidInfo->hdmiForumInfo.max_TMDS_char_rate, 120); + } else if (pEdidInfo->ext861.valid.H14B_VSDB && + (pEdidInfo->hdmiLlcInfo.max_tmds_clock > 0)) { + maxTmdsCharRate = + NV_MIN(pEdidInfo->hdmiLlcInfo.max_tmds_clock, 68); + } + + /* Max Pixel Rate = Max TMDS character Rate * 5MHz */ + pDpyEvo->maxPixelClockKHz = + pDpyEvo->maxSingleLinkPixelClockKHz = + maxTmdsCharRate * 5000; + } } } else { /* diff --git a/src/nvidia-modeset/src/nvkms-hdmi.c b/src/nvidia-modeset/src/nvkms-hdmi.c index 23c03c21a..e7f2ca230 100644 --- a/src/nvidia-modeset/src/nvkms-hdmi.c +++ b/src/nvidia-modeset/src/nvkms-hdmi.c @@ -2036,7 +2036,13 @@ NvBool nvHdmiDpySupportsFrl(const NVDpyEvoRec *pDpyEvo) { const NVDevEvoRec *pDevEvo = pDpyEvo->pDispEvo->pDevEvo; - nvAssert(nvDpyIsHdmiEvo(pDpyEvo)); + /* + * Can't use FRL if HDMI is not supported by the GPU and the monitor + * connection. + */ + if (!nvDpyIsHdmiEvo(pDpyEvo)) { + return FALSE; + } /* Can't use FRL if disabled by kernel module param. */ if (nvkms_disable_hdmi_frl()) { @@ -2102,9 +2108,6 @@ NvBool nvHdmiIsTmdsPossible(const NVDpyEvoRec *pDpyEvo, pDpyEvo->pDispEvo->pDevEvo->caps.hdmiTmds10BpcMaxPClkMHz * 1000UL; NvU32 adjustedMaxPixelClock = (pDpyEvo->maxSingleLinkPixelClockKHz * 4ULL) / 5ULL; - NvU32 adjustedMaxEDIDPixelClock = - pDpyEvo->parsedEdid.valid ? - (pDpyEvo->parsedEdid.limits.max_pclk_10khz * 10 * 4ULL) / 5ULL : 0; /* Pixel clock must satisfy hdmiTmds10BpcMaxPClkKHz, if applicable. */ if ((hdmiTmds10BpcMaxPClkKHz > 0) && @@ -2117,12 +2120,6 @@ NvBool nvHdmiIsTmdsPossible(const NVDpyEvoRec *pDpyEvo, return FALSE; } - /* Pixel clock must also satisfy adjustedMaxEDIDPixelClock. */ - if (adjustedMaxEDIDPixelClock != 0 && - pixelClock > adjustedMaxEDIDPixelClock) { - return FALSE; - } - return TRUE; } diff --git a/src/nvidia-modeset/src/nvkms-modepool.c b/src/nvidia-modeset/src/nvkms-modepool.c index 57c7b18d9..75ef6fb4e 100644 --- a/src/nvidia-modeset/src/nvkms-modepool.c +++ b/src/nvidia-modeset/src/nvkms-modepool.c @@ -1214,43 +1214,66 @@ static NvBool ValidateModeTimings( } } - /* reject modes with too high pclk */ + /* + * Reject modes with too high pclk, except when using HDMI FRL or + * DisplayPort. FRL and DP have features like DSC that cannot be trivially + * checked against a pixel clock rate limit. Instead: + * + * - DPlib will perform link assessment to determine whether both the + * monitor and GPU can drive a particular bandwidth. + * + * - hdmipacket will perform the equivalent for FRL. 
+ * + * TMDS will only be considered on a connection capable of HDMI FRL for the + * mode being validated if nvHdmiIsTmdsPossible returns TRUE in the + * following callpath: + * + * ValidateMode + * |_ ValidateModeTimings + * |_ nvConstructHwModeTimingsEvo + * |_ GetDfpProtocol + * |_ GetDfpHdmiProtocol + * |_ nvHdmiIsTmdsPossible + */ - if ((overrides & NVKMS_MODE_VALIDATION_NO_MAX_PCLK_CHECK) == 0) { + if (!(nvHdmiDpySupportsFrl(pDpyEvo) || + nvConnectorUsesDPLib(pDpyEvo->pConnectorEvo))) { + if ((overrides & NVKMS_MODE_VALIDATION_NO_MAX_PCLK_CHECK) == 0) { - NvU32 maxPixelClockKHz = pDpyEvo->maxPixelClockKHz; - NvU32 realPixelClock = HzToKHz(pModeTimings->pixelClockHz); - if (pModeTimings->yuv420Mode != NV_YUV420_MODE_NONE) { - realPixelClock /= 2; - } - - if (realPixelClock > maxPixelClockKHz) { - NvU32 hdmi3DPixelClock = realPixelClock; - - if (pModeTimings->hdmi3D) { - hdmi3DPixelClock /= 2; + NvU32 maxPixelClockKHz = pDpyEvo->maxPixelClockKHz; + NvU32 realPixelClock = HzToKHz(pModeTimings->pixelClockHz); + if (pModeTimings->yuv420Mode != NV_YUV420_MODE_NONE) { + realPixelClock /= 2; } - if (is3DVisionStereo && - pDpyEvo->stereo3DVision.requiresModetimingPatching && - (realPixelClock - maxPixelClockKHz < 5000)) { + if (realPixelClock > maxPixelClockKHz) { + NvU32 hdmi3DPixelClock = realPixelClock; - nvAssert(!pModeTimings->hdmi3D); + if (pModeTimings->hdmi3D) { + hdmi3DPixelClock /= 2; + } - nvEvoLogInfoString(pInfoString, - "PixelClock (" NV_FMT_DIV_1000_POINT_1 " MHz) is slightly higher than Display Device maximum (" NV_FMT_DIV_1000_POINT_1 " MHz), but is within tolerance for 3D Vision Stereo.", - NV_VA_DIV_1000_POINT_1(realPixelClock), - NV_VA_DIV_1000_POINT_1(maxPixelClockKHz)); + if (is3DVisionStereo && + pDpyEvo->stereo3DVision.requiresModetimingPatching && + (realPixelClock - maxPixelClockKHz < 5000)) { - } else { + nvAssert(!pModeTimings->hdmi3D); - LogModeValidationEnd(pDispEvo, pInfoString, - "PixelClock (" NV_FMT_DIV_1000_POINT_1 " MHz%s) too high for Display Device (Max: " NV_FMT_DIV_1000_POINT_1 " MHz)", - NV_VA_DIV_1000_POINT_1(hdmi3DPixelClock), - pModeTimings->hdmi3D ? - ", doubled for HDMI 3D" : "", - NV_VA_DIV_1000_POINT_1(maxPixelClockKHz)); - return FALSE; + nvEvoLogInfoString(pInfoString, + "PixelClock (" NV_FMT_DIV_1000_POINT_1 " MHz) is slightly higher than Display Device maximum (" NV_FMT_DIV_1000_POINT_1 " MHz), but is within tolerance for 3D Vision Stereo.", + NV_VA_DIV_1000_POINT_1(realPixelClock), + NV_VA_DIV_1000_POINT_1(maxPixelClockKHz)); + + } else { + + LogModeValidationEnd(pDispEvo, pInfoString, + "PixelClock (" NV_FMT_DIV_1000_POINT_1 " MHz%s) too high for Display Device (Max: " NV_FMT_DIV_1000_POINT_1 " MHz)", + NV_VA_DIV_1000_POINT_1(hdmi3DPixelClock), + pModeTimings->hdmi3D ? + ", doubled for HDMI 3D" : "", + NV_VA_DIV_1000_POINT_1(maxPixelClockKHz)); + return FALSE; + } } } } diff --git a/src/nvidia-modeset/src/nvkms-vrr.c b/src/nvidia-modeset/src/nvkms-vrr.c index 4c8e26a71..3e2f8976d 100644 --- a/src/nvidia-modeset/src/nvkms-vrr.c +++ b/src/nvidia-modeset/src/nvkms-vrr.c @@ -254,6 +254,17 @@ nvGetAllowedDpyVrrType(const NVDpyEvoRec *pDpyEvo, const NvBool allowGsync, const enum NvKmsAllowAdaptiveSync allowAdaptiveSync) { + + if (nvDpyIsHdmiEvo(pDpyEvo)) { + /* + * Do not allow HDMI VRR if refresh rate less than + * 50Hz or Vactive < 720. 
+ */ + if ((pTimings->vVisible < 720) || (pTimings->RRx1k < 50000)) { + return NVKMS_DPY_VRR_TYPE_NONE; + } + } + /* * Mark these mode timings as indicating a VRR mode, even if the timings * don't need to be adjusted; this is used to distinguish between VRR and diff --git a/src/nvidia/arch/nvalloc/unix/include/os-interface.h b/src/nvidia/arch/nvalloc/unix/include/os-interface.h index 6eb955964..84842e17a 100644 --- a/src/nvidia/arch/nvalloc/unix/include/os-interface.h +++ b/src/nvidia/arch/nvalloc/unix/include/os-interface.h @@ -62,6 +62,11 @@ struct os_work_queue; /* Each OS defines its own version of this opaque type */ typedef struct os_wait_queue os_wait_queue; +/* Flags needed by os_get_current_process_flags */ +#define OS_CURRENT_PROCESS_FLAG_NONE 0x0 +#define OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD 0x1 +#define OS_CURRENT_PROCESS_FLAG_EXITING 0x2 + /* * --------------------------------------------------------------------------- * @@ -190,6 +195,7 @@ NV_STATUS NV_API_CALL os_open_readonly_file (const char *, void ** NV_STATUS NV_API_CALL os_open_and_read_file (const char *, NvU8 *, NvU64); NvBool NV_API_CALL os_is_nvswitch_present (void); NV_STATUS NV_API_CALL os_get_random_bytes (NvU8 *, NvU16); +NvU32 NV_API_CALL os_get_current_process_flags (void); NV_STATUS NV_API_CALL os_alloc_wait_queue (os_wait_queue **); void NV_API_CALL os_free_wait_queue (os_wait_queue *); void NV_API_CALL os_wait_uninterruptible (os_wait_queue *); diff --git a/src/nvidia/arch/nvalloc/unix/src/os.c b/src/nvidia/arch/nvalloc/unix/src/os.c index 2c1a89c22..2379c382d 100644 --- a/src/nvidia/arch/nvalloc/unix/src/os.c +++ b/src/nvidia/arch/nvalloc/unix/src/os.c @@ -5074,6 +5074,18 @@ osGetRandomBytes return os_get_random_bytes(pBytes, numBytes); } +/* + * @brief Get current process flags.
+ */ +NvU32 +osGetCurrentProcessFlags +( + void +) +{ + return os_get_current_process_flags(); +} + /* * @brief Allocate wait queue * diff --git a/src/nvidia/generated/g_kern_mem_sys_nvoc.c b/src/nvidia/generated/g_kern_mem_sys_nvoc.c index 6e241eae1..1f0400e95 100644 --- a/src/nvidia/generated/g_kern_mem_sys_nvoc.c +++ b/src/nvidia/generated/g_kern_mem_sys_nvoc.c @@ -661,8 +661,7 @@ static void __nvoc_init_funcTable_KernelMemorySystem_1(KernelMemorySystem *pThis } // kmemsysNeedInvalidateGpuCacheOnMap -- halified (2 hals) body - if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x71f0ffe0UL) ) || - ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000003e6UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 */ + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000003e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 */ { pThis->__kmemsysNeedInvalidateGpuCacheOnMap__ = &kmemsysNeedInvalidateGpuCacheOnMap_GV100; } @@ -673,9 +672,9 @@ static void __nvoc_init_funcTable_KernelMemorySystem_1(KernelMemorySystem *pThis } // kmemsysNeedInvalidateGpuCacheOnUnmap -- halified (2 hals) body - if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x80000000UL) ) || - ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000c00UL) ) || - ( ((chipHal_HalVarIdx >> 5) == 3UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00005000UL) )) /* ChipHal: GB10B | GB20B | GB20C | T234D | T264D */ + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0fc00UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 3UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00005000UL) )) /* ChipHal: GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C | T234D | T264D */ { pThis->__kmemsysNeedInvalidateGpuCacheOnUnmap__ = &kmemsysNeedInvalidateGpuCacheOnUnmap_T194; } diff --git a/src/nvidia/generated/g_nv_name_released.h b/src/nvidia/generated/g_nv_name_released.h index bc9d4a5ed..a0fcfd284 100644 --- a/src/nvidia/generated/g_nv_name_released.h +++ b/src/nvidia/generated/g_nv_name_released.h @@ -5214,6 +5214,7 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2236, 0x1482, 0x10de, "NVIDIA A10" }, { 0x2237, 0x152f, 0x10de, "NVIDIA A10G" }, { 0x2238, 0x1677, 0x10de, "NVIDIA A10M" }, + { 0x230E, 0x20df, 0x10de, "NVIDIA H20 NVL16" }, { 0x2321, 0x1839, 0x10de, "NVIDIA H100 NVL" }, { 0x2322, 0x17a4, 0x10de, "NVIDIA H800 PCIe" }, { 0x2324, 0x17a6, 0x10de, "NVIDIA H800" }, @@ -5414,9 +5415,10 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2941, 0x20d5, 0x10de, "NVIDIA GB200" }, { 0x2941, 0x21c9, 0x10de, "NVIDIA GB200" }, { 0x2941, 0x21ca, 0x10de, "NVIDIA GB200" }, + { 0x29BB, 0x207c, 0x10de, "NVIDIA DRIVE P2021" }, { 0x2B85, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090" }, { 0x2B87, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090 D" }, - { 0x2B8C, 0x530c, 0x17aa, "NVIDIA GeForce RTX 5090 D v2" }, + { 0x2B8C, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090 D v2" }, { 0x2BB1, 0x204b, 0x1028, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, { 0x2BB1, 0x204b, 
0x103c, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, { 0x2BB1, 0x204b, 0x10de, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, @@ -5429,6 +5431,8 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2BB4, 0x204c, 0x103c, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, { 0x2BB4, 0x204c, 0x10de, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, { 0x2BB4, 0x204c, 0x17aa, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, + { 0x2BB5, 0x204e, 0x10de, "NVIDIA RTX PRO 6000 Blackwell Server Edition" }, + { 0x2BB9, 0x2091, 0x10de, "NVIDIA RTX 6000D" }, { 0x2C02, 0x0000, 0x0000, "NVIDIA GeForce RTX 5080" }, { 0x2C05, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Ti" }, { 0x2C18, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090 Laptop GPU" }, @@ -5439,6 +5443,7 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2C31, 0x2051, 0x17aa, "NVIDIA RTX PRO 4500 Blackwell" }, { 0x2C33, 0x2053, 0x1028, "NVIDIA RTX PRO 4000 Blackwell SFF Edition" }, { 0x2C33, 0x2053, 0x103c, "NVIDIA RTX PRO 4000 Blackwell SFF Edition" }, + { 0x2C33, 0x2053, 0x10de, "NVIDIA RTX PRO 4000 Blackwell SFF Edition" }, { 0x2C33, 0x2053, 0x17aa, "NVIDIA RTX PRO 4000 Blackwell SFF Edition" }, { 0x2C34, 0x2052, 0x1028, "NVIDIA RTX PRO 4000 Blackwell" }, { 0x2C34, 0x2052, 0x103c, "NVIDIA RTX PRO 4000 Blackwell" }, @@ -5448,25 +5453,32 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2C39, 0x0000, 0x0000, "NVIDIA RTX PRO 4000 Blackwell Generation Laptop GPU" }, { 0x2C58, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090 Laptop GPU" }, { 0x2C59, 0x0000, 0x0000, "NVIDIA GeForce RTX 5080 Laptop GPU" }, + { 0x2C77, 0x0000, 0x0000, "NVIDIA RTX PRO 5000 Blackwell Embedded GPU" }, + { 0x2C79, 0x0000, 0x0000, "NVIDIA RTX PRO 4000 Blackwell Embedded GPU" }, { 0x2D04, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060 Ti" }, { 0x2D05, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060" }, { 0x2D18, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Laptop GPU" }, { 0x2D19, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060 Laptop GPU" }, { 0x2D30, 0x2054, 0x1028, "NVIDIA RTX PRO 2000 Blackwell" }, { 0x2D30, 0x2054, 0x103c, "NVIDIA RTX PRO 2000 Blackwell" }, + { 0x2D30, 0x2054, 0x10de, "NVIDIA RTX PRO 2000 Blackwell" }, { 0x2D30, 0x2054, 0x17aa, "NVIDIA RTX PRO 2000 Blackwell" }, { 0x2D39, 0x0000, 0x0000, "NVIDIA RTX PRO 2000 Blackwell Generation Laptop GPU" }, { 0x2D58, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Laptop GPU" }, { 0x2D59, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060 Laptop GPU" }, - { 0x2D83, 0xc791, 0x17aa, "NVIDIA GeForce RTX 5050" }, + { 0x2D79, 0x0000, 0x0000, "NVIDIA RTX PRO 2000 Blackwell Embedded GPU" }, + { 0x2D83, 0x0000, 0x0000, "NVIDIA GeForce RTX 5050" }, { 0x2D98, 0x0000, 0x0000, "NVIDIA GeForce RTX 5050 Laptop GPU" }, { 0x2DB8, 0x0000, 0x0000, "NVIDIA RTX PRO 1000 Blackwell Generation Laptop GPU" }, { 0x2DB9, 0x0000, 0x0000, "NVIDIA RTX PRO 500 Blackwell Generation Laptop GPU" }, { 0x2DD8, 0x0000, 0x0000, "NVIDIA GeForce RTX 5050 Laptop GPU" }, + { 0x2DF9, 0x0000, 0x0000, "NVIDIA RTX PRO 500 Blackwell Embedded GPU" }, { 0x2F04, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070" }, { 0x2F18, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Ti Laptop GPU" }, { 0x2F38, 0x0000, 0x0000, "NVIDIA RTX PRO 3000 Blackwell Generation Laptop GPU" }, { 0x2F58, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Ti Laptop GPU" }, + { 0x3182, 0x20e6, 0x10de, "NVIDIA B300 SXM6 AC" }, + { 0x31C2, 0x21f1, 0x10de, "NVIDIA GB300" }, { 0x13BD, 0x11cc, 0x10DE, "GRID M10-0B" }, { 0x13BD, 0x11cd, 0x10DE, "GRID M10-1B" }, { 0x13BD, 0x11ce, 0x10DE, "GRID M10-0Q" }, @@ -6067,25 
+6079,6 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2238, 0x16b9, 0x10DE, "NVIDIA A10M-20C" }, { 0x2238, 0x16e6, 0x10DE, "NVIDIA A10M-1" }, { 0x2238, 0x2208, 0x10DE, "NVIDIA A10M-3B" }, - { 0x230E, 0x20f5, 0x10DE, "NVIDIA H20L-1-15CME" }, - { 0x230E, 0x20f6, 0x10DE, "NVIDIA H20L-1-15C" }, - { 0x230E, 0x20f7, 0x10DE, "NVIDIA H20L-1-30C" }, - { 0x230E, 0x20f8, 0x10DE, "NVIDIA H20L-2-30C" }, - { 0x230E, 0x20f9, 0x10DE, "NVIDIA H20L-3-60C" }, - { 0x230E, 0x20fa, 0x10DE, "NVIDIA H20L-4-60C" }, - { 0x230E, 0x20fb, 0x10DE, "NVIDIA H20L-7-120C" }, - { 0x230E, 0x20fc, 0x10DE, "NVIDIA H20L-4C" }, - { 0x230E, 0x20fd, 0x10DE, "NVIDIA H20L-5C" }, - { 0x230E, 0x20fe, 0x10DE, "NVIDIA H20L-6C" }, - { 0x230E, 0x20ff, 0x10DE, "NVIDIA H20L-8C" }, - { 0x230E, 0x2100, 0x10DE, "NVIDIA H20L-10C" }, - { 0x230E, 0x2101, 0x10DE, "NVIDIA H20L-12C" }, - { 0x230E, 0x2102, 0x10DE, "NVIDIA H20L-15C" }, - { 0x230E, 0x2103, 0x10DE, "NVIDIA H20L-20C" }, - { 0x230E, 0x2104, 0x10DE, "NVIDIA H20L-30C" }, - { 0x230E, 0x2105, 0x10DE, "NVIDIA H20L-40C" }, - { 0x230E, 0x2106, 0x10DE, "NVIDIA H20L-60C" }, - { 0x230E, 0x2107, 0x10DE, "NVIDIA H20L-120C" }, { 0x2321, 0x1853, 0x10DE, "NVIDIA H100L-1-12CME" }, { 0x2321, 0x1854, 0x10DE, "NVIDIA H100L-1-12C" }, { 0x2321, 0x1855, 0x10DE, "NVIDIA H100L-1-24C" }, diff --git a/src/nvidia/generated/g_os_nvoc.h b/src/nvidia/generated/g_os_nvoc.h index 08c3d1a3a..2c08adfd2 100644 --- a/src/nvidia/generated/g_os_nvoc.h +++ b/src/nvidia/generated/g_os_nvoc.h @@ -215,6 +215,11 @@ typedef struct RM_PAGEABLE_SECTION { #define OS_ALLOC_PAGES_NODE_NONE 0x0 #define OS_ALLOC_PAGES_NODE_SKIP_RECLAIM 0x1 +// Flags needed by osGetCurrentProccessFlags +#define OS_CURRENT_PROCESS_FLAG_NONE 0x0 +#define OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD 0x1 +#define OS_CURRENT_PROCESS_FLAG_EXITING 0x2 + // // Structures for osPackageRegistry and osUnpackageRegistry // @@ -737,6 +742,8 @@ NvS32 osImexChannelCount(void); NV_STATUS osGetRandomBytes(NvU8 *pBytes, NvU16 numBytes); +NvU32 osGetCurrentProcessFlags(void); + NV_STATUS osAllocWaitQueue(OS_WAIT_QUEUE **ppWq); void osFreeWaitQueue(OS_WAIT_QUEUE *pWq); void osWaitUninterruptible(OS_WAIT_QUEUE *pWq); diff --git a/src/nvidia/generated/g_sysmem_scrub_nvoc.h b/src/nvidia/generated/g_sysmem_scrub_nvoc.h index f5437a5af..c91ad877e 100644 --- a/src/nvidia/generated/g_sysmem_scrub_nvoc.h +++ b/src/nvidia/generated/g_sysmem_scrub_nvoc.h @@ -78,14 +78,21 @@ typedef struct { MEMORY_DESCRIPTOR *pMemDesc; NvU64 semaphoreValue; + NODE listNode; } SysScrubEntry; -MAKE_LIST(SysScrubList, SysScrubEntry); +MAKE_INTRUSIVE_LIST(SysScrubList, SysScrubEntry, listNode); typedef struct { + // semaphore event handle doesn't take GPU lock + PORT_SPINLOCK *pSpinlock; + + // spinlock needs to be taken to use pSysmemScrubber struct SysmemScrubber *pSysmemScrubber; + NvU32 refCount; + NvU32 bWorkerQueued; } SysmemScrubberWorkerParams; @@ -124,7 +131,6 @@ struct SysmemScrubber { struct CeUtils *pCeUtils; SysScrubList asyncScrubList; NvBool bAsync; - NvBool bCallbackQueued; SysmemScrubberWorkerParams *pWorkerParams; }; diff --git a/src/nvidia/inc/kernel/core/thread_state.h b/src/nvidia/inc/kernel/core/thread_state.h index 1f721c8ed..01ac1e485 100644 --- a/src/nvidia/inc/kernel/core/thread_state.h +++ b/src/nvidia/inc/kernel/core/thread_state.h @@ -187,6 +187,8 @@ typedef struct THREAD_STATE_DB #define THREAD_STATE_FLAGS_TIMEOUT_INITED NVBIT(5) #define THREAD_STATE_FLAGS_DEVICE_INIT NVBIT(7) #define THREAD_STATE_FLAGS_STATE_FREE_CB_ENABLED NVBIT(8) +#define 
THREAD_STATE_FLAGS_IS_KERNEL_THREAD NVBIT(9) +#define THREAD_STATE_FLAGS_IS_EXITING NVBIT(10) // These Threads run exclusively between a conditional acquire #define THREAD_STATE_FLAGS_EXCLUSIVE_RUNNING (THREAD_STATE_FLAGS_IS_ISR | \ diff --git a/src/nvidia/src/kernel/core/thread_state.c b/src/nvidia/src/kernel/core/thread_state.c index c53388eaa..10f73e3e4 100644 --- a/src/nvidia/src/kernel/core/thread_state.c +++ b/src/nvidia/src/kernel/core/thread_state.c @@ -590,6 +590,8 @@ static NV_STATUS _threadStateInitCommon(THREAD_STATE_NODE *pThreadNode, NvU32 fl */ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags) { + NvU32 osFlags; + // Isrs should be using threadStateIsrInit(). NV_ASSERT_OR_RETURN_VOID((flags & (THREAD_STATE_FLAGS_IS_ISR_LOCKLESS | THREAD_STATE_FLAGS_IS_ISR | @@ -599,6 +601,14 @@ void threadStateInit(THREAD_STATE_NODE *pThreadNode, NvU32 flags) if (!(threadStateDatabase.setupFlags & THREAD_STATE_SETUP_FLAGS_ENABLED)) return; + osFlags = osGetCurrentProcessFlags(); + + if (osFlags & OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD) + flags |= THREAD_STATE_FLAGS_IS_KERNEL_THREAD; + + if (osFlags & OS_CURRENT_PROCESS_FLAG_EXITING) + flags |= THREAD_STATE_FLAGS_IS_EXITING; + // Use common initialization logic (stack-allocated) // Note: Legacy void API ignores errors for backward compatibility _threadStateInitCommon(pThreadNode, flags, NV_FALSE); diff --git a/src/nvidia/src/kernel/gpu/external_device/arch/kepler/kern_gsync_p2060.c b/src/nvidia/src/kernel/gpu/external_device/arch/kepler/kern_gsync_p2060.c index 375947afd..886d3e898 100644 --- a/src/nvidia/src/kernel/gpu/external_device/arch/kepler/kern_gsync_p2060.c +++ b/src/nvidia/src/kernel/gpu/external_device/arch/kepler/kern_gsync_p2060.c @@ -1214,6 +1214,8 @@ gsyncReadUniversalFrameCount_P2060 OBJTMR *pTmpTmr = NULL; OBJTMR *pTmr = GPU_GET_TIMER(pGpu); + NV_CHECK_OR_RETURN(LEVEL_INFO, gsyncIsFrameLocked_P2060(pThis), NV_ERR_INVALID_STATE); + if (!(pThis->FrameCountData.iface == NV_P2060_MAX_IFACES_PER_GSYNC)) { // @@ -1258,7 +1260,8 @@ gsyncReadUniversalFrameCount_P2060 // P2060 refreshrate is in 0.00001 Hz, so divide by 10000 to get Hz. // divide 1000000 by refreshRate to get the frame time in us. // - pThis->FrameCountData.frameTime = 1000000 / (pThis->RefreshRate/10000); //in us + NV_CHECK_OR_RETURN(LEVEL_INFO, pThis->RefreshRate >= 10, NV_ERR_INVALID_STATE); + pThis->FrameCountData.frameTime = 1000*1000*1000 / (pThis->RefreshRate/10); //in us // // Enable FrameCountTimerService to verify FrameCountData.initialDifference. diff --git a/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c b/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c index bc83cb44f..e87348b5f 100644 --- a/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c +++ b/src/nvidia/src/kernel/gpu/fsp/kern_fsp.c @@ -242,6 +242,7 @@ kfspStateUnload_IMPL NvU32 flags ) { + kfspReleaseProxyImage(pGpu, pKernelFsp); return NV_OK; } diff --git a/src/nvidia/src/kernel/gpu/gpu.c b/src/nvidia/src/kernel/gpu/gpu.c index 44f8ddd95..44db3e9f3 100644 --- a/src/nvidia/src/kernel/gpu/gpu.c +++ b/src/nvidia/src/kernel/gpu/gpu.c @@ -2287,7 +2287,7 @@ gpuStateInit_IMPL } // Set a property indicating that VF BAR0 MMU TLB Invalidation register emulation is required or not. 
- if (hypervisorIsVgxHyper()) + if (hypervisorIsVgxHyper() || (RMCFG_FEATURE_PLATFORM_GSP && IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))) { if ( IsdADA(pGpu) || diff --git a/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c b/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c index f78f0f9ff..63c9b43c4 100644 --- a/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c +++ b/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c @@ -788,9 +788,13 @@ cleanup: // to be allocated. We delay them until now to save memory when runs // are done without using graphics contexts! // + // For MIG ESX hypervisor, vGPU stack do not need any GR channel on host so + // skip global ctx buffer alloc to save FB memory + // if (!pKernelGraphics->globalCtxBuffersInfo.pGlobalCtxBuffers[gfid].bAllocated && (!gpuIsClientRmAllocatedCtxBufferEnabled(pGpu) || - (gpuIsSriovEnabled(pGpu) && IS_GFID_PF(gfid)))) + (gpuIsSriovEnabled(pGpu) && IS_GFID_PF(gfid) && + !(IS_MIG_IN_USE(pGpu) && hypervisorIsType(OS_HYPERVISOR_VMWARE))))) { NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kgraphicsAllocGrGlobalCtxBuffers_HAL(pGpu, pKernelGraphics, gfid, NULL)); @@ -865,6 +869,17 @@ kgraphicsLoadStaticInfo_VF portMemCopy(pPrivate->staticInfo.pSmIssueRateModifier, sizeof(*pPrivate->staticInfo.pSmIssueRateModifier), &pVSI->smIssueRateModifier.smIssueRateModifier[grIdx], sizeof(pVSI->smIssueRateModifier.smIssueRateModifier[grIdx])); + pPrivate->staticInfo.pSmIssueThrottleCtrl = + portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pSmIssueThrottleCtrl)); + if (pPrivate->staticInfo.pSmIssueThrottleCtrl == NULL) + { + status = NV_ERR_NO_MEMORY; + goto cleanup; + } + + portMemCopy(pPrivate->staticInfo.pSmIssueThrottleCtrl, sizeof(*pPrivate->staticInfo.pSmIssueThrottleCtrl), + &pVSI->smIssueThrottleCtrl.smIssueThrottleCtrl[grIdx], sizeof(pVSI->smIssueThrottleCtrl.smIssueThrottleCtrl[grIdx])); + pPrivate->staticInfo.pPpcMasks = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pPpcMasks)); if (pPrivate->staticInfo.pPpcMasks == NULL) { @@ -958,6 +973,28 @@ kgraphicsLoadStaticInfo_VF portMemCopy(pPrivate->staticInfo.pSmIssueRateModifier, sizeof(*pPrivate->staticInfo.pSmIssueRateModifier), &pVSI->smIssueRateModifier.smIssueRateModifier[grIdx], sizeof(pVSI->smIssueRateModifier.smIssueRateModifier[grIdx])); + pPrivate->staticInfo.pSmIssueRateModifierV2 = + portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pSmIssueRateModifierV2)); + if (pPrivate->staticInfo.pSmIssueRateModifierV2 == NULL) + { + status = NV_ERR_NO_MEMORY; + goto cleanup; + } + + portMemCopy(pPrivate->staticInfo.pSmIssueRateModifierV2, sizeof(*pPrivate->staticInfo.pSmIssueRateModifierV2), + &pVSI->smIssueRateModifierV2.smIssueRateModifierV2[grIdx], sizeof(pVSI->smIssueRateModifierV2.smIssueRateModifierV2[grIdx])); + + pPrivate->staticInfo.pSmIssueThrottleCtrl = + portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pSmIssueThrottleCtrl)); + if (pPrivate->staticInfo.pSmIssueThrottleCtrl == NULL) + { + status = NV_ERR_NO_MEMORY; + goto cleanup; + } + + portMemCopy(pPrivate->staticInfo.pSmIssueThrottleCtrl, sizeof(*pPrivate->staticInfo.pSmIssueThrottleCtrl), + &pVSI->smIssueThrottleCtrl.smIssueThrottleCtrl[grIdx], sizeof(pVSI->smIssueThrottleCtrl.smIssueThrottleCtrl[grIdx])); + pPrivate->staticInfo.pPpcMasks = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pPpcMasks)); if (pPrivate->staticInfo.pPpcMasks == NULL) { @@ -1072,6 +1109,12 @@ cleanup : portMemFree(pPrivate->staticInfo.pSmIssueRateModifier); pPrivate->staticInfo.pSmIssueRateModifier = NULL; + portMemFree(pPrivate->staticInfo.pSmIssueRateModifierV2); + 
pPrivate->staticInfo.pSmIssueRateModifierV2 = NULL; + + portMemFree(pPrivate->staticInfo.pSmIssueThrottleCtrl); + pPrivate->staticInfo.pSmIssueThrottleCtrl = NULL; + portMemFree(pPrivate->staticInfo.pFecsTraceDefines); pPrivate->staticInfo.pFecsTraceDefines = NULL; } @@ -3355,7 +3398,6 @@ subdeviceCtrlCmdKGrGetSmIssueThrottleCtrl_IMPL NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref)); - NV_ASSERT_OR_RETURN(ref.pMIGComputeInstance != NULL && ref.pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_STATE); NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kmigmgrGetLocalToGlobalEngineType(pGpu, pKernelMIGManager, ref, RM_ENGINE_TYPE_GR(0), &globalGrEngine)); diff --git a/src/nvidia/src/kernel/gpu/gr/kernel_graphics_context.c b/src/nvidia/src/kernel/gpu/gr/kernel_graphics_context.c index 69e543726..f91c538f4 100644 --- a/src/nvidia/src/kernel/gpu/gr/kernel_graphics_context.c +++ b/src/nvidia/src/kernel/gpu/gr/kernel_graphics_context.c @@ -2619,7 +2619,8 @@ kgrctxShouldManageCtxBuffers_PHYSICAL NvU32 gfid ) { - return !gpuIsClientRmAllocatedCtxBufferEnabled(pGpu) || (gpuIsSriovEnabled(pGpu) && IS_GFID_PF(gfid)); + return !gpuIsClientRmAllocatedCtxBufferEnabled(pGpu) || (gpuIsSriovEnabled(pGpu) && IS_GFID_PF(gfid) && + !(IS_MIG_IN_USE(pGpu) && hypervisorIsType(OS_HYPERVISOR_VMWARE))); } /** diff --git a/src/nvidia/src/kernel/gpu/gr/kernel_graphics_object.c b/src/nvidia/src/kernel/gpu/gr/kernel_graphics_object.c index 90b42800c..c2ed97ca2 100644 --- a/src/nvidia/src/kernel/gpu/gr/kernel_graphics_object.c +++ b/src/nvidia/src/kernel/gpu/gr/kernel_graphics_object.c @@ -28,6 +28,7 @@ #include "kernel/core/locks.h" #include "kernel/gpu/subdevice/subdevice.h" #include "vgpu/rpc.h" +#include "virtualization/hypervisor/hypervisor.h" #include "kernel/mem_mgr/gpu_vaspace.h" #include "kernel/gpu/mem_mgr/mem_mgr.h" #include "kernel/gpu/fifo/kernel_channel_group.h" @@ -520,7 +521,8 @@ kgrobjShouldCleanup_PHYSICAL ChannelDescendant *pChannelDescendant = staticCast(pKernelGraphicsObject, ChannelDescendant); NvU32 gfid = kchannelGetGfid(pChannelDescendant->pKernelChannel); - return !gpuIsClientRmAllocatedCtxBufferEnabled(pGpu) || (gpuIsSriovEnabled(pGpu) && IS_GFID_PF(gfid)); + return !gpuIsClientRmAllocatedCtxBufferEnabled(pGpu) || (gpuIsSriovEnabled(pGpu) && IS_GFID_PF(gfid) && + !(IS_MIG_IN_USE(pGpu) && hypervisorIsType(OS_HYPERVISOR_VMWARE))); } /*! diff --git a/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c b/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c index 80c7212c5..382360943 100644 --- a/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c +++ b/src/nvidia/src/kernel/gpu/gsp/message_queue_cpu.c @@ -239,6 +239,16 @@ GspMsgQueuesInit memdescSetPageSize(pMQCollection->pSharedMemDesc, AT_GPU, RM_PAGE_SIZE_HUGE); memdescTagAlloc(nvStatus, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_58, pMQCollection->pSharedMemDesc); + + if (nvStatus == NV_ERR_NO_MEMORY) + { + // TODO: Bug 5299603 + NV_PRINTF(LEVEL_ERROR, "Allocation failed with big page size, retrying with default page size\n"); + memdescSetPageSize(pMQCollection->pSharedMemDesc, AT_GPU, RM_PAGE_SIZE); + memdescTagAlloc(nvStatus, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_58, + pMQCollection->pSharedMemDesc); + } + NV_ASSERT_OK_OR_GOTO(nvStatus, nvStatus, error_ret); // Create kernel mapping for command queue. 
@@ -760,7 +770,6 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu) } exit: - pMQI->rxSeqNum++; nRet = msgqRxMarkConsumed(pMQI->hQueue, nElements); if (nRet < 0) @@ -768,6 +777,10 @@ exit: NV_PRINTF(LEVEL_ERROR, "msgqRxMarkConsumed failed: %d\n", nRet); nvStatus = NV_ERR_GENERIC; } + else + { + pMQI->rxSeqNum++; + } return nvStatus; } diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c index a2a01ff0e..4dd29461b 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c @@ -2661,6 +2661,10 @@ memdescCreateSubMem pMemDescNew->_flags |= MEMDESC_FLAGS_ENCRYPTED; else pMemDescNew->_flags &= ~MEMDESC_FLAGS_ENCRYPTED; + if (pMemDesc->_flags & MEMDESC_FLAGS_ALLOC_AS_LOCALIZED) + pMemDescNew->_flags |= MEMDESC_FLAGS_ALLOC_AS_LOCALIZED; + else + pMemDescNew->_flags &= ~MEMDESC_FLAGS_ALLOC_AS_LOCALIZED; pMemDescNew->_pageSize = pMemDesc->_pageSize; pMemDescNew->pageArrayGranularity = pageArrayGranularity; pMemDescNew->_gpuCacheAttrib = pMemDesc->_gpuCacheAttrib; diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/sysmem_scrub.c b/src/nvidia/src/kernel/gpu/mem_mgr/sysmem_scrub.c index bb9b016a4..ce5519df5 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/sysmem_scrub.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/sysmem_scrub.c @@ -29,6 +29,17 @@ #include "gpu/mem_mgr/ce_utils.h" #include "nvrm_registry.h" +void +_sysmemscrubFreeWorkerParams +( + SysmemScrubberWorkerParams *pWorkerParams +) +{ + if (pWorkerParams->pSpinlock != NULL) + portSyncSpinlockDestroy(pWorkerParams->pSpinlock); + portMemFree(pWorkerParams); +} + NV_STATUS sysmemscrubConstruct_IMPL ( @@ -47,7 +58,7 @@ sysmemscrubConstruct_IMPL pSysmemScrubber->pGpu = pGpu; // Disable by default until locking issues are addressed - pSysmemScrubber->bAsync = NV_FALSE; + pSysmemScrubber->bAsync = NV_TRUE; if (osReadRegistryDword(pGpu, NV_REG_STR_RM_DISABLE_ASYNC_SYSMEM_SCRUB, &data32) == NV_OK) { @@ -56,11 +67,13 @@ sysmemscrubConstruct_IMPL pWorkerParams = portMemAllocNonPaged(sizeof (*pWorkerParams)); NV_ASSERT_OR_RETURN(pWorkerParams != NULL, NV_ERR_NO_MEMORY); + pWorkerParams->pSpinlock = portSyncSpinlockCreate(portMemAllocatorGetGlobalNonPaged()); + NV_ASSERT_TRUE_OR_GOTO(status, pWorkerParams->pSpinlock != NULL, NV_ERR_NO_MEMORY, failed); pWorkerParams->pSysmemScrubber = pSysmemScrubber; pWorkerParams->refCount = 1; pSysmemScrubber->pWorkerParams = pWorkerParams; - listInit(&pSysmemScrubber->asyncScrubList, portMemAllocatorGetGlobalNonPaged()); + listInitIntrusive(&pSysmemScrubber->asyncScrubList); ceUtilsAllocParams.flags |= DRF_DEF(0050_CEUTILS, _FLAGS, _ENABLE_COMPLETION_CB, _TRUE); NV_ASSERT_OK_OR_GOTO(status, @@ -70,7 +83,7 @@ sysmemscrubConstruct_IMPL failed: if (status != NV_OK) { - portMemFree(pWorkerParams); + _sysmemscrubFreeWorkerParams(pWorkerParams); } return status; @@ -80,24 +93,60 @@ static void _sysmemscrubProcessCompletedEntries ( SysmemScrubber *pSysmemScrubber, - NvU64 lastCompleted + SysmemScrubberWorkerParams *pWorkerParams ) { SysScrubEntry *pEntry; + SysScrubList freeList; - while ((pEntry = listHead(&pSysmemScrubber->asyncScrubList)) != NULL) + // + // Destructor sets pWorkerParams->pSysmemScrubber to NULL + // After that the workers need to return early + // Destructor is responsible for draining all the work iself + // This is done as destructor can't flush all the pending workers + // + + listInitIntrusive(&freeList); + + portSyncSpinlockAcquire(pWorkerParams->pSpinlock); + + if (pSysmemScrubber == 
NULL) { - if (pEntry->semaphoreValue > lastCompleted) - break; + // Destructor passes pSysmemScrubber directly, as pWorkerParams->pSysmemScrubber is NULL by then (see below) + pSysmemScrubber = pWorkerParams->pSysmemScrubber; + } + if (pSysmemScrubber != NULL) + { + // ceutilsDestruct() ensures that the work is completed + NvU64 lastCompleted = (pSysmemScrubber->pCeUtils == NULL) ? + NV_U64_MAX : ceutilsUpdateProgress(pSysmemScrubber->pCeUtils); + + while ((pEntry = listHead(&pSysmemScrubber->asyncScrubList)) != NULL) + { + if (pEntry->semaphoreValue > lastCompleted) + break; + + listRemove(&pSysmemScrubber->asyncScrubList, pEntry); + listAppendExisting(&freeList, pEntry); + } + } + + portSyncSpinlockRelease(pWorkerParams->pSpinlock); + + while ((pEntry = listHead(&freeList)) != NULL) + { NV_PRINTF(LEVEL_INFO, "freeing scrubbed pMemDesc=%p RefCount=%u DupCount=%u\n", pEntry->pMemDesc, pEntry->pMemDesc->RefCount, pEntry->pMemDesc->DupCount); memdescFree(pEntry->pMemDesc); memdescDestroy(pEntry->pMemDesc); - listRemove(&pSysmemScrubber->asyncScrubList, pEntry); + listRemove(&freeList, pEntry); + portMemFree(pEntry); } + + listDestroy(&freeList); } static void @@ -108,46 +157,57 @@ _sysmemscrubProcessCompletedEntriesCb ) { SysmemScrubberWorkerParams *pWorkerParams = pArg; - SysmemScrubber *pSysmemScrubber = pWorkerParams->pSysmemScrubber; - - if (--pWorkerParams->refCount == 0) - portMemFree(pWorkerParams); - - if (pSysmemScrubber == NULL) - return; NV_PRINTF(LEVEL_SILENT, "processing completed scrub work in deferred work item\n"); - pSysmemScrubber->bCallbackQueued = NV_FALSE; + portAtomicSetU32(&pWorkerParams->bWorkerQueued, NV_FALSE); - _sysmemscrubProcessCompletedEntries(pSysmemScrubber, ceutilsUpdateProgress(pSysmemScrubber->pCeUtils)); + _sysmemscrubProcessCompletedEntries(NULL, pWorkerParams); + + if (portAtomicDecrementU32(&pWorkerParams->refCount) == 0) + { + _sysmemscrubFreeWorkerParams(pWorkerParams); + } } static NvBool _sysmemscrubIsWorkPending ( - SysmemScrubber *pSysmemScrubber + SysmemScrubberWorkerParams *pWorkerParams ) { // TODO: remove this function when CeUtils migrates to SemaphoreSurface - SysScrubEntry *pEntry = listHead(&pSysmemScrubber->asyncScrubList); + SysmemScrubber *pSysmemScrubber; + SysScrubEntry *pEntry; + NvBool bWorkPending = NV_FALSE; - return pEntry != NULL && pEntry->semaphoreValue <= ceutilsUpdateProgress(pSysmemScrubber->pCeUtils); + portSyncSpinlockAcquire(pWorkerParams->pSpinlock); + pSysmemScrubber = pWorkerParams->pSysmemScrubber; + if (pSysmemScrubber != NULL) + { + pEntry = listHead(&pSysmemScrubber->asyncScrubList); + bWorkPending = pEntry != NULL && pEntry->semaphoreValue <= ceutilsUpdateProgress(pSysmemScrubber->pCeUtils); + } + portSyncSpinlockRelease(pWorkerParams->pSpinlock); + + return bWorkPending; } static void _sysmemscrubQueueProcessCompletedEntries(void *pArg) { + // The event handler can't get called after destructor, as the event gets deregistered SysmemScrubber *pSysmemScrubber = pArg; SysmemScrubberWorkerParams *pWorkerParams = pSysmemScrubber->pWorkerParams; NV_PRINTF(LEVEL_SILENT, "scrub completed callback\n"); - NV_ASSERT_OR_RETURN_VOID(rmDeviceGpuLockIsOwner(pSysmemScrubber->pGpu->gpuInstance) || rmGpuLockIsOwner()); - - if (pWorkerParams->pSysmemScrubber == NULL || pSysmemScrubber->bCallbackQueued || !_sysmemscrubIsWorkPending(pSysmemScrubber)) + if (portAtomicAddU32(&pWorkerParams->bWorkerQueued, 0) || + !_sysmemscrubIsWorkPending(pWorkerParams)) + { return; + } // queue work to run it outside interrupt context 
NV_ASSERT_OR_RETURN_VOID( @@ -157,8 +217,8 @@ _sysmemscrubQueueProcessCompletedEntries(void *pArg) OS_QUEUE_WORKITEM_FLAGS_LOCK_GPU_GROUP_DEVICE | OS_QUEUE_WORKITEM_FLAGS_FULL_GPU_SANITY) == NV_OK); - pWorkerParams->refCount++; - pSysmemScrubber->bCallbackQueued = NV_TRUE; + portAtomicSetU32(&pWorkerParams->bWorkerQueued, NV_TRUE); + portAtomicIncrementU32(&pWorkerParams->refCount); } static NV_STATUS @@ -176,11 +236,15 @@ _sysmemscrubScrubAndFreeAsync .pCompletionCallback = _sysmemscrubQueueProcessCompletedEntries, .pCompletionCallbackArg = pSysmemScrubber }; - SysScrubEntry *pEntry = listAppendNew(&pSysmemScrubber->asyncScrubList); + SysmemScrubberWorkerParams *pWorkerParams = pSysmemScrubber->pWorkerParams; + SysScrubEntry *pEntry = portMemAllocNonPaged(sizeof (*pEntry)); NV_STATUS status; NV_ASSERT_OR_RETURN(pEntry != NULL, NV_ERR_NO_MEMORY); + portSyncSpinlockAcquire(pWorkerParams->pSpinlock); + listAppendExisting(&pSysmemScrubber->asyncScrubList, pEntry); + // // RM might be holding memory references despite memory is freed by the user // This should not happen for compressed allocations, so don't handle it and clear memdesc anyway @@ -203,7 +267,9 @@ _sysmemscrubScrubAndFreeAsync else { listRemove(&pSysmemScrubber->asyncScrubList, pEntry); + portMemFree(pEntry); } + portSyncSpinlockRelease(pWorkerParams->pSpinlock); return status; } @@ -237,7 +303,7 @@ sysmemscrubScrubAndFree_IMPL NV_ASSERT(pMemDesc->Size == pMemDesc->ActualSize); // WAR: currently queuing work out of ISR can fail, clean it up here - _sysmemscrubProcessCompletedEntries(pSysmemScrubber, ceutilsUpdateProgress(pSysmemScrubber->pCeUtils)); + _sysmemscrubProcessCompletedEntries(NULL, pSysmemScrubber->pWorkerParams); if (pSysmemScrubber->bAsync && _sysmemscrubScrubAndFreeAsync(pSysmemScrubber, pMemDesc) == NV_OK) @@ -258,13 +324,18 @@ sysmemscrubDestruct_IMPL { SysmemScrubberWorkerParams *pWorkerParams = pSysmemScrubber->pWorkerParams; + portSyncSpinlockAcquire(pWorkerParams->pSpinlock); pWorkerParams->pSysmemScrubber = NULL; + portSyncSpinlockRelease(pWorkerParams->pSpinlock); objDelete(pSysmemScrubber->pCeUtils); - _sysmemscrubProcessCompletedEntries(pSysmemScrubber, NV_U64_MAX); + pSysmemScrubber->pCeUtils = NULL; - if (--pWorkerParams->refCount == 0) - portMemFree(pWorkerParams); + // pWorkerParams->pSysmemScrubber is NULL, so wokers won't run at this point + _sysmemscrubProcessCompletedEntries(pSysmemScrubber, pWorkerParams); + + if (portAtomicDecrementU32(&pWorkerParams->refCount) == 0) + _sysmemscrubFreeWorkerParams(pWorkerParams); NV_ASSERT(listCount(&pSysmemScrubber->asyncScrubList) == 0); listDestroy(&pSysmemScrubber->asyncScrubList); diff --git a/src/nvidia/src/kernel/gpu/mmu/arch/hopper/kern_gmmu_gh100.c b/src/nvidia/src/kernel/gpu/mmu/arch/hopper/kern_gmmu_gh100.c index 796c50672..19cc74b5d 100644 --- a/src/nvidia/src/kernel/gpu/mmu/arch/hopper/kern_gmmu_gh100.c +++ b/src/nvidia/src/kernel/gpu/mmu/arch/hopper/kern_gmmu_gh100.c @@ -489,6 +489,16 @@ kgmmuFaultBufferAllocSharedMemory_GH100 memdescSetPageSize(pMemDesc, AT_GPU, RM_PAGE_SIZE_HUGE); memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_131, pMemDesc); + + if (status == NV_ERR_NO_MEMORY) + { + // TODO: Bug 5299603 + NV_PRINTF(LEVEL_ERROR, "Allocation failed with big page size, retrying with default page size\n"); + memdescSetPageSize(pMemDesc, AT_GPU, RM_PAGE_SIZE); + memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_131, + pMemDesc); + } + if (status != NV_OK) { goto destroy_memdesc; diff --git 
a/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlink.c b/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlink.c index 891f421c8..e4908ec36 100644 --- a/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlink.c +++ b/src/nvidia/src/kernel/gpu/nvlink/kernel_nvlink.c @@ -276,6 +276,32 @@ _knvlinkCheckFabricCliqueId return NV_TRUE; } +static NvBool +_knvlinkCheckFabricProbeHealth +( + OBJGPU *pGpu, + OBJGPU *pPeerGpu +) +{ + NvU32 healthStatusMask = 0; + NvU32 peerHealthStatusMask = 0; + NV_STATUS status; + + status = gpuFabricProbeGetFabricHealthStatus(pGpu->pGpuFabricProbeInfoKernel, &healthStatusMask); + NV_ASSERT_OK_OR_RETURN(status); + + status = gpuFabricProbeGetFabricHealthStatus(pPeerGpu->pGpuFabricProbeInfoKernel, &peerHealthStatusMask); + NV_ASSERT_OK_OR_RETURN(status); + + if (nvlinkGetFabricHealthSummary(healthStatusMask) == NVLINK_INBAND_FABRIC_HEALTH_SUMMARY_UNHEALTHY || + nvlinkGetFabricHealthSummary(peerHealthStatusMask) == NVLINK_INBAND_FABRIC_HEALTH_SUMMARY_UNHEALTHY) + { + return NV_FALSE; + } + + return NV_TRUE; +} + /*! * @brief Checks whether EGM addresses are valid for P2P * when GPU is connected to NVSwitch @@ -372,7 +398,8 @@ knvlinkCheckNvswitchP2pConfig_IMPL if (gpuFabricProbeIsSupported(pGpu) && gpuFabricProbeIsSupported(pPeerGpu)) { - if (!_knvlinkCheckFabricCliqueId(pGpu, pPeerGpu)) + if (!_knvlinkCheckFabricCliqueId(pGpu, pPeerGpu) || + !_knvlinkCheckFabricProbeHealth(pGpu, pPeerGpu)) { return NV_FALSE; } diff --git a/src/nvidia/src/kernel/mem_mgr/mem_multicast_fabric.c b/src/nvidia/src/kernel/mem_mgr/mem_multicast_fabric.c index 0901db1f1..c65bed261 100644 --- a/src/nvidia/src/kernel/mem_mgr/mem_multicast_fabric.c +++ b/src/nvidia/src/kernel/mem_mgr/mem_multicast_fabric.c @@ -155,10 +155,19 @@ typedef struct mem_multicast_fabric_descriptor // // Boolean to be set when an Inband request has been sent to FM - // and is currently in progress + // and is currently in progress. + // + // This flag is only set on the prime object. // NvBool bInbandReqInProgress; + // + // Boolean set when an inband request response is received. + // + // This flag is set on both prime and non-prime objects. + // + NvBool bResponseReceived; + // // Request Id associated with the Inband request in progress when // bInbandReqSent is set to true @@ -1110,27 +1119,43 @@ _memMulticastFabricDescriptorFree MEM_MULTICAST_FABRIC_TEAM_RELEASE_REQUEST); } + // + // In the process cleanup path or a deferred cleanup path, skip waiting on + // the clients which are being torn down. The process could be already in + // uninterruptible state at that point, and if for some reason GFM doesn't + // respond, we will be stuck indefinitely in the wait queue. Instead march + // on, and handle the cleanup later (see memorymulticastfabricTeamSetupResponseCallback) + // whenever GFM responds. + // + // This wait is really required for interruptible cases like NvRmFree(), + // to mimic a synchronous op. 
+ // if (pMulticastFabricDesc->bInbandReqInProgress) { - OS_WAIT_QUEUE *pWq; THREAD_STATE_NODE *pThreadNode = NULL; THREAD_STATE_FREE_CALLBACK freeCallback; - NV_ASSERT_OK(osAllocWaitQueue(&pWq)); + NV_ASSERT_OK(threadStateGetCurrent(&pThreadNode, NULL)); - if (pWq != NULL) + if (!((pThreadNode->flags & THREAD_STATE_FLAGS_IS_EXITING) || + (pThreadNode->flags & THREAD_STATE_FLAGS_IS_KERNEL_THREAD))) { - NV_ASSERT_OK(fabricMulticastCleanupCacheInsert(pFabric, - pMulticastFabricDesc->inbandReqId, - pWq)); + OS_WAIT_QUEUE *pWq = NULL; + NV_ASSERT_OK(osAllocWaitQueue(&pWq)); - NV_ASSERT_OK(threadStateGetCurrent(&pThreadNode, NULL)); + if (pWq != NULL) + { + NV_ASSERT_OK(fabricMulticastCleanupCacheInsert(pFabric, + pMulticastFabricDesc->inbandReqId, + pWq)); - freeCallback.pCb = fabricMulticastWaitOnTeamCleanupCallback; - freeCallback.pCbData = (void *)pMulticastFabricDesc->inbandReqId; - NV_ASSERT_OK(threadStateEnqueueCallbackOnFree(pThreadNode, - &freeCallback)); + freeCallback.pCb = fabricMulticastWaitOnTeamCleanupCallback; + freeCallback.pCbData = (void *)pMulticastFabricDesc->inbandReqId; + + NV_ASSERT_OK(threadStateEnqueueCallbackOnFree(pThreadNode, + &freeCallback)); + } } } @@ -1668,34 +1693,8 @@ memorymulticastfabricTeamSetupResponseCallback pMulticastFabricDesc = fabricMulticastSetupCacheGet(pFabric, requestId); - if ((pMulticastFabricDesc != NULL) && (mcTeamStatus == NV_ERR_BUSY_RETRY)) + if (pMulticastFabricDesc != NULL) { - NvBool bRetrySuccess; - - portSyncRwLockAcquireWrite(pMulticastFabricDesc->pLock); - - pMulticastFabricDesc->bInbandReqInProgress = NV_FALSE; - - _memMulticastFabricAttachGpuPostProcessor(pGpu, - pMulticastFabricDesc, - mcTeamStatus, - mcTeamHandle, - mcAddressBase, - mcAddressSize); - - bRetrySuccess = pMulticastFabricDesc->bInbandReqInProgress; - - portSyncRwLockReleaseWrite(pMulticastFabricDesc->pLock); - - if (!bRetrySuccess) - fabricMulticastSetupCacheDelete(pFabric, requestId); - - portSyncRwLockReleaseWrite(pFabric->pMulticastFabricModuleLock); - } - else if (pMulticastFabricDesc != NULL) - { - fabricMulticastSetupCacheDelete(pFabric, requestId); - // // We have now safely acquired pMulticastFabricDesc->lock, which // should block the destructor from removing pMulticastFabricDesc @@ -1709,14 +1708,20 @@ memorymulticastfabricTeamSetupResponseCallback // portSyncRwLockReleaseWrite(pFabric->pMulticastFabricModuleLock); - pMulticastFabricDesc->bInbandReqInProgress = NV_FALSE; + if (!pMulticastFabricDesc->bResponseReceived) + { + pMulticastFabricDesc->bInbandReqInProgress = NV_FALSE; - _memMulticastFabricAttachGpuPostProcessor(pGpu, - pMulticastFabricDesc, - mcTeamStatus, - mcTeamHandle, - mcAddressBase, - mcAddressSize); + // This call sets `bInbandReqInProgress` on a successful retry. 
+ _memMulticastFabricAttachGpuPostProcessor(pGpu, + pMulticastFabricDesc, + mcTeamStatus, + mcTeamHandle, + mcAddressBase, + mcAddressSize); + + pMulticastFabricDesc->bResponseReceived = !pMulticastFabricDesc->bInbandReqInProgress; + } portSyncRwLockReleaseWrite(pMulticastFabricDesc->pLock); } diff --git a/src/nvidia/src/kernel/os/os_init.c b/src/nvidia/src/kernel/os/os_init.c index 16cc4d48a..c442f8c80 100644 --- a/src/nvidia/src/kernel/os/os_init.c +++ b/src/nvidia/src/kernel/os/os_init.c @@ -299,9 +299,9 @@ NvU32 vgpuDevReadReg032( OBJSYS *pSys = SYS_GET_INSTANCE(); OBJHYPERVISOR *pHypervisor = SYS_GET_HYPERVISOR(pSys); - if(!pGpu || - !pHypervisor || !pHypervisor->bDetected || !pHypervisor->bIsHVMGuest || - !GPU_GET_KERNEL_BIF(pGpu)) + + if (!pGpu || !GPU_GET_KERNEL_BIF(pGpu) || + (!IS_VIRTUAL(pGpu) && !(pHypervisor && pHypervisor->bDetected && pHypervisor->bIsHVMGuest))) { *vgpuHandled = NV_FALSE; return 0; diff --git a/src/nvidia/src/kernel/vgpu/rpcstructurecopy.c b/src/nvidia/src/kernel/vgpu/rpcstructurecopy.c index 115d76822..231b85d93 100644 --- a/src/nvidia/src/kernel/vgpu/rpcstructurecopy.c +++ b/src/nvidia/src/kernel/vgpu/rpcstructurecopy.c @@ -393,7 +393,7 @@ NV_STATUS deserialize_NV2080_CTRL_GR_GET_SM_ISSUE_RATE_MODIFIER_V2_PARAMS_v2B_06 grSmIssueRateModifierV2->smIssueRateModifierListSize = gr_get_sm_issue_rate_modifier_v2B_06->smIssueRateModifierListSize; - if (gr_get_sm_issue_rate_modifier_v2B_06->smIssueRateModifierListSize >= NV2080_CTRL_GR_SM_ISSUE_RATE_MODIFIER_V2_MAX_LIST_SIZE_v2B_06) + if (gr_get_sm_issue_rate_modifier_v2B_06->smIssueRateModifierListSize > NV2080_CTRL_GR_SM_ISSUE_RATE_MODIFIER_V2_MAX_LIST_SIZE_v2B_06) { return NV_ERR_OUT_OF_RANGE; } @@ -433,7 +433,7 @@ NV_STATUS deserialize_NV2080_CTRL_GR_GET_SM_ISSUE_THROTTLE_CTRL_PARAMS_v2B_10(NV grSmIssueThrottleCtrl->smIssueThrottleCtrlListSize = gr_get_sm_issue_throttle_ctrl_v2B_10->smIssueThrottleCtrlListSize; - if (gr_get_sm_issue_throttle_ctrl_v2B_10->smIssueThrottleCtrlListSize >= NV2080_CTRL_GR_SM_ISSUE_THROTTLE_CTRL_MAX_LIST_SIZE_v2B_10) + if (gr_get_sm_issue_throttle_ctrl_v2B_10->smIssueThrottleCtrlListSize > NV2080_CTRL_GR_SM_ISSUE_THROTTLE_CTRL_MAX_LIST_SIZE_v2B_10) { return NV_ERR_OUT_OF_RANGE; } @@ -859,7 +859,7 @@ NV_STATUS deserialize_NV2080_CTRL_INTERNAL_STATIC_GR_GET_SM_ISSUE_RATE_MODIFIER_ { smIssueRateModifierV2->smIssueRateModifierV2[i].smIssueRateModifierListSize = rate_modifier_v2B_06->smIssueRateModifierV2[i].smIssueRateModifierListSize; - if (rate_modifier_v2B_06->smIssueRateModifierV2[i].smIssueRateModifierListSize >= NV2080_CTRL_GR_SM_ISSUE_RATE_MODIFIER_V2_MAX_LIST_SIZE_v2B_06) + if (rate_modifier_v2B_06->smIssueRateModifierV2[i].smIssueRateModifierListSize > NV2080_CTRL_GR_SM_ISSUE_RATE_MODIFIER_V2_MAX_LIST_SIZE_v2B_06) { return NV_ERR_OUT_OF_RANGE; } @@ -903,7 +903,7 @@ NV_STATUS deserialize_NV2080_CTRL_INTERNAL_STATIC_GR_GET_SM_ISSUE_THROTTLE_CTRL_ { smIssueThrottleCtrl->smIssueThrottleCtrl[i].smIssueThrottleCtrlListSize = throttle_ctrl_v2B_10->smIssueThrottleCtrl[i].smIssueThrottleCtrlListSize; - if (throttle_ctrl_v2B_10->smIssueThrottleCtrl[i].smIssueThrottleCtrlListSize >= NV2080_CTRL_GR_SM_ISSUE_THROTTLE_CTRL_MAX_LIST_SIZE_v2B_10) + if (throttle_ctrl_v2B_10->smIssueThrottleCtrl[i].smIssueThrottleCtrlListSize > NV2080_CTRL_GR_SM_ISSUE_THROTTLE_CTRL_MAX_LIST_SIZE_v2B_10) { return NV_ERR_OUT_OF_RANGE; } diff --git a/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c b/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c index a7019acd3..e98973d6b 100644 --- 
a/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c +++ b/src/nvidia/src/kernel/virtualization/kernel_vgpu_mgr.c @@ -1322,9 +1322,22 @@ kvgpumgrGuestRegister(OBJGPU *pGpu, } } + /* On device-vm, swizzId is reserved during A084 object creation */ + if (IS_MIG_ENABLED(pGpu) && (osIsVgpuDeviceVmPresent() == NV_OK)) + { + NvU32 partitionFlag; + + if (swizzId >= KMIGMGR_MAX_GPU_SWIZZID) + return NV_ERR_INVALID_ARGUMENT; + + NV_ASSERT_OK_OR_RETURN(kvgpumgrGetPartitionFlag(vgpuType, &partitionFlag)); + + NV_ASSERT_OK_OR_RETURN(kvgpumgrGetSwizzId(pGpu, pPhysGpuInfo, partitionFlag, + pPhysGpuInfo->vgpuTypes[vgpuTypeIdx], &swizzId)); + + } + /* - * For MIG mode, vGPU type is already validated based on swizzid in - * NVA081_CTRL_CMD_VGPU_CONFIG_[GET_FREE|VALIDATE]_SWIZZID RmCtrl. * For heterogeneous vGPU mode, vGPU type is already validated based on placement ID * in NVA081_CTRL_CMD_VGPU_CONFIG_UPDATE_HETEROGENEOUS_INFO RmCtrl. * Both the RmCtrls are done before allocating the A084 object. @@ -2101,14 +2114,17 @@ kvgpumgrGetSwizzId(OBJGPU *pGpu, NvU32 id; NV_STATUS rmStatus = NV_OK; VGPU_TYPE *existingVgpuTypeInfo = NULL; + NvBool bIsSwizzIdReserved = NV_FALSE; swizzIdInUseMask = kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager); - *swizzId = KMIGMGR_SWIZZID_INVALID; - // Determine valid swizzids not assigned to any vGPU device. for (id = 0; id < KMIGMGR_MAX_GPU_SWIZZID; id++) { + //If specified GI is present, ignore other GIs + if ((*swizzId != KMIGMGR_SWIZZID_INVALID) && (*swizzId != id)) + continue; + if (NVBIT64(id) & swizzIdInUseMask) { KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance; @@ -2173,13 +2189,14 @@ kvgpumgrGetSwizzId(OBJGPU *pGpu, { NV_ASSERT_OK_OR_RETURN(_kvgpumgrSetAssignedSwizzIdMask(pGpu, vgpuTypeInfo, pKernelMIGGpuInstance->swizzId)); *swizzId = pKernelMIGGpuInstance->swizzId; + bIsSwizzIdReserved = NV_TRUE; break; } } } } - if (*swizzId == KMIGMGR_SWIZZID_INVALID) + if (bIsSwizzIdReserved == NV_FALSE) { return NV_ERR_INVALID_STATE; } diff --git a/src/nvidia/src/kernel/virtualization/vgpuconfigapi.c b/src/nvidia/src/kernel/virtualization/vgpuconfigapi.c index 335ae56b2..abf7c2717 100644 --- a/src/nvidia/src/kernel/virtualization/vgpuconfigapi.c +++ b/src/nvidia/src/kernel/virtualization/vgpuconfigapi.c @@ -1317,6 +1317,7 @@ vgpuconfigapiCtrlCmdVgpuConfigGetFreeSwizzId_IMPL NV_ASSERT_OK_OR_RETURN( kvgpumgrGetVgpuTypeInfo(pParams->vgpuTypeId, &vgpuTypeInfo)); + pParams->swizzId = KMIGMGR_SWIZZID_INVALID; NV_ASSERT_OK_OR_RETURN( kvgpumgrGetSwizzId(pGpu, pPhysGpuInfo, partitionFlag, vgpuTypeInfo, &pParams->swizzId)); } diff --git a/version.mk b/version.mk index 08a250493..97958be83 100644 --- a/version.mk +++ b/version.mk @@ -1,5 +1,5 @@ -NVIDIA_VERSION = 580.94.06 -NVIDIA_NVID_VERSION = 580.94.06 +NVIDIA_VERSION = 580.94.10 +NVIDIA_NVID_VERSION = 580.94.10 NVIDIA_NVID_EXTRA = # This file.
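
The sysmem_scrub.c changes above drop the bCallbackQueued flag and instead share a spinlock-protected SysmemScrubberWorkerParams block between the scrubber and its deferred work items: the destructor clears pSysmemScrubber under the spinlock so a late-running worker sees NULL and returns early, and the block itself is freed by whichever side drops the last atomic reference. Below is a minimal standalone sketch of that teardown pattern only, using C11 atomics and a pthread mutex in place of the RM portAtomic*/portSyncSpinlock* primitives; the names (worker_params_t, scrubber_t, params_release) are illustrative and are not driver symbols.

/*
 * Sketch of the detach-under-lock + shared-refcount teardown used by the
 * sysmem scrubber changes above.  Owner and each queued worker hold one
 * reference to the shared params block; the owner's destructor detaches
 * itself under the lock, and the last reference dropped frees the block.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct scrubber scrubber_t;

typedef struct {
    pthread_mutex_t lock;      /* guards the back-pointer */
    scrubber_t     *owner;     /* NULL once the owner is destroyed */
    atomic_uint     refcount;  /* owner + each queued worker */
} worker_params_t;

struct scrubber {
    worker_params_t *params;
};

static void params_release(worker_params_t *p)
{
    /* whoever drops the last reference frees the shared block */
    if (atomic_fetch_sub(&p->refcount, 1) == 1) {
        pthread_mutex_destroy(&p->lock);
        free(p);
    }
}

static void *worker(void *arg)
{
    worker_params_t *p = arg;

    pthread_mutex_lock(&p->lock);
    if (p->owner != NULL)
        printf("worker: owner still alive, draining completed entries\n");
    else
        printf("worker: owner already destroyed, returning early\n");
    pthread_mutex_unlock(&p->lock);

    params_release(p);         /* drop the reference taken at queue time */
    return NULL;
}

int main(void)
{
    scrubber_t s;
    pthread_t  tid;

    worker_params_t *p = calloc(1, sizeof(*p));
    if (p == NULL)
        return 1;
    pthread_mutex_init(&p->lock, NULL);
    p->owner = &s;
    atomic_init(&p->refcount, 1);      /* owner's reference */
    s.params = p;

    /* queue a worker: take a reference before handing the block over */
    atomic_fetch_add(&p->refcount, 1);
    pthread_create(&tid, NULL, worker, p);

    /* destructor: detach under the lock, then drop the owner's reference */
    pthread_mutex_lock(&p->lock);
    p->owner = NULL;
    pthread_mutex_unlock(&p->lock);
    params_release(p);

    pthread_join(tid, NULL);
    return 0;
}

The split into a back-pointer plus a reference count reflects the constraint called out in the comments above: the destructor cannot flush a work item that is already queued, so detaching under the lock turns the worker into a no-op while the refcount guarantees the shared block outlives whichever party finishes last.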