diff --git a/README.md b/README.md index 706e80db5..3cac64804 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 570.181. +version 570.190. ## How to Build @@ -17,7 +17,7 @@ as root: Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -570.181 driver release. This can be achieved by installing +570.190 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -185,7 +185,7 @@ table below). For details on feature support and limitations, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/570.181/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/570.190/README/kernel_open.html For vGPU support, please refer to the README.vgpu packaged in the vGPU Host Package for more details. @@ -970,6 +970,9 @@ Subsystem Device ID. | NVIDIA RTX PRO 4500 Blackwell | 2C31 103C 2051 | | NVIDIA RTX PRO 4500 Blackwell | 2C31 10DE 2051 | | NVIDIA RTX PRO 4500 Blackwell | 2C31 17AA 2051 | +| NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 1028 2053 | +| NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 103C 2053 | +| NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 17AA 2053 | | NVIDIA RTX PRO 4000 Blackwell | 2C34 1028 2052 | | NVIDIA RTX PRO 4000 Blackwell | 2C34 103C 2052 | | NVIDIA RTX PRO 4000 Blackwell | 2C34 10DE 2052 | @@ -982,6 +985,9 @@ Subsystem Device ID. | NVIDIA GeForce RTX 5060 | 2D05 | | NVIDIA GeForce RTX 5070 Laptop GPU | 2D18 | | NVIDIA GeForce RTX 5060 Laptop GPU | 2D19 | +| NVIDIA RTX PRO 2000 Blackwell | 2D30 1028 2054 | +| NVIDIA RTX PRO 2000 Blackwell | 2D30 103C 2054 | +| NVIDIA RTX PRO 2000 Blackwell | 2D30 17AA 2054 | | NVIDIA RTX PRO 2000 Blackwell Generation Laptop GPU | 2D39 | | NVIDIA GeForce RTX 5070 Laptop GPU | 2D58 | | NVIDIA GeForce RTX 5060 Laptop GPU | 2D59 | diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index 2919972c7..00c027771 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc ccflags-y += -I$(src) ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args ccflags-y += -D__KERNEL__ -DMODULE -DNVRM -ccflags-y += -DNV_VERSION_STRING=\"570.181\" +ccflags-y += -DNV_VERSION_STRING=\"570.190\" ifneq ($(SYSSRCHOST1X),) ccflags-y += -I$(SYSSRCHOST1X) diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh index 98faac1b4..5dd4a4ff6 100755 --- a/kernel-open/conftest.sh +++ b/kernel-open/conftest.sh @@ -4071,6 +4071,43 @@ compile_test() { fi ;; + drm_fb_create_takes_format_info) + # + # Determine if a `struct drm_format_info *` is passed into + # the .fb_create callback. If so, it will have 4 arguments. + # This parameter was added in commit 81112eaac559 ("drm: + # Pass the format info to .fb_create") in linux-next + # (2025-07-16) + CODE=" + #include + #include + + static const struct drm_mode_config_funcs funcs; + void conftest_drm_fb_create_takes_format_info(void) { + funcs.fb_create(NULL, NULL, NULL, NULL); + }" + + compile_check_conftest "$CODE" "NV_DRM_FB_CREATE_TAKES_FORMAT_INFO" "" "types" + ;; + + drm_fill_fb_struct_takes_format_info) + # + # Determine if a `struct drm_format_info *` is passed into + # drm_helper_mode_fill_fb_struct(). If so, it will have 4 arguments. 
+ # This parameter was added in commit a34cc7bf1034 ("drm: + # Allow the caller to pass in the format info to + # drm_helper_mode_fill_fb_struct()") in linux-next + # (2025-07-16) + CODE=" + #include + + void conftest_drm_fill_fb_struct_takes_format_info(void) { + drm_helper_mode_fill_fb_struct(NULL, NULL, NULL, NULL); + }" + + compile_check_conftest "$CODE" "NV_DRM_FILL_FB_STRUCT_TAKES_FORMAT_INFO" "" "types" + ;; + drm_connector_funcs_have_mode_in_name) # # Determine if _mode_ is present in connector function names. We diff --git a/kernel-open/nvidia-drm/nvidia-drm-drv.c b/kernel-open/nvidia-drm/nvidia-drm-drv.c index 89087d7b3..e0df73201 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-drv.c +++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c @@ -209,11 +209,14 @@ static void nv_drm_output_poll_changed(struct drm_device *dev) static struct drm_framebuffer *nv_drm_framebuffer_create( struct drm_device *dev, struct drm_file *file, - #if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG) +#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + const struct drm_format_info *info, +#endif +#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG) const struct drm_mode_fb_cmd2 *cmd - #else +#else struct drm_mode_fb_cmd2 *cmd - #endif +#endif ) { struct drm_mode_fb_cmd2 local_cmd; @@ -224,11 +227,14 @@ static struct drm_framebuffer *nv_drm_framebuffer_create( fb = nv_drm_internal_framebuffer_create( dev, file, +#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + info, +#endif &local_cmd); - #if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG) +#if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG) *cmd = local_cmd; - #endif +#endif return fb; } @@ -2046,13 +2052,13 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info) #endif nvKms->framebufferConsoleDisabled(nv_dev->pDevice); } - #if defined(NV_DRM_CLIENT_AVAILABLE) +#if defined(NV_DRM_CLIENT_AVAILABLE) drm_client_setup(dev, NULL); - #elif defined(NV_DRM_FBDEV_TTM_AVAILABLE) +#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE) drm_fbdev_ttm_setup(dev, 32); - #elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) +#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) drm_fbdev_generic_setup(dev, 32); - #endif +#endif } #endif /* defined(NV_DRM_FBDEV_AVAILABLE) */ diff --git a/kernel-open/nvidia-drm/nvidia-drm-fb.c b/kernel-open/nvidia-drm/nvidia-drm-fb.c index e88dc948d..4dc41054f 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-fb.c +++ b/kernel-open/nvidia-drm/nvidia-drm-fb.c @@ -220,6 +220,9 @@ fail: struct drm_framebuffer *nv_drm_internal_framebuffer_create( struct drm_device *dev, struct drm_file *file, +#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + const struct drm_format_info *info, +#endif struct drm_mode_fb_cmd2 *cmd) { struct nv_drm_device *nv_dev = to_nv_device(dev); @@ -273,6 +276,9 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create( dev, #endif &nv_fb->base, + #if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + info, + #endif cmd); /* diff --git a/kernel-open/nvidia-drm/nvidia-drm-fb.h b/kernel-open/nvidia-drm/nvidia-drm-fb.h index 40445665e..b2d8b8664 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-fb.h +++ b/kernel-open/nvidia-drm/nvidia-drm-fb.h @@ -84,6 +84,9 @@ static inline void nv_fb_set_gem_obj( struct drm_framebuffer *nv_drm_internal_framebuffer_create( struct drm_device *dev, struct drm_file *file, +#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO) + const struct drm_format_info *info, +#endif struct drm_mode_fb_cmd2 *cmd); #endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */ diff --git 
a/kernel-open/nvidia-drm/nvidia-drm-modeset.c b/kernel-open/nvidia-drm/nvidia-drm-modeset.c index 6ed769615..adaee1148 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-modeset.c +++ b/kernel-open/nvidia-drm/nvidia-drm-modeset.c @@ -703,6 +703,13 @@ int nv_drm_atomic_commit(struct drm_device *dev, #else drm_atomic_helper_swap_state(dev, state); #endif + /* + * Used to update legacy modeset state pointers to support UAPIs not updated + * by the core atomic modeset infrastructure. + * + * Example: /sys/class/drm//enabled + */ + drm_atomic_helper_update_legacy_modeset_state(dev, state); /* * nv_drm_atomic_commit_internal() must not return failure after diff --git a/kernel-open/nvidia-drm/nvidia-drm-sources.mk b/kernel-open/nvidia-drm/nvidia-drm-sources.mk index c5969d4f0..4dd98e58a 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-sources.mk +++ b/kernel-open/nvidia-drm/nvidia-drm-sources.mk @@ -148,3 +148,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg +NV_CONFTEST_TYPE_COMPILE_TESTS += drm_fb_create_takes_format_info diff --git a/kernel-open/nvidia-uvm/uvm_hmm.c b/kernel-open/nvidia-uvm/uvm_hmm.c index 6f904aa6f..cd77a73b5 100644 --- a/kernel-open/nvidia-uvm/uvm_hmm.c +++ b/kernel-open/nvidia-uvm/uvm_hmm.c @@ -91,10 +91,6 @@ static __always_inline bool nv_PageSwapCache(struct page *page) #endif } -static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block, - uvm_page_index_t page_index, - struct page *page); - typedef struct { uvm_processor_id_t processor_id; @@ -269,6 +265,7 @@ void uvm_hmm_va_space_initialize(uvm_va_space_t *va_space) uvm_range_tree_init(&hmm_va_space->blocks); uvm_mutex_init(&hmm_va_space->blocks_lock, UVM_LOCK_ORDER_LEAF); + atomic64_set(&hmm_va_space->allocated_page_count, 0); return; } @@ -348,14 +345,20 @@ void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_ for (pfn = __phys_to_pfn(devmem_start); pfn <= __phys_to_pfn(devmem_end); pfn++) { struct page *page = pfn_to_page(pfn); + // No need to keep scanning if no HMM pages are allocated for this + // va_space. + if (!atomic64_read(&va_space->hmm.allocated_page_count)) + break; + UVM_ASSERT(is_device_private_page(page)); // This check is racy because nothing stops the page being freed and // even reused. That doesn't matter though - worst case the // migration fails, we retry and find the va_space doesn't match. 
- if (uvm_pmm_devmem_page_to_va_space(page) == va_space) + if (uvm_pmm_devmem_page_to_va_space(page) == va_space) { if (uvm_hmm_pmm_gpu_evict_pfn(pfn) != NV_OK) retry = true; + } } } while (retry); @@ -945,7 +948,6 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block, uvm_va_space_t *va_space; struct mm_struct *mm; struct vm_area_struct *vma; - uvm_va_block_region_t region; NvU64 addr, from, to; uvm_va_block_t *new; NV_STATUS status = NV_OK; @@ -987,7 +989,6 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block, from = max(addr, (NvU64)vma->vm_start); to = min(va_block->end, (NvU64)vma->vm_end - 1); - region = uvm_va_block_region_from_start_end(va_block, from, to); if (!uvm_hmm_vma_is_valid(vma, from, false)) continue; @@ -1344,6 +1345,7 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space, uvm_tracker_t local_tracker = UVM_TRACKER_INIT(); uvm_va_policy_node_t *node; uvm_va_block_region_t region; + const uvm_va_policy_t *policy; uvm_processor_mask_t *map_processors = &block_context->hmm.map_processors_eviction; uvm_processor_id_t id; NV_STATUS tracker_status; @@ -1355,8 +1357,8 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space, uvm_mutex_lock(&va_block->lock); - uvm_for_each_va_policy_node_in(node, va_block, va_block->start, va_block->end) { - for_each_id_in_mask(id, &node->policy.accessed_by) { + uvm_for_each_va_policy_in(policy, va_block, va_block->start, va_block->end, node, region) { + for_each_id_in_mask(id, &policy->accessed_by) { status = hmm_set_accessed_by_start_end_locked(va_block, block_context, id, @@ -1371,7 +1373,7 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space, // Exclude the processors that have been already mapped due to // AccessedBy. - uvm_processor_mask_andnot(map_processors, &va_block->evicted_gpus, &node->policy.accessed_by); + uvm_processor_mask_andnot(map_processors, &va_block->evicted_gpus, &policy->accessed_by); for_each_gpu_id_in_mask(id, map_processors) { uvm_gpu_t *gpu = uvm_gpu_get(id); @@ -1604,7 +1606,7 @@ static NV_STATUS hmm_va_block_cpu_page_populate(uvm_va_block_t *va_block, status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk); if (status != NV_OK) { - uvm_cpu_chunk_remove_from_block(va_block, page_to_nid(page), page_index); + uvm_cpu_chunk_remove_from_block(va_block, chunk, page_to_nid(page), page_index); uvm_cpu_chunk_free(chunk); } @@ -1623,7 +1625,7 @@ static void hmm_va_block_cpu_unpopulate_chunk(uvm_va_block_t *va_block, !uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index)); UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == PAGE_SIZE); - uvm_cpu_chunk_remove_from_block(va_block, chunk_nid, page_index); + uvm_cpu_chunk_remove_from_block(va_block, chunk, chunk_nid, page_index); uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk); uvm_cpu_chunk_free(chunk); } @@ -1648,14 +1650,45 @@ static void hmm_va_block_cpu_page_unpopulate(uvm_va_block_t *va_block, uvm_page_ } } -static bool hmm_va_block_cpu_page_is_same(uvm_va_block_t *va_block, - uvm_page_index_t page_index, - struct page *page) +// Insert the given sysmem page. +// Note that we might have a driver allocated sysmem page for staged GPU to GPU +// copies and that Linux may independently have allocated a page. +// If so, we have to free the driver page and use the one from Linux. 
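+// Newly mirrored pages are recorded in populated_page_mask (when non-NULL) so
+// that hmm_block_cpu_fault_locked() can remote map any pages that end up not
+// migrating.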
+static NV_STATUS hmm_va_block_cpu_page_insert_or_replace(uvm_va_block_t *va_block, + uvm_page_index_t page_index, + struct page *page, + uvm_page_mask_t *populated_page_mask) { - struct page *old_page = uvm_va_block_get_cpu_page(va_block, page_index); + NV_STATUS status; - UVM_ASSERT(uvm_cpu_chunk_is_hmm(uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(page), page_index))); - return old_page == page; + if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) { + uvm_cpu_chunk_t *cpu_chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(page), page_index); + + // Check to see if the CPU chunk already refers to the given page. + if (cpu_chunk && + uvm_cpu_chunk_is_hmm(cpu_chunk) && + uvm_cpu_chunk_get_cpu_page(va_block, cpu_chunk, page_index) == page) { + + UVM_ASSERT(uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU)); + UVM_ASSERT(uvm_va_block_cpu_is_page_resident_on(va_block, page_to_nid(page), page_index)); + + return NV_OK; + } + + // A driver allocated CPU chunk could have a different NUMA node ID. + hmm_va_block_cpu_page_unpopulate(va_block, page_index, NULL); + } + + status = hmm_va_block_cpu_page_populate(va_block, page_index, page); + if (status != NV_OK) + return status; + + // Record that we populated this page. hmm_block_cpu_fault_locked() + // uses this to ensure pages that don't migrate get remote mapped. + if (populated_page_mask) + uvm_page_mask_set(populated_page_mask, page_index); + + return NV_OK; } // uvm_va_block_service_copy() and uvm_va_block_service_finish() expect the @@ -1709,6 +1742,67 @@ static void cpu_mapping_clear(uvm_va_block_t *va_block, uvm_page_index_t page_in uvm_processor_mask_clear(&va_block->mapped, UVM_ID_CPU); } +static void gpu_chunk_free(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry, + uvm_va_block_gpu_state_t *gpu_state, + uvm_page_index_t page_index) +{ + uvm_gpu_chunk_t *gpu_chunk = gpu_state->chunks[page_index]; + + if (gpu_chunk->state != UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED || gpu_chunk->is_referenced) + return; + + UVM_ASSERT(gpu_chunk->va_block == va_block); + UVM_ASSERT(gpu_chunk->va_block_page_index == page_index); + + uvm_mmu_chunk_unmap(gpu_chunk, &va_block->tracker); + gpu_state->chunks[page_index] = NULL; + if (va_block_retry) { + list_move_tail(&gpu_chunk->list, &va_block_retry->free_chunks); + } + else { + list_del_init(&gpu_chunk->list); + uvm_pmm_gpu_free(&uvm_gpu_chunk_get_gpu(gpu_chunk)->pmm, gpu_chunk, NULL); + } +} + +static void gpu_chunk_free_region(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry, + uvm_gpu_id_t gpu_id, + uvm_va_block_region_t region, + const uvm_page_mask_t *page_mask) +{ + uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu_id); + uvm_page_index_t page_index; + + for_each_va_block_page_in_region_mask(page_index, page_mask, region) + gpu_chunk_free(va_block, va_block_retry, gpu_state, page_index); +} + +static void gpu_chunk_free_preallocated(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry) +{ + uvm_gpu_chunk_t *gpu_chunk, *next_chunk; + + list_for_each_entry_safe(gpu_chunk, next_chunk, &va_block_retry->used_chunks, list) { + uvm_gpu_t *gpu = uvm_gpu_chunk_get_gpu(gpu_chunk); + uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu->id); + uvm_page_index_t page_index = gpu_chunk->va_block_page_index; + + UVM_ASSERT(gpu_state); + + UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED); + UVM_ASSERT(gpu_chunk->va_block == va_block); + 
UVM_ASSERT(!gpu_chunk->is_referenced); + + uvm_mmu_chunk_unmap(gpu_chunk, &va_block->tracker); + gpu_state->chunks[page_index] = NULL; + + list_del_init(&gpu_chunk->list); + uvm_pmm_gpu_free(&gpu->pmm, gpu_chunk, NULL); + } +} + static void gpu_chunk_remove(uvm_va_block_t *va_block, uvm_page_index_t page_index, struct page *page) @@ -1717,20 +1811,23 @@ static void gpu_chunk_remove(uvm_va_block_t *va_block, uvm_gpu_chunk_t *gpu_chunk; uvm_gpu_id_t id; - id = uvm_gpu_chunk_get_gpu(uvm_pmm_devmem_page_to_chunk(page))->id; + gpu_chunk = uvm_pmm_devmem_page_to_chunk(page); + id = uvm_gpu_chunk_get_gpu(gpu_chunk)->id; gpu_state = uvm_va_block_gpu_state_get(va_block, id); UVM_ASSERT(gpu_state); - gpu_chunk = gpu_state->chunks[page_index]; - if (!gpu_chunk) { + if (!gpu_state->chunks[page_index]) { // If we didn't find a chunk it's because the page was unmapped for // mremap and no fault has established a new mapping. UVM_ASSERT(!uvm_page_mask_test(&gpu_state->resident, page_index)); return; } - UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); + UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED || + gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); + UVM_ASSERT(gpu_chunk->va_block == va_block); UVM_ASSERT(gpu_chunk->is_referenced); + UVM_ASSERT(gpu_chunk == gpu_state->chunks[page_index]); uvm_page_mask_clear(&gpu_state->resident, page_index); @@ -1739,42 +1836,42 @@ static void gpu_chunk_remove(uvm_va_block_t *va_block, } static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry, uvm_page_index_t page_index, struct page *page) { uvm_va_block_gpu_state_t *gpu_state; uvm_gpu_chunk_t *gpu_chunk; - uvm_gpu_id_t id; + uvm_gpu_t *gpu; NV_STATUS status; - id = uvm_gpu_chunk_get_gpu(uvm_pmm_devmem_page_to_chunk(page))->id; - gpu_state = uvm_va_block_gpu_state_get(va_block, id); + gpu_chunk = uvm_pmm_devmem_page_to_chunk(page); + gpu = uvm_gpu_chunk_get_gpu(gpu_chunk); + gpu_state = uvm_va_block_gpu_state_get_alloc(va_block, gpu); - // It's possible that this is a fresh va_block we're trying to add an - // existing gpu_chunk to. This occurs for example when a GPU faults on a - // virtual address that has been remapped with mremap(). - if (!gpu_state) { - status = uvm_va_block_gpu_state_alloc(va_block); - if (status != NV_OK) - return status; - gpu_state = uvm_va_block_gpu_state_get(va_block, id); - } - - UVM_ASSERT(gpu_state); + if (!gpu_state) + return NV_ERR_NO_MEMORY; // Note that a mremap() might be to a CPU virtual address that is nolonger // aligned with a larger GPU chunk size. We would need to allocate a new // aligned GPU chunk and copy from old to new. // TODO: Bug 3368756: add support for large GPU pages. - gpu_chunk = uvm_pmm_devmem_page_to_chunk(page); UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); UVM_ASSERT(gpu_chunk->is_referenced); UVM_ASSERT(uvm_pmm_devmem_page_to_va_space(page) == va_block->hmm.va_space); - if (gpu_state->chunks[page_index] == gpu_chunk) + if (gpu_state->chunks[page_index] == gpu_chunk) { + UVM_ASSERT(gpu_chunk->va_block == va_block); + UVM_ASSERT(gpu_chunk->va_block_page_index == page_index); return NV_OK; + } - UVM_ASSERT(!gpu_state->chunks[page_index]); + if (gpu_state->chunks[page_index]) { + // In the mremap() case, if we pre-allocated a new GPU chunk for the + // destination of a potential migration but we need to free it because + // we are replacing it with the old chunk from the mremap() source. 
+ gpu_chunk_free(va_block, va_block_retry, gpu_state, page_index); + } // In some configurations such as SR-IOV heavy, the chunk cannot be // referenced using its physical address. Create a virtual mapping. @@ -1782,7 +1879,7 @@ static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block, if (status != NV_OK) return status; - uvm_processor_mask_set(&va_block->resident, id); + uvm_processor_mask_set(&va_block->resident, gpu->id); uvm_page_mask_set(&gpu_state->resident, page_index); // It is safe to modify the page index field without holding any PMM locks @@ -1817,34 +1914,50 @@ static NV_STATUS sync_page_and_chunk_state(uvm_va_block_t *va_block, // Wait for the GPU to finish. migrate_vma_finalize() will release the // migrated source pages (or non migrating destination pages), so GPU - // opererations must be finished by then. + // opererations must be finished by then. Also, we unmap the source or + // destination so DMAs must be complete before DMA unmapping. status = uvm_tracker_wait(&va_block->tracker); for_each_va_block_page_in_region(page_index, region) { - struct page *page; + struct page *src_page; + struct page *dst_page; if (uvm_page_mask_test(same_devmem_page_mask, page_index)) continue; - // If a page migrated, clean up the source page. - // Otherwise, clean up the destination page. - if (uvm_page_mask_test(migrated_pages, page_index)) - page = migrate_pfn_to_page(src_pfns[page_index]); - else - page = migrate_pfn_to_page(dst_pfns[page_index]); - - if (!page) - continue; - - if (is_device_private_page(page)) { - gpu_chunk_remove(va_block, page_index, page); + // If the source page migrated, we have to remove our pointers to it + // because migrate_vma_finalize() will release the reference. + // TODO: Bug 3660922: Need to handle read duplication at some point. + src_page = migrate_pfn_to_page(src_pfns[page_index]); + if (src_page && uvm_page_mask_test(migrated_pages, page_index)) { + if (is_device_private_page(src_page)) + gpu_chunk_remove(va_block, page_index, src_page); + else + hmm_va_block_cpu_page_unpopulate(va_block, page_index, src_page); } - else { - // If the source page is a system memory page, - // migrate_vma_finalize() will release the reference so we should - // clear our pointer to it. - // TODO: Bug 3660922: Need to handle read duplication at some point. - hmm_va_block_cpu_page_unpopulate(va_block, page_index, page); + + dst_page = migrate_pfn_to_page(dst_pfns[page_index]); + if (dst_page) { + if (is_device_private_page(dst_page)) { + uvm_gpu_chunk_t *gpu_chunk = uvm_pmm_devmem_page_to_chunk(dst_page); + + UVM_ASSERT(gpu_chunk); + UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED || + gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); + UVM_ASSERT(gpu_chunk->is_referenced); + + // If a page migrated to the GPU, we have to unpin the + // gpu_chunk. Otherwise, clear pointers to temporary pinned + // pages that aren't migrating. + if (uvm_page_mask_test(migrated_pages, page_index)) + uvm_pmm_gpu_unpin_allocated(&uvm_gpu_chunk_get_gpu(gpu_chunk)->pmm, gpu_chunk, va_block); + else + gpu_chunk_remove(va_block, page_index, dst_page); + } + else if (!uvm_page_mask_test(migrated_pages, page_index)) { + // Clear pointer to sysmem page that will be released. + hmm_va_block_cpu_page_unpopulate(va_block, page_index, dst_page); + } } } @@ -1853,7 +1966,6 @@ static NV_STATUS sync_page_and_chunk_state(uvm_va_block_t *va_block, // Update va_block state to reflect that the page isn't migrating. 
static void clean_up_non_migrating_page(uvm_va_block_t *va_block, - const unsigned long *src_pfns, unsigned long *dst_pfns, uvm_page_index_t page_index) { @@ -1879,10 +1991,9 @@ static void clean_up_non_migrating_page(uvm_va_block_t *va_block, } static void clean_up_non_migrating_pages(uvm_va_block_t *va_block, - const unsigned long *src_pfns, unsigned long *dst_pfns, uvm_va_block_region_t region, - uvm_page_mask_t *page_mask) + const uvm_page_mask_t *page_mask) { uvm_page_index_t page_index; NV_STATUS status; @@ -1891,23 +2002,47 @@ static void clean_up_non_migrating_pages(uvm_va_block_t *va_block, UVM_ASSERT(status == NV_OK); for_each_va_block_page_in_region_mask(page_index, page_mask, region) { - clean_up_non_migrating_page(va_block, src_pfns, dst_pfns, page_index); + clean_up_non_migrating_page(va_block, dst_pfns, page_index); } } // CPU page fault handling. -// Fill in the dst_pfns[page_index] entry given that there is an allocated -// CPU page. -static void lock_block_cpu_page(uvm_va_block_t *va_block, - uvm_page_index_t page_index, - struct page *src_page, - unsigned long *dst_pfns, - uvm_page_mask_t *same_devmem_page_mask) +// Fill in the dst_pfns[page_index] entry with a CPU page. +// The src_pfns[page_index] page, if present, is page locked. +static NV_STATUS alloc_page_on_cpu(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry, + uvm_page_index_t page_index, + const unsigned long *src_pfns, + unsigned long *dst_pfns, + uvm_page_mask_t *same_devmem_page_mask, + uvm_va_block_context_t *block_context) { - uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_any_chunk_for_page(va_block, page_index); - uvm_va_block_region_t chunk_region; + struct page *src_page; struct page *dst_page; + uvm_cpu_chunk_t *chunk; + uvm_va_block_region_t chunk_region; + + if (!uvm_page_mask_test(&va_block->cpu.allocated, page_index)) { + NV_STATUS status; + + UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) || + !uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index)); + + status = uvm_va_block_populate_page_cpu(va_block, page_index, block_context); + if (status != NV_OK) + return status; + } + + // This is the page that will be copied to system memory. + src_page = migrate_pfn_to_page(src_pfns[page_index]); + + // mremap may have caused us to lose the gpu_chunk associated with + // this va_block/page_index so make sure we have the correct chunk. + if (src_page && is_device_private_page(src_page)) + gpu_chunk_add(va_block, va_block_retry, page_index, src_page); + + chunk = uvm_cpu_chunk_get_any_chunk_for_page(va_block, page_index); UVM_ASSERT(chunk); UVM_ASSERT(chunk->page); @@ -1923,61 +2058,63 @@ static void lock_block_cpu_page(uvm_va_block_t *va_block, // remote mapped system memory page. It could also be a driver allocated // page for GPU-to-GPU staged copies (i.e., not a resident copy and owned // by the driver). - if (is_device_private_page(src_page)) { - // Since the page isn't mirrored, it was allocated by alloc_pages() + if (!src_page || is_device_private_page(src_page)) { + UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) || + !uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index)); + + // If the page isn't mirrored, it was allocated by alloc_pages() // and UVM owns the reference. We leave the reference count unchanged // and mark the page pointer as mirrored since UVM is transferring // ownership to Linux and we don't want UVM to double free the page in // hmm_va_block_cpu_page_unpopulate() or block_kill(). 
If the page // does not migrate, it will be freed though. - UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) || - !uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index)); - UVM_ASSERT(chunk->type == UVM_CPU_CHUNK_TYPE_PHYSICAL); - UVM_ASSERT(page_ref_count(dst_page) == 1); - uvm_cpu_chunk_make_hmm(chunk); + if (chunk->type == UVM_CPU_CHUNK_TYPE_PHYSICAL) { + UVM_ASSERT(page_ref_count(dst_page) == 1); + uvm_cpu_chunk_make_hmm(chunk); + } + + lock_page(dst_page); + dst_pfns[page_index] = migrate_pfn(page_to_pfn(dst_page)); } else { + if (src_page != dst_page) { + // This must be a driver allocated staging page that doesn't match + // the page that migrate_vma_setup() locked. + hmm_va_block_cpu_unpopulate_chunk(va_block, chunk, page_to_nid(dst_page), page_index); + hmm_va_block_cpu_page_populate(va_block, page_index, src_page); + } + + UVM_ASSERT(uvm_cpu_chunk_is_hmm(chunk)); UVM_ASSERT(same_devmem_page_mask); - UVM_ASSERT(src_page == dst_page); uvm_page_mask_set(same_devmem_page_mask, page_index); // The call to migrate_vma_setup() will have inserted a migration PTE // so the CPU has no access. cpu_mapping_clear(va_block, page_index); - return; } - lock_page(dst_page); - dst_pfns[page_index] = migrate_pfn(page_to_pfn(dst_page)); -} - -static void hmm_mark_gpu_chunk_referenced(uvm_va_block_t *va_block, - uvm_gpu_t *gpu, - uvm_gpu_chunk_t *gpu_chunk) -{ - // Tell PMM to expect a callback from Linux to free the page since the - // device private struct page reference count will determine when the - // GPU chunk is free. - UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED); - list_del_init(&gpu_chunk->list); - uvm_pmm_gpu_unpin_referenced(&gpu->pmm, gpu_chunk, va_block); + return NV_OK; } static void fill_dst_pfn(uvm_va_block_t *va_block, + uvm_va_block_gpu_state_t *gpu_state, uvm_gpu_t *gpu, const unsigned long *src_pfns, unsigned long *dst_pfns, uvm_page_index_t page_index, + const uvm_page_mask_t *page_mask, uvm_page_mask_t *same_devmem_page_mask) { unsigned long src_pfn = src_pfns[page_index]; - uvm_gpu_chunk_t *gpu_chunk; + uvm_gpu_chunk_t *gpu_chunk = gpu_state->chunks[page_index]; unsigned long pfn; struct page *dpage; - gpu_chunk = uvm_va_block_lookup_gpu_chunk(va_block, gpu, uvm_va_block_cpu_page_address(va_block, page_index)); UVM_ASSERT(gpu_chunk); + UVM_ASSERT(uvm_gpu_chunk_is_user(gpu_chunk)); UVM_ASSERT(gpu_chunk->log2_size == PAGE_SHIFT); + UVM_ASSERT(gpu_chunk->va_block == va_block); + pfn = uvm_pmm_gpu_devmem_get_pfn(&gpu->pmm, gpu_chunk); // If the same GPU page is both source and destination, migrate_vma_pages() @@ -1985,6 +2122,8 @@ static void fill_dst_pfn(uvm_va_block_t *va_block, // mark it as not migrating but we keep track of this so we don't confuse // it with a page that migrate_vma_pages() actually does not migrate. 
if ((src_pfn & MIGRATE_PFN_VALID) && (src_pfn >> MIGRATE_PFN_SHIFT) == pfn) { + UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); + UVM_ASSERT(gpu_chunk->is_referenced); uvm_page_mask_set(same_devmem_page_mask, page_index); return; } @@ -1993,90 +2132,32 @@ static void fill_dst_pfn(uvm_va_block_t *va_block, UVM_ASSERT(is_device_private_page(dpage)); UVM_ASSERT(page_pgmap(dpage)->owner == &g_uvm_global); - hmm_mark_gpu_chunk_referenced(va_block, gpu, gpu_chunk); - UVM_ASSERT(!page_count(dpage)); - zone_device_page_init(dpage); - dpage->zone_device_data = gpu_chunk; + if (gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) { + UVM_ASSERT(!gpu_chunk->is_referenced); + gpu_chunk->is_referenced = true; + + // Remove the GPU chunk from the retry->used_chunks list. + list_del_init(&gpu_chunk->list); + + UVM_ASSERT(!page_count(dpage)); + UVM_ASSERT(!dpage->zone_device_data); + zone_device_page_init(dpage); + dpage->zone_device_data = gpu_chunk; + atomic64_inc(&va_block->hmm.va_space->hmm.allocated_page_count); + } + else { + UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); + UVM_ASSERT(gpu_chunk->is_referenced); + UVM_ASSERT(uvm_pmm_devmem_page_to_chunk(dpage) == gpu_chunk); + UVM_ASSERT(page_count(dpage) == 1); + } dst_pfns[page_index] = migrate_pfn(pfn); } -static void fill_dst_pfns(uvm_va_block_t *va_block, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - uvm_va_block_region_t region, - uvm_page_mask_t *page_mask, - uvm_page_mask_t *same_devmem_page_mask, - uvm_processor_id_t dest_id) -{ - uvm_gpu_t *gpu = uvm_gpu_get(dest_id); - uvm_page_index_t page_index; - - uvm_page_mask_zero(same_devmem_page_mask); - - for_each_va_block_page_in_region_mask(page_index, page_mask, region) { - if (!(src_pfns[page_index] & MIGRATE_PFN_MIGRATE)) - continue; - - fill_dst_pfn(va_block, - gpu, - src_pfns, - dst_pfns, - page_index, - same_devmem_page_mask); - } -} - -static NV_STATUS alloc_page_on_cpu(uvm_va_block_t *va_block, - uvm_page_index_t page_index, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - uvm_page_mask_t *same_devmem_page_mask, - uvm_va_block_context_t *block_context) -{ - NV_STATUS status; - struct page *src_page; - struct page *dst_page; - - // This is the page that will be copied to system memory. - src_page = migrate_pfn_to_page(src_pfns[page_index]); - - if (src_page) { - // mremap may have caused us to lose the gpu_chunk associated with - // this va_block/page_index so make sure we have the correct chunk. - if (is_device_private_page(src_page)) - gpu_chunk_add(va_block, page_index, src_page); - - if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) { - lock_block_cpu_page(va_block, page_index, src_page, dst_pfns, same_devmem_page_mask); - return NV_OK; - } - } - - UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) || - !uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index)); - - status = uvm_va_block_populate_page_cpu(va_block, page_index, block_context); - if (status != NV_OK) - return status; - - // TODO: Bug 3368756: add support for transparent huge pages - // Support for large CPU pages means the page_index may need fixing - dst_page = migrate_pfn_to_page(block_context->hmm.dst_pfns[page_index]); - - // Note that we don't call get_page(dst_page) since alloc_page_vma() - // returns with a page reference count of one and we are passing - // ownership to Linux. 
Also, uvm_va_block_cpu_page_populate() recorded - // the page as "mirrored" so that migrate_vma_finalize() and - // hmm_va_block_cpu_page_unpopulate() don't double free the page. - lock_page(dst_page); - dst_pfns[page_index] = migrate_pfn(page_to_pfn(dst_page)); - - return NV_OK; -} - // Allocates pages on the CPU to handle migration due to a page fault static NV_STATUS fault_alloc_on_cpu(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry, const unsigned long *src_pfns, unsigned long *dst_pfns, uvm_va_block_region_t region, @@ -2109,7 +2190,13 @@ static NV_STATUS fault_alloc_on_cpu(uvm_va_block_t *va_block, goto clr_mask; } - status = alloc_page_on_cpu(va_block, page_index, src_pfns, dst_pfns, same_devmem_page_mask, service_context->block_context); + status = alloc_page_on_cpu(va_block, + va_block_retry, + page_index, + src_pfns, + dst_pfns, + same_devmem_page_mask, + service_context->block_context); if (status != NV_OK) { // Ignore errors if the page is only for prefetching. if (service_context && @@ -2126,7 +2213,7 @@ static NV_STATUS fault_alloc_on_cpu(uvm_va_block_t *va_block, } if (status != NV_OK) - clean_up_non_migrating_pages(va_block, src_pfns, dst_pfns, region, page_mask); + clean_up_non_migrating_pages(va_block, dst_pfns, region, page_mask); else if (uvm_page_mask_empty(page_mask)) return NV_WARN_MORE_PROCESSING_REQUIRED; @@ -2135,6 +2222,7 @@ static NV_STATUS fault_alloc_on_cpu(uvm_va_block_t *va_block, // Allocates pages on the CPU for explicit migration calls. static NV_STATUS migrate_alloc_on_cpu(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry, const unsigned long *src_pfns, unsigned long *dst_pfns, uvm_va_block_region_t region, @@ -2157,11 +2245,18 @@ static NV_STATUS migrate_alloc_on_cpu(uvm_va_block_t *va_block, continue; } - status = alloc_page_on_cpu(va_block, page_index, src_pfns, dst_pfns, same_devmem_page_mask, block_context); + status = alloc_page_on_cpu(va_block, va_block_retry, page_index, src_pfns, dst_pfns, same_devmem_page_mask, block_context); + if (status != NV_OK) { + // Try to migrate other pages if we can't allocate this one. + if (status != NV_ERR_NO_MEMORY) + break; + + uvm_page_mask_clear(page_mask, page_index); + } } if (status != NV_OK) - clean_up_non_migrating_pages(va_block, src_pfns, dst_pfns, region, page_mask); + clean_up_non_migrating_pages(va_block, dst_pfns, region, page_mask); else if (uvm_page_mask_empty(page_mask)) return NV_WARN_MORE_PROCESSING_REQUIRED; @@ -2194,6 +2289,7 @@ static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_contex uvm_page_mask_copy(page_mask, &service_context->per_processor_masks[UVM_ID_CPU_VALUE].new_residency); status = fault_alloc_on_cpu(va_block, + va_block_retry, src_pfns, dst_pfns, service_context->region, @@ -2208,7 +2304,7 @@ static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_contex // location yet. 
status = uvm_va_block_service_copy(processor_id, UVM_ID_CPU, va_block, va_block_retry, service_context); if (status != NV_OK) - clean_up_non_migrating_pages(va_block, src_pfns, dst_pfns, service_context->region, page_mask); + clean_up_non_migrating_pages(va_block, dst_pfns, service_context->region, page_mask); return status; } @@ -2217,10 +2313,8 @@ static NV_STATUS uvm_hmm_devmem_fault_finalize_and_map(uvm_hmm_devmem_fault_cont { uvm_processor_id_t processor_id; uvm_service_block_context_t *service_context; - uvm_perf_prefetch_hint_t *prefetch_hint; - uvm_va_block_retry_t *va_block_retry; const unsigned long *src_pfns; - unsigned long *dst_pfns; + const unsigned long *dst_pfns; uvm_page_mask_t *page_mask; uvm_va_block_t *va_block; uvm_va_block_region_t region; @@ -2229,9 +2323,7 @@ static NV_STATUS uvm_hmm_devmem_fault_finalize_and_map(uvm_hmm_devmem_fault_cont processor_id = devmem_fault_context->processor_id; service_context = devmem_fault_context->service_context; - prefetch_hint = &service_context->prefetch_hint; va_block = devmem_fault_context->va_block; - va_block_retry = devmem_fault_context->va_block_retry; src_pfns = service_context->block_context->hmm.src_pfns; dst_pfns = service_context->block_context->hmm.dst_pfns; region = service_context->region; @@ -2270,6 +2362,7 @@ static NV_STATUS uvm_hmm_devmem_fault_finalize_and_map(uvm_hmm_devmem_fault_cont } static NV_STATUS populate_region(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry, unsigned long *pfns, uvm_va_block_region_t region, uvm_page_mask_t *populated_page_mask) @@ -2277,12 +2370,6 @@ static NV_STATUS populate_region(uvm_va_block_t *va_block, uvm_page_index_t page_index; NV_STATUS status; - // Make sure GPU state is allocated or else the GPU DMA mappings to - // system memory won't be saved. - status = uvm_va_block_gpu_state_alloc(va_block); - if (status != NV_OK) - return status; - for_each_va_block_page_in_region(page_index, region) { struct page *page; @@ -2316,30 +2403,18 @@ static NV_STATUS populate_region(uvm_va_block_t *va_block, // not release the device private struct page reference. Since // hmm_range_fault() did find a device private PTE, we can // re-establish the GPU chunk pointer. - status = gpu_chunk_add(va_block, page_index, page); + status = gpu_chunk_add(va_block, va_block_retry, page_index, page); if (status != NV_OK) return status; continue; } - // If a CPU chunk is already allocated, check to see it matches what - // hmm_range_fault() found. - if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) { - UVM_ASSERT(hmm_va_block_cpu_page_is_same(va_block, page_index, page)); - } - else { - status = hmm_va_block_cpu_page_populate(va_block, page_index, page); - if (status != NV_OK) - return status; + status = hmm_va_block_cpu_page_insert_or_replace(va_block, page_index, page, populated_page_mask); + if (status != NV_OK) + return status; - // Record that we populated this page. hmm_block_cpu_fault_locked() - // uses this to ensure pages that don't migrate get remote mapped. - if (populated_page_mask) - uvm_page_mask_set(populated_page_mask, page_index); - } - - // Since we have a stable snapshot of the CPU pages, we can - // update the residency and protection information. + // Since we have a stable snapshot of the CPU pages, we can update the + // residency and mapping information. 
uvm_va_block_cpu_set_resident_page(va_block, page_to_nid(page), page_index); cpu_mapping_set(va_block, pfns[page_index] & HMM_PFN_WRITE, page_index); @@ -2367,6 +2442,7 @@ static bool hmm_range_fault_retry(uvm_va_block_t *va_block) // Make the region be resident on the CPU by calling hmm_range_fault() to fault // in CPU pages. static NV_STATUS hmm_make_resident_cpu(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry, struct vm_area_struct *vma, unsigned long *hmm_pfns, uvm_va_block_region_t region, @@ -2414,6 +2490,7 @@ static NV_STATUS hmm_make_resident_cpu(uvm_va_block_t *va_block, return NV_WARN_MORE_PROCESSING_REQUIRED; return populate_region(va_block, + va_block_retry, hmm_pfns, region, populated_page_mask); @@ -2548,27 +2625,15 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id, for_each_va_block_page_in_region(page_index, region) { struct page *page = pages[page_index]; - if (!page) { + if (!page || hmm_va_block_cpu_page_insert_or_replace(va_block, page_index, page, NULL) != NV_OK) { // Record that one of the pages isn't exclusive but keep converting // the others. status = NV_WARN_MORE_PROCESSING_REQUIRED; continue; } - // If a CPU chunk is already allocated, check to see it matches what - // make_device_exclusive_range() found. - if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) { - UVM_ASSERT(hmm_va_block_cpu_page_is_same(va_block, page_index, page)); - UVM_ASSERT(uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU)); - UVM_ASSERT(uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index)); - } - else { - NV_STATUS s = hmm_va_block_cpu_page_populate(va_block, page_index, page); - - if (s == NV_OK) - uvm_va_block_cpu_set_resident_page(va_block, page_to_nid(page), page_index); - } - + // Since we have a stable snapshot of the CPU pages, we can update the + // mapping information. 
cpu_mapping_clear(va_block, page_index); } @@ -2629,6 +2694,7 @@ static NV_STATUS hmm_block_cpu_fault_locked(uvm_processor_id_t processor_id, .va_block = va_block, .va_block_retry = va_block_retry, .service_context = service_context, + .same_devmem_page_mask = {} }; // Normally the source page will be a device private page that is being @@ -2655,6 +2721,7 @@ static NV_STATUS hmm_block_cpu_fault_locked(uvm_processor_id_t processor_id, } status = hmm_make_resident_cpu(va_block, + va_block_retry, service_context->block_context->hmm.vma, service_context->block_context->hmm.src_pfns, region, @@ -2724,20 +2791,27 @@ static NV_STATUS hmm_block_cpu_fault_locked(uvm_processor_id_t processor_id, return status; } -static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block, - struct vm_area_struct *vma, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - uvm_va_block_region_t region, - uvm_page_mask_t *page_mask, - uvm_processor_id_t dest_id, - uvm_service_block_context_t *service_context) +static NV_STATUS dmamap_src_sysmem_and_fill_dst(uvm_va_block_t *va_block, + uvm_va_block_retry_t *va_block_retry, + const unsigned long *src_pfns, + unsigned long *dst_pfns, + uvm_va_block_region_t region, + uvm_page_mask_t *page_mask, + uvm_page_mask_t *same_devmem_page_mask, + uvm_processor_id_t dest_id, + uvm_service_block_context_t *service_context) { + uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, dest_id); + uvm_gpu_t *gpu = uvm_gpu_get(dest_id); uvm_page_index_t page_index; NV_STATUS status = NV_OK; + UVM_ASSERT(gpu_state); + UVM_ASSERT(gpu); UVM_ASSERT(service_context); + uvm_page_mask_zero(same_devmem_page_mask); + for_each_va_block_page_in_region_mask(page_index, page_mask, region) { struct page *src_page; @@ -2752,46 +2826,28 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block, src_page = migrate_pfn_to_page(src_pfns[page_index]); if (src_page) { if (is_device_private_page(src_page)) { - status = gpu_chunk_add(va_block, page_index, src_page); + status = gpu_chunk_add(va_block, va_block_retry, page_index, src_page); if (status != NV_OK) - break; - continue; + goto clr_mask; + + goto fill_dst; } if (nv_PageSwapCache(src_page)) { // TODO: Bug 4050579: Remove this when swap cached pages can be // migrated. + gpu_chunk_free_region(va_block, va_block_retry, dest_id, region, page_mask); status = NV_WARN_MISMATCHED_TARGET; break; } - // If the page is already allocated, it is most likely a mirrored - // page. Check to be sure it matches what we have recorded. The - // page shouldn't be a staging page from a GPU to GPU migration - // or a remote mapped atomic sysmem page because migrate_vma_setup() - // found a normal page and non-mirrored pages are only known - // privately to the UVM driver. - if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) { - UVM_ASSERT(hmm_va_block_cpu_page_is_same(va_block, page_index, src_page)); - UVM_ASSERT(uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU)); - UVM_ASSERT(uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index)); - } - else { - status = hmm_va_block_cpu_page_populate(va_block, page_index, src_page); - if (status != NV_OK) - goto clr_mask; + status = hmm_va_block_cpu_page_insert_or_replace(va_block, page_index, src_page, NULL); + if (status != NV_OK) + goto clr_mask; - // Since there is a CPU resident page, there shouldn't be one - // anywhere else. TODO: Bug 3660922: Need to handle read - // duplication at some point. 
- UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, - service_context->block_context, - page_index)); - - // migrate_vma_setup() was able to isolate and lock the page; - // therefore, it is CPU resident and not mapped. - uvm_va_block_cpu_set_resident_page(va_block, page_to_nid(src_page), page_index); - } + // Since we have a stable snapshot of the CPU pages, we can update + // the residency information. + uvm_va_block_cpu_set_resident_page(va_block, page_to_nid(src_page), page_index); // The call to migrate_vma_setup() will have inserted a migration // PTE so the CPU has no access. @@ -2810,33 +2866,43 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block, } } + fill_dst: + fill_dst_pfn(va_block, + gpu_state, + gpu, + src_pfns, + dst_pfns, + page_index, + page_mask, + same_devmem_page_mask); + continue; clr_mask: + // Free the pre-allocated GPU chunk for non-migrating pages. + gpu_chunk_free(va_block, va_block_retry, gpu_state, page_index); + // TODO: Bug 3900774: clean up murky mess of mask clearing. uvm_page_mask_clear(page_mask, page_index); if (service_context) clear_service_context_masks(service_context, dest_id, page_index); } - if (uvm_page_mask_empty(page_mask)) - status = NV_WARN_MORE_PROCESSING_REQUIRED; + gpu_chunk_free_preallocated(va_block, va_block_retry); - if (status != NV_OK) - clean_up_non_migrating_pages(va_block, src_pfns, dst_pfns, region, page_mask); + if (status == NV_OK && uvm_page_mask_empty(page_mask)) + status = NV_WARN_MORE_PROCESSING_REQUIRED; return status; } -static NV_STATUS uvm_hmm_gpu_fault_alloc_and_copy(struct vm_area_struct *vma, - uvm_hmm_gpu_fault_event_t *uvm_hmm_gpu_fault_event) +static NV_STATUS uvm_hmm_gpu_fault_alloc_and_copy(uvm_hmm_gpu_fault_event_t *uvm_hmm_gpu_fault_event) { uvm_processor_id_t processor_id; uvm_processor_id_t new_residency; uvm_va_block_t *va_block; uvm_va_block_retry_t *va_block_retry; uvm_service_block_context_t *service_context; - uvm_perf_prefetch_hint_t *prefetch_hint; const unsigned long *src_pfns; unsigned long *dst_pfns; uvm_va_block_region_t region; @@ -2849,7 +2915,6 @@ static NV_STATUS uvm_hmm_gpu_fault_alloc_and_copy(struct vm_area_struct *vma, va_block_retry = uvm_hmm_gpu_fault_event->va_block_retry; service_context = uvm_hmm_gpu_fault_event->service_context; region = service_context->region; - prefetch_hint = &service_context->prefetch_hint; src_pfns = service_context->block_context->hmm.src_pfns; dst_pfns = service_context->block_context->hmm.dst_pfns; @@ -2860,14 +2925,15 @@ static NV_STATUS uvm_hmm_gpu_fault_alloc_and_copy(struct vm_area_struct *vma, uvm_page_mask_copy(page_mask, &service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency); - status = dmamap_src_sysmem_pages(va_block, - vma, - src_pfns, - dst_pfns, - region, - page_mask, - new_residency, - service_context); + status = dmamap_src_sysmem_and_fill_dst(va_block, + va_block_retry, + src_pfns, + dst_pfns, + region, + page_mask, + &uvm_hmm_gpu_fault_event->same_devmem_page_mask, + new_residency, + service_context); if (status != NV_OK) return status; @@ -2875,17 +2941,7 @@ static NV_STATUS uvm_hmm_gpu_fault_alloc_and_copy(struct vm_area_struct *vma, // new location yet. status = uvm_va_block_service_copy(processor_id, new_residency, va_block, va_block_retry, service_context); if (status != NV_OK) - return status; - - // Record the destination PFNs of device private struct pages now that - // uvm_va_block_service_copy() has populated the GPU destination pages. 
- fill_dst_pfns(va_block, - src_pfns, - dst_pfns, - region, - page_mask, - &uvm_hmm_gpu_fault_event->same_devmem_page_mask, - new_residency); + clean_up_non_migrating_pages(va_block, dst_pfns, region, page_mask); return status; } @@ -2895,10 +2951,9 @@ static NV_STATUS uvm_hmm_gpu_fault_finalize_and_map(uvm_hmm_gpu_fault_event_t *u uvm_processor_id_t processor_id; uvm_processor_id_t new_residency; uvm_va_block_t *va_block; - uvm_va_block_retry_t *va_block_retry; uvm_service_block_context_t *service_context; const unsigned long *src_pfns; - unsigned long *dst_pfns; + const unsigned long *dst_pfns; uvm_va_block_region_t region; uvm_page_index_t page_index; uvm_page_mask_t *page_mask; @@ -2907,7 +2962,6 @@ static NV_STATUS uvm_hmm_gpu_fault_finalize_and_map(uvm_hmm_gpu_fault_event_t *u processor_id = uvm_hmm_gpu_fault_event->processor_id; new_residency = uvm_hmm_gpu_fault_event->new_residency; va_block = uvm_hmm_gpu_fault_event->va_block; - va_block_retry = uvm_hmm_gpu_fault_event->va_block_retry; service_context = uvm_hmm_gpu_fault_event->service_context; src_pfns = service_context->block_context->hmm.src_pfns; dst_pfns = service_context->block_context->hmm.dst_pfns; @@ -2958,6 +3012,8 @@ NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id, uvm_va_block_region_t region = service_context->region; uvm_hmm_gpu_fault_event_t uvm_hmm_gpu_fault_event; struct migrate_vma *args = &service_context->block_context->hmm.migrate_vma_args; + const uvm_page_mask_t *new_residency_mask = + &service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency; int ret; NV_STATUS status = NV_ERR_INVALID_ADDRESS; @@ -2971,8 +3027,66 @@ NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id, UVM_ASSERT(vma); // If the desired destination is the CPU, try to fault in CPU pages. - if (UVM_ID_IS_CPU(new_residency)) + if (UVM_ID_IS_CPU(new_residency)) { + if (va_block_retry && !list_empty(&va_block_retry->used_chunks)) + gpu_chunk_free_preallocated(va_block, va_block_retry); + return hmm_block_cpu_fault_locked(processor_id, va_block, va_block_retry, service_context); + } + + UVM_ASSERT(va_block_retry); + + // The overall process here is to migrate pages from the CPU or GPUs to the + // faulting GPU. This is only safe because we hold the va_block lock across + // the calls to migrate_vma_pages(), uvm_hmm_gpu_fault_alloc_and_copy(), + // uvm_hmm_gpu_fault_finalize_and_map(), and migrate_vma_finalize(). + // If the va_block lock were to be dropped, eviction callbacks from RM, + // migration callbacks from CPU faults, or invalidation callbacks from + // Linux could change the va_block state which would require careful + // revalidation of the state. Also, pages are page locked which leads to + // inefficiency or potential deadlocks. + + // We pre-allocate the destination GPU pages because otherwise, + // migrate_vma_setup() could page lock the source pages and then try to + // allocate destination pages with block_alloc_gpu_chunk() which might + // unlock the va_block lock and try to evict the source page and fail. + // Note that by preallocating, we introduce 3 states instead of 2 for + // GPU chunks: + // UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED, !is_referenced + // UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED, is_referenced + // UVM_PMM_GPU_CHUNK_STATE_ALLOCATED, is_referenced + // The first state is when uvm_va_block_populate_pages_gpu() returns and + // we should call uvm_pmm_gpu_free() if the chunk isn't needed. 
+ // The second state is after the source pages are pinned and we know which + // chunks will be used for DMA and passed to migrate_vma_pages() dst_pfns[]. + // The third state is when migrate_vma_pages() commits to the migration and + // the GPU chunk will be marked resident. + // The is_referenced flag is just for sanity checking so it is clear when + // ownership for freeing the chunk changes from the driver to Linux's + // page_free() callback. The TEMP_PINNED/is_referenced state could be + // replaced with ALLOCATED/is_referenced but ALLOCATED implies the chunk + // could be evicted (except we hold the va_block lock) and seems safer + // to leave it in the pinned state until we are about to call + // migrate_vma_finalize(). + // Also note that we have to free any pre-allocated pages because otherwise + // they would be marked ALLOCATED in uvm_va_block_retry_deinit() and + // we can't free them in uvm_va_block_retry_deinit() because the va_block + // lock might not be held and freeing the GPU chunk requires unmapping and + // clearing the gpu_state->chunks[] entry. + // Also note that the new_residency and new_residency_mask can change each + // time uvm_va_block_populate_pages_gpu() returns + // NV_ERR_MORE_PROCESSING_REQUIRED (based on thrashing and other reasons) + // so there might be pre-allocated chunks not in region. + status = uvm_va_block_populate_pages_gpu(va_block, + va_block_retry, + new_residency, + region, + new_residency_mask); + if (status != NV_OK) { + if (status != NV_ERR_MORE_PROCESSING_REQUIRED) + gpu_chunk_free_preallocated(va_block, va_block_retry); + return status; + } uvm_hmm_gpu_fault_event.processor_id = processor_id; uvm_hmm_gpu_fault_event.new_residency = new_residency; @@ -2992,21 +3106,7 @@ NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id, ret = migrate_vma_setup_locked(args, va_block); UVM_ASSERT(!ret); - // The overall process here is to migrate pages from the CPU or GPUs to the - // faulting GPU. - // This is safe because we hold the va_block lock across the calls to - // uvm_hmm_gpu_fault_alloc_and_copy(), migrate_vma_pages(), - // uvm_hmm_gpu_fault_finalize_and_map(), and migrate_vma_finalize(). - // If uvm_hmm_gpu_fault_alloc_and_copy() needs to drop the va_block - // lock, a sequence number is used to tell if an invalidate() callback - // occurred while not holding the lock. If the sequence number changes, - // all the locks need to be dropped (mm, va_space, va_block) and the whole - // uvm_va_block_service_locked() called again. Otherwise, there were no - // conflicting invalidate callbacks and our snapshots of the CPU page - // tables are accurate and can be used to DMA pages and update GPU page - // tables. TODO: Bug 3901904: there might be better ways of handling no - // page being migrated. - status = uvm_hmm_gpu_fault_alloc_and_copy(vma, &uvm_hmm_gpu_fault_event); + status = uvm_hmm_gpu_fault_alloc_and_copy(&uvm_hmm_gpu_fault_event); if (status == NV_WARN_MORE_PROCESSING_REQUIRED) { migrate_vma_finalize(args); @@ -3015,12 +3115,16 @@ NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id, // We do know that none of the pages in the region are zero pages // since migrate_vma_setup() would have reported that information. // Try to make it resident in system memory and retry the migration. + // TODO: Bug 3901904: there might be better ways of handling no page + // being migrated. 
status = hmm_make_resident_cpu(va_block, + va_block_retry, service_context->block_context->hmm.vma, service_context->block_context->hmm.src_pfns, region, service_context->access_type, NULL); + return NV_WARN_MORE_PROCESSING_REQUIRED; } @@ -3037,8 +3141,7 @@ NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id, return status; } -static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma, - uvm_hmm_migrate_event_t *uvm_hmm_migrate_event) +static NV_STATUS uvm_hmm_migrate_alloc_and_copy(uvm_hmm_migrate_event_t *uvm_hmm_migrate_event) { uvm_va_block_t *va_block; uvm_va_block_retry_t *va_block_retry; @@ -3065,6 +3168,7 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma, if (UVM_ID_IS_CPU(dest_id)) { status = migrate_alloc_on_cpu(va_block, + va_block_retry, src_pfns, dst_pfns, region, @@ -3073,14 +3177,15 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma, service_context->block_context); } else { - status = dmamap_src_sysmem_pages(va_block, - vma, - src_pfns, - dst_pfns, - region, - page_mask, - dest_id, - service_context); + status = dmamap_src_sysmem_and_fill_dst(va_block, + va_block_retry, + src_pfns, + dst_pfns, + region, + page_mask, + &uvm_hmm_migrate_event->same_devmem_page_mask, + dest_id, + service_context); } if (status != NV_OK) @@ -3095,20 +3200,7 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma, NULL, uvm_hmm_migrate_event->cause); if (status != NV_OK) - return status; - - if (!UVM_ID_IS_CPU(dest_id)) { - // Record the destination PFNs of device private struct pages now that - // uvm_va_block_make_resident_copy() has populated the GPU destination - // pages. - fill_dst_pfns(va_block, - src_pfns, - dst_pfns, - region, - page_mask, - &uvm_hmm_migrate_event->same_devmem_page_mask, - dest_id); - } + clean_up_non_migrating_pages(va_block, dst_pfns, region, page_mask); return status; } @@ -3116,17 +3208,15 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma, static NV_STATUS uvm_hmm_migrate_finalize(uvm_hmm_migrate_event_t *uvm_hmm_migrate_event) { uvm_va_block_t *va_block; - uvm_va_block_retry_t *va_block_retry; uvm_va_block_context_t *va_block_context; uvm_va_block_region_t region; uvm_processor_id_t dest_id; uvm_page_index_t page_index; uvm_page_mask_t *page_mask; const unsigned long *src_pfns; - unsigned long *dst_pfns; + const unsigned long *dst_pfns; va_block = uvm_hmm_migrate_event->va_block; - va_block_retry = uvm_hmm_migrate_event->va_block_retry; va_block_context = uvm_hmm_migrate_event->service_context->block_context; region = uvm_hmm_migrate_event->region; dest_id = uvm_hmm_migrate_event->dest_id; @@ -3189,6 +3279,45 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block, uvm_assert_mutex_locked(&va_block->hmm.migrate_lock); uvm_assert_mutex_locked(&va_block->lock); + // Save some time and effort if we can't migrate to a GPU. + if (UVM_ID_IS_GPU(dest_id) && uvm_hmm_must_use_sysmem(va_block, vma)) { + return hmm_make_resident_cpu(va_block, + va_block_retry, + vma, + va_block_context->hmm.src_pfns, + region, + NULL, + NULL); + } + + // The overall process here is to migrate pages from the CPU or GPUs to the + // destination processor. Note that block_migrate_add_mappings() handles + // updating GPU mappings after the migration. 
+ // This is only safe because we hold the va_block lock across the calls to
+ // uvm_hmm_migrate_alloc_and_copy(), migrate_vma_pages(),
+ // uvm_hmm_migrate_finalize(), migrate_vma_finalize() and
+ // block_migrate_add_mappings().
+ // If the va_block lock were to be dropped, eviction callbacks from RM,
+ // migration callbacks from CPU faults, or invalidation callbacks from
+ // Linux could change the va_block state which would require careful
+ // revalidation of the state. Also, pages are page locked which leads to
+ // inefficiency or potential deadlocks.
+ // While the lock is held, our snapshots of the CPU page tables remain
+ // accurate and can be used to DMA pages and update GPU page tables.
+
+ // We pre-allocate the destination GPU pages because otherwise,
+ // migrate_vma_setup() could page lock the source pages and then try to
+ // allocate destination pages with block_alloc_gpu_chunk() which might
+ // unlock the va_block lock and try to evict the source page and fail.
+ if (UVM_ID_IS_GPU(dest_id)) {
+ status = uvm_va_block_populate_pages_gpu(va_block, va_block_retry, dest_id, region, NULL);
+ if (status != NV_OK) {
+ if (status != NV_ERR_MORE_PROCESSING_REQUIRED)
+ gpu_chunk_free_preallocated(va_block, va_block_retry);
+ return status;
+ }
+ }
+
 start = uvm_va_block_region_start(va_block, region);
 end = uvm_va_block_region_end(va_block, region);
 UVM_ASSERT(vma->vm_start <= start && end < vma->vm_end);
@@ -3214,30 +3343,20 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
 // VMAs so if UvmMigrate() tries to migrate such a region, -EINVAL will
 // be returned and we will only try to make the pages be CPU resident.
 ret = migrate_vma_setup_locked(args, va_block);
- if (ret)
+ if (ret) {
+ if (va_block_retry && !list_empty(&va_block_retry->used_chunks))
+ gpu_chunk_free_preallocated(va_block, va_block_retry);
+
 return hmm_make_resident_cpu(va_block,
+ va_block_retry,
 vma,
 va_block_context->hmm.src_pfns,
 region,
 NULL,
 NULL);
+ }

- // The overall process here is to migrate pages from the CPU or GPUs to the
- // destination processor. Note that block_migrate_add_mappings() handles
- // updating GPU mappings after the migration.
- // This is safe because we hold the va_block lock across the calls to
- // uvm_hmm_migrate_alloc_and_copy(), migrate_vma_pages(),
- // uvm_hmm_migrate_finalize(), migrate_vma_finalize() and
- // block_migrate_add_mappings().
- // If uvm_hmm_migrate_alloc_and_copy() needs to drop the va_block
- // lock, a sequence number is used to tell if an invalidate() callback
- // occurred while not holding the lock. If the sequence number changes,
- // all the locks need to be dropped (mm, va_space, va_block) and the whole
- // uvm_hmm_va_block_migrate_locked() called again. Otherwise, there were no
- // conflicting invalidate callbacks and our snapshots of the CPU page
- // tables are accurate and can be used to DMA pages and update GPU page
- // tables.
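The pre-allocation comment above boils down to: grab every destination resource while it is still safe to do so, and if a later step fails, hand back the pre-allocations that were never consumed. A hedged, self-contained sketch of that shape (the helper names are stand-ins, not UVM or Linux APIs):

/* Sketch of "pre-allocate destinations, free them if a later step fails". */
#include <stdio.h>
#include <stdlib.h>

#define NPAGES 8

static int setup_migration(void)
{
    /* Stand-in for a setup step that can fail; pretend it always does. */
    return -1;
}

static void free_preallocated(void *dst[], int n)
{
    for (int i = 0; i < n; i++) {
        free(dst[i]);
        dst[i] = NULL;
    }
}

static int migrate_region(void)
{
    void *dst[NPAGES] = { 0 };

    /* Pre-allocate every destination page before any source page is locked,
     * so a failed allocation cannot force us to drop locks mid-migration. */
    for (int i = 0; i < NPAGES; i++) {
        dst[i] = malloc(4096);
        if (!dst[i]) {
            free_preallocated(dst, i);
            return -1;
        }
    }

    /* If the next step fails, nothing has consumed the pre-allocations yet,
     * so they must be handed back here rather than leaked. */
    if (setup_migration() != 0) {
        free_preallocated(dst, NPAGES);
        return -1;
    }

    /* ... copy, commit, finalize ... */
    free_preallocated(dst, NPAGES);
    return 0;
}

int main(void)
{
    printf("migrate_region() -> %d\n", migrate_region());
    return 0;
}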
- status = uvm_hmm_migrate_alloc_and_copy(vma, &uvm_hmm_migrate_event); + status = uvm_hmm_migrate_alloc_and_copy(&uvm_hmm_migrate_event); if (status == NV_WARN_MORE_PROCESSING_REQUIRED) { uvm_processor_id_t id; uvm_page_mask_t *page_mask; @@ -3260,6 +3379,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block, } return hmm_make_resident_cpu(va_block, + va_block_retry, vma, va_block_context->hmm.src_pfns, region, @@ -3350,6 +3470,17 @@ NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block, if (ret) return errno_to_nv_status(ret); + if (!(src_pfns[page_index] & MIGRATE_PFN_MIGRATE)) + return NV_WARN_MORE_PROCESSING_REQUIRED; + + if (UVM_IS_DEBUG()) { + struct page *src_page = migrate_pfn_to_page(src_pfns[page_index]); + + UVM_ASSERT(is_device_private_page(src_page)); + UVM_ASSERT(page_pgmap(src_page)->owner == &g_uvm_global); + UVM_ASSERT(uvm_pmm_devmem_page_to_chunk(src_page) == gpu_chunk); + } + return NV_OK; } @@ -3374,6 +3505,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block, .region = region, .dest_id = UVM_ID_CPU, .cause = cause, + .same_devmem_page_mask = {}, }; uvm_page_mask_t *page_mask = &uvm_hmm_migrate_event.page_mask; const uvm_va_policy_t *policy; @@ -3390,7 +3522,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block, // Note that there is no VMA available when evicting HMM pages. va_block_context->hmm.vma = NULL; - uvm_page_mask_copy(page_mask, pages_to_evict); + uvm_page_mask_init_from_region(page_mask, region, pages_to_evict); uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) { npages = uvm_va_block_region_num_pages(region); @@ -3401,9 +3533,16 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block, // Pages resident on the GPU should not have a resident page in system // memory. // TODO: Bug 3660922: Need to handle read duplication at some point. - UVM_ASSERT(uvm_page_mask_region_empty(cpu_resident_mask, region)); + UVM_ASSERT(!uvm_page_mask_intersects(cpu_resident_mask, page_mask)); - status = migrate_alloc_on_cpu(va_block, src_pfns, dst_pfns, region, page_mask, NULL, va_block_context); + status = migrate_alloc_on_cpu(va_block, + NULL, + src_pfns, + dst_pfns, + region, + page_mask, + &uvm_hmm_migrate_event.same_devmem_page_mask, + va_block_context); if (status != NV_OK) goto err; @@ -3429,6 +3568,13 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block, migrate_device_finalize(src_pfns + region.first, dst_pfns + region.first, npages); } + // TODO: Bug 5167764: Evictions can't handle partial migrations. + uvm_page_mask_init_from_region(&va_block_context->scratch_page_mask, region, pages_to_evict); + if (uvm_page_mask_andnot(&va_block_context->scratch_page_mask, + &va_block_context->scratch_page_mask, + page_mask)) + return NV_WARN_MORE_PROCESSING_REQUIRED; + return NV_OK; err: @@ -3663,12 +3809,12 @@ NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block, // Update the va_block CPU state based on the snapshot. // Note that we have to adjust the pfns address since it will be indexed // by region.first. 
- status = populate_region(va_block, &pfn - region.first, region, NULL); + status = populate_region(va_block, NULL, &pfn - region.first, region, NULL); uvm_mutex_unlock(&va_block->lock); uvm_hmm_migrate_finish(va_block); - return NV_OK; + return status; } NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params, struct file *filp) diff --git a/kernel-open/nvidia-uvm/uvm_hmm.h b/kernel-open/nvidia-uvm/uvm_hmm.h index 9e20b973d..cf9ddfd05 100644 --- a/kernel-open/nvidia-uvm/uvm_hmm.h +++ b/kernel-open/nvidia-uvm/uvm_hmm.h @@ -37,6 +37,7 @@ typedef struct // This stores pointers to uvm_va_block_t for HMM blocks. uvm_range_tree_t blocks; uvm_mutex_t blocks_lock; + atomic64_t allocated_page_count; } uvm_hmm_va_space_t; #if UVM_IS_CONFIG_HMM() diff --git a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c index cf626ad5c..aaa12c727 100644 --- a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c @@ -402,7 +402,10 @@ static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) { uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_chunk(pmm, chunk); - uvm_assert_spinlock_locked(&pmm->list_lock); + // The PMM list_lock must be held, but calling uvm_assert_spinlock_locked() + // is not possible here due to the absence of the UVM context pointer in + // the interrupt context when called from devmem_page_free(). + UVM_ASSERT(chunk->state != UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED); chunk->state = UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED; @@ -415,8 +418,9 @@ static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) // The passed-in subchunk is not the root chunk so the root chunk has to be // split. - UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n", - uvm_pmm_gpu_chunk_state_string(chunk->state)); + UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, + "chunk state %s\n", + uvm_pmm_gpu_chunk_state_string(chunk->state)); chunk->suballoc->pinned_leaf_chunks++; } @@ -429,7 +433,6 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_ uvm_assert_spinlock_locked(&pmm->list_lock); UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED); - UVM_ASSERT(chunk->va_block == NULL); UVM_ASSERT(chunk_is_root_chunk_pinned(pmm, chunk)); UVM_ASSERT(new_state != UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED); @@ -444,8 +447,9 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_ // The passed-in subchunk is not the root chunk so the root chunk has to be // split. 
- UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n", - uvm_pmm_gpu_chunk_state_string(chunk->state)); + UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, + "chunk state %s\n", + uvm_pmm_gpu_chunk_state_string(chunk->state)); UVM_ASSERT(chunk->suballoc->pinned_leaf_chunks != 0); chunk->suballoc->pinned_leaf_chunks--; @@ -597,8 +601,6 @@ NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm, return status; for (i = 0; i < num_chunks; ++i) { - UVM_ASSERT(chunks[i]->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED); - uvm_spin_lock(&pmm->list_lock); chunk_unpin(pmm, chunks[i], UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); chunks[i]->is_referenced = false; @@ -644,45 +646,29 @@ static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk list_del_init(&chunk->list); } -static void gpu_unpin_temp(uvm_pmm_gpu_t *pmm, - uvm_gpu_chunk_t *chunk, - uvm_va_block_t *va_block, - bool is_referenced) +void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block) { UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED); UVM_ASSERT(uvm_gpu_chunk_is_user(chunk)); - - INIT_LIST_HEAD(&chunk->list); + UVM_ASSERT(list_empty(&chunk->list)); + UVM_ASSERT(va_block); + UVM_ASSERT(chunk->va_block == va_block); + UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block)); uvm_spin_lock(&pmm->list_lock); - UVM_ASSERT(!chunk->va_block); - UVM_ASSERT(va_block); - UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block)); - chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); - chunk->is_referenced = is_referenced; - chunk->va_block = va_block; chunk_update_lists_locked(pmm, chunk); uvm_spin_unlock(&pmm->list_lock); } -void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block) -{ - gpu_unpin_temp(pmm, chunk, va_block, false); -} - -void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block) -{ - gpu_unpin_temp(pmm, chunk, va_block, true); -} - void uvm_pmm_gpu_free(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker) { NV_STATUS status; - if (!chunk) + // Referenced chunks are freed by Linux when the reference is released. 
+ if (!chunk || chunk->is_referenced) return; UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED || @@ -748,6 +734,10 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) size_t i; UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT); + UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk), + "%u != %u\n", + chunk->suballoc->allocated, + num_subchunks(chunk)); UVM_ASSERT(first_child->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED || first_child->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); @@ -766,14 +756,6 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) } } - if (first_child->state == UVM_PMM_GPU_CHUNK_STATE_FREE) { - UVM_ASSERT(chunk->suballoc->allocated == 0); - } - else { - UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk), "%u != %u\n", - chunk->suballoc->allocated, num_subchunks(chunk)); - } - return true; } @@ -812,6 +794,7 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) else if (child_state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) { UVM_ASSERT(root_chunk->chunk.suballoc->pinned_leaf_chunks >= num_sub); root_chunk->chunk.suballoc->pinned_leaf_chunks += 1 - num_sub; + chunk->va_block = subchunk->va_block; } chunk->state = child_state; @@ -835,7 +818,7 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) UVM_ASSERT(list_empty(&subchunk->list)); if ((child_state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(subchunk)) - UVM_ASSERT(subchunk->va_block != NULL); + UVM_ASSERT(subchunk->va_block); kmem_cache_free(CHUNK_CACHE, subchunk); } @@ -1202,7 +1185,7 @@ void uvm_pmm_gpu_mark_chunk_evicted(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) UVM_ASSERT(chunk_is_in_eviction(pmm, chunk)); UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); - UVM_ASSERT(chunk->va_block != NULL); + UVM_ASSERT(chunk->va_block); chunk->va_block = NULL; chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK; @@ -1259,11 +1242,13 @@ static NV_STATUS find_and_retain_va_block_to_evict(uvm_pmm_gpu_t *pmm, uvm_gpu_c uvm_spin_lock(&pmm->list_lock); - // All free chunks should have been pinned already by pin_free_chunks_func(). + // All free chunks should have been pinned already by + // pin_free_chunks_func(). 
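One of the assertions tightened in assert_chunk_mergeable() above is that a split chunk may only be merged when every leaf is accounted for: the parent's allocated count must equal the number of subchunks, and the children must all be in a compatible, non-free state. A toy version of that check, with invented types and limits:

/* Toy merge-eligibility check; types and sizes are invented for the example. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum state { ST_TEMP_PINNED, ST_ALLOCATED, ST_FREE };

struct parent {
    unsigned allocated;        /* how many children are currently handed out */
    unsigned num_children;
    enum state child_state[16];
};

static bool can_merge(const struct parent *p)
{
    /* Every child must be handed out ... */
    if (p->allocated != p->num_children)
        return false;

    /* ... and they must all share the same, mergeable state. */
    for (unsigned i = 0; i < p->num_children; i++) {
        if (p->child_state[i] != p->child_state[0])
            return false;
        if (p->child_state[i] == ST_FREE)
            return false;
    }
    return true;
}

int main(void)
{
    struct parent p = { .allocated = 4, .num_children = 4,
                        .child_state = { ST_ALLOCATED, ST_ALLOCATED,
                                         ST_ALLOCATED, ST_ALLOCATED } };
    assert(can_merge(&p));

    p.allocated = 3;           /* one child missing: not mergeable */
    assert(!can_merge(&p));

    printf("merge checks passed\n");
    return 0;
}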
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED || chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED || chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, - "state %s\n", uvm_pmm_gpu_chunk_state_string(chunk->state)); + "state %s\n", + uvm_pmm_gpu_chunk_state_string(chunk->state)); if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) { UVM_ASSERT(chunk->va_block); @@ -1750,8 +1735,10 @@ static NV_STATUS alloc_chunk_with_splits(uvm_pmm_gpu_t *pmm, UVM_ASSERT(chunk->parent->suballoc); UVM_ASSERT(uvm_gpu_chunk_get_size(chunk->parent) == uvm_chunk_find_next_size(chunk_sizes, cur_size)); UVM_ASSERT(chunk->parent->type == type); - UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent), "allocated %u num %u\n", - chunk->parent->suballoc->allocated, num_subchunks(chunk->parent)); + UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent), + "allocated %u num %u\n", + chunk->parent->suballoc->allocated, + num_subchunks(chunk->parent)); } if (cur_size == chunk_size) { @@ -1856,9 +1843,9 @@ static void init_root_chunk(uvm_pmm_gpu_t *pmm, uvm_pmm_gpu_chunk_state_string(chunk->state), uvm_gpu_name(gpu)); - UVM_ASSERT(chunk->parent == NULL); - UVM_ASSERT(chunk->suballoc == NULL); - UVM_ASSERT(chunk->va_block == NULL); + UVM_ASSERT(!chunk->parent); + UVM_ASSERT(!chunk->suballoc); + UVM_ASSERT(!chunk->va_block); UVM_ASSERT(chunk->va_block_page_index == PAGES_PER_UVM_VA_BLOCK); UVM_ASSERT(list_empty(&chunk->list)); UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_MAX); @@ -2116,6 +2103,9 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) subchunk->va_block_page_index = chunk->va_block_page_index + (i * subchunk_size) / PAGE_SIZE; subchunk->is_referenced = chunk->is_referenced; } + else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) { + subchunk->va_block = chunk->va_block; + } } // We're splitting an allocated or pinned chunk in-place. @@ -2141,6 +2131,10 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) // accounting for the root chunk itself so add the 1 back. if (chunk_is_root_chunk(chunk)) root_chunk->chunk.suballoc->pinned_leaf_chunks += 1; + + chunk->va_block = NULL; + chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK; + chunk->is_referenced = false; } chunk->state = UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT; @@ -2222,16 +2216,16 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) if (root_chunk->chunk.in_eviction) { // A root chunk with pinned subchunks would never be picked for eviction - // so this one has to be in the allocated state. Pin it and let the - // evicting thread pick it up. - UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED); - UVM_ASSERT(chunk->va_block != NULL); - UVM_ASSERT(chunk->va_block_page_index != PAGES_PER_UVM_VA_BLOCK); - UVM_ASSERT(list_empty(&chunk->list)); - chunk->va_block = NULL; - chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK; - chunk->is_zero = false; - chunk_pin(pmm, chunk); + // but HMM evictions will end up here so leave the chunk pinned (or pin + // it) and let the eviction thread pick it up. 
+ if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) { + UVM_ASSERT(chunk->va_block); + UVM_ASSERT(list_empty(&chunk->list)); + chunk->va_block = NULL; + chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK; + chunk->is_zero = false; + chunk_pin(pmm, chunk); + } return; } @@ -2245,17 +2239,15 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) } } - if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) { - chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_FREE); - } - else { - chunk->state = UVM_PMM_GPU_CHUNK_STATE_FREE; - chunk->va_block = NULL; - } - + chunk->va_block = NULL; chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK; chunk->is_zero = false; + if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) + chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_FREE); + else + chunk->state = UVM_PMM_GPU_CHUNK_STATE_FREE; + chunk_update_lists_locked(pmm, chunk); } @@ -2369,8 +2361,8 @@ static void free_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk) try_free = is_root; } else { - // Freeing a chunk can only fail if it requires merging. Take the PMM lock - // and free it with merges supported. + // Freeing a chunk can only fail if it requires merging. Take the PMM + // lock and free it with merges supported. uvm_mutex_lock(&pmm->lock); free_chunk_with_merges(pmm, chunk); uvm_mutex_unlock(&pmm->lock); @@ -3088,6 +3080,11 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm) break; } + if (page->zone_device_data) { + ret = false; + break; + } + if (page_count(page)) { ret = false; break; @@ -3102,6 +3099,14 @@ static void devmem_page_free(struct page *page) uvm_gpu_chunk_t *chunk = uvm_pmm_devmem_page_to_chunk(page); uvm_gpu_t *gpu = uvm_gpu_chunk_get_gpu(chunk); + if (chunk->va_block) { + uvm_va_space_t *va_space = chunk->va_block->hmm.va_space; + + UVM_ASSERT(va_space); + atomic64_dec(&va_space->hmm.allocated_page_count); + UVM_ASSERT(atomic64_read(&va_space->hmm.allocated_page_count) >= 0); + } + page->zone_device_data = NULL; // We should be calling free_chunk() except that it acquires a mutex and @@ -3111,7 +3116,20 @@ static void devmem_page_free(struct page *page) spin_lock(&gpu->pmm.list_lock.lock); UVM_ASSERT(chunk->is_referenced); + + chunk->va_block = NULL; + chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK; chunk->is_referenced = false; + + if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) { + list_del_init(&chunk->list); + chunk_pin(&gpu->pmm, chunk); + } + else { + UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED); + UVM_ASSERT(list_empty(&chunk->list)); + } + list_add_tail(&chunk->list, &gpu->pmm.root_chunks.va_block_lazy_free); spin_unlock(&gpu->pmm.list_lock.lock); @@ -3362,6 +3380,7 @@ static void process_lazy_free(uvm_pmm_gpu_t *pmm) // is empty. 
while (!list_empty(&pmm->root_chunks.va_block_lazy_free)) { chunk = list_first_entry(&pmm->root_chunks.va_block_lazy_free, uvm_gpu_chunk_t, list); + UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED); list_del_init(&chunk->list); uvm_spin_unlock(&pmm->list_lock); @@ -3414,6 +3433,7 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm) for (i = 0; i < UVM_PMM_GPU_MEMORY_TYPE_COUNT; i++) { pmm->chunk_sizes[i] = 0; + // Add the common root chunk size to all memory types pmm->chunk_sizes[i] |= UVM_CHUNK_SIZE_MAX; for (j = 0; j < ARRAY_SIZE(chunk_size_init); j++) @@ -3421,7 +3441,9 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm) UVM_ASSERT(pmm->chunk_sizes[i] < UVM_CHUNK_SIZE_INVALID); UVM_ASSERT_MSG(hweight_long(pmm->chunk_sizes[i]) <= UVM_MAX_CHUNK_SIZES, - "chunk sizes %lu, max chunk sizes %u\n", hweight_long(pmm->chunk_sizes[i]), UVM_MAX_CHUNK_SIZES); + "chunk sizes %lu, max chunk sizes %u\n", + hweight_long(pmm->chunk_sizes[i]), + UVM_MAX_CHUNK_SIZES); } status = init_caches(pmm); @@ -3515,9 +3537,9 @@ void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm) gpu = uvm_pmm_to_gpu(pmm); - UVM_ASSERT(uvm_pmm_gpu_check_orphan_pages(pmm)); nv_kthread_q_flush(&gpu->parent->lazy_free_q); UVM_ASSERT(list_empty(&pmm->root_chunks.va_block_lazy_free)); + UVM_ASSERT(uvm_pmm_gpu_check_orphan_pages(pmm)); release_free_root_chunks(pmm); if (gpu->mem_info.size != 0 && gpu_supports_pma_eviction(gpu)) diff --git a/kernel-open/nvidia-uvm/uvm_pmm_gpu.h b/kernel-open/nvidia-uvm/uvm_pmm_gpu.h index 142b2c5f5..86d1a4f47 100644 --- a/kernel-open/nvidia-uvm/uvm_pmm_gpu.h +++ b/kernel-open/nvidia-uvm/uvm_pmm_gpu.h @@ -271,6 +271,11 @@ struct uvm_gpu_chunk_struct // This flag indicates an allocated user chunk is referenced by a device // private struct page PTE and therefore expects a page_free() callback. + // The flag is only for sanity checking since uvm_pmm_gpu_free() + // shouldn't be called if Linux has a device private reference to this + // chunk and devmem_page_free() should only be called from the Linux + // callback if a reference was created. + // See uvm_hmm_va_block_service_locked() and fill_dst_pfn() for details. // // This field is always false in kernel chunks. bool is_referenced : 1; @@ -300,6 +305,9 @@ struct uvm_gpu_chunk_struct // The VA block using the chunk, if any. // User chunks that are not backed by a VA block are considered to be // temporarily pinned and cannot be evicted. + // Note that the chunk state is normally UVM_PMM_GPU_CHUNK_STATE_ALLOCATED + // but can also be UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED if an HMM va_block + // and device private struct page have a pointer to this chunk. // // This field is always NULL in kernel chunks. uvm_va_block_t *va_block; @@ -437,17 +445,16 @@ struct page *uvm_gpu_chunk_to_page(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk); // Allocates num_chunks chunks of size chunk_size in caller-supplied array // (chunks). // -// Returned chunks are in the TEMP_PINNED state, requiring a call to either -// uvm_pmm_gpu_unpin_allocated, uvm_pmm_gpu_unpin_referenced, or -// uvm_pmm_gpu_free. If a tracker is passed in, all -// the pending operations on the allocated chunks will be added to it +// Returned chunks are in the TEMP_PINNED state, requiring a call to +// uvm_pmm_gpu_unpin_allocated or uvm_pmm_gpu_free. If a tracker is passed in, +// all the pending operations on the allocated chunks will be added to it // guaranteeing that all the entries come from the same GPU as the PMM. 
// Otherwise, when tracker is NULL, all the pending operations will be // synchronized before returning to the caller. // // Each of the allocated chunks list nodes (uvm_gpu_chunk_t::list) can be used -// by the caller until the chunk is unpinned (uvm_pmm_gpu_unpin_allocated, -// uvm_pmm_gpu_unpin_referenced) or freed (uvm_pmm_gpu_free). If used, the list +// by the caller until the chunk is unpinned (uvm_pmm_gpu_unpin_allocated) +// or freed (uvm_pmm_gpu_free). If used, the list // node has to be returned to a valid state before calling either of the APIs. // // In case of an error, the chunks array is guaranteed to be cleared. @@ -480,12 +487,6 @@ NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm, // Can only be used on user memory. void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block); -// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and -// mark it as referenced. -// -// Can only be used on user memory. -void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block); - // Free a user or kernel chunk. Temporarily pinned chunks are unpinned. // // The tracker is optional and a NULL tracker indicates that no new operation diff --git a/kernel-open/nvidia-uvm/uvm_va_block.c b/kernel-open/nvidia-uvm/uvm_va_block.c index 50c9707cb..d76137582 100644 --- a/kernel-open/nvidia-uvm/uvm_va_block.c +++ b/kernel-open/nvidia-uvm/uvm_va_block.c @@ -426,11 +426,13 @@ static uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page_resident(uvm_va_block_t return chunk; } -void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, int nid, uvm_page_index_t page_index) +void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, + uvm_cpu_chunk_t *chunk, + int nid, + uvm_page_index_t page_index) { uvm_va_block_cpu_node_state_t *node_state = block_node_state_get(va_block, nid); uvm_cpu_chunk_storage_mixed_t *mixed; - uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, nid, page_index); uvm_va_block_region_t chunk_region = uvm_cpu_chunk_block_region(va_block, chunk, page_index); size_t slot_index; uvm_cpu_chunk_t **chunks; @@ -765,7 +767,7 @@ static bool block_check_cpu_chunks(uvm_va_block_t *block) int nid; uvm_page_mask_t *temp_resident_mask; - temp_resident_mask = kmem_cache_alloc(g_uvm_page_mask_cache, NV_UVM_GFP_FLAGS | __GFP_ZERO); + temp_resident_mask = nv_kmem_cache_zalloc(g_uvm_page_mask_cache, NV_UVM_GFP_FLAGS); for_each_possible_uvm_node(nid) { uvm_cpu_chunk_t *chunk; @@ -827,16 +829,16 @@ void uvm_va_block_retry_deinit(uvm_va_block_retry_t *retry, uvm_va_block_t *va_b uvm_pmm_gpu_free(&gpu->pmm, gpu_chunk, NULL); } + // HMM should have already moved allocated GPU chunks to the referenced + // state or freed them. + if (uvm_va_block_is_hmm(va_block)) + UVM_ASSERT(list_empty(&retry->used_chunks)); + // Unpin all the used chunks now that we are done list_for_each_entry_safe(gpu_chunk, next_chunk, &retry->used_chunks, list) { list_del_init(&gpu_chunk->list); gpu = uvm_gpu_chunk_get_gpu(gpu_chunk); - // HMM should have already moved allocated blocks to the referenced - // state so any left over were not migrated and should be freed. 
- if (uvm_va_block_is_hmm(va_block)) - uvm_pmm_gpu_free(&gpu->pmm, gpu_chunk, NULL); - else - uvm_pmm_gpu_unpin_allocated(&gpu->pmm, gpu_chunk, va_block); + uvm_pmm_gpu_unpin_allocated(&gpu->pmm, gpu_chunk, va_block); } } @@ -1158,6 +1160,8 @@ static size_t block_gpu_chunk_index(uvm_va_block_t *block, UVM_ASSERT(gpu_state->chunks); chunk = gpu_state->chunks[index]; if (chunk) { + UVM_ASSERT(uvm_gpu_chunk_is_user(chunk)); + UVM_ASSERT(uvm_id_equal(uvm_gpu_id_from_index(chunk->gpu_index), gpu->id)); UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == size); UVM_ASSERT(chunk->state != UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED); UVM_ASSERT(chunk->state != UVM_PMM_GPU_CHUNK_STATE_FREE); @@ -1385,10 +1389,7 @@ error: return status; } -// Retrieves the gpu_state for the given GPU. The returned pointer is -// internally managed and will be allocated (and freed) automatically, -// rather than by the caller. -static uvm_va_block_gpu_state_t *block_gpu_state_get_alloc(uvm_va_block_t *block, uvm_gpu_t *gpu) +uvm_va_block_gpu_state_t *uvm_va_block_gpu_state_get_alloc(uvm_va_block_t *block, uvm_gpu_t *gpu) { NV_STATUS status; uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id); @@ -1420,22 +1421,6 @@ error: return NULL; } -NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block) -{ - uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block); - uvm_gpu_id_t gpu_id; - - UVM_ASSERT(uvm_va_block_is_hmm(va_block)); - uvm_assert_mutex_locked(&va_block->lock); - - for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpus) { - if (!block_gpu_state_get_alloc(va_block, uvm_gpu_get(gpu_id))) - return NV_ERR_NO_MEMORY; - } - - return NV_OK; -} - void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block, uvm_cpu_chunk_t *chunk) { @@ -1490,7 +1475,7 @@ void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_regio uvm_page_mask_region_clear(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ], chunk_region); uvm_page_mask_region_clear(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], chunk_region); uvm_va_block_cpu_clear_resident_region(va_block, nid, chunk_region); - uvm_cpu_chunk_remove_from_block(va_block, nid, page_index); + uvm_cpu_chunk_remove_from_block(va_block, chunk, nid, page_index); uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk); uvm_cpu_chunk_free(chunk); } @@ -1586,26 +1571,6 @@ static NV_STATUS block_alloc_cpu_chunk(uvm_va_block_t *block, return status; } -// Same as block_alloc_cpu_chunk() but allocate a chunk suitable for use as -// a HMM destination page. The main difference is UVM does not own the reference -// on the struct page backing these chunks. -static NV_STATUS block_alloc_hmm_cpu_chunk(uvm_va_block_t *block, - uvm_chunk_sizes_mask_t cpu_allocation_sizes, - uvm_cpu_chunk_alloc_flags_t flags, - int nid, - uvm_cpu_chunk_t **chunk) -{ - NV_STATUS status; - - UVM_ASSERT(uvm_va_block_is_hmm(block)); - - status = block_alloc_cpu_chunk(block, cpu_allocation_sizes, flags, nid, chunk); - if (status == NV_OK) - (*chunk)->type = UVM_CPU_CHUNK_TYPE_HMM; - - return status; -} - // Find the largest allocation size we can use for the given page_index in the // given block. Returns the mask of possible sizes and region covered by the // largest. Callers may also elect to use a smaller size. 
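The devmem_page_free()/process_lazy_free() hunks earlier in this patch follow a common deferred-free pattern: the free callback runs in a context where only a spinlock may be taken, so the chunk is simply re-pinned and parked on a list, and a worker later drains the list and performs the heavyweight free. A compact userspace sketch of that queue, with a pthread mutex standing in for the spinlock and purely illustrative names:

/* Deferred-free queue: cheap enqueue from a restricted context,
 * expensive free done later by a worker. Illustrative only. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct chunk {
    int id;
    struct chunk *next;
};

static pthread_mutex_t lazy_lock = PTHREAD_MUTEX_INITIALIZER;
static struct chunk *lazy_free_head;

/* Called from the "cannot sleep, cannot take mutexes" path. */
static void page_free_callback(struct chunk *c)
{
    pthread_mutex_lock(&lazy_lock);
    c->next = lazy_free_head;
    lazy_free_head = c;
    pthread_mutex_unlock(&lazy_lock);
}

/* Called from a worker where blocking is allowed. */
static void process_lazy_free(void)
{
    for (;;) {
        pthread_mutex_lock(&lazy_lock);
        struct chunk *c = lazy_free_head;
        if (c)
            lazy_free_head = c->next;
        pthread_mutex_unlock(&lazy_lock);

        if (!c)
            break;

        /* The real free (unmap, merge, etc.) happens outside the lock. */
        printf("freeing chunk %d\n", c->id);
        free(c);
    }
}

int main(void)
{
    for (int i = 0; i < 3; i++) {
        struct chunk *c = malloc(sizeof(*c));
        c->id = i;
        page_free_callback(c);
    }
    process_lazy_free();
    return 0;
}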
@@ -1837,7 +1802,7 @@ static NV_STATUS block_add_cpu_chunk(uvm_va_block_t *block, status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk); if (status != NV_OK) { - uvm_cpu_chunk_remove_from_block(block, uvm_cpu_chunk_get_numa_node(chunk), page_index); + uvm_cpu_chunk_remove_from_block(block, chunk, uvm_cpu_chunk_get_numa_node(chunk), page_index); goto out; } } @@ -1859,10 +1824,9 @@ out: // is required for IOMMU support. Skipped on GPUs without access to CPU memory. // e.g., this happens when the Confidential Computing Feature is enabled. static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block, - uvm_page_mask_t *populate_page_mask, + const uvm_page_mask_t *populate_page_mask, uvm_va_block_region_t populate_region, - uvm_va_block_context_t *block_context, - bool staged) + uvm_va_block_context_t *block_context) { NV_STATUS status = NV_OK; uvm_cpu_chunk_t *chunk; @@ -1956,13 +1920,7 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block, if (!uvm_page_mask_region_full(resident_mask, region)) chunk_alloc_flags |= UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO; - // Management of a page used for a staged migration is never handed off - // to the kernel and is really just a driver managed page. Therefore - // don't allocate a HMM chunk in this case. - if (uvm_va_block_is_hmm(block) && !staged) - status = block_alloc_hmm_cpu_chunk(block, allocation_sizes, chunk_alloc_flags, preferred_nid, &chunk); - else - status = block_alloc_cpu_chunk(block, allocation_sizes, chunk_alloc_flags, preferred_nid, &chunk); + status = block_alloc_cpu_chunk(block, allocation_sizes, chunk_alloc_flags, preferred_nid, &chunk); if (status == NV_WARN_MORE_PROCESSING_REQUIRED) { alloc_flags &= ~UVM_CPU_CHUNK_ALLOC_FLAGS_STRICT; @@ -1973,7 +1931,8 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block, return status; } - // A smaller chunk than the maximum size may have been allocated, update the region accordingly. + // A smaller chunk than the maximum size may have been allocated, + // update the region accordingly. region = uvm_va_block_chunk_region(block, uvm_cpu_chunk_get_size(chunk), page_index); status = block_add_cpu_chunk(block, node_pages_mask, chunk, region); if (status != NV_OK) @@ -1981,50 +1940,14 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block, // Skip iterating over all pages covered by the allocated chunk. page_index = region.outer - 1; - -#if UVM_IS_CONFIG_HMM() - if (uvm_va_block_is_hmm(block) && block_context) - block_context->hmm.dst_pfns[page_index] = migrate_pfn(page_to_pfn(chunk->page)); -#endif } return NV_OK; } -// Note this clears the block_context caller_page_mask. 
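The block_populate_pages_cpu() hunk above retries the chunk allocation after clearing the STRICT flag when the preferred NUMA node cannot satisfy it. The same retry-with-relaxed-flags shape in a standalone form; the flag names and allocator below are stand-ins, not the UVM CPU chunk allocator:

/* Retry an allocation with progressively relaxed constraints. Stand-in names. */
#include <stdio.h>
#include <stdlib.h>

#define ALLOC_FLAG_STRICT_NODE 0x1   /* must come from the preferred node */
#define ALLOC_FLAG_ZERO        0x2

static void *alloc_chunk(unsigned flags, int preferred_node)
{
    (void)preferred_node;
    /* Pretend the preferred node is out of memory. */
    if (flags & ALLOC_FLAG_STRICT_NODE)
        return NULL;
    return calloc(1, 4096);
}

static void *alloc_chunk_with_fallback(unsigned flags, int preferred_node)
{
    void *chunk = alloc_chunk(flags, preferred_node);

    /* First failure: relax the placement constraint and try again. */
    if (!chunk && (flags & ALLOC_FLAG_STRICT_NODE))
        chunk = alloc_chunk(flags & ~ALLOC_FLAG_STRICT_NODE, preferred_node);

    return chunk;
}

int main(void)
{
    void *chunk = alloc_chunk_with_fallback(ALLOC_FLAG_STRICT_NODE | ALLOC_FLAG_ZERO, 0);
    printf("allocation %s\n", chunk ? "succeeded after fallback" : "failed");
    free(chunk);
    return 0;
}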
NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index, uvm_va_block_context_t *block_context) { - uvm_page_mask_t *page_mask = &block_context->caller_page_mask; - NV_STATUS status = NV_OK; - - uvm_page_mask_zero(page_mask); - uvm_page_mask_set(page_mask, page_index); - - if (uvm_va_block_is_hmm(va_block)) { - const uvm_va_policy_t *policy; - uvm_va_block_region_t region; - uvm_va_policy_node_t *node; - - uvm_for_each_va_policy_in(policy, va_block, va_block->start, va_block->end, node, region) { - status = block_populate_pages_cpu(va_block, - page_mask, - region, - block_context, - false); - - if (status != NV_OK) - break; - } - } - else { - status = block_populate_pages_cpu(va_block, - page_mask, - uvm_va_block_region_from_block(va_block), - block_context, - false); - } - - return status; + return block_populate_pages_cpu(va_block, NULL, uvm_va_block_region_for_page(page_index), block_context); } // Try allocating a chunk. If eviction was required, @@ -2413,7 +2336,7 @@ static uvm_page_mask_t *block_resident_mask_get_alloc(uvm_va_block_t *block, uvm if (UVM_ID_IS_CPU(processor)) return uvm_va_block_resident_mask_get(block, processor, nid); - gpu_state = block_gpu_state_get_alloc(block, uvm_gpu_get(processor)); + gpu_state = uvm_va_block_gpu_state_get_alloc(block, uvm_gpu_get(processor)); if (!gpu_state) return NULL; @@ -2453,9 +2376,15 @@ void uvm_va_block_unmapped_pages_get(uvm_va_block_t *va_block, return; } + uvm_page_mask_zero(out_mask); uvm_page_mask_region_fill(out_mask, region); - for_each_id_in_mask(id, &va_block->mapped) { + // UVM-HMM doesn't always know when CPU pages are mapped or not since there + // is no notification when CPU page tables are upgraded. If the page is + // resident, assume the CPU has some mapping. + uvm_page_mask_andnot(out_mask, out_mask, uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE)); + + for_each_gpu_id_in_mask(id, &va_block->mapped) { uvm_page_mask_andnot(out_mask, out_mask, uvm_va_block_map_mask_get(va_block, id)); } } @@ -2951,7 +2880,7 @@ static NV_STATUS block_populate_gpu_chunk(uvm_va_block_t *block, size_t chunk_index, uvm_va_block_region_t chunk_region) { - uvm_va_block_gpu_state_t *gpu_state = block_gpu_state_get_alloc(block, gpu); + uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get_alloc(block, gpu); uvm_gpu_chunk_t *chunk = NULL; uvm_chunk_size_t chunk_size = uvm_va_block_region_size(chunk_region); uvm_va_block_test_t *block_test = uvm_va_block_get_test(block); @@ -3005,8 +2934,10 @@ static NV_STATUS block_populate_gpu_chunk(uvm_va_block_t *block, } // Record the used chunk so that it can be unpinned at the end of the whole - // operation. + // operation. HMM chunks are unpinned after a successful migration. block_retry_add_used_chunk(retry, chunk); + + chunk->va_block = block; gpu_state->chunks[chunk_index] = chunk; return NV_OK; @@ -3023,12 +2954,13 @@ chunk_free: } // Populate all chunks which cover the given region and page mask. 
-static NV_STATUS block_populate_pages_gpu(uvm_va_block_t *block, +NV_STATUS uvm_va_block_populate_pages_gpu(uvm_va_block_t *block, uvm_va_block_retry_t *retry, - uvm_gpu_t *gpu, + uvm_gpu_id_t gpu_id, uvm_va_block_region_t region, const uvm_page_mask_t *populate_mask) { + uvm_gpu_t *gpu = uvm_gpu_get(gpu_id); uvm_va_block_region_t chunk_region, check_region; size_t chunk_index; uvm_page_index_t page_index; @@ -3105,7 +3037,7 @@ static NV_STATUS block_populate_pages(uvm_va_block_t *block, if (!tmp_processor_mask) return NV_ERR_NO_MEMORY; - status = block_populate_pages_gpu(block, retry, uvm_gpu_get(dest_id), region, populate_page_mask); + status = uvm_va_block_populate_pages_gpu(block, retry, dest_id, region, populate_page_mask); if (status != NV_OK) { uvm_processor_mask_cache_free(tmp_processor_mask); return status; @@ -3150,7 +3082,7 @@ static NV_STATUS block_populate_pages(uvm_va_block_t *block, } uvm_memcg_context_start(&memcg_context, block_context->mm); - status = block_populate_pages_cpu(block, cpu_populate_mask, region, block_context, UVM_ID_IS_GPU(dest_id)); + status = block_populate_pages_cpu(block, cpu_populate_mask, region, block_context); uvm_memcg_context_end(&memcg_context); return status; } @@ -4180,7 +4112,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block, // Ensure that there is GPU state that can be used for CPU-to-CPU copies if (UVM_ID_IS_CPU(dst_id) && uvm_id_equal(src_id, dst_id)) { - uvm_va_block_gpu_state_t *gpu_state = block_gpu_state_get_alloc(block, copying_gpu); + uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get_alloc(block, copying_gpu); if (!gpu_state) { status = NV_ERR_NO_MEMORY; break; @@ -4841,6 +4773,7 @@ static void block_cleanup_temp_pinned_gpu_chunks(uvm_va_block_t *va_block, uvm_g // block_populate_pages above. Release them since the copy // failed and they won't be mapped to userspace. if (chunk && chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) { + list_del_init(&chunk->list); uvm_mmu_chunk_unmap(chunk, &va_block->tracker); uvm_pmm_gpu_free(&gpu->pmm, chunk, &va_block->tracker); gpu_state->chunks[i] = NULL; @@ -4935,7 +4868,8 @@ NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block, prefetch_page_mask, UVM_VA_BLOCK_TRANSFER_MODE_MOVE); - if (status != NV_OK) { + // HMM does its own clean up. + if (status != NV_OK && !uvm_va_block_is_hmm(va_block)) { if (UVM_ID_IS_GPU(dest_id)) block_cleanup_temp_pinned_gpu_chunks(va_block, dest_id); @@ -7891,7 +7825,7 @@ static NV_STATUS block_pre_populate_pde1_gpu(uvm_va_block_t *block, gpu = gpu_va_space->gpu; big_page_size = gpu_va_space->page_tables.big_page_size; - gpu_state = block_gpu_state_get_alloc(block, gpu); + gpu_state = uvm_va_block_gpu_state_get_alloc(block, gpu); if (!gpu_state) return NV_ERR_NO_MEMORY; @@ -8604,12 +8538,12 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block, gpu = uvm_gpu_get(id); - // Although this GPU UUID is registered in the VA space, it might not have a - // GPU VA space registered. + // Although this GPU UUID is registered in the VA space, it might not + // have a GPU VA space registered. 
if (!uvm_gpu_va_space_get(va_space, gpu)) return NV_OK; - gpu_state = block_gpu_state_get_alloc(va_block, gpu); + gpu_state = uvm_va_block_gpu_state_get_alloc(va_block, gpu); if (!gpu_state) return NV_ERR_NO_MEMORY; @@ -9608,7 +9542,7 @@ static void block_kill(uvm_va_block_t *block) if (!uvm_va_block_is_hmm(block)) uvm_cpu_chunk_mark_dirty(chunk, 0); - uvm_cpu_chunk_remove_from_block(block, nid, page_index); + uvm_cpu_chunk_remove_from_block(block, chunk, nid, page_index); uvm_cpu_chunk_free(chunk); } @@ -9672,13 +9606,12 @@ void uvm_va_block_kill(uvm_va_block_t *va_block) static void block_gpu_release_region(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_va_block_gpu_state_t *gpu_state, - uvm_page_mask_t *page_mask, uvm_va_block_region_t region) { uvm_page_index_t page_index; uvm_gpu_t *gpu = uvm_gpu_get(gpu_id); - for_each_va_block_page_in_region_mask(page_index, page_mask, region) { + for_each_va_block_page_in_region(page_index, region) { size_t chunk_index = block_gpu_chunk_index(va_block, gpu, page_index, NULL); uvm_gpu_chunk_t *gpu_chunk = gpu_state->chunks[chunk_index]; @@ -9723,7 +9656,7 @@ void uvm_va_block_munmap_region(uvm_va_block_t *va_block, uvm_processor_mask_clear(&va_block->evicted_gpus, gpu_id); if (gpu_state->chunks) { - block_gpu_release_region(va_block, gpu_id, gpu_state, NULL, region); + block_gpu_release_region(va_block, gpu_id, gpu_state, region); // TODO: bug 3660922: Need to update the read duplicated pages mask // when read duplication is supported for HMM. @@ -10294,7 +10227,7 @@ static NV_STATUS block_split_preallocate_no_retry(uvm_va_block_t *existing, uvm_ if (status != NV_OK) goto error; - if (!block_gpu_state_get_alloc(new, gpu)) { + if (!uvm_va_block_gpu_state_get_alloc(new, gpu)) { status = NV_ERR_NO_MEMORY; goto error; } @@ -10468,7 +10401,7 @@ static void block_split_cpu(uvm_va_block_t *existing, uvm_va_block_t *new) uvm_page_index_t new_chunk_page_index; NV_STATUS status; - uvm_cpu_chunk_remove_from_block(existing, nid, page_index); + uvm_cpu_chunk_remove_from_block(existing, chunk, nid, page_index); // The chunk has to be adjusted for the new block before inserting it. new_chunk_page_index = page_index - split_page_index; @@ -13067,7 +13000,7 @@ out: static NV_STATUS block_gpu_force_4k_ptes(uvm_va_block_t *block, uvm_va_block_context_t *block_context, uvm_gpu_t *gpu) { - uvm_va_block_gpu_state_t *gpu_state = block_gpu_state_get_alloc(block, gpu); + uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get_alloc(block, gpu); uvm_push_t push; NV_STATUS status; diff --git a/kernel-open/nvidia-uvm/uvm_va_block.h b/kernel-open/nvidia-uvm/uvm_va_block.h index 180e2114a..5d53bcc6b 100644 --- a/kernel-open/nvidia-uvm/uvm_va_block.h +++ b/kernel-open/nvidia-uvm/uvm_va_block.h @@ -1339,9 +1339,11 @@ NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id, uvm_va_block_t *va_block, uvm_service_block_context_t *service_context); -// Allocate GPU state for the given va_block and registered GPUs. +// Returns the gpu_state for the given GPU. The returned pointer is +// internally managed and will be allocated (and freed) automatically, +// rather than by the caller. Returns NULL if there is no memory. // Locking: The block lock must be held. -NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block); +uvm_va_block_gpu_state_t *uvm_va_block_gpu_state_get_alloc(uvm_va_block_t *va_block, uvm_gpu_t *gpu); // Release any GPU or policy data associated with the given region in response // to munmap(). 
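The uvm_va_block_unmapped_pages_get() hunk earlier in this patch now computes the result as: fill the region, subtract pages the CPU has resident (assumed mapped, since HMM gets no notification of CPU mapping upgrades), then subtract every GPU's map mask. With a 64-bit word standing in for uvm_page_mask_t, the arithmetic looks like this (a sketch under that assumption, not the UVM types):

/* Bitmask sketch of the unmapped-pages computation. */
#include <inttypes.h>
#include <stdio.h>

#define NUM_GPUS 2

/* Fill bits [first, outer) of a 64-page mask. Valid for 0 <= first < outer < 64. */
static uint64_t region_fill(unsigned first, unsigned outer)
{
    return ((UINT64_C(1) << outer) - 1) & ~((UINT64_C(1) << first) - 1);
}

int main(void)
{
    uint64_t cpu_resident = UINT64_C(0x0000000000ff0000);   /* treated as mapped */
    uint64_t gpu_mapped[NUM_GPUS] = { UINT64_C(0x00000000000000f0),
                                      UINT64_C(0x000000000000f000) };

    uint64_t unmapped = region_fill(0, 32);     /* start from the whole region     */
    unmapped &= ~cpu_resident;                  /* drop CPU-resident pages         */
    for (int i = 0; i < NUM_GPUS; i++)
        unmapped &= ~gpu_mapped[i];             /* drop each GPU's mapped pages    */

    printf("unmapped pages mask: 0x%016" PRIx64 "\n", unmapped);
    return 0;
}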
@@ -2113,10 +2115,13 @@ bool uvm_va_block_cpu_is_region_resident_on(uvm_va_block_t *va_block, int nid, u // Locking: The va_block lock must be held. NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index); -// Remove a CPU chunk at the given page_index from the va_block. +// Remove the given CPU chunk at the given page_index from the va_block. // nid cannot be NUMA_NO_NODE. // Locking: The va_block lock must be held. -void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, int nid, uvm_page_index_t page_index); +void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, + uvm_cpu_chunk_t *chunk, + int nid, + uvm_page_index_t page_index); // Return the CPU chunk at the given page_index on the given NUMA node from the // va_block. nid cannot be NUMA_NO_NODE. @@ -2249,6 +2254,13 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index, uvm_va_block_context_t *block_context); +// Populate all GPU chunks which cover the given region and page mask. +NV_STATUS uvm_va_block_populate_pages_gpu(uvm_va_block_t *block, + uvm_va_block_retry_t *retry, + uvm_gpu_id_t gpu_id, + uvm_va_block_region_t region, + const uvm_page_mask_t *populate_mask); + // A helper macro for handling allocation-retry // // The macro takes a VA block, uvm_va_block_retry_t struct and a function call diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c index 07f842a1a..568560540 100644 --- a/kernel-open/nvidia/nv.c +++ b/kernel-open/nvidia/nv.c @@ -2497,8 +2497,13 @@ nvidia_ioctl( NV_CTL_DEVICE_ONLY(nv); - if (num_arg_gpus == 0 || nvlfp->num_attached_gpus != 0 || - arg_size % sizeof(NvU32) != 0) + if ((num_arg_gpus == 0) || (arg_size % sizeof(NvU32) != 0)) + { + status = -EINVAL; + goto done; + } + + if (nvlfp->num_attached_gpus != 0) { status = -EINVAL; goto done; @@ -2527,6 +2532,7 @@ nvidia_ioctl( if (nvlfp->attached_gpus[i] != 0) nvidia_dev_put(nvlfp->attached_gpus[i], sp); } + NV_KFREE(nvlfp->attached_gpus, arg_size); nvlfp->num_attached_gpus = 0; diff --git a/src/common/displayport/inc/dp_connectorimpl.h b/src/common/displayport/inc/dp_connectorimpl.h index 2219dcc30..b4cfd9410 100644 --- a/src/common/displayport/inc/dp_connectorimpl.h +++ b/src/common/displayport/inc/dp_connectorimpl.h @@ -260,6 +260,12 @@ namespace DisplayPort // Flag to check if the system is UEFI. bool bIsUefiSystem; + // + // Flag to ensure we take into account that + // Displayport++ supports HDMI as well. + // + bool bHDMIOnDPPlusPlus; + bool bSkipResetLinkStateDuringPlug; // Flag to check if LT should be skipped. diff --git a/src/common/displayport/inc/dp_regkeydatabase.h b/src/common/displayport/inc/dp_regkeydatabase.h index 88641362b..f74609810 100644 --- a/src/common/displayport/inc/dp_regkeydatabase.h +++ b/src/common/displayport/inc/dp_regkeydatabase.h @@ -110,6 +110,8 @@ #define NV_DP_REGKEY_SKIP_SETTING_LINK_STATE_DURING_UNPLUG "DP_SKIP_SETTING_LINK_STATE_DURING_UNPLUG" +// This regkey ensures DPLib takes into account Displayport++ supports HDMI. +#define NV_DP_REGKEY_HDMI_ON_DP_PLUS_PLUS "HDMI_ON_DP_PLUS_PLUS" // Data Base used to store all the regkey values. // The actual data base is declared statically in dp_evoadapter.cpp. 
@@ -154,6 +156,7 @@ struct DP_REGKEY_DATABASE bool bEnableLowerBppCheckForDsc; bool bSkipSettingLinkStateDuringUnplug; bool bEnableDevId; + bool bHDMIOnDPPlusPlus; }; extern struct DP_REGKEY_DATABASE dpRegkeyDatabase; diff --git a/src/common/displayport/src/dp_configcaps2x.cpp b/src/common/displayport/src/dp_configcaps2x.cpp index 2f83c8920..4ad3b35ad 100644 --- a/src/common/displayport/src/dp_configcaps2x.cpp +++ b/src/common/displayport/src/dp_configcaps2x.cpp @@ -151,6 +151,16 @@ void DPCDHALImpl2x::parseAndReadCaps() DPCDHALImpl::parseAndReadCaps(); + // reset DP tunneling UHBR caps + caps2x.dpInTunnelingCaps.bUHBR_10GSupported = NV_FALSE; + caps2x.dpInTunnelingCaps.bUHBR_13_5GSupported = NV_FALSE; + caps2x.dpInTunnelingCaps.bUHBR_20GSupported = NV_FALSE; + + // reset CableCaps + caps2x.cableCaps.bUHBR_10GSupported = NV_TRUE; + caps2x.cableCaps.bUHBR_13_5GSupported = NV_TRUE; + caps2x.cableCaps.bUHBR_20GSupported = NV_TRUE; + // 02206h if (AuxRetry::ack == bus.read(NV_DPCD14_EXTENDED_MAIN_LINK_CHANNEL_CODING, &buffer[0], 1)) { diff --git a/src/common/displayport/src/dp_connectorimpl.cpp b/src/common/displayport/src/dp_connectorimpl.cpp index 106ed709f..c36916afe 100644 --- a/src/common/displayport/src/dp_connectorimpl.cpp +++ b/src/common/displayport/src/dp_connectorimpl.cpp @@ -199,6 +199,7 @@ void ConnectorImpl::applyRegkeyOverrides(const DP_REGKEY_DATABASE& dpRegkeyDatab this->bEnableLowerBppCheckForDsc = dpRegkeyDatabase.bEnableLowerBppCheckForDsc; this->bSkipSettingLinkStateDuringUnplug = dpRegkeyDatabase.bSkipSettingLinkStateDuringUnplug; this->bEnableDevId = dpRegkeyDatabase.bEnableDevId; + this->bHDMIOnDPPlusPlus = dpRegkeyDatabase.bHDMIOnDPPlusPlus; } void ConnectorImpl::setPolicyModesetOrderMitigation(bool enabled) @@ -412,7 +413,16 @@ void ConnectorImpl::processNewDevice(const DiscoveryManager::Device & device, { case DISPLAY_PORT: case DISPLAY_PORT_PLUSPLUS: // DP port that supports DP and TMDS - connector = connectorDisplayPort; + if (bHDMIOnDPPlusPlus && + existingDev && + existingDev->connectorType == connectorHDMI) + { + connector = connectorHDMI; + } + else + { + connector = connectorDisplayPort; + } break; case ANALOG_VGA: diff --git a/src/common/displayport/src/dp_evoadapter.cpp b/src/common/displayport/src/dp_evoadapter.cpp index 1076e9359..6d759f933 100644 --- a/src/common/displayport/src/dp_evoadapter.cpp +++ b/src/common/displayport/src/dp_evoadapter.cpp @@ -108,7 +108,8 @@ const struct {NV_DP_REGKEY_FORCE_HEAD_SHUTDOWN, &dpRegkeyDatabase.bForceHeadShutdown, DP_REG_VAL_BOOL}, {NV_DP_REGKEY_ENABLE_LOWER_BPP_CHECK_FOR_DSC, &dpRegkeyDatabase.bEnableLowerBppCheckForDsc, DP_REG_VAL_BOOL}, {NV_DP_REGKEY_SKIP_SETTING_LINK_STATE_DURING_UNPLUG, &dpRegkeyDatabase.bSkipSettingLinkStateDuringUnplug, DP_REG_VAL_BOOL}, - {NV_DP_REGKEY_EXPOSE_DSC_DEVID_WAR, &dpRegkeyDatabase.bEnableDevId, DP_REG_VAL_BOOL} + {NV_DP_REGKEY_EXPOSE_DSC_DEVID_WAR, &dpRegkeyDatabase.bEnableDevId, DP_REG_VAL_BOOL}, + {NV_DP_REGKEY_HDMI_ON_DP_PLUS_PLUS, &dpRegkeyDatabase.bHDMIOnDPPlusPlus, DP_REG_VAL_BOOL} }; EvoMainLink::EvoMainLink(EvoInterface * provider, Timer * timer) : diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h index 028624600..9c4a387a0 100644 --- a/src/common/inc/nvBldVer.h +++ b/src/common/inc/nvBldVer.h @@ -43,18 +43,18 @@ #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r570_00-540" -#define NV_BUILD_CHANGELIST_NUM (36324750) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r570_00-575" +#define 
NV_BUILD_CHANGELIST_NUM (36467544) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r570/r570_00-540" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36324750) +#define NV_BUILD_NAME "rel/gpu_drv/r570/r570_00-575" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36467544) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r570_00-536" -#define NV_BUILD_CHANGELIST_NUM (36324750) +#define NV_BUILD_BRANCH_VERSION "r570_00-569" +#define NV_BUILD_CHANGELIST_NUM (36467544) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "573.65" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36324750) +#define NV_BUILD_NAME "573.73" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36467544) #define NV_BUILD_BRANCH_BASE_VERSION R570 #endif // End buildmeister python edited section diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h index 089c722f5..cd6960628 100644 --- a/src/common/inc/nvUnixVersion.h +++ b/src/common/inc/nvUnixVersion.h @@ -4,7 +4,7 @@ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) -#define NV_VERSION_STRING "570.181" +#define NV_VERSION_STRING "570.190" #else diff --git a/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208ffb.h b/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208ffb.h index 41825448b..b528cd4db 100644 --- a/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208ffb.h +++ b/src/common/sdk/nvidia/inc/ctrl/ctrl208f/ctrl208ffb.h @@ -724,4 +724,25 @@ typedef struct NV208F_CTRL_FB_CONVERT_CHANNEL_PARAMS { #define NV208F_CTRL_FB_CHANNEL_CONVERSION_TYPE_LOGICAL_TO_PHYSICAL (0x00000000U) #define NV208F_CTRL_FB_CHANNEL_CONVERSION_TYPE_PHYSICAL_TO_LOGICAL (0x00000001U) + +/* + * NV208F_CTRL_CMD_FB_SET_ROW_REMAP_FAILURE_FLAG + * + * This command sets the status of row remap failure flag to the passed value. 
+ * + * value + * The value to set for row remap failure flag + * + * Possible status values returned are: + * NV_OK + * NV_ERR_NOT_SUPPORTED + */ +#define NV208F_CTRL_CMD_FB_SET_ROW_REMAP_FAILURE_FLAG (0x208f051cU) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_DIAG_FB_INTERFACE_ID << 8) | NV208F_CTRL_FB_SET_ROW_REMAP_FAILURE_FLAG_PARAMS_MESSAGE_ID" */ + +#define NV208F_CTRL_FB_SET_ROW_REMAP_FAILURE_FLAG_PARAMS_MESSAGE_ID (0x1cU) + +typedef struct NV208F_CTRL_FB_SET_ROW_REMAP_FAILURE_FLAG_PARAMS { + NvBool value; +} NV208F_CTRL_FB_SET_ROW_REMAP_FAILURE_FLAG_PARAMS; + /* _ctrl208ffb_h_ */ diff --git a/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c b/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c index 75e37ba86..11d17e416 100644 --- a/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c +++ b/src/common/uproc/os/libos-v3.1.0/lib/liblogdecode.c @@ -1505,25 +1505,25 @@ NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInsta { NvU64 i = (NvU32)(pLogDecode->numLogBuffers); NvU32 tag = LIBOS_LOG_NVLOG_BUFFER_TAG(pLogDecode->sourceName, i * 2); - NVLOG_BUFFER_HANDLE handle = 0; - NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle); - if (status != NV_OK) + // + // Cannot use nvlogGetBufferHandleFromTag here since in multi GPU case, + // we can have multiple buffers with exact same tag, only differentiable + // from gpuInstance + // + for (i = 0; i < NVLOG_MAX_BUFFERS; i++) { - return NV_FALSE; - } - - NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle]; - if (pNvLogBuffer == NULL) - { - return NV_FALSE; - } - - if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) && - DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance && - (pNvLogBuffer->pos >= pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) - { - return NV_TRUE; + if (NvLogLogger.pBuffers[i] != NULL) + { + NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[i]; + if ((pNvLogBuffer->tag == tag) && + (DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance) && + FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) && + (pNvLogBuffer->pos >= pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) + { + return NV_TRUE; + } + } } return NV_FALSE; @@ -1531,19 +1531,27 @@ NvBool isLibosPreserveLogBufferFull(LIBOS_LOG_DECODE *pLogDecode, NvU32 gpuInsta static NvBool findPreservedNvlogBuffer(NvU32 tag, NvU32 gpuInstance, NVLOG_BUFFER_HANDLE *pHandle) { - NVLOG_BUFFER_HANDLE handle = 0; - NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle); + NvU64 i; - if (status != NV_OK) - return NV_FALSE; - - NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle]; - if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) && - DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance && - (pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) + // + // Cannot use nvlogGetBufferHandleFromTag here since in multi GPU case, + // we can have multiple buffers with exact same tag, only differentiable + // from gpuInstance + // + for (i = 0; i < NVLOG_MAX_BUFFERS; i++) { - *pHandle = handle; - return NV_TRUE; + if (NvLogLogger.pBuffers[i] != NULL) + { + NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[i]; + if ((pNvLogBuffer->tag == tag) && + (DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance) && + FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, 
pNvLogBuffer->flags) && + (pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))) + { + *pHandle = i; + return NV_TRUE; + } + } } return NV_FALSE; diff --git a/src/nvidia/arch/nvalloc/unix/include/osapi.h b/src/nvidia/arch/nvalloc/unix/include/osapi.h index 83029cb4a..0411e9bf5 100644 --- a/src/nvidia/arch/nvalloc/unix/include/osapi.h +++ b/src/nvidia/arch/nvalloc/unix/include/osapi.h @@ -176,6 +176,8 @@ void RmUpdateGc6ConsoleRefCount (nv_state_t *, NvBool); NvBool rm_get_uefi_console_status (nv_state_t *); NvU64 rm_get_uefi_console_size (nv_state_t *, NvU64 *); +void rm_check_s0ix_regkey_and_platform_support(void); + RM_API *RmUnixRmApiPrologue (nv_state_t *, THREAD_STATE_NODE *, NvU32 module); void RmUnixRmApiEpilogue (nv_state_t *, THREAD_STATE_NODE *); diff --git a/src/nvidia/arch/nvalloc/unix/src/dynamic-power.c b/src/nvidia/arch/nvalloc/unix/src/dynamic-power.c index d7929ba73..80ff9b9fa 100644 --- a/src/nvidia/arch/nvalloc/unix/src/dynamic-power.c +++ b/src/nvidia/arch/nvalloc/unix/src/dynamic-power.c @@ -747,6 +747,16 @@ rmReadAndParseDynamicPowerRegkey } #undef NV_PMC_BOOT_42_CHIP_ID_GA102 +void rm_check_s0ix_regkey_and_platform_support(void) +{ + OBJSYS *pSys = SYS_GET_INSTANCE(); + NvU32 data; + NvBool status = ((nv_platform_supports_s0ix()) && + ((osReadRegistryDword(NULL, NV_REG_ENABLE_S0IX_POWER_MANAGEMENT, &data) == NV_OK) && (data == 1))); + + pSys->setProperty(pSys, PDB_PROP_SYS_SUPPORTS_S0IX, status); +} + /*! * @brief Initialize state related to dynamic power management. * Called once per GPU during driver initialization. diff --git a/src/nvidia/arch/nvalloc/unix/src/osinit.c b/src/nvidia/arch/nvalloc/unix/src/osinit.c index b9437d37a..453d64d2c 100644 --- a/src/nvidia/arch/nvalloc/unix/src/osinit.c +++ b/src/nvidia/arch/nvalloc/unix/src/osinit.c @@ -272,6 +272,8 @@ NV_STATUS osRmInitRm(void) return status; } + rm_check_s0ix_regkey_and_platform_support(); + // Setup any ThreadState defaults threadStateInitSetupFlags(THREAD_STATE_SETUP_FLAGS_ENABLED | THREAD_STATE_SETUP_FLAGS_TIMEOUT_ENABLED | diff --git a/src/nvidia/generated/g_engines_pb.c b/src/nvidia/generated/g_engines_pb.c index ea90f57a6..ff2fc5b94 100644 --- a/src/nvidia/generated/g_engines_pb.c +++ b/src/nvidia/generated/g_engines_pb.c @@ -368,6 +368,18 @@ const PRB_FIELD_DESC prb_fields_nvdebug_eng_kgsp_rpcinfo[] = { PRB_MAYBE_FIELD_NAME("data1") PRB_MAYBE_FIELD_DEFAULT(0) }, + { + 6, + { + PRB_OPTIONAL, + PRB_UINT32, + 0, + }, + 0, + 0, + PRB_MAYBE_FIELD_NAME("sequence") + PRB_MAYBE_FIELD_DEFAULT(0) + }, }; // Message descriptors @@ -403,7 +415,7 @@ const PRB_MSG_DESC prb_messages_nvdebug_eng[] = { PRB_MAYBE_MESSAGE_NAME("NvDebug.Eng.Mc.PciBarInfo") }, { - 5, + 6, prb_fields_nvdebug_eng_kgsp_rpcinfo, PRB_MAYBE_MESSAGE_NAME("NvDebug.Eng.KGsp.RpcInfo") }, diff --git a/src/nvidia/generated/g_engines_pb.h b/src/nvidia/generated/g_engines_pb.h index 0fb4a58f3..c5a5b4818 100644 --- a/src/nvidia/generated/g_engines_pb.h +++ b/src/nvidia/generated/g_engines_pb.h @@ -21,10 +21,10 @@ extern const PRB_MSG_DESC prb_messages_nvdebug_eng[]; #define NVDEBUG_ENG_MC_LEN 66 #define NVDEBUG_ENG_GPU_LEN 62 #define NVDEBUG_ENG_NVD_LEN 30 -#define NVDEBUG_ENG_KGSP_LEN 88 +#define NVDEBUG_ENG_KGSP_LEN 100 #define NVDEBUG_ENG_MC_RMDATA_LEN 6 #define NVDEBUG_ENG_MC_PCIBARINFO_LEN 22 -#define NVDEBUG_ENG_KGSP_RPCINFO_LEN 40 +#define NVDEBUG_ENG_KGSP_RPCINFO_LEN 46 extern const PRB_FIELD_DESC prb_fields_nvdebug_eng_mc[]; @@ -85,8 +85,8 @@ extern const PRB_FIELD_DESC 
prb_fields_nvdebug_eng_kgsp[]; #define NVDEBUG_ENG_KGSP_EVENT_HISTORY (&prb_fields_nvdebug_eng_kgsp[1]) // 'KGsp' field lengths -#define NVDEBUG_ENG_KGSP_RPC_HISTORY_LEN 43 -#define NVDEBUG_ENG_KGSP_EVENT_HISTORY_LEN 43 +#define NVDEBUG_ENG_KGSP_RPC_HISTORY_LEN 49 +#define NVDEBUG_ENG_KGSP_EVENT_HISTORY_LEN 49 extern const PRB_FIELD_DESC prb_fields_nvdebug_eng_mc_rmdata[]; @@ -114,6 +114,7 @@ extern const PRB_FIELD_DESC prb_fields_nvdebug_eng_kgsp_rpcinfo[]; #define NVDEBUG_ENG_KGSP_RPCINFO_TS_END (&prb_fields_nvdebug_eng_kgsp_rpcinfo[2]) #define NVDEBUG_ENG_KGSP_RPCINFO_DATA0 (&prb_fields_nvdebug_eng_kgsp_rpcinfo[3]) #define NVDEBUG_ENG_KGSP_RPCINFO_DATA1 (&prb_fields_nvdebug_eng_kgsp_rpcinfo[4]) +#define NVDEBUG_ENG_KGSP_RPCINFO_SEQUENCE (&prb_fields_nvdebug_eng_kgsp_rpcinfo[5]) // 'RpcInfo' field lengths #define NVDEBUG_ENG_KGSP_RPCINFO_FUNCTION_LEN 5 @@ -121,6 +122,7 @@ extern const PRB_FIELD_DESC prb_fields_nvdebug_eng_kgsp_rpcinfo[]; #define NVDEBUG_ENG_KGSP_RPCINFO_TS_END_LEN 10 #define NVDEBUG_ENG_KGSP_RPCINFO_DATA0_LEN 5 #define NVDEBUG_ENG_KGSP_RPCINFO_DATA1_LEN 5 +#define NVDEBUG_ENG_KGSP_RPCINFO_SEQUENCE_LEN 5 extern const PRB_SERVICE_DESC prb_services_nvdebug_eng[]; diff --git a/src/nvidia/generated/g_intr_nvoc.h b/src/nvidia/generated/g_intr_nvoc.h index f17204db1..1963c7c35 100644 --- a/src/nvidia/generated/g_intr_nvoc.h +++ b/src/nvidia/generated/g_intr_nvoc.h @@ -1054,23 +1054,23 @@ static inline NV_STATUS intrRestoreIntrRegValue(OBJGPU *pGpu, struct Intr *pIntr #define intrRestoreIntrRegValue_HAL(pGpu, pIntr, arg3, arg4, arg5) intrRestoreIntrRegValue(pGpu, pIntr, arg3, arg4, arg5) -static inline NV_STATUS intrTriggerCpuDoorbellForVF_46f6a7(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid) { +static inline NV_STATUS intrTriggerCpuDoorbellForVF_46f6a7(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr) { return NV_ERR_NOT_SUPPORTED; } -NV_STATUS intrTriggerCpuDoorbellForVF_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid); +NV_STATUS intrTriggerCpuDoorbellForVF_TU102(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr); #ifdef __nvoc_intr_h_disabled -static inline NV_STATUS intrTriggerCpuDoorbellForVF(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid) { +static inline NV_STATUS intrTriggerCpuDoorbellForVF(OBJGPU *pGpu, struct Intr *pIntr, NvU32 gfid, NvBool bRearmIntr) { NV_ASSERT_FAILED_PRECOMP("Intr was disabled!"); return NV_ERR_NOT_SUPPORTED; } #else //__nvoc_intr_h_disabled -#define intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid) intrTriggerCpuDoorbellForVF_46f6a7(pGpu, pIntr, gfid) +#define intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid, bRearmIntr) intrTriggerCpuDoorbellForVF_46f6a7(pGpu, pIntr, gfid, bRearmIntr) #endif //__nvoc_intr_h_disabled -#define intrTriggerCpuDoorbellForVF_HAL(pGpu, pIntr, gfid) intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid) +#define intrTriggerCpuDoorbellForVF_HAL(pGpu, pIntr, gfid, bRearmIntr) intrTriggerCpuDoorbellForVF(pGpu, pIntr, gfid, bRearmIntr) void intrRetriggerTopLevel_TU102(OBJGPU *pGpu, struct Intr *pIntr); diff --git a/src/nvidia/generated/g_kernel_bif_nvoc.c b/src/nvidia/generated/g_kernel_bif_nvoc.c index 5b02ee041..b0ace17e8 100644 --- a/src/nvidia/generated/g_kernel_bif_nvoc.c +++ b/src/nvidia/generated/g_kernel_bif_nvoc.c @@ -895,18 +895,25 @@ static void __nvoc_init_funcTable_KernelBif_1(KernelBif *pThis, RmHalspecOwner * pThis->__kbifCacheMnocSupport__ = &kbifCacheMnocSupport_b3696a; } - // kbifCacheVFInfo -- halified (3 hals) body - if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 
0x1f)) & 0xc0000000UL) )) /* ChipHal: GB100 | GB102 */ + // kbifCacheVFInfo -- halified (4 hals) body + if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000001UL) )) /* RmVariantHal: VF */ { - pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_GB100; - } - else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x01f0ffe0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 */ - { - pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_TU102; + pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_b3696a; } else { - pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_GH100; + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xc0000000UL) )) /* ChipHal: GB100 | GB102 */ + { + pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_GB100; + } + else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x01f0ffe0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 */ + { + pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_TU102; + } + else + { + pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_GH100; + } } // kbifRestoreBar0 -- halified (3 hals) body @@ -1113,7 +1120,7 @@ static void __nvoc_init_funcTable_KernelBif_1(KernelBif *pThis, RmHalspecOwner * { pThis->__kbifDoSecondaryBusHotReset__ = &kbifDoSecondaryBusHotReset_GH100; } -} // End __nvoc_init_funcTable_KernelBif_1 with approximately 159 basic block(s). +} // End __nvoc_init_funcTable_KernelBif_1 with approximately 160 basic block(s). // Initialize vtable(s) for 75 virtual method(s). diff --git a/src/nvidia/generated/g_kernel_bif_nvoc.h b/src/nvidia/generated/g_kernel_bif_nvoc.h index 6f6c4aa6f..0bf3ddde9 100644 --- a/src/nvidia/generated/g_kernel_bif_nvoc.h +++ b/src/nvidia/generated/g_kernel_bif_nvoc.h @@ -225,7 +225,7 @@ struct KernelBif { void (*__kbifCacheFlrSupport__)(struct OBJGPU *, struct KernelBif * /*this*/); // halified (3 hals) body void (*__kbifCache64bBar0Support__)(struct OBJGPU *, struct KernelBif * /*this*/); // halified (4 hals) body void (*__kbifCacheMnocSupport__)(struct OBJGPU *, struct KernelBif * /*this*/); // halified (2 hals) body - void (*__kbifCacheVFInfo__)(struct OBJGPU *, struct KernelBif * /*this*/); // halified (3 hals) body + void (*__kbifCacheVFInfo__)(struct OBJGPU *, struct KernelBif * /*this*/); // halified (4 hals) body void (*__kbifRestoreBar0__)(struct OBJGPU *, struct KernelBif * /*this*/, void *, NvU32 *); // halified (3 hals) body NvBool (*__kbifAnyBarsAreValid__)(struct OBJGPU *, struct KernelBif * /*this*/); // halified (2 hals) body NV_STATUS (*__kbifRestoreBarsAndCommand__)(struct OBJGPU *, struct KernelBif * /*this*/); // halified (3 hals) body @@ -1302,6 +1302,10 @@ static inline void kbifCacheMnocSupport_b3696a(struct OBJGPU *pGpu, struct Kerne void kbifCacheMnocSupport_GB100(struct OBJGPU *pGpu, struct KernelBif *pKernelBif); +static inline void kbifCacheVFInfo_b3696a(struct OBJGPU *pGpu, struct KernelBif *pKernelBif) { + return; +} + void kbifCacheVFInfo_TU102(struct OBJGPU *pGpu, struct KernelBif *pKernelBif); void kbifCacheVFInfo_GH100(struct OBJGPU *pGpu, struct KernelBif *pKernelBif); diff --git a/src/nvidia/generated/g_nv_name_released.h b/src/nvidia/generated/g_nv_name_released.h index 793a6c1e8..047e88aae 100644 --- a/src/nvidia/generated/g_nv_name_released.h +++ 
b/src/nvidia/generated/g_nv_name_released.h @@ -5416,6 +5416,7 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2941, 0x21ca, 0x10de, "NVIDIA GB200" }, { 0x2B85, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090" }, { 0x2B87, 0x0000, 0x0000, "NVIDIA GeForce RTX 5090 D" }, + { 0x2B8C, 0x530c, 0x17aa, "NVIDIA GeForce RTX 5090 D v2" }, { 0x2BB1, 0x204b, 0x1028, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, { 0x2BB1, 0x204b, 0x103c, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, { 0x2BB1, 0x204b, 0x10de, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, @@ -5436,6 +5437,9 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2C31, 0x2051, 0x103c, "NVIDIA RTX PRO 4500 Blackwell" }, { 0x2C31, 0x2051, 0x10de, "NVIDIA RTX PRO 4500 Blackwell" }, { 0x2C31, 0x2051, 0x17aa, "NVIDIA RTX PRO 4500 Blackwell" }, + { 0x2C33, 0x2053, 0x1028, "NVIDIA RTX PRO 4000 Blackwell SFF Edition" }, + { 0x2C33, 0x2053, 0x103c, "NVIDIA RTX PRO 4000 Blackwell SFF Edition" }, + { 0x2C33, 0x2053, 0x17aa, "NVIDIA RTX PRO 4000 Blackwell SFF Edition" }, { 0x2C34, 0x2052, 0x1028, "NVIDIA RTX PRO 4000 Blackwell" }, { 0x2C34, 0x2052, 0x103c, "NVIDIA RTX PRO 4000 Blackwell" }, { 0x2C34, 0x2052, 0x10de, "NVIDIA RTX PRO 4000 Blackwell" }, @@ -5448,6 +5452,9 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2D05, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060" }, { 0x2D18, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Laptop GPU" }, { 0x2D19, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060 Laptop GPU" }, + { 0x2D30, 0x2054, 0x1028, "NVIDIA RTX PRO 2000 Blackwell" }, + { 0x2D30, 0x2054, 0x103c, "NVIDIA RTX PRO 2000 Blackwell" }, + { 0x2D30, 0x2054, 0x17aa, "NVIDIA RTX PRO 2000 Blackwell" }, { 0x2D39, 0x0000, 0x0000, "NVIDIA RTX PRO 2000 Blackwell Generation Laptop GPU" }, { 0x2D58, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Laptop GPU" }, { 0x2D59, 0x0000, 0x0000, "NVIDIA GeForce RTX 5060 Laptop GPU" }, diff --git a/src/nvidia/generated/g_nvdebug_pb.h b/src/nvidia/generated/g_nvdebug_pb.h index 789d1e86b..c00f2cf7f 100644 --- a/src/nvidia/generated/g_nvdebug_pb.h +++ b/src/nvidia/generated/g_nvdebug_pb.h @@ -42,8 +42,8 @@ extern const PRB_MSG_DESC prb_messages_nvdebug[]; // Message maximum lengths // Does not include repeated fields, strings and byte arrays. 
#define NVDEBUG_SYSTEMINFO_LEN 354 -#define NVDEBUG_GPUINFO_LEN 262 -#define NVDEBUG_NVDUMP_LEN 1613 +#define NVDEBUG_GPUINFO_LEN 274 +#define NVDEBUG_NVDUMP_LEN 1625 #define NVDEBUG_SYSTEMINFO_NORTHBRIDGEINFO_LEN 12 #define NVDEBUG_SYSTEMINFO_SOCINFO_LEN 12 #define NVDEBUG_SYSTEMINFO_CPUINFO_LEN 24 @@ -101,7 +101,7 @@ extern const PRB_FIELD_DESC prb_fields_nvdebug_gpuinfo[]; #define NVDEBUG_GPUINFO_ENG_GPU_LEN 65 #define NVDEBUG_GPUINFO_ENG_MC_LEN 69 #define NVDEBUG_GPUINFO_ENG_NVD_LEN 33 -#define NVDEBUG_GPUINFO_ENG_KGSP_LEN 91 +#define NVDEBUG_GPUINFO_ENG_KGSP_LEN 103 extern const PRB_FIELD_DESC prb_fields_nvdebug_nvdump[]; @@ -115,7 +115,7 @@ extern const PRB_FIELD_DESC prb_fields_nvdebug_nvdump[]; // 'NvDump' field lengths #define NVDEBUG_NVDUMP_SYSTEM_INFO_LEN 357 #define NVDEBUG_NVDUMP_DCL_MSG_LEN 619 -#define NVDEBUG_NVDUMP_GPU_INFO_LEN 265 +#define NVDEBUG_NVDUMP_GPU_INFO_LEN 277 #define NVDEBUG_NVDUMP_EXCEPTION_ADDRESS_LEN 10 #define NVDEBUG_NVDUMP_SYSTEM_INFO_GSPRM_LEN 357 diff --git a/src/nvidia/generated/g_rpc_hal.h b/src/nvidia/generated/g_rpc_hal.h index ad6d2db74..023fb1158 100644 --- a/src/nvidia/generated/g_rpc_hal.h +++ b/src/nvidia/generated/g_rpc_hal.h @@ -16,8 +16,8 @@ typedef NV_STATUS RpcConstruct(POBJGPU, POBJRPC); typedef void RpcDestroy(POBJGPU, POBJRPC); -typedef NV_STATUS RpcSendMessage(POBJGPU, POBJRPC); -typedef NV_STATUS RpcRecvPoll(POBJGPU, POBJRPC, NvU32); +typedef NV_STATUS RpcSendMessage(POBJGPU, POBJRPC, NvU32 *); +typedef NV_STATUS RpcRecvPoll(POBJGPU, POBJRPC, NvU32, NvU32); // @@ -42,10 +42,10 @@ typedef struct RPC_OBJ_IFACES { (_pRpc)->obj.__rpcConstruct__(_pGpu, _pRpc) #define rpcDestroy(_pGpu, _pRpc) \ (_pRpc)->obj.__rpcDestroy__(_pGpu, _pRpc) -#define rpcSendMessage(_pGpu, _pRpc) \ - (_pRpc)->obj.__rpcSendMessage__(_pGpu, _pRpc) -#define rpcRecvPoll(_pGpu, _pRpc, _arg0) \ - (_pRpc)->obj.__rpcRecvPoll__(_pGpu, _pRpc, _arg0) +#define rpcSendMessage(_pGpu, _pRpc, _pArg0) \ + (_pRpc)->obj.__rpcSendMessage__(_pGpu, _pRpc, _pArg0) +#define rpcRecvPoll(_pGpu, _pRpc, _arg0, _arg1) \ + (_pRpc)->obj.__rpcRecvPoll__(_pGpu, _pRpc, _arg0, _arg1) // diff --git a/src/nvidia/generated/g_system_nvoc.c b/src/nvidia/generated/g_system_nvoc.c index fcde01e0c..a0bbfdd4a 100644 --- a/src/nvidia/generated/g_system_nvoc.c +++ b/src/nvidia/generated/g_system_nvoc.c @@ -103,6 +103,7 @@ void __nvoc_init_dataField_OBJSYS(OBJSYS *pThis) { pThis->clientListDeferredFreeLimit = 0; pThis->setProperty(pThis, PDB_PROP_SYS_RECOVERY_REBOOT_REQUIRED, NV_FALSE); + pThis->setProperty(pThis, PDB_PROP_SYS_SUPPORTS_S0IX, (0)); } NV_STATUS __nvoc_ctor_Object(Object* ); diff --git a/src/nvidia/generated/g_system_nvoc.h b/src/nvidia/generated/g_system_nvoc.h index f5122783c..4611b1e3a 100644 --- a/src/nvidia/generated/g_system_nvoc.h +++ b/src/nvidia/generated/g_system_nvoc.h @@ -418,7 +418,7 @@ struct OBJSYS { struct OBJTRACEABLE *__nvoc_pbase_OBJTRACEABLE; // traceable super struct OBJSYS *__nvoc_pbase_OBJSYS; // sys - // 34 PDB properties + // 35 PDB properties NvBool PDB_PROP_SYS_SBIOS_NVIF_POWERMIZER_LIMIT; NvBool PDB_PROP_SYS_MXM_THERMAL_CONTROL_PRESENT; NvBool PDB_PROP_SYS_POWER_BATTERY; @@ -453,6 +453,7 @@ struct OBJSYS { NvBool PDB_PROP_SYS_ENABLE_FORCE_SHARED_LOCK; NvBool PDB_PROP_SYS_DESTRUCTING; NvBool PDB_PROP_SYS_RECOVERY_REBOOT_REQUIRED; + NvBool PDB_PROP_SYS_SUPPORTS_S0IX; // Data members NvU32 apiLockMask; @@ -547,6 +548,8 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_OBJSYS; #define PDB_PROP_SYS_VALIDATE_CLIENT_HANDLE_STRICT_BASE_NAME 
PDB_PROP_SYS_VALIDATE_CLIENT_HANDLE_STRICT #define PDB_PROP_SYS_DESTRUCTING_BASE_CAST #define PDB_PROP_SYS_DESTRUCTING_BASE_NAME PDB_PROP_SYS_DESTRUCTING +#define PDB_PROP_SYS_SUPPORTS_S0IX_BASE_CAST +#define PDB_PROP_SYS_SUPPORTS_S0IX_BASE_NAME PDB_PROP_SYS_SUPPORTS_S0IX #define PDB_PROP_SYS_VALIDATE_KERNEL_BUFFERS_BASE_CAST #define PDB_PROP_SYS_VALIDATE_KERNEL_BUFFERS_BASE_NAME PDB_PROP_SYS_VALIDATE_KERNEL_BUFFERS #define PDB_PROP_SYS_PRIMARY_VBIOS_STATE_SAVED_BASE_CAST diff --git a/src/nvidia/inc/kernel/gpu/gsp/gsp_static_config.h b/src/nvidia/inc/kernel/gpu/gsp/gsp_static_config.h index 48a927a1e..3f457d5c4 100644 --- a/src/nvidia/inc/kernel/gpu/gsp/gsp_static_config.h +++ b/src/nvidia/inc/kernel/gpu/gsp/gsp_static_config.h @@ -218,6 +218,7 @@ typedef struct GspSystemInfo NvBool bRouteDispIntrsToCPU; NvU64 hostPageSize; NvBool bGspNocatEnabled; + NvBool bS0ixSupport; NvU16 virtualConfigBits; } GspSystemInfo; diff --git a/src/nvidia/inc/kernel/gpu/rpc/objrpc.h b/src/nvidia/inc/kernel/gpu/rpc/objrpc.h index ad3f368fd..4f6d57ec7 100644 --- a/src/nvidia/inc/kernel/gpu/rpc/objrpc.h +++ b/src/nvidia/inc/kernel/gpu/rpc/objrpc.h @@ -58,6 +58,7 @@ TYPEDEF_BITVECTOR(MC_ENGINE_BITVECTOR); typedef struct RpcHistoryEntry { NvU32 function; + NvU32 sequence; NvU64 data[2]; NvU64 ts_start; NvU64 ts_end; @@ -89,6 +90,9 @@ struct OBJRPC{ NvU32 rpcHistoryCurrent; RpcHistoryEntry rpcEventHistory[RPC_HISTORY_DEPTH]; NvU32 rpcEventHistoryCurrent; + + /* sequence number for RPC */ + NvU32 sequence; NvU32 timeoutCount; NvBool bQuietPrints; diff --git a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c index fe9d5d669..0d73a79e7 100644 --- a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c +++ b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c @@ -129,9 +129,9 @@ static void _kgspFreeRpcInfrastructure(OBJGPU *, KernelGsp *); static NV_STATUS _kgspConstructRpcObject(OBJGPU *, KernelGsp *, MESSAGE_QUEUE_INFO *, OBJRPC **); -static NV_STATUS _kgspRpcSendMessage(OBJGPU *, OBJRPC *); -static NV_STATUS _kgspRpcRecvPoll(OBJGPU *, OBJRPC *, NvU32); -static NV_STATUS _kgspRpcDrainEvents(OBJGPU *, KernelGsp *, NvU32, KernelGspRpcEventHandlerContext); +static NV_STATUS _kgspRpcSendMessage(OBJGPU *, OBJRPC *, NvU32 *); +static NV_STATUS _kgspRpcRecvPoll(OBJGPU *, OBJRPC *, NvU32, NvU32); +static NV_STATUS _kgspRpcDrainEvents(OBJGPU *, KernelGsp *, NvU32, NvU32, KernelGspRpcEventHandlerContext); static void _kgspRpcIncrementTimeoutCountAndRateLimitPrints(OBJGPU *, OBJRPC *); static NV_STATUS _kgspAllocSimAccessBuffer(OBJGPU *pGpu, KernelGsp *pKernelGsp); @@ -312,12 +312,14 @@ _kgspAddRpcHistoryEntry ) { NvU32 func = RPC_HDR->function; + NvU32 sequence = RPC_HDR->sequence; NvU32 entry; entry = *pCurrent = (*pCurrent + 1) % RPC_HISTORY_DEPTH; portMemSet(&pHistory[entry], 0, sizeof(pHistory[0])); pHistory[entry].function = func; + pHistory[entry].sequence = sequence; pHistory[entry].ts_start = osGetTimestamp(); _kgspGetActiveRpcDebugData(pRpc, func, @@ -367,7 +369,8 @@ static NV_STATUS _kgspRpcSendMessage ( OBJGPU *pGpu, - OBJRPC *pRpc + OBJRPC *pRpc, + NvU32 *pSequence ) { NV_STATUS nvStatus; @@ -376,6 +379,11 @@ _kgspRpcSendMessage NV_ASSERT(rmGpuGroupLockIsOwner(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE, &gpuMaskUnused)); + if (pSequence) + vgpu_rpc_message_header_v->sequence = *pSequence = pRpc->sequence++; + else + vgpu_rpc_message_header_v->sequence = 0; + NV_CHECK_OK_OR_RETURN(LEVEL_SILENT, _kgspRpcSanityCheck(pGpu, pKernelGsp, pRpc)); nvStatus = GspMsgQueueSendCommand(pRpc->pMessageQueueInfo, 
pGpu); @@ -1582,8 +1590,8 @@ _kgspProcessRpcEvent // eventually comes in as an unexpected event. The error handling // for the timeout should have already happened. // - NV_PRINTF(LEVEL_ERROR, "Unexpected RPC event from GPU%d: 0x%x (%s)\n", - gpuGetInstance(pGpu), event, _getRpcName(event)); + NV_PRINTF(LEVEL_ERROR, "Unexpected RPC event from GPU%d: 0x%x (%s), sequence: %u\n", + gpuGetInstance(pGpu), event, _getRpcName(event), pMsgHdr->sequence); break; } @@ -1718,6 +1726,7 @@ _kgspRpcDrainOneEvent OBJGPU *pGpu, OBJRPC *pRpc, NvU32 expectedFunc, + NvU32 expectedSequence, KernelGspRpcEventHandlerContext rpcHandlerContext ) { @@ -1734,8 +1743,11 @@ _kgspRpcDrainOneEvent { rpc_message_header_v *pMsgHdr = RPC_HDR; - if (pMsgHdr->function == expectedFunc) + if (pMsgHdr->function == expectedFunc && + pMsgHdr->sequence == expectedSequence) + { return NV_WARN_MORE_PROCESSING_REQUIRED; + } _kgspProcessRpcEvent(pGpu, pRpc, rpcHandlerContext); } @@ -1768,6 +1780,7 @@ _kgspRpcDrainEvents OBJGPU *pGpu, KernelGsp *pKernelGsp, NvU32 expectedFunc, + NvU32 expectedSequence, KernelGspRpcEventHandlerContext rpcHandlerContext ) { @@ -1776,7 +1789,7 @@ _kgspRpcDrainEvents while (nvStatus == NV_OK) { - nvStatus = _kgspRpcDrainOneEvent(pGpu, pRpc, expectedFunc, rpcHandlerContext); + nvStatus = _kgspRpcDrainOneEvent(pGpu, pRpc, expectedFunc, expectedSequence, rpcHandlerContext); kgspDumpGspLogs(pKernelGsp, NV_FALSE); } @@ -1871,11 +1884,12 @@ _kgspLogRpcHistoryEntry duration = _tsDiffToDuration(duration, &durationUnitsChar); NV_ERROR_LOG_DATA(pGpu, errorNum, - " %c%-4d %-4d %-21.21s 0x%016llx 0x%016llx 0x%016llx 0x%016llx %6llu%cs %c\n", + " %c%-4d %-4d %-21.21s %10u 0x%016llx 0x%016llx 0x%016llx 0x%016llx %6llu%cs %c\n", ((historyIndex == 0) ? ' ' : '-'), historyIndex, pEntry->function, _getRpcName(pEntry->function), + pEntry->sequence, pEntry->data[0], pEntry->data[1], pEntry->ts_start, @@ -1886,11 +1900,12 @@ _kgspLogRpcHistoryEntry else { NV_ERROR_LOG_DATA(pGpu, errorNum, - " %c%-4d %-4d %-21.21s 0x%016llx 0x%016llx 0x%016llx 0x%016llx %c\n", + " %c%-4d %-4d %-21.21s %10u 0x%016llx 0x%016llx 0x%016llx 0x%016llx %c\n", ((historyIndex == 0) ? 
' ' : '-'), historyIndex, pEntry->function, _getRpcName(pEntry->function), + pEntry->sequence, pEntry->data[0], pEntry->data[1], pEntry->ts_start, @@ -1918,16 +1933,16 @@ kgspLogRpcDebugInfo _kgspGetActiveRpcDebugData(pRpc, pMsgHdr->function, &activeData[0], &activeData[1]); NV_ERROR_LOG_DATA(pGpu, errorNum, - "GPU%d GSP RPC buffer contains function %d (%s) and data 0x%016llx 0x%016llx.\n", + "GPU%d GSP RPC buffer contains function %d (%s) sequence %u and data 0x%016llx 0x%016llx.\n", gpuGetInstance(pGpu), - pMsgHdr->function, _getRpcName(pMsgHdr->function), + pMsgHdr->function, _getRpcName(pMsgHdr->function), pMsgHdr->sequence, activeData[0], activeData[1]); NV_ERROR_LOG_DATA(pGpu, errorNum, "GPU%d RPC history (CPU -> GSP):\n", gpuGetInstance(pGpu)); NV_ERROR_LOG_DATA(pGpu, errorNum, - " entry function data0 data1 ts_start ts_end duration actively_polling\n"); + " entry function sequence data0 data1 ts_start ts_end duration actively_polling\n"); for (historyIndex = 0; historyIndex < rpcEntriesToLog; historyIndex++) { historyEntry = (pRpc->rpcHistoryCurrent + RPC_HISTORY_DEPTH - historyIndex) % RPC_HISTORY_DEPTH; @@ -1939,7 +1954,7 @@ kgspLogRpcDebugInfo "GPU%d RPC event history (CPU <- GSP):\n", gpuGetInstance(pGpu)); NV_ERROR_LOG_DATA(pGpu, errorNum, - " entry function data0 data1 ts_start ts_end duration during_incomplete_rpc\n"); + " entry function sequence data0 data1 ts_start ts_end duration during_incomplete_rpc\n"); for (historyIndex = 0; historyIndex < rpcEntriesToLog; historyIndex++) { historyEntry = (pRpc->rpcEventHistoryCurrent + RPC_HISTORY_DEPTH - historyIndex) % RPC_HISTORY_DEPTH; @@ -1958,7 +1973,8 @@ _kgspLogXid119 ( OBJGPU *pGpu, OBJRPC *pRpc, - NvU32 expectedFunc + NvU32 expectedFunc, + NvU32 expectedSequence ) { RpcHistoryEntry *pHistoryEntry = &pRpc->rpcHistory[pRpc->rpcHistoryCurrent]; @@ -1980,11 +1996,12 @@ _kgspLogXid119 duration = _tsDiffToDuration(ts_end - pHistoryEntry->ts_start, &durationUnitsChar); NV_ERROR_LOG(pGpu, GSP_RPC_TIMEOUT, - "Timeout after %llus of waiting for RPC response from GPU%d GSP! Expected function %d (%s) (0x%llx 0x%llx).", + "Timeout after %llus of waiting for RPC response from GPU%d GSP! Expected function %d (%s) sequence %u (0x%llx 0x%llx).", (durationUnitsChar == 'm' ? duration / 1000 : duration), gpuGetInstance(pGpu), expectedFunc, _getRpcName(expectedFunc), + expectedSequence, pHistoryEntry->data[0], pHistoryEntry->data[1]); @@ -2013,7 +2030,8 @@ _kgspLogRpcSanityCheckFailure OBJGPU *pGpu, OBJRPC *pRpc, NvU32 rpcStatus, - NvU32 expectedFunc + NvU32 expectedFunc, + NvU32 expectedSequence ) { RpcHistoryEntry *pHistoryEntry = &pRpc->rpcHistory[pRpc->rpcHistoryCurrent]; @@ -2021,11 +2039,12 @@ _kgspLogRpcSanityCheckFailure NV_ASSERT(expectedFunc == pHistoryEntry->function); NV_PRINTF(LEVEL_ERROR, - "GPU%d sanity check failed 0x%x waiting for RPC response from GSP. Expected function %d (%s) (0x%llx 0x%llx).\n", + "GPU%d sanity check failed 0x%x waiting for RPC response from GSP. 
Expected function %d (%s) sequence %u (0x%llx 0x%llx).\n", gpuGetInstance(pGpu), rpcStatus, expectedFunc, _getRpcName(expectedFunc), + expectedSequence, pHistoryEntry->data[0], pHistoryEntry->data[1]); @@ -2072,7 +2091,8 @@ _kgspRpcRecvPoll ( OBJGPU *pGpu, OBJRPC *pRpc, - NvU32 expectedFunc + NvU32 expectedFunc, + NvU32 expectedSequence ) { KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu); @@ -2165,7 +2185,7 @@ _kgspRpcRecvPoll // timeoutStatus = gpuCheckTimeout(pGpu, &timeout); - rpcStatus = _kgspRpcDrainEvents(pGpu, pKernelGsp, expectedFunc, rpcHandlerContext); + rpcStatus = _kgspRpcDrainEvents(pGpu, pKernelGsp, expectedFunc, expectedSequence, rpcHandlerContext); switch (rpcStatus) { case NV_WARN_MORE_PROCESSING_REQUIRED: @@ -2191,7 +2211,7 @@ _kgspRpcRecvPoll { if (!pRpc->bQuietPrints) { - _kgspLogRpcSanityCheckFailure(pGpu, pRpc, rpcStatus, expectedFunc); + _kgspLogRpcSanityCheckFailure(pGpu, pRpc, rpcStatus, expectedFunc, expectedSequence); pRpc->bQuietPrints = NV_TRUE; } goto done; @@ -2205,7 +2225,7 @@ _kgspRpcRecvPoll if (!pRpc->bQuietPrints) { - _kgspLogXid119(pGpu, pRpc, expectedFunc); + _kgspLogXid119(pGpu, pRpc, expectedFunc, expectedSequence); } goto done; @@ -4744,7 +4764,7 @@ kgspRpcRecvEvents_IMPL // If we do the assert will fail on NV_WARN_MORE_PROCESSING_REQUIRED, // in addition to general error codes. // - NV_ASSERT_OK(_kgspRpcDrainEvents(pGpu, pKernelGsp, NV_VGPU_MSG_FUNCTION_NUM_FUNCTIONS, KGSP_RPC_EVENT_HANDLER_CONTEXT_INTERRUPT)); + NV_ASSERT_OK(_kgspRpcDrainEvents(pGpu, pKernelGsp, NV_VGPU_MSG_FUNCTION_NUM_FUNCTIONS, 0, KGSP_RPC_EVENT_HANDLER_CONTEXT_INTERRUPT)); } /*! @@ -4766,7 +4786,7 @@ kgspWaitForRmInitDone_IMPL threadStateResetTimeout(pGpu); NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, - rpcRecvPoll(pGpu, pRpc, NV_VGPU_MSG_EVENT_GSP_INIT_DONE)); + rpcRecvPoll(pGpu, pRpc, NV_VGPU_MSG_EVENT_GSP_INIT_DONE, 0)); // // Now check if RPC really succeeded (NV_VGPU_MSG_RESULT_* are defined to @@ -5256,6 +5276,7 @@ static NV_STATUS _kgspDumpEngineFunc prbEncNestedStart(pPrbEnc, NVDEBUG_ENG_KGSP_RPC_HISTORY)); prbEncAddUInt32(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_FUNCTION, entry->function); + prbEncAddUInt32(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_SEQUENCE, entry->sequence); prbEncAddUInt64(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_TS_START, entry->ts_start); prbEncAddUInt64(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_TS_END, entry->ts_end); prbEncAddUInt32(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_DATA0, entry->data[0]); @@ -5276,6 +5297,7 @@ static NV_STATUS _kgspDumpEngineFunc prbEncNestedStart(pPrbEnc, NVDEBUG_ENG_KGSP_EVENT_HISTORY)); prbEncAddUInt32(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_FUNCTION, entry->function); + prbEncAddUInt32(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_SEQUENCE, entry->sequence); prbEncAddUInt64(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_TS_START, entry->ts_start); prbEncAddUInt64(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_TS_END, entry->ts_end); prbEncAddUInt32(pPrbEnc, NVDEBUG_ENG_KGSP_RPCINFO_DATA0, entry->data[0]); diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/arch/maxwell/mem_mgr_gm107.c b/src/nvidia/src/kernel/gpu/mem_mgr/arch/maxwell/mem_mgr_gm107.c index 62391c2b7..87740daca 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/arch/maxwell/mem_mgr_gm107.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/arch/maxwell/mem_mgr_gm107.c @@ -49,6 +49,7 @@ #include "vgpu/rpc.h" #include "vgpu/vgpu_events.h" +#include "nvdevid.h" // // statics @@ -1412,15 +1413,37 @@ memmgrGetRsvdSizeForSr_GM107 MemoryManager *pMemoryManager ) { + // + // Temporary WAR to override WDDM S/R buffer for specific skus + // Bug 5327051 + // + static 
const NvU16 gb20x_devid[] = { 0x2B8C }; + NvU32 pciDeviceID = DRF_VAL(_PCI, _DEVID, _DEVICE, pGpu->idInfo.PCIDeviceID); + NvBool overrideFbsrRsvdBufferSize = NV_FALSE; + + for (NvU32 i = 0; i < NV_ARRAY_ELEMENTS(gb20x_devid); i++) + { + if (pciDeviceID == gb20x_devid[i]) + { + overrideFbsrRsvdBufferSize = NV_TRUE; + break; + } + } + if (((pMemoryManager->Ram.fbTotalMemSizeMb >> 10) >= 31) || IS_GSP_CLIENT(pGpu)) { // // We need to reserve more memory for S/R if - // 1. FB size is > 32GB Bug Id: 2468357 + // 1. FB size is >= 31GB Bug Id: 2468357 // 2. Or GSP is enabled Bug Id: 4312881 // return 512 * 1024 * 1024; } + else if (overrideFbsrRsvdBufferSize) + { + // Bug 5327051: WAR to override WDDM S/R buffer for specific skus + return 300 * 1024 * 1024; + } else { return 256 * 1024 * 1024; diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c index 1d9539fa5..b6c79365a 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/mem_desc.c @@ -306,8 +306,6 @@ memdescCreate // (4k >> 12 = 1). This modification helps us to avoid overflow of variable // allocSize, in case caller of this function passes highest value of NvU64. // - // If allocSize is passed as 0, PageCount should be returned as 0. - // if (allocSize == 0) { PageCount = 0; diff --git a/src/nvidia/src/kernel/mem_mgr/standard_mem.c b/src/nvidia/src/kernel/mem_mgr/standard_mem.c index 84ed46770..4ef0535ee 100644 --- a/src/nvidia/src/kernel/mem_mgr/standard_mem.c +++ b/src/nvidia/src/kernel/mem_mgr/standard_mem.c @@ -57,7 +57,7 @@ NV_STATUS stdmemValidateParams return NV_ERR_INVALID_ARGUMENT; } - // + // // These flags don't do anything in this path. No mapping on alloc and // kernel map is controlled by TYPE // diff --git a/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c b/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c index c5abc71fd..aec294151 100644 --- a/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c +++ b/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c @@ -7694,7 +7694,7 @@ static NV_STATUS dupMemory(struct gpuDevice *device, // particular doesn't create IOMMU mappings required for the mapped GPU // to access the memory. That's a problem if the mapped GPU is different // from the GPU that the allocation was created under. Add them - // explicitly here and remove them when the memory is freed in n + // explicitly here and remove them when the memory is freed in // nvGpuOpsFreeDupedHandle(). Notably memdescMapIommu() refcounts the // mappings so it's ok to call it if the mappings are already there. // diff --git a/src/nvidia/src/kernel/rmapi/rpc_common.c b/src/nvidia/src/kernel/rmapi/rpc_common.c index f90e25c80..a0567b2ca 100644 --- a/src/nvidia/src/kernel/rmapi/rpc_common.c +++ b/src/nvidia/src/kernel/rmapi/rpc_common.c @@ -88,7 +88,8 @@ OBJRPC *initRpcObject(OBJGPU *pGpu) pRpc->timeoutCount = 0; pRpc->bQuietPrints = NV_FALSE; - // VIRTUALIZATION is disabled on DCE. Only run the below code on VGPU and GSP. + pRpc->sequence = 0; + // VIRTUALIZATION is disabled on DCE. Only run the below code on VGPU and GSP. 
rpcSetIpVersion(pGpu, pRpc, RPC_VERSION_FROM_VGX_VERSION(VGX_MAJOR_VERSION_NUMBER, VGX_MINOR_VERSION_NUMBER)); diff --git a/src/nvidia/src/kernel/vgpu/rpc.c b/src/nvidia/src/kernel/vgpu/rpc.c index 9ce5b2865..9b9e075bd 100644 --- a/src/nvidia/src/kernel/vgpu/rpc.c +++ b/src/nvidia/src/kernel/vgpu/rpc.c @@ -120,8 +120,8 @@ static NvU64 startTimeInNs, endTimeInNs, elapsedTimeInNs; static NV_STATUS updateHostVgpuFbUsage(OBJGPU *pGpu, NvHandle hClient, NvHandle hDevice, NvHandle hSubdevice); -static NV_STATUS _rpcSendMessage_VGPUGSP(OBJGPU *pGpu, OBJRPC *pRPC); -static NV_STATUS _rpcRecvPoll_VGPUGSP(OBJGPU *pGpu, OBJRPC *pRPC, NvU32 expectedFunc); +static NV_STATUS _rpcSendMessage_VGPUGSP(OBJGPU *pGpu, OBJRPC *pRPC, NvU32 *pSequence); +static NV_STATUS _rpcRecvPoll_VGPUGSP(OBJGPU *pGpu, OBJRPC *pRPC, NvU32 expectedFunc, NvU32 expectedSequence); void setGuestEccStatus(OBJGPU *pGpu); typedef NV_STATUS dma_control_copy_params_to_rpc_buffer_v(NvU32 cmd, void *Params, void *params_in); @@ -1386,6 +1386,9 @@ NV_STATUS vgpuGspSetupBuffers(OBJGPU *pGpu) return NV_ERR_NOT_SUPPORTED; } + // Modifying the DMA address size to the value supported by the hardware + osDmaSetAddressSize(pGpu->pOsGpuInfo, gpuGetPhysAddrWidth_HAL(pGpu, ADDR_SYSMEM)); + rpcSendMessage_FNPTR(pVGpu->pRpc) = _rpcSendMessage_VGPUGSP; rpcRecvPoll_FNPTR(pVGpu->pRpc) = _rpcRecvPoll_VGPUGSP; @@ -1665,28 +1668,29 @@ NV_STATUS freeRpcInfrastructure_VGPU(OBJGPU *pGpu) return rmStatus; } -NV_STATUS rpcSendMessage_IMPL(OBJGPU *pGpu, OBJRPC *pRpc) +NV_STATUS rpcSendMessage_IMPL(OBJGPU *pGpu, OBJRPC *pRpc, NvU32 *pSequence) { NV_PRINTF(LEVEL_ERROR, "virtual function not implemented.\n"); return NV_ERR_NOT_SUPPORTED; } -NV_STATUS rpcRecvPoll_IMPL(OBJGPU *pGpu, OBJRPC *pRpc, NvU32 expectedFunc) +NV_STATUS rpcRecvPoll_IMPL(OBJGPU *pGpu, OBJRPC *pRpc, NvU32 expectedFunc, NvU32 expectedSequence) { NV_PRINTF(LEVEL_ERROR, "virtual function not implemented.\n"); return NV_ERR_NOT_SUPPORTED; } -static NV_STATUS _rpcSendMessage_VGPUGSP(OBJGPU *pGpu, OBJRPC *pRpc) +static NV_STATUS _rpcSendMessage_VGPUGSP(OBJGPU *pGpu, OBJRPC *pRpc, NvU32 *pSequence) { OBJVGPU *pVGpu = GPU_GET_VGPU(pGpu); - vgpu_rpc_message_header_v->sequence = pVGpu->sequence_base++; + NV_ASSERT(pSequence != NULL); + vgpu_rpc_message_header_v->sequence = *pSequence = pVGpu->sequence_base++; return _vgpuGspSendRpcRequest(pGpu, pRpc); } -static NV_STATUS _rpcRecvPoll_VGPUGSP(OBJGPU *pGpu, OBJRPC *pRPC, NvU32 expectedFunc) +static NV_STATUS _rpcRecvPoll_VGPUGSP(OBJGPU *pGpu, OBJRPC *pRPC, NvU32 expectedFunc, NvU32 expectedSequence) { return _vgpuGspWaitForResponse(pGpu); } @@ -1722,6 +1726,15 @@ static NV_STATUS _issueRpcAndWait(OBJGPU *pGpu, OBJRPC *pRpc) pNewEntry->rpcData.rpcDataTag = vgpu_rpc_message_header_v->function; + switch (vgpu_rpc_message_header_v->function) + { + case NV_VGPU_MSG_FUNCTION_RM_API_CONTROL: + pNewEntry->rpcData.rpcExtraData = rpc_message->rm_api_control_v.params.cmd; + break; + default: + break; + } + rpcProfilerEntryCount++; osGetPerformanceCounter(&pNewEntry->rpcData.startTimeInNs); @@ -1729,13 +1742,14 @@ static NV_STATUS _issueRpcAndWait(OBJGPU *pGpu, OBJRPC *pRpc) // For HCC, cache expectedFunc value before encrypting. 
NvU32 expectedFunc = vgpu_rpc_message_header_v->function; + NvU32 expectedSequence = 0; - status = rpcSendMessage(pGpu, pRpc); + status = rpcSendMessage(pGpu, pRpc, &expectedSequence); if (status != NV_OK) { NV_PRINTF_COND(pRpc->bQuietPrints, LEVEL_INFO, LEVEL_ERROR, - "rpcSendMessage failed with status 0x%08x for fn %d!\n", - status, vgpu_rpc_message_header_v->function); + "rpcSendMessage failed with status 0x%08x for fn %d sequence %d!\n", + status, expectedFunc, expectedSequence); // // It has been observed that returning NV_ERR_BUSY_RETRY in a bad state (RPC // buffers full and not being serviced) can make things worse, i.e. turn RPC @@ -1746,20 +1760,20 @@ static NV_STATUS _issueRpcAndWait(OBJGPU *pGpu, OBJRPC *pRpc) } // Use cached expectedFunc here because vgpu_rpc_message_header_v is encrypted for HCC. - status = rpcRecvPoll(pGpu, pRpc, expectedFunc); + status = rpcRecvPoll(pGpu, pRpc, expectedFunc, expectedSequence); if (status != NV_OK) { if (status == NV_ERR_TIMEOUT) { NV_PRINTF_COND(pRpc->bQuietPrints, LEVEL_INFO, LEVEL_ERROR, - "rpcRecvPoll timedout for fn %d!\n", - vgpu_rpc_message_header_v->function); + "rpcRecvPoll timedout for fn %d sequence %u!\n", + expectedFunc, expectedSequence); } else { NV_PRINTF_COND(pRpc->bQuietPrints, LEVEL_INFO, LEVEL_ERROR, - "rpcRecvPoll failed with status 0x%08x for fn %d!\n", - status, vgpu_rpc_message_header_v->function); + "rpcRecvPoll failed with status 0x%08x for fn %d sequence %u!\n", + status, expectedFunc, expectedSequence); } return status; } @@ -1793,10 +1807,10 @@ static NV_STATUS _issueRpcAsync(OBJGPU *pGpu, OBJRPC *pRpc) // should not be called in broadcast mode NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE); - status = rpcSendMessage(pGpu, pRpc); + status = rpcSendMessage(pGpu, pRpc, NULL); if (status != NV_OK) { - NV_PRINTF(LEVEL_ERROR, "rpcSendMessage failed with status 0x%08x for fn %d!\n", + NV_PRINTF(LEVEL_ERROR, "rpcSendMessage async failed with status 0x%08x for fn %d!\n", status, vgpu_rpc_message_header_v->function); NV_ASSERT(0); // @@ -1824,6 +1838,8 @@ static NV_STATUS _issueRpcLarge NvU8 *pBuf8 = (NvU8 *)pBuffer; NV_STATUS nvStatus = NV_OK; NvU32 expectedFunc = vgpu_rpc_message_header_v->function; + NvU32 firstSequence = pRpc->sequence; + NvU32 lastSequence, waitSequence; NvU32 entryLength; NvU32 remainingSize = bufSize; NvU32 recordCount = 0; @@ -1840,7 +1856,7 @@ static NV_STATUS _issueRpcLarge // Set the correct length for this queue entry. vgpu_rpc_message_header_v->length = entryLength; - nvStatus = rpcSendMessage(pGpu, pRpc); + nvStatus = rpcSendMessage(pGpu, pRpc, &firstSequence); if (nvStatus != NV_OK) { NV_PRINTF(LEVEL_ERROR, "rpcSendMessage failed with status 0x%08x for fn %d!\n", @@ -1876,7 +1892,7 @@ static NV_STATUS _issueRpcLarge vgpu_rpc_message_header_v->length = entryLength + sizeof(rpc_message_header_v); vgpu_rpc_message_header_v->function = NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD; - nvStatus = rpcSendMessage(pGpu, pRpc); + nvStatus = rpcSendMessage(pGpu, pRpc, &lastSequence); if (nvStatus != NV_OK) { NV_PRINTF(LEVEL_ERROR, @@ -1897,6 +1913,8 @@ static NV_STATUS _issueRpcLarge recordCount++; } + NV_ASSERT(lastSequence == (firstSequence + recordCount)); + if (!bWait) { // In case of Async RPC, we are done here. @@ -1904,18 +1922,20 @@ static NV_STATUS _issueRpcLarge } // Always receive at least one.. 
- nvStatus = rpcRecvPoll(pGpu, pRpc, expectedFunc); + waitSequence = firstSequence; + + nvStatus = rpcRecvPoll(pGpu, pRpc, expectedFunc, waitSequence); if (nvStatus != NV_OK) { if (nvStatus == NV_ERR_TIMEOUT) { - NV_PRINTF(LEVEL_ERROR, "rpcRecvPoll timedout for fn %d!\n", - vgpu_rpc_message_header_v->function); + NV_PRINTF(LEVEL_ERROR, "rpcRecvPoll timedout for fn %d sequence %d!\n", + expectedFunc, waitSequence); } else { - NV_PRINTF(LEVEL_ERROR, "rpcRecvPoll failed with status 0x%08x for fn %d!\n", - nvStatus, vgpu_rpc_message_header_v->function); + NV_PRINTF(LEVEL_ERROR, "rpcRecvPoll failed with status 0x%08x for fn %d sequence %d!\n", + nvStatus, expectedFunc, waitSequence); } NV_ASSERT(0); return nvStatus; @@ -1931,26 +1951,27 @@ static NV_STATUS _issueRpcLarge remainingSize -= entryLength; pBuf8 += entryLength; + waitSequence++; // For bidirectional transfer messages, need to receive all other frames as well if (bBidirectional && (recordCount > 0)) { while (remainingSize > 0) { - nvStatus = rpcRecvPoll(pGpu, pRpc, NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD); + nvStatus = rpcRecvPoll(pGpu, pRpc, NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, waitSequence); if (nvStatus != NV_OK) { if (nvStatus == NV_ERR_TIMEOUT) { NV_PRINTF(LEVEL_ERROR, - "rpcRecvPoll timedout for fn %d continuation record (remainingSize=0x%x)!\n", - vgpu_rpc_message_header_v->function, remainingSize); + "rpcRecvPoll timedout for fn %d sequence %d continuation record (remainingSize=0x%x)!\n", + expectedFunc, waitSequence, remainingSize); } else { NV_PRINTF(LEVEL_ERROR, - "rpcRecvPoll failed with status 0x%08x for fn %d continuation record! (remainingSize=0x%x)\n", - nvStatus, vgpu_rpc_message_header_v->function, remainingSize); + "rpcRecvPoll failed with status 0x%08x for fn %d sequence %d continuation record! (remainingSize=0x%x)\n", + nvStatus, expectedFunc, waitSequence, remainingSize); } NV_ASSERT(0); return nvStatus; @@ -1968,9 +1989,11 @@ static NV_STATUS _issueRpcLarge remainingSize -= entryLength; pBuf8 += entryLength; recordCount--; + waitSequence++; } vgpu_rpc_message_header_v->function = expectedFunc; NV_ASSERT(recordCount == 0); + NV_ASSERT(waitSequence - 1 == lastSequence); } // Now check if RPC really succeeded @@ -9577,6 +9600,7 @@ NV_STATUS rpcGspSetSystemInfo_v17_00 rpcInfo->bIsPrimary = pGpu->getProperty(pGpu, PDB_PROP_GPU_PRIMARY_DEVICE); + rpcInfo->bS0ixSupport = pSys->getProperty(pSys, PDB_PROP_SYS_SUPPORTS_S0IX); #if defined(NV_UNIX) && !RMCFG_FEATURE_MODS_FEATURES rpcInfo->isGridBuild = os_is_grid_supported(); #endif diff --git a/version.mk b/version.mk index 35b396326..3af3247a1 100644 --- a/version.mk +++ b/version.mk @@ -1,4 +1,4 @@ -NVIDIA_VERSION = 570.181 +NVIDIA_VERSION = 570.190 # This file. VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))
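
Illustrative note (not part of the patch above): the main functional change threaded through this diff is a per-message sequence number on the GSP/vGPU RPC path — OBJRPC gains a `sequence` counter, `rpcSendMessage` takes an extra `NvU32 *` that returns the sequence assigned to the outgoing message, and `rpcRecvPoll` now matches a reply on both the function id and that sequence. This keeps a late reply to an already-timed-out RPC from being mistaken for the response to the next request that happens to use the same function id. The standalone C sketch below mimics that request/response matching pattern only; every name in it (`rpc_channel_t`, `rpc_msg_t`, `rpc_send`, `rpc_wait_for_reply`, `fetch_next_message`) is hypothetical and is not taken from the driver.

/*
 * Minimal sketch of sequence-matched RPC send/poll, under the assumptions
 * stated above.  Not the driver's implementation.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>

typedef struct {
    uint32_t function;
    uint32_t sequence;
} rpc_msg_t;

typedef struct {
    uint32_t next_sequence;                 /* analogous to OBJRPC::sequence            */
    rpc_msg_t (*fetch_next_message)(void);  /* stand-in for draining the message queue  */
} rpc_channel_t;

/* Stamp the outgoing header and report the sequence the caller must wait for. */
static void rpc_send(rpc_channel_t *ch, rpc_msg_t *msg, uint32_t *out_seq)
{
    msg->sequence = ch->next_sequence++;
    if (out_seq != NULL)
        *out_seq = msg->sequence;
    /* ...hand msg to the transport here... */
}

/* Accept a reply only when both the function id and the sequence match the request. */
static bool rpc_wait_for_reply(rpc_channel_t *ch, uint32_t expected_func,
                               uint32_t expected_seq, unsigned max_polls)
{
    for (unsigned i = 0; i < max_polls; i++) {
        rpc_msg_t reply = ch->fetch_next_message();
        if (reply.function == expected_func && reply.sequence == expected_seq)
            return true;   /* this is the response to our request                    */
        /* otherwise treat it as an unrelated or stale event and keep polling        */
    }
    return false;          /* timeout; the real driver reports this via _kgspLogXid119 */
}

The same idea explains the multi-part RPC handling in _issueRpcLarge: each continuation record consumes the next sequence number, so the receive loop can step `waitSequence` forward and verify at the end that exactly `recordCount` continuation replies were consumed.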