570.158.01

This commit is contained in:
Maneet Singh
2025-06-17 11:52:54 -07:00
parent d5cb404571
commit 443ace971f
36 changed files with 302 additions and 112 deletions

View File

@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"570.153.02\"
ccflags-y += -DNV_VERSION_STRING=\"570.158.01\"
ifneq ($(SYSSRCHOST1X),)
ccflags-y += -I$(SYSSRCHOST1X)

View File

@@ -6602,22 +6602,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_DUMB_DESTROY" "" "types"
;;
memory_failure_has_trapno_arg)
memory_failure_queue_has_trapno_arg)
#
# Check if memory_failure() has trapno parameter.
# Check if memory_failure_queue() has trapno parameter.
#
# Removed by commit 83b57531c58f ("mm/memory_failure: Remove
# unused trapno from memory_failure") in v4.16.
#
CODE="
#include <linux/mm.h>
void conftest_memory_failure_has_trapno_arg(unsigned long pfn,
void conftest_memory_failure_queue_has_trapno_arg(unsigned long pfn,
int trapno,
int flags) {
(void) memory_failure(pfn, trapno, flags);
memory_failure_queue(pfn, trapno, flags);
}"
compile_check_conftest "$CODE" "NV_MEMORY_FAILURE_HAS_TRAPNO_ARG" "" "types"
compile_check_conftest "$CODE" "NV_MEMORY_FAILURE_QUEUE_HAS_TRAPNO_ARG" "" "types"
;;
memory_failure_mf_sw_simulated_defined)
@@ -7571,7 +7571,7 @@ compile_test() {
CODE="
#include <linux/mmzone.h>
int conftest_page_pgmap(void) {
return page_pgmap(NULL);
return page_pgmap();
}"
compile_check_conftest "$CODE" "NV_PAGE_PGMAP_PRESENT" "" "functions"

View File

@@ -767,6 +767,20 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
&ats_context->access_counters.accessed_mask,
&ats_context->prefetch_state.residency_mask);
// Pretend that pages that are already resident at the destination GPU were
// migrated now. This makes sure that the access counter is cleared even if
// the accessed pages were already resident on the target.
// TODO: Bug 5296998: [uvm][ats] Not clearing stale access counter
// notifications can lead to missed migrations
// The same stale-notification problem exists for migrations to
// locations other than local vidmem. However, stale notifications to data
// migrated to another remote location are identical to those triggered
// by accessing memory that cannot or should not be migrated.
if (uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
uvm_page_mask_copy(&ats_context->access_counters.migrated_mask,
&ats_context->prefetch_state.residency_mask);
}
for_each_va_block_subregion_in_mask(subregion, &ats_context->access_counters.accessed_mask, region) {
NV_STATUS status;
NvU64 start = base + (subregion.first * PAGE_SIZE);
@@ -779,7 +793,7 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
// clear access counters if pages were migrated or migration needs to
// Clear access counters if pages were migrated or migration needs to
// be retried
if (status == NV_OK || status == NV_ERR_BUSY_RETRY)
uvm_page_mask_region_fill(migrated_mask, subregion);

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -582,6 +582,9 @@ err:
void NV_API_CALL nv_cap_close_fd(int fd)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
struct file *file;
NvBool is_nv_cap_fd;
if (fd == -1)
{
return;
@@ -600,6 +603,30 @@ void NV_API_CALL nv_cap_close_fd(int fd)
return;
}
file = fget(fd);
if (file == NULL)
{
task_unlock(current);
return;
}
/* Make sure the fd belongs to the nv-cap-drv */
is_nv_cap_fd = (file->f_op == &g_nv_cap_drv_fops);
fput(file);
/*
* In some cases, we may be in the shutdown path and executing
* in the context of an unrelated process. In that case we should
* not access any 'current' state, but instead let the kernel
* clean up capability files on its own.
*/
if (!is_nv_cap_fd)
{
task_unlock(current);
return;
}
/*
* From v4.17-rc1 (to v5.10.8) kernels have stopped exporting sys_close(fd)
* and started exporting __close_fd, as of this commit:

View File

@@ -257,7 +257,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += add_memory_driver_managed_has_mhp_flags_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_queue_has_trapno_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present
NV_CONFTEST_TYPE_COMPILE_TESTS += bus_type_has_iommu_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += class_create_has_no_owner_arg

View File

@@ -2596,7 +2596,6 @@ NV_STATUS NV_API_CALL os_offline_page_at_address
{
#if defined(CONFIG_MEMORY_FAILURE)
int flags = 0;
int ret;
NvU64 pfn;
struct page *page = NV_GET_PAGE_STRUCT(address);
@@ -2621,22 +2620,18 @@ NV_STATUS NV_API_CALL os_offline_page_at_address
flags |= MF_SW_SIMULATED;
#endif
#ifdef NV_MEMORY_FAILURE_HAS_TRAPNO_ARG
ret = memory_failure(pfn, 0, flags);
#else
ret = memory_failure(pfn, flags);
#endif
nv_printf(NV_DBG_INFO, "NVRM: offlining page at address: 0x%llx pfn: 0x%llx\n",
address, pfn);
if (ret != 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: page offlining failed. address: 0x%llx pfn: 0x%llx ret: %d\n",
address, pfn, ret);
return NV_ERR_OPERATING_SYSTEM;
}
#ifdef NV_MEMORY_FAILURE_QUEUE_HAS_TRAPNO_ARG
memory_failure_queue(pfn, 0, flags);
#else
memory_failure_queue(pfn, flags);
#endif
return NV_OK;
#else // !defined(CONFIG_MEMORY_FAILURE)
nv_printf(NV_DBG_ERRORS, "NVRM: memory_failure() not supported by kernel. page offlining failed. address: 0x%llx\n",
nv_printf(NV_DBG_ERRORS, "NVRM: memory_failure_queue() not supported by kernel. page offlining failed. address: 0x%llx\n",
address);
return NV_ERR_NOT_SUPPORTED;
#endif