590.44.01

Maneet Singh
2025-12-02 15:32:25 -08:00
parent 2af9f1f0f7
commit a5bfb10e75
954 changed files with 421883 additions and 408177 deletions

View File

@@ -24,6 +24,8 @@
#ifndef __DETECT_SELF_HOSTED_H__
#define __DETECT_SELF_HOSTED_H__
static inline int pci_devid_is_self_hosted_hopper(unsigned short devid)
{
return devid >= 0x2340 && devid <= 0x237f; // GH100 Self-Hosted
@@ -39,7 +41,8 @@ static inline int pci_devid_is_self_hosted_blackwell(unsigned short devid)
static inline int pci_devid_is_self_hosted(unsigned short devid)
{
return pci_devid_is_self_hosted_hopper(devid) ||
pci_devid_is_self_hosted_blackwell(devid);
pci_devid_is_self_hosted_blackwell(devid)
;
}
#endif

View File

@@ -119,11 +119,12 @@ static struct pci_device_id nvswitch_pci_table[] =
static struct pci_driver nvswitch_pci_driver =
{
.name = NVSWITCH_DRIVER_NAME,
.id_table = nvswitch_pci_table,
.probe = nvswitch_probe,
.remove = nvswitch_remove,
.shutdown = nvswitch_remove
.name = NVSWITCH_DRIVER_NAME,
.id_table = nvswitch_pci_table,
.probe = nvswitch_probe,
.remove = nvswitch_remove,
.shutdown = nvswitch_remove,
.driver.probe_type = PROBE_FORCE_SYNCHRONOUS,
};
//

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -28,6 +28,7 @@
#include "nv-reg.h"
#include <linux/acpi.h>
#include <acpi/video.h>
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
static NV_STATUS nv_acpi_extract_integer (const union acpi_object *, void *, NvU32, NvU32 *);
@@ -56,16 +57,6 @@ static NvBool battery_present = NV_FALSE;
#define BIF_BATTERY_TECHNOLOGY_OFFSET 0x3
#define BATTERY_RECHARGABLE 0x1
/* Moved into acpi/video.h in Linux 4.10 */
#ifndef ACPI_VIDEO_NOTIFY_PROBE
#define ACPI_VIDEO_NOTIFY_PROBE 0x81
#endif
/* Added to acpi/video.h in Linux 3.1 */
#ifndef ACPI_VIDEO_CLASS
#define ACPI_VIDEO_CLASS "video"
#endif
/* Maximum size of ACPI _DSM method's 4th argument */
#define NV_MAX_ACPI_DSM_PARAM_SIZE 1024

View File

@@ -27,6 +27,17 @@
#include "nv-linux.h"
#include "nv-reg.h"
#if IS_ENABLED(CONFIG_DRM)
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include <drm/drm_gem.h>
#endif /* IS_ENABLED(CONFIG_DRM) */
#define NV_DMA_DEV_PRINTF(debuglevel, dma_dev, format, ... ) \
nv_printf(debuglevel, "NVRM: %s: " format, \
(((dma_dev) && ((dma_dev)->dev)) ? dev_name((dma_dev)->dev) : \
@@ -909,7 +920,7 @@ void NV_API_CALL nv_dma_cache_invalidate
#endif
}
#if defined(NV_DRM_AVAILABLE)
#if IS_ENABLED(CONFIG_DRM)
static inline void
nv_dma_gem_object_put_unlocked(struct drm_gem_object *gem)
@@ -990,4 +1001,4 @@ void NV_API_CALL nv_dma_release_sgt
)
{
}
#endif /* NV_DRM_AVAILABLE */
#endif /* IS_ENABLED(CONFIG_DRM) */

View File

@@ -326,9 +326,9 @@ unlock_api_lock:
// In TOT, nv_dma_buf_dup_mem_handles() acquires the GPU lock only for the calling pGPU
// instance. However, this is not sufficient for DupObject()'s SYSMEM design, which expects
// the caller either to acquire all GPU locks or to take none.
// PDB_PROP_GPU_ZERO_FB chips (iGPU) don't have local memory. In this case,
// gpuarchIsZeroFb chips (iGPU) don't have local memory. In this case,
// SYSMEM is used as Device resources. The priv->acquire_release_all_gpu_lock_on_dup flag is set to
// NV_TRUE only for PDB_PROP_GPU_ZERO_FB chips.
// NV_TRUE only for gpuarchIsZeroFb chips.
//
// Proper Fix (Bug 4866388):
// The RS_FLAGS_ACQUIRE_RELAXED_GPUS_LOCK_ON_DUP flag was introduced to allow an
@@ -802,7 +802,7 @@ nv_dma_buf_map_pages (
goto free_table;
}
sg_set_page(sg, page, sg_len, NV_GET_OFFSET_IN_PAGE(dma_addr));
sg_set_page(sg, page, sg_len, offset_in_page(dma_addr));
dma_addr += sg_len;
dma_len -= sg_len;
sg = sg_next(sg);

View File

@@ -26,7 +26,8 @@
#include "os-interface.h"
#include "nv-linux.h"
#if defined(NV_LINUX_NVHOST_H_PRESENT)
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(NV_LINUX_HOST1X_NEXT_H_PRESENT)
#include <linux/host1x-next.h>
#include <linux/nvhost.h>
#if defined(NV_LINUX_NVHOST_T194_H_PRESENT)
#include <linux/nvhost_t194.h>
@@ -40,9 +41,12 @@ NV_STATUS nv_get_syncpoint_aperture
NvU32 *offset
)
{
struct platform_device *host1x_pdev = NULL;
struct platform_device *host1x_pdev;
phys_addr_t base;
size_t size;
struct host1x *host1x;
NvU32 stride;
NvU32 num_syncpts;
NvS32 ret;
host1x_pdev = nvhost_get_default_device();
if (host1x_pdev == NULL)
@@ -50,12 +54,16 @@ NV_STATUS nv_get_syncpoint_aperture
return NV_ERR_INVALID_DEVICE;
}
nvhost_syncpt_unit_interface_get_aperture(
host1x_pdev, &base, &size);
host1x = platform_get_drvdata(host1x_pdev);
ret = host1x_syncpt_get_shim_info(host1x, &base, &stride, &num_syncpts);
if ((ret != 0) || (syncpointId >= num_syncpts))
{
return NV_ERR_INVALID_DATA;
}
*physAddr = base;
*limit = nvhost_syncpt_unit_interface_get_byte_offset(1);
*offset = nvhost_syncpt_unit_interface_get_byte_offset(syncpointId);
*limit = stride;
*offset = stride * syncpointId;
return NV_OK;
}

View File

@@ -22,7 +22,6 @@
*/
#include "nv-kthread-q.h"
#include "nv-list-helpers.h"
#include <linux/kthread.h>
#include <linux/interrupt.h>
@@ -43,17 +42,6 @@
// into the queue, and those functions will be run in the context of the
// queue's kthread.
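A minimal usage sketch of this queue API (editor's illustration; nv_kthread_q_init() and nv_kthread_q_stop() appear elsewhere in this commit, while nv_kthread_q_item_init(), nv_kthread_q_schedule_q_item() and nv_kthread_q_flush() are assumed from nv-kthread-q.h):

#include "nv-kthread-q.h"

static nv_kthread_q_t example_q;
static nv_kthread_q_item_t example_item;

/* Runs in the context of example_q's kthread, not the caller's context. */
static void example_work(void *args)
{
}

static int example_use_queue(void)
{
    int rc = nv_kthread_q_init(&example_q, "example_q");
    if (rc != 0)
    {
        return rc;
    }

    nv_kthread_q_item_init(&example_item, example_work, NULL);
    nv_kthread_q_schedule_q_item(&example_q, &example_item);

    /* Wait for pending items to run, then stop the kthread. */
    nv_kthread_q_flush(&example_q);
    nv_kthread_q_stop(&example_q);
    return 0;
}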
#ifndef WARN
// Only *really* old kernels (2.6.9) end up here. Just use a simple printk
// to implement this, because such kernels won't be supported much longer.
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
printk(KERN_ERR format); \
unlikely(__ret_warn_on); \
})
#endif
#define NVQ_WARN(fmt, ...) \
do { \
if (in_interrupt()) { \

View File

@@ -334,15 +334,21 @@ int nv_encode_caching(
NV_PGPROT_UNCACHED(*prot) :
NV_PGPROT_UNCACHED_DEVICE(*prot);
break;
#if defined(NV_PGPROT_WRITE_COMBINED) && \
defined(NV_PGPROT_WRITE_COMBINED_DEVICE)
#if defined(NV_PGPROT_WRITE_COMBINED)
case NV_MEMORY_DEFAULT:
case NV_MEMORY_WRITECOMBINED:
if (NV_ALLOW_WRITE_COMBINING(memory_type))
{
#if defined(NVCPU_RISCV64)
/*
* Don't attempt to mark sysmem pages as write combined on riscv.
* Bug 5404055 to clean up this check.
*/
*prot = (memory_type == NV_MEMORY_TYPE_FRAMEBUFFER) ?
NV_PGPROT_WRITE_COMBINED_DEVICE(*prot) :
NV_PGPROT_WRITE_COMBINED(*prot);
NV_PGPROT_WRITE_COMBINED(*prot) : *prot;
#else
*prot = NV_PGPROT_WRITE_COMBINED(*prot);
#endif
break;
}
@@ -590,7 +596,8 @@ int nvidia_mmap_helper(
// TODO: Refactor is needed as part of bug#2001704.
//
if ((nv_get_numa_status(nvl) == NV_NUMA_STATUS_ONLINE) &&
!IS_REG_OFFSET(nv, access_start, access_len) &&
pfn_to_page(__phys_to_pfn(access_start)) != NULL &&
pfn_to_page(__phys_to_pfn(access_start + access_len - 1)) != NULL &&
(mmap_context->num_pages != 0))
{
ret = nvidia_mmap_numa(vma, mmap_context);
@@ -894,7 +901,7 @@ static NvBool nv_vma_enter_locked(struct vm_area_struct *vma, NvBool detaching)
{
// Clean up on error: release refcount and dep_map
refcount_sub_and_test(VMA_LOCK_OFFSET, &vma->vm_refcnt);
rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
return NV_FALSE;
}

View File

@@ -159,9 +159,6 @@ struct nvidia_p2p_page_table {
*
* This API only supports pinned, GPU-resident memory, such as that provided
* by cudaMalloc().
* This API does not support Coherent Driver-based Memory Management(CDMM) mode.
* CDMM allows coherent GPU memory to be managed by the driver and not the OS.
* This is done by the driver not onlining the memory as a NUMA node.
*
* This API may sleep.
*
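For context, a minimal consumer-side sketch of this API (editor's illustration, not part of this commit; it assumes the GPUDirect RDMA entry points nvidia_p2p_get_pages()/nvidia_p2p_put_pages() and the nvidia_p2p_page_table layout declared in nv-p2p.h):

#include "nv-p2p.h"

/*
 * Invoked by the driver if the pinned pages are revoked. A real consumer
 * would drop its references and call nvidia_p2p_free_page_table() on the
 * page table it saved for this mapping.
 */
static void example_free_callback(void *data)
{
}

/*
 * Pin a pinned, GPU-resident range (e.g. one returned by cudaMalloc()) and
 * walk its bus addresses. gpu_va and len are assumed to be suitably aligned.
 */
static int example_pin_gpu_range(uint64_t gpu_va, uint64_t len)
{
    struct nvidia_p2p_page_table *page_table = NULL;
    uint32_t i;
    int ret;

    /* p2p_token and va_space are legacy parameters and are passed as 0. */
    ret = nvidia_p2p_get_pages(0, 0, gpu_va, len, &page_table,
                               example_free_callback, NULL);
    if (ret != 0)
    {
        return ret;
    }

    for (i = 0; i < page_table->entries; i++)
    {
        /*
         * page_table->pages[i]->physical_address is the GPU page's address
         * as seen by the peer device.
         */
    }

    nvidia_p2p_put_pages(0, 0, gpu_va, page_table);
    return 0;
}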

View File

@@ -86,8 +86,8 @@ static void nv_setup_pat_entries(void *info)
return;
#endif
NV_SAVE_FLAGS(eflags);
NV_CLI();
local_save_flags(eflags);
local_irq_disable();
nv_disable_caches(&cr4);
NV_READ_PAT_ENTRIES(pat1, pat2);
@@ -98,7 +98,7 @@ static void nv_setup_pat_entries(void *info)
NV_WRITE_PAT_ENTRIES(pat1, pat2);
nv_enable_caches(cr4);
NV_RESTORE_FLAGS(eflags);
local_irq_restore(eflags);
}
static void nv_restore_pat_entries(void *info)
@@ -112,14 +112,14 @@ static void nv_restore_pat_entries(void *info)
return;
#endif
NV_SAVE_FLAGS(eflags);
NV_CLI();
local_save_flags(eflags);
local_irq_disable();
nv_disable_caches(&cr4);
NV_WRITE_PAT_ENTRIES(orig_pat1, orig_pat2);
nv_enable_caches(cr4);
NV_RESTORE_FLAGS(eflags);
local_irq_restore(eflags);
}
static int

View File

@@ -35,9 +35,24 @@
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#if defined(CONFIG_PM_DEVFREQ)
#include <linux/devfreq.h>
#if defined(CONFIG_DEVFREQ_THERMAL) \
&& defined(NV_DEVFREQ_DEV_PROFILE_HAS_IS_COOLING_DEVICE) \
&& defined(NV_THERMAL_ZONE_FOR_EACH_TRIP_PRESENT) \
&& defined(NV_THERMAL_BIND_CDEV_TO_TRIP_PRESENT) \
&& defined(NV_THERMAL_UNBIND_CDEV_FROM_TRIP_PRESENT)
#include <linux/thermal.h>
#define NV_HAS_COOLING_SUPPORTED 1
#else
#define NV_HAS_COOLING_SUPPORTED 0
#endif
#endif
#if defined(CONFIG_INTERCONNECT) \
@@ -557,6 +572,14 @@ struct nv_pci_tegra_devfreq_data {
const TEGRASOC_DEVFREQ_CLK devfreq_clk;
};
#if NV_HAS_COOLING_SUPPORTED
struct nv_pci_tegra_thermal_data {
const char *tz_name;
const struct thermal_trip *passive_trip;
struct list_head zones;
};
#endif
struct nv_pci_tegra_devfreq_dev {
TEGRASOC_DEVFREQ_CLK devfreq_clk;
int domain;
@@ -567,28 +590,33 @@ struct nv_pci_tegra_devfreq_dev {
struct nv_pci_tegra_devfreq_dev *nvd_master;
struct clk *clk;
struct devfreq *devfreq;
bool boost_enabled;
struct delayed_work boost_disable;
#if NV_HAS_ICC_SUPPORTED
struct icc_path *icc_path;
#endif
#if NV_HAS_COOLING_SUPPORTED
struct list_head therm_zones;
#endif
};
static const struct nv_pci_tegra_devfreq_data gb10b_tegra_devfreq_table[] = {
{
.clk_name = "gpc0clk",
.icc_name = "gpu-write",
.gpc_fuse_field = BIT(0),
.gpc_fuse_field = BIT(3),
.devfreq_clk = TEGRASOC_DEVFREQ_CLK_GPC,
},
{
.clk_name = "gpc1clk",
.icc_name = "gpu-write",
.gpc_fuse_field = BIT(1),
.gpc_fuse_field = BIT(4),
.devfreq_clk = TEGRASOC_DEVFREQ_CLK_GPC,
},
{
.clk_name = "gpc2clk",
.icc_name = "gpu-write",
.gpc_fuse_field = BIT(2),
.gpc_fuse_field = BIT(5),
.devfreq_clk = TEGRASOC_DEVFREQ_CLK_GPC,
},
{
@@ -613,6 +641,7 @@ static int
nv_pci_gb10b_devfreq_target(struct device *dev, unsigned long *freq, u32 flags)
{
struct pci_dev *pdev = to_pci_dev(dev->parent);
nv_linux_state_t *nvl = pci_get_drvdata(pdev);
struct nv_pci_tegra_devfreq_dev *tdev = to_tegra_devfreq_dev(dev), *tptr;
unsigned long rate;
#if NV_HAS_ICC_SUPPORTED
@@ -645,6 +674,10 @@ nv_pci_gb10b_devfreq_target(struct device *dev, unsigned long *freq, u32 flags)
if (tdev->icc_path != NULL)
{
kBps = Bps_to_icc(*freq * gpu_bus_bandwidth * 400 / 1000);
if (tdev->boost_enabled)
{
kBps = UINT_MAX;
}
icc_set_bw(tdev->icc_path, kBps, 0);
}
#endif
@@ -662,7 +695,15 @@ nv_pci_gb10b_devfreq_target(struct device *dev, unsigned long *freq, u32 flags)
rate = max(rate, clk_get_rate(tptr->nvd_master->clk));
}
clk_set_rate(tptr->clk, rate);
if (tdev->boost_enabled
&& (tptr == nvl->sys_devfreq_dev || tptr == nvl->pwr_devfreq_dev))
{
clk_set_rate(tptr->clk, ULONG_MAX);
}
else
{
clk_set_rate(tptr->clk, rate);
}
}
rate = 0;
@@ -678,7 +719,15 @@ nv_pci_gb10b_devfreq_target(struct device *dev, unsigned long *freq, u32 flags)
rate = max(rate, clk_get_rate(tptr->nvd_master->clk));
}
clk_set_rate(tptr->clk, rate);
if (tdev->boost_enabled
&& (tptr == nvl->sys_devfreq_dev || tptr == nvl->pwr_devfreq_dev))
{
clk_set_rate(tptr->clk, ULONG_MAX);
}
else
{
clk_set_rate(tptr->clk, rate);
}
}
return 0;
@@ -787,16 +836,185 @@ populate_opp_table(struct nv_pci_tegra_devfreq_dev *tdev)
} while (rate <= max_rate && step);
}
static void
nv_pci_tegra_devfreq_remove_opps(struct nv_pci_tegra_devfreq_dev *tdev)
{
#if defined(NV_DEVFREQ_HAS_FREQ_TABLE)
unsigned long *freq_table = tdev->devfreq->freq_table;
unsigned int max_state = tdev->devfreq->max_state;
#else
unsigned long *freq_table = tdev->devfreq->profile->freq_table;
unsigned int max_state = tdev->devfreq->profile->max_state;
#endif
int i;
for (i = 0; i < max_state; i++)
{
dev_pm_opp_remove(&tdev->dev, freq_table[i]);
}
}
#if NV_HAS_COOLING_SUPPORTED
static int
nv_pci_tegra_thermal_get_passive_trip_cb(struct thermal_trip *trip, void *arg)
{
const struct thermal_trip **ptrip = arg;
/* Return zero to continue the search */
if (trip->type != THERMAL_TRIP_PASSIVE)
return 0;
/* Return nonzero to terminate the search */
*ptrip = trip;
return -1;
}
static int
nv_pci_tegra_init_cooling_device(struct nv_pci_tegra_devfreq_dev *tdev)
{
struct device *pdev = tdev->dev.parent;
const struct thermal_trip *passive_trip = NULL;
struct devfreq *devfreq = tdev->devfreq;
struct nv_pci_tegra_thermal_data *data;
struct thermal_zone_device *tzdev;
int i, err, val, n_strings, n_elems;
u32 temp_min, temp_max;
const char *tz_name;
if (!devfreq->cdev)
{
nv_printf(NV_DBG_ERRORS, "NVRM: devfreq cooling cannot be found\n");
return -ENODEV;
}
if (!pdev->of_node)
{
nv_printf(NV_DBG_ERRORS, "NVRM: associated OF node cannot be found\n");
return -ENODEV;
}
val = of_property_count_strings(pdev->of_node, "nvidia,thermal-zones");
if (val == -EINVAL)
{
return 0;
}
else if (val < 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: nvidia,thermal-zones DT property format error\n");
return val;
}
n_strings = val;
val = of_property_count_u32_elems(pdev->of_node, "nvidia,cooling-device");
if (val == -EINVAL)
{
return 0;
}
else if (val < 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: nvidia,cooling-device DT property format error\n");
return val;
}
n_elems = val;
if ((n_elems >> 1) != n_strings)
{
nv_printf(NV_DBG_ERRORS,
"NVRM: number of strings specified in nvidia,thermal-zones needs to "
"be exactly half the number of elements specified in nvidia,cooling-device\n");
return -EINVAL;
}
if (((n_elems >> 1) == 0) && ((n_elems & 1) == 1))
{
nv_printf(NV_DBG_ERRORS,
"NVRM: number of elements specified in nvidia,cooling-device needs "
"to be an even number\n");
return -EINVAL;
}
for (i = 0; i < n_strings; i++)
{
data = devm_kzalloc(pdev, sizeof(*data), GFP_KERNEL);
if (data == NULL)
{
err = -ENOMEM;
goto err_nv_pci_tegra_init_cooling_device;
}
of_property_read_string_index(pdev->of_node,
"nvidia,thermal-zones", i, &tz_name);
of_property_read_u32_index(pdev->of_node,
"nvidia,cooling-device", (i << 1) + 0, &temp_min);
of_property_read_u32_index(pdev->of_node,
"nvidia,cooling-device", (i << 1) + 1, &temp_max);
tzdev = thermal_zone_get_zone_by_name(tz_name);
if (IS_ERR(tzdev))
{
nv_printf(NV_DBG_ERRORS,
"NVRM: failed to get %s thermal_zone_device\n", tz_name);
err = -ENODEV;
goto err_nv_pci_tegra_init_cooling_device;
}
thermal_zone_for_each_trip(tzdev, nv_pci_tegra_thermal_get_passive_trip_cb, &passive_trip);
if (passive_trip == NULL)
{
nv_printf(NV_DBG_ERRORS,
"NVRM: failed to find passive_trip in %s thermal_zone_device\n", tz_name);
err = -ENODEV;
goto err_nv_pci_tegra_init_cooling_device;
}
val = thermal_bind_cdev_to_trip(tzdev,
passive_trip,
devfreq->cdev,
temp_max, temp_min, THERMAL_WEIGHT_DEFAULT);
if (val != 0)
{
nv_printf(NV_DBG_ERRORS,
"NVRM: failed to bind devfreq cooling device with %s thermal_zone_device\n", tz_name);
err = -ENODEV;
goto err_nv_pci_tegra_init_cooling_device;
}
data->tz_name = tz_name;
data->passive_trip = passive_trip;
list_add_tail(&data->zones, &tdev->therm_zones);
}
return 0;
err_nv_pci_tegra_init_cooling_device:
list_for_each_entry(data, &tdev->therm_zones, zones)
{
tzdev = thermal_zone_get_zone_by_name(data->tz_name);
if (IS_ERR(tzdev))
{
continue;
}
thermal_unbind_cdev_from_trip(tzdev, data->passive_trip, devfreq->cdev);
}
return err;
}
#endif
static int
nv_pci_gb10b_add_devfreq_device(struct nv_pci_tegra_devfreq_dev *tdev)
{
struct devfreq_dev_profile *profile;
int err;
populate_opp_table(tdev);
profile = devm_kzalloc(&tdev->dev, sizeof(*profile), GFP_KERNEL);
if (profile == NULL)
{
return -ENOMEM;
err = -ENOMEM;
goto err_nv_pci_gb10b_add_devfreq_device_opp;
}
profile->target = nv_pci_gb10b_devfreq_target;
@@ -804,6 +1022,9 @@ nv_pci_gb10b_add_devfreq_device(struct nv_pci_tegra_devfreq_dev *tdev)
profile->get_dev_status = nv_pci_tegra_devfreq_get_dev_status;
profile->initial_freq = clk_get_rate(tdev->clk);
profile->polling_ms = 25;
#if NV_HAS_COOLING_SUPPORTED
profile->is_cooling_device = true;
#endif
tdev->devfreq = devm_devfreq_add_device(&tdev->dev,
profile,
@@ -811,10 +1032,32 @@ nv_pci_gb10b_add_devfreq_device(struct nv_pci_tegra_devfreq_dev *tdev)
NULL);
if (IS_ERR(tdev->devfreq))
{
return PTR_ERR(tdev->devfreq);
err = PTR_ERR(tdev->devfreq);
goto err_nv_pci_gb10b_add_devfreq_device_opp;
}
#if defined(NV_DEVFREQ_HAS_SUSPEND_FREQ)
tdev->devfreq->suspend_freq = tdev->devfreq->scaling_max_freq;
#endif
#if NV_HAS_COOLING_SUPPORTED
err = nv_pci_tegra_init_cooling_device(tdev);
if (err)
{
goto err_nv_pci_gb10b_add_devfreq_device;
}
#endif
return 0;
#if NV_HAS_COOLING_SUPPORTED
err_nv_pci_gb10b_add_devfreq_device:
devm_devfreq_remove_device(&tdev->dev, tdev->devfreq);
#endif
err_nv_pci_gb10b_add_devfreq_device_opp:
nv_pci_tegra_devfreq_remove_opps(tdev);
return err;
}
static int
@@ -829,10 +1072,8 @@ nv_pci_gb10b_register_devfreq(struct pci_dev *pdev)
struct icc_path *icc_path;
#endif
struct clk *clk;
resource_size_t bar0_addr, bar0_size;
void *bar0_map;
int i, err, node;
u32 gpc_fuse_mask;
u32 gpu_pg_mask;
while (pbus->parent != NULL)
{
@@ -841,24 +1082,21 @@ nv_pci_gb10b_register_devfreq(struct pci_dev *pdev)
node = max(0, dev_to_node(to_pci_host_bridge(pbus->bridge)->dev.parent));
bar0_addr = (resource_size_t)nv->bars[NV_GPU_BAR_INDEX_REGS].cpu_address;
bar0_size = (resource_size_t)nv->bars[NV_GPU_BAR_INDEX_REGS].size;
bar0_map = devm_ioremap(&pdev->dev, bar0_addr, bar0_size);
if (bar0_map == NULL)
if (nv->tegra_pci_igpu_pg_mask == NV_TEGRA_PCI_IGPU_PG_MASK_DEFAULT)
{
gpc_fuse_mask = 0;
gpu_pg_mask = 0;
}
else
{
#define NV_FUSE_STATUS_OPT_GPC 0x00820c1c
gpc_fuse_mask = readl(bar0_map + NV_FUSE_STATUS_OPT_GPC);
gpu_pg_mask = nv->tegra_pci_igpu_pg_mask;
nv_printf(NV_DBG_INFO, "NVRM: devfreq registration received gpu_pg_mask = %u\n", gpu_pg_mask);
}
for (i = 0; i < nvl->devfreq_table_size; i++)
{
tdata = &nvl->devfreq_table[i];
if (gpc_fuse_mask && (gpc_fuse_mask & tdata->gpc_fuse_field))
if (gpu_pg_mask && (gpu_pg_mask & tdata->gpc_fuse_field))
{
continue;
}
@@ -888,6 +1126,9 @@ nv_pci_gb10b_register_devfreq(struct pci_dev *pdev)
INIT_LIST_HEAD(&tdev->gpc_cluster);
INIT_LIST_HEAD(&tdev->nvd_cluster);
#if NV_HAS_COOLING_SUPPORTED
INIT_LIST_HEAD(&tdev->therm_zones);
#endif
#if NV_HAS_ICC_SUPPORTED
tdev->icc_path = icc_path;
#endif
@@ -940,7 +1181,8 @@ nv_pci_gb10b_register_devfreq(struct pci_dev *pdev)
err = nv_pci_gb10b_add_devfreq_device(nvl->gpc_devfreq_dev);
if (err != 0)
{
goto error_return;
nvl->gpc_devfreq_dev->devfreq = NULL;
goto error_slave_teardown;
}
if (nvl->sys_devfreq_dev != NULL)
@@ -961,7 +1203,8 @@ nv_pci_gb10b_register_devfreq(struct pci_dev *pdev)
err = nv_pci_gb10b_add_devfreq_device(nvl->nvd_devfreq_dev);
if (err != 0)
{
goto error_return;
nvl->nvd_devfreq_dev->devfreq = NULL;
goto error_slave_teardown;
}
if (nvl->sys_devfreq_dev != NULL)
@@ -979,6 +1222,29 @@ nv_pci_gb10b_register_devfreq(struct pci_dev *pdev)
return 0;
error_slave_teardown:
if (nvl->sys_devfreq_dev != NULL)
{
if (nvl->sys_devfreq_dev->gpc_master != NULL)
{
list_del(&nvl->sys_devfreq_dev->gpc_cluster);
nvl->sys_devfreq_dev->gpc_master = NULL;
}
device_unregister(&nvl->sys_devfreq_dev->dev);
nvl->sys_devfreq_dev = NULL;
}
if (nvl->pwr_devfreq_dev != NULL)
{
if (nvl->pwr_devfreq_dev->gpc_master != NULL)
{
list_del(&nvl->pwr_devfreq_dev->gpc_cluster);
nvl->pwr_devfreq_dev->gpc_master = NULL;
}
device_unregister(&nvl->pwr_devfreq_dev->dev);
nvl->pwr_devfreq_dev = NULL;
}
error_return:
/* The caller will call unregister to unwind on failure */
return err;
@@ -1054,6 +1320,82 @@ nv_pci_gb10b_resume_devfreq(struct device *dev)
return err;
}
static void nv_pci_devfreq_disable_boost(struct work_struct *work)
{
#if defined(NV_UPDATE_DEVFREQ_PRESENT)
struct nv_pci_tegra_devfreq_dev *tdev;
tdev = container_of(work, struct nv_pci_tegra_devfreq_dev, boost_disable.work);
tdev->boost_enabled = 0;
#endif
}
static int
nv_pci_gb10b_devfreq_enable_boost(struct device *dev, unsigned int duration)
{
#if defined(NV_UPDATE_DEVFREQ_PRESENT)
struct pci_dev *pci_dev = to_pci_dev(dev);
nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
struct nv_pci_tegra_devfreq_dev *tdev;
unsigned long delay;
if (duration == 0)
return 0;
delay = msecs_to_jiffies(duration * 1000);
tdev = nvl->gpc_devfreq_dev;
if (tdev != NULL && tdev->devfreq != NULL && tdev->boost_enabled == 0)
{
tdev->boost_enabled = 1;
INIT_DELAYED_WORK(&tdev->boost_disable, nv_pci_devfreq_disable_boost);
schedule_delayed_work(&tdev->boost_disable, delay);
}
tdev = nvl->nvd_devfreq_dev;
if (tdev != NULL && tdev->devfreq != NULL && tdev->boost_enabled == 0)
{
tdev->boost_enabled = 1;
INIT_DELAYED_WORK(&tdev->boost_disable, nv_pci_devfreq_disable_boost);
schedule_delayed_work(&tdev->boost_disable, delay);
}
return 0;
#else // !defined(NV_UPDATE_DEVFREQ_PRESENT)
return -1;
#endif
}
static int
nv_pci_gb10b_devfreq_disable_boost(struct device *dev)
{
#if defined(NV_UPDATE_DEVFREQ_PRESENT)
struct pci_dev *pci_dev = to_pci_dev(dev);
nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
struct nv_pci_tegra_devfreq_dev *tdev;
tdev = nvl->gpc_devfreq_dev;
if (tdev != NULL && tdev->devfreq != NULL && tdev->boost_enabled)
{
tdev->boost_enabled = 0;
cancel_delayed_work_sync(&tdev->boost_disable);
}
tdev = nvl->nvd_devfreq_dev;
if (tdev != NULL && tdev->devfreq != NULL && tdev->boost_enabled)
{
tdev->boost_enabled = 0;
cancel_delayed_work_sync(&tdev->boost_disable);
}
return 0;
#else // !defined(NV_UPDATE_DEVFREQ_PRESENT)
return -1;
#endif
}
struct nv_pci_tegra_data {
unsigned short vendor;
unsigned short device;
@@ -1062,6 +1404,8 @@ struct nv_pci_tegra_data {
int (*devfreq_register)(struct pci_dev*);
int (*devfreq_suspend)(struct device*);
int (*devfreq_resume)(struct device*);
int (*devfreq_enable_boost)(struct device*, unsigned int);
int (*devfreq_disable_boost)(struct device*);
};
static const struct nv_pci_tegra_data nv_pci_tegra_table[] = {
@@ -1073,35 +1417,38 @@ static const struct nv_pci_tegra_data nv_pci_tegra_table[] = {
.devfreq_register = nv_pci_gb10b_register_devfreq,
.devfreq_suspend = nv_pci_gb10b_suspend_devfreq,
.devfreq_resume = nv_pci_gb10b_resume_devfreq,
.devfreq_enable_boost = nv_pci_gb10b_devfreq_enable_boost,
.devfreq_disable_boost = nv_pci_gb10b_devfreq_disable_boost,
},
};
static void
nv_pci_tegra_devfreq_remove_opps(struct nv_pci_tegra_devfreq_dev *tdev)
{
#if defined(NV_DEVFREQ_HAS_FREQ_TABLE)
unsigned long *freq_table = tdev->devfreq->freq_table;
unsigned int max_state = tdev->devfreq->max_state;
#else
unsigned long *freq_table = tdev->devfreq->profile->freq_table;
unsigned int max_state = tdev->devfreq->profile->max_state;
#endif
int i;
for (i = 0; i < max_state; i++)
{
dev_pm_opp_remove(&tdev->dev, freq_table[i]);
}
}
static void
nv_pci_tegra_devfreq_remove(struct nv_pci_tegra_devfreq_dev *tdev)
{
struct nv_pci_tegra_devfreq_dev *tptr;
struct nv_pci_tegra_devfreq_dev *tptr, *next;
#if NV_HAS_COOLING_SUPPORTED
struct nv_pci_tegra_thermal_data *data;
struct thermal_zone_device *tzdev;
#endif
nv_pci_tegra_devfreq_remove_opps(tdev);
devm_devfreq_remove_device(&tdev->dev, tdev->devfreq);
tdev->devfreq = NULL;
if (tdev->devfreq != NULL)
{
#if NV_HAS_COOLING_SUPPORTED
list_for_each_entry(data, &tdev->therm_zones, zones)
{
tzdev = thermal_zone_get_zone_by_name(data->tz_name);
if (IS_ERR(tzdev))
{
continue;
}
thermal_unbind_cdev_from_trip(tzdev, data->passive_trip, tdev->devfreq->cdev);
}
#endif
devm_devfreq_remove_device(&tdev->dev, tdev->devfreq);
nv_pci_tegra_devfreq_remove_opps(tdev);
tdev->devfreq = NULL;
}
#if NV_HAS_ICC_SUPPORTED
if (tdev->icc_path != NULL)
@@ -1110,7 +1457,7 @@ nv_pci_tegra_devfreq_remove(struct nv_pci_tegra_devfreq_dev *tdev)
}
#endif
list_for_each_entry(tptr, &tdev->gpc_cluster, gpc_cluster)
list_for_each_entry_safe(tptr, next, &tdev->gpc_cluster, gpc_cluster)
{
if (tptr->clk != NULL)
{
@@ -1118,9 +1465,12 @@ nv_pci_tegra_devfreq_remove(struct nv_pci_tegra_devfreq_dev *tdev)
tptr->clk = NULL;
device_unregister(&tptr->dev);
}
list_del(&tptr->gpc_cluster);
tptr->gpc_master = NULL;
}
list_for_each_entry(tptr, &tdev->nvd_cluster, nvd_cluster)
list_for_each_entry_safe(tptr, next, &tdev->nvd_cluster, nvd_cluster)
{
if (tptr->clk != NULL)
{
@@ -1128,6 +1478,9 @@ nv_pci_tegra_devfreq_remove(struct nv_pci_tegra_devfreq_dev *tdev)
tptr->clk = NULL;
device_unregister(&tptr->dev);
}
list_del(&tptr->nvd_cluster);
tptr->nvd_master = NULL;
}
if (tdev->clk != NULL)
@@ -1194,6 +1547,8 @@ nv_pci_tegra_register_devfreq(struct pci_dev *pdev)
nvl->devfreq_table_size = tegra_data->devfreq_table_size;
nvl->devfreq_suspend = tegra_data->devfreq_suspend;
nvl->devfreq_resume = tegra_data->devfreq_resume;
nvl->devfreq_enable_boost = tegra_data->devfreq_enable_boost;
nvl->devfreq_disable_boost = tegra_data->devfreq_disable_boost;
err = tegra_data->devfreq_register(pdev);
if (err != 0)
@@ -1229,12 +1584,34 @@ static void nv_init_dynamic_power_management
pr3_acpi_method_present = nv_acpi_power_resource_method_present(pci_dev->bus->self);
}
// Support dynamic power management if device is a tegra PCI iGPU
rm_init_tegra_dynamic_power_management(sp, nv);
rm_init_dynamic_power_management(sp, nv, pr3_acpi_method_present);
}
static void nv_init_tegra_gpu_pg_mask(nvidia_stack_t *sp, struct pci_dev *pci_dev)
{
nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
nv_state_t *nv = NV_STATE_PTR(nvl);
struct device_node *np = pci_dev->dev.of_node;
u32 gpu_pg_mask = 0;
/* Only continue with certain Tegra PCI iGPUs */
if (!nv->supports_tegra_igpu_rg)
{
return;
}
nv->tegra_pci_igpu_pg_mask = NV_TEGRA_PCI_IGPU_PG_MASK_DEFAULT;
of_property_read_u32(np, "nvidia,fuse-overrides", &gpu_pg_mask);
if (gpu_pg_mask != 0) {
nv_printf(NV_DBG_INFO,
"NVRM: nvidia,fuse-overrides parsed from device tree: 0x%x\n", gpu_pg_mask);
nv->tegra_pci_igpu_pg_mask = gpu_pg_mask;
}
nv_set_gpu_pg_mask(nv);
}
static NvBool
nv_pci_validate_bars(const struct pci_dev *pci_dev, NvBool only_bar0)
{
@@ -1345,6 +1722,20 @@ nv_pci_probe
}
#endif /* NV_PCI_SRIOV_SUPPORT */
if (!rm_wait_for_bar_firewall(
sp,
NV_PCI_DOMAIN_NUMBER(pci_dev),
NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev),
PCI_FUNC(pci_dev->devfn),
pci_dev->device,
pci_dev->subsystem_device))
{
nv_printf(NV_DBG_ERRORS,
"NVRM: failed to wait for bar firewall to lower\n");
goto failed;
}
if (!rm_is_supported_pci_device(
(pci_dev->class >> 16) & 0xFF,
(pci_dev->class >> 8) & 0xFF,
@@ -1406,6 +1797,7 @@ nv_pci_probe
}
nv = NV_STATE_PTR(nvl);
os_mem_copy(nv->cached_gpu_info.vbios_version, "??.??.??.??.??", 15);
for (i = 0; i < NVRM_PCICFG_NUM_BARS; i++)
{
@@ -1532,6 +1924,10 @@ nv_pci_probe
"Enabling SMMU SVA feature failed! ret: %d\n", ret);
nv->ats_support = NV_FALSE;
}
#else
NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
"Enabling SMMU SVA feature failed due to lack of necessary kernel configs.\n");
nv->ats_support = NV_FALSE;
#endif
#endif // NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_enable_feature
}
@@ -1602,8 +1998,15 @@ nv_pci_probe
pm_vt_switch_required(nvl->dev, NV_TRUE);
#if defined(CONFIG_PM_DEVFREQ)
// Support dynamic power management if device is a tegra PCI iGPU
rm_init_tegra_dynamic_power_management(sp, nv);
#endif
nv_init_dynamic_power_management(sp, pci_dev);
nv_init_tegra_gpu_pg_mask(sp, pci_dev);
rm_get_gpu_uuid_raw(sp, nv);
nv_procfs_add_gpu(nvl);
@@ -1918,6 +2321,10 @@ nv_pci_shutdown(struct pci_dev *pci_dev)
nvl->nv_state.is_shutdown = NV_TRUE;
}
#if defined(CONFIG_PM_DEVFREQ)
nv_pci_tegra_unregister_devfreq(pci_dev);
#endif
/* pci_clear_master is not defined for !CONFIG_PCI */
#ifdef CONFIG_PCI
pci_clear_master(pci_dev);
@@ -2193,6 +2600,7 @@ struct pci_driver nv_pci_driver = {
#if defined(CONFIG_PM)
.driver.pm = &nv_pm_ops,
#endif
.driver.probe_type = PROBE_FORCE_SYNCHRONOUS,
};
void nv_pci_unregister_driver(void)

View File

@@ -302,14 +302,11 @@ int nv_soc_register_irqs(nv_state_t *nv)
for (dpauxindex = 0; dpauxindex < nv->num_dpaux_instance; dpauxindex++)
{
char dpaux_dev_name[10] = {0};
snprintf(dpaux_dev_name, sizeof(dpaux_dev_name), "%s%d", "dpaux", dpauxindex);
rc = nv_request_soc_irq(nvl, nv->dpaux_irqs[dpauxindex],
NV_SOC_IRQ_DPAUX_TYPE,
nv_default_irq_flags(nv),
dpauxindex,
dpaux_dev_name);
nv->dpaux_devname[dpauxindex]);
if (rc != 0)
{
nv_printf(NV_DBG_ERRORS, "failed to request dpaux irq (%d)\n", rc);
@@ -424,8 +421,7 @@ static int nv_platform_alloc_device_dpaux(struct platform_device *plat_dev, nv_s
for (dpauxindex = 0; dpauxindex < nv->num_dpaux_instance; dpauxindex++)
{
char sdpaux_device[10];
snprintf(sdpaux_device, sizeof(sdpaux_device), "%s%d", sdpaux, dpauxindex);
snprintf(nv->dpaux_devname[dpauxindex], sizeof(nv->dpaux_devname[dpauxindex]), "%s%d", sdpaux, dpauxindex);
NV_KMALLOC(nv->dpaux[dpauxindex], sizeof(*(nv->dpaux[dpauxindex])));
if (nv->dpaux[dpauxindex] == NULL)
@@ -437,7 +433,7 @@ static int nv_platform_alloc_device_dpaux(struct platform_device *plat_dev, nv_s
os_mem_set(nv->dpaux[dpauxindex], 0, sizeof(*(nv->dpaux[dpauxindex])));
irq = platform_get_irq_byname(plat_dev, sdpaux_device);
irq = platform_get_irq_byname(plat_dev, nv->dpaux_devname[dpauxindex]);
if (irq < 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to get IO irq resource\n");
@@ -1367,6 +1363,7 @@ struct platform_driver nv_platform_driver = {
#if defined(CONFIG_PM)
.pm = &nv_pm_ops,
#endif
.probe_type = PROBE_FORCE_SYNCHRONOUS,
},
.probe = nv_platform_device_probe,
.remove = nv_platform_device_remove_wrapper,

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -110,21 +110,13 @@ nv_procfs_read_gpu_info(
char *type;
const char *name;
char *uuid;
char vbios_version[15];
nvidia_stack_t *sp = NULL;
char firmware_version[64] = { 0 };
if (nv_kmem_cache_alloc_stack(&sp) != 0)
{
return 0;
}
if (rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE) != NV_OK)
{
nv_kmem_cache_free_stack(sp);
return 0;
}
name = rm_get_device_name(pci_dev->device,
pci_dev->subsystem_vendor,
pci_dev->subsystem_device);
@@ -132,18 +124,23 @@ nv_procfs_read_gpu_info(
seq_printf(s, "Model: \t\t %s\n", name);
seq_printf(s, "IRQ: \t\t %d\n", nv->interrupt_line);
uuid = rm_get_gpu_uuid(sp, nv);
if (uuid != NULL)
if (nv->nv_uuid_cache.valid)
{
seq_printf(s, "GPU UUID: \t %s\n", uuid);
os_free_mem(uuid);
uuid = NULL;
uuid = rm_get_gpu_uuid(sp, nv);
if (uuid != NULL)
{
seq_printf(s, "GPU UUID: \t %s\n", uuid);
os_free_mem(uuid);
uuid = NULL;
}
}
else
{
nv_printf(NV_DBG_ERRORS, "NVRM: GPU UUID cache not valid!\n");
}
rm_get_vbios_version(sp, nv, vbios_version);
seq_printf(s, "Video BIOS: \t %s\n", vbios_version);
seq_printf(s, "Video BIOS: \t %s\n", nv->cached_gpu_info.vbios_version);
if (nv_find_pci_capability(pci_dev, PCI_CAP_ID_EXP))
type = "PCIe";
@@ -159,10 +156,13 @@ nv_procfs_read_gpu_info(
nv->pci_info.slot, PCI_FUNC(pci_dev->devfn));
seq_printf(s, "Device Minor: \t %u\n", nvl->minor_num);
rm_get_firmware_version(sp, nv, firmware_version, sizeof(firmware_version));
if (firmware_version[0] != '\0')
if (nv->cached_gpu_info.firmware_version[0] == '\0')
{
seq_printf(s, "GPU Firmware: \t %s\n", firmware_version);
seq_printf(s, "GPU Firmware: \t N/A\n");
}
else
{
seq_printf(s, "GPU Firmware: \t %s\n", nv->cached_gpu_info.firmware_version);
}
#if defined(DEBUG)
@@ -180,7 +180,6 @@ nv_procfs_read_gpu_info(
seq_printf(s, "GPU Excluded:\t %s\n",
((nv->flags & NV_FLAG_EXCLUDE) != 0) ? "Yes" : "No");
rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_COARSE);
nv_kmem_cache_free_stack(sp);
@@ -206,6 +205,8 @@ nv_procfs_read_power(
rm_get_power_info(sp, nv, &power_info);
seq_printf(s, "Runtime D3 status: %s\n", power_info.dynamic_power_status);
seq_printf(s, "Tegra iGPU Rail-Gating: %s\n",
nv->is_tegra_pci_igpu_rg_enabled ? "Enabled" : "Disabled");
seq_printf(s, "Video Memory: %s\n\n", power_info.vidmem_power_status);
seq_printf(s, "GPU Hardware Support:\n");

View File

@@ -662,6 +662,16 @@
#define NV_REG_DYNAMIC_POWER_MANAGEMENT_VIDEO_MEMORY_THRESHOLD \
NV_REG_STRING(__NV_DYNAMIC_POWER_MANAGEMENT_VIDEO_MEMORY_THRESHOLD)
/*
* Option: TegraGpuPgMask
*
* This option controls the TPC/GPC/FBP power-gating mask for Tegra iGPU.
*
*/
#define __NV_TEGRA_GPU_PG_MASK TegraGpuPgMask
#define NV_REG_TEGRA_GPU_PG_MASK \
NV_REG_STRING(__NV_TEGRA_GPU_PG_MASK)
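/*
 * Illustrative usage (editor's note; assumed from the NVreg_* module-parameter
 * convention used by the other options in this file, not stated in this diff):
 * the mask would typically be set at module load time, e.g.
 *
 *   options nvidia NVreg_TegraGpuPgMask=0x8
 *
 * in a modprobe .conf file. The parsed value lands in NVreg_TegraGpuPgMask
 * (see NV_DEFINE_REG_ENTRY_GLOBAL below) and is consumed by
 * nv_set_gpu_pg_mask(), where it overrides nv->tegra_pci_igpu_pg_mask.
 */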
/*
* Option: RegisterPCIDriver
*
@@ -945,6 +955,25 @@
#define NV_REG_GRDMA_PCI_TOPO_CHECK_OVERRIDE_ALLOW_ACCESS 1
#define NV_REG_GRDMA_PCI_TOPO_CHECK_OVERRIDE_DENY_ACCESS 2
/*
* Option: NVreg_EnableSystemMemoryPools
*
* Description:
*
* This option controls the creation of system memory page pools for different page sizes.
* The pool for a given pageSize is enabled by setting bit (pageSize >> NV_ENABLE_SYSTEM_MEMORY_POOLS_SHIFT).
* The pools keep memory cached after it is freed to speed up reallocation.
* Pools are shared by all adapters.
*
* This feature is only supported by the OpenRM driver.
*
* By default, the 4K, 64K, and 2M page size pools are enabled.
*/
#define __NV_ENABLE_SYSTEM_MEMORY_POOLS EnableSystemMemoryPools
#define NV_ENABLE_SYSTEM_MEMORY_POOLS NV_REG_STRING(__NV_ENABLE_SYSTEM_MEMORY_POOLS)
#define NV_ENABLE_SYSTEM_MEMORY_POOLS_DEFAULT 0x00000211
#define NV_ENABLE_SYSTEM_MEMORY_POOLS_SHIFT 12
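/*
 * Worked example (editor's illustration; the bit-index reading below is
 * inferred from the default value rather than stated in this file): with
 * NV_ENABLE_SYSTEM_MEMORY_POOLS_SHIFT = 12, the default mask 0x211 has bits
 * 0, 4 and 9 set, which line up with the 4K (2^12), 64K (2^16) and 2M (2^21)
 * page size pools:
 *
 *   (1u << (12 - 12)) | (1u << (16 - 12)) | (1u << (21 - 12)) == 0x211
 */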
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
/*
@@ -983,6 +1012,7 @@ NV_DEFINE_REG_ENTRY_GLOBAL(__NV_REGISTER_PCI_DRIVER, 1);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_REGISTER_PLATFORM_DEVICE_DRIVER, 1);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_ENABLE_RESIZABLE_BAR, 0);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_ENABLE_DBG_BREAKPOINT, 0);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_TEGRA_GPU_PG_MASK, 0);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_ENABLE_NONBLOCKING_OPEN, 1);
NV_DEFINE_REG_STRING_ENTRY(__NV_COHERENT_GPU_MEMORY_MODE, NULL);
@@ -999,6 +1029,7 @@ NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_CREATE_IMEX_CHANNEL_0, 0);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_GRDMA_PCI_TOPO_CHECK_OVERRIDE,
NV_REG_GRDMA_PCI_TOPO_CHECK_OVERRIDE_DEFAULT);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_ENABLE_SYSTEM_MEMORY_POOLS, NV_ENABLE_SYSTEM_MEMORY_POOLS_DEFAULT);
/*
*----------------registry database definition----------------------
@@ -1036,6 +1067,7 @@ nv_parm_t nv_parms[] = {
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_S0IX_POWER_MANAGEMENT_VIDEO_MEMORY_THRESHOLD),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DYNAMIC_POWER_MANAGEMENT),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DYNAMIC_POWER_MANAGEMENT_VIDEO_MEMORY_THRESHOLD),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_TEGRA_GPU_PG_MASK),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_REGISTER_PCI_DRIVER),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_PCIE_RELAXED_ORDERING_MODE),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_RESIZABLE_BAR),
@@ -1048,6 +1080,7 @@ nv_parm_t nv_parms[] = {
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_CREATE_IMEX_CHANNEL_0),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_GRDMA_PCI_TOPO_CHECK_OVERRIDE),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_SYSTEM_MEMORY_POOLS),
{NULL, NULL}
};

File diff suppressed because it is too large

View File

@@ -105,10 +105,16 @@
#include "conftest/patches.h"
#include "detect-self-hosted.h"
#if defined(NV_BPMP_MRQ_HAS_STRAP_SET) && defined(NV_PM_RUNTIME_AVAILABLE)
#include <soc/tegra/bpmp-abi.h>
#endif
#define RM_THRESHOLD_TOTAL_IRQ_COUNT 100000
#define RM_THRESHOLD_UNAHNDLED_IRQ_COUNT 99900
#define RM_UNHANDLED_TIMEOUT_US 100000
#define TEGRA264_STRAP_NV_FUSE_CTRL_OPT_GPU 1U
MODULE_LICENSE("Dual MIT/GPL");
MODULE_INFO(supported, "external");
@@ -522,6 +528,9 @@ nvlink_drivers_init(void)
return rc;
}
int nv_init_page_pools(void);
void nv_destroy_page_pools(void);
static void
nv_module_state_exit(nv_stack_t *sp)
{
@@ -532,6 +541,8 @@ nv_module_state_exit(nv_stack_t *sp)
nv_kthread_q_stop(&nv_deferred_close_kthread_q);
nv_kthread_q_stop(&nv_kthread_q);
nv_destroy_page_pools();
nv_lock_destroy_locks(sp, nv);
}
@@ -554,6 +565,12 @@ nv_module_state_init(nv_stack_t *sp)
goto exit;
}
rc = nv_init_page_pools();
if (rc != 0)
{
goto exit;
}
rc = nv_kthread_q_init(&nv_deferred_close_kthread_q, "nv_queue");
if (rc != 0)
{
@@ -585,6 +602,7 @@ nv_module_state_init(nv_stack_t *sp)
exit:
if (rc < 0)
{
nv_destroy_page_pools();
nv_lock_destroy_locks(sp, nv);
}
@@ -1481,16 +1499,8 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
goto failed_release_irq;
}
{
const NvU8 *uuid = rm_get_gpu_uuid_raw(sp, nv);
if (uuid != NULL)
{
#if defined(NV_UVM_ENABLE)
nv_uvm_notify_start_device(uuid);
#endif
}
}
/* Generate and cache the UUID for future callers */
(void)rm_get_gpu_uuid_raw(sp, nv);
if (!(nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
{
@@ -1675,6 +1685,7 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
nv_assert_not_in_gpu_exclusion_list(sp, nv);
NV_ATOMIC_INC(nvl->usage_count);
return 0;
}
@@ -2037,18 +2048,6 @@ static void nv_stop_device(nv_state_t *nv, nvidia_stack_t *sp)
*/
rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
#if defined(NV_UVM_ENABLE)
{
const NvU8* uuid;
// Inform UVM before disabling adapter. Use cached copy
uuid = nv_get_cached_uuid(nv);
if (uuid != NULL)
{
// this function cannot fail
nv_uvm_notify_stop_device(uuid);
}
}
#endif
/* Adapter is already shutdown as part of nvidia_pci_remove */
if (!nv->removed)
{
@@ -2373,6 +2372,7 @@ out:
return rc;
}
int
nvidia_ioctl(
struct inode *inode,
@@ -2415,7 +2415,7 @@ nvidia_ioctl(
goto done_early;
}
if (NV_COPY_FROM_USER(&ioc_xfer, arg_ptr, sizeof(ioc_xfer)))
if (copy_from_user(&ioc_xfer, arg_ptr, sizeof(ioc_xfer)))
{
nv_printf(NV_DBG_ERRORS,
"NVRM: failed to copy in ioctl XFER data!\n");
@@ -2443,7 +2443,7 @@ nvidia_ioctl(
goto done_early;
}
if (NV_COPY_FROM_USER(arg_copy, arg_ptr, arg_size))
if (copy_from_user(arg_copy, arg_ptr, arg_size))
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to copy in ioctl data!\n");
status = -EFAULT;
@@ -2503,7 +2503,7 @@ nvidia_ioctl(
{
case NV_ESC_QUERY_DEVICE_INTR:
{
nv_ioctl_query_device_intr *query_intr = arg_copy;
nv_ioctl_query_device_intr_t *query_intr = arg_copy;
NV_ACTUAL_DEVICE_ONLY(nv);
@@ -2773,7 +2773,7 @@ done_early:
{
if (status != -EFAULT)
{
if (NV_COPY_TO_USER(arg_ptr, arg_copy, arg_size))
if (copy_to_user(arg_ptr, arg_copy, arg_size))
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to copy out ioctl data\n");
status = -EFAULT;
@@ -4497,16 +4497,18 @@ nvidia_suspend(
}
nv = NV_STATE_PTR(nvl);
#if defined(NV_PM_RUNTIME_AVAILABLE)
#if defined(NV_PM_RUNTIME_AVAILABLE) && defined(NV_PM_DOMAIN_AVAILABLE)
/* Handle GenPD suspend sequence for Tegra PCI iGPU */
if (dev_is_pci(dev) && nv->is_tegra_pci_igpu_rg_enabled == NV_TRUE)
if (dev_is_pci(dev) && nv->is_tegra_pci_igpu_rg_enabled)
{
/* Turn on the GPU power before saving PCI configuration */
pm_runtime_forbid(dev);
dev_pm_genpd_suspend(dev);
/*
* If a PCI device is attached to a GenPD power domain,
* resume_early callback in PCI framework will not be
* pci_pm_resume_noirq callback in PCI framework will not be
* executed during static resume. As a result, the PCI
* configuration cannot be properly restored.
*
@@ -4582,7 +4584,7 @@ nvidia_resume(
{
NV_STATUS status = NV_OK;
struct pci_dev *pci_dev;
#if defined(NV_PM_RUNTIME_AVAILABLE)
#if defined(NV_PM_RUNTIME_AVAILABLE) && defined(NV_PM_DOMAIN_AVAILABLE)
struct pci_bus *bus;
struct pci_host_bridge *bridge;
struct device *ctrl;
@@ -4601,9 +4603,9 @@ nvidia_resume(
}
nv = NV_STATE_PTR(nvl);
#if defined(NV_PM_RUNTIME_AVAILABLE)
#if defined(NV_PM_RUNTIME_AVAILABLE) && defined(NV_PM_DOMAIN_AVAILABLE)
/* Handle GenPD resume sequence for Tegra PCI iGPU */
if (dev_is_pci(dev) && nv->is_tegra_pci_igpu_rg_enabled == NV_TRUE)
if (dev_is_pci(dev) && nv->is_tegra_pci_igpu_rg_enabled)
{
// Get PCI controller device
bus = pci_dev->bus;
@@ -4622,6 +4624,8 @@ nvidia_resume(
"NVRM: restore GPU pm_domain after suspend\n");
dev->pm_domain = ctrl->pm_domain;
dev_pm_genpd_resume(dev);
pm_runtime_allow(dev);
}
#endif
@@ -4726,7 +4730,7 @@ nv_suspend_devices(
{
nv = NV_STATE_PTR(nvl);
dev = nvl->dev;
if (dev_is_pci(dev) && nv->is_tegra_pci_igpu_rg_enabled == NV_TRUE)
if (dev_is_pci(dev) && nv->is_tegra_pci_igpu_rg_enabled)
{
nv_printf(NV_DBG_INFO,
"NVRM: GPU suspend through procfs is forbidden with Tegra iGPU\n");
@@ -4979,32 +4983,35 @@ int nv_pmops_runtime_suspend(
struct device *dev
)
{
int err = 0;
#if defined(CONFIG_PM_DEVFREQ)
struct pci_dev *pci_dev = to_pci_dev(dev);
nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
#endif
int err = 0;
err = nvidia_transition_dynamic_power(dev, NV_TRUE);
if (err)
{
return err;
}
#if defined(CONFIG_PM_DEVFREQ)
if (nvl->devfreq_suspend != NULL)
{
err = nvl->devfreq_suspend(dev);
if (err)
{
goto nv_pmops_runtime_suspend_exit;
return err;
}
}
#endif
err = nvidia_transition_dynamic_power(dev, NV_TRUE);
if (err)
{
goto nv_pmops_runtime_suspend_exit;
}
return err;
nv_pmops_runtime_suspend_exit:
nvidia_transition_dynamic_power(dev, NV_FALSE);
#if defined(CONFIG_PM_DEVFREQ)
if (nvl->devfreq_resume != NULL)
{
nvl->devfreq_resume(dev);
}
#endif
return err;
}
@@ -5013,11 +5020,17 @@ int nv_pmops_runtime_resume(
struct device *dev
)
{
int err = 0;
#if defined(CONFIG_PM_DEVFREQ)
struct pci_dev *pci_dev = to_pci_dev(dev);
nv_linux_state_t *nvl = pci_get_drvdata(pci_dev);
#endif
int err;
err = nvidia_transition_dynamic_power(dev, NV_FALSE);
if (err)
{
return err;
}
#if defined(CONFIG_PM_DEVFREQ)
if (nvl->devfreq_resume != NULL)
@@ -5030,22 +5043,13 @@ int nv_pmops_runtime_resume(
}
#endif
err = nvidia_transition_dynamic_power(dev, NV_FALSE);
return err;
#if defined(CONFIG_PM_DEVFREQ)
if (err)
{
goto nv_pmops_runtime_resume_exit;
}
return err;
nv_pmops_runtime_resume_exit:
if (nvl->devfreq_suspend != NULL)
{
nvl->devfreq_suspend(dev);
}
#endif
nvidia_transition_dynamic_power(dev, NV_TRUE);
return err;
#endif
}
#endif /* defined(CONFIG_PM) */
@@ -5101,8 +5105,6 @@ NV_STATUS NV_API_CALL nv_set_primary_vga_status(
nv_state_t *nv
)
{
/* IORESOURCE_ROM_SHADOW wasn't added until 2.6.10 */
#if defined(IORESOURCE_ROM_SHADOW)
nv_linux_state_t *nvl;
struct pci_dev *pci_dev;
@@ -5112,9 +5114,6 @@ NV_STATUS NV_API_CALL nv_set_primary_vga_status(
nv->primary_vga = ((NV_PCI_RESOURCE_FLAGS(pci_dev, PCI_ROM_RESOURCE) &
IORESOURCE_ROM_SHADOW) == IORESOURCE_ROM_SHADOW);
return NV_OK;
#else
return NV_ERR_NOT_SUPPORTED;
#endif
}
NvBool NV_API_CALL nv_requires_dma_remap(
@@ -5856,7 +5855,7 @@ NvBool NV_API_CALL nv_pci_tegra_register_power_domain
NvBool attach
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
#if defined(NV_PM_RUNTIME_AVAILABLE) && defined(NV_PM_DOMAIN_AVAILABLE)
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
struct pci_dev *pci_dev = nvl->pci_dev;
struct device_node *node = pci_dev->dev.of_node;
@@ -5891,7 +5890,7 @@ NvBool NV_API_CALL nv_pci_tegra_pm_init
nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
#if defined(NV_PM_RUNTIME_AVAILABLE) && defined(NV_PM_DOMAIN_AVAILABLE)
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
struct pci_dev *pci_dev = nvl->pci_dev;
struct pci_bus *bus = pci_dev->bus;
@@ -5918,8 +5917,9 @@ NvBool NV_API_CALL nv_pci_tegra_pm_init
// Use autosuspend for GPU with idleness threshold 500 ms
pm_runtime_set_autosuspend_delay(&pci_dev->dev, 500);
pm_runtime_use_autosuspend(&pci_dev->dev);
#endif
return NV_TRUE;
#endif
return NV_FALSE;
}
void NV_API_CALL nv_pci_tegra_pm_deinit
@@ -5927,7 +5927,7 @@ void NV_API_CALL nv_pci_tegra_pm_deinit
nv_state_t *nv
)
{
#if defined(NV_PM_RUNTIME_AVAILABLE)
#if defined(NV_PM_RUNTIME_AVAILABLE) && defined(NV_PM_DOMAIN_AVAILABLE)
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
struct pci_dev *pci_dev = nvl->pci_dev;
struct pci_bus *bus = pci_dev->bus;
@@ -6297,6 +6297,82 @@ void NV_API_CALL nv_get_screen_info(
}
}
void NV_API_CALL nv_set_gpu_pg_mask
(
nv_state_t *nv
)
{
/*
* This function sets the GPU PG mask for the Tegra PCI iGPU.
* After the PG mask is sent to BPMP, the GPU needs an FLR (function-level reset) or
* a GPU reset for the PG mask to take effect.
*
* Since a Tegra iGPU rail-ungate is itself a GPU reset (cold boot), the GPU PG mask
* can rely on it; the rail-ungate is triggered when runtime PM is enabled.
*
* Therefore, only allow the GPU PG feature when runtime PM is supported.
*/
#if defined(NV_BPMP_MRQ_HAS_STRAP_SET)
#if defined(NV_PM_RUNTIME_AVAILABLE) && defined(NV_PM_DOMAIN_AVAILABLE)
struct mrq_strap_request request;
NvS32 ret, api_ret;
NV_STATUS status = NV_ERR_NOT_SUPPORTED;
/*
* Only Tegra chips that support rail-gating can use this feature,
* because making the PG mask effective requires a GPU FLR or GPU cold boot.
*/
if (!nv->is_tegra_pci_igpu_rg_enabled || (nv->flags & NV_FLAG_PERSISTENT_SW_STATE))
{
nv_printf(NV_DBG_INFO, "NVRM: gpu_pg_mask is not supported.\n");
return;
}
// overlay the gpu_pg_mask from module parameter
if (NVreg_TegraGpuPgMask != NV_TEGRA_PCI_IGPU_PG_MASK_DEFAULT) {
nv_printf(NV_DBG_INFO, "NVRM: overlay gpu_pg_mask with module parameter.\n");
nv->tegra_pci_igpu_pg_mask = NVreg_TegraGpuPgMask;
}
if (nv->tegra_pci_igpu_pg_mask == NV_TEGRA_PCI_IGPU_PG_MASK_DEFAULT) {
nv_printf(NV_DBG_INFO, "NVRM: Using default gpu_pg_mask. "\
"There's no need to send BPMP MRQ.\n");
return;
}
memset(&request, 0, sizeof(request));
request.cmd = STRAP_SET;
request.id = TEGRA264_STRAP_NV_FUSE_CTRL_OPT_GPU;
request.value = nv->tegra_pci_igpu_pg_mask;
status = nv_bpmp_send_mrq(nv,
MRQ_STRAP,
&request,
sizeof(request),
NULL,
0,
&ret,
&api_ret);
if (status != NV_OK)
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to call bpmp_send_mrq\n");
return;
}
if (api_ret)
{
nv_printf(NV_DBG_ERRORS, "NVRM: BPMP call for gpu_pg_mask %d failed, rv = %d\n",\
nv->tegra_pci_igpu_pg_mask, api_ret);
return;
}
nv_printf(NV_DBG_INFO, "NVRM: setting gpu_pg_mask %d succeeded\n", nv->tegra_pci_igpu_pg_mask);
#else
nv_printf(NV_DBG_INFO, "NVRM: gpu_pg_mask configuration is not supported\n");
#endif // defined(NV_PM_RUNTIME_AVAILABLE) && defined(NV_PM_DOMAIN_AVAILABLE)
#endif // defined(NV_BPMP_MRQ_HAS_STRAP_SET)
}
module_init(nvidia_init_module);
module_exit(nvidia_exit_module);

View File

@@ -63,6 +63,16 @@ NV_STATUS nvGpuOpsDeviceCreate(struct gpuSession *session,
NV_STATUS nvGpuOpsDeviceDestroy(struct gpuDevice *device);
NV_STATUS nvGpuOpsAccessBitsBufAlloc(struct gpuDevice *device,
gpuAccessBitsBufferAlloc *pAccessBitsInfo);
NV_STATUS nvGpuOpsAccessBitsBufFree(struct gpuDevice *device,
gpuAccessBitsBufferAlloc *pAccessBitsInfo);
NV_STATUS nvGpuOpsAccessBitsDump(struct gpuDevice *device,
gpuAccessBitsBufferAlloc *pAccessBitsInfo,
UVM_ACCESS_BITS_DUMP_MODE mode);
NV_STATUS nvGpuOpsAddressSpaceCreate(struct gpuDevice *device,
NvU64 vaBase,
NvU64 vaSize,

View File

@@ -38,18 +38,18 @@
#include "nv_gpu_ops.h"
#include "rm-gpu-ops.h"
// This is really a struct UvmOpsUvmEvents *. It needs to be an atomic because
// it can be read outside of the g_pNvUvmEventsLock. Use getUvmEvents and
// This is really a struct UvmEventsLinux *. It needs to be an atomic because it
// can be read outside of the g_pNvUvmEventsLock. Use getUvmEvents and
// setUvmEvents to access it.
static atomic_long_t g_pNvUvmEvents;
static struct semaphore g_pNvUvmEventsLock;
static struct UvmOpsUvmEvents *getUvmEvents(void)
static struct UvmEventsLinux *getUvmEvents(void)
{
return (struct UvmOpsUvmEvents *)atomic_long_read(&g_pNvUvmEvents);
return (struct UvmEventsLinux *)atomic_long_read(&g_pNvUvmEvents);
}
static void setUvmEvents(struct UvmOpsUvmEvents *newEvents)
static void setUvmEvents(struct UvmEventsLinux *newEvents)
{
atomic_long_set(&g_pNvUvmEvents, (long)newEvents);
}
@@ -1047,12 +1047,55 @@ NV_STATUS nvUvmInterfaceDisableAccessCntr(uvmGpuDeviceHandle device,
}
EXPORT_SYMBOL(nvUvmInterfaceDisableAccessCntr);
// this function is called by the UVM driver to register the ops
NV_STATUS nvUvmInterfaceRegisterUvmCallbacks(struct UvmOpsUvmEvents *importedUvmOps)
NV_STATUS nvUvmInterfaceAccessBitsBufAlloc(uvmGpuDeviceHandle device,
UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
NV_STATUS status;
status = rm_gpu_ops_access_bits_buffer_alloc(sp, (gpuDeviceHandle)device,
pAccessBitsInfo);
nvUvmFreeSafeStack(sp);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceAccessBitsBufAlloc);
NV_STATUS nvUvmInterfaceAccessBitsBufFree(uvmGpuDeviceHandle device,
UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
NV_STATUS status;
status = rm_gpu_ops_access_bits_buffer_free(sp, (gpuDeviceHandle)device,
pAccessBitsInfo);
nvUvmFreeSafeStack(sp);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceAccessBitsBufFree);
NV_STATUS nvUvmInterfaceAccessBitsDump(uvmGpuDeviceHandle device,
UvmGpuAccessBitsBufferAlloc* pAccessBitsInfo,
UVM_ACCESS_BITS_DUMP_MODE mode)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
NV_STATUS status = NV_OK;
status = rm_gpu_ops_access_bits_dump(sp, (gpuDeviceHandle)device,
pAccessBitsInfo, mode);
nvUvmFreeSafeStack(sp);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceAccessBitsDump);
// this function is called by the UVM driver to register the event callbacks
NV_STATUS nvUvmInterfaceRegisterUvmEvents(struct UvmEventsLinux *importedEvents)
{
NV_STATUS status = NV_OK;
if (!importedUvmOps)
if (!importedEvents)
{
return NV_ERR_INVALID_ARGUMENT;
}
@@ -1066,13 +1109,13 @@ NV_STATUS nvUvmInterfaceRegisterUvmCallbacks(struct UvmOpsUvmEvents *importedUvm
{
// Be careful: as soon as the pointer is assigned, top half ISRs can
// start reading it to make callbacks, even before we drop the lock.
setUvmEvents(importedUvmOps);
setUvmEvents(importedEvents);
}
up(&g_pNvUvmEventsLock);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceRegisterUvmCallbacks);
EXPORT_SYMBOL(nvUvmInterfaceRegisterUvmEvents);
static void flush_top_half(void *info)
{
@@ -1081,7 +1124,7 @@ static void flush_top_half(void *info)
return;
}
void nvUvmInterfaceDeRegisterUvmOps(void)
void nvUvmInterfaceDeRegisterUvmEvents(void)
{
// Taking the lock forces us to wait for non-interrupt callbacks to finish
// up.
@@ -1094,7 +1137,7 @@ void nvUvmInterfaceDeRegisterUvmOps(void)
// cores. We can wait for them to finish by waiting for a context switch to
// happen on every core.
//
// This is slow, but since nvUvmInterfaceDeRegisterUvmOps is very rare
// This is slow, but since nvUvmInterfaceDeRegisterUvmEvents is very rare
// (module unload) it beats having the top half synchronize with a spin lock
// every time.
//
@@ -1103,12 +1146,12 @@ void nvUvmInterfaceDeRegisterUvmOps(void)
// ones to finish.
on_each_cpu(flush_top_half, NULL, 1);
}
EXPORT_SYMBOL(nvUvmInterfaceDeRegisterUvmOps);
EXPORT_SYMBOL(nvUvmInterfaceDeRegisterUvmEvents);
NV_STATUS nv_uvm_suspend(void)
{
NV_STATUS status = NV_OK;
struct UvmOpsUvmEvents *events;
struct UvmEventsLinux *events;
// Synchronize callbacks with unregistration
down(&g_pNvUvmEventsLock);
@@ -1130,7 +1173,7 @@ NV_STATUS nv_uvm_suspend(void)
NV_STATUS nv_uvm_resume(void)
{
NV_STATUS status = NV_OK;
struct UvmOpsUvmEvents *events;
struct UvmEventsLinux *events;
// Synchronize callbacks with unregistration
down(&g_pNvUvmEventsLock);
@@ -1149,48 +1192,6 @@ NV_STATUS nv_uvm_resume(void)
return status;
}
void nv_uvm_notify_start_device(const NvU8 *pUuid)
{
NvProcessorUuid uvmUuid;
struct UvmOpsUvmEvents *events;
memcpy(uvmUuid.uuid, pUuid, NV_UUID_LEN);
// Synchronize callbacks with unregistration
down(&g_pNvUvmEventsLock);
// It's not strictly necessary to use a cached local copy of the events
// pointer here since it can't change under the lock, but we'll do it for
// consistency.
events = getUvmEvents();
if(events && events->startDevice)
{
events->startDevice(&uvmUuid);
}
up(&g_pNvUvmEventsLock);
}
void nv_uvm_notify_stop_device(const NvU8 *pUuid)
{
NvProcessorUuid uvmUuid;
struct UvmOpsUvmEvents *events;
memcpy(uvmUuid.uuid, pUuid, NV_UUID_LEN);
// Synchronize callbacks with unregistration
down(&g_pNvUvmEventsLock);
// It's not strictly necessary to use a cached local copy of the events
// pointer here since it can't change under the lock, but we'll do it for
// consistency.
events = getUvmEvents();
if(events && events->stopDevice)
{
events->stopDevice(&uvmUuid);
}
up(&g_pNvUvmEventsLock);
}
NV_STATUS nv_uvm_event_interrupt(const NvU8 *pUuid)
{
//
@@ -1200,7 +1201,7 @@ NV_STATUS nv_uvm_event_interrupt(const NvU8 *pUuid)
// absolutely necessary.
//
// Instead, we allow this function to be called concurrently with
// nvUvmInterfaceDeRegisterUvmOps. That function will clear the events
// nvUvmInterfaceDeRegisterUvmEvents. That function will clear the events
// pointer, then wait for all top halves to finish out. This means the
// pointer may change out from under us, but the callbacks are still safe to
// invoke while we're in this function.
@@ -1209,7 +1210,7 @@ NV_STATUS nv_uvm_event_interrupt(const NvU8 *pUuid)
// nor the compiler make assumptions about the pointer remaining valid while
// in this function.
//
struct UvmOpsUvmEvents *events = getUvmEvents();
struct UvmEventsLinux *events = getUvmEvents();
if (events && events->isrTopHalf)
return events->isrTopHalf((const NvProcessorUuid *)pUuid);
@@ -1243,7 +1244,7 @@ EXPORT_SYMBOL(nvUvmInterfaceGetNvlinkInfo);
NV_STATUS nv_uvm_drain_P2P(const NvU8 *uuid)
{
NvProcessorUuid uvmUuid;
struct UvmOpsUvmEvents *events;
struct UvmEventsLinux *events;
NV_STATUS ret = NV_ERR_NOT_SUPPORTED;
memcpy(uvmUuid.uuid, uuid, NV_UUID_LEN);
@@ -1267,7 +1268,7 @@ NV_STATUS nv_uvm_drain_P2P(const NvU8 *uuid)
NV_STATUS nv_uvm_resume_P2P(const NvU8 *uuid)
{
NvProcessorUuid uvmUuid;
struct UvmOpsUvmEvents *events;
struct UvmEventsLinux *events;
NV_STATUS ret = NV_ERR_NOT_SUPPORTED;
memcpy(uvmUuid.uuid, uuid, NV_UUID_LEN);

View File

@@ -110,15 +110,11 @@ $(obj)/$(NVIDIA_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_OBJECTS))
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_OBJECTS)
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_array_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_array_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_cache
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_wc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_driver_hardened
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_driver_hardened_wc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_cache_shared
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
NV_CONFTEST_FUNCTION_COMPILE_TESTS += xen_ioemu_inject_msi
NV_CONFTEST_FUNCTION_COMPILE_TESTS += phys_to_dma
@@ -149,6 +145,10 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_buf_ops_has_map
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_buf_ops_has_map_atomic
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_buf_attachment_has_peer2peer
NV_CONFTEST_FUNCTION_COMPILE_TESTS += devm_clk_bulk_get_all
NV_CONFTEST_FUNCTION_COMPILE_TESTS += thermal_zone_for_each_trip
NV_CONFTEST_FUNCTION_COMPILE_TESTS += thermal_bind_cdev_to_trip
NV_CONFTEST_FUNCTION_COMPILE_TESTS += thermal_unbind_cdev_from_trip
NV_CONFTEST_FUNCTION_COMPILE_TESTS += update_devfreq
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_task_ioprio
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mdev_set_iommu_device
NV_CONFTEST_FUNCTION_COMPILE_TESTS += offline_and_remove_memory
@@ -158,6 +158,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += assign_str
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
NV_CONFTEST_FUNCTION_COMPILE_TESTS += shrinker_alloc
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_sme_active
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_swiotlb_map_sg_attrs
@@ -188,12 +189,12 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_set_memory_encrypted
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_set_memory_decrypted
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl___platform_driver_register
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present___platform_driver_register
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_hrtimer_setup
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_mutex_destroy
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_hrtimer_setup
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_timer_delete_sync
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl___vma_start_write
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_iommu_dev_enable_feature
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_iommu_dev_disable_feature
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl___vma_start_write
NV_CONFTEST_TYPE_COMPILE_TESTS += vmf_insert_pfn_prot
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
@@ -213,8 +214,12 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += of_property_for_each_u32_has_internal_args
NV_CONFTEST_TYPE_COMPILE_TESTS += platform_driver_struct_remove_returns_void
NV_CONFTEST_TYPE_COMPILE_TESTS += class_create_has_no_owner_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += class_devnode_has_const_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += devfreq_dev_profile_has_is_cooling_device
NV_CONFTEST_TYPE_COMPILE_TESTS += devfreq_has_freq_table
NV_CONFTEST_TYPE_COMPILE_TESTS += devfreq_has_suspend_freq
NV_CONFTEST_TYPE_COMPILE_TESTS += has_enum_pidtype_tgid
NV_CONFTEST_TYPE_COMPILE_TESTS += bpmp_mrq_has_strap_set
NV_CONFTEST_TYPE_COMPILE_TESTS += register_shrinker_has_format_arg
NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build
@@ -225,9 +230,9 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_GENERIC_COMPILE_TESTS += pin_user_pages
NV_CONFTEST_GENERIC_COMPILE_TESTS += pin_user_pages_remote
NV_CONFTEST_GENERIC_COMPILE_TESTS += pm_runtime_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += pm_domain_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_GENERIC_COMPILE_TESTS += pci_class_multimedia_hd_audio
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += vfio_pci_core_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += cmd_uphy_display_port_init
NV_CONFTEST_GENERIC_COMPILE_TESTS += cmd_uphy_display_port_off

View File

@@ -523,7 +523,7 @@ NV_STATUS NV_API_CALL os_memcpy_from_user(
NvU32 n
)
{
return (NV_COPY_FROM_USER(to, from, n) ? NV_ERR_INVALID_ADDRESS : NV_OK);
return (copy_from_user(to, from, n) ? NV_ERR_INVALID_ADDRESS : NV_OK);
}
NV_STATUS NV_API_CALL os_memcpy_to_user(
@@ -532,7 +532,7 @@ NV_STATUS NV_API_CALL os_memcpy_to_user(
NvU32 n
)
{
return (NV_COPY_TO_USER(to, from, n) ? NV_ERR_INVALID_ADDRESS : NV_OK);
return (copy_to_user(to, from, n) ? NV_ERR_INVALID_ADDRESS : NV_OK);
}
void* NV_API_CALL os_mem_set(
@@ -753,7 +753,7 @@ NvU64 NV_API_CALL os_get_cpu_frequency(void)
NvU32 NV_API_CALL os_get_current_process(void)
{
return NV_GET_CURRENT_PROCESS();
return current->tgid;
}
void NV_API_CALL os_get_current_process_name(char *buf, NvU32 len)
@@ -1262,7 +1262,7 @@ NvU32 NV_API_CALL os_get_cpu_number(void)
NvU32 NV_API_CALL os_get_cpu_count(void)
{
return NV_NUM_CPUS();
return num_possible_cpus();
}
NvBool NV_API_CALL os_pat_supported(void)
@@ -1691,7 +1691,15 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
* the requested order is too large (just fail
* instead).
*
* 5. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
* 5. __GFP_RETRY_MAYFAIL: Used to avoid the Linux kernel OOM killer.
* It helps PMA on paths where UVM might be
* oversubscribing memory. This gives UVM a
* chance to free memory before invoking any
* action from the OOM killer. Freeing
* non-essential memory will also benefit the
* system as a whole.
*
* 6. (Optional) __GFP_RECLAIM: Used to allow/forbid reclaim.
* This is part of GFP_USER and consequently
* GFP_HIGHUSER_MOVABLE.
*
@@ -1705,37 +1713,12 @@ NV_STATUS NV_API_CALL os_alloc_pages_node
*/
gfp_mask = __GFP_THISNODE | GFP_HIGHUSER_MOVABLE | __GFP_COMP |
__GFP_NOWARN;
#if defined(__GFP_RETRY_MAYFAIL)
__GFP_NOWARN | __GFP_RETRY_MAYFAIL;
/*
* __GFP_RETRY_MAYFAIL : Used to avoid the Linux kernel OOM killer.
* To help PMA on paths where UVM might be
* in memory over subscription. This gives UVM
* a chance to free memory before invoking any
* action from the OOM killer.
* Freeing non-essential memory will also benefit
* the system as a whole.
*/
gfp_mask |= __GFP_RETRY_MAYFAIL;
#elif defined(__GFP_NORETRY)
/*
* __GFP_NORETRY : Use __GFP_NORETRY on older kernels where
* __GFP_RETRY_MAYFAIL is not present.
*/
gfp_mask |= __GFP_NORETRY;
#endif
#if defined(__GFP_RECLAIM)
if (flag & NV_ALLOC_PAGES_NODE_SKIP_RECLAIM)
{
gfp_mask &= ~(__GFP_RECLAIM);
}
#endif // defined(__GFP_RECLAIM)
alloc_addr = alloc_pages_node(nid, gfp_mask, order);
if (alloc_addr == NULL)
@@ -2245,6 +2228,51 @@ NvS32 NV_API_CALL os_imex_channel_get
return nv_caps_imex_channel_get((int)descriptor);
}
NV_STATUS NV_API_CALL os_tegra_igpu_perf_boost
(
void *handle,
NvBool enable,
NvU32 duration
)
{
#if defined(CONFIG_PM_DEVFREQ) && defined(NV_UPDATE_DEVFREQ_PRESENT)
nv_state_t *nv = handle;
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
int err;
if (enable)
{
if (nvl->devfreq_enable_boost == NULL)
{
return NV_ERR_NOT_SUPPORTED;
}
err = nvl->devfreq_enable_boost(nvl->dev, duration);
if (err != 0)
{
return NV_ERR_OPERATING_SYSTEM;
}
}
else
{
if (nvl->devfreq_disable_boost == NULL)
{
return NV_ERR_NOT_SUPPORTED;
}
err = nvl->devfreq_disable_boost(nvl->dev);
if (err != 0)
{
return NV_ERR_OPERATING_SYSTEM;
}
}
return NV_OK;
#else // !defined(CONFIG_PM_DEVFREQ) || !defined(NV_UPDATE_DEVFREQ_PRESENT)
return NV_ERR_NOT_SUPPORTED;
#endif
}
/*
* Reads the total memory and free memory of a NUMA node from the kernel.
*/