This commit is contained in:
Andy Ritger
2022-09-20 13:54:59 -07:00
committed by Liam Middlebrook
parent 91e02299dc
commit 70259dbe7a
47 changed files with 805 additions and 350 deletions

View File

@@ -2,6 +2,13 @@
## Release 515 Entries
### [515.76] 2022-09-20
#### Fixed
- Improved compatibility with new Linux kernel releases
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates
### [515.65.01] 2022-08-02
#### Fixed

View File

@@ -643,6 +643,8 @@ Subsystem Device ID.
| NVIDIA A100-PG509-200 | 20B0 10DE 1450 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1463 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
| NVIDIA PG506-242 | 20B3 10DE 14A7 |
| NVIDIA PG506-243 | 20B3 10DE 14A8 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
@@ -743,6 +745,7 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 3050 | 2507 |
| NVIDIA GeForce RTX 3050 OEM | 2508 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2520 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2521 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2523 |
| NVIDIA RTX A2000 | 2531 1028 151D |
| NVIDIA RTX A2000 | 2531 103C 151D |

View File

@@ -203,9 +203,108 @@ $(obj)/conftest/patches.h: $(NV_CONFTEST_SCRIPT)
@mkdir -p $(obj)/conftest
@$(NV_CONFTEST_CMD) patch_check > $@
$(obj)/conftest/headers.h: $(NV_CONFTEST_SCRIPT)
@mkdir -p $(obj)/conftest
@$(NV_CONFTEST_CMD) test_kernel_headers '$(NV_CONFTEST_CFLAGS)' > $@
# Each of these headers is checked for presence with a test #include; a
# corresponding #define will be generated in conftest/headers.h.
NV_HEADER_PRESENCE_TESTS = \
asm/system.h \
drm/drmP.h \
drm/drm_auth.h \
drm/drm_gem.h \
drm/drm_crtc.h \
drm/drm_atomic.h \
drm/drm_atomic_helper.h \
drm/drm_encoder.h \
drm/drm_atomic_uapi.h \
drm/drm_drv.h \
drm/drm_framebuffer.h \
drm/drm_connector.h \
drm/drm_probe_helper.h \
drm/drm_blend.h \
drm/drm_fourcc.h \
drm/drm_prime.h \
drm/drm_plane.h \
drm/drm_vblank.h \
drm/drm_file.h \
drm/drm_ioctl.h \
drm/drm_device.h \
drm/drm_mode_config.h \
dt-bindings/interconnect/tegra_icc_id.h \
generated/autoconf.h \
generated/compile.h \
generated/utsrelease.h \
linux/efi.h \
linux/kconfig.h \
linux/platform/tegra/mc_utils.h \
linux/semaphore.h \
linux/printk.h \
linux/ratelimit.h \
linux/prio_tree.h \
linux/log2.h \
linux/of.h \
linux/bug.h \
linux/sched/signal.h \
linux/sched/task.h \
linux/sched/task_stack.h \
xen/ioemu.h \
linux/fence.h \
linux/dma-resv.h \
soc/tegra/chip-id.h \
soc/tegra/fuse.h \
soc/tegra/tegra_bpmp.h \
video/nv_internal.h \
linux/platform/tegra/dce/dce-client-ipc.h \
linux/nvhost.h \
linux/nvhost_t194.h \
asm/book3s/64/hash-64k.h \
asm/set_memory.h \
asm/prom.h \
asm/powernv.h \
linux/atomic.h \
asm/barrier.h \
asm/opal-api.h \
sound/hdaudio.h \
asm/pgtable_types.h \
linux/stringhash.h \
linux/dma-map-ops.h \
rdma/peer_mem.h \
sound/hda_codec.h \
linux/dma-buf.h \
linux/time.h \
linux/platform_device.h \
linux/mutex.h \
linux/reset.h \
linux/of_platform.h \
linux/of_device.h \
linux/of_gpio.h \
linux/gpio.h \
linux/gpio/consumer.h \
linux/interconnect.h \
linux/pm_runtime.h \
linux/clk.h \
linux/clk-provider.h \
linux/ioasid.h \
linux/stdarg.h \
linux/iosys-map.h \
asm/coco.h
# Filename to store the define for the header in $(1); this is only consumed by
# the rule below that concatenates all of these together.
NV_HEADER_PRESENCE_PART = $(addprefix $(obj)/conftest/header_presence/,$(addsuffix .part,$(1)))
# Define a rule to check the header $(1).
define NV_HEADER_PRESENCE_CHECK
$$(call NV_HEADER_PRESENCE_PART,$(1)): $$(NV_CONFTEST_SCRIPT) $(obj)/conftest/uts_release
@mkdir -p $$(dir $$@)
@$$(NV_CONFTEST_CMD) test_kernel_header '$$(NV_CONFTEST_CFLAGS)' '$(1)' > $$@
endef
# Evaluate the rule above for each header in the list.
$(foreach header,$(NV_HEADER_PRESENCE_TESTS),$(eval $(call NV_HEADER_PRESENCE_CHECK,$(header))))
# Concatenate all of the parts into headers.h.
$(obj)/conftest/headers.h: $(call NV_HEADER_PRESENCE_PART,$(NV_HEADER_PRESENCE_TESTS))
@cat $^ > $@
clean-dirs := $(obj)/conftest

View File

@@ -227,6 +227,7 @@ static inline uid_t __kuid_val(uid_t uid)
#endif
#include <linux/fb.h> /* fb_info struct */
#include <linux/screen_info.h> /* screen_info */
#if !defined(CONFIG_PCI)
#warning "Attempting to build driver for a platform with no PCI support!"

View File

@@ -78,13 +78,8 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
#if defined(NVCPU_AARCH64)
#if defined(NV_MT_DEVICE_GRE_PRESENT)
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_GRE))
#else
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_nGnRE))
#endif
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
__pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)

View File

@@ -624,27 +624,45 @@ typedef enum
#define NV_GET_NV_STATE(pGpu) \
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)
#define IS_REG_OFFSET(nv, offset, length) \
(((offset) >= (nv)->regs->cpu_address) && \
(((offset) + ((length)-1)) <= \
(nv)->regs->cpu_address + ((nv)->regs->size-1)))
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((offset >= nv->regs->cpu_address) &&
#define IS_FB_OFFSET(nv, offset, length) \
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
#define IS_UD_OFFSET(nv, offset, length) \
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
((offset) >= (nv)->ud.cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
#define IS_IMEM_OFFSET(nv, offset, length) \
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
(((offset) + ((length) - 1)) <= \
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
}
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
}
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
(offset >= nv->ud.cpu_address) &&
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
}
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
}
#define NV_RM_MAX_MSIX_LINES 8

View File

@@ -55,9 +55,13 @@ append_conftest() {
done
}
translate_and_preprocess_header_files() {
# Inputs:
# $1: list of relative file paths
test_header_presence() {
#
# Determine if the given header file (which may or may not be
# present) is provided by the target kernel.
#
# Input:
# $1: relative file path
#
# This routine creates an upper case, underscore version of each of the
# relative file paths, and uses that as the token to either define or
@@ -73,7 +77,7 @@ translate_and_preprocess_header_files() {
# strings, without special handling of the beginning or the end of the line.
TEST_CFLAGS=`echo "-E -M $CFLAGS " | sed -e 's/\( -M[DG]\)* / /g'`
for file in "$@"; do
file="$1"
file_define=NV_`echo $file | tr '/.' '_' | tr '-' '_' | tr 'a-z' 'A-Z'`_PRESENT
CODE="#include <$file>"
@@ -92,96 +96,6 @@ translate_and_preprocess_header_files() {
echo "#define $file_define"
fi
fi
done
}
test_headers() {
#
# Determine which header files (of a set that may or may not be
# present) are provided by the target kernel.
#
FILES="asm/system.h"
FILES="$FILES drm/drmP.h"
FILES="$FILES drm/drm_auth.h"
FILES="$FILES drm/drm_gem.h"
FILES="$FILES drm/drm_crtc.h"
FILES="$FILES drm/drm_atomic.h"
FILES="$FILES drm/drm_atomic_helper.h"
FILES="$FILES drm/drm_encoder.h"
FILES="$FILES drm/drm_atomic_uapi.h"
FILES="$FILES drm/drm_drv.h"
FILES="$FILES drm/drm_framebuffer.h"
FILES="$FILES drm/drm_connector.h"
FILES="$FILES drm/drm_probe_helper.h"
FILES="$FILES drm/drm_blend.h"
FILES="$FILES drm/drm_fourcc.h"
FILES="$FILES drm/drm_prime.h"
FILES="$FILES drm/drm_plane.h"
FILES="$FILES drm/drm_vblank.h"
FILES="$FILES drm/drm_file.h"
FILES="$FILES drm/drm_ioctl.h"
FILES="$FILES drm/drm_device.h"
FILES="$FILES drm/drm_mode_config.h"
FILES="$FILES dt-bindings/interconnect/tegra_icc_id.h"
FILES="$FILES generated/autoconf.h"
FILES="$FILES generated/compile.h"
FILES="$FILES generated/utsrelease.h"
FILES="$FILES linux/efi.h"
FILES="$FILES linux/kconfig.h"
FILES="$FILES linux/platform/tegra/mc_utils.h"
FILES="$FILES linux/semaphore.h"
FILES="$FILES linux/printk.h"
FILES="$FILES linux/ratelimit.h"
FILES="$FILES linux/prio_tree.h"
FILES="$FILES linux/log2.h"
FILES="$FILES linux/of.h"
FILES="$FILES linux/bug.h"
FILES="$FILES linux/sched/signal.h"
FILES="$FILES linux/sched/task.h"
FILES="$FILES linux/sched/task_stack.h"
FILES="$FILES xen/ioemu.h"
FILES="$FILES linux/fence.h"
FILES="$FILES linux/dma-resv.h"
FILES="$FILES soc/tegra/chip-id.h"
FILES="$FILES soc/tegra/fuse.h"
FILES="$FILES soc/tegra/tegra_bpmp.h"
FILES="$FILES video/nv_internal.h"
FILES="$FILES linux/platform/tegra/dce/dce-client-ipc.h"
FILES="$FILES linux/nvhost.h"
FILES="$FILES linux/nvhost_t194.h"
FILES="$FILES asm/book3s/64/hash-64k.h"
FILES="$FILES asm/set_memory.h"
FILES="$FILES asm/prom.h"
FILES="$FILES asm/powernv.h"
FILES="$FILES linux/atomic.h"
FILES="$FILES asm/barrier.h"
FILES="$FILES asm/opal-api.h"
FILES="$FILES sound/hdaudio.h"
FILES="$FILES asm/pgtable_types.h"
FILES="$FILES linux/stringhash.h"
FILES="$FILES linux/dma-map-ops.h"
FILES="$FILES rdma/peer_mem.h"
FILES="$FILES sound/hda_codec.h"
FILES="$FILES linux/dma-buf.h"
FILES="$FILES linux/time.h"
FILES="$FILES linux/platform_device.h"
FILES="$FILES linux/mutex.h"
FILES="$FILES linux/reset.h"
FILES="$FILES linux/of_platform.h"
FILES="$FILES linux/of_device.h"
FILES="$FILES linux/of_gpio.h"
FILES="$FILES linux/gpio.h"
FILES="$FILES linux/gpio/consumer.h"
FILES="$FILES linux/interconnect.h"
FILES="$FILES linux/pm_runtime.h"
FILES="$FILES linux/clk.h"
FILES="$FILES linux/clk-provider.h"
FILES="$FILES linux/ioasid.h"
FILES="$FILES linux/stdarg.h"
FILES="$FILES linux/iosys-map.h"
FILES="$FILES asm/coco.h"
translate_and_preprocess_header_files $FILES
}
build_cflags() {
@@ -2420,23 +2334,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_DEV_HAS_ATS_ENABLED" "" "types"
;;
mt_device_gre)
#
# Determine if MT_DEVICE_GRE flag is present.
#
# MT_DEVICE_GRE flag is removed by commit 58cc6b72a21274
# ("arm64: mm: Remove unused support for Device-GRE memory type") in v5.14-rc1
# (2021-06-01).
#
CODE="
#include <asm/memory.h>
unsigned int conftest_mt_device_gre(void) {
return MT_DEVICE_GRE;
}"
compile_check_conftest "$CODE" "NV_MT_DEVICE_GRE_PRESENT" "" "types"
;;
get_user_pages)
#
# Conftest for get_user_pages()
@@ -5366,6 +5263,23 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GET_TASK_IOPRIO_PRESENT" "" "functions"
;;
num_registered_fb)
#
# Determine if 'num_registered_fb' variable is present.
#
# 'num_registered_fb' was removed by commit 5727dcfd8486
# ("fbdev: Make registered_fb[] private to fbmem.c) for
# v5.20 linux-next (2022-07-27).
#
CODE="
#include <linux/fb.h>
int conftest_num_registered_fb(void) {
return num_registered_fb;
}"
compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit.
#
@@ -5764,14 +5678,14 @@ case "$5" in
;;
test_kernel_headers)
test_kernel_header)
#
# Check for the availability of certain kernel headers
# Check for the availability of the given kernel header
#
CFLAGS=$6
test_headers
test_header_presence "${7}"
for file in conftest*.d; do
rm -f $file > /dev/null 2>&1

View File

@@ -41,6 +41,19 @@
#include <drm/drm_atomic_uapi.h>
#endif
/*
* The inclusion of drm_framebuffer.h was removed from drm_crtc.h by commit
* 720cf96d8fecde29b72e1101f8a567a0ce99594f ("drm: Drop drm_framebuffer.h from
* drm_crtc.h") in linux-next, expected in v5.19-rc7.
*
* We only need drm_framebuffer.h for drm_framebuffer_put(), and it is always
* present (v4.9+) when drm_framebuffer_{put,get}() is present (v4.12+), so it
* is safe to unconditionally include it when drm_framebuffer_get() is present.
*/
#if defined(NV_DRM_FRAMEBUFFER_GET_PRESENT)
#include <drm/drm_framebuffer.h>
#endif
static void __nv_drm_framebuffer_put(struct drm_framebuffer *fb)
{
#if defined(NV_DRM_FRAMEBUFFER_GET_PRESENT)

View File

@@ -59,6 +59,9 @@
#define NVKMS_LOG_PREFIX "nvidia-modeset: "
static bool output_rounding_fix = false;
module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);
/* These parameters are used for fault injection tests. Normally the defaults
* should be used. */
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
@@ -71,6 +74,10 @@ module_param_named(malloc_verbose, malloc_verbose, bool, 0400);
static atomic_t nvkms_alloc_called_count;
NvBool nvkms_output_rounding_fix(void)
{
return output_rounding_fix;
}
#define NVKMS_SYNCPT_STUBS_NEEDED

View File

@@ -110,6 +110,7 @@ typedef struct {
} set_maxval;
} NvKmsSyncPtOpParams;
NvBool nvkms_output_rounding_fix(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

View File

@@ -35,10 +35,6 @@
#include "nv_uvm_interface.h"
#include "clb06f.h"
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT 1024
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)
static unsigned uvm_channel_num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;
#define UVM_CHANNEL_GPFIFO_LOC_DEFAULT "auto"
@@ -86,6 +82,12 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
uvm_spin_lock(&channel->pool->lock);
// Completed value should never exceed the queued value
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
"GPU %s channel %s unexpected completed_value 0x%llx > queued_value 0x%llx\n",
channel->pool->manager->gpu->parent->name, channel->name, completed_value,
channel->tracking_sem.queued_value);
cpu_put = channel->cpu_put;
gpu_get = channel->gpu_get;
@@ -395,6 +397,14 @@ static void uvm_channel_semaphore_release(uvm_push_t *push, NvU64 semaphore_va,
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
// We used to skip the membar or use membar GPU for the semaphore release
// for a few pushes, but that doesn't provide sufficient ordering guarantees
// in some cases (e.g. ga100 with an LCE with PCEs from both HSHUBs) for the
// semaphore writes. To be safe, just always uses a membar sys for now.
// TODO bug 3770539: Optimize membars used by end of push semaphore releases
(void)uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
(void)uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
if (uvm_channel_is_ce(push->channel))
gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
@@ -1562,6 +1572,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
UVM_SEQ_OR_DBG_PRINT(s, "get %u\n", channel->gpu_get);
UVM_SEQ_OR_DBG_PRINT(s, "put %u\n", channel->cpu_put);
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
uvm_spin_unlock(&channel->pool->lock);
}

View File

@@ -46,6 +46,21 @@
// wait for a GPFIFO entry to free up.
//
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT 1024
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)
// Semaphore payloads cannot advance too much between calls to
// uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps
// are bound by gpfifo sizing as we have to update the completed value to
// reclaim gpfifo entries. Set a limit based on the max gpfifo entries we could
// ever see.
//
// Logically this define belongs to uvm_gpu_semaphore.h but it depends on the
// channel GPFIFO sizing defined here so it's easiest to just have it here as
// uvm_channel.h includes uvm_gpu_semaphore.h.
#define UVM_GPU_SEMAPHORE_MAX_JUMP (2 * UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX)
// Channel types
typedef enum
{

View File

@@ -151,6 +151,37 @@ done:
return status;
}
static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
{
NV_STATUS status;
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
uvm_channel_t *channel;
NvU64 completed_value;
// The GPU channel manager is destroyed and then re-created after
// the test, so this test requires exclusive access to the GPU.
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
channel = &gpu->channel_manager->channel_pools[0].channels[0];
completed_value = uvm_channel_update_completed_value(channel);
uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1);
TEST_CHECK_RET(uvm_global_get_status() == NV_OK);
uvm_channel_update_progress_all(channel);
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
uvm_channel_manager_destroy(gpu->channel_manager);
// Destruction will hit the error again, so clear one more time.
uvm_global_reset_fatal_error();
TEST_NV_CHECK_RET(uvm_channel_manager_create(gpu, &gpu->channel_manager));
}
return NV_OK;
}
static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
{
uvm_push_t push;
@@ -712,6 +743,14 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
g_uvm_global.disable_fatal_error_assert = true;
uvm_release_asserts_set_global_error_for_tests = true;
status = test_unexpected_completed_values(va_space);
uvm_release_asserts_set_global_error_for_tests = false;
g_uvm_global.disable_fatal_error_assert = false;
if (status != NV_OK)
goto done;
if (g_uvm_global.num_simulated_devices == 0) {
status = test_rc(va_space);
if (status != NV_OK)

View File

@@ -48,6 +48,33 @@ module_param(uvm_enable_builtin_tests, int, S_IRUGO);
MODULE_PARM_DESC(uvm_enable_builtin_tests,
"Enable the UVM built-in tests. (This is a security risk)");
// Default to release asserts being enabled.
int uvm_release_asserts __read_mostly = 1;
// Make the module param writable so that release asserts can be enabled or
// disabled at any time by modifying the module parameter.
module_param(uvm_release_asserts, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_release_asserts, "Enable uvm asserts included in release builds.");
// Default to failed release asserts not dumping stack.
int uvm_release_asserts_dump_stack __read_mostly = 0;
// Make the module param writable so that dumping the stack can be enabled and
// disabled at any time by modifying the module parameter.
module_param(uvm_release_asserts_dump_stack, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_release_asserts_dump_stack, "dump_stack() on failed UVM release asserts.");
// Default to failed release asserts not setting the global UVM error.
int uvm_release_asserts_set_global_error __read_mostly = 0;
// Make the module param writable so that setting the global fatal error can be
// enabled and disabled at any time by modifying the module parameter.
module_param(uvm_release_asserts_set_global_error, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_release_asserts_set_global_error, "Set UVM global fatal error on failed release asserts.");
// A separate flag to enable setting global error, to be used by tests only.
bool uvm_release_asserts_set_global_error_for_tests __read_mostly = false;
//
// Convert kernel errno codes to corresponding NV_STATUS
//

View File

@@ -80,6 +80,9 @@ bool uvm_debug_prints_enabled(void);
#define UVM_ASSERT_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
@@ -146,9 +149,7 @@ void on_uvm_test_fail(void);
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);
// UVM_ASSERT_RELEASE and UVM_ASSERT_MSG_RELEASE are always enabled, even on
// release builds.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
@@ -157,9 +158,6 @@ void on_uvm_assert(void);
} \
} while (0)
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n")
// Prevent function calls in expr and the print argument list from being
// evaluated.
#define UVM_ASSERT_MSG_IGNORE(expr, fmt, ...) \
@@ -170,13 +168,42 @@ void on_uvm_assert(void);
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG UVM_ASSERT_MSG_RELEASE
#define UVM_ASSERT UVM_ASSERT_RELEASE
#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")
#else
#define UVM_ASSERT_MSG(expr, fmt, ...) UVM_ASSERT_MSG_IGNORE(expr, fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) UVM_ASSERT_MSG_IGNORE(expr, "\n")
#endif
// UVM_ASSERT_RELEASE and UVM_ASSERT_MSG_RELEASE are always included in the
// build, even on release builds. They are skipped at runtime if
// uvm_release_asserts is 0.
// Whether release asserts are enabled and whether they should dump the stack
// and set the global error.
extern int uvm_release_asserts;
extern int uvm_release_asserts_dump_stack;
extern int uvm_release_asserts_set_global_error;
extern bool uvm_release_asserts_set_global_error_for_tests;
// Given these are enabled for release builds, we need to be more cautious than
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
// param is enabled.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n")
// Provide a short form of UUID's, typically for use in debug printing:
#define ABBREV_UUID(uuid) (unsigned)(uuid)

View File

@@ -25,6 +25,7 @@
#include "uvm_lock.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP
#define UVM_SEMAPHORE_SIZE 4
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
@@ -467,9 +468,16 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
// push, it's easily guaranteed because of the small number of GPFIFO
// entries available per channel (there could be at most as many pending
// pushes as GPFIFO entries).
if (new_sem_value < old_sem_value)
if (unlikely(new_sem_value < old_sem_value))
new_value += 1ULL << 32;
// Check for unexpected large jumps of the semaphore value
UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
"GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
tracking_semaphore->semaphore.page->pool->gpu->parent->name,
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
old_value, new_value);
// Use an atomic write even though the spinlock is held so that the value can
// be (carefully) read atomically outside of the lock.
//

View File

@@ -27,6 +27,18 @@
#include "uvm_va_space.h"
#include "uvm_kvmalloc.h"
static NV_STATUS set_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 new_value)
{
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)new_value);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == new_value);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value - 1));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value + 1));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_completed(tracking_sem));
return NV_OK;
}
static NV_STATUS add_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU32 increment_by)
{
NvU64 new_value;
@@ -43,13 +55,45 @@ static NV_STATUS add_and_test(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU32
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_completed(tracking_sem));
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)new_value);
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem) == new_value);
TEST_NV_CHECK_RET(set_and_test(tracking_sem, new_value));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, completed));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value - 1));
TEST_CHECK_RET(!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, new_value + 1));
TEST_CHECK_RET(uvm_gpu_tracking_semaphore_is_completed(tracking_sem));
return NV_OK;
}
// Set the current state of the sema, avoiding UVM_GPU_SEMAPHORE_MAX_JUMP
// detection.
static void manual_set(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value)
{
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, (NvU32)value);
atomic64_set(&tracking_sem->completed_value, value);
tracking_sem->queued_value = value;
}
// Set the starting value and payload and expect a global error
static NV_STATUS set_and_expect_error(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 starting_value, NvU32 payload)
{
manual_set(tracking_sem, starting_value);
uvm_gpu_semaphore_set_payload(&tracking_sem->semaphore, payload);
TEST_CHECK_RET(uvm_global_get_status() == NV_OK);
uvm_gpu_tracking_semaphore_update_completed_value(tracking_sem);
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
return NV_OK;
}
static NV_STATUS test_invalid_jumps(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
int i;
for (i = 0; i < 10; ++i) {
NvU64 base = (1ULL<<32) * i;
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base, UVM_GPU_SEMAPHORE_MAX_JUMP + 1));
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base, UINT_MAX));
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base + i + 1, i));
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base + UINT_MAX / 2, UINT_MAX / 2 + UVM_GPU_SEMAPHORE_MAX_JUMP + 1));
TEST_NV_CHECK_RET(set_and_expect_error(tracking_sem, base + UINT_MAX / 2, UINT_MAX / 2 - i - 1));
}
return NV_OK;
}
@@ -73,11 +117,31 @@ static NV_STATUS test_tracking(uvm_va_space_t *va_space)
goto done;
for (i = 0; i < 100; ++i) {
status = add_and_test(&tracking_sem, UINT_MAX - 1);
status = add_and_test(&tracking_sem, UVM_GPU_SEMAPHORE_MAX_JUMP - i);
if (status != NV_OK)
goto done;
}
// Test wrap-around cases
for (i = 0; i < 100; ++i) {
// Start with a value right before wrap-around
NvU64 starting_value = (1ULL<<32) * (i + 1) - i - 1;
manual_set(&tracking_sem, starting_value);
// And set payload to after wrap-around
status = set_and_test(&tracking_sem, (1ULL<<32) * (i + 1) + i);
if (status != NV_OK)
goto done;
}
g_uvm_global.disable_fatal_error_assert = true;
uvm_release_asserts_set_global_error_for_tests = true;
status = test_invalid_jumps(&tracking_sem);
uvm_release_asserts_set_global_error_for_tests = false;
g_uvm_global.disable_fatal_error_assert = false;
if (status != NV_OK)
goto done;
done:
uvm_gpu_tracking_semaphore_free(&tracking_sem);
return status;

View File

@@ -52,11 +52,21 @@ typedef enum
// By default all operations include a membar sys after any transfer and
// before a semaphore operation.
// This flag indicates that next operation should use no membar at all.
//
// For end of push semaphore release, this flag indicates that the push
// itself does not need a membar to be used (membar sys is the default). A
// membar may still be used, if needed to order the semaphore release
// write. See comments in uvm_channel_end_push().
UVM_PUSH_FLAG_NEXT_MEMBAR_NONE,
// By default all operations include a membar sys after any transfer and
// before a semaphore operation.
// This flag indicates that next operation should use a membar gpu instead.
//
// For end of push semaphore release, this flag indicates that the push
// itself only needs a membar gpu (the default is membar sys). A membar sys
// may still be used, if needed to order the semaphore release write. See
// comments in uvm_channel_end_push().
UVM_PUSH_FLAG_NEXT_MEMBAR_GPU,
UVM_PUSH_FLAG_COUNT,

View File

@@ -820,8 +820,13 @@ nv_dma_buf_reuse(
goto cleanup_dmabuf;
}
if (params->index > (priv->total_objects - params->numObjects))
{
status = NV_ERR_INVALID_ARGUMENT;
goto unlock_priv;
}

View File

@@ -132,6 +132,13 @@ nvidia_vma_access(
pageIndex = ((addr - vma->vm_start) >> PAGE_SHIFT);
pageOffset = (addr & ~PAGE_MASK);
if (!mmap_context->valid)
{
nv_printf(NV_DBG_ERRORS, "NVRM: VM: invalid mmap context\n");
@@ -430,7 +437,7 @@ static int nvidia_mmap_numa(
const nv_alloc_mapping_context_t *mmap_context)
{
NvU64 start, addr;
unsigned int pages;
NvU64 pages;
NvU64 i;
pages = NV_VMA_SIZE(vma) >> PAGE_SHIFT;
@@ -509,6 +516,13 @@ int nvidia_mmap_helper(
NvU64 access_start = mmap_context->access_start;
NvU64 access_len = mmap_context->access_size;
if (IS_REG_OFFSET(nv, access_start, access_len))
{
if (nv_encode_caching(&vma->vm_page_prot, NV_MEMORY_UNCACHED,

View File

@@ -1467,6 +1467,11 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
return -ENODEV;
}
if ( ! (nv->flags & NV_FLAG_OPEN))
{
/* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */

View File

@@ -219,6 +219,7 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_dram_clk_to_mc_clk
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_get_dram_num_channels
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_dram_types
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_pxm_to_node
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_screen_info
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
@@ -242,9 +243,9 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vmalloc_has_pgprot_t_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_channel_state
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_dev_has_ats_enabled
NV_CONFTEST_TYPE_COMPILE_TESTS += mt_device_gre
NV_CONFTEST_TYPE_COMPILE_TESTS += remove_memory_has_nid_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += add_memory_driver_managed_has_mhp_flags_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb
NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -207,7 +207,10 @@ static int nvlink_fops_release(struct inode *inode, struct file *filp)
nvlink_print(NVLINK_DBG_INFO, "nvlink driver close\n");
WARN_ON(private == NULL);
mutex_lock(&nvlink_drvctx.lock);

View File

@@ -1120,11 +1120,14 @@ void NV_API_CALL os_get_screen_info(
NvU64 consoleBar2Address
)
{
#if defined(CONFIG_FB)
int i;
*pPhysicalAddress = 0;
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0;
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
if (num_registered_fb > 0)
{
int i;
for (i = 0; i < num_registered_fb; i++)
{
if (!registered_fb[i])
@@ -1142,9 +1145,33 @@ void NV_API_CALL os_get_screen_info(
break;
}
}
#else
*pPhysicalAddress = 0;
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0;
}
#elif NV_IS_EXPORT_SYMBOL_PRESENT_screen_info
/*
* If there is not a framebuffer console, return 0 size.
*
* orig_video_isVGA is set to 1 during early Linux kernel
* initialization, and then will be set to a value, such as
* VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
*/
if (screen_info.orig_video_isVGA > 1)
{
NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif
/* Make sure base address is mapped to GPU BAR */
if ((physAddr == consoleBar1Address) ||
(physAddr == consoleBar2Address))
{
*pPhysicalAddress = physAddr;
*pFbWidth = screen_info.lfb_width;
*pFbHeight = screen_info.lfb_height;
*pFbDepth = screen_info.lfb_depth;
*pFbPitch = screen_info.lfb_linelength;
}
}
#endif
}

View File

@@ -195,6 +195,11 @@ extern NVLOG_LOGGER NvLogLogger;
#define NVLOG_BUFFER_FLAGS_FORMAT_LIBOS_LOG 1
#define NVLOG_BUFFER_FLAGS_FORMAT_MEMTRACK 2
// Never deallocate this buffer until RM is unloaded
#define NVLOG_BUFFER_FLAGS_PRESERVE 11:11
#define NVLOG_BUFFER_FLAGS_PRESERVE_NO 0
#define NVLOG_BUFFER_FLAGS_PRESERVE_YES 1
// Buffer GPU index
#define NVLOG_BUFFER_FLAGS_GPU_INSTANCE 31:24

View File

@@ -4091,6 +4091,8 @@ typedef struct tagNVT_GAMUT_METADATA
#define NVT_DPCD_ADDRESS_DOWN_REP_BUFFER_FIELD 0x01400
#define NVT_DPCD_ADDRESS_UP_REQ_BUFFER_FIELD 0x01600
#define NVT_DPCD_ADDRESS_DEVICE_SERVICE_IRQ_VECTOR_ESI0 0x02003
#define NVT_DPCD_ADDRESS_DP_TUNNELING_DEVICE_IEEE_OUI 0xE0000
#define NVT_DPCD_ADDRESS_DP_TUNNELING_DEVICE_ID_STRING 0xE0003
#define NVT_DPCD_ADDRESS_DP_TUNNELING_CAPS_SUPPORT_FIELD 0xE000D
#define NVT_DPCD_ADDRESS_DP_IN_ADAPTER_INFO_FIELD 0xE000E
#define NVT_DPCD_ADDRESS_USB4_DRIVER_ID_FIELD 0xE000F
@@ -5079,7 +5081,7 @@ typedef struct tagNVT_DPCD_CONFIG
typedef struct tagNVT_DPCD_DP_TUNNELING_CAPS
{
NvU8 dpTunnelingSupport : 1; // DP Tunneling through USB4 Support
NvU8 dpTunneling : 1; // DP Tunneling through USB4 Support
NvU8 reserved : 5; // Reserved.
NvU8 dpPanelReplayTunnelingOptSupport : 1; // Panel Replay Tunneling Optimization Support
NvU8 dpInBwAllocationModeSupport : 1; // DP IN Bandwidth Allocation Mode Support

View File

@@ -64,7 +64,7 @@
* Total number of nvlink endpoints core library can have
* This is mapped to NVLINK_MAX_SYSTEM_LINK_NUM in drivers/nvlink/interface/nvlink.h
*/
#define NVLINK_MAX_NVLINK_ENDPOINTS 312
#define NVLINK_MAX_NVLINK_ENDPOINTS 624
#define NVLINK_VERSION_STRING_LENGTH 64

View File

@@ -28,6 +28,7 @@
#include "../nvlink_ctx.h"
#include "../nvlink_helper.h"
#include "nvlink_lock.h"
#include "nvctassert.h"
#define NVLINK_IOC_GET_BUF(ctrlParams, type) (ctrlParams)->size >= sizeof(type) ? (type *) (ctrlParams)->buf : NULL
@@ -3423,6 +3424,8 @@ nvlink_lib_ctrl_get_device_link_states
NvU32 numLinks = 0;
NvU32 i = 0;
ct_assert(NVLINK_MAX_SYSTEM_LINK_NUM == NVLINK_MAX_NVLINK_ENDPOINTS);
nvlink_link **links = (nvlink_link **)nvlink_malloc(
sizeof(nvlink_link *) * NVLINK_MAX_SYSTEM_LINK_NUM);
if (links == NULL)

View File

@@ -1041,24 +1041,41 @@ static NvBool libosCopyLogToNvlog_nowrap(LIBOS_LOG_DECODE_LOG *pLog)
NvU64 putCopy = pLog->physicLogBuffer[0];
NvU64 putOffset = putCopy * sizeof(NvU64) + sizeof(NvU64);
if (putOffset == pNvLogBuffer->pos)
//
// If RM was not unloaded, we will reuse a preserved nowrap nvlog buffer with the fresh
// physical log buffer. In this case, we fix up all the offsets into the nvlog buffer to be
// relative to its preserved position rather than the start.
//
NvU64 nvlogPos = pNvLogBuffer->pos - pLog->preservedNoWrapPos;
if (putOffset < nvlogPos)
{
// Buffer put counter unexpectedly reset. Terminate nowrap log collection.
return NV_FALSE;
}
if (putOffset == nvlogPos)
{
// No new data
return NV_TRUE;
}
if (putOffset > pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))
if (putOffset + pLog->preservedNoWrapPos >
pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64))
{
// Are we done filling nowrap?
return NV_FALSE;
}
NvU64 len = putOffset - pNvLogBuffer->pos;
NvU8 *pSrc = ((NvU8 *)pLog->physicLogBuffer) + pNvLogBuffer->pos;
NvU64 len = putOffset - nvlogPos;
NvU8 *pSrc = ((NvU8 *)pLog->physicLogBuffer) + nvlogPos;
NvU8 *pDst = pNoWrapBuf->data + pNvLogBuffer->pos;
pLog->bDidPush = NV_TRUE;
portMemCopy(pDst, len, pSrc, len);
pNvLogBuffer->pos = putOffset; // TODO: usage of NVLOG_BUFFER::pos is sus here, reconsider?
*(NvU64 *)(pNoWrapBuf->data) = putCopy;
pNvLogBuffer->pos = putOffset + pLog->preservedNoWrapPos; // TODO: usage of NVLOG_BUFFER::pos is sus here, reconsider?
*(NvU64 *)(pNoWrapBuf->data) = putCopy + pLog->preservedNoWrapPos / sizeof(NvU64);
return NV_TRUE;
}
@@ -1095,6 +1112,46 @@ static void libosExtractLogs_nvlog(LIBOS_LOG_DECODE *logDecode, NvBool bSyncNvLo
}
}
void libosPreserveLogs(LIBOS_LOG_DECODE *pLogDecode)
{
NvU64 i;
for (i = 0; i < pLogDecode->numLogBuffers; i++)
{
LIBOS_LOG_DECODE_LOG *pLog = &pLogDecode->log[i];
if (pLog->bDidPush)
{
NvHandle hNvlog = pLog->hNvLogNoWrap;
NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[hNvlog];
if (hNvlog == 0 || pNvLogBuffer == NULL)
continue;
pNvLogBuffer->flags |= DRF_DEF(LOG, _BUFFER_FLAGS, _PRESERVE, _YES);
}
}
}
static NvBool findPreservedNvlogBuffer(NvU32 tag, NvU32 gpuInstance, NVLOG_BUFFER_HANDLE *pHandle)
{
NVLOG_BUFFER_HANDLE handle = 0;
NV_STATUS status = nvlogGetBufferHandleFromTag(tag, &handle);
if (status != NV_OK)
return NV_FALSE;
NVLOG_BUFFER *pNvLogBuffer = NvLogLogger.pBuffers[handle];
if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pNvLogBuffer->flags) &&
DRF_VAL(LOG, _BUFFER_FLAGS, _GPU_INSTANCE, pNvLogBuffer->flags) == gpuInstance &&
(pNvLogBuffer->pos < pNvLogBuffer->size - NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data) - sizeof(NvU64)))
{
*pHandle = handle;
return NV_TRUE;
}
return NV_FALSE;
}
#endif // LIBOS_LOG_TO_NVLOG
/**
@@ -1211,11 +1268,18 @@ void libosLogAddLogEx(LIBOS_LOG_DECODE *logDecode, void *buffer, NvU64 bufferSiz
pLog->hNvLogWrap = 0;
pLog->bNvLogNoWrap = NV_FALSE;
LIBOS_LOG_NVLOG_BUFFER *pNoWrapBuf;
pLog->bDidPush = NV_FALSE;
pLog->preservedNoWrapPos = 0;
LIBOS_LOG_NVLOG_BUFFER *pNoWrapBuf;
NvU32 tag = LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2);
NvBool bFoundPreserved = findPreservedNvlogBuffer(tag, gpuInstance, &pLog->hNvLogNoWrap);
if (!bFoundPreserved)
{
status = nvlogAllocBuffer(
bufferSize + NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data), libosNoWrapBufferFlags,
LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2),
tag,
&pLog->hNvLogNoWrap);
if (status == NV_OK)
@@ -1237,13 +1301,27 @@ void libosLogAddLogEx(LIBOS_LOG_DECODE *logDecode, void *buffer, NvU64 bufferSiz
{
printf("nvlogAllocBuffer nowrap failed\n");
}
}
else
{
pLog->bNvLogNoWrap = NV_TRUE;
pLog->preservedNoWrapPos = NvLogLogger.pBuffers[pLog->hNvLogNoWrap]->pos;
//
// The 0th NvU64 is the last value of put pointer from the physical log buffer, which is
// the number of NvU64 log buffer elements in it plus one.
// Subtract one NvU64 from it to avoid off-by-one error.
//
if (pLog->preservedNoWrapPos >= sizeof(NvU64))
pLog->preservedNoWrapPos -= sizeof(NvU64);
}
LIBOS_LOG_NVLOG_BUFFER *pWrapBuf;
tag = LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2 + 1);
status = nvlogAllocBuffer(
bufferSize + NV_OFFSETOF(LIBOS_LOG_NVLOG_BUFFER, data), libosWrapBufferFlags,
LIBOS_LOG_NVLOG_BUFFER_TAG(logDecode->sourceName, i * 2 + 1),
&pLog->hNvLogWrap);
tag, &pLog->hNvLogWrap);
if (status == NV_OK)
{
@@ -1349,13 +1427,13 @@ void libosLogDestroy(LIBOS_LOG_DECODE *logDecode)
if (pLog->hNvLogNoWrap != 0)
{
nvlogDeallocBuffer(pLog->hNvLogNoWrap);
nvlogDeallocBuffer(pLog->hNvLogNoWrap, NV_FALSE);
pLog->hNvLogNoWrap = 0;
}
if (pLog->hNvLogWrap != 0)
{
nvlogDeallocBuffer(pLog->hNvLogWrap);
nvlogDeallocBuffer(pLog->hNvLogWrap, NV_FALSE);
pLog->hNvLogWrap = 0;
}
}

View File

@@ -108,6 +108,9 @@ struct LIBOS_LOG_DECODE_LOG
NvU32 hNvLogNoWrap; // No wrap buffer captures first records.
NvU32 hNvLogWrap; // Wrap buffer captures last records.
NvBool bNvLogNoWrap; // NV_TRUE if no wrap buffer not full.
NvBool bDidPush; // NV_TRUE if this buffer was ever pushed to
NvU64 preservedNoWrapPos; // Position in preserved nvlog buffer
#endif
#if LIBOS_LOG_DECODE_ENABLE
@@ -170,6 +173,8 @@ void libosLogDestroy(LIBOS_LOG_DECODE *logDecode);
void libosExtractLogs(LIBOS_LOG_DECODE *logDecode, NvBool bSyncNvLog);
void libosPreserveLogs(LIBOS_LOG_DECODE *pLogDecode);
#ifdef __cplusplus
}
#endif

View File

@@ -110,6 +110,7 @@ typedef struct {
} set_maxval;
} NvKmsSyncPtOpParams;
NvBool nvkms_output_rounding_fix(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

View File

@@ -1288,6 +1288,8 @@ static void EvoSetOCsc0C5(NVDispEvoPtr pDispEvo, const NvU32 head)
const float32_t zeroF32 = NvU32viewAsF32(NV_FLOAT_ZERO);
const float32_t oneF32 = NvU32viewAsF32(NV_FLOAT_ONE);
const float32_t inv2048F32 = f32_div(NvU32viewAsF32(NV_FLOAT_HALF),
NvU32viewAsF32(NV_FLOAT_1024));
/* divide satCos by the default setting of 1024 */
const float32_t satCos = f32_div(i32_to_f32(pHeadState->procAmp.satCos),
NvU32viewAsF32(NV_FLOAT_1024));
@@ -1324,6 +1326,12 @@ static void EvoSetOCsc0C5(NVDispEvoPtr pDispEvo, const NvU32 head)
ocsc0Matrix = nvMultiply3x4Matrix(&satHueMatrix, &ocsc0Matrix);
ocsc0Matrix = nvMultiply3x4Matrix(&CrYCbtoRGBMatrix, &ocsc0Matrix);
if (nvkms_output_rounding_fix()) {
ocsc0Matrix.m[0][3] = f32_add(ocsc0Matrix.m[0][3], inv2048F32);
ocsc0Matrix.m[1][3] = f32_add(ocsc0Matrix.m[1][3], inv2048F32);
ocsc0Matrix.m[2][3] = f32_add(ocsc0Matrix.m[2][3], inv2048F32);
}
nvDmaSetStartEvoMethod(pChannel, NVC57D_HEAD_SET_OCSC0COEFFICIENT_C00(head), 12);
nvDmaSetEvoMethodData(pChannel, DRF_NUM(C57D, _HEAD_SET_OCSC0COEFFICIENT_C00, _VALUE, cscCoefConvertS514(ocsc0Matrix.m[0][0])));
nvDmaSetEvoMethodData(pChannel, DRF_NUM(C57D, _HEAD_SET_OCSC0COEFFICIENT_C01, _VALUE, cscCoefConvertS514(ocsc0Matrix.m[0][1])));
@@ -1965,11 +1973,13 @@ static inline NvU32 GetMaxPixelsFetchedPerLine(NvU16 inWidth,
static void SetScalingUsageBoundsOneWindow5(
NVDevEvoPtr pDevEvo, NvU32 window,
const struct NvKmsScalingUsageBounds *pScaling,
NvBool layerUsable,
const NVHwModeViewPortEvo *pViewPort,
NVEvoUpdateState *updateState)
{
NVEvoChannelPtr pChannel = pDevEvo->core;
NvU32 setWindowUsageBounds = NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C5;
NvU32 maxPixelsFetchedPerLine;
nvUpdateUpdateState(pDevEvo, updateState, pChannel);
@@ -1981,10 +1991,15 @@ static void SetScalingUsageBoundsOneWindow5(
DRF_NUM(C57D, _WINDOW_SET_MAX_INPUT_SCALE_FACTOR, _VERTICAL,
pScaling->maxVDownscaleFactor));
if (layerUsable) {
maxPixelsFetchedPerLine = GetMaxPixelsFetchedPerLine(pViewPort->in.width,
pScaling->maxHDownscaleFactor);
} else {
maxPixelsFetchedPerLine = 0;
}
setWindowUsageBounds |=
(DRF_NUM(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,
GetMaxPixelsFetchedPerLine(pViewPort->in.width,
pScaling->maxHDownscaleFactor))) |
(DRF_NUM(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,maxPixelsFetchedPerLine)) |
(pScaling->vTaps >= NV_EVO_SCALER_5TAPS ?
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_5) :
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_2)) |
@@ -2056,8 +2071,9 @@ static NvBool EvoSetUsageBoundsC5(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head,
needCoreUpdate = EvoSetUsageBounds3(pDevEvo, sd, head, pUsage, updateState);
for (layer = 0; layer < pDevEvo->head[head].numLayers; layer++) {
if (!nvEvoScalingUsageBoundsEqual(&pCurrentUsage->layer[layer].scaling,
&pUsage->layer[layer].scaling)) {
if ((pCurrentUsage->layer[layer].usable != pUsage->layer[layer].usable) ||
(!nvEvoScalingUsageBoundsEqual(&pCurrentUsage->layer[layer].scaling,
&pUsage->layer[layer].scaling))) {
const NVHwModeViewPortEvo *pViewPort =
&pDevEvo->gpus[sd].pDispEvo->headState[head].timings.viewPort;
@@ -2066,6 +2082,7 @@ static NvBool EvoSetUsageBoundsC5(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head,
NV_EVO_CHANNEL_MASK_WINDOW_NUMBER(
pDevEvo->head[head].layer[layer]->channelMask),
&pUsage->layer[layer].scaling,
pUsage->layer[layer].usable,
pViewPort,
updateState);
needCoreUpdate = TRUE;
@@ -4383,7 +4400,9 @@ static void EvoSetLUTContextDmaC5(const NVDispEvoRec *pDispEvo,
nvDmaSetStartEvoMethod(pChannel, NVC57D_HEAD_SET_OLUT_CONTROL(head), 1);
nvDmaSetEvoMethodData(pChannel,
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _INTERPOLATE, _ENABLE) |
(!nvkms_output_rounding_fix() ?
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _INTERPOLATE, _ENABLE) :
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _INTERPOLATE, _DISABLE)) |
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _MIRROR, _DISABLE) |
DRF_DEF(C57D, _HEAD_SET_OLUT_CONTROL, _MODE, _DIRECT10) |
DRF_NUM(C57D, _HEAD_SET_OLUT_CONTROL, _SIZE, NV_LUT_VSS_HEADER_SIZE +
@@ -5180,13 +5199,11 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
const NVHwModeViewPortEvo *pViewPortMin,
const NVHwModeViewPortEvo *pViewPort,
const NVHwModeViewPortEvo *pViewPortMax,
NVEvoUpdateState *updateState,
NvU32 setWindowUsageBounds)
NVEvoUpdateState *updateState)
{
const NVEvoCapabilitiesPtr pEvoCaps = &pDevEvo->gpus[0].capabilities;
NVEvoChannelPtr pChannel = pDevEvo->core;
struct NvKmsScalingUsageBounds scalingUsageBounds = { };
NvU32 win;
/* These methods should only apply to a single pDpy */
nvAssert(pDevEvo->subDevMaskStackDepth > 0);
@@ -5232,9 +5249,33 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
DRF_NUM(C37D, _HEAD_SET_MAX_OUTPUT_SCALE_FACTOR, _VERTICAL,
scalingUsageBounds.maxVDownscaleFactor));
return scalingUsageBounds.vUpscalingAllowed;
}
static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head,
const NVHwModeViewPortEvo *pViewPortMin,
const NVHwModeViewPortEvo *pViewPort,
const NVHwModeViewPortEvo *pViewPortMax,
NVEvoUpdateState *updateState)
{
NVEvoChannelPtr pChannel = pDevEvo->core;
NvU32 win;
NvU32 setWindowUsageBounds = NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3;
NvBool verticalUpscalingAllowed =
EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort,
pViewPortMax, updateState);
nvDmaSetStartEvoMethod(pChannel,
NVC37D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1);
nvDmaSetEvoMethodData(pChannel,
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _CURSOR, _USAGE_W256_H256) |
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _OUTPUT_LUT, _USAGE_1025) |
(verticalUpscalingAllowed ?
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _TRUE) :
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE)));
/*
* Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds
* for each window thats attached to the head.
* for each window that is attached to the head.
*
* Precomp will clip the post-scaled window to the input viewport, reverse-scale
* this cropped size back to the input surface domain, and isohub will fetch
@@ -5242,7 +5283,11 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
* so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport
* width. SetScalingUsageBoundsOneWindow5() will take care of updating
* MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later.
* On Volta, Program for each window that is attached to head. For turing+,
* SetScalingUsageBoundsOneWindow5() will take care of programming window
* usage bounds only for the layers/windows in use.
*/
setWindowUsageBounds |=
DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,
GetMaxPixelsFetchedPerLine(pViewPort->in.width,
@@ -5256,30 +5301,6 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1);
nvDmaSetEvoMethodData(pChannel, setWindowUsageBounds);
}
return scalingUsageBounds.vUpscalingAllowed;
}
static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head,
const NVHwModeViewPortEvo *pViewPortMin,
const NVHwModeViewPortEvo *pViewPort,
const NVHwModeViewPortEvo *pViewPortMax,
NVEvoUpdateState *updateState)
{
NVEvoChannelPtr pChannel = pDevEvo->core;
NvBool verticalUpscalingAllowed =
EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort,
pViewPortMax, updateState,
NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3);
nvDmaSetStartEvoMethod(pChannel,
NVC37D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1);
nvDmaSetEvoMethodData(pChannel,
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _CURSOR, _USAGE_W256_H256) |
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _OUTPUT_LUT, _USAGE_1025) |
(verticalUpscalingAllowed ?
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _TRUE) :
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE)));
}
static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head,
@@ -5289,13 +5310,9 @@ static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head,
NVEvoUpdateState *updateState)
{
NVEvoChannelPtr pChannel = pDevEvo->core;
NvU32 setWindowUsageBounds =
(NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C5 |
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_2) |
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE));
NvU32 verticalUpscalingAllowed =
EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort,
pViewPortMax, updateState, setWindowUsageBounds);
pViewPortMax, updateState);
nvDmaSetStartEvoMethod(pChannel,
NVC57D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1);

View File

@@ -619,27 +619,33 @@ typedef enum
#define NV_GET_NV_STATE(pGpu) \
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)
#define IS_REG_OFFSET(nv, offset, length) \
(((offset) >= (nv)->regs->cpu_address) && \
(((offset) + ((length)-1)) <= \
(nv)->regs->cpu_address + ((nv)->regs->size-1)))
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((offset >= nv->regs->cpu_address) &&
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
}
#define IS_FB_OFFSET(nv, offset, length) \
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
}
#define IS_UD_OFFSET(nv, offset, length) \
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
((offset) >= (nv)->ud.cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
(offset >= nv->ud.cpu_address) &&
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
}
#define IS_IMEM_OFFSET(nv, offset, length) \
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
(((offset) + ((length) - 1)) <= \
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
}
#define NV_RM_MAX_MSIX_LINES 8

View File

@@ -780,10 +780,8 @@ static NV_STATUS RmAccessRegistry(
RmStatus = NV_ERR_INVALID_STRING_LENGTH;
goto done;
}
// get access to client's parmStr
RMAPI_PARAM_COPY_INIT(parmStrParamCopy, tmpParmStr, clientParmStrAddress, ParmStrLength, 1);
parmStrParamCopy.flags |= RMAPI_PARAM_COPY_FLAGS_ZERO_BUFFER;
RmStatus = rmapiParamsAcquire(&parmStrParamCopy, NV_TRUE);
if (RmStatus != NV_OK)
{
@@ -2026,6 +2024,7 @@ static NV_STATUS RmGetAllocPrivate(
PMEMORY_DESCRIPTOR pMemDesc;
NvU32 pageOffset;
NvU64 pageCount;
NvU64 endingOffset;
RsResourceRef *pResourceRef;
RmResource *pRmResource;
void *pMemData;
@@ -2086,8 +2085,9 @@ static NV_STATUS RmGetAllocPrivate(
if (rmStatus != NV_OK)
goto done;
pageCount = ((pageOffset + length) / os_page_size);
pageCount += (*pPageIndex + (((pageOffset + length) % os_page_size) ? 1 : 0));
endingOffset = pageOffset + length;
pageCount = (endingOffset / os_page_size);
pageCount += (*pPageIndex + ((endingOffset % os_page_size) ? 1 : 0));
if (pageCount > NV_RM_PAGES_TO_OS_PAGES(pMemDesc->PageCount))
{

View File

@@ -362,10 +362,6 @@ osHandleGpuLost
pmc_boot_0 = NV_PRIV_REG_RD32(nv->regs->map_u, NV_PMC_BOOT_0);
if (pmc_boot_0 != nvp->pmc_boot_0)
{
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
NV2080_CTRL_GPU_GET_OEM_BOARD_INFO_PARAMS *pBoardInfoParams;
NV_STATUS status;
//
// This doesn't support PEX Reset and Recovery yet.
// This will help to prevent accessing registers of a GPU
@@ -376,24 +372,11 @@ osHandleGpuLost
NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "GPU has fallen off the bus.\n");
pBoardInfoParams = portMemAllocNonPaged(sizeof(*pBoardInfoParams));
if (pBoardInfoParams != NULL)
{
portMemSet(pBoardInfoParams, 0, sizeof(*pBoardInfoParams));
status = pRmApi->Control(pRmApi, nv->rmapi.hClient,
nv->rmapi.hSubDevice,
NV2080_CTRL_CMD_GPU_GET_OEM_BOARD_INFO,
pBoardInfoParams,
sizeof(*pBoardInfoParams));
if (status == NV_OK)
if (pGpu->boardInfo != NULL && pGpu->boardInfo->serialNumber[0] != '\0')
{
NV_DEV_PRINTF(NV_DBG_ERRORS, nv,
"GPU serial number is %s.\n",
pBoardInfoParams->serialNumber);
}
portMemFree(pBoardInfoParams);
pGpu->boardInfo->serialNumber);
}
gpuSetDisconnectedProperties(pGpu);

View File

@@ -60,6 +60,7 @@ typedef struct GPUATTACHARG GPUATTACHARG;
* */
#include "ctrl/ctrl0080/ctrl0080gpu.h" // NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS (form hal)
#include "ctrl/ctrl2080/ctrl2080internal.h" // NV2080_CTRL_CMD_INTERNAL_MAX_BSPS/NVENCS
#include "ctrl/ctrl2080/ctrl2080ecc.h"
#include "ctrl/ctrl2080/ctrl2080nvd.h"
#include "class/cl2080.h"
#include "class/cl90cd.h"

View File

@@ -301,6 +301,7 @@ struct KernelGsp {
LIBOS_LOG_DECODE logDecode;
RM_LIBOS_LOG_MEM rmLibosLogMem[2];
void *pLogElf;
NvBool bInInit;
MEMORY_DESCRIPTOR *pMemDesc_simAccessBuf;
SimAccessBuffer *pSimAccessBuf;
NvP64 pSimAccessBufPriv;

View File

@@ -806,6 +806,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x20B0, 0x1450, 0x10de, "NVIDIA A100-PG509-200" },
{ 0x20B2, 0x1463, 0x10de, "NVIDIA A100-SXM4-80GB" },
{ 0x20B2, 0x147f, 0x10de, "NVIDIA A100-SXM4-80GB" },
{ 0x20B2, 0x1622, 0x10de, "NVIDIA A100-SXM4-80GB" },
{ 0x20B2, 0x1623, 0x10de, "NVIDIA A100-SXM4-80GB" },
{ 0x20B3, 0x14a7, 0x10de, "NVIDIA PG506-242" },
{ 0x20B3, 0x14a8, 0x10de, "NVIDIA PG506-243" },
{ 0x20B5, 0x1533, 0x10de, "NVIDIA A100 80GB PCIe" },
@@ -907,6 +909,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2507, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050" },
{ 0x2508, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 OEM" },
{ 0x2520, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Laptop GPU" },
{ 0x2521, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Laptop GPU" },
{ 0x2523, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Ti Laptop GPU" },
{ 0x2531, 0x151d, 0x1028, "NVIDIA RTX A2000" },
{ 0x2531, 0x151d, 0x103c, "NVIDIA RTX A2000" },

View File

@@ -86,8 +86,9 @@ NV_STATUS nvlogAllocBuffer(NvU32 size, NvU32 flags, NvU32 tag, NVLOG_BUFFER_HAND
* @brief Deallocate a buffer with the given handle
*
* @param[in] hBuffer Handle of the buffer to deallocate
* @param[in] bDeallocPreserved Deallocate preserved buffers
*/
void nvlogDeallocBuffer(NVLOG_BUFFER_HANDLE hBuffer);
void nvlogDeallocBuffer(NVLOG_BUFFER_HANDLE hBuffer, NvBool bDeallocPreserved);
/**
* @brief Write to a buffer with the given handle

View File

@@ -265,8 +265,11 @@ static NV_STATUS _issueRpcLarge
// should not be called in broadcast mode
NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
//
// Copy the initial buffer
entryLength = NV_MIN(bufSize, pRpc->maxRpcSize);
// Temporary black magic WAR for bug 3594082: reducing the size by 1
//
entryLength = NV_MIN(bufSize, pRpc->maxRpcSize - 1);
if ((NvU8 *)vgpu_rpc_message_header_v != pBuf8)
portMemCopy(vgpu_rpc_message_header_v, entryLength, pBuf8, entryLength);
@@ -291,8 +294,11 @@ static NV_STATUS _issueRpcLarge
remainingSize -= entryLength;
pBuf8 += entryLength;
//
// Copy the remaining buffers
entryLength = pRpc->maxRpcSize - sizeof(rpc_message_header_v);
// Temporary black magic WAR for bug 3594082: reducing the size by 1
//
entryLength = pRpc->maxRpcSize - sizeof(rpc_message_header_v) - 1;
while (remainingSize != 0)
{
if (entryLength > remainingSize)

View File

@@ -103,7 +103,7 @@ nvlogDestroy()
tlsShutdown();
for (i = 0; i < NVLOG_MAX_BUFFERS; i++)
{
nvlogDeallocBuffer(i);
nvlogDeallocBuffer(i, NV_TRUE);
}
if (NvLogLogger.mainLock != NULL)
{
@@ -261,7 +261,8 @@ nvlogAllocBuffer
void
nvlogDeallocBuffer
(
NVLOG_BUFFER_HANDLE hBuffer
NVLOG_BUFFER_HANDLE hBuffer,
NvBool bDeallocPreserved
)
{
NVLOG_BUFFER *pBuffer;
@@ -271,6 +272,12 @@ nvlogDeallocBuffer
pBuffer = NvLogLogger.pBuffers[hBuffer];
if (FLD_TEST_DRF(LOG_BUFFER, _FLAGS, _PRESERVE, _YES, pBuffer->flags) &&
!bDeallocPreserved)
{
return;
}
pBuffer->flags = FLD_SET_DRF(LOG_BUFFER, _FLAGS, _DISABLED,
_YES, pBuffer->flags);

View File

@@ -2502,17 +2502,21 @@ kbusFlushSingle_GM107
if (IS_GSP_CLIENT(pGpu))
{
//
// on GSP client, we only support PCIE_READ to do flush
// a sysmembar flush should call kbusSendSysmembarSingle_HAL explicitly
// on GSP client, we should use PCIE_READ to do video memory flush.
// A sysmembar flush that touches registers is done through RPC and has
// lower effeciency. For cases where it needs sysmembar, the caller site
// should use kbusSendSysmembarSingle_HAL explicitly.
//
NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_PATH);
NV_ASSERT(0);
// This will dump a stack trace to assist debug on certain
// platforms.
osAssertFailed();
}
else
{
return kbusSendSysmembarSingle_HAL(pGpu, pKernelBus);
}
}
}
return NV_OK;
}

View File

@@ -3750,6 +3750,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
Memory *pMemory;
ContextDma *pContextDma;
NvU32 addressSpace;
NvU64 notificationBufferSize;
NV_STATUS status;
hNotifier = pKernelChannel->hErrorContext;
@@ -3758,6 +3759,8 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
NV_CHECK_OR_RETURN(LEVEL_INFO, index != NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR,
NV_ERR_INVALID_ARGUMENT);
notificationBufferSize = (index + 1) * sizeof(NvNotification);
status = deviceGetByInstance(pClient, gpuGetDeviceInstance(pGpu), &pDevice);
if (status != NV_OK)
return NV_ERR_INVALID_DEVICE;
@@ -3766,7 +3769,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
{
addressSpace = memdescGetAddressSpace(pMemory->pMemDesc);
NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= ((index + 1) * sizeof(NvNotification)),
NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= notificationBufferSize,
NV_ERR_OUT_OF_RANGE);
switch (addressSpace)
{
@@ -3784,7 +3787,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
&pDmaMappingInfo),
NV_ERR_GENERIC);
NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= ((index + 1) * sizeof(NvNotification)),
NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= notificationBufferSize,
NV_ERR_OUT_OF_RANGE);
break;
}
@@ -3799,7 +3802,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
}
else if (NV_OK == ctxdmaGetByHandle(pClient, hNotifier, &pContextDma))
{
NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (((index + 1) * sizeof(NvNotification)) - 1),
NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (notificationBufferSize - 1),
NV_ERR_OUT_OF_RANGE);
}
else

View File

@@ -1923,26 +1923,6 @@ gpuStatePreInit_IMPL
}
}
pGpu->boardInfo = portMemAllocNonPaged(sizeof(*pGpu->boardInfo));
if (pGpu->boardInfo)
{
// To avoid potential race of xid reporting with the control, zero it out
portMemSet(pGpu->boardInfo, '\0', sizeof(*pGpu->boardInfo));
RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
if (pRmApi->Control(pRmApi,
pGpu->hInternalClient,
pGpu->hInternalSubdevice,
NV2080_CTRL_CMD_GPU_GET_OEM_BOARD_INFO,
pGpu->boardInfo,
sizeof(*pGpu->boardInfo)) != NV_OK)
{
portMemFree(pGpu->boardInfo);
pGpu->boardInfo = NULL;
}
}
return rmStatus;
}
@@ -2291,6 +2271,26 @@ gpuStatePostLoad
goto gpuStatePostLoad_exit;
}
pGpu->boardInfo = portMemAllocNonPaged(sizeof(*pGpu->boardInfo));
if (pGpu->boardInfo)
{
// To avoid potential race of xid reporting with the control, zero it out
portMemSet(pGpu->boardInfo, '\0', sizeof(*pGpu->boardInfo));
RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
if(pRmApi->Control(pRmApi,
pGpu->hInternalClient,
pGpu->hInternalSubdevice,
NV2080_CTRL_CMD_GPU_GET_OEM_BOARD_INFO,
pGpu->boardInfo,
sizeof(*pGpu->boardInfo)) != NV_OK)
{
portMemFree(pGpu->boardInfo);
pGpu->boardInfo = NULL;
}
}
gpuStatePostLoad_exit:
return rmStatus;
}
@@ -2326,6 +2326,9 @@ gpuStatePreUnload
NvU32 curEngDescIdx;
NV_STATUS rmStatus = NV_OK;
portMemFree(pGpu->boardInfo);
pGpu->boardInfo = NULL;
engDescriptorList = gpuGetUnloadEngineDescriptors(pGpu);
numEngDescriptors = gpuGetNumEngDescriptors(pGpu);
@@ -2648,9 +2651,6 @@ gpuStateDestroy_IMPL
_gpuFreeInternalObjects(pGpu);
gpuDestroyGenericKernelFalconList(pGpu);
portMemFree(pGpu->boardInfo);
pGpu->boardInfo = NULL;
portMemFree(pGpu->gspSupportedEngines);
pGpu->gspSupportedEngines = NULL;

View File

@@ -1047,7 +1047,7 @@ _kgspInitLibosLoggingStructures
//
// Setup logging memory for each task.
// Use MEMDESC_FLAGS_CPU_ONLY -- to early to call memdescMapIommu.
// Use MEMDESC_FLAGS_CPU_ONLY -- too early to call memdescMapIommu.
//
NV_ASSERT_OK_OR_GOTO(nvStatus,
memdescCreate(&pLog->pTaskLogDescriptor,
@@ -1258,6 +1258,8 @@ kgspInitRm_IMPL
return NV_ERR_INVALID_ARGUMENT;
}
pKernelGsp->bInInit = NV_TRUE;
// Need to hold the GPU instance lock in order to write to the RPC queue
NV_ASSERT_OK_OR_GOTO(status,
rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE,
@@ -1403,6 +1405,14 @@ kgspInitRm_IMPL
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, kgspStartLogPolling(pGpu, pKernelGsp), done);
done:
pKernelGsp->bInInit = NV_FALSE;
if (status != NV_OK)
{
// Preserve any captured gsp-rm logs
libosPreserveLogs(&pKernelGsp->logDecode);
}
if (gpusLockedMask != 0)
{
rmGpuGroupLockRelease(gpusLockedMask, GPUS_LOCK_FLAGS_NONE);
@@ -1520,7 +1530,7 @@ kgspDumpGspLogs_IMPL
NvBool bSyncNvLog
)
{
if (pKernelGsp->pLogElf || bSyncNvLog)
if (pKernelGsp->bInInit || pKernelGsp->pLogElf || bSyncNvLog)
libosExtractLogs(&pKernelGsp->logDecode, bSyncNvLog);
}

View File

@@ -1959,6 +1959,7 @@ memmgrFillComprInfo_IMPL
{
const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
NvU32 size;
portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
@@ -1969,10 +1970,12 @@ memmgrFillComprInfo_IMPL
NV_ASSERT(compTagStartOffset != ~(NvU32)0);
size = pageSize * pageCount;
pComprInfo->compPageShift = pMemorySystemConfig->comprPageShift;
pComprInfo->compTagLineMin = compTagStartOffset;
pComprInfo->compPageIndexLo = (NvU32)(surfOffset >> pComprInfo->compPageShift);
pComprInfo->compPageIndexHi = (NvU32)((surfOffset + pageSize * pageCount - 1) >> pComprInfo->compPageShift);
pComprInfo->compPageIndexHi = (NvU32)((surfOffset + size - 1) >> pComprInfo->compPageShift);
pComprInfo->compTagLineMultiplier = 1;
return NV_OK;

View File

@@ -751,6 +751,8 @@ NvBool gpumgrIsDeviceRmFirmwareCapable
0x2236, // A10 SKU215 Pris-24
0x2237, // A10G SKU215 Pris-24
0x25B6, // A16
0x20F5, // A800-80
0x20F6, // A800-40
};
NvU32 count = NV_ARRAY_ELEMENTS(defaultGspRmGpus);
NvU32 i;