diff --git a/README.md b/README.md index ef72d3bb8..a89739266 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # NVIDIA Linux Open GPU Kernel Module Source This is the source release of the NVIDIA Linux open GPU kernel modules, -version 590.44.01. +version 590.48.01. ## How to Build @@ -17,7 +17,7 @@ as root: Note that the kernel modules built here must be used with GSP firmware and user-space NVIDIA GPU driver components from a corresponding -590.44.01 driver release. This can be achieved by installing +590.48.01 driver release. This can be achieved by installing the NVIDIA GPU driver from the .run file using the `--no-kernel-modules` option. E.g., @@ -185,7 +185,7 @@ table below). For details on feature support and limitations, see the NVIDIA GPU driver end user README here: -https://us.download.nvidia.com/XFree86/Linux-x86_64/590.44.01/README/kernel_open.html +https://us.download.nvidia.com/XFree86/Linux-x86_64/590.48.01/README/kernel_open.html For vGPU support, please refer to the README.vgpu packaged in the vGPU Host Package for more details. @@ -313,7 +313,6 @@ Subsystem Device ID. | NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1D05 1089 | | Quadro RTX 5000 | 1EB0 | | Quadro RTX 4000 | 1EB1 | -| EIZO Quadro MED-XN92 | 1EB1 15C3 12A0 | | Quadro RTX 5000 | 1EB5 | | Quadro RTX 5000 with Max-Q Design | 1EB5 1025 1375 | | Quadro RTX 5000 with Max-Q Design | 1EB5 1025 1401 | @@ -960,9 +959,13 @@ Subsystem Device ID. | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B | | NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 17AA 204B | | NVIDIA RTX PRO 5000 Blackwell | 2BB3 1028 204D | +| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 1028 227A | | NVIDIA RTX PRO 5000 Blackwell | 2BB3 103C 204D | +| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 103C 227A | | NVIDIA RTX PRO 5000 Blackwell | 2BB3 10DE 204D | +| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 10DE 227A | | NVIDIA RTX PRO 5000 Blackwell | 2BB3 17AA 204D | +| NVIDIA RTX PRO 5000 72GB Blackwell | 2BB3 17AA 227A | | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 1028 204C | | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C | | NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C | @@ -1009,6 +1012,7 @@ Subsystem Device ID. | NVIDIA RTX PRO 500 Blackwell Generation Laptop GPU | 2DB9 | | NVIDIA GeForce RTX 5050 Laptop GPU | 2DD8 | | NVIDIA RTX PRO 500 Blackwell Embedded GPU | 2DF9 | +| NVIDIA GB10 | 2E12 10DE 21EC | | NVIDIA GeForce RTX 5070 | 2F04 | | NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 | | NVIDIA RTX PRO 3000 Blackwell Generation Laptop GPU | 2F38 | diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild index 640ee9797..60181c7f0 100644 --- a/kernel-open/Kbuild +++ b/kernel-open/Kbuild @@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc ccflags-y += -I$(src) ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args ccflags-y += -D__KERNEL__ -DMODULE -DNVRM -ccflags-y += -DNV_VERSION_STRING=\"590.44.01\" +ccflags-y += -DNV_VERSION_STRING=\"590.48.01\" # Include and link Tegra out-of-tree modules. 
ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),) diff --git a/kernel-open/common/inc/nv-linux.h b/kernel-open/common/inc/nv-linux.h index a3116e441..8d0428289 100644 --- a/kernel-open/common/inc/nv-linux.h +++ b/kernel-open/common/inc/nv-linux.h @@ -649,9 +649,9 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa) #define NV_PRINT_AT(nv_debug_level,at) \ { \ nv_printf(nv_debug_level, \ - "NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, " \ + "NVRM: VM: %s:%d: 0x%p, %d page(s), count = %lld, " \ "page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \ - at->num_pages, NV_ATOMIC_READ(at->usage_count), \ + at->num_pages, (long long)atomic64_read(&at->usage_count), \ at->page_table); \ } @@ -919,7 +919,7 @@ struct nv_dma_buf typedef struct nv_alloc_s { struct nv_alloc_s *next; struct device *dev; - atomic_t usage_count; + atomic64_t usage_count; struct { NvBool contig : 1; NvBool guest : 1; @@ -1248,7 +1248,7 @@ struct nv_pci_tegra_devfreq_dev; typedef struct nv_linux_state_s { nv_state_t nv_state; - atomic_t usage_count; + atomic64_t usage_count; NvU32 suspend_count; @@ -1627,9 +1627,9 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t { NV_PRINT_AT(NV_DBG_MEMINFO, at); - if (NV_ATOMIC_DEC_AND_TEST(at->usage_count)) + if (atomic64_dec_and_test(&at->usage_count)) { - NV_ATOMIC_INC(at->usage_count); + atomic64_inc(&at->usage_count); at->next = nvlfp->free_list; nvlfp->free_list = at; diff --git a/kernel-open/common/inc/nv-mm.h b/kernel-open/common/inc/nv-mm.h index faeb84456..919dacde8 100644 --- a/kernel-open/common/inc/nv-mm.h +++ b/kernel-open/common/inc/nv-mm.h @@ -196,14 +196,33 @@ static inline struct rw_semaphore *nv_mmap_get_lock(struct mm_struct *mm) * Commit 45ad9f5290dc updated vma_start_write() to call __vma_start_write(). 
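 *
 * The nv_vm_flags_set()/nv_vm_flags_clear() wrappers below build on this:
 * when NV_CAN_CALL_VMA_START_WRITE is false they take the per-VMA write
 * lock via nv_vma_start_write() and then update the flags word directly
 * (vma_flags_set_word() or an ACCESS_PRIVATE() write); otherwise they use
 * vm_flags_set()/vm_flags_clear() or a plain assignment, depending on what
 * the kernel provides.  A minimal usage sketch (illustrative only; the
 * helper name and flag choice here are hypothetical):
 *
 *   static void example_mark_pfn_mapping(struct vm_area_struct *vma)
 *   {
 *       // The wrapper picks the right primitive for the running kernel
 *       // and handles the VMA write-lock requirement where needed.
 *       nv_vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND);
 *   }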
*/ void nv_vma_start_write(struct vm_area_struct *); + +static inline void nv_vma_flags_set_word(struct vm_area_struct *vma, unsigned long flags) +{ + nv_vma_start_write(vma); +#if defined(NV_VMA_FLAGS_SET_WORD_PRESENT) + vma_flags_set_word(&vma->flags, flags); +#else + ACCESS_PRIVATE(vma, __vm_flags) |= flags; #endif +} + +static inline void nv_vma_flags_clear_word(struct vm_area_struct *vma, unsigned long flags) +{ + nv_vma_start_write(vma); +#if defined(NV_VMA_FLAGS_SET_WORD_PRESENT) + vma_flags_clear_word(&vma->flags, flags); +#else + ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; +#endif +} +#endif // !NV_CAN_CALL_VMA_START_WRITE static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { #if !NV_CAN_CALL_VMA_START_WRITE - nv_vma_start_write(vma); - ACCESS_PRIVATE(vma, __vm_flags) |= flags; -#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS) + nv_vma_flags_set_word(vma, flags); +#elif defined(NV_VM_FLAGS_SET_PRESENT) vm_flags_set(vma, flags); #else vma->vm_flags |= flags; @@ -213,9 +232,8 @@ static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) static inline void nv_vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags) { #if !NV_CAN_CALL_VMA_START_WRITE - nv_vma_start_write(vma); - ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; -#elif defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS) + nv_vma_flags_clear_word(vma, flags); +#elif defined(NV_VM_FLAGS_SET_PRESENT) vm_flags_clear(vma, flags); #else vma->vm_flags &= ~flags; diff --git a/kernel-open/common/inc/nv-time.h b/kernel-open/common/inc/nv-time.h index 3d14f9bd5..ab605c909 100644 --- a/kernel-open/common/inc/nv-time.h +++ b/kernel-open/common/inc/nv-time.h @@ -36,6 +36,19 @@ #define NV_MAX_ISR_DELAY_MS (NV_MAX_ISR_DELAY_US / 1000) #define NV_NSECS_TO_JIFFIES(nsec) ((nsec) * HZ / 1000000000) +/* + * in_hardirq() was added in v5.11-rc1 (2020-12-15) to replace in_irq(). + * Fall back to in_irq() for older kernels that don't have in_hardirq(). + */ +static inline NvBool nv_in_hardirq(void) +{ +#if defined(in_hardirq) + return in_hardirq(); +#else + return in_irq(); +#endif +} + #if !defined(NV_KTIME_GET_RAW_TS64_PRESENT) static inline void ktime_get_raw_ts64(struct timespec64 *ts64) { @@ -82,7 +95,7 @@ static inline NV_STATUS nv_sleep_us(unsigned int us) ktime_get_raw_ts64(&tm1); #endif - if (in_irq() && (us > NV_MAX_ISR_DELAY_US)) + if (nv_in_hardirq() && (us > NV_MAX_ISR_DELAY_US)) return NV_ERR_GENERIC; mdelay_safe_msec = us / 1000; @@ -127,7 +140,7 @@ static inline NV_STATUS nv_sleep_ms(unsigned int ms) tm_start = tm_aux; #endif - if (in_irq() && (ms > NV_MAX_ISR_DELAY_MS)) + if (nv_in_hardirq() && (ms > NV_MAX_ISR_DELAY_MS)) { return NV_ERR_GENERIC; } diff --git a/kernel-open/conftest.sh b/kernel-open/conftest.sh index 024281bd4..59f3624d1 100755 --- a/kernel-open/conftest.sh +++ b/kernel-open/conftest.sh @@ -2114,6 +2114,35 @@ compile_test() { compile_check_conftest "$CODE" "NV_GET_BACKLIGHT_DEVICE_BY_NAME_PRESENT" "" "functions" ;; + dma_map_ops_has_map_phys) + # + # Determine if .map_phys exists in struct dma_map_ops. + # + # Commit 14cb413af00c ("dma-mapping: remove unused mapping resource callbacks") + # removed .map_resource operation and replaced it with .map_phys. 
+ # + echo "$CONFTEST_PREAMBLE + #include + int conftest_dma_map_ops_has_map_phys(void) { + return offsetof(struct dma_map_ops, map_phys); + } + int conftest_dma_map_ops_has_unmap_phys(void) { + return offsetof(struct dma_map_ops, unmap_phys); + }" > conftest$$.c + + $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1 + rm -f conftest$$.c + + if [ -f conftest$$.o ]; then + echo "#define NV_DMA_MAP_OPS_HAS_MAP_PHYS" | append_conftest "types" + rm -f conftest$$.o + return + else + echo "#undef NV_DMA_MAP_OPS_HAS_MAP_PHYS" | append_conftest "types" + return + fi + ;; + dma_buf_ops_has_map) # # Determine if .map exists in dma_buf_ops. @@ -3938,6 +3967,27 @@ compile_test() { compile_check_conftest "$CODE" "NV_PCI_REBAR_GET_POSSIBLE_SIZES_PRESENT" "" "functions" ;; + pci_resize_resource_has_exclude_bars_arg) + # + # Determine if pci_resize_resource() has exclude_bars argument. + # + # exclude_bars argument was added to pci_resize_resource by commit + # 337b1b566db0 (11/14/2025) ("PCI: Fix restoring BARs on BAR resize rollback path") + # in linux-next. + # + CODE=" + #include + + typeof(pci_resize_resource) conftest_pci_resize_resource_has_exclude_bars_arg; + int __must_check conftest_pci_resize_resource_has_exclude_bars_arg(struct pci_dev *dev, + int i, int size, + int exclude_bars) { + return 0; + }" + + compile_check_conftest "$CODE" "NV_PCI_RESIZE_RESOURCE_HAS_EXCLUDE_BARS_ARG" "" "types" + ;; + drm_connector_has_override_edid) # # Determine if 'struct drm_connector' has an 'override_edid' member. @@ -3976,22 +4026,39 @@ compile_test() { compile_check_conftest "$CODE" "NV_IOMMU_SVA_BIND_DEVICE_HAS_DRVDATA_ARG" "" "types" ;; - vm_area_struct_has_const_vm_flags) + vm_flags_set) # - # Determine if the 'vm_area_struct' structure has - # const 'vm_flags'. + # Determine if the vm_flags_set() function is present. The + # presence of this function indicates that the vm_flags_clear() + # function is also present. # - # A union of '__vm_flags' and 'const vm_flags' was added by + # The functions vm_flags_set()/ vm_flags_clear() were added by # commit bc292ab00f6c ("mm: introduce vma->vm_flags wrapper - # functions") in v6.3. + # functions") in v6.3-rc1 (2023-02-09). # CODE=" - #include - int conftest_vm_area_struct_has_const_vm_flags(void) { - return offsetof(struct vm_area_struct, __vm_flags); + #include + void conftest_vm_flags_set(void) { + vm_flags_set(); }" - compile_check_conftest "$CODE" "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" "" "types" + compile_check_conftest "$CODE" "NV_VM_FLAGS_SET_PRESENT" "" "functions" + ;; + + vma_flags_set_word) + # + # Determine if the vma_flags_set_word() function is present. + # + # Added by commit c3f7c506e8f1 ("mm: introduce VMA flags bitmap type") + # in v6.19-rc1. 
+ # + CODE=" + #include + void conftest_vma_flags_set_word(void) { + vma_flags_set_word(); + }" + + compile_check_conftest "$CODE" "NV_VMA_FLAGS_SET_WORD_PRESENT" "" "functions" ;; drm_driver_has_dumb_destroy) diff --git a/kernel-open/nvidia-drm/nvidia-drm-crtc.c b/kernel-open/nvidia-drm/nvidia-drm-crtc.c index 9f1e2646e..713c47896 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-crtc.c +++ b/kernel-open/nvidia-drm/nvidia-drm-crtc.c @@ -1554,7 +1554,7 @@ static int __nv_drm_cursor_atomic_check(struct drm_plane *plane, WARN_ON(nv_plane->layer_idx != NVKMS_KAPI_LAYER_INVALID_IDX); - nv_drm_for_each_crtc_in_state(plane_state->state, crtc, crtc_state, i) { + for_each_new_crtc_in_state(plane_state->state, crtc, crtc_state, i) { struct nv_drm_crtc_state *nv_crtc_state = to_nv_crtc_state(crtc_state); struct NvKmsKapiHeadRequestedConfig *head_req_config = &nv_crtc_state->req_config; @@ -1600,7 +1600,7 @@ static int nv_drm_plane_atomic_check(struct drm_plane *plane, WARN_ON(nv_plane->layer_idx == NVKMS_KAPI_LAYER_INVALID_IDX); - nv_drm_for_each_crtc_in_state(plane_state->state, crtc, crtc_state, i) { + for_each_new_crtc_in_state(plane_state->state, crtc, crtc_state, i) { struct nv_drm_crtc_state *nv_crtc_state = to_nv_crtc_state(crtc_state); struct NvKmsKapiHeadRequestedConfig *head_req_config = &nv_crtc_state->req_config; @@ -2430,7 +2430,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc, req_config->flags.displaysChanged = NV_TRUE; - nv_drm_for_each_connector_in_state(crtc_state->state, + for_each_new_connector_in_state(crtc_state->state, connector, connector_state, j) { if (connector_state->crtc != crtc) { continue; diff --git a/kernel-open/nvidia-drm/nvidia-drm-helper.c b/kernel-open/nvidia-drm/nvidia-drm-helper.c index 393b00d0a..8f1a78849 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-helper.c +++ b/kernel-open/nvidia-drm/nvidia-drm-helper.c @@ -54,7 +54,7 @@ * drm_atomic_helper_disable_all() is copied from * linux/drivers/gpu/drm/drm_atomic_helper.c and modified to use * nv_drm_for_each_crtc instead of drm_for_each_crtc to loop over all crtcs, - * use nv_drm_for_each_*_in_state instead of for_each_connector_in_state to loop + * use for_each_new_*_in_state instead of for_each_connector_in_state to loop * over all modeset object states, and use drm_atomic_state_free() if * drm_atomic_state_put() is not available. 
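 *
 * For reference, a minimal sketch (illustrative only) of the upstream
 * iterators the rest of this patch switches to: for_each_new_*_in_state()
 * yields the state a commit is asking for, while for_each_old_*_in_state()
 * yields the state the objects had before drm_atomic_helper_swap_state().
 *
 *   static int example_count_enabled_crtcs(struct drm_atomic_state *state)
 *   {
 *       struct drm_crtc *crtc;
 *       struct drm_crtc_state *new_crtc_state;
 *       int i, enabled = 0;
 *
 *       for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
 *           if (new_crtc_state->enable)
 *               enabled++;
 *       }
 *       return enabled;
 *   }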
* @@ -139,13 +139,13 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev, plane_state->rotation = DRM_MODE_ROTATE_0; } - nv_drm_for_each_connector_in_state(state, conn, conn_state, i) { + for_each_new_connector_in_state(state, conn, conn_state, i) { ret = drm_atomic_set_crtc_for_connector(conn_state, NULL); if (ret < 0) goto free; } - nv_drm_for_each_plane_in_state(state, plane, plane_state, i) { + for_each_new_plane_in_state(state, plane, plane_state, i) { ret = drm_atomic_set_crtc_for_plane(plane_state, NULL); if (ret < 0) goto free; diff --git a/kernel-open/nvidia-drm/nvidia-drm-helper.h b/kernel-open/nvidia-drm/nvidia-drm-helper.h index a297b9435..068949a02 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-helper.h +++ b/kernel-open/nvidia-drm/nvidia-drm-helper.h @@ -138,154 +138,6 @@ nv_drm_prime_pages_to_sg(struct drm_device *dev, int nv_drm_atomic_helper_disable_all(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx); -/* - * for_each_connector_in_state(), for_each_crtc_in_state() and - * for_each_plane_in_state() were added by kernel commit - * df63b9994eaf942afcdb946d27a28661d7dfbf2a which was Signed-off-by: - * Ander Conselvan de Oliveira - * Daniel Vetter - * - * for_each_connector_in_state(), for_each_crtc_in_state() and - * for_each_plane_in_state() were copied from - * include/drm/drm_atomic.h @ - * 21a01abbe32a3cbeb903378a24e504bfd9fe0648 - * which has the following copyright and license information: - * - * Copyright (C) 2014 Red Hat - * Copyright (C) 2014 Intel Corp. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Rob Clark - * Daniel Vetter - */ - -/** - * nv_drm_for_each_connector_in_state - iterate over all connectors in an - * atomic update - * @__state: &struct drm_atomic_state pointer - * @connector: &struct drm_connector iteration cursor - * @connector_state: &struct drm_connector_state iteration cursor - * @__i: int iteration cursor, for macro-internal use - * - * This iterates over all connectors in an atomic update. Note that before the - * software state is committed (by calling drm_atomic_helper_swap_state(), this - * points to the new state, while afterwards it points to the old state. Due to - * this tricky confusion this macro is deprecated. 
- */ -#if !defined(for_each_connector_in_state) -#define nv_drm_for_each_connector_in_state(__state, \ - connector, connector_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->num_connector && \ - ((connector) = (__state)->connectors[__i].ptr, \ - (connector_state) = (__state)->connectors[__i].state, 1); \ - (__i)++) \ - for_each_if (connector) -#else -#define nv_drm_for_each_connector_in_state(__state, \ - connector, connector_state, __i) \ - for_each_connector_in_state(__state, connector, connector_state, __i) -#endif - - -/** - * nv_drm_for_each_crtc_in_state - iterate over all CRTCs in an atomic update - * @__state: &struct drm_atomic_state pointer - * @crtc: &struct drm_crtc iteration cursor - * @crtc_state: &struct drm_crtc_state iteration cursor - * @__i: int iteration cursor, for macro-internal use - * - * This iterates over all CRTCs in an atomic update. Note that before the - * software state is committed (by calling drm_atomic_helper_swap_state(), this - * points to the new state, while afterwards it points to the old state. Due to - * this tricky confusion this macro is deprecated. - */ -#if !defined(for_each_crtc_in_state) -#define nv_drm_for_each_crtc_in_state(__state, crtc, crtc_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->dev->mode_config.num_crtc && \ - ((crtc) = (__state)->crtcs[__i].ptr, \ - (crtc_state) = (__state)->crtcs[__i].state, 1); \ - (__i)++) \ - for_each_if (crtc_state) -#else -#define nv_drm_for_each_crtc_in_state(__state, crtc, crtc_state, __i) \ - for_each_crtc_in_state(__state, crtc, crtc_state, __i) -#endif - -/** - * nv_drm_for_each_plane_in_state - iterate over all planes in an atomic update - * @__state: &struct drm_atomic_state pointer - * @plane: &struct drm_plane iteration cursor - * @plane_state: &struct drm_plane_state iteration cursor - * @__i: int iteration cursor, for macro-internal use - * - * This iterates over all planes in an atomic update. Note that before the - * software state is committed (by calling drm_atomic_helper_swap_state(), this - * points to the new state, while afterwards it points to the old state. Due to - * this tricky confusion this macro is deprecated. - */ -#if !defined(for_each_plane_in_state) -#define nv_drm_for_each_plane_in_state(__state, plane, plane_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->dev->mode_config.num_total_plane && \ - ((plane) = (__state)->planes[__i].ptr, \ - (plane_state) = (__state)->planes[__i].state, 1); \ - (__i)++) \ - for_each_if (plane_state) -#else -#define nv_drm_for_each_plane_in_state(__state, plane, plane_state, __i) \ - for_each_plane_in_state(__state, plane, plane_state, __i) -#endif - -/* - * for_each_new_plane_in_state() was added by kernel commit - * 581e49fe6b411f407102a7f2377648849e0fa37f which was Signed-off-by: - * Maarten Lankhorst - * Daniel Vetter - * - * This commit also added the old_state and new_state pointers to - * __drm_planes_state. Because of this, the best that can be done on kernel - * versions without this macro is for_each_plane_in_state. - */ - -/** - * nv_drm_for_each_new_plane_in_state - iterate over all planes in an atomic update - * @__state: &struct drm_atomic_state pointer - * @plane: &struct drm_plane iteration cursor - * @new_plane_state: &struct drm_plane_state iteration cursor for the new state - * @__i: int iteration cursor, for macro-internal use - * - * This iterates over all planes in an atomic update, tracking only the new - * state. 
This is useful in enable functions, where we need the new state the - * hardware should be in when the atomic commit operation has completed. - */ -#if !defined(for_each_new_plane_in_state) -#define nv_drm_for_each_new_plane_in_state(__state, plane, new_plane_state, __i) \ - nv_drm_for_each_plane_in_state(__state, plane, new_plane_state, __i) -#else -#define nv_drm_for_each_new_plane_in_state(__state, plane, new_plane_state, __i) \ - for_each_new_plane_in_state(__state, plane, new_plane_state, __i) -#endif - #include #include diff --git a/kernel-open/nvidia-drm/nvidia-drm-modeset.c b/kernel-open/nvidia-drm/nvidia-drm-modeset.c index 8e114e34b..da167a075 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-modeset.c +++ b/kernel-open/nvidia-drm/nvidia-drm-modeset.c @@ -108,8 +108,11 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc, return false; } - /* Find out whether primary & overlay flip done events will be generated. */ - nv_drm_for_each_plane_in_state(old_crtc_state->state, + /* + * Find out whether primary & overlay flip done events will be generated. + * Only called after drm_atomic_helper_swap_state, so we use old state. + */ + for_each_old_plane_in_state(old_crtc_state->state, plane, old_plane_state, i) { if (old_plane_state->crtc != crtc) { continue; @@ -193,7 +196,7 @@ static int __nv_drm_convert_in_fences( return 0; } - nv_drm_for_each_new_plane_in_state(state, plane, plane_state, i) { + for_each_new_plane_in_state(state, plane, plane_state, i) { if ((plane->type == DRM_PLANE_TYPE_CURSOR) || (plane_state->crtc != crtc) || (plane_state->fence == NULL)) { @@ -334,7 +337,8 @@ static int __nv_drm_get_syncpt_data( head_reply_config = &reply_config->headReplyConfig[nv_crtc->head]; - nv_drm_for_each_plane_in_state(old_crtc_state->state, plane, old_plane_state, i) { + /* Use old state because this is only called after drm_atomic_helper_swap_state */ + for_each_old_plane_in_state(old_crtc_state->state, plane, old_plane_state, i) { struct nv_drm_plane *nv_plane = to_nv_plane(plane); if (plane->type == DRM_PLANE_TYPE_CURSOR || old_plane_state->crtc != crtc) { @@ -395,7 +399,7 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev, &(to_nv_atomic_state(state)->config); struct NvKmsKapiModeSetReplyConfig reply_config = { }; struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; int i; int ret; @@ -429,18 +433,10 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev, memset(requested_config, 0, sizeof(*requested_config)); /* Loop over affected crtcs and construct NvKmsKapiRequestedModeSetConfig */ - nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) { - /* - * When committing a state, the new state is already stored in - * crtc->state. When checking a proposed state, the proposed state is - * stored in crtc_state. - */ - struct drm_crtc_state *new_crtc_state = - commit ? crtc->state : crtc_state; + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc); if (commit) { - struct drm_crtc_state *old_crtc_state = crtc_state; struct nv_drm_crtc_state *nv_new_crtc_state = to_nv_crtc_state(new_crtc_state); @@ -497,10 +493,11 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev, } if (commit && nv_dev->supportsSyncpts) { - nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) { + /* commit is true so we check old state */ + for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) { /*! 
loop over affected crtcs and get NvKmsKapiModeSetReplyConfig */ ret = __nv_drm_get_syncpt_data( - nv_dev, crtc, crtc_state, requested_config, &reply_config); + nv_dev, crtc, old_crtc_state, requested_config, &reply_config); if (ret != 0) { return ret; } @@ -525,7 +522,7 @@ int nv_drm_atomic_check(struct drm_device *dev, bool cursor_surface_changed; bool cursor_only_commit; - nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) { + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { /* * Committing cursor surface change without any other plane change can @@ -534,7 +531,7 @@ int nv_drm_atomic_check(struct drm_device *dev, */ cursor_surface_changed = false; cursor_only_commit = true; - nv_drm_for_each_plane_in_state(crtc_state->state, plane, plane_state, j) { + for_each_new_plane_in_state(crtc_state->state, plane, plane_state, j) { if (plane->type == DRM_PLANE_TYPE_CURSOR) { if (plane_state->fb != plane->state->fb) { cursor_surface_changed = true; @@ -641,7 +638,7 @@ int nv_drm_atomic_commit(struct drm_device *dev, * Our system already implements such a queue, but due to * bug 4054608, it is currently not used. */ - nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) { + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc); /* @@ -748,7 +745,7 @@ int nv_drm_atomic_commit(struct drm_device *dev, goto done; } - nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) { + for_each_old_crtc_in_state(state, crtc, crtc_state, i) { struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc); struct nv_drm_crtc_state *nv_new_crtc_state = to_nv_crtc_state(crtc->state); diff --git a/kernel-open/nvidia-drm/nvidia-drm-priv.h b/kernel-open/nvidia-drm/nvidia-drm-priv.h index 703725c31..88c74a069 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-priv.h +++ b/kernel-open/nvidia-drm/nvidia-drm-priv.h @@ -30,6 +30,7 @@ #if defined(NV_DRM_DRMP_H_PRESENT) #include #endif +#include #include #include diff --git a/kernel-open/nvidia-drm/nvidia-drm-sources.mk b/kernel-open/nvidia-drm/nvidia-drm-sources.mk index b3ba7450a..a2c3a3862 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-sources.mk +++ b/kernel-open/nvidia-drm/nvidia-drm-sources.mk @@ -64,6 +64,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_prime_mmap NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_sysfs_connector_property_event NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_sysfs_connector_status_event +NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_flags_set NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_legacy_dev_list NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg @@ -93,7 +94,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fenc NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end -NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_framebuffers_has_driver_arg diff --git a/kernel-open/nvidia-uvm/uvm_migrate_pageable.c b/kernel-open/nvidia-uvm/uvm_migrate_pageable.c index d31fa038a..577a13ff7 100644 --- a/kernel-open/nvidia-uvm/uvm_migrate_pageable.c +++ b/kernel-open/nvidia-uvm/uvm_migrate_pageable.c @@ -43,8 +43,6 @@ #ifdef UVM_MIGRATE_VMA_SUPPORTED -static struct kmem_cache 
*g_uvm_migrate_vma_state_cache __read_mostly; - static const gfp_t g_migrate_vma_gfp_flags = NV_UVM_GFP_FLAGS | GFP_HIGHUSER_MOVABLE | __GFP_THISNODE; static uvm_sgt_t *uvm_select_sgt(uvm_processor_id_t src_id, int src_nid, migrate_vma_state_t *state) @@ -1497,7 +1495,7 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args) uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(gpu); } - state = nv_kmem_cache_zalloc(g_uvm_migrate_vma_state_cache, NV_UVM_GFP_FLAGS); + state = uvm_kvmalloc_zero(sizeof(migrate_vma_state_t)); if (!state) return NV_ERR_NO_MEMORY; @@ -1519,22 +1517,17 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args) out: uvm_kvfree(state->dma.sgt_cpu); uvm_kvfree(state->cpu_page_mask); - kmem_cache_free(g_uvm_migrate_vma_state_cache, state); + uvm_kvfree(state); return status; } NV_STATUS uvm_migrate_pageable_init(void) { - g_uvm_migrate_vma_state_cache = NV_KMEM_CACHE_CREATE("migrate_vma_state_t", migrate_vma_state_t); - if (!g_uvm_migrate_vma_state_cache) - return NV_ERR_NO_MEMORY; - return NV_OK; } void uvm_migrate_pageable_exit(void) { - kmem_cache_destroy_safe(&g_uvm_migrate_vma_state_cache); } #endif diff --git a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c index 4eeecd01c..97ff13dcd 100644 --- a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c @@ -3360,12 +3360,10 @@ void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *parent_gpu) void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *parent_gpu) { - unsigned long pci_start_pfn = pci_resource_start(parent_gpu->pci_dev, - uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT; - struct page *p2p_page; - if (parent_gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(parent_gpu)) { - p2p_page = pfn_to_page(pci_start_pfn); + struct page *p2p_page = pfn_to_page(pci_resource_start(parent_gpu->pci_dev, + uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT); + devm_memunmap_pages(&parent_gpu->pci_dev->dev, page_pgmap(p2p_page)); } diff --git a/kernel-open/nvidia/nv-dma.c b/kernel-open/nvidia/nv-dma.c index ff399de19..ea3866867 100644 --- a/kernel-open/nvidia/nv-dma.c +++ b/kernel-open/nvidia/nv-dma.c @@ -729,7 +729,11 @@ static NvBool nv_dma_use_map_resource #endif } +#if defined(NV_DMA_MAP_OPS_HAS_MAP_PHYS) + return (ops->map_phys != NULL); +#else return (ops->map_resource != NULL); +#endif } /* DMA-map a peer device's C2C aperture for peer access. */ diff --git a/kernel-open/nvidia/nv-dmabuf.c b/kernel-open/nvidia/nv-dmabuf.c index 5e20e6e8a..7197907b5 100644 --- a/kernel-open/nvidia/nv-dmabuf.c +++ b/kernel-open/nvidia/nv-dmabuf.c @@ -468,9 +468,28 @@ nv_dma_buf_dup_mem_handles( return NV_OK; failed: - nv_dma_buf_undup_mem_handles_unlocked(sp, params->index, count, priv); + if (!priv->acquire_release_all_gpu_lock_on_dup) + { + // + // Undup requires taking all-GPUs lock. + // So if single GPU lock was taken, + // release it first so all-GPUs lock can be taken in + // nv_dma_buf_undup_mem_handles(). + // + nv_dma_buf_release_gpu_lock(sp, priv); - nv_dma_buf_release_gpu_lock(sp, priv); + nv_dma_buf_undup_mem_handles(sp, params->index, count, priv); + } + else + { + // + // Here, all-GPUs lock is already taken, so undup the handles under + // the unlocked version of the function and then release the locks. 
+ // + nv_dma_buf_undup_mem_handles_unlocked(sp, params->index, count, priv); + + nv_dma_buf_release_gpu_lock(sp, priv); + } unlock_api_lock: rm_release_api_lock(sp); diff --git a/kernel-open/nvidia/nv-mmap.c b/kernel-open/nvidia/nv-mmap.c index 72092deff..9ce5f4260 100644 --- a/kernel-open/nvidia/nv-mmap.c +++ b/kernel-open/nvidia/nv-mmap.c @@ -72,7 +72,7 @@ nvidia_vma_open(struct vm_area_struct *vma) if (at != NULL) { - NV_ATOMIC_INC(at->usage_count); + atomic64_inc(&at->usage_count); NV_PRINT_AT(NV_DBG_MEMINFO, at); } @@ -414,7 +414,7 @@ static int nvidia_mmap_sysmem( int ret = 0; unsigned long start = 0; - NV_ATOMIC_INC(at->usage_count); + atomic64_inc(&at->usage_count); start = vma->vm_start; for (j = page_index; j < (page_index + pages); j++) @@ -450,7 +450,7 @@ static int nvidia_mmap_sysmem( if (ret) { - NV_ATOMIC_DEC(at->usage_count); + atomic64_dec(&at->usage_count); nv_printf(NV_DBG_ERRORS, "NVRM: Userspace mapping creation failed [%d]!\n", ret); return -EAGAIN; diff --git a/kernel-open/nvidia/nv-pci.c b/kernel-open/nvidia/nv-pci.c index 7892742f1..2767134e8 100644 --- a/kernel-open/nvidia/nv-pci.c +++ b/kernel-open/nvidia/nv-pci.c @@ -244,7 +244,11 @@ static int nv_resize_pcie_bars(struct pci_dev *pci_dev) { resize: /* Attempt to resize BAR1 to the largest supported size */ +#if defined(NV_PCI_RESIZE_RESOURCE_HAS_EXCLUDE_BARS_ARG) + r = pci_resize_resource(pci_dev, NV_GPU_BAR1, requested_size, 0); +#else r = pci_resize_resource(pci_dev, NV_GPU_BAR1, requested_size); +#endif if (r) { if (r == -ENOSPC) @@ -1687,11 +1691,6 @@ nv_pci_probe nv_printf(NV_DBG_SETUP, "NVRM: probing 0x%x 0x%x, class 0x%x\n", pci_dev->vendor, pci_dev->device, pci_dev->class); - if (nv_kmem_cache_alloc_stack(&sp) != 0) - { - return -1; - } - #ifdef NV_PCI_SRIOV_SUPPORT if (pci_dev->is_virtfn) { @@ -1707,21 +1706,25 @@ nv_pci_probe "since IOMMU is not present on the system.\n", NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn)); - goto failed; + return -1; } - nv_kmem_cache_free_stack(sp); return 0; #else nv_printf(NV_DBG_ERRORS, "NVRM: Ignoring probe for VF %04x:%02x:%02x.%x ", NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn)); - goto failed; + return -1; #endif /* NV_VGPU_KVM_BUILD */ } #endif /* NV_PCI_SRIOV_SUPPORT */ + if (nv_kmem_cache_alloc_stack(&sp) != 0) + { + return -1; + } + if (!rm_wait_for_bar_firewall( sp, NV_PCI_DOMAIN_NUMBER(pci_dev), @@ -2178,7 +2181,7 @@ nv_pci_remove(struct pci_dev *pci_dev) * For eGPU, fall off the bus along with clients active is a valid scenario. * Hence skipping the sanity check for eGPU. */ - if ((NV_ATOMIC_READ(nvl->usage_count) != 0) && !(nv->is_external_gpu)) + if ((atomic64_read(&nvl->usage_count) != 0) && !(nv->is_external_gpu)) { nv_printf(NV_DBG_ERRORS, "NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count!\n", @@ -2189,7 +2192,7 @@ nv_pci_remove(struct pci_dev *pci_dev) * We can't return from this function without corrupting state, so we wait for * the usage count to go to zero. 
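 *
 * Throughout this patch the per-device and per-allocation usage counters
 * move from atomic_t (accessed through the NV_ATOMIC_* wrappers) to
 * atomic64_t, which is presumably also why the NV_S32_MAX/-EMFILE guard in
 * nv_open_device() could be dropped.  A minimal sketch of the resulting
 * reference-counting pattern (names here are hypothetical):
 *
 *   static atomic64_t example_usage_count = ATOMIC64_INIT(0);
 *
 *   static void example_get(void)
 *   {
 *       atomic64_inc(&example_usage_count);
 *   }
 *
 *   static bool example_put(void)
 *   {
 *       // atomic64_dec_and_test() returns true only on the transition to
 *       // zero, i.e. when the last reference is dropped.
 *       return atomic64_dec_and_test(&example_usage_count);
 *   }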
*/ - while (NV_ATOMIC_READ(nvl->usage_count) != 0) + while (atomic64_read(&nvl->usage_count) != 0) { /* @@ -2267,7 +2270,7 @@ nv_pci_remove(struct pci_dev *pci_dev) nvl->sysfs_config_file = NULL; } - if (NV_ATOMIC_READ(nvl->usage_count) == 0) + if (atomic64_read(&nvl->usage_count) == 0) { nv_lock_destroy_locks(sp, nv); } @@ -2283,7 +2286,7 @@ nv_pci_remove(struct pci_dev *pci_dev) num_nv_devices--; - if (NV_ATOMIC_READ(nvl->usage_count) == 0) + if (atomic64_read(&nvl->usage_count) == 0) { NV_PCI_DISABLE_DEVICE(pci_dev); NV_KFREE(nvl, sizeof(nv_linux_state_t)); diff --git a/kernel-open/nvidia/nv-procfs.c b/kernel-open/nvidia/nv-procfs.c index a43cbb0a3..a91123a23 100644 --- a/kernel-open/nvidia/nv-procfs.c +++ b/kernel-open/nvidia/nv-procfs.c @@ -890,7 +890,7 @@ nv_procfs_close_unbind_lock( down(&nvl->ldata_lock); if ((value == 1) && !(nv->flags & NV_FLAG_UNBIND_LOCK)) { - if (NV_ATOMIC_READ(nvl->usage_count) == 0) + if (atomic64_read(&nvl->usage_count) == 0) rm_unbind_lock(sp, nv); if (nv->flags & NV_FLAG_UNBIND_LOCK) diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c index 34215572c..9ad14f1d9 100644 --- a/kernel-open/nvidia/nv.c +++ b/kernel-open/nvidia/nv.c @@ -419,7 +419,7 @@ nv_alloc_t *nvos_create_alloc( return NULL; } - NV_ATOMIC_SET(at->usage_count, 0); + atomic64_set(&at->usage_count, 0); at->pid = os_get_current_process(); at->dev = dev; @@ -434,7 +434,7 @@ int nvos_free_alloc( if (at == NULL) return -1; - if (NV_ATOMIC_READ(at->usage_count)) + if (atomic64_read(&at->usage_count)) return 1; kvfree(at->page_table); @@ -1656,13 +1656,10 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp) return -ENODEV; } - if (unlikely(NV_ATOMIC_READ(nvl->usage_count) >= NV_S32_MAX)) - return -EMFILE; - if ( ! (nv->flags & NV_FLAG_OPEN)) { /* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */ - if (NV_ATOMIC_READ(nvl->usage_count) != 0) + if (atomic64_read(&nvl->usage_count) != 0) { NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "Minor device %u is referenced without being open!\n", @@ -1684,7 +1681,7 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp) nv_assert_not_in_gpu_exclusion_list(sp, nv); - NV_ATOMIC_INC(nvl->usage_count); + atomic64_inc(&nvl->usage_count); return 0; } @@ -2100,7 +2097,7 @@ static void nv_close_device(nv_state_t *nv, nvidia_stack_t *sp) { nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv); - if (NV_ATOMIC_READ(nvl->usage_count) == 0) + if (atomic64_read(&nvl->usage_count) == 0) { nv_printf(NV_DBG_ERRORS, "NVRM: Attempting to close unopened minor device %u!\n", @@ -2109,7 +2106,7 @@ static void nv_close_device(nv_state_t *nv, nvidia_stack_t *sp) return; } - if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count)) + if (atomic64_dec_and_test(&nvl->usage_count)) nv_stop_device(nv, sp); } @@ -2154,7 +2151,7 @@ nvidia_close_callback( nv_close_device(nv, sp); bRemove = (!NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv)) && - (NV_ATOMIC_READ(nvl->usage_count) == 0) && + (atomic64_read(&nvl->usage_count) == 0) && rm_get_device_remove_flag(sp, nv->gpu_id); nv_free_file_private(nvlfp); @@ -2173,7 +2170,7 @@ nvidia_close_callback( * any cleanup related to linux layer locks and nv linux state struct. * nvidia_pci_remove when scheduled will do necessary cleanup. 
*/ - if ((NV_ATOMIC_READ(nvl->usage_count) == 0) && nv->removed) + if ((atomic64_read(&nvl->usage_count) == 0) && nv->removed) { nv_lock_destroy_locks(sp, nv); NV_KFREE(nvl, sizeof(nv_linux_state_t)); @@ -2693,7 +2690,7 @@ nvidia_ioctl( * Only the current client should have an open file * descriptor for the device, to allow safe offlining. */ - if (NV_ATOMIC_READ(nvl->usage_count) > 1) + if (atomic64_read(&nvl->usage_count) > 1) { status = -EBUSY; goto unlock; @@ -3082,12 +3079,12 @@ nvidia_ctl_open( /* save the nv away in file->private_data */ nvlfp->nvptr = nvl; - if (NV_ATOMIC_READ(nvl->usage_count) == 0) + if (atomic64_read(&nvl->usage_count) == 0) { nv->flags |= (NV_FLAG_OPEN | NV_FLAG_CONTROL); } - NV_ATOMIC_INC(nvl->usage_count); + atomic64_inc(&nvl->usage_count); up(&nvl->ldata_lock); return 0; @@ -3112,7 +3109,7 @@ nvidia_ctl_close( nv_printf(NV_DBG_INFO, "NVRM: nvidia_ctl_close\n"); down(&nvl->ldata_lock); - if (NV_ATOMIC_DEC_AND_TEST(nvl->usage_count)) + if (atomic64_dec_and_test(&nvl->usage_count)) { nv->flags &= ~NV_FLAG_OPEN; } @@ -3275,7 +3272,7 @@ nv_alias_pages( at->guest_id = guest_id; *priv_data = at; - NV_ATOMIC_INC(at->usage_count); + atomic64_inc(&at->usage_count); NV_PRINT_AT(NV_DBG_MEMINFO, at); @@ -3588,7 +3585,7 @@ NV_STATUS NV_API_CALL nv_register_sgt( at->order = get_order(at->num_pages * PAGE_SIZE); - NV_ATOMIC_INC(at->usage_count); + atomic64_inc(&at->usage_count); *priv_data = at; @@ -3619,7 +3616,7 @@ void NV_API_CALL nv_unregister_sgt( *import_priv = at->import_priv; } - if (NV_ATOMIC_DEC_AND_TEST(at->usage_count)) + if (atomic64_dec_and_test(&at->usage_count)) { nvos_free_alloc(at); } @@ -3892,7 +3889,7 @@ NV_STATUS NV_API_CALL nv_alloc_pages( } *priv_data = at; - NV_ATOMIC_INC(at->usage_count); + atomic64_inc(&at->usage_count); NV_PRINT_AT(NV_DBG_MEMINFO, at); @@ -3928,7 +3925,7 @@ NV_STATUS NV_API_CALL nv_free_pages( * This is described in greater detail in the comments above the * nvidia_vma_(open|release)() callbacks in nv-mmap.c. 
*/ - if (!NV_ATOMIC_DEC_AND_TEST(at->usage_count)) + if (!atomic64_dec_and_test(&at->usage_count)) return NV_OK; if (!at->flags.guest && !at->import_sgt) @@ -3957,7 +3954,7 @@ NvBool nv_lock_init_locks NV_INIT_MUTEX(&nvl->mmap_lock); NV_INIT_MUTEX(&nvl->open_q_lock); - NV_ATOMIC_SET(nvl->usage_count, 0); + atomic64_set(&nvl->usage_count, 0); if (!rm_init_event_locks(sp, nv)) return NV_FALSE; diff --git a/kernel-open/nvidia/nvidia.Kbuild b/kernel-open/nvidia/nvidia.Kbuild index 140070c32..a04d6198b 100644 --- a/kernel-open/nvidia/nvidia.Kbuild +++ b/kernel-open/nvidia/nvidia.Kbuild @@ -141,6 +141,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += icc_get NV_CONFTEST_FUNCTION_COMPILE_TESTS += devm_of_icc_get NV_CONFTEST_FUNCTION_COMPILE_TESTS += icc_put NV_CONFTEST_FUNCTION_COMPILE_TESTS += icc_set_bw +NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_map_ops_has_map_phys NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_buf_ops_has_map NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_buf_ops_has_map_atomic NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_buf_attachment_has_peer2peer @@ -159,6 +160,8 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg NV_CONFTEST_FUNCTION_COMPILE_TESTS += shrinker_alloc +NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_flags_set +NV_CONFTEST_FUNCTION_COMPILE_TESTS += vma_flags_set_word NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_sme_active NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_swiotlb_map_sg_attrs @@ -206,7 +209,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += remove_memory_has_nid_arg NV_CONFTEST_TYPE_COMPILE_TESTS += add_memory_driver_managed_has_mhp_flags_arg NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma -NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_queue_has_trapno_arg NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present NV_CONFTEST_TYPE_COMPILE_TESTS += bus_type_has_iommu_ops @@ -220,6 +222,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += devfreq_has_suspend_freq NV_CONFTEST_TYPE_COMPILE_TESTS += has_enum_pidtype_tgid NV_CONFTEST_TYPE_COMPILE_TESTS += bpmp_mrq_has_strap_set NV_CONFTEST_TYPE_COMPILE_TESTS += register_shrinker_has_format_arg +NV_CONFTEST_TYPE_COMPILE_TESTS += pci_resize_resource_has_exclude_bars_arg NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build diff --git a/kernel-open/nvidia/os-interface.c b/kernel-open/nvidia/os-interface.c index 872ddf01d..37cbbee54 100644 --- a/kernel-open/nvidia/os-interface.c +++ b/kernel-open/nvidia/os-interface.c @@ -371,7 +371,7 @@ NvBool NV_API_CALL os_semaphore_may_sleep(void) NvBool NV_API_CALL os_is_isr(void) { - return (in_irq()); + return (nv_in_hardirq()); } // return TRUE if the caller is the super-user diff --git a/src/common/inc/nvBldVer.h b/src/common/inc/nvBldVer.h index 1f3ade779..8fbebbd3a 100644 --- a/src/common/inc/nvBldVer.h +++ b/src/common/inc/nvBldVer.h @@ -36,25 +36,25 @@ // and then checked back in. 
You cannot make changes to these sections without // corresponding changes to the buildmeister script #ifndef NV_BUILD_BRANCH - #define NV_BUILD_BRANCH r591_37 + #define NV_BUILD_BRANCH r591_47 #endif #ifndef NV_PUBLIC_BRANCH - #define NV_PUBLIC_BRANCH r591_37 + #define NV_PUBLIC_BRANCH r591_47 #endif #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) -#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r590/r591_37-155" -#define NV_BUILD_CHANGELIST_NUM (36926008) +#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r590/r591_47-174" +#define NV_BUILD_CHANGELIST_NUM (37007394) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "rel/gpu_drv/r590/r591_37-155" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36926008) +#define NV_BUILD_NAME "rel/gpu_drv/r590/r591_47-174" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (37007394) #else /* Windows builds */ -#define NV_BUILD_BRANCH_VERSION "r591_37-1" -#define NV_BUILD_CHANGELIST_NUM (36926008) +#define NV_BUILD_BRANCH_VERSION "r591_47-1" +#define NV_BUILD_CHANGELIST_NUM (37007394) #define NV_BUILD_TYPE "Official" -#define NV_BUILD_NAME "591.38" -#define NV_LAST_OFFICIAL_CHANGELIST_NUM (36926008) +#define NV_BUILD_NAME "591.51" +#define NV_LAST_OFFICIAL_CHANGELIST_NUM (37007394) #define NV_BUILD_BRANCH_BASE_VERSION R590 #endif // End buildmeister python edited section diff --git a/src/common/inc/nvUnixVersion.h b/src/common/inc/nvUnixVersion.h index 09cb0eec6..d598dd66b 100644 --- a/src/common/inc/nvUnixVersion.h +++ b/src/common/inc/nvUnixVersion.h @@ -5,7 +5,7 @@ (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) || \ defined(NV_DCECORE) -#define NV_VERSION_STRING "590.44.01" +#define NV_VERSION_STRING "590.48.01" #else diff --git a/src/common/inc/swref/published/ampere/ga102/dev_riscv_pri.h b/src/common/inc/swref/published/ampere/ga102/dev_riscv_pri.h index 8d629f22a..c7fdf1b9c 100644 --- a/src/common/inc/swref/published/ampere/ga102/dev_riscv_pri.h +++ b/src/common/inc/swref/published/ampere/ga102/dev_riscv_pri.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2003-2024 NVIDIA CORPORATION & AFFILIATES + * SPDX-FileCopyrightText: Copyright (c) 2003-2025 NVIDIA CORPORATION & AFFILIATES * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -27,10 +27,31 @@ #define NV_FALCON2_GSP_BASE 0x00111000 #define NV_PRISCV_RISCV_IRQMASK 0x00000528 /* R-I4R */ #define NV_PRISCV_RISCV_IRQDEST 0x0000052c /* RW-4R */ +#define NV_PRISCV_RISCV_IRQDELEG 0x00000534 /* RWI4R */ +#define NV_PRISCV_RISCV_RPC 0x000003ec /* R--4R */ #define NV_PRISCV_RISCV_CPUCTL 0x00000388 /* RWI4R */ #define NV_PRISCV_RISCV_CPUCTL_ACTIVE_STAT 7:7 /* R-IVF */ #define NV_PRISCV_RISCV_CPUCTL_ACTIVE_STAT_ACTIVE 0x00000001 /* R---V */ #define NV_PRISCV_RISCV_CPUCTL_HALTED 4:4 /* R-IVF */ +#define NV_PRISCV_RISCV_ICD_CMD 0x000003d0 /* RW-4R */ +#define NV_PRISCV_RISCV_ICD_ADDR0 0x000003d4 /* RW-4R */ +#define NV_PRISCV_RISCV_ICD_ADDR1 0x000003d8 /* RW-4R */ +#define NV_PRISCV_RISCV_ICD_RDATA0 0x000003e4 /* R--4R */ +#define NV_PRISCV_RISCV_ICD_RDATA1 0x000003e8 /* R--4R */ +#define NV_PRISCV_RISCV_TRACECTL 0x00000400 /* RW-4R */ +#define NV_PRISCV_RISCV_TRACECTL_FULL 30:30 /* RWIVF */ +#define NV_PRISCV_RISCV_TRACE_RDIDX 0x00000404 /* RW-4R */ +#define NV_PRISCV_RISCV_TRACE_RDIDX_RDIDX 7:0 /* RWIVF */ +#define NV_PRISCV_RISCV_TRACE_RDIDX_MAXIDX 23:16 /* R-IVF */ +#define NV_PRISCV_RISCV_TRACE_WTIDX 0x00000408 /* RW-4R */ +#define NV_PRISCV_RISCV_TRACE_WTIDX_WTIDX 31:24 /* RWIVF */ +#define 
NV_PRISCV_RISCV_TRACEPC_HI 0x00000410 /* RW-4R */ +#define NV_PRISCV_RISCV_TRACEPC_LO 0x0000040c /* RW-4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_STAT 0x00000500 /* RWI4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_INFO 0x00000504 /* R-I4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_ADDR 0x00000508 /* R-I4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_ADDR_HI 0x0000050c /* R-I4R */ +#define NV_PRISCV_RISCV_HUB_ERR_STAT 0x00000510 /* RWI4R */ #define NV_PRISCV_RISCV_BCR_CTRL 0x00000668 /* RWI4R */ #define NV_PRISCV_RISCV_BCR_CTRL_VALID 0:0 /* R-IVF */ #define NV_PRISCV_RISCV_BCR_CTRL_VALID_TRUE 0x00000001 /* R---V */ diff --git a/src/common/inc/swref/published/blackwell/gb202/dev_riscv_pri.h b/src/common/inc/swref/published/blackwell/gb202/dev_riscv_pri.h new file mode 100644 index 000000000..1daf4ad1a --- /dev/null +++ b/src/common/inc/swref/published/blackwell/gb202/dev_riscv_pri.h @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2003-2025 NVIDIA CORPORATION & AFFILIATES + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __gb202_dev_riscv_pri_h__ +#define __gb202_dev_riscv_pri_h__ + +#define NV_PRISCV_RISCV_CPUCTL 0x00000388 /* RW-4R */ +#define NV_PRISCV_RISCV_RPC 0x000003ec /* R--4R */ +#define NV_PRISCV_RISCV_IRQDELEG 0x00000534 /* RW-4R */ +#define NV_PRISCV_RISCV_IRQDEST 0x0000052c /* RW-4R */ +#define NV_PRISCV_RISCV_IRQMASK 0x00000528 /* R--4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_STAT 0x00000420 /* RW-4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_INFO 0x00000424 /* R--4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_ADDR 0x00000428 /* R--4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_ADDR_HI 0x0000042c /* R--4R */ +#define NV_PRISCV_RISCV_HUB_ERR_STAT 0x00000430 /* RW-4R */ + +#endif // __gb202_dev_riscv_pri_h__ diff --git a/src/common/inc/swref/published/turing/tu102/dev_falcon_v4.h b/src/common/inc/swref/published/turing/tu102/dev_falcon_v4.h index 7b89a47a7..25a9ef152 100644 --- a/src/common/inc/swref/published/turing/tu102/dev_falcon_v4.h +++ b/src/common/inc/swref/published/turing/tu102/dev_falcon_v4.h @@ -33,6 +33,7 @@ #define NV_PFALCON_FALCON_IRQSTAT_HALT_TRUE 0x00000001 /* R---V */ #define NV_PFALCON_FALCON_IRQSTAT_SWGEN0 6:6 /* R-XVF */ #define NV_PFALCON_FALCON_IRQSTAT_SWGEN0_TRUE 0x00000001 /* R---V */ +#define NV_PFALCON_FALCON_IRQMODE 0x0000000c /* RW-4R */ #define NV_PFALCON_FALCON_IRQMSET 0x00000010 /* -W-4R */ #define NV_PFALCON_FALCON_IRQMCLR 0x00000014 /* -W-4R */ #define NV_PFALCON_FALCON_IRQMASK 0x00000018 /* R--4R */ diff --git a/src/common/inc/swref/published/turing/tu102/dev_fbif_v4.h b/src/common/inc/swref/published/turing/tu102/dev_fbif_v4.h index 126c4a0eb..cba40a79b 100644 --- a/src/common/inc/swref/published/turing/tu102/dev_fbif_v4.h +++ b/src/common/inc/swref/published/turing/tu102/dev_fbif_v4.h @@ -30,8 +30,15 @@ #define NV_PFALCON_FBIF_TRANSCFG_TARGET_COHERENT_SYSMEM 0x00000001 /* R---V */ #define NV_PFALCON_FBIF_TRANSCFG_MEM_TYPE 2:2 /* RWIVF */ #define NV_PFALCON_FBIF_TRANSCFG_MEM_TYPE_PHYSICAL 0x00000001 /* R---V */ +#define NV_PFALCON_FBIF_INSTBLK 0x00000020 /* R--4R */ #define NV_PFALCON_FBIF_CTL 0x00000024 /* RW-4R */ #define NV_PFALCON_FBIF_CTL_ALLOW_PHYS_NO_CTX 7:7 /* RWIVF */ #define NV_PFALCON_FBIF_CTL_ALLOW_PHYS_NO_CTX_ALLOW 0x00000001 /* RW--V */ +#define NV_PFALCON_FBIF_THROTTLE 0x0000002c /* RW-4R */ +#define NV_PFALCON_FBIF_ACHK_BLK(i) (0x00000030+(i)*8) /* RW-4A */ +#define NV_PFALCON_FBIF_ACHK_BLK__SIZE_1 2 /* */ +#define NV_PFALCON_FBIF_ACHK_CTL(i) (0x00000034+(i)*8) /* RW-4A */ +#define NV_PFALCON_FBIF_ACHK_CTL__SIZE_1 2 /* */ +#define NV_PFALCON_FBIF_CG1 0x00000074 /* RW-4R */ #endif // __tu102_dev_fbif_v4_h__ diff --git a/src/common/inc/swref/published/turing/tu102/dev_riscv_pri.h b/src/common/inc/swref/published/turing/tu102/dev_riscv_pri.h index d5967ba2e..97f451d68 100644 --- a/src/common/inc/swref/published/turing/tu102/dev_riscv_pri.h +++ b/src/common/inc/swref/published/turing/tu102/dev_riscv_pri.h @@ -28,7 +28,42 @@ #define NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS 0x00000240 /* R-I4R */ #define NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS_ACTIVE_STAT 0:0 /* R-IVF */ #define NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS_ACTIVE_STAT_ACTIVE 0x00000001 /* R---V */ +#define NV_PRISCV_RISCV_CPUCTL 0x00000268 /* RWI4R */ #define NV_PRISCV_RISCV_IRQMASK 0x000002b4 /* R-I4R */ #define NV_PRISCV_RISCV_IRQDEST 0x000002b8 /* RW-4R */ +#define NV_PRISCV_RISCV_ICD_CMD 0x00000300 /* RW-4R */ +#define NV_PRISCV_RISCV_ICD_CMD_OPC 4:0 /* RW-VF */ +#define NV_PRISCV_RISCV_ICD_CMD_OPC_STOP 0x00000000 /* RW--V */ +#define NV_PRISCV_RISCV_ICD_CMD_OPC_RREG 0x00000008 /* RW--V 
*/ +#define NV_PRISCV_RISCV_ICD_CMD_OPC_RDM 0x0000000a /* RW--V */ +#define NV_PRISCV_RISCV_ICD_CMD_OPC_RSTAT 0x0000000e /* RW--V */ +#define NV_PRISCV_RISCV_ICD_CMD_OPC_RCSR 0x00000010 /* RW--V */ +#define NV_PRISCV_RISCV_ICD_CMD_OPC_RPC 0x00000012 /* RW--V */ +#define NV_PRISCV_RISCV_ICD_CMD_SZ 7:6 /* RW-VF */ +#define NV_PRISCV_RISCV_ICD_CMD_IDX 12:8 /* RW-VF */ +#define NV_PRISCV_RISCV_ICD_CMD_ERROR 14:14 /* R-IVF */ +#define NV_PRISCV_RISCV_ICD_CMD_ERROR_TRUE 0x00000001 /* R---V */ +#define NV_PRISCV_RISCV_ICD_CMD_ERROR_FALSE 0x00000000 /* R-I-V */ +#define NV_PRISCV_RISCV_ICD_CMD_BUSY 15:15 /* R-IVF */ +#define NV_PRISCV_RISCV_ICD_CMD_BUSY_FALSE 0x00000000 /* R-I-V */ +#define NV_PRISCV_RISCV_ICD_CMD_BUSY_TRUE 0x00000001 /* R---V */ +#define NV_PRISCV_RISCV_ICD_CMD_PARM 31:16 /* RW-VF */ +#define NV_PRISCV_RISCV_ICD_RDATA0 0x0000030c /* R--4R */ +#define NV_PRISCV_RISCV_ICD_RDATA1 0x00000318 /* R--4R */ +#define NV_PRISCV_RISCV_ICD_ADDR0 0x00000304 /* RW-4R */ +#define NV_PRISCV_RISCV_ICD_ADDR1 0x00000310 /* RW-4R */ +#define NV_PRISCV_RISCV_TRACECTL 0x00000344 /* RW-4R */ +#define NV_PRISCV_RISCV_TRACECTL_FULL 30:30 /* RWIVF */ +#define NV_PRISCV_RISCV_TRACE_RDIDX 0x00000348 /* RW-4R */ +#define NV_PRISCV_RISCV_TRACE_RDIDX_RDIDX 7:0 /* RWIVF */ +#define NV_PRISCV_RISCV_TRACE_RDIDX_MAXIDX 23:16 /* R-IVF */ +#define NV_PRISCV_RISCV_TRACE_WTIDX 0x0000034c /* RW-4R */ +#define NV_PRISCV_RISCV_TRACE_WTIDX_WTIDX 31:24 /* RWIVF */ +#define NV_PRISCV_RISCV_TRACEPC_HI 0x00000350 /* RW-4R */ +#define NV_PRISCV_RISCV_TRACEPC_LO 0x00000354 /* RW-4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_STAT 0x00000360 /* RWI4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_INFO 0x00000364 /* R-I4R */ +#define NV_PRISCV_RISCV_PRIV_ERR_ADDR 0x00000368 /* R-I4R */ +#define NV_PRISCV_RISCV_HUB_ERR_STAT 0x0000036c /* RWI4R */ #endif // __tu102_dev_riscv_pri_h__ diff --git a/src/common/modeset/hdmipacket/nvhdmipkt_C671.c b/src/common/modeset/hdmipacket/nvhdmipkt_C671.c index eb4d3096f..abdb91f46 100644 --- a/src/common/modeset/hdmipacket/nvhdmipkt_C671.c +++ b/src/common/modeset/hdmipacket/nvhdmipkt_C671.c @@ -1136,17 +1136,9 @@ hdmiQueryFRLConfigC671(NVHDMIPKT_CLASS *pThis, NvU32 bppMinX16Itr, bppMaxX16Itr; NvBool bHasPreCalcFRLData = NV_FALSE; - NvBool forceFRLRateDSC = pClientCtrl->forceFRLRate; - HDMI_FRL_DATA_RATE requestedFRLRate = pClientCtrl->frlRate; - #if defined(NVHDMIPKT_NVKMS) - NvU32 rr = (pVidTransInfo->pTiming->pclk * (NvU64)10000) / - (pVidTransInfo->pTiming->HTotal * (NvU64)pVidTransInfo->pTiming->VTotal); - - if (!pVidTransInfo->pTiming->interlaced && (rr >= 480)) { - forceFRLRateDSC = NV_TRUE; - requestedFRLRate = dscMaxFRLRate; - } + NvU32 hVisible, vVisible, rr; + NvBool clampBpp; #endif // DSC_All_bpp = 1: @@ -1256,16 +1248,16 @@ hdmiQueryFRLConfigC671(NVHDMIPKT_CLASS *pThis, frlParams.compressionInfo.hSlices = NV_UNSIGNED_DIV_CEIL(pVidTransInfo->pTiming->HVisible, pClientCtrl->sliceWidth); } - if (forceFRLRateDSC) + if (pClientCtrl->forceFRLRate) { - if (requestedFRLRate > dscMaxFRLRate) + if (pClientCtrl->frlRate > dscMaxFRLRate) { result = NVHDMIPKT_FAIL; goto frlQuery_fail; } - minFRLRateItr = requestedFRLRate; - maxFRLRateItr = requestedFRLRate; + minFRLRateItr = pClientCtrl->frlRate; + maxFRLRateItr = pClientCtrl->frlRate; } if (pClientCtrl->forceBppx16) @@ -1274,6 +1266,23 @@ hdmiQueryFRLConfigC671(NVHDMIPKT_CLASS *pThis, bppMaxX16Itr = pClientCtrl->bitsPerPixelX16; } +#if defined(NVHDMIPKT_NVKMS) + hVisible = pVidTransInfo->pTiming->HVisible; + vVisible = pVidTransInfo->pTiming->VVisible; + + rr = 
(pVidTransInfo->pTiming->pclk * (NvU64)10000) / + (pVidTransInfo->pTiming->HTotal * (NvU64)pVidTransInfo->pTiming->VTotal); + + clampBpp = ((rr >= 480) || ((rr >= 165) && (hVisible == 5120) && (vVisible == 2160))) && + (!pVidTransInfo->pTiming->interlaced) && + (bppMinX16Itr <= 8 * 16) && + (bppMaxX16Itr >= 8 * 16); + + if (clampBpp) { + bppMaxX16Itr = 8 * 16; + } +#endif + // Determine Primary Compressed Format // First determine the FRL rate at which video transport is possible even at bppMin // Then iterate over bppTarget - start at max n decrement until we hit bppMin. The max bpp for which diff --git a/src/common/modeset/timing/nvt_gtf.c b/src/common/modeset/timing/nvt_gtf.c index 405a16ce4..37e5d207b 100644 --- a/src/common/modeset/timing/nvt_gtf.c +++ b/src/common/modeset/timing/nvt_gtf.c @@ -103,6 +103,7 @@ NVT_STATUS NvTiming_CalcGTF(NvU32 width, NvU32 height, NvU32 rr, NvU32 flag, NVT // A proper way to calculate fixed HTotal*VTotal*Rr/10000 pT->pclk = axb_div_c(dwHTCells*dwVTotal, dwRefreshRate, 10000/NVT_GTF_CELL_GRAN); + pT->pclk1khz = pT->pclk * 10; pT->HSyncPol = NVT_H_SYNC_NEGATIVE; pT->VSyncPol = NVT_V_SYNC_POSITIVE; @@ -111,7 +112,7 @@ NVT_STATUS NvTiming_CalcGTF(NvU32 width, NvU32 height, NvU32 rr, NvU32 flag, NVT // fill in the extra timing info pT->etc.flag = 0; pT->etc.rr = (NvU16)rr; - pT->etc.rrx1k = axb_div_c((NvU32)pT->pclk, (NvU32)10000*(NvU32)1000, (NvU32)pT->HTotal*(NvU32)pT->VTotal); + pT->etc.rrx1k = axb_div_c((NvU32)pT->pclk1khz, (NvU32)1000*(NvU32)1000, (NvU32)pT->HTotal*(NvU32)pT->VTotal); pT->etc.aspect = 0; pT->etc.rep = 0x1; pT->etc.status = NVT_STATUS_GTF; @@ -128,6 +129,7 @@ NVT_STATUS NvTiming_CalcGTF(NvU32 width, NvU32 height, NvU32 rr, NvU32 flag, NVT pT->interlaced = NVT_INTERLACED_NO_EXTRA_VBLANK_ON_FIELD2; pT->pclk >>= 1; + pT->pclk1khz >>= 1; pT->VTotal >>= 1; pT->VVisible = (pT->VVisible + 1) / 2; } diff --git a/src/common/sdk/nvidia/inc/ctrl/ctrl0073/ctrl0073dfp.h b/src/common/sdk/nvidia/inc/ctrl/ctrl0073/ctrl0073dfp.h index 0d74e7ada..9b5213aa8 100644 --- a/src/common/sdk/nvidia/inc/ctrl/ctrl0073/ctrl0073dfp.h +++ b/src/common/sdk/nvidia/inc/ctrl/ctrl0073/ctrl0073dfp.h @@ -813,6 +813,9 @@ typedef struct NV0073_CTRL_DFP_DSC_CRC_CONTROL_PARAMS { * productId (in) * Specifies the product ID of panel obtained from the EDID. This * parameter is expected to be non-zero only in case of internal panel. + * tconId (out) + * RM provides an enumerated TCON specific value to help the client + * identify the panel TCON. 
Clients can refer to the enum from sdk/nvidia/inc/dds_tcon_db.h * * Possible status values returned are: * NV_OK @@ -830,6 +833,7 @@ typedef struct NV0073_CTRL_CMD_DFP_INIT_MUX_DATA_PARAMS { NvU32 displayId; NvU16 manfId; NvU16 productId; + NvU16 tconId; } NV0073_CTRL_CMD_DFP_INIT_MUX_DATA_PARAMS; diff --git a/src/nvidia/arch/nvalloc/unix/src/osapi.c b/src/nvidia/arch/nvalloc/unix/src/osapi.c index bff9091f8..f5db7a0e9 100644 --- a/src/nvidia/arch/nvalloc/unix/src/osapi.c +++ b/src/nvidia/arch/nvalloc/unix/src/osapi.c @@ -1330,7 +1330,7 @@ RmDmabufVerifyMemHandle( } // Check if hMemory belongs to the same pGpu - if ((pMemDesc->pGpu != pGpu) && + if ((pMemDesc->pGpu != pGpu) || (pSrcMemory->pGpu != pGpu)) { return NV_ERR_INVALID_OBJECT_PARENT; diff --git a/src/nvidia/generated/g_gpu_nvoc.c b/src/nvidia/generated/g_gpu_nvoc.c index d366543c7..1ce623e7f 100644 --- a/src/nvidia/generated/g_gpu_nvoc.c +++ b/src/nvidia/generated/g_gpu_nvoc.c @@ -2282,7 +2282,7 @@ static void __nvoc_init_funcTable_OBJGPU_2(OBJGPU *pThis) { pThis->__gpuIsDevModeEnabledInHw__ = &gpuIsDevModeEnabledInHw_3dd2c9; } else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x60000000UL) ) || - ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000006UL) )) /* ChipHal: GB100 | GB102 | GB110 | GB112 */ + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: GB100 | GB102 | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ { pThis->__gpuIsDevModeEnabledInHw__ = &gpuIsDevModeEnabledInHw_GB100; } diff --git a/src/nvidia/generated/g_kernel_falcon_nvoc.c b/src/nvidia/generated/g_kernel_falcon_nvoc.c index e6eb9ee69..d0080ddcf 100644 --- a/src/nvidia/generated/g_kernel_falcon_nvoc.c +++ b/src/nvidia/generated/g_kernel_falcon_nvoc.c @@ -695,6 +695,214 @@ static void __nvoc_init_funcTable_KernelFalcon_1(KernelFalcon *pThis, GpuHalspec } } + // kflcnRiscvIcdWaitForIdle -- halified (2 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0ffe0UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnRiscvIcdWaitForIdle__ = &kflcnRiscvIcdWaitForIdle_TU102; + } + // default + else + { + pThis->__kflcnRiscvIcdWaitForIdle__ = &kflcnRiscvIcdWaitForIdle_46f6a7; + } + + // kflcnRiscvIcdReadMem -- halified (2 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0ffe0UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnRiscvIcdReadMem__ = &kflcnRiscvIcdReadMem_TU102; + } + // default + else + { + pThis->__kflcnRiscvIcdReadMem__ = &kflcnRiscvIcdReadMem_46f6a7; + } + + // kflcnRiscvIcdReadReg -- halified (2 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0ffe0UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 
0x00000fe6UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnRiscvIcdReadReg__ = &kflcnRiscvIcdReadReg_TU102; + } + // default + else + { + pThis->__kflcnRiscvIcdReadReg__ = &kflcnRiscvIcdReadReg_46f6a7; + } + + // kflcnRiscvIcdRcsr -- halified (2 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0ffe0UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnRiscvIcdRcsr__ = &kflcnRiscvIcdRcsr_TU102; + } + // default + else + { + pThis->__kflcnRiscvIcdRcsr__ = &kflcnRiscvIcdRcsr_46f6a7; + } + + // kflcnRiscvIcdRstat -- halified (2 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0ffe0UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnRiscvIcdRstat__ = &kflcnRiscvIcdRstat_TU102; + } + // default + else + { + pThis->__kflcnRiscvIcdRstat__ = &kflcnRiscvIcdRstat_46f6a7; + } + + // kflcnRiscvIcdRpc -- halified (2 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0ffe0UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnRiscvIcdRpc__ = &kflcnRiscvIcdRpc_TU102; + } + // default + else + { + pThis->__kflcnRiscvIcdRpc__ = &kflcnRiscvIcdRpc_46f6a7; + } + + // kflcnRiscvIcdHalt -- halified (2 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0ffe0UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnRiscvIcdHalt__ = &kflcnRiscvIcdHalt_TU102; + } + // default + else + { + pThis->__kflcnRiscvIcdHalt__ = &kflcnRiscvIcdHalt_46f6a7; + } + + // kflcnIcdReadCmdReg -- halified (3 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000007e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 */ + { + pThis->__kflcnIcdReadCmdReg__ = &kflcnIcdReadCmdReg_TU102; + } + else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0f800UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: GA102 | GA103 | GA104 
| GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnIcdReadCmdReg__ = &kflcnIcdReadCmdReg_GA102; + } + // default + else + { + pThis->__kflcnIcdReadCmdReg__ = &kflcnIcdReadCmdReg_4a4dee; + } + + // kflcnRiscvIcdReadRdata -- halified (3 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000007e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 */ + { + pThis->__kflcnRiscvIcdReadRdata__ = &kflcnRiscvIcdReadRdata_TU102; + } + else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0f800UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnRiscvIcdReadRdata__ = &kflcnRiscvIcdReadRdata_GA102; + } + // default + else + { + pThis->__kflcnRiscvIcdReadRdata__ = &kflcnRiscvIcdReadRdata_4a4dee; + } + + // kflcnRiscvIcdWriteAddress -- halified (3 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000007e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 */ + { + pThis->__kflcnRiscvIcdWriteAddress__ = &kflcnRiscvIcdWriteAddress_TU102; + } + else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0f800UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnRiscvIcdWriteAddress__ = &kflcnRiscvIcdWriteAddress_GA102; + } + // default + else + { + pThis->__kflcnRiscvIcdWriteAddress__ = &kflcnRiscvIcdWriteAddress_b3696a; + } + + // kflcnIcdWriteCmdReg -- halified (3 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000007e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 */ + { + pThis->__kflcnIcdWriteCmdReg__ = &kflcnIcdWriteCmdReg_TU102; + } + else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0f800UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnIcdWriteCmdReg__ = &kflcnIcdWriteCmdReg_GA102; + } + // default + else + { + pThis->__kflcnIcdWriteCmdReg__ = &kflcnIcdWriteCmdReg_b3696a; + } + + // kflcnCoreDumpPc -- halified (2 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0f800UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnCoreDumpPc__ = &kflcnCoreDumpPc_GA102; + } + // default + else + { + pThis->__kflcnCoreDumpPc__ = &kflcnCoreDumpPc_46f6a7; + } + + // 
kflcnDumpCoreRegs -- halified (4 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000007e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 */ + { + pThis->__kflcnDumpCoreRegs__ = &kflcnDumpCoreRegs_TU102; + } + else if (( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe0UL) )) /* ChipHal: GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnDumpCoreRegs__ = &kflcnDumpCoreRegs_GB202; + } + else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0f800UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000006UL) )) /* ChipHal: GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 */ + { + pThis->__kflcnDumpCoreRegs__ = &kflcnDumpCoreRegs_GA102; + } + // default + else + { + pThis->__kflcnDumpCoreRegs__ = &kflcnDumpCoreRegs_b3696a; + } + + // kflcnDumpTracepc -- halified (3 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000007e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 */ + { + pThis->__kflcnDumpTracepc__ = &kflcnDumpTracepc_TU102; + } + else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0f800UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnDumpTracepc__ = &kflcnDumpTracepc_GA102; + } + // default + else + { + pThis->__kflcnDumpTracepc__ = &kflcnDumpTracepc_b3696a; + } + + // kflcnDumpPeripheralRegs -- halified (2 hals) body + if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0xf1f0ffe0UL) ) || + ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe6UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 | GB100 | GB102 | GB10B | GB110 | GB112 | GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + { + pThis->__kflcnDumpPeripheralRegs__ = &kflcnDumpPeripheralRegs_TU102; + } + // default + else + { + pThis->__kflcnDumpPeripheralRegs__ = &kflcnDumpPeripheralRegs_b3696a; + } + // kflcnGetEccInterruptMask -- halified (2 hals) body if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x60000000UL) ) || ( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000006UL) )) /* ChipHal: GB100 | GB102 | GB110 | GB112 */ @@ -739,13 +947,13 @@ static void __nvoc_init_funcTable_KernelFalcon_1(KernelFalcon *pThis, GpuHalspec // kflcnGetWFL0Offset -- virtual halified (2 hals) inherited (kcrashcatEngine) base (kcrashcatEngine) pThis->__kflcnGetWFL0Offset__ = &__nvoc_up_thunk_KernelCrashCatEngine_kflcnGetWFL0Offset; -} // End __nvoc_init_funcTable_KernelFalcon_1 with approximately 88 basic block(s). +} // End __nvoc_init_funcTable_KernelFalcon_1 with approximately 125 basic block(s). -// Initialize vtable(s) for 38 virtual method(s). +// Initialize vtable(s) for 53 virtual method(s). 
void __nvoc_init_funcTable_KernelFalcon(KernelFalcon *pThis, GpuHalspecOwner *pGpuhalspecowner, RmHalspecOwner *pRmhalspecowner) { - // Initialize vtable(s) with 28 per-object function pointer(s). + // Initialize vtable(s) with 43 per-object function pointer(s). __nvoc_init_funcTable_KernelFalcon_1(pThis, pGpuhalspecowner, pRmhalspecowner); } diff --git a/src/nvidia/generated/g_kernel_falcon_nvoc.h b/src/nvidia/generated/g_kernel_falcon_nvoc.h index 95bf9a759..1d97f60ef 100644 --- a/src/nvidia/generated/g_kernel_falcon_nvoc.h +++ b/src/nvidia/generated/g_kernel_falcon_nvoc.h @@ -51,6 +51,7 @@ extern "C" { #include "core/core.h" #include "gpu/falcon/falcon_common.h" +#include "gpu/falcon/kernel_falcon_core_dump.h" #include "gpu/falcon/kernel_crashcat_engine.h" #include "gpu/intr/intr_service.h" @@ -119,7 +120,7 @@ struct KernelFalcon { struct KernelCrashCatEngine *__nvoc_pbase_KernelCrashCatEngine; // kcrashcatEngine super struct KernelFalcon *__nvoc_pbase_KernelFalcon; // kflcn - // Vtable with 28 per-object function pointers + // Vtable with 43 per-object function pointers NvU32 (*__kflcnRegRead__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32); // virtual halified (3 hals) override (kcrashcatEngine) base (kcrashcatEngine) body void (*__kflcnRegWrite__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32, NvU32); // virtual halified (3 hals) override (kcrashcatEngine) base (kcrashcatEngine) body NvU32 (*__kflcnRiscvRegRead__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32); // halified (3 hals) body @@ -142,6 +143,21 @@ struct KernelFalcon { void (*__kflcnIntrRetrigger__)(struct OBJGPU *, struct KernelFalcon * /*this*/); // halified (3 hals) body NvU32 (*__kflcnMaskImemAddr__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32); // halified (4 hals) body NvU32 (*__kflcnMaskDmemAddr__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32); // virtual halified (4 hals) override (kcrashcatEngine) base (kcrashcatEngine) body + NV_STATUS (*__kflcnRiscvIcdWaitForIdle__)(struct OBJGPU *, struct KernelFalcon * /*this*/); // halified (2 hals) body + NV_STATUS (*__kflcnRiscvIcdReadMem__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU64, NvU64, NvU64 *); // halified (2 hals) body + NV_STATUS (*__kflcnRiscvIcdReadReg__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32, NvU64 *); // halified (2 hals) body + NV_STATUS (*__kflcnRiscvIcdRcsr__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32, NvU64 *); // halified (2 hals) body + NV_STATUS (*__kflcnRiscvIcdRstat__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32, NvU64 *); // halified (2 hals) body + NV_STATUS (*__kflcnRiscvIcdRpc__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU64 *); // halified (2 hals) body + NV_STATUS (*__kflcnRiscvIcdHalt__)(struct OBJGPU *, struct KernelFalcon * /*this*/); // halified (2 hals) body + NvU32 (*__kflcnIcdReadCmdReg__)(struct OBJGPU *, struct KernelFalcon * /*this*/); // halified (3 hals) body + NvU64 (*__kflcnRiscvIcdReadRdata__)(struct OBJGPU *, struct KernelFalcon * /*this*/); // halified (3 hals) body + void (*__kflcnRiscvIcdWriteAddress__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU64); // halified (3 hals) body + void (*__kflcnIcdWriteCmdReg__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32); // halified (3 hals) body + NV_STATUS (*__kflcnCoreDumpPc__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU64 *); // halified (2 hals) body + void (*__kflcnDumpCoreRegs__)(struct OBJGPU *, struct KernelFalcon * /*this*/, CoreDumpRegs *); // 
halified (4 hals) body + void (*__kflcnDumpTracepc__)(struct OBJGPU *, struct KernelFalcon * /*this*/, CoreDumpRegs *); // halified (3 hals) body + void (*__kflcnDumpPeripheralRegs__)(struct OBJGPU *, struct KernelFalcon * /*this*/, CoreDumpRegs *); // halified (2 hals) body NvU32 (*__kflcnGetEccInterruptMask__)(struct OBJGPU *, struct KernelFalcon * /*this*/); // halified (2 hals) body NV_STATUS (*__kflcnGetFatalHwErrorStatus__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32 *); // halified (2 hals) body const char * (*__kflcnFatalHwErrorCodeToString__)(struct OBJGPU *, struct KernelFalcon * /*this*/, NvU32, NvBool); // halified (2 hals) @@ -224,6 +240,26 @@ static inline void kflcnConfigureEngine(struct OBJGPU *pGpu, struct KernelFalcon #define kflcnConfigureEngine(pGpu, pKernelFalcon, pFalconConfig) kflcnConfigureEngine_IMPL(pGpu, pKernelFalcon, pFalconConfig) #endif // __nvoc_kernel_falcon_h_disabled +NV_STATUS kflcnCoreDumpNondestructive_IMPL(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 verbosity); +#ifdef __nvoc_kernel_falcon_h_disabled +static inline NV_STATUS kflcnCoreDumpNondestructive(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 verbosity) { + NV_ASSERT_FAILED_PRECOMP("KernelFalcon was disabled!"); + return NV_ERR_NOT_SUPPORTED; +} +#else // __nvoc_kernel_falcon_h_disabled +#define kflcnCoreDumpNondestructive(pGpu, pKernelFlcn, verbosity) kflcnCoreDumpNondestructive_IMPL(pGpu, pKernelFlcn, verbosity) +#endif // __nvoc_kernel_falcon_h_disabled + +NV_STATUS kflcnCoreDumpDestructive_IMPL(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn); +#ifdef __nvoc_kernel_falcon_h_disabled +static inline NV_STATUS kflcnCoreDumpDestructive(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { + NV_ASSERT_FAILED_PRECOMP("KernelFalcon was disabled!"); + return NV_ERR_NOT_SUPPORTED; +} +#else // __nvoc_kernel_falcon_h_disabled +#define kflcnCoreDumpDestructive(pGpu, pKernelFlcn) kflcnCoreDumpDestructive_IMPL(pGpu, pKernelFlcn) +#endif // __nvoc_kernel_falcon_h_disabled + NvU32 kflcnGetPendingHostInterrupts_IMPL(struct OBJGPU *arg1, struct KernelFalcon *arg_this); #ifdef __nvoc_kernel_falcon_h_disabled static inline NvU32 kflcnGetPendingHostInterrupts(struct OBJGPU *arg1, struct KernelFalcon *arg_this) { @@ -327,6 +363,51 @@ struct KernelFalcon * kflcnGetKernelFalconForEngine_IMPL(struct OBJGPU *pGpu, EN #define kflcnMaskDmemAddr_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnMaskDmemAddr__ #define kflcnMaskDmemAddr(pGpu, pKernelFlcn, addr) kflcnMaskDmemAddr_DISPATCH(pGpu, pKernelFlcn, addr) #define kflcnMaskDmemAddr_HAL(pGpu, pKernelFlcn, addr) kflcnMaskDmemAddr_DISPATCH(pGpu, pKernelFlcn, addr) +#define kflcnRiscvIcdWaitForIdle_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnRiscvIcdWaitForIdle__ +#define kflcnRiscvIcdWaitForIdle(pGpu, pKernelFlcn) kflcnRiscvIcdWaitForIdle_DISPATCH(pGpu, pKernelFlcn) +#define kflcnRiscvIcdWaitForIdle_HAL(pGpu, pKernelFlcn) kflcnRiscvIcdWaitForIdle_DISPATCH(pGpu, pKernelFlcn) +#define kflcnRiscvIcdReadMem_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnRiscvIcdReadMem__ +#define kflcnRiscvIcdReadMem(pGpu, pKernelFlcn, address, size, pValue) kflcnRiscvIcdReadMem_DISPATCH(pGpu, pKernelFlcn, address, size, pValue) +#define kflcnRiscvIcdReadMem_HAL(pGpu, pKernelFlcn, address, size, pValue) kflcnRiscvIcdReadMem_DISPATCH(pGpu, pKernelFlcn, address, size, pValue) +#define kflcnRiscvIcdReadReg_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnRiscvIcdReadReg__ +#define kflcnRiscvIcdReadReg(pGpu, pKernelFlcn, reg, pValue) kflcnRiscvIcdReadReg_DISPATCH(pGpu, 
pKernelFlcn, reg, pValue) +#define kflcnRiscvIcdReadReg_HAL(pGpu, pKernelFlcn, reg, pValue) kflcnRiscvIcdReadReg_DISPATCH(pGpu, pKernelFlcn, reg, pValue) +#define kflcnRiscvIcdRcsr_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnRiscvIcdRcsr__ +#define kflcnRiscvIcdRcsr(pGpu, pKernelFlcn, csr, pValue) kflcnRiscvIcdRcsr_DISPATCH(pGpu, pKernelFlcn, csr, pValue) +#define kflcnRiscvIcdRcsr_HAL(pGpu, pKernelFlcn, csr, pValue) kflcnRiscvIcdRcsr_DISPATCH(pGpu, pKernelFlcn, csr, pValue) +#define kflcnRiscvIcdRstat_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnRiscvIcdRstat__ +#define kflcnRiscvIcdRstat(pGpu, pKernelFlcn, index, pValue) kflcnRiscvIcdRstat_DISPATCH(pGpu, pKernelFlcn, index, pValue) +#define kflcnRiscvIcdRstat_HAL(pGpu, pKernelFlcn, index, pValue) kflcnRiscvIcdRstat_DISPATCH(pGpu, pKernelFlcn, index, pValue) +#define kflcnRiscvIcdRpc_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnRiscvIcdRpc__ +#define kflcnRiscvIcdRpc(pGpu, pKernelFlcn, pValue) kflcnRiscvIcdRpc_DISPATCH(pGpu, pKernelFlcn, pValue) +#define kflcnRiscvIcdRpc_HAL(pGpu, pKernelFlcn, pValue) kflcnRiscvIcdRpc_DISPATCH(pGpu, pKernelFlcn, pValue) +#define kflcnRiscvIcdHalt_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnRiscvIcdHalt__ +#define kflcnRiscvIcdHalt(pGpu, pKernelFlcn) kflcnRiscvIcdHalt_DISPATCH(pGpu, pKernelFlcn) +#define kflcnRiscvIcdHalt_HAL(pGpu, pKernelFlcn) kflcnRiscvIcdHalt_DISPATCH(pGpu, pKernelFlcn) +#define kflcnIcdReadCmdReg_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnIcdReadCmdReg__ +#define kflcnIcdReadCmdReg(pGpu, pKernelFlcn) kflcnIcdReadCmdReg_DISPATCH(pGpu, pKernelFlcn) +#define kflcnIcdReadCmdReg_HAL(pGpu, pKernelFlcn) kflcnIcdReadCmdReg_DISPATCH(pGpu, pKernelFlcn) +#define kflcnRiscvIcdReadRdata_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnRiscvIcdReadRdata__ +#define kflcnRiscvIcdReadRdata(pGpu, pKernelFlcn) kflcnRiscvIcdReadRdata_DISPATCH(pGpu, pKernelFlcn) +#define kflcnRiscvIcdReadRdata_HAL(pGpu, pKernelFlcn) kflcnRiscvIcdReadRdata_DISPATCH(pGpu, pKernelFlcn) +#define kflcnRiscvIcdWriteAddress_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnRiscvIcdWriteAddress__ +#define kflcnRiscvIcdWriteAddress(pGpu, pKernelFlcn, address) kflcnRiscvIcdWriteAddress_DISPATCH(pGpu, pKernelFlcn, address) +#define kflcnRiscvIcdWriteAddress_HAL(pGpu, pKernelFlcn, address) kflcnRiscvIcdWriteAddress_DISPATCH(pGpu, pKernelFlcn, address) +#define kflcnIcdWriteCmdReg_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnIcdWriteCmdReg__ +#define kflcnIcdWriteCmdReg(pGpu, pKernelFlcn, value) kflcnIcdWriteCmdReg_DISPATCH(pGpu, pKernelFlcn, value) +#define kflcnIcdWriteCmdReg_HAL(pGpu, pKernelFlcn, value) kflcnIcdWriteCmdReg_DISPATCH(pGpu, pKernelFlcn, value) +#define kflcnCoreDumpPc_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnCoreDumpPc__ +#define kflcnCoreDumpPc(pGpu, pKernelFlcn, pc) kflcnCoreDumpPc_DISPATCH(pGpu, pKernelFlcn, pc) +#define kflcnCoreDumpPc_HAL(pGpu, pKernelFlcn, pc) kflcnCoreDumpPc_DISPATCH(pGpu, pKernelFlcn, pc) +#define kflcnDumpCoreRegs_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnDumpCoreRegs__ +#define kflcnDumpCoreRegs(pGpu, pKernelFlcn, pCore) kflcnDumpCoreRegs_DISPATCH(pGpu, pKernelFlcn, pCore) +#define kflcnDumpCoreRegs_HAL(pGpu, pKernelFlcn, pCore) kflcnDumpCoreRegs_DISPATCH(pGpu, pKernelFlcn, pCore) +#define kflcnDumpTracepc_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnDumpTracepc__ +#define kflcnDumpTracepc(pGpu, pKernelFlcn, pCode) kflcnDumpTracepc_DISPATCH(pGpu, pKernelFlcn, pCode) +#define kflcnDumpTracepc_HAL(pGpu, pKernelFlcn, pCode) kflcnDumpTracepc_DISPATCH(pGpu, pKernelFlcn, pCode) +#define kflcnDumpPeripheralRegs_FNPTR(pKernelFlcn) 
pKernelFlcn->__kflcnDumpPeripheralRegs__ +#define kflcnDumpPeripheralRegs(pGpu, pKernelFlcn, pCore) kflcnDumpPeripheralRegs_DISPATCH(pGpu, pKernelFlcn, pCore) +#define kflcnDumpPeripheralRegs_HAL(pGpu, pKernelFlcn, pCore) kflcnDumpPeripheralRegs_DISPATCH(pGpu, pKernelFlcn, pCore) #define kflcnGetEccInterruptMask_FNPTR(pKernelFlcn) pKernelFlcn->__kflcnGetEccInterruptMask__ #define kflcnGetEccInterruptMask(pGpu, pKernelFlcn) kflcnGetEccInterruptMask_DISPATCH(pGpu, pKernelFlcn) #define kflcnGetEccInterruptMask_HAL(pGpu, pKernelFlcn) kflcnGetEccInterruptMask_DISPATCH(pGpu, pKernelFlcn) @@ -458,6 +539,66 @@ static inline NvU32 kflcnMaskDmemAddr_DISPATCH(struct OBJGPU *pGpu, struct Kerne return pKernelFlcn->__kflcnMaskDmemAddr__(pGpu, pKernelFlcn, addr); } +static inline NV_STATUS kflcnRiscvIcdWaitForIdle_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { + return pKernelFlcn->__kflcnRiscvIcdWaitForIdle__(pGpu, pKernelFlcn); +} + +static inline NV_STATUS kflcnRiscvIcdReadMem_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 address, NvU64 size, NvU64 *pValue) { + return pKernelFlcn->__kflcnRiscvIcdReadMem__(pGpu, pKernelFlcn, address, size, pValue); +} + +static inline NV_STATUS kflcnRiscvIcdReadReg_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 reg, NvU64 *pValue) { + return pKernelFlcn->__kflcnRiscvIcdReadReg__(pGpu, pKernelFlcn, reg, pValue); +} + +static inline NV_STATUS kflcnRiscvIcdRcsr_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 csr, NvU64 *pValue) { + return pKernelFlcn->__kflcnRiscvIcdRcsr__(pGpu, pKernelFlcn, csr, pValue); +} + +static inline NV_STATUS kflcnRiscvIcdRstat_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 index, NvU64 *pValue) { + return pKernelFlcn->__kflcnRiscvIcdRstat__(pGpu, pKernelFlcn, index, pValue); +} + +static inline NV_STATUS kflcnRiscvIcdRpc_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 *pValue) { + return pKernelFlcn->__kflcnRiscvIcdRpc__(pGpu, pKernelFlcn, pValue); +} + +static inline NV_STATUS kflcnRiscvIcdHalt_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { + return pKernelFlcn->__kflcnRiscvIcdHalt__(pGpu, pKernelFlcn); +} + +static inline NvU32 kflcnIcdReadCmdReg_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { + return pKernelFlcn->__kflcnIcdReadCmdReg__(pGpu, pKernelFlcn); +} + +static inline NvU64 kflcnRiscvIcdReadRdata_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { + return pKernelFlcn->__kflcnRiscvIcdReadRdata__(pGpu, pKernelFlcn); +} + +static inline void kflcnRiscvIcdWriteAddress_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 address) { + pKernelFlcn->__kflcnRiscvIcdWriteAddress__(pGpu, pKernelFlcn, address); +} + +static inline void kflcnIcdWriteCmdReg_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 value) { + pKernelFlcn->__kflcnIcdWriteCmdReg__(pGpu, pKernelFlcn, value); +} + +static inline NV_STATUS kflcnCoreDumpPc_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 *pc) { + return pKernelFlcn->__kflcnCoreDumpPc__(pGpu, pKernelFlcn, pc); +} + +static inline void kflcnDumpCoreRegs_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore) { + pKernelFlcn->__kflcnDumpCoreRegs__(pGpu, pKernelFlcn, pCore); +} + +static inline void kflcnDumpTracepc_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCode) { + pKernelFlcn->__kflcnDumpTracepc__(pGpu, 
pKernelFlcn, pCode); +} + +static inline void kflcnDumpPeripheralRegs_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore) { + pKernelFlcn->__kflcnDumpPeripheralRegs__(pGpu, pKernelFlcn, pCore); +} + static inline NvU32 kflcnGetEccInterruptMask_DISPATCH(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { return pKernelFlcn->__kflcnGetEccInterruptMask__(pGpu, pKernelFlcn); } @@ -686,6 +827,110 @@ static inline NvU32 kflcnMaskDmemAddr_474d46(struct OBJGPU *pGpu, struct KernelF NV_ASSERT_OR_RETURN_PRECOMP(0, 0); } +NV_STATUS kflcnRiscvIcdWaitForIdle_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn); + +static inline NV_STATUS kflcnRiscvIcdWaitForIdle_46f6a7(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { + return NV_ERR_NOT_SUPPORTED; +} + +NV_STATUS kflcnRiscvIcdReadMem_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 address, NvU64 size, NvU64 *pValue); + +static inline NV_STATUS kflcnRiscvIcdReadMem_46f6a7(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 address, NvU64 size, NvU64 *pValue) { + return NV_ERR_NOT_SUPPORTED; +} + +NV_STATUS kflcnRiscvIcdReadReg_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 reg, NvU64 *pValue); + +static inline NV_STATUS kflcnRiscvIcdReadReg_46f6a7(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 reg, NvU64 *pValue) { + return NV_ERR_NOT_SUPPORTED; +} + +NV_STATUS kflcnRiscvIcdRcsr_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 csr, NvU64 *pValue); + +static inline NV_STATUS kflcnRiscvIcdRcsr_46f6a7(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 csr, NvU64 *pValue) { + return NV_ERR_NOT_SUPPORTED; +} + +NV_STATUS kflcnRiscvIcdRstat_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 index, NvU64 *pValue); + +static inline NV_STATUS kflcnRiscvIcdRstat_46f6a7(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 index, NvU64 *pValue) { + return NV_ERR_NOT_SUPPORTED; +} + +NV_STATUS kflcnRiscvIcdRpc_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 *pValue); + +static inline NV_STATUS kflcnRiscvIcdRpc_46f6a7(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 *pValue) { + return NV_ERR_NOT_SUPPORTED; +} + +NV_STATUS kflcnRiscvIcdHalt_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn); + +static inline NV_STATUS kflcnRiscvIcdHalt_46f6a7(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { + return NV_ERR_NOT_SUPPORTED; +} + +NvU32 kflcnIcdReadCmdReg_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn); + +NvU32 kflcnIcdReadCmdReg_GA102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn); + +static inline NvU32 kflcnIcdReadCmdReg_4a4dee(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { + return 0; +} + +NvU64 kflcnRiscvIcdReadRdata_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn); + +NvU64 kflcnRiscvIcdReadRdata_GA102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn); + +static inline NvU64 kflcnRiscvIcdReadRdata_4a4dee(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { + return 0; +} + +void kflcnRiscvIcdWriteAddress_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 address); + +void kflcnRiscvIcdWriteAddress_GA102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 address); + +static inline void kflcnRiscvIcdWriteAddress_b3696a(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 address) { + return; +} + +void kflcnIcdWriteCmdReg_TU102(struct OBJGPU *pGpu, struct KernelFalcon 
*pKernelFlcn, NvU32 value); + +void kflcnIcdWriteCmdReg_GA102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 value); + +static inline void kflcnIcdWriteCmdReg_b3696a(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU32 value) { + return; +} + +NV_STATUS kflcnCoreDumpPc_GA102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 *pc); + +static inline NV_STATUS kflcnCoreDumpPc_46f6a7(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, NvU64 *pc) { + return NV_ERR_NOT_SUPPORTED; +} + +void kflcnDumpCoreRegs_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore); + +void kflcnDumpCoreRegs_GA102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore); + +void kflcnDumpCoreRegs_GB202(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore); + +static inline void kflcnDumpCoreRegs_b3696a(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore) { + return; +} + +void kflcnDumpTracepc_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCode); + +void kflcnDumpTracepc_GA102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCode); + +static inline void kflcnDumpTracepc_b3696a(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCode) { + return; +} + +void kflcnDumpPeripheralRegs_TU102(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore); + +static inline void kflcnDumpPeripheralRegs_b3696a(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore) { + return; +} + NvU32 kflcnGetEccInterruptMask_GB100(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn); static inline NvU32 kflcnGetEccInterruptMask_4a4dee(struct OBJGPU *pGpu, struct KernelFalcon *pKernelFlcn) { diff --git a/src/nvidia/generated/g_kernel_graphics_nvoc.c b/src/nvidia/generated/g_kernel_graphics_nvoc.c index 1205e164c..2d7a3f422 100644 --- a/src/nvidia/generated/g_kernel_graphics_nvoc.c +++ b/src/nvidia/generated/g_kernel_graphics_nvoc.c @@ -278,7 +278,7 @@ void __nvoc_init_dataField_KernelGraphics(KernelGraphics *pThis, GpuHalspecOwner pThis->bOverrideContextBuffersToGpuCached = NV_FALSE; // Hal field -- bPeFiroBufferEnabled - if (( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x00000fe0UL) )) /* ChipHal: GB202 | GB203 | GB205 | GB206 | GB207 | GB20B | GB20C */ + if (( ((chipHal_HalVarIdx >> 5) == 2UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000003e0UL) )) /* ChipHal: GB202 | GB203 | GB205 | GB206 | GB207 */ { pThis->bPeFiroBufferEnabled = NV_TRUE; } diff --git a/src/nvidia/generated/g_kernel_gsp_nvoc.c b/src/nvidia/generated/g_kernel_gsp_nvoc.c index 938f8bafa..e1fb150b7 100644 --- a/src/nvidia/generated/g_kernel_gsp_nvoc.c +++ b/src/nvidia/generated/g_kernel_gsp_nvoc.c @@ -1117,6 +1117,23 @@ static void __nvoc_init_funcTable_KernelGsp_1(KernelGsp *pThis, GpuHalspecOwner } } + // kgspDumpMailbox -- halified (3 hals) body + if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000001UL) )) /* RmVariantHal: VF */ + { + pThis->__kgspDumpMailbox__ = &kgspDumpMailbox_f2d351; + } + else + { + if (( ((chipHal_HalVarIdx >> 5) == 3UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x0000a000UL) )) /* ChipHal: T234D | T264D */ + { + pThis->__kgspDumpMailbox__ = &kgspDumpMailbox_f2d351; + } + else + { + pThis->__kgspDumpMailbox__ = &kgspDumpMailbox_TU102; + } + } + // kgspService -- halified (3 hals) body if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL 
<< (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000001UL) )) /* RmVariantHal: VF */ { @@ -1893,13 +1910,13 @@ static void __nvoc_init_funcTable_KernelGsp_1(KernelGsp *pThis, GpuHalspecOwner // kgspGetWFL0Offset -- virtual halified (2 hals) inherited (kcrashcatEngine) base (kflcn) pThis->__kgspGetWFL0Offset__ = &__nvoc_up_thunk_KernelCrashCatEngine_kgspGetWFL0Offset; -} // End __nvoc_init_funcTable_KernelGsp_1 with approximately 259 basic block(s). +} // End __nvoc_init_funcTable_KernelGsp_1 with approximately 262 basic block(s). -// Initialize vtable(s) for 92 virtual method(s). +// Initialize vtable(s) for 93 virtual method(s). void __nvoc_init_funcTable_KernelGsp(KernelGsp *pThis, GpuHalspecOwner *pGpuhalspecowner, RmHalspecOwner *pRmhalspecowner) { - // Initialize vtable(s) with 66 per-object function pointer(s). + // Initialize vtable(s) with 67 per-object function pointer(s). __nvoc_init_funcTable_KernelGsp_1(pThis, pGpuhalspecowner, pRmhalspecowner); } diff --git a/src/nvidia/generated/g_kernel_gsp_nvoc.h b/src/nvidia/generated/g_kernel_gsp_nvoc.h index 579f5eef7..9eaa6a35f 100644 --- a/src/nvidia/generated/g_kernel_gsp_nvoc.h +++ b/src/nvidia/generated/g_kernel_gsp_nvoc.h @@ -420,7 +420,7 @@ struct KernelGsp { struct KernelFalcon *__nvoc_pbase_KernelFalcon; // kflcn super struct KernelGsp *__nvoc_pbase_KernelGsp; // kgsp - // Vtable with 66 per-object function pointers + // Vtable with 67 per-object function pointers void (*__kgspConfigureFalcon__)(struct OBJGPU *, struct KernelGsp * /*this*/); // halified (4 hals) body NvBool (*__kgspIsDebugModeEnabled__)(struct OBJGPU *, struct KernelGsp * /*this*/); // halified (5 hals) body NV_STATUS (*__kgspAllocBootArgs__)(struct OBJGPU *, struct KernelGsp * /*this*/); // halified (4 hals) body @@ -443,6 +443,7 @@ struct KernelGsp { NvU32 (*__kgspReadUcodeFuseVersion__)(struct OBJGPU *, struct KernelGsp * /*this*/, NvU32); // halified (5 hals) body NV_STATUS (*__kgspResetHw__)(struct OBJGPU *, struct KernelGsp * /*this*/); // virtual halified (5 hals) override (kflcn) base (kflcn) body NvBool (*__kgspHealthCheck__)(struct OBJGPU *, struct KernelGsp * /*this*/); // halified (3 hals) body + void (*__kgspDumpMailbox__)(struct OBJGPU *, struct KernelGsp * /*this*/); // halified (3 hals) body NvU32 (*__kgspService__)(struct OBJGPU *, struct KernelGsp * /*this*/); // halified (3 hals) body void (*__kgspServiceFatalHwError__)(struct OBJGPU *, struct KernelGsp * /*this*/, NvU32); // halified (3 hals) body void (*__kgspEccServiceEvent__)(struct OBJGPU *, struct KernelGsp * /*this*/); // halified (3 hals) body @@ -930,6 +931,9 @@ static inline void kgspPrintGspBinBuildId(struct OBJGPU *pGpu, struct KernelGsp #define kgspHealthCheck_FNPTR(pKernelGsp) pKernelGsp->__kgspHealthCheck__ #define kgspHealthCheck(pGpu, pKernelGsp) kgspHealthCheck_DISPATCH(pGpu, pKernelGsp) #define kgspHealthCheck_HAL(pGpu, pKernelGsp) kgspHealthCheck_DISPATCH(pGpu, pKernelGsp) +#define kgspDumpMailbox_FNPTR(pKernelGsp) pKernelGsp->__kgspDumpMailbox__ +#define kgspDumpMailbox(pGpu, pKernelGsp) kgspDumpMailbox_DISPATCH(pGpu, pKernelGsp) +#define kgspDumpMailbox_HAL(pGpu, pKernelGsp) kgspDumpMailbox_DISPATCH(pGpu, pKernelGsp) #define kgspService_FNPTR(pKernelGsp) pKernelGsp->__kgspService__ #define kgspService(pGpu, pKernelGsp) kgspService_DISPATCH(pGpu, pKernelGsp) #define kgspService_HAL(pGpu, pKernelGsp) kgspService_DISPATCH(pGpu, pKernelGsp) @@ -1214,6 +1218,10 @@ static inline NvBool kgspHealthCheck_DISPATCH(struct OBJGPU *pGpu, struct Kernel return 
pKernelGsp->__kgspHealthCheck__(pGpu, pKernelGsp); } +static inline void kgspDumpMailbox_DISPATCH(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp) { + pKernelGsp->__kgspDumpMailbox__(pGpu, pKernelGsp); +} + static inline NvU32 kgspService_DISPATCH(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp) { return pKernelGsp->__kgspService__(pGpu, pKernelGsp); } @@ -1710,6 +1718,12 @@ static inline NvBool kgspHealthCheck_86b752(struct OBJGPU *pGpu, struct KernelGs NV_ASSERT_OR_RETURN_PRECOMP(0, NV_FALSE); } +void kgspDumpMailbox_TU102(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp); + +static inline void kgspDumpMailbox_f2d351(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp) { + NV_ASSERT_PRECOMP(0); +} + NvU32 kgspService_TU102(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp); static inline NvU32 kgspService_474d46(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp) { diff --git a/src/nvidia/generated/g_nv_name_released.h b/src/nvidia/generated/g_nv_name_released.h index 9a4d674f5..9912a95cf 100644 --- a/src/nvidia/generated/g_nv_name_released.h +++ b/src/nvidia/generated/g_nv_name_released.h @@ -149,7 +149,6 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x1E93, 0x1089, 0x1d05, "NVIDIA GeForce RTX 2080 Super with Max-Q Design" }, { 0x1EB0, 0x0000, 0x0000, "Quadro RTX 5000" }, { 0x1EB1, 0x0000, 0x0000, "Quadro RTX 4000" }, - { 0x1EB1, 0x12a0, 0x15c3, "EIZO Quadro MED-XN92" }, { 0x1EB5, 0x0000, 0x0000, "Quadro RTX 5000" }, { 0x1EB5, 0x1375, 0x1025, "Quadro RTX 5000 with Max-Q Design" }, { 0x1EB5, 0x1401, 0x1025, "Quadro RTX 5000 with Max-Q Design" }, @@ -796,9 +795,13 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2BB1, 0x204b, 0x10de, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, { 0x2BB1, 0x204b, 0x17aa, "NVIDIA RTX PRO 6000 Blackwell Workstation Edition" }, { 0x2BB3, 0x204d, 0x1028, "NVIDIA RTX PRO 5000 Blackwell" }, + { 0x2BB3, 0x227a, 0x1028, "NVIDIA RTX PRO 5000 72GB Blackwell" }, { 0x2BB3, 0x204d, 0x103c, "NVIDIA RTX PRO 5000 Blackwell" }, + { 0x2BB3, 0x227a, 0x103c, "NVIDIA RTX PRO 5000 72GB Blackwell" }, { 0x2BB3, 0x204d, 0x10de, "NVIDIA RTX PRO 5000 Blackwell" }, + { 0x2BB3, 0x227a, 0x10de, "NVIDIA RTX PRO 5000 72GB Blackwell" }, { 0x2BB3, 0x204d, 0x17aa, "NVIDIA RTX PRO 5000 Blackwell" }, + { 0x2BB3, 0x227a, 0x17aa, "NVIDIA RTX PRO 5000 72GB Blackwell" }, { 0x2BB4, 0x204c, 0x1028, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, { 0x2BB4, 0x204c, 0x103c, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, { 0x2BB4, 0x204c, 0x10de, "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition" }, @@ -845,6 +848,7 @@ static const CHIPS_RELEASED sChipsReleased[] = { { 0x2DB9, 0x0000, 0x0000, "NVIDIA RTX PRO 500 Blackwell Generation Laptop GPU" }, { 0x2DD8, 0x0000, 0x0000, "NVIDIA GeForce RTX 5050 Laptop GPU" }, { 0x2DF9, 0x0000, 0x0000, "NVIDIA RTX PRO 500 Blackwell Embedded GPU" }, + { 0x2E12, 0x21ec, 0x10de, "NVIDIA GB10" }, { 0x2F04, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070" }, { 0x2F18, 0x0000, 0x0000, "NVIDIA GeForce RTX 5070 Ti Laptop GPU" }, { 0x2F38, 0x0000, 0x0000, "NVIDIA RTX PRO 3000 Blackwell Generation Laptop GPU" }, diff --git a/src/nvidia/inc/kernel/gpu/falcon/kernel_falcon_core_dump.h b/src/nvidia/inc/kernel/gpu/falcon/kernel_falcon_core_dump.h new file mode 100644 index 000000000..4cf529994 --- /dev/null +++ b/src/nvidia/inc/kernel/gpu/falcon/kernel_falcon_core_dump.h @@ -0,0 +1,57 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef KERNEL_FALCON_CORE_DUMP_H +#define KERNEL_FALCON_CORE_DUMP_H + +#include "gpu/falcon/kernel_falcon.h" +#define __RISCV_MAX_UNWIND_DEPTH 32 +#define __RISCV_MAX_TRACE_ENTRIES 64 + +typedef struct CoreDumpRegs +{ + NvU32 riscvPc; + NvU32 riscvCpuctl; + NvU32 riscvIrqmask; + NvU32 riscvIrqdest; + NvU32 riscvIrqdeleg; + NvU32 falconMailbox[2]; + NvU32 falconIrqstat; + NvU32 falconIrqmode; + NvU32 fbifInstblk; + NvU32 fbifCtl; + NvU32 fbifThrottle; + NvU32 fbifAchkBlk[2]; + NvU32 fbifAchkCtl[2]; + NvU32 fbifCg1; + // Ampere and above + NvU32 riscvPrivErrStat; + NvU32 riscvPrivErrInfo; + NvU32 riscvPrivErrAddrH; + NvU32 riscvPrivErrAddrL; + NvU32 riscvHubErrStat; + NvU32 tracePCEntries; + NvU64 tracePC[__RISCV_MAX_TRACE_ENTRIES]; +} CoreDumpRegs; + +#endif diff --git a/src/nvidia/src/kernel/gpu/falcon/arch/ampere/kernel_falcon_ga102.c b/src/nvidia/src/kernel/gpu/falcon/arch/ampere/kernel_falcon_ga102.c index cedd4633e..a78c7ff0b 100644 --- a/src/nvidia/src/kernel/gpu/falcon/arch/ampere/kernel_falcon_ga102.c +++ b/src/nvidia/src/kernel/gpu/falcon/arch/ampere/kernel_falcon_ga102.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -27,11 +27,13 @@ */ #include "gpu/falcon/kernel_falcon.h" +#include "gpu/falcon/kernel_falcon_core_dump.h" #include "os/os.h" #include "published/ampere/ga102/dev_falcon_v4.h" #include "published/ampere/ga102/dev_falcon_v4_addendum.h" #include "published/ampere/ga102/dev_riscv_pri.h" +#include "published/ampere/ga102/dev_fbif_v4.h" #define PRE_RESET_PRE_SILICON_TIMEOUT_US 300000 @@ -318,3 +320,157 @@ kflcnRiscvReadIntrStatus_GA102 kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_IRQDEST)); } +/*! + * Function to read the ICD_CMD register. + */ +NvU32 kflcnIcdReadCmdReg_GA102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn +) +{ + return kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_CMD); +} + +/*! + * Function to read the ICD_RDATA register pair. 
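+ * RDATA1 supplies the upper 32 bits and RDATA0 the lower 32 bits of the
+ * 64-bit value returned by the ICD.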
+ */ +NvU64 kflcnRiscvIcdReadRdata_GA102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn +) +{ + return (((NvU64)kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_RDATA1)) << 32) | + kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_RDATA0); +} + +/*! + * Function to write the ICD_ADDR register pair. + */ +void kflcnRiscvIcdWriteAddress_GA102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU64 address +) +{ + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_ADDR1, address >> 32); + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_ADDR0, (NvU32) address); +} + +/*! + * Function to write the ICD_CMD register. + */ +void kflcnIcdWriteCmdReg_GA102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU32 value +) +{ + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_CMD, value); +} + +void +kflcnDumpTracepc_GA102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + CoreDumpRegs *pCore +) +{ + NvU64 pc; + NvU32 ctl; + NvU32 r, w, size; + NvU32 entry; + NvU32 count; + + r = kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACE_RDIDX); + w = kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACE_WTIDX); + + if (((r & 0xbadf0000) == 0xbadf0000) && + ((w & 0xbadf0000) == 0xbadf0000)) + { + NV_PRINTF(LEVEL_ERROR, "Trace buffer blocked, skipping.\n"); + return; + } + + size = DRF_VAL(_PRISCV_RISCV, _TRACE_RDIDX, _MAXIDX, r); + + if (size > __RISCV_MAX_TRACE_ENTRIES) + { + NV_PRINTF(LEVEL_ERROR, "Trace buffer larger than expected. Bailing!\n"); + return; + } + + r = DRF_VAL(_PRISCV_RISCV, _TRACE_RDIDX, _RDIDX, r); + w = DRF_VAL(_PRISCV_RISCV, _TRACE_WTIDX, _WTIDX, w); + + ctl = kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACECTL); + + if ((w == r) && (DRF_VAL(_PRISCV_RISCV, _TRACECTL, _FULL, ctl) == 0)) + { + count = 0; + } + else + { + // + // The number of entries in trace buffer is how far the w (put) pointer + // is ahead of the r (get) pointer. If this value is negative, add + // the size of the circular buffer to bring the element count back into range. + // + count = w > r ? w - r : w - r + size; + } + + pCore->tracePCEntries = count; + + if (count) + { + for (entry = 0; entry < count; ++entry) + { + if (entry > w) + w += size; + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACE_RDIDX, w - entry); + + pc = ((NvU64)kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACEPC_HI) << 32ull) | + kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACEPC_LO); + pCore->tracePC[entry] = pc; + } + } + + // Restore original value + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACE_RDIDX, r); + return; +} + +NV_STATUS kflcnCoreDumpPc_GA102(OBJGPU *pGpu, KernelFalcon *pKernelFlcn, NvU64 *pc) +{ + // + // This code originally handled 0xbadfxxxx values and returned failure, + // however we may want to see badf values so it is now wired to return the read + // register always. We want to also ensure any automated processing will know to + // attempt a soft decode of the lower 32 bits as it is not a complete address. 
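+    // For example (illustrative value), a dumped PC of 0xfa11bacc0001a2b4 carries the
+    // raw 32-bit NV_PRISCV_RISCV_RPC read (0x0001a2b4) in its low half, with the
+    // 0xfa11bacc prefix marking it as a partial rather than a complete 64-bit address.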
+ // + *pc = 0xfa11bacc00000000ull | (NvU64)kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_RPC); + return NV_OK; +} + +void +kflcnDumpCoreRegs_GA102(OBJGPU *pGpu, KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore) +{ +#define __CORE_DUMP_RISCV_REG(x,y) do { pCore->x = kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, (y)); } while (0) + __CORE_DUMP_RISCV_REG(riscvCpuctl, NV_PRISCV_RISCV_CPUCTL); + __CORE_DUMP_RISCV_REG(riscvIrqmask, NV_PRISCV_RISCV_IRQMASK); + __CORE_DUMP_RISCV_REG(riscvIrqdest, NV_PRISCV_RISCV_IRQDEST); + + __CORE_DUMP_RISCV_REG(riscvPc, NV_PRISCV_RISCV_RPC); + __CORE_DUMP_RISCV_REG(riscvIrqdeleg, NV_PRISCV_RISCV_IRQDELEG); + __CORE_DUMP_RISCV_REG(riscvPrivErrStat, NV_PRISCV_RISCV_PRIV_ERR_STAT); + __CORE_DUMP_RISCV_REG(riscvPrivErrInfo, NV_PRISCV_RISCV_PRIV_ERR_INFO); + __CORE_DUMP_RISCV_REG(riscvPrivErrAddrH, NV_PRISCV_RISCV_PRIV_ERR_ADDR_HI); + __CORE_DUMP_RISCV_REG(riscvPrivErrAddrL, NV_PRISCV_RISCV_PRIV_ERR_ADDR); + __CORE_DUMP_RISCV_REG(riscvHubErrStat, NV_PRISCV_RISCV_HUB_ERR_STAT); +#undef __CORE_DUMP_RISCV_REG +} + diff --git a/src/nvidia/src/kernel/gpu/falcon/arch/blackwell/kernel_falcon_gb202.c b/src/nvidia/src/kernel/gpu/falcon/arch/blackwell/kernel_falcon_gb202.c new file mode 100644 index 000000000..8b993c9f7 --- /dev/null +++ b/src/nvidia/src/kernel/gpu/falcon/arch/blackwell/kernel_falcon_gb202.c @@ -0,0 +1,51 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/*! + * Provides the implementation for all GB100+ specific KernelFalcon + * interfaces. 
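+ * At present it provides kflcnDumpCoreRegs_GB202, the core-dump register
+ * helper that the HAL tables dispatch for the GB202 family of chips.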
+ */ + +#include "kernel/gpu/gpu.h" +#include "gpu/falcon/kernel_falcon.h" +#include "gpu/falcon/kernel_falcon_core_dump.h" + +#include "published/blackwell/gb202/dev_riscv_pri.h" + +void +kflcnDumpCoreRegs_GB202(OBJGPU *pGpu, KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore) +{ +#define __CORE_DUMP_RISCV_REG(x,y) do { pCore->x = kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, (y)); } while (0) + __CORE_DUMP_RISCV_REG(riscvCpuctl, NV_PRISCV_RISCV_CPUCTL); + __CORE_DUMP_RISCV_REG(riscvIrqmask, NV_PRISCV_RISCV_IRQMASK); + __CORE_DUMP_RISCV_REG(riscvIrqdest, NV_PRISCV_RISCV_IRQDEST); + + __CORE_DUMP_RISCV_REG(riscvPc, NV_PRISCV_RISCV_RPC); + __CORE_DUMP_RISCV_REG(riscvIrqdeleg, NV_PRISCV_RISCV_IRQDELEG); + __CORE_DUMP_RISCV_REG(riscvPrivErrStat, NV_PRISCV_RISCV_PRIV_ERR_STAT); + __CORE_DUMP_RISCV_REG(riscvPrivErrInfo, NV_PRISCV_RISCV_PRIV_ERR_INFO); + __CORE_DUMP_RISCV_REG(riscvPrivErrAddrH, NV_PRISCV_RISCV_PRIV_ERR_ADDR_HI); + __CORE_DUMP_RISCV_REG(riscvPrivErrAddrL, NV_PRISCV_RISCV_PRIV_ERR_ADDR); + __CORE_DUMP_RISCV_REG(riscvHubErrStat, NV_PRISCV_RISCV_HUB_ERR_STAT); +#undef __CORE_DUMP_RISCV_REG +} \ No newline at end of file diff --git a/src/nvidia/src/kernel/gpu/falcon/arch/turing/kernel_falcon_tu102.c b/src/nvidia/src/kernel/gpu/falcon/arch/turing/kernel_falcon_tu102.c index 14cc8c970..8b828fc69 100644 --- a/src/nvidia/src/kernel/gpu/falcon/arch/turing/kernel_falcon_tu102.c +++ b/src/nvidia/src/kernel/gpu/falcon/arch/turing/kernel_falcon_tu102.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2017-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -26,6 +26,7 @@ */ #include "gpu/falcon/kernel_falcon.h" +#include "gpu/falcon/kernel_falcon_core_dump.h" #include "os/os.h" #include "published/turing/tu102/dev_riscv_pri.h" @@ -426,3 +427,511 @@ kflcnMaskDmemAddr_TU102 return (addr & (DRF_SHIFTMASK(NV_PFALCON_FALCON_DMEMC_OFFS) | DRF_SHIFTMASK(NV_PFALCON_FALCON_DMEMC_BLK))); } + +/*! + * Function to read the ICD_CMD register. + */ +NvU32 kflcnIcdReadCmdReg_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn +) +{ + return kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_CMD); +} + +/*! + * Function to read the ICD_RDATA register pair. + */ +NvU64 kflcnRiscvIcdReadRdata_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn +) +{ + return (((NvU64)kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_RDATA1)) << 32) | + kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_RDATA0); +} + +/*! + * Function to write the ICD_ADDR register pair. + */ +void kflcnRiscvIcdWriteAddress_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU64 address +) +{ + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_ADDR1, address >> 32); + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_ADDR0, (NvU32) address); +} + +/*! + * Function to write the ICD_CMD register. 
+ */ +void kflcnIcdWriteCmdReg_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU32 value +) +{ + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_ICD_CMD, value); +} + +static NvBool +s_riscvIsIcdNotBusy +( + OBJGPU *pGpu, + void *pVoid +) +{ + KernelFalcon *pKernelFlcn = reinterpretCast(pVoid, KernelFalcon *); + NvU32 reg; + reg = kflcnIcdReadCmdReg_HAL(pGpu, pKernelFlcn); + + return FLD_TEST_DRF(_PRISCV_RISCV, _ICD_CMD, _BUSY, _FALSE, reg); +} + +static NV_STATUS +s_riscvIcdGetValue +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU64 *pValue +) +{ + // Wait for ICD to become idle before reading out value. + NV_STATUS status = kflcnRiscvIcdWaitForIdle_HAL(pGpu, pKernelFlcn); + if (status == NV_OK) + { + *pValue = kflcnRiscvIcdReadRdata_HAL(pGpu, pKernelFlcn); + } + else if (status == NV_ERR_INVALID_STATE) + { + return NV_ERR_INVALID_ARGUMENT; + } + else + { + return NV_ERR_INVALID_STATE; + } + + return NV_OK; +} + +/*! + * Function to wait for the ICD to become idle. + * + * @param[in] pGpu OBJGPU pointer + * @param[in] pKernelFlcn KernelFalcon object pointer + * + * @return 'NV_OK' if idle and no error + * 'NV_ERR_INVALID_STATE' if idle and error; typically bad command. + * 'NV_ERR_TIMEOUT' if busy and timed out. This usually indicates + * a fatal error, eg. core has hung or GPU is off the bus. + */ +NV_STATUS +kflcnRiscvIcdWaitForIdle_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn +) +{ + NvU32 icdCmd; + RMTIMEOUT timeout; + + gpuSetTimeout(pGpu, 125*1000, &timeout, GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE); // Wait up to 125ms + if (gpuTimeoutCondWait(pGpu, s_riscvIsIcdNotBusy, pKernelFlcn, &timeout) != NV_OK) + { + return NV_ERR_TIMEOUT; + } + + icdCmd = kflcnIcdReadCmdReg_HAL(pGpu, pKernelFlcn); + + if (FLD_TEST_DRF(_PRISCV_RISCV, _ICD_CMD, _ERROR, _TRUE, icdCmd)) + { + return NV_ERR_INVALID_STATE; + } + + return NV_OK; +} + +/*! + * Function to tell RISCV ICD to read RISCV virtual addresses. + * + * @param[in] pGpu OBJGPU pointer + * @param[in] pKernelFlcn KernelFalcon object pointer + * @param[in] address Address of memory to read. + * @param[in] size Size of access (1-8 bytes, pow2) + * @param[out] pValue register value + * + * @return 'NV_OK' if register value was read + * 'NV_ERR_INVALID_STATE' if core is not booted or didn't halt. + * 'NV_ERR_INVALID_ARGUMENT' if size is invalid + */ +NV_STATUS +kflcnRiscvIcdReadMem_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU64 address, + NvU64 size, + NvU64 *pValue +) +{ + NvU32 icdCmd; + + // Only pow2 sizes are allowed + if ((size != 1) && (size != 2) && (size != 4) && (size != 8)) + { + return NV_ERR_INVALID_ARGUMENT; + } + if ((address & (size - 1))) // Addresses must be aligned to the size. This is a RISCV architecture design decision. + { + return NV_ERR_INVALID_ARGUMENT; + } + NvU32 size_shift = 0; + while (size != 1) + { + size = size >> 1; + size_shift++; + } + + if (kflcnIsRiscvActive_HAL(pGpu, pKernelFlcn) && + (kflcnRiscvIcdWaitForIdle_HAL(pGpu, pKernelFlcn) != NV_ERR_TIMEOUT)) + { + icdCmd = DRF_DEF(_PRISCV_RISCV, _ICD_CMD, _OPC, _RDM); + icdCmd = FLD_SET_DRF_NUM(_PRISCV_RISCV, _ICD_CMD, _SZ, size_shift, icdCmd); + icdCmd = FLD_SET_DRF_NUM(_PRISCV_RISCV, _ICD_CMD, _PARM, 1, icdCmd); + + kflcnRiscvIcdWriteAddress_HAL(pGpu, pKernelFlcn, address); + + kflcnIcdWriteCmdReg_HAL(pGpu, pKernelFlcn, icdCmd); + } + else + { + // RISCV core was not booted, or ICD failed to execute command. + return NV_ERR_INVALID_STATE; + } + + return s_riscvIcdGetValue(pGpu, pKernelFlcn, pValue); +} + +/*! 
+ * Function to tell RISCV ICD to read RISCV register. + * + * @param[in] pGpu OBJGPU pointer + * @param[in] pKernelFlcn KernelFalcon object pointer + * @param[in] reg which register to read. Valid: 0-31 (0 is x0, so it is skipped) + * @param[out] pValue register value + * + * @return 'NV_OK' if register value was read + * 'NV_ERR_INVALID_STATE' if core is not booted or didn't halt. + * 'NV_ERR_INVALID_ARGUMENT' if register is invalid. + */ +NV_STATUS +kflcnRiscvIcdReadReg_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU32 reg, + NvU64 *pValue +) +{ + NvU32 icdCmd; + + // x0..x31 are valid RISCV register values. + if (reg >= 32) + { + return NV_ERR_INVALID_ARGUMENT; + } + + if (reg == 0) + { + *pValue = 0; + return NV_OK; + } + + if (kflcnIsRiscvActive_HAL(pGpu, pKernelFlcn) && + (kflcnRiscvIcdWaitForIdle_HAL(pGpu, pKernelFlcn) != NV_ERR_TIMEOUT)) + { + icdCmd = DRF_DEF(_PRISCV_RISCV, _ICD_CMD, _OPC, _RREG); + icdCmd = FLD_SET_DRF_NUM(_PRISCV_RISCV, _ICD_CMD, _IDX, reg, icdCmd); + + kflcnIcdWriteCmdReg_HAL(pGpu, pKernelFlcn, icdCmd); + } + else + { + // RISCV core was not booted, or ICD failed to execute command. + return NV_ERR_INVALID_STATE; + } + + return s_riscvIcdGetValue(pGpu, pKernelFlcn, pValue); +} + +/*! + * Function to tell RISCV ICD to read RISCV CSR. + * + * @param[in] pGpu OBJGPU pointer + * @param[in] pKernelFlcn KernelFalcon object pointer + * @param[in] csr which CSR register to read. Valid: 0-4095 + * @param[out] pValue CSR register value + * + * @return 'NV_OK' if CSR value was read + * 'NV_ERR_INVALID_STATE' if core is not booted or didn't halt. + * 'NV_ERR_INVALID_ARGUMENT' if CSR is invalid. + */ +NV_STATUS +kflcnRiscvIcdRcsr_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU32 csr, + NvU64 *pValue +) +{ + NvU32 icdCmd; + + // CSR must be between 0 and 4095, inclusive, as this is part of the RISCV spec. + if (csr >= 4096) + { + return NV_ERR_INVALID_ARGUMENT; + } + + if (kflcnIsRiscvActive_HAL(pGpu, pKernelFlcn) && + (kflcnRiscvIcdWaitForIdle_HAL(pGpu, pKernelFlcn) != NV_ERR_TIMEOUT)) + { + icdCmd = DRF_DEF(_PRISCV_RISCV, _ICD_CMD, _OPC, _RCSR); + icdCmd = FLD_SET_DRF_NUM(_PRISCV_RISCV, _ICD_CMD, _PARM, csr, icdCmd); + + kflcnIcdWriteCmdReg_HAL(pGpu, pKernelFlcn, icdCmd); + } + else + { + // RISCV core was not booted, or ICD failed to read CSR. + return NV_ERR_INVALID_STATE; + } + + return s_riscvIcdGetValue(pGpu, pKernelFlcn, pValue); +} + +/*! + * Function to tell RISCV ICD to read RSTAT register. + * + * @param[in] pGpu OBJGPU pointer + * @param[in] pKernelFlcn KernelFalcon object pointer + * @param[in] index which RSTAT register to read. Valid: 0 3 4 + * @param[out] pValue RSTAT register value + * + * @return 'NV_OK' if RSTAT value was read + * 'NV_ERR_INVALID_STATE' if core is not booted or didn't halt. + * 'NV_ERR_INVALID_ARGUMENT' if invalid RSTAT register was specified. + */ +NV_STATUS +kflcnRiscvIcdRstat_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU32 index, + NvU64 *pValue +) +{ + NvU32 icdCmd; + + if (kflcnIsRiscvActive_HAL(pGpu, pKernelFlcn) && + (kflcnRiscvIcdWaitForIdle_HAL(pGpu, pKernelFlcn) != NV_ERR_TIMEOUT)) + { + icdCmd = DRF_DEF(_PRISCV_RISCV, _ICD_CMD, _OPC, _RSTAT); + icdCmd = FLD_SET_DRF_NUM(_PRISCV_RISCV, _ICD_CMD, _IDX, index, icdCmd); + + kflcnIcdWriteCmdReg_HAL(pGpu, pKernelFlcn, icdCmd); + } + else + { + // RISCV core was not booted, or ICD misbehaved. + return NV_ERR_INVALID_STATE; + } + + return s_riscvIcdGetValue(pGpu, pKernelFlcn, pValue); +} + +/*! + * Function to tell RISCV ICD to read PC. 
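+ * The PC is sampled with the _RPC debug command and returned through the
+ * ICD RDATA register pair.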
+ * + * @param[in] pGpu OBJGPU pointer + * @param[in] pKernelFlcn KernelFalcon object pointer + * @param[out] pValue PC value + * + * @return 'NV_OK' if RSTAT value was read + * 'NV_ERR_INVALID_STATE' if core is not booted or didn't halt. + * 'NV_ERR_INVALID_ARGUMENT' should not happen. + */ +NV_STATUS +kflcnRiscvIcdRpc_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU64 *pValue +) +{ + NvU32 icdCmd; + + if (kflcnIsRiscvActive_HAL(pGpu, pKernelFlcn) && + (kflcnRiscvIcdWaitForIdle_HAL(pGpu, pKernelFlcn) != NV_ERR_TIMEOUT)) + { + icdCmd = DRF_DEF(_PRISCV_RISCV, _ICD_CMD, _OPC, _RPC); + kflcnIcdWriteCmdReg_HAL(pGpu, pKernelFlcn, icdCmd); + } + else + { + // RISCV core was not booted, or ICD failed to retrieve PC. + return NV_ERR_INVALID_STATE; + } + + return s_riscvIcdGetValue(pGpu, pKernelFlcn, pValue); +} + +/*! + * Function to tell RISCV core to enter ICD mode. + * + * @param[in] pGpu OBJGPU pointer + * @param[in] pKernelFlcn KernelFalcon object pointer + * + * @return 'NV_OK' if core has entered ICD + * 'NV_ERR_INVALID_STATE' if core is not booted. + * 'NV_ERR_TIMEOUT' if core did not successfully halt. + */ +NV_STATUS +kflcnRiscvIcdHalt_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn +) +{ + NV_STATUS status = NV_OK; + NvU32 icdCmd; + NvU8 tries = 9; // This should be set to allow retries for over a second. + + if (kflcnIsRiscvActive_HAL(pGpu, pKernelFlcn)) + { + do + { + icdCmd = DRF_DEF(_PRISCV_RISCV, _ICD_CMD, _OPC, _STOP); + kflcnIcdWriteCmdReg_HAL(pGpu, pKernelFlcn, icdCmd); + status = kflcnRiscvIcdWaitForIdle_HAL(pGpu, pKernelFlcn); + if (tries == 0) + break; + tries--; + } + while (status != NV_OK); + } + else // RISCV core was not booted; die immediately. + { + return NV_ERR_INVALID_STATE; + } + + return status; +} + +void +kflcnDumpTracepc_TU102 +( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + CoreDumpRegs *pCore +) +{ + NvU64 pc; + NvU32 ctl; + NvU32 r, w, size; + NvU32 entry; + NvU32 count; + + r = kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACE_RDIDX); + w = kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACE_WTIDX); + + size = DRF_VAL(_PRISCV_RISCV, _TRACE_RDIDX, _MAXIDX, r); + + if (size > __RISCV_MAX_TRACE_ENTRIES) + { + NV_PRINTF(LEVEL_ERROR, "Trace buffer larger than expected. Bailing!\n"); + return; + } + + r = DRF_VAL(_PRISCV_RISCV, _TRACE_RDIDX, _RDIDX, r); + w = DRF_VAL(_PRISCV_RISCV, _TRACE_WTIDX, _WTIDX, w); + + ctl = kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACECTL); + + if ((w == r) && (DRF_VAL(_PRISCV_RISCV, _TRACECTL, _FULL, ctl) == 0)) + { + count = 0; + } + else + { + // + // The number of entries in trace buffer is how far the w (put) pointer + // is ahead of the r (get) pointer. If this value is negative, add + // the size of the circular buffer to bring the element count back into range. + // + count = w > r ? 
w - r : w - r + size; + } + + pCore->tracePCEntries = count; + + if (count) + { + for (entry = 0; entry < count; ++entry) + { + if (entry > w) + w += size; + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACE_RDIDX, w - entry); + + pc = ((NvU64)kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACEPC_HI) << 32ull) | + kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACEPC_LO); + pCore->tracePC[entry] = pc; + } + } + + // Restore original value + kflcnRiscvRegWrite_HAL(pGpu, pKernelFlcn, NV_PRISCV_RISCV_TRACE_RDIDX, r); + return; +} + +void +kflcnDumpCoreRegs_TU102(OBJGPU *pGpu, KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore) +{ +#define __CORE_DUMP_RISCV_REG(x,y) do { pCore->x = kflcnRiscvRegRead_HAL(pGpu, pKernelFlcn, (y)); } while (0) + __CORE_DUMP_RISCV_REG(riscvCpuctl, NV_PRISCV_RISCV_CPUCTL); + __CORE_DUMP_RISCV_REG(riscvIrqmask, NV_PRISCV_RISCV_IRQMASK); + __CORE_DUMP_RISCV_REG(riscvIrqdest, NV_PRISCV_RISCV_IRQDEST); + __CORE_DUMP_RISCV_REG(riscvPrivErrStat, NV_PRISCV_RISCV_PRIV_ERR_STAT); + __CORE_DUMP_RISCV_REG(riscvPrivErrInfo, NV_PRISCV_RISCV_PRIV_ERR_INFO); + __CORE_DUMP_RISCV_REG(riscvPrivErrAddrL, NV_PRISCV_RISCV_PRIV_ERR_ADDR); + __CORE_DUMP_RISCV_REG(riscvHubErrStat, NV_PRISCV_RISCV_HUB_ERR_STAT); +#undef __CORE_DUMP_RISCV_REG +} + +void +kflcnDumpPeripheralRegs_TU102(OBJGPU *pGpu, KernelFalcon *pKernelFlcn, CoreDumpRegs *pCore) +{ +#define __CORE_DUMP_REG(x,y) do { pCore->x = kflcnRegRead_HAL(pGpu, pKernelFlcn, (y)); } while (0) + __CORE_DUMP_REG(falconMailbox[0], NV_PFALCON_FALCON_MAILBOX0); + __CORE_DUMP_REG(falconMailbox[1], NV_PFALCON_FALCON_MAILBOX1); + __CORE_DUMP_REG(falconIrqstat, NV_PFALCON_FALCON_IRQSTAT); + __CORE_DUMP_REG(falconIrqmode, NV_PFALCON_FALCON_IRQMODE); +#undef __CORE_DUMP_REG + +#define __CORE_DUMP_RAW(x,y) do { pCore->x = GPU_REG_RD32(pGpu, (y)); } while (0) + __CORE_DUMP_RAW(fbifInstblk, pKernelFlcn->fbifBase + NV_PFALCON_FBIF_INSTBLK); + __CORE_DUMP_RAW(fbifCtl, pKernelFlcn->fbifBase + NV_PFALCON_FBIF_CTL); + __CORE_DUMP_RAW(fbifThrottle, pKernelFlcn->fbifBase + NV_PFALCON_FBIF_THROTTLE); + __CORE_DUMP_RAW(fbifAchkBlk[0], pKernelFlcn->fbifBase + NV_PFALCON_FBIF_ACHK_BLK(0)); + __CORE_DUMP_RAW(fbifAchkBlk[1], pKernelFlcn->fbifBase + NV_PFALCON_FBIF_ACHK_BLK(1)); + __CORE_DUMP_RAW(fbifAchkCtl[0], pKernelFlcn->fbifBase + NV_PFALCON_FBIF_ACHK_CTL(0)); + __CORE_DUMP_RAW(fbifAchkCtl[1], pKernelFlcn->fbifBase + NV_PFALCON_FBIF_ACHK_CTL(1)); + __CORE_DUMP_RAW(fbifCg1, pKernelFlcn->fbifBase + NV_PFALCON_FBIF_CG1); +#undef __CORE_DUMP_RAW +} + diff --git a/src/nvidia/src/kernel/gpu/falcon/kernel_falcon.c b/src/nvidia/src/kernel/gpu/falcon/kernel_falcon.c index e9901b776..74b5f2aad 100644 --- a/src/nvidia/src/kernel/gpu/falcon/kernel_falcon.c +++ b/src/nvidia/src/kernel/gpu/falcon/kernel_falcon.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -21,6 +21,7 @@ * DEALINGS IN THE SOFTWARE. 
*/ #include "gpu/falcon/kernel_falcon.h" +#include "gpu/falcon/kernel_falcon_core_dump.h" #include "gpu/sec2/kernel_sec2.h" #include "gpu/gsp/kernel_gsp.h" @@ -441,3 +442,221 @@ NV_STATUS gkflcnServiceNotificationInterrupt_IMPL(OBJGPU *pGpu, GenericKernelFal return NV_OK; } + +NV_STATUS kflcnCoreDumpNondestructive( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn, + NvU32 verbosity +) +{ + CoreDumpRegs PeregrineCoreRegisters = { 0 }; + + kflcnDumpCoreRegs_HAL(pGpu, pKernelFlcn, &PeregrineCoreRegisters); + if (verbosity >= 1) + { + kflcnDumpPeripheralRegs_HAL(pGpu, pKernelFlcn, &PeregrineCoreRegisters); + } + if (verbosity >= 2) + { + kflcnDumpTracepc(pGpu, pKernelFlcn, &PeregrineCoreRegisters); + } + + NV_PRINTF(LEVEL_ERROR, "PRI: riscvPc : %08x\n", PeregrineCoreRegisters.riscvPc); + if (verbosity >= 1) + { + NV_PRINTF(LEVEL_ERROR, "PRI: riscvCpuctl : %08x\n", PeregrineCoreRegisters.riscvCpuctl); + NV_PRINTF(LEVEL_ERROR, "PRI: riscvIrqmask : %08x\n", PeregrineCoreRegisters.riscvIrqmask); + NV_PRINTF(LEVEL_ERROR, "PRI: riscvIrqdest : %08x\n", PeregrineCoreRegisters.riscvIrqdest); + NV_PRINTF(LEVEL_ERROR, "PRI: riscvPrivErrStat : %08x\n", PeregrineCoreRegisters.riscvPrivErrStat); + NV_PRINTF(LEVEL_ERROR, "PRI: riscvPrivErrInfo : %08x\n", PeregrineCoreRegisters.riscvPrivErrInfo); + NV_PRINTF(LEVEL_ERROR, "PRI: riscvPrivErrAddr : %016" NvU64_fmtx "\n", (((NvU64)PeregrineCoreRegisters.riscvPrivErrAddrH << 32ull) | PeregrineCoreRegisters.riscvPrivErrAddrL)); + NV_PRINTF(LEVEL_ERROR, "PRI: riscvHubErrStat : %08x\n", PeregrineCoreRegisters.riscvHubErrStat); + NV_PRINTF(LEVEL_ERROR, "PRI: falconMailbox : 0:%08x 1:%08x\n", PeregrineCoreRegisters.falconMailbox[0], PeregrineCoreRegisters.falconMailbox[1]); + NV_PRINTF(LEVEL_ERROR, "PRI: falconIrqstat : %08x\n", PeregrineCoreRegisters.falconIrqstat); + NV_PRINTF(LEVEL_ERROR, "PRI: falconIrqmode : %08x\n", PeregrineCoreRegisters.falconIrqmode); + NV_PRINTF(LEVEL_ERROR, "PRI: fbifInstblk : %08x\n", PeregrineCoreRegisters.fbifInstblk); + NV_PRINTF(LEVEL_ERROR, "PRI: fbifCtl : %08x\n", PeregrineCoreRegisters.fbifCtl); + NV_PRINTF(LEVEL_ERROR, "PRI: fbifThrottle : %08x\n", PeregrineCoreRegisters.fbifThrottle); + NV_PRINTF(LEVEL_ERROR, "PRI: fbifAchkBlk : 0:%08x 1:%08x\n", PeregrineCoreRegisters.fbifAchkBlk[0], PeregrineCoreRegisters.fbifAchkBlk[1]); + NV_PRINTF(LEVEL_ERROR, "PRI: fbifAchkCtl : 0:%08x 1:%08x\n", PeregrineCoreRegisters.fbifAchkCtl[0], PeregrineCoreRegisters.fbifAchkCtl[1]); + NV_PRINTF(LEVEL_ERROR, "PRI: fbifCg1 : %08x\n", PeregrineCoreRegisters.fbifCg1); + } + if (verbosity >= 2) + { + for (unsigned int n = 0; n < PeregrineCoreRegisters.tracePCEntries; n++) + { + NV_PRINTF(LEVEL_ERROR, "TRACE: %02u = 0x%016" NvU64_fmtx "\n", n, PeregrineCoreRegisters.tracePC[n]); + } + } + + return NV_OK; +} + +NV_STATUS kflcnCoreDumpDestructive( + OBJGPU *pGpu, + KernelFalcon *pKernelFlcn +) +{ + // Initialise state - nothing succeeded yet. + NvU64 pc = 1; + NvU64 traceRa = 0; + NvU64 traceS0 = 0; + NvU32 unwindDepth = 0; + NvU64 regValue64; + NvU64 riscvCoreRegisters[32]; + NvU32 anySuccess = 0; + + // Check if PRI is alive / core is booted. + { + if (kflcnIsRiscvActive_HAL(pGpu, pKernelFlcn)) // If core is not booted, abort - nothing to do. + { + NV_PRINTF(LEVEL_ERROR, "ICD: Core is booted.\n"); + } + else + { + NV_PRINTF(LEVEL_ERROR, "ICD: [ERROR] Core is not booted.\n"); + return NV_OK; + } + } + + // Check if ICD RSTAT works. 
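+    // RSTAT reads do not require halting the core; if none of them succeed,
+    // the ICD interface itself is unresponsive and a halt attempt would be pointless.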
+    {
+        for (int i = 0; i < 8; i++)
+        {
+            if (kflcnRiscvIcdRstat_HAL(pGpu, pKernelFlcn, i, &regValue64) == NV_OK)
+            {
+                NV_PRINTF(LEVEL_ERROR, "ICD: RSTAT%d 0x%016" NvU64_fmtx "\n", i, regValue64);
+                anySuccess++;
+            }
+        }
+        if (!anySuccess)
+        {
+            NV_PRINTF(LEVEL_ERROR, "ICD: [ERROR] Unable to retrieve any RSTAT register.\n");
+            return NV_OK; // Failed to read ANY RSTAT value. This means ICD is dead.
+        }
+    }
+
+    // ATTEMPT ICD HALT, and dump state. Check if ICD commands work.
+    {
+        if (kflcnRiscvIcdHalt_HAL(pGpu, pKernelFlcn) != NV_OK)
+        {
+            NV_PRINTF(LEVEL_ERROR, "ICD: [ERROR] ICD Halt command failed.\n");
+            return NV_OK; // Failed to halt core. Typical end point for "core is hung" scenario.
+        }
+    }
+
+    // Dump PC, as much as we can get.
+    if (kflcnRiscvIcdRpc_HAL(pGpu, pKernelFlcn, &pc) != NV_OK)
+    {
+        if (kflcnCoreDumpPc_HAL(pGpu, pKernelFlcn, &pc) != NV_OK)
+        {
+            NV_PRINTF(LEVEL_ERROR, "ICD: [WARN] Cannot retrieve PC.\n");
+        }
+        else
+        {
+            NV_PRINTF(LEVEL_ERROR, "ICD: PC = 0x--------%08llx\n", pc & 0xffffffff);
+        }
+    }
+    else
+    {
+        NV_PRINTF(LEVEL_ERROR, "ICD: PC = 0x%016" NvU64_fmtx "\n", pc);
+    }
+
+    // Dump registers
+    for (int a = 0; a < 32; a++)
+    {
+        if (kflcnRiscvIcdReadReg_HAL(pGpu, pKernelFlcn, a, &regValue64) == NV_OK)
+        {
+            riscvCoreRegisters[a] = regValue64;
+
+            // Save off registers needed for unwinding.
+            if (a == 1)
+                traceRa = regValue64;
+            if (a == 8)
+                traceS0 = regValue64;
+        }
+        else
+        {
+            NV_PRINTF(LEVEL_ERROR, "ICD: register read failed for x%02d\n", a);
+            riscvCoreRegisters[a] = 0xbaadbaadbaadbaad;
+        }
+    }
+    NV_PRINTF(LEVEL_ERROR,
+              "ICD: ra:0x%016" NvU64_fmtx " sp:0x%016" NvU64_fmtx " gp:0x%016" NvU64_fmtx " tp:0x%016" NvU64_fmtx "\n",
+              riscvCoreRegisters[1], riscvCoreRegisters[2], riscvCoreRegisters[3], riscvCoreRegisters[4]);
+    NV_PRINTF(LEVEL_ERROR,
+              "ICD: t0:0x%016" NvU64_fmtx " t1:0x%016" NvU64_fmtx " t2:0x%016" NvU64_fmtx " s0:0x%016" NvU64_fmtx "\n",
+              riscvCoreRegisters[5], riscvCoreRegisters[6], riscvCoreRegisters[7], riscvCoreRegisters[8]);
+    NV_PRINTF(LEVEL_ERROR,
+              "ICD: s1:0x%016" NvU64_fmtx " a0:0x%016" NvU64_fmtx " a1:0x%016" NvU64_fmtx " a2:0x%016" NvU64_fmtx "\n",
+              riscvCoreRegisters[9], riscvCoreRegisters[10], riscvCoreRegisters[11], riscvCoreRegisters[12]);
+    NV_PRINTF(LEVEL_ERROR,
+              "ICD: a3:0x%016" NvU64_fmtx " a4:0x%016" NvU64_fmtx " a5:0x%016" NvU64_fmtx " a6:0x%016" NvU64_fmtx "\n",
+              riscvCoreRegisters[13], riscvCoreRegisters[14], riscvCoreRegisters[15], riscvCoreRegisters[16]);
+    NV_PRINTF(LEVEL_ERROR,
+              "ICD: a7:0x%016" NvU64_fmtx " s2:0x%016" NvU64_fmtx " s3:0x%016" NvU64_fmtx " s4:0x%016" NvU64_fmtx "\n",
+              riscvCoreRegisters[17], riscvCoreRegisters[18], riscvCoreRegisters[19], riscvCoreRegisters[20]);
+    NV_PRINTF(LEVEL_ERROR,
+              "ICD: s5:0x%016" NvU64_fmtx " s6:0x%016" NvU64_fmtx " s7:0x%016" NvU64_fmtx " s8:0x%016" NvU64_fmtx "\n",
+              riscvCoreRegisters[21], riscvCoreRegisters[22], riscvCoreRegisters[23], riscvCoreRegisters[24]);
+    NV_PRINTF(LEVEL_ERROR,
+              "ICD: s9:0x%016" NvU64_fmtx " s10:0x%016" NvU64_fmtx " s11:0x%016" NvU64_fmtx " t3:0x%016" NvU64_fmtx "\n",
+              riscvCoreRegisters[25], riscvCoreRegisters[26], riscvCoreRegisters[27], riscvCoreRegisters[28]);
+    NV_PRINTF(LEVEL_ERROR,
+              "ICD: t4:0x%016" NvU64_fmtx " t5:0x%016" NvU64_fmtx " t6:0x%016" NvU64_fmtx "\n",
+              riscvCoreRegisters[29], riscvCoreRegisters[30], riscvCoreRegisters[31]);
+
+    // Dump CSRs
+    for (int a = 0; a < 4096; a++)
+    {
+        if (kflcnRiscvIcdRcsr_HAL(pGpu, pKernelFlcn, a, &regValue64) == NV_OK)
+        {
+            NV_PRINTF(LEVEL_ERROR, "ICD: csr[%03x] = 0x%016" NvU64_fmtx "\n", a,
regValue64); + } + } + + // + // Attempt core unwind. For various reasons, may fail very early. + // To unwind, we use s0 as the frame pointer and ra as the return address (adding that to the callstack). + // s0[-2] contains the previous stack pointer, and s0[-1] contains the previous return address. + // We continue until the memory is not readable, or we hit some "very definitely wrong" values like zero or + // misaligned stack. If we unwind even once, we declare our unwind a great success and move on. + // + { + if ((!traceRa) || (!traceS0)) + return NV_OK; // Fail to unwind - the ra/s0 registers are not valid. + + do + { + if ((!traceS0) || // S0 cannot be zero + (!traceRa) || // RA cannot be zero + (traceS0 & 7)) // stack cannot be misaligned + goto abortUnwind; + + traceS0 -= 16; + if (kflcnRiscvIcdReadMem_HAL(pGpu, pKernelFlcn, traceS0 + 8, 8, &traceRa) != NV_OK) + goto abortUnwind; + if (kflcnRiscvIcdReadMem_HAL(pGpu, pKernelFlcn, traceS0 + 0, 8, &traceS0) != NV_OK) + goto abortUnwind; + + NV_PRINTF(LEVEL_ERROR, "ICD: unwind%02u: 0x%016" NvU64_fmtx "\n", unwindDepth, traceRa); + unwindDepth++; + } while (unwindDepth < __RISCV_MAX_UNWIND_DEPTH); + + // Core unwind attempt finished. The call stack was too deep. + NV_PRINTF(LEVEL_ERROR, "ICD: [WARN] unwind greater than max depth...\n"); + goto unwindFull; + } +abortUnwind: + // Core unwind attempt finished. No unwind past the register (ra) was possible. + if (unwindDepth == 0) + { + NV_PRINTF(LEVEL_ERROR, "ICD: [WARN] unwind retrieved zero values :(\n"); + return NV_OK; + } + + // Core unwind attempt finished. Unwind successfully got 1 or more entries. +unwindFull: + NV_PRINTF(LEVEL_ERROR, "ICD: unwind complete.\n"); + return NV_OK; +} diff --git a/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c b/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c index 068977509..71d1de4e9 100644 --- a/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c +++ b/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c @@ -1355,6 +1355,23 @@ exit_fail_cleanup: return nvStatus; } +void +kgspDumpMailbox_TU102 +( + OBJGPU *pGpu, + KernelGsp *pKernelGsp +) +{ + NvU32 idx; + NvU32 data; + + for (idx = 0; idx < NV_PGSP_MAILBOX__SIZE_1; idx++) + { + data = GPU_REG_RD32(pGpu, NV_PGSP_MAILBOX(idx)); + NV_PRINTF(LEVEL_ERROR, "GSP: MAILBOX(%d) = 0x%08X\n", idx, data); + } +} + void kgspReadEmem_TU102 ( diff --git a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c index 66152a45f..be52b5133 100644 --- a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c +++ b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c @@ -24,6 +24,7 @@ #include "resserv/rs_server.h" #include "gpu/gsp/kernel_gsp.h" +#include "gpu/falcon/kernel_falcon.h" #include "kernel/core/thread_state.h" #include "kernel/core/locks.h" @@ -2142,6 +2143,7 @@ _kgspLogXid119 NvU64 duration; char durationUnitsChar; KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu); + KernelFalcon *pKernelFlcn = staticCast(pKernelGsp, KernelFalcon); if (pRpc->timeoutCount == 1) { @@ -2186,9 +2188,22 @@ _kgspLogXid119 kgspLogRpcDebugInfo(pGpu, pRpc, GSP_RPC_TIMEOUT, NV_TRUE/*bPollingForRpcResponse*/); osAssertFailed(); + // + // Dump registers / core state, non-destructively here. + // On production boards, ICD dump cannot be done because halt is final. 
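+    // The non-destructive dump only reads state over PRI and never issues an ICD halt,
+    // so GSP may still recover after it runs.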
+    // Do not print this if we already consider GSP dead (prevents spam overload)
+    //
+    kgspDumpMailbox_HAL(pGpu, pKernelGsp);
+    kflcnCoreDumpNondestructive(pGpu, pKernelFlcn, 2);
+
         NV_PRINTF(LEVEL_ERROR,
             "********************************************************************************\n");
     }
+    else
+    {
+        kgspDumpMailbox_HAL(pGpu, pKernelGsp);  // Always dump mailboxes
+        kflcnCoreDumpNondestructive(pGpu, pKernelFlcn, 0); // simple version
+    }
 }
 
 static void
@@ -2389,8 +2404,14 @@ _kgspRpcRecvPoll
             goto done;
         }
 
+        //
+        // Today, we will soldier on if GSP times out. This can cause future issues if the action
+        // requested never actually occurs.
+        //
         if (timeoutStatus == NV_ERR_TIMEOUT)
         {
+            KernelFalcon *pKernelFlcn = staticCast(pKernelGsp, KernelFalcon);
+
             rpcStatus = timeoutStatus;
 
             _kgspRpcIncrementTimeoutCountAndRateLimitPrints(pGpu, pRpc);
@@ -2408,6 +2429,9 @@ _kgspRpcRecvPoll
                 gpuMarkDeviceForReset(pGpu);
                 pKernelGsp->bFatalError = NV_TRUE;
 
+                // Do a destructive ICD dump - core is unrecoverable.
+                kflcnCoreDumpDestructive(pGpu, pKernelFlcn);
+
                 // For Windows, if TDR is supported, trigger TDR to recover the system.
                 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_SUPPORTS_TDR_EVENT))
                 {
diff --git a/src/nvidia/src/kernel/gpu/mig_mgr/arch/blackwell/kmigmgr_gb202.c b/src/nvidia/src/kernel/gpu/mig_mgr/arch/blackwell/kmigmgr_gb202.c
index 13b36d761..d330804a0 100644
--- a/src/nvidia/src/kernel/gpu/mig_mgr/arch/blackwell/kmigmgr_gb202.c
+++ b/src/nvidia/src/kernel/gpu/mig_mgr/arch/blackwell/kmigmgr_gb202.c
@@ -123,6 +123,53 @@ kmigmgrGpuInstanceSupportVgpuTimeslice_GB202
     return gfxSizeFlag == NV2080_CTRL_GPU_PARTITION_FLAG_GFX_SIZE_NONE ? NV_FALSE : NV_TRUE;
 }
 
+
+static NvBool
+s_kmigmgrIsSingleSliceConfig_GB202
+(
+    OBJGPU *pGpu,
+    KernelMIGManager *pKernelMIGManager,
+    NvU32 gpuInstanceFlag
+)
+{
+    NvU32 computeSizeFlag = DRF_VAL(2080_CTRL_GPU, _PARTITION_FLAG, _COMPUTE_SIZE, gpuInstanceFlag);
+    NvU32 syspipeMask = 0;
+    NvBool isSingleSliceProfile = NV_FALSE;
+    NvU32 actualMigCount = 0;
+    NvU32 i;
+
+    for (i = 0; i < RM_ENGINE_TYPE_GR_SIZE; ++i)
+    {
+        if (gpuCheckEngine_HAL(pGpu, ENG_GR(i)))
+        {
+            syspipeMask |= NVBIT32(i);
+        }
+    }
+    actualMigCount = nvPopCount32(syspipeMask);
+
+    switch (computeSizeFlag)
+    {
+        case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_HALF:
+            if (actualMigCount == 2)
+            {
+                //
+                // On 2 slice configurations, MINI_HALF is the smallest available partition.
+                // QUARTER would be hidden by NVML. See bug 5592609 for more details.
+                //
+                isSingleSliceProfile = NV_TRUE;
+            }
+            break;
+        case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_QUARTER:
+            isSingleSliceProfile = NV_TRUE;
+            break;
+        default:
+            // nothing to do. default value is already initialized to NV_FALSE
+            break;
+    }
+
+    return isSingleSliceProfile;
+}
+
 /*!
* @brief Function to determine whether gpu instance flag combinations are valid * for this GPU @@ -138,20 +185,17 @@ kmigmgrIsGPUInstanceCombinationValid_GB202 NvU32 memSizeFlag = DRF_VAL(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, gpuInstanceFlag); NvU32 computeSizeFlag = DRF_VAL(2080_CTRL_GPU, _PARTITION_FLAG, _COMPUTE_SIZE, gpuInstanceFlag); NvU32 gfxSizeFlag = DRF_VAL(2080_CTRL_GPU, _PARTITION_FLAG, _GFX_SIZE, gpuInstanceFlag); - NvU32 smallestComputeSizeFlag; if (!kmigmgrIsGPUInstanceFlagValid_HAL(pGpu, pKernelMIGManager, gpuInstanceFlag)) { return NV_FALSE; } - smallestComputeSizeFlag = kmigmgrSmallestComputeProfileSize(pGpu, pKernelMIGManager); - NV_CHECK_OR_RETURN(LEVEL_ERROR, smallestComputeSizeFlag != KMIGMGR_COMPUTE_SIZE_INVALID, NV_FALSE); - // JPG_OFA profile is only available on the smallest available partition + // JPG_OFA profile is only available on single slice GPU Instances if (FLD_TEST_REF(NV2080_CTRL_GPU_PARTITION_FLAG_REQ_DEC_JPG_OFA, _ENABLE, gpuInstanceFlag)) { - if (computeSizeFlag != smallestComputeSizeFlag) + if (!s_kmigmgrIsSingleSliceConfig_GB202(pGpu, pKernelMIGManager, gpuInstanceFlag)) { return NV_FALSE; } diff --git a/src/nvidia/src/kernel/gpu/mig_mgr/gpu_instance_subscription.c b/src/nvidia/src/kernel/gpu/mig_mgr/gpu_instance_subscription.c index aaea95747..987a26982 100644 --- a/src/nvidia/src/kernel/gpu/mig_mgr/gpu_instance_subscription.c +++ b/src/nvidia/src/kernel/gpu/mig_mgr/gpu_instance_subscription.c @@ -374,6 +374,92 @@ gisubscriptionCanCopy_IMPL return NV_TRUE; } +/*! + * @brief Helper function to allocate and init KERNEL_WATCHDOG under the CI if it's GFX-capable + */ +static NV_STATUS +_gisubscriptionAllocKernelWatchdog +( + OBJGPU *pGpu, + MIG_COMPUTE_INSTANCE *pMIGComputeInstance +) +{ + // Allocate watchdog channel for valid GFX-capable CI + if (pMIGComputeInstance->bValid && (pMIGComputeInstance->resourceAllocation.gfxGpcCount > 0)) + { + RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); + KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu); + RsResourceRef *pKernelWatchdogRef; + KernelWatchdog *pKernelWatchdog; + + NV_PRINTF(LEVEL_INFO, "Allocating KERNEL_WATCHDOG object for CI hClient 0x%x, hSubdevice 0x%x, gfxGpcCount(%d)\n", + pMIGComputeInstance->instanceHandles.hClient, + pMIGComputeInstance->instanceHandles.hSubdevice, + pMIGComputeInstance->resourceAllocation.gfxGpcCount); + + NV_ASSERT_OK_OR_RETURN( + pRmApi->AllocWithHandle(pRmApi, + pMIGComputeInstance->instanceHandles.hClient, + pMIGComputeInstance->instanceHandles.hSubdevice, + KERNEL_WATCHDOG_OBJECT_ID, + KERNEL_WATCHDOG, + NvP64_NULL, + 0)); + + NV_ASSERT_OK_OR_RETURN( + serverutilGetResourceRefWithType(pMIGComputeInstance->instanceHandles.hClient, + KERNEL_WATCHDOG_OBJECT_ID, + classId(KernelWatchdog), + &pKernelWatchdogRef)); + + pKernelWatchdog = dynamicCast(pKernelWatchdogRef->pResource, KernelWatchdog); + + NV_ASSERT_OR_RETURN(pKernelWatchdog != NULL, NV_ERR_INVALID_STATE); + + NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, krcWatchdogInit(pGpu, pKernelRc, pKernelWatchdog)); + } + + return NV_OK; +} + +/*! 
+ * @brief Helper function to shutdown and free KERNEL_WATCHDOG under the CI + */ +static NV_STATUS +_gisubscriptionFreeKernelWatchdog +( + OBJGPU *pGpu, + MIG_COMPUTE_INSTANCE *pMIGComputeInstance +) +{ + if (pMIGComputeInstance->bValid && (pMIGComputeInstance->resourceAllocation.gfxGpcCount > 0)) + { + RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); + RsResourceRef *pKernelWatchdogRef; + KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu); + KernelWatchdog *pKernelWatchdog; + + NV_PRINTF(LEVEL_INFO, "Freeing KERNEL_WATCHDOG object for CI hClient 0x%x, gfxGpcCount(%d)\n", + pMIGComputeInstance->instanceHandles.hClient, + pMIGComputeInstance->resourceAllocation.gfxGpcCount); + + NV_ASSERT_OK_OR_RETURN( + serverutilGetResourceRefWithType(pMIGComputeInstance->instanceHandles.hClient, + KERNEL_WATCHDOG_OBJECT_ID, + classId(KernelWatchdog), + &pKernelWatchdogRef)); + + pKernelWatchdog = dynamicCast(pKernelWatchdogRef->pResource, KernelWatchdog); + NV_ASSERT_OR_RETURN(pKernelWatchdog != NULL, NV_ERR_INVALID_STATE); + + NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, krcWatchdogShutdown(pGpu, pKernelRc, pKernelWatchdog)); + + pRmApi->Free(pRmApi, pMIGComputeInstance->instanceHandles.hClient, KERNEL_WATCHDOG_OBJECT_ID); + } + + return NV_OK; +} + // // gisubscriptionCtrlCmdExecPartitionsCreate // @@ -564,36 +650,7 @@ gisubscriptionCtrlCmdExecPartitionsCreate_IMPL { for (i = 0; i < pParams->execPartCount; i++) { - MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[pParams->execPartId[i]]; - - // Allocate watchdog channel for each valid GFX-capable CI - if (pMIGComputeInstance->bValid && (pMIGComputeInstance->resourceAllocation.gfxGpcCount > 0)) - { - RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); - KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu); - RsResourceRef *pKernelWatchdogRef; - KernelWatchdog *pKernelWatchdog; - - NV_ASSERT_OK_OR_RETURN( - pRmApi->AllocWithHandle(pRmApi, - pMIGComputeInstance->instanceHandles.hClient, - pMIGComputeInstance->instanceHandles.hSubdevice, - KERNEL_WATCHDOG_OBJECT_ID, - KERNEL_WATCHDOG, - NvP64_NULL, - 0)); - - NV_ASSERT_OK_OR_RETURN( - serverutilGetResourceRefWithType(pMIGComputeInstance->instanceHandles.hClient, - KERNEL_WATCHDOG_OBJECT_ID, - classId(KernelWatchdog), - &pKernelWatchdogRef)); - - pKernelWatchdog = dynamicCast(pKernelWatchdogRef->pResource, KernelWatchdog); - NV_ASSERT_OR_RETURN(pKernelWatchdog != NULL, NV_ERR_INVALID_STATE); - - NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, krcWatchdogInit(pGpu, pKernelRc, pKernelWatchdog)); - } + NV_ASSERT_OK_OR_RETURN(_gisubscriptionAllocKernelWatchdog(pGpu, &pKernelMIGGpuInstance->MIGComputeInstance[pParams->execPartId[i]])); } } @@ -688,31 +745,11 @@ gisubscriptionCtrlCmdExecPartitionsDelete_IMPL for (execPartIdx = 0; execPartIdx < pParams->execPartCount; ++execPartIdx) { KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); + if (gpuIsClassSupported(pGpu, KERNEL_WATCHDOG) && !(IS_GSP_CLIENT(pGpu) && IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))) { - RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); - MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[pParams->execPartId[execPartIdx]]; - - if (pMIGComputeInstance->bValid && (pMIGComputeInstance->resourceAllocation.gfxGpcCount > 0)) - { - KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu); - RsResourceRef *pKernelWatchdogRef; - KernelWatchdog *pKernelWatchdog; - - NV_ASSERT_OK_OR_RETURN( - serverutilGetResourceRefWithType(pMIGComputeInstance->instanceHandles.hClient, - 
KERNEL_WATCHDOG_OBJECT_ID, - classId(KernelWatchdog), - &pKernelWatchdogRef)); - - pKernelWatchdog = dynamicCast(pKernelWatchdogRef->pResource, KernelWatchdog); - NV_ASSERT_OR_RETURN(pKernelWatchdog != NULL, NV_ERR_INVALID_STATE); - - NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, krcWatchdogShutdown(pGpu, pKernelRc, pKernelWatchdog)); - - pRmApi->Free(pRmApi, pMIGComputeInstance->instanceHandles.hClient, KERNEL_WATCHDOG_OBJECT_ID); - } + NV_ASSERT_OK_OR_RETURN(_gisubscriptionFreeKernelWatchdog(pGpu, &pKernelMIGGpuInstance->MIGComputeInstance[pParams->execPartId[execPartIdx]])); } if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu)) @@ -1078,6 +1115,12 @@ gisubscriptionCtrlCmdExecPartitionsImport_IMPL } } + if (gpuIsClassSupported(pGpu, KERNEL_WATCHDOG) && + !(IS_GSP_CLIENT(pGpu) && IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))) + { + NV_ASSERT_OK_OR_GOTO(status, _gisubscriptionAllocKernelWatchdog(pGpu, &pGPUInstance->MIGComputeInstance[pParams->id]), cleanup_rpc); + } + return NV_OK; cleanup_rpc: diff --git a/src/nvidia/srcs.mk b/src/nvidia/srcs.mk index 21bb28961..675167f37 100644 --- a/src/nvidia/srcs.mk +++ b/src/nvidia/srcs.mk @@ -478,6 +478,7 @@ SRCS += src/kernel/gpu/external_device/kern_external_device.c SRCS += src/kernel/gpu/falcon/arch/ampere/kernel_falcon_ga100.c SRCS += src/kernel/gpu/falcon/arch/ampere/kernel_falcon_ga102.c SRCS += src/kernel/gpu/falcon/arch/blackwell/kernel_falcon_gb100.c +SRCS += src/kernel/gpu/falcon/arch/blackwell/kernel_falcon_gb202.c SRCS += src/kernel/gpu/falcon/arch/turing/kernel_crashcat_engine_tu102.c SRCS += src/kernel/gpu/falcon/arch/turing/kernel_falcon_tu102.c SRCS += src/kernel/gpu/falcon/kernel_crashcat_engine.c diff --git a/version.mk b/version.mk index bb0970b7a..6f60532ba 100644 --- a/version.mk +++ b/version.mk @@ -1,5 +1,5 @@ -NVIDIA_VERSION = 590.44.01 -NVIDIA_NVID_VERSION = 590.44.01 +NVIDIA_VERSION = 590.48.01 +NVIDIA_NVID_VERSION = 590.48.01 NVIDIA_NVID_EXTRA = # This file.
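Appendix (not part of the patch): a minimal, self-contained sketch of the frame-pointer walk that kflcnCoreDumpDestructive() performs once the core is halted. The readStack64() helper, the fakeStack[] contents, and MAX_UNWIND_DEPTH below are hypothetical stand-ins for kflcnRiscvIcdReadMem_HAL(), real GSP stack memory, and __RISCV_MAX_UNWIND_DEPTH; they exist only to make the loop runnable in isolation.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define MAX_UNWIND_DEPTH 32u   /* hypothetical stand-in for __RISCV_MAX_UNWIND_DEPTH */

/* Hypothetical stand-in for kflcnRiscvIcdReadMem_HAL(): 8-byte reads from a fake stack. */
static uint64_t fakeStack[64];

static bool readStack64(uint64_t addr, uint64_t *pValue)
{
    uint64_t idx = addr / 8;

    if ((addr & 7) || (idx >= 64))
        return false;          /* misaligned or unmapped: the read fails */

    *pValue = fakeStack[idx];
    return true;
}

/* Mirrors the unwind loop: each frame saves the caller's s0 at s0-16 and ra at s0-8. */
static void unwind(uint64_t ra, uint64_t s0)
{
    unsigned depth = 0;

    if ((ra == 0) || (s0 == 0))
        return;                /* live ra/s0 not valid, nothing to unwind */

    do
    {
        if ((s0 == 0) || (ra == 0) || (s0 & 7))
            break;             /* zero or misaligned frame pointer: stop */

        s0 -= 16;              /* step back to the previous frame record */
        if (!readStack64(s0 + 8, &ra))   /* saved return address */
            break;
        if (!readStack64(s0 + 0, &s0))   /* saved frame pointer  */
            break;

        printf("unwind%02u: 0x%016llx\n", depth, (unsigned long long)ra);
        depth++;
    } while (depth < MAX_UNWIND_DEPTH);
}

int main(void)
{
    /* Frame at 0x40 saved (s0 = 0x20, ra = 0x1111); frame at 0x20 saved (s0 = 0, ra = 0x3333). */
    fakeStack[6] = 0x20;
    fakeStack[7] = 0x1111;
    fakeStack[2] = 0;
    fakeStack[3] = 0x3333;

    unwind(0x2222 /* live ra */, 0x40 /* live s0 */);
    return 0;
}

With the hypothetical frames above the walk prints two entries and then stops when it reads a zero frame pointer, which is the same termination condition the driver code relies on.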