mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
570.190
@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"570.181\"
ccflags-y += -DNV_VERSION_STRING=\"570.190\"

ifneq ($(SYSSRCHOST1X),)
ccflags-y += -I$(SYSSRCHOST1X)
@@ -4071,6 +4071,43 @@ compile_test() {
fi
;;

drm_fb_create_takes_format_info)
#
# Determine if a `struct drm_format_info *` is passed into
# the .fb_create callback. If so, it will have 4 arguments.
# This parameter was added in commit 81112eaac559 ("drm:
# Pass the format info to .fb_create") in linux-next
# (2025-07-16)
CODE="
#include <drm/drm_mode_config.h>
#include <drm/drm_framebuffer.h>

static const struct drm_mode_config_funcs funcs;
void conftest_drm_fb_create_takes_format_info(void) {
funcs.fb_create(NULL, NULL, NULL, NULL);
}"

compile_check_conftest "$CODE" "NV_DRM_FB_CREATE_TAKES_FORMAT_INFO" "" "types"
;;

drm_fill_fb_struct_takes_format_info)
#
# Determine if a `struct drm_format_info *` is passed into
# drm_helper_mode_fill_fb_struct(). If so, it will have 4 arguments.
# This parameter was added in commit a34cc7bf1034 ("drm:
# Allow the caller to pass in the format info to
# drm_helper_mode_fill_fb_struct()") in linux-next
# (2025-07-16)
CODE="
#include <drm/drm_modeset_helper.h>

void conftest_drm_fill_fb_struct_takes_format_info(void) {
drm_helper_mode_fill_fb_struct(NULL, NULL, NULL, NULL);
}"

compile_check_conftest "$CODE" "NV_DRM_FILL_FB_STRUCT_TAKES_FORMAT_INFO" "" "types"
;;

drm_connector_funcs_have_mode_in_name)
#
# Determine if _mode_ is present in connector function names. We
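For context, the sketch below contrasts the two .fb_create prototypes that the new conftest distinguishes. It is illustrative only and not part of the commit; the parameter names are assumptions, but the argument order matches the conftest snippet above and the nv_drm_framebuffer_create() change further down.

/* Illustrative sketch, not from the commit: .fb_create before and after the
 * upstream "drm: Pass the format info to .fb_create" change. The conftest
 * defines NV_DRM_FB_CREATE_TAKES_FORMAT_INFO when the 4-argument form
 * compiles. Parameter names are assumptions.
 */
struct drm_framebuffer *fb_create_3arg(struct drm_device *dev,
                                       struct drm_file *file,
                                       const struct drm_mode_fb_cmd2 *cmd);

struct drm_framebuffer *fb_create_4arg(struct drm_device *dev,
                                       struct drm_file *file,
                                       const struct drm_format_info *info,
                                       const struct drm_mode_fb_cmd2 *cmd);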
@@ -209,11 +209,14 @@ static void nv_drm_output_poll_changed(struct drm_device *dev)
static struct drm_framebuffer *nv_drm_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
const struct drm_format_info *info,
#endif
#if defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
const struct drm_mode_fb_cmd2 *cmd
#else
#else
struct drm_mode_fb_cmd2 *cmd
#endif
#endif
)
{
struct drm_mode_fb_cmd2 local_cmd;

@@ -224,11 +227,14 @@ static struct drm_framebuffer *nv_drm_framebuffer_create(
fb = nv_drm_internal_framebuffer_create(
dev,
file,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
info,
#endif
&local_cmd);

#if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
#if !defined(NV_DRM_HELPER_MODE_FILL_FB_STRUCT_HAS_CONST_MODE_CMD_ARG)
*cmd = local_cmd;
#endif
#endif

return fb;
}

@@ -2046,13 +2052,13 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
#endif
nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
}
#if defined(NV_DRM_CLIENT_AVAILABLE)
#if defined(NV_DRM_CLIENT_AVAILABLE)
drm_client_setup(dev, NULL);
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
drm_fbdev_ttm_setup(dev, 32);
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
drm_fbdev_generic_setup(dev, 32);
#endif
#endif
}
#endif /* defined(NV_DRM_FBDEV_AVAILABLE) */

@@ -220,6 +220,9 @@ fail:
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
const struct drm_format_info *info,
#endif
struct drm_mode_fb_cmd2 *cmd)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);

@@ -273,6 +276,9 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create(
dev,
#endif
&nv_fb->base,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
info,
#endif
cmd);

/*

@@ -84,6 +84,9 @@ static inline void nv_fb_set_gem_obj(
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
struct drm_device *dev,
struct drm_file *file,
#if defined(NV_DRM_FB_CREATE_TAKES_FORMAT_INFO)
const struct drm_format_info *info,
#endif
struct drm_mode_fb_cmd2 *cmd);

#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */

@@ -703,6 +703,13 @@ int nv_drm_atomic_commit(struct drm_device *dev,
#else
drm_atomic_helper_swap_state(dev, state);
#endif
/*
* Used to update legacy modeset state pointers to support UAPIs not updated
* by the core atomic modeset infrastructure.
*
* Example: /sys/class/drm/<card connector>/enabled
*/
drm_atomic_helper_update_legacy_modeset_state(dev, state);

/*
* nv_drm_atomic_commit_internal() must not return failure after

@@ -148,3 +148,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_date
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations_fop_unsigned_offset_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_helper_funcs_mode_valid_has_const_mode_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_fb_create_takes_format_info

File diff suppressed because it is too large
@@ -37,6 +37,7 @@ typedef struct
// This stores pointers to uvm_va_block_t for HMM blocks.
uvm_range_tree_t blocks;
uvm_mutex_t blocks_lock;
atomic64_t allocated_page_count;
} uvm_hmm_va_space_t;

#if UVM_IS_CONFIG_HMM()

@@ -402,7 +402,10 @@ static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_chunk(pmm, chunk);

uvm_assert_spinlock_locked(&pmm->list_lock);
// The PMM list_lock must be held, but calling uvm_assert_spinlock_locked()
// is not possible here due to the absence of the UVM context pointer in
// the interrupt context when called from devmem_page_free().

UVM_ASSERT(chunk->state != UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
chunk->state = UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED;

@@ -415,8 +418,9 @@ static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)

// The passed-in subchunk is not the root chunk so the root chunk has to be
// split.
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
"chunk state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));

chunk->suballoc->pinned_leaf_chunks++;
}

@@ -429,7 +433,6 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_

uvm_assert_spinlock_locked(&pmm->list_lock);
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
UVM_ASSERT(chunk->va_block == NULL);
UVM_ASSERT(chunk_is_root_chunk_pinned(pmm, chunk));
UVM_ASSERT(new_state != UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);

@@ -444,8 +447,9 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_

// The passed-in subchunk is not the root chunk so the root chunk has to be
// split.
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
"chunk state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));

UVM_ASSERT(chunk->suballoc->pinned_leaf_chunks != 0);
chunk->suballoc->pinned_leaf_chunks--;

@@ -597,8 +601,6 @@ NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
return status;

for (i = 0; i < num_chunks; ++i) {
UVM_ASSERT(chunks[i]->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);

uvm_spin_lock(&pmm->list_lock);
chunk_unpin(pmm, chunks[i], UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
chunks[i]->is_referenced = false;

@@ -644,45 +646,29 @@ static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk
list_del_init(&chunk->list);
}

static void gpu_unpin_temp(uvm_pmm_gpu_t *pmm,
uvm_gpu_chunk_t *chunk,
uvm_va_block_t *va_block,
bool is_referenced)
void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block)
{
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));

INIT_LIST_HEAD(&chunk->list);
UVM_ASSERT(list_empty(&chunk->list));
UVM_ASSERT(va_block);
UVM_ASSERT(chunk->va_block == va_block);
UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block));

uvm_spin_lock(&pmm->list_lock);

UVM_ASSERT(!chunk->va_block);
UVM_ASSERT(va_block);
UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block));

chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
chunk->is_referenced = is_referenced;
chunk->va_block = va_block;
chunk_update_lists_locked(pmm, chunk);

uvm_spin_unlock(&pmm->list_lock);
}

void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block)
{
gpu_unpin_temp(pmm, chunk, va_block, false);
}

void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block)
{
gpu_unpin_temp(pmm, chunk, va_block, true);
}

void uvm_pmm_gpu_free(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker)
{
NV_STATUS status;

if (!chunk)
// Referenced chunks are freed by Linux when the reference is released.
if (!chunk || chunk->is_referenced)
return;

UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED ||

@@ -748,6 +734,10 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
size_t i;

UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT);
UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk),
"%u != %u\n",
chunk->suballoc->allocated,
num_subchunks(chunk));
UVM_ASSERT(first_child->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED ||
first_child->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);

@@ -766,14 +756,6 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
}
}

if (first_child->state == UVM_PMM_GPU_CHUNK_STATE_FREE) {
UVM_ASSERT(chunk->suballoc->allocated == 0);
}
else {
UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk), "%u != %u\n",
chunk->suballoc->allocated, num_subchunks(chunk));
}

return true;
}

@@ -812,6 +794,7 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
else if (child_state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
UVM_ASSERT(root_chunk->chunk.suballoc->pinned_leaf_chunks >= num_sub);
root_chunk->chunk.suballoc->pinned_leaf_chunks += 1 - num_sub;
chunk->va_block = subchunk->va_block;
}

chunk->state = child_state;

@@ -835,7 +818,7 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(list_empty(&subchunk->list));

if ((child_state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(subchunk))
UVM_ASSERT(subchunk->va_block != NULL);
UVM_ASSERT(subchunk->va_block);

kmem_cache_free(CHUNK_CACHE, subchunk);
}

@@ -1202,7 +1185,7 @@ void uvm_pmm_gpu_mark_chunk_evicted(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)

UVM_ASSERT(chunk_is_in_eviction(pmm, chunk));
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
UVM_ASSERT(chunk->va_block != NULL);
UVM_ASSERT(chunk->va_block);

chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;

@@ -1259,11 +1242,13 @@ static NV_STATUS find_and_retain_va_block_to_evict(uvm_pmm_gpu_t *pmm, uvm_gpu_c

uvm_spin_lock(&pmm->list_lock);

// All free chunks should have been pinned already by pin_free_chunks_func().
// All free chunks should have been pinned already by
// pin_free_chunks_func().
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED ||
chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED ||
chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
"state %s\n", uvm_pmm_gpu_chunk_state_string(chunk->state));
"state %s\n",
uvm_pmm_gpu_chunk_state_string(chunk->state));

if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
UVM_ASSERT(chunk->va_block);

@@ -1750,8 +1735,10 @@ static NV_STATUS alloc_chunk_with_splits(uvm_pmm_gpu_t *pmm,
UVM_ASSERT(chunk->parent->suballoc);
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk->parent) == uvm_chunk_find_next_size(chunk_sizes, cur_size));
UVM_ASSERT(chunk->parent->type == type);
UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent), "allocated %u num %u\n",
chunk->parent->suballoc->allocated, num_subchunks(chunk->parent));
UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent),
"allocated %u num %u\n",
chunk->parent->suballoc->allocated,
num_subchunks(chunk->parent));
}

if (cur_size == chunk_size) {

@@ -1856,9 +1843,9 @@ static void init_root_chunk(uvm_pmm_gpu_t *pmm,
uvm_pmm_gpu_chunk_state_string(chunk->state),
uvm_gpu_name(gpu));

UVM_ASSERT(chunk->parent == NULL);
UVM_ASSERT(chunk->suballoc == NULL);
UVM_ASSERT(chunk->va_block == NULL);
UVM_ASSERT(!chunk->parent);
UVM_ASSERT(!chunk->suballoc);
UVM_ASSERT(!chunk->va_block);
UVM_ASSERT(chunk->va_block_page_index == PAGES_PER_UVM_VA_BLOCK);
UVM_ASSERT(list_empty(&chunk->list));
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_MAX);

@@ -2116,6 +2103,9 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
subchunk->va_block_page_index = chunk->va_block_page_index + (i * subchunk_size) / PAGE_SIZE;
subchunk->is_referenced = chunk->is_referenced;
}
else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
subchunk->va_block = chunk->va_block;
}
}

// We're splitting an allocated or pinned chunk in-place.

@@ -2141,6 +2131,10 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
// accounting for the root chunk itself so add the 1 back.
if (chunk_is_root_chunk(chunk))
root_chunk->chunk.suballoc->pinned_leaf_chunks += 1;

chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_referenced = false;
}

chunk->state = UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT;

@@ -2222,16 +2216,16 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)

if (root_chunk->chunk.in_eviction) {
// A root chunk with pinned subchunks would never be picked for eviction
// so this one has to be in the allocated state. Pin it and let the
// evicting thread pick it up.
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
UVM_ASSERT(chunk->va_block != NULL);
UVM_ASSERT(chunk->va_block_page_index != PAGES_PER_UVM_VA_BLOCK);
UVM_ASSERT(list_empty(&chunk->list));
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_zero = false;
chunk_pin(pmm, chunk);
// but HMM evictions will end up here so leave the chunk pinned (or pin
// it) and let the eviction thread pick it up.
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
UVM_ASSERT(chunk->va_block);
UVM_ASSERT(list_empty(&chunk->list));
chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_zero = false;
chunk_pin(pmm, chunk);
}
return;
}

@@ -2245,17 +2239,15 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
}
}

if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_FREE);
}
else {
chunk->state = UVM_PMM_GPU_CHUNK_STATE_FREE;
chunk->va_block = NULL;
}

chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_zero = false;

if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED)
chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_FREE);
else
chunk->state = UVM_PMM_GPU_CHUNK_STATE_FREE;

chunk_update_lists_locked(pmm, chunk);
}

@@ -2369,8 +2361,8 @@ static void free_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
try_free = is_root;
}
else {
// Freeing a chunk can only fail if it requires merging. Take the PMM lock
// and free it with merges supported.
// Freeing a chunk can only fail if it requires merging. Take the PMM
// lock and free it with merges supported.
uvm_mutex_lock(&pmm->lock);
free_chunk_with_merges(pmm, chunk);
uvm_mutex_unlock(&pmm->lock);

@@ -3088,6 +3080,11 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
break;
}

if (page->zone_device_data) {
ret = false;
break;
}

if (page_count(page)) {
ret = false;
break;

@@ -3102,6 +3099,14 @@ static void devmem_page_free(struct page *page)
uvm_gpu_chunk_t *chunk = uvm_pmm_devmem_page_to_chunk(page);
uvm_gpu_t *gpu = uvm_gpu_chunk_get_gpu(chunk);

if (chunk->va_block) {
uvm_va_space_t *va_space = chunk->va_block->hmm.va_space;

UVM_ASSERT(va_space);
atomic64_dec(&va_space->hmm.allocated_page_count);
UVM_ASSERT(atomic64_read(&va_space->hmm.allocated_page_count) >= 0);
}

page->zone_device_data = NULL;

// We should be calling free_chunk() except that it acquires a mutex and

@@ -3111,7 +3116,20 @@ static void devmem_page_free(struct page *page)
spin_lock(&gpu->pmm.list_lock.lock);

UVM_ASSERT(chunk->is_referenced);

chunk->va_block = NULL;
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
chunk->is_referenced = false;

if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
list_del_init(&chunk->list);
chunk_pin(&gpu->pmm, chunk);
}
else {
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
UVM_ASSERT(list_empty(&chunk->list));
}

list_add_tail(&chunk->list, &gpu->pmm.root_chunks.va_block_lazy_free);

spin_unlock(&gpu->pmm.list_lock.lock);

@@ -3362,6 +3380,7 @@ static void process_lazy_free(uvm_pmm_gpu_t *pmm)
// is empty.
while (!list_empty(&pmm->root_chunks.va_block_lazy_free)) {
chunk = list_first_entry(&pmm->root_chunks.va_block_lazy_free, uvm_gpu_chunk_t, list);
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
list_del_init(&chunk->list);
uvm_spin_unlock(&pmm->list_lock);

@@ -3414,6 +3433,7 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)

for (i = 0; i < UVM_PMM_GPU_MEMORY_TYPE_COUNT; i++) {
pmm->chunk_sizes[i] = 0;

// Add the common root chunk size to all memory types
pmm->chunk_sizes[i] |= UVM_CHUNK_SIZE_MAX;
for (j = 0; j < ARRAY_SIZE(chunk_size_init); j++)

@@ -3421,7 +3441,9 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)

UVM_ASSERT(pmm->chunk_sizes[i] < UVM_CHUNK_SIZE_INVALID);
UVM_ASSERT_MSG(hweight_long(pmm->chunk_sizes[i]) <= UVM_MAX_CHUNK_SIZES,
"chunk sizes %lu, max chunk sizes %u\n", hweight_long(pmm->chunk_sizes[i]), UVM_MAX_CHUNK_SIZES);
"chunk sizes %lu, max chunk sizes %u\n",
hweight_long(pmm->chunk_sizes[i]),
UVM_MAX_CHUNK_SIZES);
}

status = init_caches(pmm);

@@ -3515,9 +3537,9 @@ void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm)

gpu = uvm_pmm_to_gpu(pmm);

UVM_ASSERT(uvm_pmm_gpu_check_orphan_pages(pmm));
nv_kthread_q_flush(&gpu->parent->lazy_free_q);
UVM_ASSERT(list_empty(&pmm->root_chunks.va_block_lazy_free));
UVM_ASSERT(uvm_pmm_gpu_check_orphan_pages(pmm));
release_free_root_chunks(pmm);

if (gpu->mem_info.size != 0 && gpu_supports_pma_eviction(gpu))

@@ -271,6 +271,11 @@ struct uvm_gpu_chunk_struct

// This flag indicates an allocated user chunk is referenced by a device
// private struct page PTE and therefore expects a page_free() callback.
// The flag is only for sanity checking since uvm_pmm_gpu_free()
// shouldn't be called if Linux has a device private reference to this
// chunk and devmem_page_free() should only be called from the Linux
// callback if a reference was created.
// See uvm_hmm_va_block_service_locked() and fill_dst_pfn() for details.
//
// This field is always false in kernel chunks.
bool is_referenced : 1;

@@ -300,6 +305,9 @@ struct uvm_gpu_chunk_struct
// The VA block using the chunk, if any.
// User chunks that are not backed by a VA block are considered to be
// temporarily pinned and cannot be evicted.
// Note that the chunk state is normally UVM_PMM_GPU_CHUNK_STATE_ALLOCATED
// but can also be UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED if an HMM va_block
// and device private struct page have a pointer to this chunk.
//
// This field is always NULL in kernel chunks.
uvm_va_block_t *va_block;

@@ -437,17 +445,16 @@ struct page *uvm_gpu_chunk_to_page(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
// Allocates num_chunks chunks of size chunk_size in caller-supplied array
// (chunks).
//
// Returned chunks are in the TEMP_PINNED state, requiring a call to either
// uvm_pmm_gpu_unpin_allocated, uvm_pmm_gpu_unpin_referenced, or
// uvm_pmm_gpu_free. If a tracker is passed in, all
// the pending operations on the allocated chunks will be added to it
// Returned chunks are in the TEMP_PINNED state, requiring a call to
// uvm_pmm_gpu_unpin_allocated or uvm_pmm_gpu_free. If a tracker is passed in,
// all the pending operations on the allocated chunks will be added to it
// guaranteeing that all the entries come from the same GPU as the PMM.
// Otherwise, when tracker is NULL, all the pending operations will be
// synchronized before returning to the caller.
//
// Each of the allocated chunks list nodes (uvm_gpu_chunk_t::list) can be used
// by the caller until the chunk is unpinned (uvm_pmm_gpu_unpin_allocated,
// uvm_pmm_gpu_unpin_referenced) or freed (uvm_pmm_gpu_free). If used, the list
// by the caller until the chunk is unpinned (uvm_pmm_gpu_unpin_allocated)
// or freed (uvm_pmm_gpu_free). If used, the list
// node has to be returned to a valid state before calling either of the APIs.
//
// In case of an error, the chunks array is guaranteed to be cleared.

@@ -480,12 +487,6 @@ NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
// Can only be used on user memory.
void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);

// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
// mark it as referenced.
//
// Can only be used on user memory.
void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);

// Free a user or kernel chunk. Temporarily pinned chunks are unpinned.
//
// The tracker is optional and a NULL tracker indicates that no new operation

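A minimal sketch of the chunk lifecycle these comments describe, illustrative only and not part of the commit. It uses only the two signatures visible in this header; how the chunk was allocated into the TEMP_PINNED state is left out because the allocation entry point is not shown in this hunk.

/* Illustrative sketch, not from the commit: after this change, a TEMP_PINNED
 * user chunk has two legal exits. 'chunk' is assumed to have been handed back
 * by the PMM allocation path in the TEMP_PINNED state.
 */
static void example_finish_with_chunk(uvm_pmm_gpu_t *pmm,
                                      uvm_gpu_chunk_t *chunk,
                                      uvm_va_block_t *va_block,
                                      bool keep_it)
{
    if (keep_it) {
        // TEMP_PINNED -> ALLOCATED, recording va_block as the reverse map.
        uvm_pmm_gpu_unpin_allocated(pmm, chunk, va_block);
    }
    else {
        // Temporarily pinned chunks are unpinned by the free; a NULL tracker
        // means the pending operations are synchronized internally.
        uvm_pmm_gpu_free(pmm, chunk, NULL);
    }
}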
@@ -426,11 +426,13 @@ static uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page_resident(uvm_va_block_t
return chunk;
}

void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, int nid, uvm_page_index_t page_index)
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
int nid,
uvm_page_index_t page_index)
{
uvm_va_block_cpu_node_state_t *node_state = block_node_state_get(va_block, nid);
uvm_cpu_chunk_storage_mixed_t *mixed;
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, nid, page_index);
uvm_va_block_region_t chunk_region = uvm_cpu_chunk_block_region(va_block, chunk, page_index);
size_t slot_index;
uvm_cpu_chunk_t **chunks;

@@ -765,7 +767,7 @@ static bool block_check_cpu_chunks(uvm_va_block_t *block)
int nid;
uvm_page_mask_t *temp_resident_mask;

temp_resident_mask = kmem_cache_alloc(g_uvm_page_mask_cache, NV_UVM_GFP_FLAGS | __GFP_ZERO);
temp_resident_mask = nv_kmem_cache_zalloc(g_uvm_page_mask_cache, NV_UVM_GFP_FLAGS);

for_each_possible_uvm_node(nid) {
uvm_cpu_chunk_t *chunk;

@@ -827,16 +829,16 @@ void uvm_va_block_retry_deinit(uvm_va_block_retry_t *retry, uvm_va_block_t *va_b
uvm_pmm_gpu_free(&gpu->pmm, gpu_chunk, NULL);
}

// HMM should have already moved allocated GPU chunks to the referenced
// state or freed them.
if (uvm_va_block_is_hmm(va_block))
UVM_ASSERT(list_empty(&retry->used_chunks));

// Unpin all the used chunks now that we are done
list_for_each_entry_safe(gpu_chunk, next_chunk, &retry->used_chunks, list) {
list_del_init(&gpu_chunk->list);
gpu = uvm_gpu_chunk_get_gpu(gpu_chunk);
// HMM should have already moved allocated blocks to the referenced
// state so any left over were not migrated and should be freed.
if (uvm_va_block_is_hmm(va_block))
uvm_pmm_gpu_free(&gpu->pmm, gpu_chunk, NULL);
else
uvm_pmm_gpu_unpin_allocated(&gpu->pmm, gpu_chunk, va_block);
uvm_pmm_gpu_unpin_allocated(&gpu->pmm, gpu_chunk, va_block);
}
}

@@ -1158,6 +1160,8 @@ static size_t block_gpu_chunk_index(uvm_va_block_t *block,
UVM_ASSERT(gpu_state->chunks);
chunk = gpu_state->chunks[index];
if (chunk) {
UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));
UVM_ASSERT(uvm_id_equal(uvm_gpu_id_from_index(chunk->gpu_index), gpu->id));
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == size);
UVM_ASSERT(chunk->state != UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED);
UVM_ASSERT(chunk->state != UVM_PMM_GPU_CHUNK_STATE_FREE);

@@ -1385,10 +1389,7 @@ error:
return status;
}

// Retrieves the gpu_state for the given GPU. The returned pointer is
// internally managed and will be allocated (and freed) automatically,
// rather than by the caller.
static uvm_va_block_gpu_state_t *block_gpu_state_get_alloc(uvm_va_block_t *block, uvm_gpu_t *gpu)
uvm_va_block_gpu_state_t *uvm_va_block_gpu_state_get_alloc(uvm_va_block_t *block, uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);

@@ -1420,22 +1421,6 @@ error:
return NULL;
}

NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_gpu_id_t gpu_id;

UVM_ASSERT(uvm_va_block_is_hmm(va_block));
uvm_assert_mutex_locked(&va_block->lock);

for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpus) {
if (!block_gpu_state_get_alloc(va_block, uvm_gpu_get(gpu_id)))
return NV_ERR_NO_MEMORY;
}

return NV_OK;
}

void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block,
uvm_cpu_chunk_t *chunk)
{

@@ -1490,7 +1475,7 @@ void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_regio
uvm_page_mask_region_clear(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ], chunk_region);
uvm_page_mask_region_clear(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], chunk_region);
uvm_va_block_cpu_clear_resident_region(va_block, nid, chunk_region);
uvm_cpu_chunk_remove_from_block(va_block, nid, page_index);
uvm_cpu_chunk_remove_from_block(va_block, chunk, nid, page_index);
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk);
uvm_cpu_chunk_free(chunk);
}

@@ -1586,26 +1571,6 @@ static NV_STATUS block_alloc_cpu_chunk(uvm_va_block_t *block,
return status;
}

// Same as block_alloc_cpu_chunk() but allocate a chunk suitable for use as
// a HMM destination page. The main difference is UVM does not own the reference
// on the struct page backing these chunks.
static NV_STATUS block_alloc_hmm_cpu_chunk(uvm_va_block_t *block,
uvm_chunk_sizes_mask_t cpu_allocation_sizes,
uvm_cpu_chunk_alloc_flags_t flags,
int nid,
uvm_cpu_chunk_t **chunk)
{
NV_STATUS status;

UVM_ASSERT(uvm_va_block_is_hmm(block));

status = block_alloc_cpu_chunk(block, cpu_allocation_sizes, flags, nid, chunk);
if (status == NV_OK)
(*chunk)->type = UVM_CPU_CHUNK_TYPE_HMM;

return status;
}

// Find the largest allocation size we can use for the given page_index in the
// given block. Returns the mask of possible sizes and region covered by the
// largest. Callers may also elect to use a smaller size.

@@ -1837,7 +1802,7 @@ static NV_STATUS block_add_cpu_chunk(uvm_va_block_t *block,

status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk);
if (status != NV_OK) {
uvm_cpu_chunk_remove_from_block(block, uvm_cpu_chunk_get_numa_node(chunk), page_index);
uvm_cpu_chunk_remove_from_block(block, chunk, uvm_cpu_chunk_get_numa_node(chunk), page_index);
goto out;
}
}

@@ -1859,10 +1824,9 @@ out:
// is required for IOMMU support. Skipped on GPUs without access to CPU memory.
// e.g., this happens when the Confidential Computing Feature is enabled.
static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
uvm_page_mask_t *populate_page_mask,
const uvm_page_mask_t *populate_page_mask,
uvm_va_block_region_t populate_region,
uvm_va_block_context_t *block_context,
bool staged)
uvm_va_block_context_t *block_context)
{
NV_STATUS status = NV_OK;
uvm_cpu_chunk_t *chunk;

@@ -1956,13 +1920,7 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
if (!uvm_page_mask_region_full(resident_mask, region))
chunk_alloc_flags |= UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO;

// Management of a page used for a staged migration is never handed off
// to the kernel and is really just a driver managed page. Therefore
// don't allocate a HMM chunk in this case.
if (uvm_va_block_is_hmm(block) && !staged)
status = block_alloc_hmm_cpu_chunk(block, allocation_sizes, chunk_alloc_flags, preferred_nid, &chunk);
else
status = block_alloc_cpu_chunk(block, allocation_sizes, chunk_alloc_flags, preferred_nid, &chunk);
status = block_alloc_cpu_chunk(block, allocation_sizes, chunk_alloc_flags, preferred_nid, &chunk);

if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
alloc_flags &= ~UVM_CPU_CHUNK_ALLOC_FLAGS_STRICT;

@@ -1973,7 +1931,8 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,
return status;
}

// A smaller chunk than the maximum size may have been allocated, update the region accordingly.
// A smaller chunk than the maximum size may have been allocated,
// update the region accordingly.
region = uvm_va_block_chunk_region(block, uvm_cpu_chunk_get_size(chunk), page_index);
status = block_add_cpu_chunk(block, node_pages_mask, chunk, region);
if (status != NV_OK)

@@ -1981,50 +1940,14 @@ static NV_STATUS block_populate_pages_cpu(uvm_va_block_t *block,

// Skip iterating over all pages covered by the allocated chunk.
page_index = region.outer - 1;

#if UVM_IS_CONFIG_HMM()
if (uvm_va_block_is_hmm(block) && block_context)
block_context->hmm.dst_pfns[page_index] = migrate_pfn(page_to_pfn(chunk->page));
#endif
}

return NV_OK;
}

// Note this clears the block_context caller_page_mask.
NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index, uvm_va_block_context_t *block_context)
{
uvm_page_mask_t *page_mask = &block_context->caller_page_mask;
NV_STATUS status = NV_OK;

uvm_page_mask_zero(page_mask);
uvm_page_mask_set(page_mask, page_index);

if (uvm_va_block_is_hmm(va_block)) {
const uvm_va_policy_t *policy;
uvm_va_block_region_t region;
uvm_va_policy_node_t *node;

uvm_for_each_va_policy_in(policy, va_block, va_block->start, va_block->end, node, region) {
status = block_populate_pages_cpu(va_block,
page_mask,
region,
block_context,
false);

if (status != NV_OK)
break;
}
}
else {
status = block_populate_pages_cpu(va_block,
page_mask,
uvm_va_block_region_from_block(va_block),
block_context,
false);
}

return status;
return block_populate_pages_cpu(va_block, NULL, uvm_va_block_region_for_page(page_index), block_context);
}

// Try allocating a chunk. If eviction was required,

@@ -2413,7 +2336,7 @@ static uvm_page_mask_t *block_resident_mask_get_alloc(uvm_va_block_t *block, uvm
if (UVM_ID_IS_CPU(processor))
return uvm_va_block_resident_mask_get(block, processor, nid);

gpu_state = block_gpu_state_get_alloc(block, uvm_gpu_get(processor));
gpu_state = uvm_va_block_gpu_state_get_alloc(block, uvm_gpu_get(processor));
if (!gpu_state)
return NULL;

@@ -2453,9 +2376,15 @@ void uvm_va_block_unmapped_pages_get(uvm_va_block_t *va_block,
return;
}

uvm_page_mask_zero(out_mask);
uvm_page_mask_region_fill(out_mask, region);

for_each_id_in_mask(id, &va_block->mapped) {
// UVM-HMM doesn't always know when CPU pages are mapped or not since there
// is no notification when CPU page tables are upgraded. If the page is
// resident, assume the CPU has some mapping.
uvm_page_mask_andnot(out_mask, out_mask, uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU, NUMA_NO_NODE));

for_each_gpu_id_in_mask(id, &va_block->mapped) {
uvm_page_mask_andnot(out_mask, out_mask, uvm_va_block_map_mask_get(va_block, id));
}
}

@@ -2951,7 +2880,7 @@ static NV_STATUS block_populate_gpu_chunk(uvm_va_block_t *block,
size_t chunk_index,
uvm_va_block_region_t chunk_region)
{
uvm_va_block_gpu_state_t *gpu_state = block_gpu_state_get_alloc(block, gpu);
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get_alloc(block, gpu);
uvm_gpu_chunk_t *chunk = NULL;
uvm_chunk_size_t chunk_size = uvm_va_block_region_size(chunk_region);
uvm_va_block_test_t *block_test = uvm_va_block_get_test(block);

@@ -3005,8 +2934,10 @@ static NV_STATUS block_populate_gpu_chunk(uvm_va_block_t *block,
}

// Record the used chunk so that it can be unpinned at the end of the whole
// operation.
// operation. HMM chunks are unpinned after a successful migration.
block_retry_add_used_chunk(retry, chunk);

chunk->va_block = block;
gpu_state->chunks[chunk_index] = chunk;

return NV_OK;

@@ -3023,12 +2954,13 @@ chunk_free:
}

// Populate all chunks which cover the given region and page mask.
static NV_STATUS block_populate_pages_gpu(uvm_va_block_t *block,
NV_STATUS uvm_va_block_populate_pages_gpu(uvm_va_block_t *block,
uvm_va_block_retry_t *retry,
uvm_gpu_t *gpu,
uvm_gpu_id_t gpu_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *populate_mask)
{
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);
uvm_va_block_region_t chunk_region, check_region;
size_t chunk_index;
uvm_page_index_t page_index;

@@ -3105,7 +3037,7 @@ static NV_STATUS block_populate_pages(uvm_va_block_t *block,
if (!tmp_processor_mask)
return NV_ERR_NO_MEMORY;

status = block_populate_pages_gpu(block, retry, uvm_gpu_get(dest_id), region, populate_page_mask);
status = uvm_va_block_populate_pages_gpu(block, retry, dest_id, region, populate_page_mask);
if (status != NV_OK) {
uvm_processor_mask_cache_free(tmp_processor_mask);
return status;

@@ -3150,7 +3082,7 @@ static NV_STATUS block_populate_pages(uvm_va_block_t *block,
}

uvm_memcg_context_start(&memcg_context, block_context->mm);
status = block_populate_pages_cpu(block, cpu_populate_mask, region, block_context, UVM_ID_IS_GPU(dest_id));
status = block_populate_pages_cpu(block, cpu_populate_mask, region, block_context);
uvm_memcg_context_end(&memcg_context);
return status;
}

@@ -4180,7 +4112,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,

// Ensure that there is GPU state that can be used for CPU-to-CPU copies
if (UVM_ID_IS_CPU(dst_id) && uvm_id_equal(src_id, dst_id)) {
uvm_va_block_gpu_state_t *gpu_state = block_gpu_state_get_alloc(block, copying_gpu);
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get_alloc(block, copying_gpu);
if (!gpu_state) {
status = NV_ERR_NO_MEMORY;
break;

@@ -4841,6 +4773,7 @@ static void block_cleanup_temp_pinned_gpu_chunks(uvm_va_block_t *va_block, uvm_g
// block_populate_pages above. Release them since the copy
// failed and they won't be mapped to userspace.
if (chunk && chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
list_del_init(&chunk->list);
uvm_mmu_chunk_unmap(chunk, &va_block->tracker);
uvm_pmm_gpu_free(&gpu->pmm, chunk, &va_block->tracker);
gpu_state->chunks[i] = NULL;

@@ -4935,7 +4868,8 @@ NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
prefetch_page_mask,
UVM_VA_BLOCK_TRANSFER_MODE_MOVE);

if (status != NV_OK) {
// HMM does its own clean up.
if (status != NV_OK && !uvm_va_block_is_hmm(va_block)) {
if (UVM_ID_IS_GPU(dest_id))
block_cleanup_temp_pinned_gpu_chunks(va_block, dest_id);

@@ -7891,7 +7825,7 @@ static NV_STATUS block_pre_populate_pde1_gpu(uvm_va_block_t *block,
gpu = gpu_va_space->gpu;
big_page_size = gpu_va_space->page_tables.big_page_size;

gpu_state = block_gpu_state_get_alloc(block, gpu);
gpu_state = uvm_va_block_gpu_state_get_alloc(block, gpu);
if (!gpu_state)
return NV_ERR_NO_MEMORY;

@@ -8604,12 +8538,12 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,

gpu = uvm_gpu_get(id);

// Although this GPU UUID is registered in the VA space, it might not have a
// GPU VA space registered.
// Although this GPU UUID is registered in the VA space, it might not
// have a GPU VA space registered.
if (!uvm_gpu_va_space_get(va_space, gpu))
return NV_OK;

gpu_state = block_gpu_state_get_alloc(va_block, gpu);
gpu_state = uvm_va_block_gpu_state_get_alloc(va_block, gpu);
if (!gpu_state)
return NV_ERR_NO_MEMORY;

@@ -9608,7 +9542,7 @@ static void block_kill(uvm_va_block_t *block)
if (!uvm_va_block_is_hmm(block))
uvm_cpu_chunk_mark_dirty(chunk, 0);

uvm_cpu_chunk_remove_from_block(block, nid, page_index);
uvm_cpu_chunk_remove_from_block(block, chunk, nid, page_index);
uvm_cpu_chunk_free(chunk);
}

@@ -9672,13 +9606,12 @@ void uvm_va_block_kill(uvm_va_block_t *va_block)
static void block_gpu_release_region(uvm_va_block_t *va_block,
uvm_gpu_id_t gpu_id,
uvm_va_block_gpu_state_t *gpu_state,
uvm_page_mask_t *page_mask,
uvm_va_block_region_t region)
{
uvm_page_index_t page_index;
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);

for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
for_each_va_block_page_in_region(page_index, region) {
size_t chunk_index = block_gpu_chunk_index(va_block, gpu, page_index, NULL);
uvm_gpu_chunk_t *gpu_chunk = gpu_state->chunks[chunk_index];

@@ -9723,7 +9656,7 @@ void uvm_va_block_munmap_region(uvm_va_block_t *va_block,
uvm_processor_mask_clear(&va_block->evicted_gpus, gpu_id);

if (gpu_state->chunks) {
block_gpu_release_region(va_block, gpu_id, gpu_state, NULL, region);
block_gpu_release_region(va_block, gpu_id, gpu_state, region);

// TODO: bug 3660922: Need to update the read duplicated pages mask
// when read duplication is supported for HMM.

@@ -10294,7 +10227,7 @@ static NV_STATUS block_split_preallocate_no_retry(uvm_va_block_t *existing, uvm_
if (status != NV_OK)
goto error;

if (!block_gpu_state_get_alloc(new, gpu)) {
if (!uvm_va_block_gpu_state_get_alloc(new, gpu)) {
status = NV_ERR_NO_MEMORY;
goto error;
}

@@ -10468,7 +10401,7 @@ static void block_split_cpu(uvm_va_block_t *existing, uvm_va_block_t *new)
uvm_page_index_t new_chunk_page_index;
NV_STATUS status;

uvm_cpu_chunk_remove_from_block(existing, nid, page_index);
uvm_cpu_chunk_remove_from_block(existing, chunk, nid, page_index);

// The chunk has to be adjusted for the new block before inserting it.
new_chunk_page_index = page_index - split_page_index;

@@ -13067,7 +13000,7 @@ out:

static NV_STATUS block_gpu_force_4k_ptes(uvm_va_block_t *block, uvm_va_block_context_t *block_context, uvm_gpu_t *gpu)
{
uvm_va_block_gpu_state_t *gpu_state = block_gpu_state_get_alloc(block, gpu);
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get_alloc(block, gpu);
uvm_push_t push;
NV_STATUS status;

@@ -1339,9 +1339,11 @@ NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
uvm_va_block_t *va_block,
uvm_service_block_context_t *service_context);

// Allocate GPU state for the given va_block and registered GPUs.
// Returns the gpu_state for the given GPU. The returned pointer is
// internally managed and will be allocated (and freed) automatically,
// rather than by the caller. Returns NULL if there is no memory.
// Locking: The block lock must be held.
NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block);
uvm_va_block_gpu_state_t *uvm_va_block_gpu_state_get_alloc(uvm_va_block_t *va_block, uvm_gpu_t *gpu);

// Release any GPU or policy data associated with the given region in response
// to munmap().
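The call pattern implied by the new comment, shown as a short sketch. It is illustrative only and not part of the commit, but it mirrors the callers updated elsewhere in this change (block_pre_populate_pde1_gpu(), uvm_va_block_map()).

/* Illustrative sketch, not from the commit: typical use of the renamed
 * helper. The block owns the returned pointer; callers only check for
 * allocation failure and never free it themselves.
 */
static NV_STATUS example_with_gpu_state(uvm_va_block_t *block, uvm_gpu_t *gpu)
{
    uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get_alloc(block, gpu);

    if (!gpu_state)
        return NV_ERR_NO_MEMORY;

    // ... use gpu_state; no explicit free is required ...
    return NV_OK;
}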
@@ -2113,10 +2115,13 @@ bool uvm_va_block_cpu_is_region_resident_on(uvm_va_block_t *va_block, int nid, u
// Locking: The va_block lock must be held.
NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);

// Remove a CPU chunk at the given page_index from the va_block.
// Remove the given CPU chunk at the given page_index from the va_block.
// nid cannot be NUMA_NO_NODE.
// Locking: The va_block lock must be held.
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, int nid, uvm_page_index_t page_index);
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
int nid,
uvm_page_index_t page_index);

// Return the CPU chunk at the given page_index on the given NUMA node from the
// va_block. nid cannot be NUMA_NO_NODE.
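For context, the teardown order used by the updated callers in this commit, condensed into a sketch. Illustrative only and not part of the change; the wrapper name is hypothetical.

/* Illustrative sketch, not from the commit: with the chunk now passed in
 * explicitly instead of being looked up again, the removal sequence used by
 * the updated callers looks like this (wrapper name is hypothetical).
 */
static void example_drop_cpu_chunk(uvm_va_block_t *va_block,
                                   uvm_cpu_chunk_t *chunk,
                                   int nid,
                                   uvm_page_index_t page_index)
{
    uvm_cpu_chunk_remove_from_block(va_block, chunk, nid, page_index);
    uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk);
    uvm_cpu_chunk_free(chunk);
}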
@@ -2249,6 +2254,13 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_va_block_context_t *block_context);

// Populate all GPU chunks which cover the given region and page mask.
NV_STATUS uvm_va_block_populate_pages_gpu(uvm_va_block_t *block,
uvm_va_block_retry_t *retry,
uvm_gpu_id_t gpu_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *populate_mask);

// A helper macro for handling allocation-retry
//
// The macro takes a VA block, uvm_va_block_retry_t struct and a function call

@@ -2497,8 +2497,13 @@ nvidia_ioctl(

NV_CTL_DEVICE_ONLY(nv);

if (num_arg_gpus == 0 || nvlfp->num_attached_gpus != 0 ||
arg_size % sizeof(NvU32) != 0)
if ((num_arg_gpus == 0) || (arg_size % sizeof(NvU32) != 0))
{
status = -EINVAL;
goto done;
}

if (nvlfp->num_attached_gpus != 0)
{
status = -EINVAL;
goto done;

@@ -2527,6 +2532,7 @@ nvidia_ioctl(
if (nvlfp->attached_gpus[i] != 0)
nvidia_dev_put(nvlfp->attached_gpus[i], sp);
}

NV_KFREE(nvlfp->attached_gpus, arg_size);
nvlfp->num_attached_gpus = 0;
