Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git

Commit: 570.190
@@ -402,7 +402,10 @@ static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
 {
     uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_chunk(pmm, chunk);

-    uvm_assert_spinlock_locked(&pmm->list_lock);
+    // The PMM list_lock must be held, but calling uvm_assert_spinlock_locked()
+    // is not possible here due to the absence of the UVM context pointer in
+    // the interrupt context when called from devmem_page_free().
+
     UVM_ASSERT(chunk->state != UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
     chunk->state = UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED;

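Note: the dropped assertion depends on UVM's per-thread context, which does not exist when chunk_pin() is reached from the devmem page-free callback in interrupt context; the driver therefore removes the check and documents the locking requirement in a comment instead. A minimal sketch of the general pattern, with hypothetical example_-prefixed names (not the driver's real helpers):

    #include <linux/preempt.h>    /* in_interrupt() */
    #include <linux/spinlock.h>   /* assert_spin_locked() */

    struct example_pmm {
        spinlock_t list_lock;
    };

    /* Only check lock ownership when a thread context is available; callers
     * in interrupt context are trusted to already hold the lock. */
    static void example_assert_list_lock_held(struct example_pmm *pmm)
    {
        if (in_interrupt())
            return;
        assert_spin_locked(&pmm->list_lock);
    }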
@@ -415,8 +418,9 @@ static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)

     // The passed-in subchunk is not the root chunk so the root chunk has to be
     // split.
-    UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n",
-                   uvm_pmm_gpu_chunk_state_string(chunk->state));
+    UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
+                   "chunk state %s\n",
+                   uvm_pmm_gpu_chunk_state_string(chunk->state));

     chunk->suballoc->pinned_leaf_chunks++;
 }
@@ -429,7 +433,6 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_

     uvm_assert_spinlock_locked(&pmm->list_lock);
     UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
-    UVM_ASSERT(chunk->va_block == NULL);
     UVM_ASSERT(chunk_is_root_chunk_pinned(pmm, chunk));
     UVM_ASSERT(new_state != UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);

@@ -444,8 +447,9 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_

     // The passed-in subchunk is not the root chunk so the root chunk has to be
     // split.
-    UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n",
-                   uvm_pmm_gpu_chunk_state_string(chunk->state));
+    UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
+                   "chunk state %s\n",
+                   uvm_pmm_gpu_chunk_state_string(chunk->state));

     UVM_ASSERT(chunk->suballoc->pinned_leaf_chunks != 0);
     chunk->suballoc->pinned_leaf_chunks--;
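Note: for readers following the state assertions in these hunks, a paraphrased sketch of the leaf-chunk states they exercise is below. The driver's real definition is uvm_pmm_gpu_chunk_state_t; the names, ordering, and comments here are illustrative only and may omit states.

    /* Illustrative sketch only, not the driver's enum. */
    typedef enum
    {
        EXAMPLE_CHUNK_STATE_FREE,        /* on a free list, available to allocate     */
        EXAMPLE_CHUNK_STATE_IS_SPLIT,    /* split into subchunks; only leaves are used */
        EXAMPLE_CHUNK_STATE_TEMP_PINNED, /* allocated (or being evicted) and pinned    */
        EXAMPLE_CHUNK_STATE_ALLOCATED,   /* unpinned, owned by a va_block or the kernel */
    } example_chunk_state_t;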
@@ -597,8 +601,6 @@ NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
         return status;

     for (i = 0; i < num_chunks; ++i) {
-        UVM_ASSERT(chunks[i]->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
-
         uvm_spin_lock(&pmm->list_lock);
         chunk_unpin(pmm, chunks[i], UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
         chunks[i]->is_referenced = false;
@@ -644,45 +646,29 @@ static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk
         list_del_init(&chunk->list);
 }

-static void gpu_unpin_temp(uvm_pmm_gpu_t *pmm,
-                           uvm_gpu_chunk_t *chunk,
-                           uvm_va_block_t *va_block,
-                           bool is_referenced)
+void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block)
 {
     UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
     UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));
-
-    INIT_LIST_HEAD(&chunk->list);
+    UVM_ASSERT(list_empty(&chunk->list));
+    UVM_ASSERT(va_block);
+    UVM_ASSERT(chunk->va_block == va_block);
+    UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block));

     uvm_spin_lock(&pmm->list_lock);

-    UVM_ASSERT(!chunk->va_block);
-    UVM_ASSERT(va_block);
-    UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block));
-
     chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
-    chunk->is_referenced = is_referenced;
-    chunk->va_block = va_block;
     chunk_update_lists_locked(pmm, chunk);

     uvm_spin_unlock(&pmm->list_lock);
 }

-void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block)
-{
-    gpu_unpin_temp(pmm, chunk, va_block, false);
-}
-
-void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block)
-{
-    gpu_unpin_temp(pmm, chunk, va_block, true);
-}
-
 void uvm_pmm_gpu_free(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker)
 {
     NV_STATUS status;

-    if (!chunk)
+    // Referenced chunks are freed by Linux when the reference is released.
+    if (!chunk || chunk->is_referenced)
         return;

     UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED ||
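Note: the new early-return in uvm_pmm_gpu_free() relies on the lifetime of referenced chunks being driven by Linux: dropping the last reference on the backing ZONE_DEVICE page, not a PMM free call, is what eventually reaches the driver's page-free callback. A rough caller-side sketch of that ownership model, assuming a kernel where device-private pages are refcounted this way; the helper name is hypothetical:

    #include <linux/mm.h>    /* put_page() */

    /* Hypothetical illustration: releasing the final page reference is what
     * ultimately frees a referenced chunk; the PMM free path ignores it. */
    static void example_release_referenced_chunk(struct page *page)
    {
        put_page(page);    /* last reference invokes the pagemap's page_free callback */
    }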
@@ -748,6 +734,10 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
     size_t i;

     UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT);
+    UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk),
+                   "%u != %u\n",
+                   chunk->suballoc->allocated,
+                   num_subchunks(chunk));
     UVM_ASSERT(first_child->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED ||
               first_child->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);

@@ -766,14 +756,6 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
         }
     }

-    if (first_child->state == UVM_PMM_GPU_CHUNK_STATE_FREE) {
-        UVM_ASSERT(chunk->suballoc->allocated == 0);
-    }
-    else {
-        UVM_ASSERT_MSG(chunk->suballoc->allocated == num_subchunks(chunk), "%u != %u\n",
-                       chunk->suballoc->allocated, num_subchunks(chunk));
-    }
-
     return true;
 }

@@ -812,6 +794,7 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
     else if (child_state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
         UVM_ASSERT(root_chunk->chunk.suballoc->pinned_leaf_chunks >= num_sub);
         root_chunk->chunk.suballoc->pinned_leaf_chunks += 1 - num_sub;
+        chunk->va_block = subchunk->va_block;
     }

     chunk->state = child_state;
@@ -835,7 +818,7 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
         UVM_ASSERT(list_empty(&subchunk->list));

         if ((child_state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(subchunk))
-            UVM_ASSERT(subchunk->va_block != NULL);
+            UVM_ASSERT(subchunk->va_block);

         kmem_cache_free(CHUNK_CACHE, subchunk);
     }
@@ -1202,7 +1185,7 @@ void uvm_pmm_gpu_mark_chunk_evicted(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)

     UVM_ASSERT(chunk_is_in_eviction(pmm, chunk));
     UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
-    UVM_ASSERT(chunk->va_block != NULL);
+    UVM_ASSERT(chunk->va_block);

     chunk->va_block = NULL;
     chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
@@ -1259,11 +1242,13 @@ static NV_STATUS find_and_retain_va_block_to_evict(uvm_pmm_gpu_t *pmm, uvm_gpu_c

     uvm_spin_lock(&pmm->list_lock);

-    // All free chunks should have been pinned already by pin_free_chunks_func().
+    // All free chunks should have been pinned already by
+    // pin_free_chunks_func().
     UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED ||
                    chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED ||
                    chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
-                   "state %s\n", uvm_pmm_gpu_chunk_state_string(chunk->state));
+                   "state %s\n",
+                   uvm_pmm_gpu_chunk_state_string(chunk->state));

     if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
         UVM_ASSERT(chunk->va_block);
@@ -1750,8 +1735,10 @@ static NV_STATUS alloc_chunk_with_splits(uvm_pmm_gpu_t *pmm,
             UVM_ASSERT(chunk->parent->suballoc);
             UVM_ASSERT(uvm_gpu_chunk_get_size(chunk->parent) == uvm_chunk_find_next_size(chunk_sizes, cur_size));
             UVM_ASSERT(chunk->parent->type == type);
-            UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent), "allocated %u num %u\n",
-                           chunk->parent->suballoc->allocated, num_subchunks(chunk->parent));
+            UVM_ASSERT_MSG(chunk->parent->suballoc->allocated <= num_subchunks(chunk->parent),
+                           "allocated %u num %u\n",
+                           chunk->parent->suballoc->allocated,
+                           num_subchunks(chunk->parent));
         }

         if (cur_size == chunk_size) {
@@ -1856,9 +1843,9 @@ static void init_root_chunk(uvm_pmm_gpu_t *pmm,
                    uvm_pmm_gpu_chunk_state_string(chunk->state),
                    uvm_gpu_name(gpu));

-    UVM_ASSERT(chunk->parent == NULL);
-    UVM_ASSERT(chunk->suballoc == NULL);
-    UVM_ASSERT(chunk->va_block == NULL);
+    UVM_ASSERT(!chunk->parent);
+    UVM_ASSERT(!chunk->suballoc);
+    UVM_ASSERT(!chunk->va_block);
     UVM_ASSERT(chunk->va_block_page_index == PAGES_PER_UVM_VA_BLOCK);
     UVM_ASSERT(list_empty(&chunk->list));
     UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_MAX);
@@ -2116,6 +2103,9 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
             subchunk->va_block_page_index = chunk->va_block_page_index + (i * subchunk_size) / PAGE_SIZE;
             subchunk->is_referenced = chunk->is_referenced;
         }
+        else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
+            subchunk->va_block = chunk->va_block;
+        }
     }

     // We're splitting an allocated or pinned chunk in-place.
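Note: a worked instance of the va_block_page_index arithmetic in the context line above, using illustrative sizes rather than anything taken from the driver: splitting a parent whose va_block_page_index is 0 into 64 KiB subchunks on a 4 KiB PAGE_SIZE system gives (i * 64 KiB) / 4 KiB = 16 * i, so subchunk i = 3 starts 48 CPU pages into the parent's region of the va_block.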
@@ -2141,6 +2131,10 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
         // accounting for the root chunk itself so add the 1 back.
         if (chunk_is_root_chunk(chunk))
             root_chunk->chunk.suballoc->pinned_leaf_chunks += 1;
+
+        chunk->va_block = NULL;
+        chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
+        chunk->is_referenced = false;
     }

     chunk->state = UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT;
@@ -2222,16 +2216,16 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)

     if (root_chunk->chunk.in_eviction) {
         // A root chunk with pinned subchunks would never be picked for eviction
-        // so this one has to be in the allocated state. Pin it and let the
-        // evicting thread pick it up.
-        UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
-        UVM_ASSERT(chunk->va_block != NULL);
-        UVM_ASSERT(chunk->va_block_page_index != PAGES_PER_UVM_VA_BLOCK);
-        UVM_ASSERT(list_empty(&chunk->list));
-        chunk->va_block = NULL;
-        chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
-        chunk->is_zero = false;
-        chunk_pin(pmm, chunk);
+        // but HMM evictions will end up here so leave the chunk pinned (or pin
+        // it) and let the eviction thread pick it up.
+        if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
+            UVM_ASSERT(chunk->va_block);
+            UVM_ASSERT(list_empty(&chunk->list));
+            chunk->va_block = NULL;
+            chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
+            chunk->is_zero = false;
+            chunk_pin(pmm, chunk);
+        }
         return;
     }

@@ -2245,17 +2239,15 @@ static void chunk_free_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
         }
     }

-    if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
-        chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_FREE);
-    }
-    else {
-        chunk->state = UVM_PMM_GPU_CHUNK_STATE_FREE;
-        chunk->va_block = NULL;
-    }
-
+    chunk->va_block = NULL;
     chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
     chunk->is_zero = false;

+    if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED)
+        chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_FREE);
+    else
+        chunk->state = UVM_PMM_GPU_CHUNK_STATE_FREE;
+
     chunk_update_lists_locked(pmm, chunk);
 }

@@ -2369,8 +2361,8 @@ static void free_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
         try_free = is_root;
     }
     else {
-        // Freeing a chunk can only fail if it requires merging. Take the PMM lock
-        // and free it with merges supported.
+        // Freeing a chunk can only fail if it requires merging. Take the PMM
+        // lock and free it with merges supported.
         uvm_mutex_lock(&pmm->lock);
         free_chunk_with_merges(pmm, chunk);
         uvm_mutex_unlock(&pmm->lock);
@@ -3088,6 +3080,11 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
             break;
         }

+        if (page->zone_device_data) {
+            ret = false;
+            break;
+        }
+
         if (page_count(page)) {
             ret = false;
             break;
@@ -3102,6 +3099,14 @@ static void devmem_page_free(struct page *page)
     uvm_gpu_chunk_t *chunk = uvm_pmm_devmem_page_to_chunk(page);
     uvm_gpu_t *gpu = uvm_gpu_chunk_get_gpu(chunk);

+    if (chunk->va_block) {
+        uvm_va_space_t *va_space = chunk->va_block->hmm.va_space;
+
+        UVM_ASSERT(va_space);
+        atomic64_dec(&va_space->hmm.allocated_page_count);
+        UVM_ASSERT(atomic64_read(&va_space->hmm.allocated_page_count) >= 0);
+    }
+
     page->zone_device_data = NULL;

     // We should be calling free_chunk() except that it acquires a mutex and
@@ -3111,7 +3116,20 @@ static void devmem_page_free(struct page *page)
     spin_lock(&gpu->pmm.list_lock.lock);

     UVM_ASSERT(chunk->is_referenced);

+    chunk->va_block = NULL;
+    chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
+    chunk->is_referenced = false;
+
+    if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
+        list_del_init(&chunk->list);
+        chunk_pin(&gpu->pmm, chunk);
+    }
+    else {
+        UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
+        UVM_ASSERT(list_empty(&chunk->list));
+    }
+
     list_add_tail(&chunk->list, &gpu->pmm.root_chunks.va_block_lazy_free);

     spin_unlock(&gpu->pmm.list_lock.lock);
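Note: devmem_page_free() is the kind of callback that the kernel invokes through a dev_pagemap's ops when a ZONE_DEVICE page's last reference goes away, which is why it cannot take the PMM mutex and defers the real free to the lazy-free list above. A sketch of how such a callback is typically registered; the struct below is illustrative and not the driver's actual pagemap setup (a real DEVICE_PRIVATE pagemap also needs a .migrate_to_ram handler):

    #include <linux/memremap.h>    /* struct dev_pagemap_ops */

    /* Illustrative wiring: page_free runs when the page refcount drops,
     * possibly in interrupt context, so it must stay lock-light. */
    static const struct dev_pagemap_ops example_pagemap_ops = {
        .page_free = devmem_page_free,
    };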
@@ -3362,6 +3380,7 @@ static void process_lazy_free(uvm_pmm_gpu_t *pmm)
     // is empty.
     while (!list_empty(&pmm->root_chunks.va_block_lazy_free)) {
         chunk = list_first_entry(&pmm->root_chunks.va_block_lazy_free, uvm_gpu_chunk_t, list);
+        UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
         list_del_init(&chunk->list);
         uvm_spin_unlock(&pmm->list_lock);

@@ -3414,6 +3433,7 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)

     for (i = 0; i < UVM_PMM_GPU_MEMORY_TYPE_COUNT; i++) {
         pmm->chunk_sizes[i] = 0;

+        // Add the common root chunk size to all memory types
         pmm->chunk_sizes[i] |= UVM_CHUNK_SIZE_MAX;
         for (j = 0; j < ARRAY_SIZE(chunk_size_init); j++)
@@ -3421,7 +3441,9 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)

         UVM_ASSERT(pmm->chunk_sizes[i] < UVM_CHUNK_SIZE_INVALID);
         UVM_ASSERT_MSG(hweight_long(pmm->chunk_sizes[i]) <= UVM_MAX_CHUNK_SIZES,
-                       "chunk sizes %lu, max chunk sizes %u\n", hweight_long(pmm->chunk_sizes[i]), UVM_MAX_CHUNK_SIZES);
+                       "chunk sizes %lu, max chunk sizes %u\n",
+                       hweight_long(pmm->chunk_sizes[i]),
+                       UVM_MAX_CHUNK_SIZES);
     }

     status = init_caches(pmm);
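Note: the hweight_long() assertion above works because the set of supported chunk sizes is kept as a bitmask of power-of-two sizes, so counting enabled sizes is a population count of the mask. A small self-contained sketch with made-up size values (only hweight_long() is the real kernel helper; everything else is illustrative):

    #include <linux/bitops.h>    /* hweight_long() */

    /* Count how many distinct chunk sizes an example bitmask enables. */
    static unsigned example_count_chunk_sizes(void)
    {
        unsigned long sizes = 0;

        sizes |= 2UL << 20;    /* 2 MiB root chunk size, always present */
        sizes |= 4UL << 10;    /* 4 KiB leaf size                        */
        sizes |= 64UL << 10;   /* 64 KiB leaf size                       */

        return hweight_long(sizes);    /* == 3 distinct sizes enabled */
    }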
@@ -3515,9 +3537,9 @@ void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm)

     gpu = uvm_pmm_to_gpu(pmm);

-    UVM_ASSERT(uvm_pmm_gpu_check_orphan_pages(pmm));
     nv_kthread_q_flush(&gpu->parent->lazy_free_q);
     UVM_ASSERT(list_empty(&pmm->root_chunks.va_block_lazy_free));
+    UVM_ASSERT(uvm_pmm_gpu_check_orphan_pages(pmm));
     release_free_root_chunks(pmm);

     if (gpu->mem_info.size != 0 && gpu_supports_pma_eviction(gpu))