commit 758b4ee818
parent 7c345b838b
Author: Andy Ritger
Date:   2022-11-10 08:39:33 -08:00

1323 changed files with 262135 additions and 60754 deletions


@@ -30,31 +30,6 @@
#include "uvm_va_range.h"
#include "uvm_test.h"
// Global cache to allocate the per-VA block prefetch detection structures
static struct kmem_cache *g_prefetch_info_cache __read_mostly;
// Per-VA block prefetch detection structure
typedef struct
{
uvm_page_mask_t prefetch_pages;
uvm_page_mask_t migrate_pages;
uvm_va_block_bitmap_tree_t bitmap_tree;
uvm_processor_id_t last_migration_proc_id;
uvm_va_block_region_t region;
size_t big_page_size;
uvm_va_block_region_t big_pages_region;
NvU16 pending_prefetch_pages;
NvU16 fault_migrations_to_last_proc;
} block_prefetch_info_t;
//
// Tunables for prefetch detection/prevention (configurable via module parameters)
//
@@ -88,19 +63,54 @@ static bool g_uvm_perf_prefetch_enable;
static unsigned g_uvm_perf_prefetch_threshold;
static unsigned g_uvm_perf_prefetch_min_faults;
// Callback declaration for the performance heuristics events
static void prefetch_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);
void uvm_perf_prefetch_bitmap_tree_iter_init(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_page_index_t page_index,
uvm_perf_prefetch_bitmap_tree_iter_t *iter)
{
UVM_ASSERT(bitmap_tree->level_count > 0);
UVM_ASSERT_MSG(page_index < bitmap_tree->leaf_count,
"%zd vs %zd",
(size_t)page_index,
(size_t)bitmap_tree->leaf_count);
static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index, block_prefetch_info_t *prefetch_info)
iter->level_idx = bitmap_tree->level_count - 1;
iter->node_idx = page_index;
}
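// Return the range of tree leaves covered by the iterator's current node.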
uvm_va_block_region_t uvm_perf_prefetch_bitmap_tree_iter_get_range(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
const uvm_perf_prefetch_bitmap_tree_iter_t *iter)
{
NvU16 range_leaves = uvm_perf_tree_iter_leaf_range(bitmap_tree, iter);
NvU16 range_start = uvm_perf_tree_iter_leaf_range_start(bitmap_tree, iter);
uvm_va_block_region_t subregion = uvm_va_block_region(range_start, range_start + range_leaves);
UVM_ASSERT(iter->level_idx >= 0);
UVM_ASSERT(iter->level_idx < bitmap_tree->level_count);
return subregion;
}
NvU16 uvm_perf_prefetch_bitmap_tree_iter_get_count(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
const uvm_perf_prefetch_bitmap_tree_iter_t *iter)
{
uvm_va_block_region_t subregion = uvm_perf_prefetch_bitmap_tree_iter_get_range(bitmap_tree, iter);
return uvm_page_mask_region_weight(&bitmap_tree->pages, subregion);
}
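// Illustrative note (not part of the change itself): the tree keeps one leaf
// per page of the maximum prefetch region, shifted by bitmap_tree->offset so
// that big pages line up with power-of-two tree nodes. Starting at the leaf
// for the faulting page, each step of the traversal below doubles the number
// of leaves covered (1, 2, 4, ...), and the prefetch region keeps expanding
// while the per-node population counter stays above the configured prefetch
// threshold.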
static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_va_block_region_t max_prefetch_region)
{
NvU16 counter;
uvm_va_block_bitmap_tree_iter_t iter;
uvm_va_block_bitmap_tree_t *bitmap_tree = &prefetch_info->bitmap_tree;
uvm_va_block_region_t prefetch_region = uvm_va_block_region(bitmap_tree->leaf_count,
bitmap_tree->leaf_count + 1);
uvm_perf_prefetch_bitmap_tree_iter_t iter;
uvm_va_block_region_t prefetch_region = uvm_va_block_region(0, 0);
uvm_va_block_bitmap_tree_traverse_counters(counter, bitmap_tree, page_index, &iter) {
uvm_va_block_region_t subregion = uvm_va_block_bitmap_tree_iter_get_range(bitmap_tree, &iter);
uvm_perf_prefetch_bitmap_tree_traverse_counters(counter,
bitmap_tree,
page_index - max_prefetch_region.first + bitmap_tree->offset,
&iter) {
uvm_va_block_region_t subregion = uvm_perf_prefetch_bitmap_tree_iter_get_range(bitmap_tree, &iter);
NvU16 subregion_pages = uvm_va_block_region_num_pages(subregion);
UVM_ASSERT(counter <= subregion_pages);
@@ -109,289 +119,287 @@ static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index
}
// Clamp prefetch region to actual pages
if (prefetch_region.first < bitmap_tree->leaf_count) {
if (prefetch_region.first < prefetch_info->region.first)
prefetch_region.first = prefetch_info->region.first;
if (prefetch_region.outer) {
prefetch_region.first += max_prefetch_region.first;
if (prefetch_region.first < bitmap_tree->offset) {
prefetch_region.first = bitmap_tree->offset;
}
else {
prefetch_region.first -= bitmap_tree->offset;
if (prefetch_region.first < max_prefetch_region.first)
prefetch_region.first = max_prefetch_region.first;
}
if (prefetch_region.outer > prefetch_info->region.outer)
prefetch_region.outer = prefetch_info->region.outer;
prefetch_region.outer += max_prefetch_region.first;
if (prefetch_region.outer < bitmap_tree->offset) {
prefetch_region.outer = bitmap_tree->offset;
}
else {
prefetch_region.outer -= bitmap_tree->offset;
if (prefetch_region.outer > max_prefetch_region.outer)
prefetch_region.outer = max_prefetch_region.outer;
}
}
return prefetch_region;
}
// Performance heuristics module for prefetch
static uvm_perf_module_t g_module_prefetch;
static uvm_perf_module_event_callback_desc_t g_callbacks_prefetch[] = {
{ UVM_PERF_EVENT_BLOCK_DESTROY, prefetch_block_destroy_cb },
{ UVM_PERF_EVENT_MODULE_UNLOAD, prefetch_block_destroy_cb },
{ UVM_PERF_EVENT_BLOCK_SHRINK, prefetch_block_destroy_cb }
};
// Get the prefetch detection struct for the given block
static block_prefetch_info_t *prefetch_info_get(uvm_va_block_t *va_block)
{
return uvm_perf_module_type_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_PREFETCH);
}
static void prefetch_info_destroy(uvm_va_block_t *va_block)
{
block_prefetch_info_t *prefetch_info = prefetch_info_get(va_block);
if (prefetch_info) {
kmem_cache_free(g_prefetch_info_cache, prefetch_info);
uvm_perf_module_type_unset_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_PREFETCH);
}
}
// Get the prefetch detection struct for the given block or create it if it
// does not exist
static block_prefetch_info_t *prefetch_info_get_create(uvm_va_block_t *va_block)
{
block_prefetch_info_t *prefetch_info = prefetch_info_get(va_block);
if (!prefetch_info) {
// Create some ghost leaves so we can align the tree to a big page boundary.
// We use the largest page size to handle the worst-case scenario.
size_t big_page_size = UVM_PAGE_SIZE_128K;
uvm_va_block_region_t big_pages_region = uvm_va_block_big_page_region_all(va_block, big_page_size);
size_t num_leaves = uvm_va_block_num_cpu_pages(va_block);
// If the va block is not big enough to fit 128KB pages, it may still fit 64KB pages
if (big_pages_region.outer == 0) {
big_page_size = UVM_PAGE_SIZE_64K;
big_pages_region = uvm_va_block_big_page_region_all(va_block, big_page_size);
}
if (big_pages_region.first > 0)
num_leaves += (big_page_size / PAGE_SIZE - big_pages_region.first);
UVM_ASSERT(num_leaves <= PAGES_PER_UVM_VA_BLOCK);
prefetch_info = nv_kmem_cache_zalloc(g_prefetch_info_cache, NV_UVM_GFP_FLAGS);
if (!prefetch_info)
goto fail;
prefetch_info->last_migration_proc_id = UVM_ID_INVALID;
uvm_va_block_bitmap_tree_init_from_page_count(&prefetch_info->bitmap_tree, num_leaves);
uvm_perf_module_type_set_data(va_block->perf_modules_data, prefetch_info, UVM_PERF_MODULE_TYPE_PREFETCH);
}
return prefetch_info;
fail:
prefetch_info_destroy(va_block);
return NULL;
}
static void grow_fault_granularity_if_no_thrashing(block_prefetch_info_t *prefetch_info,
static void grow_fault_granularity_if_no_thrashing(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_va_block_region_t region,
uvm_page_index_t first,
const uvm_page_mask_t *faulted_pages,
const uvm_page_mask_t *thrashing_pages)
{
if (!uvm_page_mask_region_empty(faulted_pages, region) &&
(!thrashing_pages || uvm_page_mask_region_empty(thrashing_pages, region))) {
region.first += prefetch_info->region.first;
region.outer += prefetch_info->region.first;
uvm_page_mask_region_fill(&prefetch_info->bitmap_tree.pages, region);
UVM_ASSERT(region.first >= first);
region.first = region.first - first + bitmap_tree->offset;
region.outer = region.outer - first + bitmap_tree->offset;
UVM_ASSERT(region.outer <= bitmap_tree->leaf_count);
uvm_page_mask_region_fill(&bitmap_tree->pages, region);
}
}
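// Fill the bitmap tree at increasing granularity: the whole region when it
// contains no big pages, otherwise the prefix before the first big page,
// each big page, and the suffix after the last one. Each piece is filled
// only if it has faulted pages and none of its pages are thrashing.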
static void grow_fault_granularity(uvm_va_block_t *va_block,
block_prefetch_info_t *prefetch_info,
static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
NvU32 big_page_size,
uvm_va_block_region_t big_pages_region,
uvm_va_block_region_t max_prefetch_region,
const uvm_page_mask_t *faulted_pages,
const uvm_page_mask_t *thrashing_pages)
{
size_t num_big_pages;
size_t big_page_index;
uvm_va_block_region_t block_region = uvm_va_block_region_from_block(va_block);
uvm_page_index_t pages_per_big_page = big_page_size / PAGE_SIZE;
uvm_page_index_t page_index;
// Migrate whole block if no big pages and no page in it is thrashing
if (!big_pages_region.outer) {
grow_fault_granularity_if_no_thrashing(bitmap_tree,
max_prefetch_region,
max_prefetch_region.first,
faulted_pages,
thrashing_pages);
return;
}
// Migrate whole "prefix" if no page in it is thrashing
if (prefetch_info->big_pages_region.first > 0) {
uvm_va_block_region_t prefix_region = uvm_va_block_region(0, prefetch_info->big_pages_region.first);
if (big_pages_region.first > max_prefetch_region.first) {
uvm_va_block_region_t prefix_region = uvm_va_block_region(max_prefetch_region.first, big_pages_region.first);
grow_fault_granularity_if_no_thrashing(prefetch_info, prefix_region, faulted_pages, thrashing_pages);
grow_fault_granularity_if_no_thrashing(bitmap_tree,
prefix_region,
max_prefetch_region.first,
faulted_pages,
thrashing_pages);
}
// Migrate whole big pages if they are not thrashing
num_big_pages = uvm_va_block_num_big_pages(va_block, prefetch_info->big_page_size);
for (big_page_index = 0; big_page_index < num_big_pages; ++big_page_index) {
uvm_va_block_region_t big_region = uvm_va_block_big_page_region(va_block,
big_page_index,
prefetch_info->big_page_size);
for (page_index = big_pages_region.first;
page_index < big_pages_region.outer;
page_index += pages_per_big_page) {
uvm_va_block_region_t big_region = uvm_va_block_region(page_index,
page_index + pages_per_big_page);
grow_fault_granularity_if_no_thrashing(prefetch_info, big_region, faulted_pages, thrashing_pages);
grow_fault_granularity_if_no_thrashing(bitmap_tree,
big_region,
max_prefetch_region.first,
faulted_pages,
thrashing_pages);
}
// Migrate whole "suffix" if no page in it is thrashing
if (prefetch_info->big_pages_region.outer < block_region.outer) {
uvm_va_block_region_t suffix_region = uvm_va_block_region(prefetch_info->big_pages_region.outer,
block_region.outer);
if (big_pages_region.outer < max_prefetch_region.outer) {
uvm_va_block_region_t suffix_region = uvm_va_block_region(big_pages_region.outer,
max_prefetch_region.outer);
grow_fault_granularity_if_no_thrashing(prefetch_info, suffix_region, faulted_pages, thrashing_pages);
grow_fault_granularity_if_no_thrashing(bitmap_tree,
suffix_region,
max_prefetch_region.first,
faulted_pages,
thrashing_pages);
}
}
// Within a block we only allow prefetching to a single processor. Therefore, if two processors
// are accessing non-overlapping regions within the same block they won't benefit from
// prefetching.
// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
//
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within a VA block
void uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t region)
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
// a VA block.
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_page_mask_t *prefetch_pages,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
{
uvm_page_index_t page_index;
block_prefetch_info_t *prefetch_info;
const uvm_page_mask_t *resident_mask = NULL;
const uvm_page_mask_t *thrashing_pages = NULL;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_va_policy_t *policy = va_block_context->policy;
uvm_va_block_region_t max_prefetch_region;
NvU32 big_page_size;
uvm_va_block_region_t big_pages_region;
uvm_assert_rwsem_locked(&va_space->lock);
if (!g_uvm_perf_prefetch_enable)
return;
prefetch_info = prefetch_info_get_create(va_block);
if (!prefetch_info)
return;
if (!uvm_id_equal(prefetch_info->last_migration_proc_id, new_residency)) {
prefetch_info->last_migration_proc_id = new_residency;
prefetch_info->fault_migrations_to_last_proc = 0;
if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
va_block->prefetch_info.last_migration_proc_id = new_residency;
va_block->prefetch_info.fault_migrations_to_last_proc = 0;
}
prefetch_info->pending_prefetch_pages = 0;
// Compute the expanded region that prefetching is allowed from.
if (uvm_va_block_is_hmm(va_block)) {
max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
va_block_context,
uvm_va_block_region_start(va_block, faulted_region));
}
else {
max_prefetch_region = uvm_va_block_region_from_block(va_block);
}
uvm_page_mask_zero(prefetch_pages);
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
// If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole VA block
// preferred location, populate the whole max_prefetch_region.
if (uvm_processor_mask_empty(&va_block->resident) &&
uvm_id_equal(new_residency, policy->preferred_location)) {
uvm_page_mask_region_fill(&prefetch_info->prefetch_pages, uvm_va_block_region_from_block(va_block));
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
goto done;
}
if (resident_mask)
uvm_page_mask_or(&prefetch_info->bitmap_tree.pages, resident_mask, faulted_pages);
uvm_page_mask_or(&bitmap_tree->pages, resident_mask, faulted_pages);
else
uvm_page_mask_copy(&prefetch_info->bitmap_tree.pages, faulted_pages);
uvm_page_mask_copy(&bitmap_tree->pages, faulted_pages);
// Get the big page size for the new residency
// If we are using a subregion of the va_block, align bitmap_tree
uvm_page_mask_shift_right(&bitmap_tree->pages, &bitmap_tree->pages, max_prefetch_region.first);
// Get the big page size for the new residency.
// Assume 64K size if the new residency is the CPU or no GPU va space is
// registered in the current process for this GPU.
if (UVM_ID_IS_GPU(new_residency) &&
uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, new_residency)) {
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, new_residency);
prefetch_info->big_page_size = uvm_va_block_gpu_big_page_size(va_block, gpu);
big_page_size = uvm_va_block_gpu_big_page_size(va_block, gpu);
}
else {
prefetch_info->big_page_size = UVM_PAGE_SIZE_64K;
big_page_size = UVM_PAGE_SIZE_64K;
}
big_pages_region = uvm_va_block_big_page_region_subset(va_block, max_prefetch_region, big_page_size);
// Adjust the prefetch tree to big page granularity to make sure that we
// get big page-friendly prefetching hints
prefetch_info->big_pages_region = uvm_va_block_big_page_region_all(va_block, prefetch_info->big_page_size);
if (prefetch_info->big_pages_region.first > 0) {
prefetch_info->region.first = prefetch_info->big_page_size / PAGE_SIZE - prefetch_info->big_pages_region.first;
if (big_pages_region.first - max_prefetch_region.first > 0) {
bitmap_tree->offset = big_page_size / PAGE_SIZE - (big_pages_region.first - max_prefetch_region.first);
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region) + bitmap_tree->offset;
uvm_page_mask_shift_left(&prefetch_info->bitmap_tree.pages,
&prefetch_info->bitmap_tree.pages,
prefetch_info->region.first);
UVM_ASSERT(bitmap_tree->offset < big_page_size / PAGE_SIZE);
UVM_ASSERT(bitmap_tree->leaf_count <= PAGES_PER_UVM_VA_BLOCK);
uvm_page_mask_shift_left(&bitmap_tree->pages, &bitmap_tree->pages, bitmap_tree->offset);
}
else {
prefetch_info->region.first = 0;
bitmap_tree->offset = 0;
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
}
prefetch_info->region.outer = prefetch_info->region.first + uvm_va_block_num_cpu_pages(va_block);
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
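// Illustrative example (assumed numbers): with a 64KB big page size (16
// small pages) and the first big page starting 5 pages into the prefetch
// region, offset is 16 - 5 = 11 ghost leaves, which aligns big pages with
// power-of-two node boundaries in the tree.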
thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);
// Assume big pages by default. Prefetch the rest of 4KB subregions within the big page
// region unless there is thrashing.
grow_fault_granularity(va_block, prefetch_info, faulted_pages, thrashing_pages);
// Assume big pages by default. Prefetch the rest of 4KB subregions within
// the big page region unless there is thrashing.
grow_fault_granularity(bitmap_tree,
big_page_size,
big_pages_region,
max_prefetch_region,
faulted_pages,
thrashing_pages);
// Do not compute prefetch regions with faults on pages that are thrashing
if (thrashing_pages)
uvm_page_mask_andnot(&prefetch_info->migrate_pages, faulted_pages, thrashing_pages);
uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
else
uvm_page_mask_copy(&prefetch_info->migrate_pages, faulted_pages);
uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);
// Update the tree using the migration mask to compute the pages to prefetch
uvm_page_mask_zero(&prefetch_info->prefetch_pages);
for_each_va_block_page_in_region_mask(page_index, &prefetch_info->migrate_pages, region) {
uvm_va_block_region_t prefetch_region = compute_prefetch_region(page_index + prefetch_info->region.first,
prefetch_info);
uvm_page_mask_region_fill(&prefetch_info->prefetch_pages, prefetch_region);
// Update the tree using the scratch mask to compute the pages to prefetch
for_each_va_block_page_in_region_mask(page_index, &va_block_context->scratch_page_mask, faulted_region) {
uvm_va_block_region_t region = compute_prefetch_region(page_index, bitmap_tree, max_prefetch_region);
uvm_page_mask_region_fill(prefetch_pages, region);
// Early out if we have already prefetched until the end of the VA block
if (prefetch_region.outer == prefetch_info->region.outer)
if (region.outer == max_prefetch_region.outer)
break;
}
// Adjust prefetching page mask
if (prefetch_info->region.first > 0) {
uvm_page_mask_shift_right(&prefetch_info->prefetch_pages,
&prefetch_info->prefetch_pages,
prefetch_info->region.first);
}
done:
// Do not prefetch pages that are going to be migrated/populated due to a
// fault
uvm_page_mask_andnot(&prefetch_info->prefetch_pages,
&prefetch_info->prefetch_pages,
faulted_pages);
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, faulted_pages);
// TODO: Bug 1765432: prefetching pages that are already mapped on the CPU
// would trigger a remap, which may cause a large overhead. Therefore,
// exclude them from the mask.
if (UVM_ID_IS_CPU(new_residency)) {
// For HMM, we don't know what pages are mapped by the CPU unless we try to
// migrate them. Prefetch pages will only be opportunistically migrated.
if (UVM_ID_IS_CPU(new_residency) && !uvm_va_block_is_hmm(va_block)) {
uvm_page_mask_and(&va_block_context->scratch_page_mask,
resident_mask,
&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ]);
uvm_page_mask_andnot(&prefetch_info->prefetch_pages,
&prefetch_info->prefetch_pages,
&va_block_context->scratch_page_mask);
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, &va_block_context->scratch_page_mask);
}
// Avoid prefetching pages that are thrashing
if (thrashing_pages) {
uvm_page_mask_andnot(&prefetch_info->prefetch_pages,
&prefetch_info->prefetch_pages,
thrashing_pages);
}
if (thrashing_pages)
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, thrashing_pages);
prefetch_info->fault_migrations_to_last_proc += uvm_page_mask_region_weight(faulted_pages, region);
prefetch_info->pending_prefetch_pages = uvm_page_mask_weight(&prefetch_info->prefetch_pages);
va_block->prefetch_info.fault_migrations_to_last_proc += uvm_page_mask_region_weight(faulted_pages, faulted_region);
return uvm_page_mask_weight(prefetch_pages);
}
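// Fill out_hint with the destination processor and the mask of pages worth
// prefetching for this fault batch. out_hint->residency is left as
// UVM_ID_INVALID when prefetching is disabled or no pages qualify.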
uvm_perf_prefetch_hint_t uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
const uvm_page_mask_t *new_residency_mask)
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint)
{
uvm_perf_prefetch_hint_t ret = UVM_PERF_PREFETCH_HINT_NONE();
block_prefetch_info_t *prefetch_info;
uvm_va_policy_t *policy = va_block_context->policy;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_page_mask_t *prefetch_pages = &out_hint->prefetch_pages_mask;
NvU32 pending_prefetch_pages;
uvm_assert_rwsem_locked(&va_space->lock);
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, faulted_region));
UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block, va_block_context, faulted_region));
out_hint->residency = UVM_ID_INVALID;
if (!g_uvm_perf_prefetch_enable)
return ret;
return;
if (!va_space->test.page_prefetch_enabled)
return ret;
return;
prefetch_info = prefetch_info_get(va_block);
if (!prefetch_info)
return ret;
pending_prefetch_pages = uvm_perf_prefetch_prenotify_fault_migrations(va_block,
va_block_context,
new_residency,
faulted_pages,
faulted_region,
prefetch_pages,
bitmap_tree);
if (prefetch_info->fault_migrations_to_last_proc >= g_uvm_perf_prefetch_min_faults &&
prefetch_info->pending_prefetch_pages > 0) {
if (va_block->prefetch_info.fault_migrations_to_last_proc >= g_uvm_perf_prefetch_min_faults &&
pending_prefetch_pages > 0) {
bool changed = false;
uvm_range_group_range_t *rgr;
@@ -402,62 +410,19 @@ uvm_perf_prefetch_hint_t uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
max(rgr->node.start, va_block->start),
min(rgr->node.end, va_block->end));
if (uvm_page_mask_region_empty(new_residency_mask, region) &&
!uvm_page_mask_region_empty(&prefetch_info->prefetch_pages, region)) {
uvm_page_mask_region_clear(&prefetch_info->prefetch_pages, region);
if (uvm_page_mask_region_empty(faulted_pages, region) &&
!uvm_page_mask_region_empty(prefetch_pages, region)) {
uvm_page_mask_region_clear(prefetch_pages, region);
changed = true;
}
}
if (changed)
prefetch_info->pending_prefetch_pages = uvm_page_mask_weight(&prefetch_info->prefetch_pages);
pending_prefetch_pages = uvm_page_mask_weight(prefetch_pages);
if (prefetch_info->pending_prefetch_pages > 0) {
ret.residency = prefetch_info->last_migration_proc_id;
ret.prefetch_pages_mask = &prefetch_info->prefetch_pages;
}
if (pending_prefetch_pages > 0)
out_hint->residency = va_block->prefetch_info.last_migration_proc_id;
}
return ret;
}
void prefetch_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
uvm_va_block_t *va_block;
UVM_ASSERT(g_uvm_perf_prefetch_enable);
UVM_ASSERT(event_id == UVM_PERF_EVENT_BLOCK_DESTROY ||
event_id == UVM_PERF_EVENT_MODULE_UNLOAD ||
event_id == UVM_PERF_EVENT_BLOCK_SHRINK);
if (event_id == UVM_PERF_EVENT_BLOCK_DESTROY)
va_block = event_data->block_destroy.block;
else if (event_id == UVM_PERF_EVENT_BLOCK_SHRINK)
va_block = event_data->block_shrink.block;
else
va_block = event_data->module_unload.block;
if (!va_block)
return;
prefetch_info_destroy(va_block);
}
NV_STATUS uvm_perf_prefetch_load(uvm_va_space_t *va_space)
{
if (!g_uvm_perf_prefetch_enable)
return NV_OK;
return uvm_perf_module_load(&g_module_prefetch, va_space);
}
void uvm_perf_prefetch_unload(uvm_va_space_t *va_space)
{
if (!g_uvm_perf_prefetch_enable)
return;
uvm_perf_module_unload(&g_module_prefetch, va_space);
}
NV_STATUS uvm_perf_prefetch_init()
@@ -467,13 +432,6 @@ NV_STATUS uvm_perf_prefetch_init()
if (!g_uvm_perf_prefetch_enable)
return NV_OK;
uvm_perf_module_init("perf_prefetch", UVM_PERF_MODULE_TYPE_PREFETCH, g_callbacks_prefetch,
ARRAY_SIZE(g_callbacks_prefetch), &g_module_prefetch);
g_prefetch_info_cache = NV_KMEM_CACHE_CREATE("block_prefetch_info_t", block_prefetch_info_t);
if (!g_prefetch_info_cache)
return NV_ERR_NO_MEMORY;
if (uvm_perf_prefetch_threshold <= 100) {
g_uvm_perf_prefetch_threshold = uvm_perf_prefetch_threshold;
}
@@ -498,14 +456,6 @@ NV_STATUS uvm_perf_prefetch_init()
return NV_OK;
}
void uvm_perf_prefetch_exit()
{
if (!g_uvm_perf_prefetch_enable)
return;
kmem_cache_destroy_safe(&g_prefetch_info_cache);
}
NV_STATUS uvm_test_set_page_prefetch_policy(UVM_TEST_SET_PAGE_PREFETCH_POLICY_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);