550.40.07

Bernhard Stoeckner
2024-01-24 17:51:53 +01:00
parent bb2dac1f20
commit 91676d6628
1411 changed files with 261367 additions and 145959 deletions


@@ -21,8 +21,36 @@
*******************************************************************************/
#include "uvm_linux.h"
#include "uvm_processors.h"
static struct kmem_cache *g_uvm_processor_mask_cache __read_mostly;
NV_STATUS uvm_processor_mask_cache_init(void)
{
g_uvm_processor_mask_cache = NV_KMEM_CACHE_CREATE("uvm_processor_mask_t", uvm_processor_mask_t);
if (!g_uvm_processor_mask_cache)
return NV_ERR_NO_MEMORY;
return NV_OK;
}
void uvm_processor_mask_cache_exit(void)
{
kmem_cache_destroy_safe(&g_uvm_processor_mask_cache);
}
uvm_processor_mask_t *uvm_processor_mask_cache_alloc(void)
{
return kmem_cache_alloc(g_uvm_processor_mask_cache, NV_UVM_GFP_FLAGS);
}
void uvm_processor_mask_cache_free(uvm_processor_mask_t *mask)
{
if (mask)
kmem_cache_free(g_uvm_processor_mask_cache, mask);
}
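
Taken together, uvm_processor_mask_cache_init/exit/alloc/free form a small slab-backed allocator for uvm_processor_mask_t, so callers can grab a scratch mask without putting the large structure on the stack. A minimal sketch of a call site, assuming the UVM module build context and the uvm_processor_mask_zero() helper from uvm_processors.h (example_use_scratch_mask is hypothetical; only the uvm_processor_mask_cache_* functions come from this diff):

// Hypothetical caller: allocate a scratch processor mask from the slab cache.
static NV_STATUS example_use_scratch_mask(void)
{
    uvm_processor_mask_t *scratch = uvm_processor_mask_cache_alloc();

    if (!scratch)
        return NV_ERR_NO_MEMORY;

    uvm_processor_mask_zero(scratch);

    // ... populate and consume the scratch mask ...

    uvm_processor_mask_cache_free(scratch);

    return NV_OK;
}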

int uvm_find_closest_node_mask(int src, const nodemask_t *mask)
{
    int nid;
@@ -38,3 +66,48 @@ int uvm_find_closest_node_mask(int src, const nodemask_t *mask)
    return closest_nid;
}

// This implementation avoids having to dynamically allocate a temporary
// uvm_processor_mask_t on the stack (due to size) or the heap (due to possible
// allocation failures).
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,
                                   const uvm_processor_mask_t *mask)
{
    DECLARE_BITMAP(first_word, BITS_PER_LONG);

    BUILD_BUG_ON(UVM_ID_CPU_VALUE > BITS_PER_LONG);
    BUILD_BUG_ON(BITS_PER_LONG > UVM_ID_MAX_PROCESSORS);

    bitmap_copy(first_word, subset->bitmap, BITS_PER_LONG);
    __clear_bit(UVM_ID_CPU_VALUE, first_word);

    return bitmap_subset(first_word, mask->bitmap, BITS_PER_LONG) &&
           bitmap_subset(subset->bitmap + 1, mask->bitmap + 1, UVM_ID_MAX_PROCESSORS - BITS_PER_LONG);
}
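
The leading comment explains why there is no temporary uvm_processor_mask_t here: the function copies only the first word of the subset bitmap, clears the CPU bit in that copy, and then runs two subset checks, one on the masked first word and one in place on the remaining words. A standalone userspace illustration of the same idea (plain unsigned long words instead of the kernel bitmap API; WORDS and EXCLUDED_BIT are stand-ins, not driver constants):

/* Illustration only: "is sub a subset of super, ignoring one excluded bit?" */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define WORDS        4   /* stand-in for a multi-word processor mask */
#define EXCLUDED_BIT 0   /* stand-in for UVM_ID_CPU_VALUE            */

static bool subset_ignoring_excluded(const unsigned long *sub, const unsigned long *super)
{
    /* Copy and mask only the first word; the other words are compared in place. */
    unsigned long first = sub[0] & ~(1UL << EXCLUDED_BIT);
    size_t i;

    if (first & ~super[0])
        return false;

    for (i = 1; i < WORDS; i++) {
        if (sub[i] & ~super[i])
            return false;
    }

    return true;
}

int main(void)
{
    unsigned long sub[WORDS]   = { 0x5UL, 0, 0, 0x2UL }; /* bits 0, 2 and one bit in the last word */
    unsigned long super[WORDS] = { 0x4UL, 0, 0, 0x2UL }; /* bit 2 and the same last-word bit       */

    /* Bit 0 (the excluded bit) is ignored, so this prints 1. */
    printf("%d\n", subset_ignoring_excluded(sub, super));
    return 0;
}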

void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mask,
                                         const uvm_processor_mask_t *mask)
{
    uvm_gpu_id_t gpu_id;

    uvm_parent_processor_mask_zero(parent_mask);

    for_each_gpu_id_in_mask(gpu_id, mask)
        uvm_parent_processor_mask_set(parent_mask, uvm_parent_gpu_id_from_gpu_id(gpu_id));
}
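
uvm_parent_gpus_from_processor_mask() collapses a per-GPU mask into a per-parent-GPU mask by walking the set GPU bits and mapping each one through uvm_parent_gpu_id_from_gpu_id(). A rough userspace analogue, assuming purely for illustration that each parent owns SUBS_PER_PARENT consecutive ids (the real mapping is whatever uvm_parent_gpu_id_from_gpu_id() implements, and SUBS_PER_PARENT is not a driver constant):

/* Illustration only: fold a fine-grained id mask into a coarser parent mask. */
#include <stdint.h>
#include <stdio.h>

#define SUBS_PER_PARENT 8   /* assumption for the sketch */

static uint32_t parents_from_ids(uint64_t id_mask)
{
    uint32_t parent_mask = 0;
    unsigned id;

    for (id = 0; id < 64; id++) {
        if (id_mask & (1ULL << id))
            parent_mask |= 1U << (id / SUBS_PER_PARENT);
    }

    return parent_mask;
}

int main(void)
{
    /* ids 0, 3 and 9 set: parents 0 and 1 come out set, so this prints 0x3. */
    printf("0x%x\n", parents_from_ids((1ULL << 0) | (1ULL << 3) | (1ULL << 9)));
    return 0;
}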

bool uvm_numa_id_eq(int nid0, int nid1)
{
    UVM_ASSERT(nid0 == -1 || nid0 < MAX_NUMNODES);
    UVM_ASSERT(nid1 == -1 || nid1 < MAX_NUMNODES);

    if ((nid0 == NUMA_NO_NODE || nid1 == NUMA_NO_NODE) && nodes_weight(node_possible_map) == 1) {
        if (nid0 == NUMA_NO_NODE)
            nid0 = first_node(node_possible_map);

        if (nid1 == NUMA_NO_NODE)
            nid1 = first_node(node_possible_map);
    }

    return nid0 == nid1;
}
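
uvm_numa_id_eq() compares two NUMA node ids but treats NUMA_NO_NODE as the single possible node when the system has exactly one: on a one-node machine, -1 and that node's id compare equal, while on multi-node machines NUMA_NO_NODE stays distinct from every concrete id. A userspace analogue of that normalization (NO_NODE, num_possible_nodes and first_possible_node are stand-ins for NUMA_NO_NODE, nodes_weight(node_possible_map) and first_node(node_possible_map)):

/* Illustration only: NUMA id comparison with single-node normalization. */
#include <stdbool.h>
#include <stdio.h>

#define NO_NODE (-1)   /* stand-in for NUMA_NO_NODE */

static bool numa_id_eq(int nid0, int nid1, int num_possible_nodes, int first_possible_node)
{
    /* With exactly one possible node, an unspecified id means that node. */
    if ((nid0 == NO_NODE || nid1 == NO_NODE) && num_possible_nodes == 1) {
        if (nid0 == NO_NODE)
            nid0 = first_possible_node;

        if (nid1 == NO_NODE)
            nid1 = first_possible_node;
    }

    return nid0 == nid1;
}

int main(void)
{
    printf("%d\n", numa_id_eq(NO_NODE, 0, 1, 0)); /* single-node machine: prints 1 */
    printf("%d\n", numa_id_eq(NO_NODE, 0, 2, 0)); /* two-node machine: prints 0    */
    return 0;
}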