515.43.04

This commit is contained in:
Andy Ritger
2022-05-09 13:18:59 -07:00
commit 1739a20efc
2519 changed files with 1060036 additions and 0 deletions

View File

@@ -0,0 +1,427 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2011-2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NV_P2P_H_
#define _NV_P2P_H_
/*
* NVIDIA P2P Structure Versioning
*
* For the nvidia_p2p_*_t structures allocated by the NVIDIA driver, it will
* set the version field of the structure according to the definition used by
* the NVIDIA driver. The "major" field of the version is defined as the upper
* 16 bits, and the "minor" field of the version is defined as the lower 16
* bits. The version field will always be the first 4 bytes of the structure,
* and third-party drivers should check the value of this field in structures
* allocated by the NVIDIA driver to ensure runtime compatibility.
*
* In general, version numbers will be incremented as follows:
* - When a backwards-compatible change is made to the structure layout, the
* minor version for that structure will be incremented. Third-party drivers
* built against an older minor version will continue to work with the newer
* minor version used by the NVIDIA driver, without recompilation.
* - When a breaking change is made to the structure layout, the major version
* will be incremented. Third-party drivers built against an older major
* version require at least recompilation and potentially additional updates
* to use the new API.
*/
/* Version encoding: major (breaking) in the upper 16 bits, minor
 * (backwards-compatible) in the lower 16 bits. See the versioning notes
 * above. */
#define NVIDIA_P2P_MAJOR_VERSION_MASK 0xffff0000
#define NVIDIA_P2P_MINOR_VERSION_MASK 0x0000ffff
/* Extract the major/minor fields from a packed version word. */
#define NVIDIA_P2P_MAJOR_VERSION(v) \
    (((v) & NVIDIA_P2P_MAJOR_VERSION_MASK) >> 16)
#define NVIDIA_P2P_MINOR_VERSION(v) \
    (((v) & NVIDIA_P2P_MINOR_VERSION_MASK))
/* True when structure p was produced under the same major version as v. */
#define NVIDIA_P2P_MAJOR_VERSION_MATCHES(p, v) \
    (NVIDIA_P2P_MAJOR_VERSION((p)->version) == NVIDIA_P2P_MAJOR_VERSION(v))
/* True when p is usable by code compiled against version v: same major,
 * and p's minor is at least v's minor. */
#define NVIDIA_P2P_VERSION_COMPATIBLE(p, v) \
    (NVIDIA_P2P_MAJOR_VERSION_MATCHES(p, v) && \
    (NVIDIA_P2P_MINOR_VERSION((p)->version) >= (NVIDIA_P2P_MINOR_VERSION(v))))
/* GPU architecture identifiers used in nvidia_p2p_params.architecture. */
enum {
    NVIDIA_P2P_ARCHITECTURE_TESLA = 0,
    NVIDIA_P2P_ARCHITECTURE_FERMI,
    NVIDIA_P2P_ARCHITECTURE_CURRENT = NVIDIA_P2P_ARCHITECTURE_FERMI
};
#define NVIDIA_P2P_PARAMS_VERSION 0x00010001
/* Index into nvidia_p2p_params.addresses[]: one entry for the GPU side,
 * one for the third-party device side. */
enum {
    NVIDIA_P2P_PARAMS_ADDRESS_INDEX_GPU = 0,
    NVIDIA_P2P_PARAMS_ADDRESS_INDEX_THIRD_PARTY_DEVICE,
    NVIDIA_P2P_PARAMS_ADDRESS_INDEX_MAX = \
        NVIDIA_P2P_PARAMS_ADDRESS_INDEX_THIRD_PARTY_DEVICE
};
#define NVIDIA_P2P_GPU_UUID_LEN 16
/* Parameters describing a P2P mapping (used by the unsupported
 * nvidia_p2p_init_mapping() API below). The version field is always the
 * first 4 bytes; check it with NVIDIA_P2P_VERSION_COMPATIBLE(). */
typedef
struct nvidia_p2p_params {
    uint32_t version;      /* NVIDIA_P2P_PARAMS_VERSION encoding */
    uint32_t architecture; /* NVIDIA_P2P_ARCHITECTURE_* value */
    /* Mailbox register addresses, laid out per architecture. */
    union nvidia_p2p_mailbox_addresses {
        struct {
            uint64_t wmb_addr;   /* write mailbox address */
            uint64_t wmb_data;   /* write mailbox data */
            uint64_t rreq_addr;  /* read request address */
            uint64_t rcomp_addr; /* read completion address */
            uint64_t reserved[2];
        } fermi;
    } addresses[NVIDIA_P2P_PARAMS_ADDRESS_INDEX_MAX+1];
} nvidia_p2p_params_t;
/*
* Capability flag for users to detect
* driver support for persistent pages.
*/
extern int nvidia_p2p_cap_persistent_pages;
#define NVIDIA_P2P_CAP_PERSISTENT_PAGES
/*
* This API is not supported.
*/
int nvidia_p2p_init_mapping(uint64_t p2p_token,
struct nvidia_p2p_params *params,
void (*destroy_callback)(void *data),
void *data);
/*
* This API is not supported.
*/
int nvidia_p2p_destroy_mapping(uint64_t p2p_token);
/* Page sizes that may appear in nvidia_p2p_page_table.page_size. */
enum nvidia_p2p_page_size_type {
    NVIDIA_P2P_PAGE_SIZE_4KB = 0,
    NVIDIA_P2P_PAGE_SIZE_64KB,
    NVIDIA_P2P_PAGE_SIZE_128KB,
    NVIDIA_P2P_PAGE_SIZE_COUNT
};
/* A single GPU page entry in a P2P page table. */
typedef
struct nvidia_p2p_page {
    uint64_t physical_address; /* physical address of the page */
    /* Per-architecture request register values for this page. */
    union nvidia_p2p_request_registers {
        struct {
            uint32_t wreqmb_h;
            uint32_t rreqmb_h;
            uint32_t rreqmb_0;
            uint32_t reserved[3];
        } fermi;
    } registers;
} nvidia_p2p_page_t;
#define NVIDIA_P2P_PAGE_TABLE_VERSION 0x00010002
#define NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE(p) \
    NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_PAGE_TABLE_VERSION)
/* Page table returned by nvidia_p2p_get_pages(); allocated and owned by
 * the NVIDIA driver, released via nvidia_p2p_put_pages() /
 * nvidia_p2p_free_page_table(). Check version before use. */
typedef
struct nvidia_p2p_page_table {
    uint32_t version;   /* NVIDIA_P2P_PAGE_TABLE_VERSION encoding */
    uint32_t page_size; /* enum nvidia_p2p_page_size_type */
    struct nvidia_p2p_page **pages; /* array of 'entries' page pointers */
    uint32_t entries;   /* number of valid entries in pages[] */
    uint8_t *gpu_uuid;  /* UUID of the owning GPU (NVIDIA_P2P_GPU_UUID_LEN bytes) */
} nvidia_p2p_page_table_t;
/*
* @brief
* Make the pages underlying a range of GPU virtual memory
* accessible to a third-party device.
*
* This API only supports pinned, GPU-resident memory, such as that provided
* by cudaMalloc().
*
* This API may sleep.
*
* @param[in] p2p_token
* A token that uniquely identifies the P2P mapping.
* @param[in] va_space
* A GPU virtual address space qualifier.
* @param[in] virtual_address
* The start address in the specified virtual address space.
* Address must be aligned to the 64KB boundary.
* @param[in] length
* The length of the requested P2P mapping.
* Length must be a multiple of 64KB.
* @param[out] page_table
* A pointer to an array of structures with P2P PTEs.
* @param[in] free_callback
* A pointer to the function to be invoked when the pages
* underlying the virtual address range are freed
* implicitly.
* If NULL, persistent pages will be returned.
* This means the pages underlying the range of GPU virtual memory
* will persist until explicitly freed by nvidia_p2p_put_pages().
* Persistent GPU memory mappings are not supported on PowerPC,
* MIG-enabled devices and vGPU.
* @param[in] data
* A non-NULL opaque pointer to private data to be passed to the
* callback function.
*
* @return
* 0 upon successful completion.
* -EINVAL if an invalid argument was supplied.
* -ENOTSUPP if the requested operation is not supported.
* -ENOMEM if the driver failed to allocate memory or if
* insufficient resources were available to complete the operation.
* -EIO if an unknown error occurred.
*/
int nvidia_p2p_get_pages(uint64_t p2p_token, uint32_t va_space,
uint64_t virtual_address,
uint64_t length,
struct nvidia_p2p_page_table **page_table,
void (*free_callback)(void *data),
void *data);
#define NVIDIA_P2P_DMA_MAPPING_VERSION 0x00020003
#define NVIDIA_P2P_DMA_MAPPING_VERSION_COMPATIBLE(p) \
    NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_DMA_MAPPING_VERSION)
struct pci_dev;
/* DMA mapping returned by nvidia_p2p_dma_map_pages(); allocated and owned
 * by the NVIDIA driver, released via nvidia_p2p_dma_unmap_pages() /
 * nvidia_p2p_free_dma_mapping(). Check version before use. */
typedef
struct nvidia_p2p_dma_mapping {
    uint32_t version; /* NVIDIA_P2P_DMA_MAPPING_VERSION encoding */
    enum nvidia_p2p_page_size_type page_size_type;
    uint32_t entries;        /* number of addresses in dma_addresses[] */
    uint64_t *dma_addresses; /* bus addresses usable by the peer device */
    void *private;           /* NVIDIA driver internal state; do not touch */
    struct pci_dev *pci_dev; /* peer device this mapping was created for */
} nvidia_p2p_dma_mapping_t;
/*
* @brief
* Make the physical pages retrieved using nvidia_p2p_get_pages accessible to
* a third-party device.
*
* @param[in] peer
* The struct pci_dev * of the peer device that needs to DMA to/from the
* mapping.
* @param[in] page_table
* The page table outlining the physical pages underlying the mapping, as
* retrieved with nvidia_p2p_get_pages().
* @param[out] dma_mapping
* The DMA mapping containing the DMA addresses to use on the third-party
* device.
*
* @return
* 0 upon successful completion.
* -EINVAL if an invalid argument was supplied.
* -ENOTSUPP if the requested operation is not supported.
* -EIO if an unknown error occurred.
*/
int nvidia_p2p_dma_map_pages(struct pci_dev *peer,
struct nvidia_p2p_page_table *page_table,
struct nvidia_p2p_dma_mapping **dma_mapping);
/*
* @brief
* Unmap the physical pages previously mapped to the third-party device by
* nvidia_p2p_dma_map_pages().
*
* @param[in] peer
* The struct pci_dev * of the peer device that the DMA mapping belongs to.
* @param[in] page_table
* The page table backing the DMA mapping to be unmapped.
* @param[in] dma_mapping
* The DMA mapping containing the DMA addresses used by the third-party
* device, as retrieved with nvidia_p2p_dma_map_pages(). After this call
* returns, neither this struct nor the addresses contained within will be
* valid for use by the third-party device.
*
* @return
* 0 upon successful completion.
* -EINVAL if an invalid argument was supplied.
* -EIO if an unknown error occurred.
*/
int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
struct nvidia_p2p_page_table *page_table,
struct nvidia_p2p_dma_mapping *dma_mapping);
/*
* @brief
* Release a set of pages previously made accessible to
* a third-party device.
*
* @param[in] p2p_token
* A token that uniquely identifies the P2P mapping.
* @param[in] va_space
* A GPU virtual address space qualifier.
* @param[in] virtual_address
* The start address in the specified virtual address space.
* @param[in] page_table
* A pointer to the array of structures with P2P PTEs.
*
* @return
* 0 upon successful completion.
* -EINVAL if an invalid argument was supplied.
* -EIO if an unknown error occurred.
*/
int nvidia_p2p_put_pages(uint64_t p2p_token, uint32_t va_space,
uint64_t virtual_address,
struct nvidia_p2p_page_table *page_table);
/*
* @brief
* Free a third-party P2P page table. (This function is a no-op.)
*
* @param[in] page_table
* A pointer to the array of structures with P2P PTEs.
*
* @return
* 0 upon successful completion.
* -EINVAL if an invalid argument was supplied.
*/
int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table);
/*
* @brief
* Free a third-party P2P DMA mapping. (This function is a no-op.)
*
* @param[in] dma_mapping
* A pointer to the DMA mapping structure.
*
* @return
* 0 upon successful completion.
* -EINVAL if an invalid argument was supplied.
*/
int nvidia_p2p_free_dma_mapping(struct nvidia_p2p_dma_mapping *dma_mapping);
#define NVIDIA_P2P_RSYNC_DRIVER_VERSION 0x00010001
#define NVIDIA_P2P_RSYNC_DRIVER_VERSION_COMPATIBLE(p) \
    NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_RSYNC_DRIVER_VERSION)
/* Callback table an rsync driver passes to
 * nvidia_p2p_register_rsync_driver(); semantics of each callback are
 * documented at the registration function below. */
typedef
struct nvidia_p2p_rsync_driver {
    uint32_t version; /* NVIDIA_P2P_RSYNC_DRIVER_VERSION encoding */
    int (*get_relaxed_ordering_mode)(int *mode, void *data);
    void (*put_relaxed_ordering_mode)(int mode, void *data);
    /* Must not sleep or reschedule; may be invoked under spinlocks. */
    void (*wait_for_rsync)(struct pci_dev *gpu, void *data);
} nvidia_p2p_rsync_driver_t;
/*
* @brief
* Registers the rsync driver.
*
* @param[in] driver
* A pointer to the rsync driver structure. The NVIDIA driver would use,
*
* get_relaxed_ordering_mode to obtain a reference to the current relaxed
* ordering mode (treated as a boolean) from the rsync driver.
*
* put_relaxed_ordering_mode to release a reference to the current relaxed
* ordering mode back to the rsync driver. The NVIDIA driver will call this
* function once for each successful call to get_relaxed_ordering_mode, and
* the relaxed ordering mode must not change until the last reference is
* released.
*
* wait_for_rsync to call into the rsync module to issue RSYNC. This callback
* can't sleep or re-schedule as it may arrive under spinlocks.
* @param[in] data
* A pointer to the rsync driver's private data.
*
* @Returns
* 0 upon successful completion.
* -EINVAL parameters are incorrect.
* -EBUSY if a module is already registered or GPU devices are in use.
*/
int nvidia_p2p_register_rsync_driver(nvidia_p2p_rsync_driver_t *driver,
void *data);
/*
* @brief
* Unregisters the rsync driver.
*
* @param[in] driver
* A pointer to the rsync driver structure.
* @param[in] data
* A pointer to the rsync driver's private data.
*/
void nvidia_p2p_unregister_rsync_driver(nvidia_p2p_rsync_driver_t *driver,
void *data);
#define NVIDIA_P2P_RSYNC_REG_INFO_VERSION 0x00020001
#define NVIDIA_P2P_RSYNC_REG_INFO_VERSION_COMPATIBLE(p) \
    NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_RSYNC_REG_INFO_VERSION)
/* One rsync (GEN-ID) register mapping entry, associating an NPU/GPU pair
 * with its register window and topology identifiers. */
typedef struct nvidia_p2p_rsync_reg {
    void *ptr;               /* mapped register pointer */
    size_t size;             /* size of the mapped window */
    struct pci_dev *ibmnpu;  /* owning NPU device */
    struct pci_dev *gpu;     /* associated GPU device */
    uint32_t cluster_id;     /* currently always 0 (see note above API) */
    uint32_t socket_id;
} nvidia_p2p_rsync_reg_t;
/* Array of rsync register entries, allocated by
 * nvidia_p2p_get_rsync_registers() and freed by
 * nvidia_p2p_put_rsync_registers(). */
typedef struct nvidia_p2p_rsync_reg_info {
    uint32_t version; /* NVIDIA_P2P_RSYNC_REG_INFO_VERSION encoding */
    nvidia_p2p_rsync_reg_t *regs; /* array of 'entries' elements */
    size_t entries;
} nvidia_p2p_rsync_reg_info_t;
/*
* @brief
* Gets rsync (GEN-ID) register information associated with the supported
* NPUs.
*
* The caller would use the returned information {GPU device, NPU device,
* socket-id, cluster-id} to pick the optimal generation registers to issue
* RSYNC (NVLink HW flush).
*
* The interface allocates structures to return the information, hence
* nvidia_p2p_put_rsync_registers() must be called to free the structures.
*
* Note, cluster-id is hardcoded to zero as early system configurations would
* only support cluster mode i.e. all devices would share the same cluster-id
* (0). In the future, appropriate kernel support would be needed to query
* cluster-ids.
*
* @param[out] reg_info
* A pointer to the rsync reg info structure.
*
* @Returns
* 0 Upon successful completion. Otherwise, returns negative value.
*/
int nvidia_p2p_get_rsync_registers(nvidia_p2p_rsync_reg_info_t **reg_info);
/*
* @brief
* Frees the structures allocated by nvidia_p2p_get_rsync_registers().
*
* @param[in] reg_info
* A pointer to the rsync reg info structure.
*/
void nvidia_p2p_put_rsync_registers(nvidia_p2p_rsync_reg_info_t *reg_info);
#endif /* _NV_P2P_H_ */

View File

@@ -0,0 +1,61 @@
###########################################################################
# Kbuild fragment for nvidia-peermem.ko
###########################################################################
#
# Define NVIDIA_PEERMEM_{SOURCES,OBJECTS}
#
NVIDIA_PEERMEM_SOURCES =
NVIDIA_PEERMEM_SOURCES += nvidia-peermem/nvidia-peermem.c
NVIDIA_PEERMEM_OBJECTS = $(patsubst %.c,%.o,$(NVIDIA_PEERMEM_SOURCES))
obj-m += nvidia-peermem.o
nvidia-peermem-y := $(NVIDIA_PEERMEM_OBJECTS)
NVIDIA_PEERMEM_KO = nvidia-peermem/nvidia-peermem.ko
NV_KERNEL_MODULE_TARGETS += $(NVIDIA_PEERMEM_KO)
#
# Define nvidia-peermem.ko-specific CFLAGS.
#
NVIDIA_PEERMEM_CFLAGS += -I$(src)/nvidia-peermem
NVIDIA_PEERMEM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
#
# In case of MOFED installation, nvidia-peermem compilation
# needs paths to the MOFED headers in CFLAGS.
# MOFED's Module.symvers is needed for the build
# to find the additional ib_* symbols.
#
OFA_DIR := /usr/src/ofa_kernel
# Candidate MOFED header locations, most specific (per-arch, per-kernel)
# first; the first existing directory wins, falling back to OFA_DIR.
OFA_CANDIDATES = $(OFA_DIR)/$(ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
MLNX_OFED_KERNEL := $(shell for d in $(OFA_CANDIDATES); do \
    if [ -d "$$d" ]; then \
        echo "$$d"; \
        exit 0; \
    fi; \
    done; \
    echo $(OFA_DIR) \
    )
# Only wire up MOFED includes/symbols when the resolved directory exists;
# otherwise build against in-tree RDMA headers.
ifneq ($(shell test -d $(MLNX_OFED_KERNEL) && echo "true" || echo "" ),)
NVIDIA_PEERMEM_CFLAGS += -I$(MLNX_OFED_KERNEL)/include -I$(MLNX_OFED_KERNEL)/include/rdma
KBUILD_EXTRA_SYMBOLS := $(MLNX_OFED_KERNEL)/Module.symvers
endif
#
# Register the conftests needed by nvidia-peermem.ko
#
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_PEERMEM_OBJECTS)
NV_CONFTEST_GENERIC_COMPILE_TESTS += ib_peer_memory_symbols
NV_CONFTEST_FUNCTION_COMPILE_TESTS +=
NV_CONFTEST_TYPE_COMPILE_TESTS +=

View File

@@ -0,0 +1,523 @@
/* SPDX-License-Identifier: Linux-OpenIB */
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/hugetlb.h>
#include <linux/pci.h>
#include "nv-p2p.h"
#include "peer_mem.h"
#include "conftest.h"
#define DRV_NAME "nv_mem"
#define DRV_VERSION NV_VERSION_STRING
MODULE_AUTHOR("Yishai Hadas");
MODULE_DESCRIPTION("NVIDIA GPU memory plug-in");
MODULE_LICENSE("Linux-OpenIB");
MODULE_VERSION(DRV_VERSION);
/* Values accepted by the peerdirect_support module parameter. */
enum {
    NV_MEM_PEERDIRECT_SUPPORT_DEFAULT = 0,
    NV_MEM_PEERDIRECT_SUPPORT_LEGACY = 1,
};
static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
module_param(peerdirect_support, int, S_IRUGO);
MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS)
#if defined(NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
/* Fallbacks for older kernels that only provide ACCESS_ONCE. */
#ifndef READ_ONCE
#define READ_ONCE(x) ACCESS_ONCE(x)
#endif
#ifndef WRITE_ONCE
#define WRITE_ONCE(x, val) ({ ACCESS_ONCE(x) = (val); })
#endif
/* NVIDIA P2P operates on fixed 64KB GPU pages. */
#define GPU_PAGE_SHIFT 16
#define GPU_PAGE_SIZE ((u64)1 << GPU_PAGE_SHIFT)
#define GPU_PAGE_OFFSET (GPU_PAGE_SIZE-1)
#define GPU_PAGE_MASK (~GPU_PAGE_OFFSET)
/* Invalidation entry point into the IB core, filled in by
 * ib_register_peer_memory_client(). */
invalidate_peer_memory mem_invalidate_callback;
/* Registration handle for the traditional (callback-based) client. */
static void *reg_handle = NULL;
/* Registration handle for the "nc" (persistent pages) client. */
static void *reg_handle_nc = NULL;
/* Per-acquire state tracking one GPU virtual address range. */
struct nv_mem_context {
    struct nvidia_p2p_page_table *page_table;   /* from nvidia_p2p_get_pages() */
    struct nvidia_p2p_dma_mapping *dma_mapping; /* from nvidia_p2p_dma_map_pages() */
    u64 core_context;     /* opaque IB core ticket used for invalidation */
    u64 page_virt_start;  /* range start, rounded down to GPU page */
    u64 page_virt_end;    /* range end, rounded up to GPU page */
    size_t mapped_size;   /* page_virt_end - page_virt_start */
    unsigned long npages; /* entries in the DMA mapping */
    unsigned long page_size; /* set to GPU_PAGE_SIZE in get_pages() */
    /* Task executing our invalidate callback; lets dma_unmap/put_pages
     * detect re-entry from that callback and skip double teardown. */
    struct task_struct *callback_task;
    int sg_allocated;         /* sg_head holds an allocated table */
    struct sg_table sg_head;  /* copy of the sg_table populated in nv_dma_map() */
};
/*
 * Free callback passed to nvidia_p2p_get_pages() in nv_mem_get_pages().
 * Invoked by the NVIDIA driver when the pages backing the GPU virtual
 * range are freed implicitly; forwards the invalidation to the IB core and
 * then releases the driver-owned page table and DMA mapping structures.
 */
static void nv_get_p2p_free_callback(void *data)
{
    int ret = 0;
    struct nv_mem_context *nv_mem_context = (struct nv_mem_context *)data;
    struct nvidia_p2p_page_table *page_table = NULL;
    struct nvidia_p2p_dma_mapping *dma_mapping = NULL;

    /* Pin the module so it cannot unload while this teardown runs. */
    __module_get(THIS_MODULE);
    if (!nv_mem_context) {
        peer_err("nv_get_p2p_free_callback -- invalid nv_mem_context\n");
        goto out;
    }
    if (!nv_mem_context->page_table) {
        peer_err("nv_get_p2p_free_callback -- invalid page_table\n");
        goto out;
    }
    /* Save page_table locally to prevent it being freed as part of nv_mem_release
     * in case it's called internally by that callback.
     */
    page_table = nv_mem_context->page_table;
    if (!nv_mem_context->dma_mapping) {
        peer_err("nv_get_p2p_free_callback -- invalid dma_mapping\n");
        goto out;
    }
    dma_mapping = nv_mem_context->dma_mapping;
    /* For now don't set nv_mem_context->page_table to NULL,
     * confirmed by NVIDIA that inflight put_pages with valid pointer will fail gracefully.
     */
    /* Record the current task so nv_dma_unmap()/nv_mem_put_pages() can
     * detect re-entry from this callback and skip their own teardown. */
    nv_mem_context->callback_task = current;
    (*mem_invalidate_callback) (reg_handle, nv_mem_context->core_context);
    nv_mem_context->callback_task = NULL;
    ret = nvidia_p2p_free_dma_mapping(dma_mapping);
    if (ret)
        peer_err("nv_get_p2p_free_callback -- error %d while calling nvidia_p2p_free_dma_mapping()\n", ret);
    ret = nvidia_p2p_free_page_table(page_table);
    if (ret)
        peer_err("nv_get_p2p_free_callback -- error %d while calling nvidia_p2p_free_page_table()\n", ret);
out:
    module_put(THIS_MODULE);
    return;
}
/* At that function we don't call IB core - no ticket exists */
static void nv_mem_dummy_callback(void *data)
{
struct nv_mem_context *nv_mem_context = (struct nv_mem_context *)data;
int ret = 0;
__module_get(THIS_MODULE);
ret = nvidia_p2p_free_page_table(nv_mem_context->page_table);
if (ret)
peer_err("nv_mem_dummy_callback -- error %d while calling nvidia_p2p_free_page_table()\n", ret);
module_put(THIS_MODULE);
return;
}
/* acquire return code: 1 mine, 0 - not mine */
static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_private_data,
char *peer_mem_name, void **client_context)
{
int ret = 0;
struct nv_mem_context *nv_mem_context;
nv_mem_context = kzalloc(sizeof *nv_mem_context, GFP_KERNEL);
if (!nv_mem_context)
/* Error case handled as not mine */
return 0;
nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
nv_mem_context->page_virt_end = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
nv_mem_context->mapped_size = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
&nv_mem_context->page_table, nv_mem_dummy_callback, nv_mem_context);
if (ret < 0)
goto err;
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
nv_mem_context->page_table);
if (ret < 0) {
/* Not expected, however in case callback was called on that buffer just before
put pages we'll expect to fail gracefully (confirmed by NVIDIA) and return an error.
*/
peer_err("nv_mem_acquire -- error %d while calling nvidia_p2p_put_pages()\n", ret);
goto err;
}
/* 1 means mine */
*client_context = nv_mem_context;
__module_get(THIS_MODULE);
return 1;
err:
kfree(nv_mem_context);
/* Error case handled as not mine */
return 0;
}
/*
 * dma_map peer-memory callback: create the DMA mapping for the previously
 * pinned GPU pages and populate sg_head with one entry per 64KB GPU page.
 * Returns 0 on success or a negative errno; on success *nmap holds the
 * number of mapped entries.
 */
static int nv_dma_map(struct sg_table *sg_head, void *context,
                      struct device *dma_device, int dmasync,
                      int *nmap)
{
    int i, ret;
    struct scatterlist *sg;
    struct nv_mem_context *nv_mem_context =
        (struct nv_mem_context *) context;
    struct nvidia_p2p_page_table *page_table = nv_mem_context->page_table;
    struct nvidia_p2p_dma_mapping *dma_mapping;
    struct pci_dev *pdev = to_pci_dev(dma_device);

    /* The whole sg construction below assumes fixed 64KB GPU pages. */
    if (page_table->page_size != NVIDIA_P2P_PAGE_SIZE_64KB) {
        peer_err("nv_dma_map -- assumption of 64KB pages failed size_id=%u\n",
                 nv_mem_context->page_table->page_size);
        return -EINVAL;
    }
    if (!pdev) {
        peer_err("nv_dma_map -- invalid pci_dev\n");
        return -EINVAL;
    }
    ret = nvidia_p2p_dma_map_pages(pdev, page_table, &dma_mapping);
    if (ret) {
        peer_err("nv_dma_map -- error %d while calling nvidia_p2p_dma_map_pages()\n", ret);
        return ret;
    }
    /* Reject mappings produced under an incompatible struct layout. */
    if (!NVIDIA_P2P_DMA_MAPPING_VERSION_COMPATIBLE(dma_mapping)) {
        peer_err("error, incompatible dma mapping version 0x%08x\n",
                 dma_mapping->version);
        nvidia_p2p_dma_unmap_pages(pdev, page_table, dma_mapping);
        return -EINVAL;
    }
    nv_mem_context->npages = dma_mapping->entries;
    ret = sg_alloc_table(sg_head, dma_mapping->entries, GFP_KERNEL);
    if (ret) {
        /* Unwind the DMA mapping created above before failing. */
        nvidia_p2p_dma_unmap_pages(pdev, page_table, dma_mapping);
        return ret;
    }
    nv_mem_context->dma_mapping = dma_mapping;
    nv_mem_context->sg_allocated = 1;
    /* Fill one sg entry per GPU page; no struct page backs these
     * addresses, hence sg_set_page() with a NULL page. */
    for_each_sg(sg_head->sgl, sg, nv_mem_context->npages, i) {
        sg_set_page(sg, NULL, nv_mem_context->page_size, 0);
        sg->dma_address = dma_mapping->dma_addresses[i];
        sg->dma_length = nv_mem_context->page_size;
    }
    /* Keep a copy so later callbacks can validate the caller's sg_head. */
    nv_mem_context->sg_head = *sg_head;
    *nmap = nv_mem_context->npages;
    return 0;
}
static int nv_dma_unmap(struct sg_table *sg_head, void *context,
struct device *dma_device)
{
struct pci_dev *pdev = to_pci_dev(dma_device);
struct nv_mem_context *nv_mem_context =
(struct nv_mem_context *)context;
if (!nv_mem_context) {
peer_err("nv_dma_unmap -- invalid nv_mem_context\n");
return -EINVAL;
}
if (WARN_ON(0 != memcmp(sg_head, &nv_mem_context->sg_head, sizeof(*sg_head))))
return -EINVAL;
if (nv_mem_context->callback_task == current)
goto out;
if (nv_mem_context->dma_mapping)
nvidia_p2p_dma_unmap_pages(pdev, nv_mem_context->page_table,
nv_mem_context->dma_mapping);
out:
return 0;
}
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
{
int ret = 0;
struct nv_mem_context *nv_mem_context =
(struct nv_mem_context *) context;
if (!nv_mem_context) {
peer_err("nv_mem_put_pages -- invalid nv_mem_context\n");
return;
}
if (WARN_ON(0 != memcmp(sg_head, &nv_mem_context->sg_head, sizeof(*sg_head))))
return;
if (nv_mem_context->callback_task == current)
return;
ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
nv_mem_context->page_table);
#ifdef _DEBUG_ONLY_
/* Here we expect an error in real life cases that should be ignored - not printed.
* (e.g. concurrent callback with that call)
*/
if (ret < 0) {
printk(KERN_ERR "error %d while calling nvidia_p2p_put_pages, page_table=%p \n",
ret, nv_mem_context->page_table);
}
#endif
return;
}
/* release peer-memory callback: free the per-range context allocated in
 * nv_mem_acquire() and drop the module reference taken there. */
static void nv_mem_release(void *context)
{
    struct nv_mem_context *ctx = context;

    /* Free the sg_table populated in nv_dma_map(), if any. */
    if (ctx->sg_allocated) {
        sg_free_table(&ctx->sg_head);
        ctx->sg_allocated = 0;
    }
    kfree(ctx);
    module_put(THIS_MODULE);
}
/* get_pages peer-memory callback (traditional client): pin the GPU pages
 * for the range probed in nv_mem_acquire(), registering
 * nv_get_p2p_free_callback() for implicit invalidation. */
static int nv_mem_get_pages(unsigned long addr,
                            size_t size, int write, int force,
                            struct sg_table *sg_head,
                            void *client_context,
                            u64 core_context)
{
    struct nv_mem_context *ctx = client_context;
    int rc;

    if (ctx == NULL)
        return -EINVAL;

    ctx->core_context = core_context;
    ctx->page_size = GPU_PAGE_SIZE;

    rc = nvidia_p2p_get_pages(0, 0, ctx->page_virt_start, ctx->mapped_size,
                              &ctx->page_table, nv_get_p2p_free_callback, ctx);
    if (rc < 0) {
        peer_err("error %d while calling nvidia_p2p_get_pages()\n", rc);
        return rc;
    }

    /* No extra access to ctx->page_table here: we are not called under a
     * lock and may race with an inflight invalidate callback on this
     * buffer. Any further handling is deferred to nv_dma_map(). */
    return 0;
}
/* get_page_size peer-memory callback: report the mapping's page size
 * (always GPU_PAGE_SIZE, set in the get_pages callbacks). */
static unsigned long nv_mem_get_page_size(void *context)
{
    struct nv_mem_context *ctx = context;

    return ctx->page_size;
}
/* Traditional peer-memory client ("nv_mem"): uses the NVIDIA driver's
 * free callback for invalidation. The ex_size/flags fields of the
 * extended wrapper are filled in at init time based on
 * peerdirect_support. */
static struct peer_memory_client_ex nv_mem_client_ex = { .client = {
    .acquire = nv_mem_acquire,
    .get_pages = nv_mem_get_pages,
    .dma_map = nv_dma_map,
    .dma_unmap = nv_dma_unmap,
    .put_pages = nv_mem_put_pages,
    .get_page_size = nv_mem_get_page_size,
    .release = nv_mem_release,
}};
/* get_pages peer-memory callback for the "nc" client: pin the GPU pages
 * as persistent pages (NULL free callback), so they remain valid until
 * explicitly released via nv_mem_put_pages(). */
static int nv_mem_get_pages_nc(unsigned long addr,
                               size_t size, int write, int force,
                               struct sg_table *sg_head,
                               void *client_context,
                               u64 core_context)
{
    struct nv_mem_context *ctx = client_context;
    int rc;

    if (ctx == NULL)
        return -EINVAL;

    ctx->core_context = core_context;
    ctx->page_size = GPU_PAGE_SIZE;

    /* NULL callback requests persistent pages from the NVIDIA driver. */
    rc = nvidia_p2p_get_pages(0, 0, ctx->page_virt_start, ctx->mapped_size,
                              &ctx->page_table, NULL, NULL);
    if (rc < 0) {
        peer_err("error %d while calling nvidia_p2p_get_pages() with NULL callback\n", rc);
        return rc;
    }

    return 0;
}
/* "nc" peer-memory client ("nv_mem_nc"): identical to the traditional
 * client except get_pages requests persistent pages (no invalidate
 * callback from the NVIDIA driver). */
static struct peer_memory_client nv_mem_client_nc = {
    .acquire = nv_mem_acquire,
    .get_pages = nv_mem_get_pages_nc,
    .dma_map = nv_dma_map,
    .dma_unmap = nv_dma_unmap,
    .put_pages = nv_mem_put_pages,
    .get_page_size = nv_mem_get_page_size,
    .release = nv_mem_release,
};
#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
/* Validate the peerdirect_support module parameter.
 * Returns 0 when it holds a recognized value, -EINVAL otherwise. */
static int nv_mem_param_conf_check(void)
{
    if (peerdirect_support == NV_MEM_PEERDIRECT_SUPPORT_DEFAULT ||
        peerdirect_support == NV_MEM_PEERDIRECT_SUPPORT_LEGACY)
        return 0;

    peer_err("invalid peerdirect_support param value %d\n", peerdirect_support);
    return -EINVAL;
}
/*
 * Module init: validate the peerdirect_support parameter, then register
 * both peer-memory clients with the IB core:
 *  - nv_mem_client_ex ("nv_mem"): traditional client relying on the
 *    NVIDIA driver's invalidate callback.
 *  - nv_mem_client_nc ("nv_mem_nc"): persistent-pages client; requires
 *    nvidia_p2p_cap_persistent_pages support in nvidia.ko.
 * If either registration fails, any successful registration is rolled
 * back. Returns 0 on success or a negative errno; always -EINVAL when
 * built without NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT.
 */
static int __init nv_mem_client_init(void)
{
    /* Fix: declarations must precede statements — the original declared
     * "int status" after the conf-check statements, which breaks under
     * the kernel's -Werror=declaration-after-statement builds. */
    int rc;
#if defined (NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
    int status = 0;
#endif

    rc = nv_mem_param_conf_check();
    if (rc) {
        return rc;
    }

#if defined (NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
    // off by one, to leave space for the trailing '1' which is flagging
    // the new client type
    BUG_ON(strlen(DRV_NAME) > IB_PEER_MEMORY_NAME_MAX-1);
    strcpy(nv_mem_client_ex.client.name, DRV_NAME);

    // [VER_MAX-1]=1 <-- last byte is used as flag
    // [VER_MAX-2]=0 <-- version string terminator
    BUG_ON(strlen(DRV_VERSION) > IB_PEER_MEMORY_VER_MAX-2);
    strcpy(nv_mem_client_ex.client.version, DRV_VERSION);
    nv_mem_client_ex.client.version[IB_PEER_MEMORY_VER_MAX-1] = 1;

    if (peerdirect_support != NV_MEM_PEERDIRECT_SUPPORT_LEGACY) {
        nv_mem_client_ex.ex_size = sizeof(struct peer_memory_client_ex);
        // PEER_MEM_INVALIDATE_UNMAPS allow clients to opt out of
        // unmap/put_pages during invalidation, i.e. the client tells the
        // infiniband layer that it does not need to call
        // unmap/put_pages in the invalidation callback
        nv_mem_client_ex.flags = PEER_MEM_INVALIDATE_UNMAPS;
    } else {
        nv_mem_client_ex.ex_size = 0;
        nv_mem_client_ex.flags = 0;
    }

    reg_handle = ib_register_peer_memory_client(&nv_mem_client_ex.client,
                                                &mem_invalidate_callback);
    if (!reg_handle) {
        peer_err("nv_mem_client_init -- error while registering traditional client\n");
        status = -EINVAL;
        goto out;
    }

    // The nc client enables support for persistent pages.
    // Thanks to this check, nvidia-peermem requires the new symbol from nvidia.ko, which
    // prevents users to unintentionally load this module with unsupported nvidia.ko.
    BUG_ON(!nvidia_p2p_cap_persistent_pages);
    strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
    strcpy(nv_mem_client_nc.version, DRV_VERSION);
    reg_handle_nc = ib_register_peer_memory_client(&nv_mem_client_nc, NULL);
    if (!reg_handle_nc) {
        peer_err("nv_mem_client_init -- error while registering nc client\n");
        status = -EINVAL;
        goto out;
    }

out:
    if (status) {
        /* Roll back whichever registration(s) succeeded. */
        if (reg_handle) {
            ib_unregister_peer_memory_client(reg_handle);
            reg_handle = NULL;
        }
        if (reg_handle_nc) {
            ib_unregister_peer_memory_client(reg_handle_nc);
            reg_handle_nc = NULL;
        }
    }
    return status;
#else
    return -EINVAL;
#endif
}
/*
 * Module exit: unregister whichever peer-memory clients were registered
 * in nv_mem_client_init(). The IB core waits for outstanding users as
 * part of unregistration.
 */
static void __exit nv_mem_client_cleanup(void)
{
#if defined (NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
    if (reg_handle)
        ib_unregister_peer_memory_client(reg_handle);
    if (reg_handle_nc)
        ib_unregister_peer_memory_client(reg_handle_nc);
#endif
}
module_init(nv_mem_client_init);
module_exit(nv_mem_client_cleanup);

View File

@@ -0,0 +1,196 @@
/* SPDX-License-Identifier: Linux-OpenIB */
/*
* Copyright (c) 2014-2020, Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RDMA_PEER_MEM_H
#define RDMA_PEER_MEM_H
#include <linux/scatterlist.h>
#define IB_PEER_MEMORY_NAME_MAX 64
#define IB_PEER_MEMORY_VER_MAX 16
/*
* Prior versions used a void * for core_context, at some point this was
* switched to use u64. Be careful if compiling this as 32 bit. To help the
* value of core_context is limited to u32 so it should work OK despite the
* type change.
*/
#define PEER_MEM_U64_CORE_CONTEXT
struct device;
/**
 * struct peer_memory_client - registration information for user virtual
 * memory handlers
 *
 * The peer_memory_client scheme allows a driver to register with the ib_umem
 * system that it has the ability to understand user virtual address ranges
 * that are not compatible with get_user_pages(). For instance VMAs created
 * with io_remap_pfn_range(), or other driver special VMA.
 *
 * For ranges the interface understands it can provide a DMA mapped sg_table
 * for use by the ib_umem, allowing user virtual ranges that cannot be
 * supported by get_user_pages() to be used as umems.
 *
 * Lifecycle of a range: acquire() -> get_pages() -> dma_map() ->
 * dma_unmap() -> put_pages() -> release(). Each member's contract is
 * documented below.
 */
struct peer_memory_client {
/* Identification strings reported for this client; buffer sizes are
 * IB_PEER_MEMORY_NAME_MAX / IB_PEER_MEMORY_VER_MAX. */
char name[IB_PEER_MEMORY_NAME_MAX];
char version[IB_PEER_MEMORY_VER_MAX];
/**
 * acquire - Begin working with a user space virtual address range
 *
 * @addr - Virtual address to be checked whether belongs to peer.
 * @size - Length of the virtual memory area starting at addr.
 * @peer_mem_private_data - Obsolete, always NULL
 * @peer_mem_name - Obsolete, always NULL
 * @client_context - Returns an opaque value for this acquire use in
 * other APIs
 *
 * Returns 1 if the peer_memory_client supports the entire virtual
 * address range, 0 or -ERRNO otherwise. If 1 is returned then
 * release() will be called to release the acquire().
 */
int (*acquire)(unsigned long addr, size_t size,
void *peer_mem_private_data, char *peer_mem_name,
void **client_context);
/**
 * get_pages - Fill in the first part of a sg_table for a virtual
 * address range
 *
 * @addr - Virtual address to be checked whether belongs to peer.
 * @size - Length of the virtual memory area starting at addr.
 * @write - Always 1
 * @force - 1 if write is required
 * @sg_head - Obsolete, always NULL
 * @client_context - Value returned by acquire()
 * @core_context - Value to be passed to invalidate_peer_memory for
 * this get
 *
 * addr/size are passed as the raw virtual address range requested by
 * the user, it is not aligned to any page size. get_pages() is always
 * followed by dma_map().
 *
 * Upon return the caller can call the invalidate_callback().
 *
 * Returns 0 on success, -ERRNO on failure. After success put_pages()
 * will be called to return the pages.
 */
int (*get_pages)(unsigned long addr, size_t size, int write, int force,
struct sg_table *sg_head, void *client_context,
u64 core_context);
/**
 * dma_map - Create a DMA mapped sg_table
 *
 * @sg_head - The sg_table to allocate
 * @client_context - Value returned by acquire()
 * @dma_device - The device that will be doing DMA from these addresses
 * @dmasync - Obsolete, always 0
 * @nmap - Returns the number of dma mapped entries in the sg_head
 *
 * Must be called after get_pages(). This must fill in the sg_head with
 * DMA mapped SGLs for dma_device. Each SGL start and end must meet a
 * minimum alignment of at least PAGE_SIZE, though individual sgls can
 * be multiples of PAGE_SIZE, in any mixture. Since the user virtual
 * address/size are not page aligned, the implementation must increase
 * it to the logical alignment when building the SGLs.
 *
 * Returns 0 on success, -ERRNO on failure. After success dma_unmap()
 * will be called to unmap the pages. On failure sg_head must be left
 * untouched or point to a valid sg_table.
 */
int (*dma_map)(struct sg_table *sg_head, void *client_context,
struct device *dma_device, int dmasync, int *nmap);
/**
 * dma_unmap - Unmap a DMA mapped sg_table
 *
 * @sg_head - The sg_table to unmap
 * @client_context - Value returned by acquire()
 * @dma_device - The device that will be doing DMA from these addresses
 *
 * sg_head will not be touched after this function returns.
 *
 * Must return 0.
 */
int (*dma_unmap)(struct sg_table *sg_head, void *client_context,
struct device *dma_device);
/**
 * put_pages - Unpin a SGL
 *
 * @sg_head - The sg_table to unpin
 * @client_context - Value returned by acquire()
 *
 * sg_head must be freed on return.
 */
void (*put_pages)(struct sg_table *sg_head, void *client_context);
/* Obsolete, not used */
unsigned long (*get_page_size)(void *client_context);
/**
 * release - Undo acquire
 *
 * @client_context - Value returned by acquire()
 *
 * If acquire() returns 1 then release() must be called. All
 * get_pages() and dma_map()'s must be undone before calling this
 * function.
 */
void (*release)(void *client_context);
};
/* Bit values for the 'flags' member of struct peer_memory_client_ex */
enum {
/*
 * NOTE(review): the name suggests the client's invalidation path also
 * performs the unmap/unpin itself — confirm the exact semantics
 * against the ib_core consumer of this flag.
 */
PEER_MEM_INVALIDATE_UNMAPS = 1 << 0,
};
/*
 * Extended registration structure embedding the base
 * struct peer_memory_client as its first member.
 */
struct peer_memory_client_ex {
struct peer_memory_client client; /* base callbacks and identification */
/*
 * NOTE(review): presumably the size of the client's extended structure,
 * used by ib_core to detect which extension fields are present —
 * confirm against the registration implementation.
 */
size_t ex_size;
u32 flags; /* PEER_MEM_* bits from the enum above */
};
/*
 * If invalidate_callback() is non-NULL then the client will only support
 * umems which can be invalidated. The caller may call the
 * invalidate_callback() after acquire(); when it returns, the range will no
 * longer have DMA active, and release() will have been called.
 *
 * Note: The implementation locking must ensure that get_pages(), and
 * dma_map() do not have locking dependencies with invalidate_callback(). The
 * ib_core will wait until any concurrent get_pages() or dma_map() completes
 * before returning.
 *
 * Similarly, this can call dma_unmap(), put_pages() and release() from within
 * the callback, or will wait for another thread doing those operations to
 * complete.
 *
 * For these reasons the user of invalidate_callback() must be careful with
 * locking.
 *
 * @reg_handle - handle returned by ib_register_peer_memory_client()
 * @core_context - the value that was supplied to get_pages(); identifies
 * which pinned range is being invalidated
 */
typedef int (*invalidate_peer_memory)(void *reg_handle, u64 core_context);
/**
 * ib_register_peer_memory_client - register a peer memory client with ib_core
 *
 * @peer_client - the client's callback table (struct peer_memory_client)
 * @invalidate_callback - pointer to an invalidate_peer_memory function
 * pointer. NOTE(review): given the double indirection this appears to be an
 * out-parameter through which ib_core hands back the invalidation entry
 * point the client should call — confirm against the ib_core implementation.
 *
 * Returns an opaque registration handle to pass to
 * ib_unregister_peer_memory_client(), or NULL-style failure per the
 * implementation (not visible from this header).
 */
void *
ib_register_peer_memory_client(const struct peer_memory_client *peer_client,
invalidate_peer_memory *invalidate_callback);
/**
 * ib_unregister_peer_memory_client - undo ib_register_peer_memory_client()
 *
 * @reg_handle - handle returned by ib_register_peer_memory_client()
 */
void ib_unregister_peer_memory_client(void *reg_handle);
#endif