mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2026-03-12 08:39:53 +00:00
515.43.04
This commit is contained in:
427
kernel-open/nvidia-peermem/nv-p2p.h
Normal file
427
kernel-open/nvidia-peermem/nv-p2p.h
Normal file
@@ -0,0 +1,427 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2011-2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NV_P2P_H_
|
||||
#define _NV_P2P_H_
|
||||
|
||||
/*
|
||||
* NVIDIA P2P Structure Versioning
|
||||
*
|
||||
* For the nvidia_p2p_*_t structures allocated by the NVIDIA driver, it will
|
||||
* set the version field of the structure according to the definition used by
|
||||
* the NVIDIA driver. The "major" field of the version is defined as the upper
|
||||
* 16 bits, and the "minor" field of the version is defined as the lower 16
|
||||
* bits. The version field will always be the first 4 bytes of the structure,
|
||||
* and third-party drivers should check the value of this field in structures
|
||||
* allocated by the NVIDIA driver to ensure runtime compatibility.
|
||||
*
|
||||
* In general, version numbers will be incremented as follows:
|
||||
* - When a backwards-compatible change is made to the structure layout, the
|
||||
* minor version for that structure will be incremented. Third-party drivers
|
||||
* built against an older minor version will continue to work with the newer
|
||||
* minor version used by the NVIDIA driver, without recompilation.
|
||||
* - When a breaking change is made to the structure layout, the major version
|
||||
* will be incremented. Third-party drivers built against an older major
|
||||
* version require at least recompilation and potentially additional updates
|
||||
* to use the new API.
|
||||
*/
|
||||
#define NVIDIA_P2P_MAJOR_VERSION_MASK 0xffff0000
|
||||
#define NVIDIA_P2P_MINOR_VERSION_MASK 0x0000ffff
|
||||
|
||||
#define NVIDIA_P2P_MAJOR_VERSION(v) \
|
||||
(((v) & NVIDIA_P2P_MAJOR_VERSION_MASK) >> 16)
|
||||
|
||||
#define NVIDIA_P2P_MINOR_VERSION(v) \
|
||||
(((v) & NVIDIA_P2P_MINOR_VERSION_MASK))
|
||||
|
||||
#define NVIDIA_P2P_MAJOR_VERSION_MATCHES(p, v) \
|
||||
(NVIDIA_P2P_MAJOR_VERSION((p)->version) == NVIDIA_P2P_MAJOR_VERSION(v))
|
||||
|
||||
#define NVIDIA_P2P_VERSION_COMPATIBLE(p, v) \
|
||||
(NVIDIA_P2P_MAJOR_VERSION_MATCHES(p, v) && \
|
||||
(NVIDIA_P2P_MINOR_VERSION((p)->version) >= (NVIDIA_P2P_MINOR_VERSION(v))))
|
||||
|
||||
/* GPU architecture identifiers for nvidia_p2p_params_t::architecture. */
enum {
    NVIDIA_P2P_ARCHITECTURE_TESLA = 0,
    NVIDIA_P2P_ARCHITECTURE_FERMI,
    NVIDIA_P2P_ARCHITECTURE_CURRENT = NVIDIA_P2P_ARCHITECTURE_FERMI
};
|
||||
|
||||
#define NVIDIA_P2P_PARAMS_VERSION 0x00010001
|
||||
|
||||
/* Indices into nvidia_p2p_params_t::addresses[]. */
enum {
    NVIDIA_P2P_PARAMS_ADDRESS_INDEX_GPU = 0,
    NVIDIA_P2P_PARAMS_ADDRESS_INDEX_THIRD_PARTY_DEVICE,
    NVIDIA_P2P_PARAMS_ADDRESS_INDEX_MAX = \
        NVIDIA_P2P_PARAMS_ADDRESS_INDEX_THIRD_PARTY_DEVICE
};
|
||||
|
||||
#define NVIDIA_P2P_GPU_UUID_LEN 16
|
||||
|
||||
/*
 * Peer-to-peer mapping parameters, one address set per endpoint
 * (GPU and third-party device); used by nvidia_p2p_init_mapping().
 * version is NVIDIA_P2P_PARAMS_VERSION (major/minor, see header comment).
 */
typedef
struct nvidia_p2p_params {
    uint32_t version;        /* NVIDIA_P2P_PARAMS_VERSION */
    uint32_t architecture;   /* one of NVIDIA_P2P_ARCHITECTURE_* */
    union nvidia_p2p_mailbox_addresses {
        struct {
            /* NOTE(review): mailbox register addresses — presumably
             * wmb = write mailbox, rreq/rcomp = read request/completion.
             * Confirm against NVIDIA P2P documentation. */
            uint64_t wmb_addr;
            uint64_t wmb_data;
            uint64_t rreq_addr;
            uint64_t rcomp_addr;
            uint64_t reserved[2];
        } fermi;
    } addresses[NVIDIA_P2P_PARAMS_ADDRESS_INDEX_MAX+1];
} nvidia_p2p_params_t;
|
||||
|
||||
/*
|
||||
* Capability flag for users to detect
|
||||
* driver support for persistent pages.
|
||||
*/
|
||||
extern int nvidia_p2p_cap_persistent_pages;
|
||||
#define NVIDIA_P2P_CAP_PERSISTENT_PAGES
|
||||
|
||||
/*
|
||||
* This API is not supported.
|
||||
*/
|
||||
int nvidia_p2p_init_mapping(uint64_t p2p_token,
|
||||
struct nvidia_p2p_params *params,
|
||||
void (*destroy_callback)(void *data),
|
||||
void *data);
|
||||
|
||||
/*
|
||||
* This API is not supported.
|
||||
*/
|
||||
int nvidia_p2p_destroy_mapping(uint64_t p2p_token);
|
||||
|
||||
/* GPU page sizes that may back a P2P page table
 * (stored in nvidia_p2p_page_table_t::page_size). */
enum nvidia_p2p_page_size_type {
    NVIDIA_P2P_PAGE_SIZE_4KB = 0,
    NVIDIA_P2P_PAGE_SIZE_64KB,
    NVIDIA_P2P_PAGE_SIZE_128KB,
    NVIDIA_P2P_PAGE_SIZE_COUNT
};
|
||||
|
||||
/* One pinned GPU page as exposed through nvidia_p2p_page_table_t::pages[]. */
typedef
struct nvidia_p2p_page {
    uint64_t physical_address;
    union nvidia_p2p_request_registers {
        struct {
            /* NOTE(review): per-page request mailbox registers; exact
             * semantics not documented here — consult NVIDIA P2P docs. */
            uint32_t wreqmb_h;
            uint32_t rreqmb_h;
            uint32_t rreqmb_0;
            uint32_t reserved[3];
        } fermi;
    } registers;
} nvidia_p2p_page_t;
|
||||
|
||||
#define NVIDIA_P2P_PAGE_TABLE_VERSION 0x00010002
|
||||
|
||||
#define NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE(p) \
|
||||
NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_PAGE_TABLE_VERSION)
|
||||
|
||||
/*
 * Page table describing the pinned GPU pages behind a P2P mapping;
 * allocated by nvidia_p2p_get_pages(), released via nvidia_p2p_put_pages().
 */
typedef
struct nvidia_p2p_page_table {
    uint32_t version;    /* NVIDIA_P2P_PAGE_TABLE_VERSION; validate with
                          * NVIDIA_P2P_PAGE_TABLE_VERSION_COMPATIBLE() */
    uint32_t page_size;  /* enum nvidia_p2p_page_size_type */
    struct nvidia_p2p_page **pages;  /* array of per-page entries */
    uint32_t entries;    /* number of elements in pages[] */
    uint8_t *gpu_uuid;   /* owning GPU's UUID — presumably
                          * NVIDIA_P2P_GPU_UUID_LEN bytes; confirm */
} nvidia_p2p_page_table_t;
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Make the pages underlying a range of GPU virtual memory
|
||||
* accessible to a third-party device.
|
||||
*
|
||||
* This API only supports pinned, GPU-resident memory, such as that provided
|
||||
* by cudaMalloc().
|
||||
*
|
||||
* This API may sleep.
|
||||
*
|
||||
* @param[in] p2p_token
|
||||
* A token that uniquely identifies the P2P mapping.
|
||||
* @param[in] va_space
|
||||
* A GPU virtual address space qualifier.
|
||||
* @param[in] virtual_address
|
||||
* The start address in the specified virtual address space.
|
||||
* Address must be aligned to the 64KB boundary.
|
||||
* @param[in] length
|
||||
* The length of the requested P2P mapping.
|
||||
* Length must be a multiple of 64KB.
|
||||
* @param[out] page_table
|
||||
* A pointer to an array of structures with P2P PTEs.
|
||||
* @param[in] free_callback
|
||||
* A pointer to the function to be invoked when the pages
|
||||
* underlying the virtual address range are freed
|
||||
* implicitly.
|
||||
* If NULL, persistent pages will be returned.
|
||||
* This means the pages underlying the range of GPU virtual memory
|
||||
* will persist until explicitly freed by nvidia_p2p_put_pages().
|
||||
* Persistent GPU memory mappings are not supported on PowerPC,
|
||||
|
||||
|
||||
|
||||
* MIG-enabled devices and vGPU.
|
||||
|
||||
* @param[in] data
|
||||
* A non-NULL opaque pointer to private data to be passed to the
|
||||
* callback function.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -ENOTSUPP if the requested operation is not supported.
|
||||
* -ENOMEM if the driver failed to allocate memory or if
|
||||
* insufficient resources were available to complete the operation.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_get_pages(uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address,
|
||||
uint64_t length,
|
||||
struct nvidia_p2p_page_table **page_table,
|
||||
void (*free_callback)(void *data),
|
||||
void *data);
|
||||
|
||||
#define NVIDIA_P2P_DMA_MAPPING_VERSION 0x00020003
|
||||
|
||||
#define NVIDIA_P2P_DMA_MAPPING_VERSION_COMPATIBLE(p) \
|
||||
NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_DMA_MAPPING_VERSION)
|
||||
|
||||
struct pci_dev;
|
||||
|
||||
/*
 * DMA mapping of a P2P page table for a specific peer PCI device;
 * created by nvidia_p2p_dma_map_pages(), torn down by
 * nvidia_p2p_dma_unmap_pages().
 */
typedef
struct nvidia_p2p_dma_mapping {
    uint32_t version;          /* NVIDIA_P2P_DMA_MAPPING_VERSION; validate with
                                * NVIDIA_P2P_DMA_MAPPING_VERSION_COMPATIBLE() */
    enum nvidia_p2p_page_size_type page_size_type;
    uint32_t entries;          /* number of elements in dma_addresses[] */
    uint64_t *dma_addresses;   /* DMA addresses usable by the peer device */
    void *private;             /* opaque, owned by the NVIDIA driver */
    struct pci_dev *pci_dev;   /* the peer device this mapping was made for */
} nvidia_p2p_dma_mapping_t;
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Make the physical pages retrieved using nvidia_p2p_get_pages accessible to
|
||||
* a third-party device.
|
||||
*
|
||||
* @param[in] peer
|
||||
* The struct pci_dev * of the peer device that needs to DMA to/from the
|
||||
* mapping.
|
||||
* @param[in] page_table
|
||||
* The page table outlining the physical pages underlying the mapping, as
|
||||
* retrieved with nvidia_p2p_get_pages().
|
||||
* @param[out] dma_mapping
|
||||
* The DMA mapping containing the DMA addresses to use on the third-party
|
||||
* device.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -ENOTSUPP if the requested operation is not supported.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_dma_map_pages(struct pci_dev *peer,
|
||||
struct nvidia_p2p_page_table *page_table,
|
||||
struct nvidia_p2p_dma_mapping **dma_mapping);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Unmap the physical pages previously mapped to the third-party device by
|
||||
* nvidia_p2p_dma_map_pages().
|
||||
*
|
||||
* @param[in] peer
|
||||
* The struct pci_dev * of the peer device that the DMA mapping belongs to.
|
||||
* @param[in] page_table
|
||||
* The page table backing the DMA mapping to be unmapped.
|
||||
* @param[in] dma_mapping
|
||||
* The DMA mapping containing the DMA addresses used by the third-party
|
||||
* device, as retrieved with nvidia_p2p_dma_map_pages(). After this call
|
||||
* returns, neither this struct nor the addresses contained within will be
|
||||
* valid for use by the third-party device.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_dma_unmap_pages(struct pci_dev *peer,
|
||||
struct nvidia_p2p_page_table *page_table,
|
||||
struct nvidia_p2p_dma_mapping *dma_mapping);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Release a set of pages previously made accessible to
|
||||
* a third-party device.
|
||||
*
|
||||
* @param[in] p2p_token
|
||||
* A token that uniquely identifies the P2P mapping.
|
||||
* @param[in] va_space
|
||||
* A GPU virtual address space qualifier.
|
||||
* @param[in] virtual_address
|
||||
* The start address in the specified virtual address space.
|
||||
* @param[in] page_table
|
||||
* A pointer to the array of structures with P2P PTEs.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
* -EIO if an unknown error occurred.
|
||||
*/
|
||||
int nvidia_p2p_put_pages(uint64_t p2p_token, uint32_t va_space,
|
||||
uint64_t virtual_address,
|
||||
struct nvidia_p2p_page_table *page_table);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Free a third-party P2P page table. (This function is a no-op.)
|
||||
*
|
||||
* @param[in] page_table
|
||||
* A pointer to the array of structures with P2P PTEs.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
*/
|
||||
int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Free a third-party P2P DMA mapping. (This function is a no-op.)
|
||||
*
|
||||
* @param[in] dma_mapping
|
||||
* A pointer to the DMA mapping structure.
|
||||
*
|
||||
* @return
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL if an invalid argument was supplied.
|
||||
*/
|
||||
int nvidia_p2p_free_dma_mapping(struct nvidia_p2p_dma_mapping *dma_mapping);
|
||||
|
||||
#define NVIDIA_P2P_RSYNC_DRIVER_VERSION 0x00010001
|
||||
|
||||
#define NVIDIA_P2P_RSYNC_DRIVER_VERSION_COMPATIBLE(p) \
|
||||
NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_RSYNC_DRIVER_VERSION)
|
||||
|
||||
/*
 * Callback table registered by an rsync driver via
 * nvidia_p2p_register_rsync_driver(); semantics are documented at that
 * function's declaration below.
 */
typedef
struct nvidia_p2p_rsync_driver {
    uint32_t version;  /* NVIDIA_P2P_RSYNC_DRIVER_VERSION */
    /* Obtain a reference to the current relaxed ordering mode (boolean). */
    int (*get_relaxed_ordering_mode)(int *mode, void *data);
    /* Release a reference taken by get_relaxed_ordering_mode; called once
     * per successful get. */
    void (*put_relaxed_ordering_mode)(int mode, void *data);
    /* Issue RSYNC; must not sleep or reschedule (may run under spinlocks). */
    void (*wait_for_rsync)(struct pci_dev *gpu, void *data);
} nvidia_p2p_rsync_driver_t;
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Registers the rsync driver.
|
||||
*
|
||||
* @param[in] driver
|
||||
* A pointer to the rsync driver structure. The NVIDIA driver would use,
|
||||
*
|
||||
* get_relaxed_ordering_mode to obtain a reference to the current relaxed
|
||||
* ordering mode (treated as a boolean) from the rsync driver.
|
||||
*
|
||||
* put_relaxed_ordering_mode to release a reference to the current relaxed
|
||||
* ordering mode back to the rsync driver. The NVIDIA driver will call this
|
||||
* function once for each successful call to get_relaxed_ordering_mode, and
|
||||
* the relaxed ordering mode must not change until the last reference is
|
||||
* released.
|
||||
*
|
||||
* wait_for_rsync to call into the rsync module to issue RSYNC. This callback
|
||||
* can't sleep or re-schedule as it may arrive under spinlocks.
|
||||
* @param[in] data
|
||||
* A pointer to the rsync driver's private data.
|
||||
*
|
||||
* @Returns
|
||||
* 0 upon successful completion.
|
||||
* -EINVAL parameters are incorrect.
|
||||
* -EBUSY if a module is already registered or GPU devices are in use.
|
||||
*/
|
||||
int nvidia_p2p_register_rsync_driver(nvidia_p2p_rsync_driver_t *driver,
|
||||
void *data);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Unregisters the rsync driver.
|
||||
*
|
||||
* @param[in] driver
|
||||
* A pointer to the rsync driver structure.
|
||||
* @param[in] data
|
||||
* A pointer to the rsync driver's private data.
|
||||
*/
|
||||
void nvidia_p2p_unregister_rsync_driver(nvidia_p2p_rsync_driver_t *driver,
|
||||
void *data);
|
||||
|
||||
#define NVIDIA_P2P_RSYNC_REG_INFO_VERSION 0x00020001
|
||||
|
||||
#define NVIDIA_P2P_RSYNC_REG_INFO_VERSION_COMPATIBLE(p) \
|
||||
NVIDIA_P2P_VERSION_COMPATIBLE(p, NVIDIA_P2P_RSYNC_REG_INFO_VERSION)
|
||||
|
||||
/* One rsync (GEN-ID) register mapping, as returned by
 * nvidia_p2p_get_rsync_registers(). */
typedef struct nvidia_p2p_rsync_reg {
    void *ptr;                /* mapped register pointer */
    size_t size;              /* size of the mapping */
    struct pci_dev *ibmnpu;   /* NPU device providing the register */
    struct pci_dev *gpu;      /* GPU associated with this register */
    uint32_t cluster_id;      /* hardcoded to 0 on early systems (see
                               * nvidia_p2p_get_rsync_registers() docs) */
    uint32_t socket_id;
} nvidia_p2p_rsync_reg_t;

/* Array of rsync registers; allocated by nvidia_p2p_get_rsync_registers()
 * and must be freed with nvidia_p2p_put_rsync_registers(). */
typedef struct nvidia_p2p_rsync_reg_info {
    uint32_t version;         /* NVIDIA_P2P_RSYNC_REG_INFO_VERSION */
    nvidia_p2p_rsync_reg_t *regs;
    size_t entries;
} nvidia_p2p_rsync_reg_info_t;
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Gets rsync (GEN-ID) register information associated with the supported
|
||||
* NPUs.
|
||||
*
|
||||
* The caller would use the returned information {GPU device, NPU device,
|
||||
* socket-id, cluster-id} to pick the optimal generation registers to issue
|
||||
* RSYNC (NVLink HW flush).
|
||||
*
|
||||
* The interface allocates structures to return the information, hence
|
||||
* nvidia_p2p_put_rsync_registers() must be called to free the structures.
|
||||
*
|
||||
* Note, cluster-id is hardcoded to zero as early system configurations would
|
||||
* only support cluster mode i.e. all devices would share the same cluster-id
|
||||
* (0). In the future, appropriate kernel support would be needed to query
|
||||
* cluster-ids.
|
||||
*
|
||||
* @param[out] reg_info
|
||||
* A pointer to the rsync reg info structure.
|
||||
*
|
||||
* @Returns
|
||||
* 0 Upon successful completion. Otherwise, returns negative value.
|
||||
*/
|
||||
int nvidia_p2p_get_rsync_registers(nvidia_p2p_rsync_reg_info_t **reg_info);
|
||||
|
||||
/*
|
||||
* @brief
|
||||
* Frees the structures allocated by nvidia_p2p_get_rsync_registers().
|
||||
*
|
||||
* @param[in] reg_info
|
||||
* A pointer to the rsync reg info structure.
|
||||
*/
|
||||
void nvidia_p2p_put_rsync_registers(nvidia_p2p_rsync_reg_info_t *reg_info);
|
||||
|
||||
#endif /* _NV_P2P_H_ */
|
||||
61
kernel-open/nvidia-peermem/nvidia-peermem.Kbuild
Normal file
61
kernel-open/nvidia-peermem/nvidia-peermem.Kbuild
Normal file
@@ -0,0 +1,61 @@
|
||||
###########################################################################
# Kbuild fragment for nvidia-peermem.ko
###########################################################################

#
# Define NVIDIA_PEERMEM_{SOURCES,OBJECTS}
#

NVIDIA_PEERMEM_SOURCES =
NVIDIA_PEERMEM_SOURCES += nvidia-peermem/nvidia-peermem.c

NVIDIA_PEERMEM_OBJECTS = $(patsubst %.c,%.o,$(NVIDIA_PEERMEM_SOURCES))

obj-m += nvidia-peermem.o
nvidia-peermem-y := $(NVIDIA_PEERMEM_OBJECTS)

NVIDIA_PEERMEM_KO = nvidia-peermem/nvidia-peermem.ko

NV_KERNEL_MODULE_TARGETS += $(NVIDIA_PEERMEM_KO)

#
# Define nvidia-peermem.ko-specific CFLAGS.
#
NVIDIA_PEERMEM_CFLAGS += -I$(src)/nvidia-peermem
NVIDIA_PEERMEM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0

#
# In case of MOFED installation, nvidia-peermem compilation
# needs paths to the MOFED headers in CFLAGS.
# MOFED's Module.symvers is needed for the build
# to find the additional ib_* symbols.
#
OFA_DIR := /usr/src/ofa_kernel
# Candidate install locations, most specific (per-arch, per-kernel) first.
OFA_CANDIDATES = $(OFA_DIR)/$(ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
# First existing candidate wins; fall back to $(OFA_DIR) when none exist.
MLNX_OFED_KERNEL := $(shell for d in $(OFA_CANDIDATES); do \
                        if [ -d "$$d" ]; then \
                            echo "$$d"; \
                            exit 0; \
                        fi; \
                    done; \
                    echo $(OFA_DIR) \
                    )

ifneq ($(shell test -d $(MLNX_OFED_KERNEL) && echo "true" || echo "" ),)
NVIDIA_PEERMEM_CFLAGS += -I$(MLNX_OFED_KERNEL)/include -I$(MLNX_OFED_KERNEL)/include/rdma
KBUILD_EXTRA_SYMBOLS := $(MLNX_OFED_KERNEL)/Module.symvers
endif

$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_PEERMEM_OBJECTS), $(NVIDIA_PEERMEM_CFLAGS))

#
# Register the conftests needed by nvidia-peermem.ko
#

NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_PEERMEM_OBJECTS)

NV_CONFTEST_GENERIC_COMPILE_TESTS += ib_peer_memory_symbols

NV_CONFTEST_FUNCTION_COMPILE_TESTS +=

NV_CONFTEST_TYPE_COMPILE_TESTS +=
|
||||
523
kernel-open/nvidia-peermem/nvidia-peermem.c
Normal file
523
kernel-open/nvidia-peermem/nvidia-peermem.c
Normal file
@@ -0,0 +1,523 @@
|
||||
/* SPDX-License-Identifier: Linux-OpenIB */
|
||||
/*
|
||||
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include "nv-p2p.h"
|
||||
#include "peer_mem.h"
|
||||
#include "conftest.h"
|
||||
|
||||
#define DRV_NAME "nv_mem"
|
||||
#define DRV_VERSION NV_VERSION_STRING
|
||||
|
||||
MODULE_AUTHOR("Yishai Hadas");
|
||||
MODULE_DESCRIPTION("NVIDIA GPU memory plug-in");
|
||||
MODULE_LICENSE("Linux-OpenIB");
|
||||
MODULE_VERSION(DRV_VERSION);
|
||||
enum {
|
||||
NV_MEM_PEERDIRECT_SUPPORT_DEFAULT = 0,
|
||||
NV_MEM_PEERDIRECT_SUPPORT_LEGACY = 1,
|
||||
};
|
||||
static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
|
||||
module_param(peerdirect_support, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");
|
||||
|
||||
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS)
|
||||
|
||||
#if defined(NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
|
||||
|
||||
#ifndef READ_ONCE
|
||||
#define READ_ONCE(x) ACCESS_ONCE(x)
|
||||
#endif
|
||||
|
||||
#ifndef WRITE_ONCE
|
||||
#define WRITE_ONCE(x, val) ({ ACCESS_ONCE(x) = (val); })
|
||||
#endif
|
||||
|
||||
#define GPU_PAGE_SHIFT 16
|
||||
#define GPU_PAGE_SIZE ((u64)1 << GPU_PAGE_SHIFT)
|
||||
#define GPU_PAGE_OFFSET (GPU_PAGE_SIZE-1)
|
||||
#define GPU_PAGE_MASK (~GPU_PAGE_OFFSET)
|
||||
|
||||
invalidate_peer_memory mem_invalidate_callback;
|
||||
static void *reg_handle = NULL;
|
||||
static void *reg_handle_nc = NULL;
|
||||
|
||||
struct nv_mem_context {
|
||||
struct nvidia_p2p_page_table *page_table;
|
||||
struct nvidia_p2p_dma_mapping *dma_mapping;
|
||||
u64 core_context;
|
||||
u64 page_virt_start;
|
||||
u64 page_virt_end;
|
||||
size_t mapped_size;
|
||||
unsigned long npages;
|
||||
unsigned long page_size;
|
||||
struct task_struct *callback_task;
|
||||
int sg_allocated;
|
||||
struct sg_table sg_head;
|
||||
};
|
||||
|
||||
|
||||
/*
 * Free callback passed to nvidia_p2p_get_pages(): invoked by the NVIDIA
 * driver when the GPU pages backing a registration are freed implicitly
 * (e.g. the owning process tears down the allocation). Notifies the IB core
 * so it can invalidate the registration, then releases the DMA mapping and
 * page table.
 */
static void nv_get_p2p_free_callback(void *data)
{
    int ret = 0;
    struct nv_mem_context *nv_mem_context = (struct nv_mem_context *)data;
    struct nvidia_p2p_page_table *page_table = NULL;
    struct nvidia_p2p_dma_mapping *dma_mapping = NULL;

    /* Pin the module for the duration of the callback. */
    __module_get(THIS_MODULE);
    if (!nv_mem_context) {
        peer_err("nv_get_p2p_free_callback -- invalid nv_mem_context\n");
        goto out;
    }

    if (!nv_mem_context->page_table) {
        peer_err("nv_get_p2p_free_callback -- invalid page_table\n");
        goto out;
    }

    /* Save page_table locally to prevent it being freed as part of nv_mem_release
     * in case it's called internally by that callback.
     */
    page_table = nv_mem_context->page_table;

    if (!nv_mem_context->dma_mapping) {
        peer_err("nv_get_p2p_free_callback -- invalid dma_mapping\n");
        goto out;
    }
    dma_mapping = nv_mem_context->dma_mapping;

    /* For now don't set nv_mem_context->page_table to NULL,
     * confirmed by NVIDIA that inflight put_pages with valid pointer will fail gracefully.
     */

    /* Record the calling task so nv_dma_unmap()/nv_mem_put_pages() can detect
     * re-entry from this callback and skip their own teardown. */
    nv_mem_context->callback_task = current;
    (*mem_invalidate_callback) (reg_handle, nv_mem_context->core_context);
    nv_mem_context->callback_task = NULL;

    ret = nvidia_p2p_free_dma_mapping(dma_mapping);
    if (ret)
        peer_err("nv_get_p2p_free_callback -- error %d while calling nvidia_p2p_free_dma_mapping()\n", ret);

    ret = nvidia_p2p_free_page_table(page_table);
    if (ret)
        peer_err("nv_get_p2p_free_callback -- error %d while calling nvidia_p2p_free_page_table()\n", ret);

out:
    module_put(THIS_MODULE);
    return;

}
|
||||
|
||||
/* At that function we don't call IB core - no ticket exists */
|
||||
static void nv_mem_dummy_callback(void *data)
|
||||
{
|
||||
struct nv_mem_context *nv_mem_context = (struct nv_mem_context *)data;
|
||||
int ret = 0;
|
||||
|
||||
__module_get(THIS_MODULE);
|
||||
|
||||
ret = nvidia_p2p_free_page_table(nv_mem_context->page_table);
|
||||
if (ret)
|
||||
peer_err("nv_mem_dummy_callback -- error %d while calling nvidia_p2p_free_page_table()\n", ret);
|
||||
|
||||
module_put(THIS_MODULE);
|
||||
return;
|
||||
}
|
||||
|
||||
/* acquire return code: 1 mine, 0 - not mine */
/*
 * Decide whether [addr, addr+size) is GPU memory owned by this client.
 * Probes by pinning (and immediately unpinning) the range; on success,
 * allocates and hands back a per-registration nv_mem_context.
 * Any failure is reported as "not mine" (return 0) so other peer-memory
 * clients can claim the range.
 */
static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_private_data,
                          char *peer_mem_name, void **client_context)
{

    int ret = 0;
    struct nv_mem_context *nv_mem_context;

    nv_mem_context = kzalloc(sizeof *nv_mem_context, GFP_KERNEL);
    if (!nv_mem_context)
        /* Error case handled as not mine */
        return 0;

    /* Round the range out to 64KB GPU page boundaries. */
    nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
    nv_mem_context->page_virt_end = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
    nv_mem_context->mapped_size = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;

    /* Probe: a successful get_pages proves the range is GPU memory. */
    ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
                               &nv_mem_context->page_table, nv_mem_dummy_callback, nv_mem_context);

    if (ret < 0)
        goto err;

    ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
                               nv_mem_context->page_table);
    if (ret < 0) {
        /* Not expected, however in case callback was called on that buffer just before
           put pages we'll expect to fail gracefully (confirmed by NVIDIA) and return an error.
        */
        peer_err("nv_mem_acquire -- error %d while calling nvidia_p2p_put_pages()\n", ret);
        goto err;
    }

    /* 1 means mine */
    *client_context = nv_mem_context;
    __module_get(THIS_MODULE);
    return 1;

err:
    kfree(nv_mem_context);

    /* Error case handled as not mine */
    return 0;
}
|
||||
|
||||
/*
 * dma_map callback: map the pinned GPU pages for DMA by the peer device and
 * publish the DMA addresses through an sg_table.
 * Returns 0 on success or a negative errno; on any failure after mapping,
 * the mapping is unwound before returning.
 */
static int nv_dma_map(struct sg_table *sg_head, void *context,
                      struct device *dma_device, int dmasync,
                      int *nmap)
{
    int i, ret;
    struct scatterlist *sg;
    struct nv_mem_context *nv_mem_context =
        (struct nv_mem_context *) context;
    struct nvidia_p2p_page_table *page_table = nv_mem_context->page_table;
    struct nvidia_p2p_dma_mapping *dma_mapping;
    struct pci_dev *pdev = to_pci_dev(dma_device);

    /* This module assumes 64KB GPU pages throughout (GPU_PAGE_SIZE). */
    if (page_table->page_size != NVIDIA_P2P_PAGE_SIZE_64KB) {
        peer_err("nv_dma_map -- assumption of 64KB pages failed size_id=%u\n",
                 nv_mem_context->page_table->page_size);
        return -EINVAL;
    }

    if (!pdev) {
        peer_err("nv_dma_map -- invalid pci_dev\n");
        return -EINVAL;
    }

    ret = nvidia_p2p_dma_map_pages(pdev, page_table, &dma_mapping);
    if (ret) {
        peer_err("nv_dma_map -- error %d while calling nvidia_p2p_dma_map_pages()\n", ret);
        return ret;
    }

    /* Reject mappings produced by an incompatible NVIDIA driver ABI. */
    if (!NVIDIA_P2P_DMA_MAPPING_VERSION_COMPATIBLE(dma_mapping)) {
        peer_err("error, incompatible dma mapping version 0x%08x\n",
                 dma_mapping->version);
        nvidia_p2p_dma_unmap_pages(pdev, page_table, dma_mapping);
        return -EINVAL;
    }

    nv_mem_context->npages = dma_mapping->entries;

    ret = sg_alloc_table(sg_head, dma_mapping->entries, GFP_KERNEL);
    if (ret) {
        nvidia_p2p_dma_unmap_pages(pdev, page_table, dma_mapping);
        return ret;
    }

    nv_mem_context->dma_mapping = dma_mapping;
    nv_mem_context->sg_allocated = 1;
    /* One sg entry per GPU page; GPU memory has no struct page, hence NULL. */
    for_each_sg(sg_head->sgl, sg, nv_mem_context->npages, i) {
        sg_set_page(sg, NULL, nv_mem_context->page_size, 0);
        sg->dma_address = dma_mapping->dma_addresses[i];
        sg->dma_length = nv_mem_context->page_size;
    }
    /* Keep a copy so unmap/put_pages can validate the sg_table they receive. */
    nv_mem_context->sg_head = *sg_head;
    *nmap = nv_mem_context->npages;

    return 0;
}
|
||||
|
||||
static int nv_dma_unmap(struct sg_table *sg_head, void *context,
|
||||
struct device *dma_device)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dma_device);
|
||||
struct nv_mem_context *nv_mem_context =
|
||||
(struct nv_mem_context *)context;
|
||||
|
||||
if (!nv_mem_context) {
|
||||
peer_err("nv_dma_unmap -- invalid nv_mem_context\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (WARN_ON(0 != memcmp(sg_head, &nv_mem_context->sg_head, sizeof(*sg_head))))
|
||||
return -EINVAL;
|
||||
|
||||
if (nv_mem_context->callback_task == current)
|
||||
goto out;
|
||||
|
||||
if (nv_mem_context->dma_mapping)
|
||||
nvidia_p2p_dma_unmap_pages(pdev, nv_mem_context->page_table,
|
||||
nv_mem_context->dma_mapping);
|
||||
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * put_pages callback: release the GPU pages pinned by nv_mem_get_pages().
 * Skipped when re-entered from the invalidate callback, which handles the
 * release itself.
 */
static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
{
    int ret = 0;
    struct nv_mem_context *nv_mem_context =
        (struct nv_mem_context *) context;

    if (!nv_mem_context) {
        peer_err("nv_mem_put_pages -- invalid nv_mem_context\n");
        return;
    }

    /* The IB core must hand back the sg_table populated by nv_dma_map(). */
    if (WARN_ON(0 != memcmp(sg_head, &nv_mem_context->sg_head, sizeof(*sg_head))))
        return;

    /* Re-entered from nv_get_p2p_free_callback(); pages already being freed. */
    if (nv_mem_context->callback_task == current)
        return;

    ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
                               nv_mem_context->page_table);

#ifdef _DEBUG_ONLY_
    /* Here we expect an error in real life cases that should be ignored - not printed.
     * (e.g. concurrent callback with that call)
     */
    if (ret < 0) {
        printk(KERN_ERR "error %d while calling nvidia_p2p_put_pages, page_table=%p \n",
               ret, nv_mem_context->page_table);
    }
#endif

    return;
}
|
||||
|
||||
static void nv_mem_release(void *context)
|
||||
{
|
||||
struct nv_mem_context *nv_mem_context =
|
||||
(struct nv_mem_context *) context;
|
||||
if (nv_mem_context->sg_allocated) {
|
||||
sg_free_table(&nv_mem_context->sg_head);
|
||||
nv_mem_context->sg_allocated = 0;
|
||||
}
|
||||
kfree(nv_mem_context);
|
||||
module_put(THIS_MODULE);
|
||||
return;
|
||||
}
|
||||
|
||||
static int nv_mem_get_pages(unsigned long addr,
|
||||
size_t size, int write, int force,
|
||||
struct sg_table *sg_head,
|
||||
void *client_context,
|
||||
u64 core_context)
|
||||
{
|
||||
int ret;
|
||||
struct nv_mem_context *nv_mem_context;
|
||||
|
||||
nv_mem_context = (struct nv_mem_context *)client_context;
|
||||
if (!nv_mem_context)
|
||||
return -EINVAL;
|
||||
|
||||
nv_mem_context->core_context = core_context;
|
||||
nv_mem_context->page_size = GPU_PAGE_SIZE;
|
||||
|
||||
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
|
||||
&nv_mem_context->page_table, nv_get_p2p_free_callback, nv_mem_context);
|
||||
if (ret < 0) {
|
||||
peer_err("error %d while calling nvidia_p2p_get_pages()\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* No extra access to nv_mem_context->page_table here as we are
|
||||
called not under a lock and may race with inflight invalidate callback on that buffer.
|
||||
Extra handling was delayed to be done under nv_dma_map.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long nv_mem_get_page_size(void *context)
|
||||
{
|
||||
struct nv_mem_context *nv_mem_context =
|
||||
(struct nv_mem_context *)context;
|
||||
|
||||
return nv_mem_context->page_size;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Extended ("traditional") peer-memory client: registered with an
 * invalidation callback. The ex_size and flags members are filled in at
 * module init time, depending on the peerdirect_support parameter.
 */
static struct peer_memory_client_ex nv_mem_client_ex = { .client = {
    .acquire = nv_mem_acquire,
    .get_pages = nv_mem_get_pages,
    .dma_map = nv_dma_map,
    .dma_unmap = nv_dma_unmap,
    .put_pages = nv_mem_put_pages,
    .get_page_size = nv_mem_get_page_size,
    .release = nv_mem_release,
}};
|
||||
|
||||
static int nv_mem_get_pages_nc(unsigned long addr,
|
||||
size_t size, int write, int force,
|
||||
struct sg_table *sg_head,
|
||||
void *client_context,
|
||||
u64 core_context)
|
||||
{
|
||||
int ret;
|
||||
struct nv_mem_context *nv_mem_context;
|
||||
|
||||
nv_mem_context = (struct nv_mem_context *)client_context;
|
||||
if (!nv_mem_context)
|
||||
return -EINVAL;
|
||||
|
||||
nv_mem_context->core_context = core_context;
|
||||
nv_mem_context->page_size = GPU_PAGE_SIZE;
|
||||
|
||||
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
|
||||
&nv_mem_context->page_table, NULL, NULL);
|
||||
if (ret < 0) {
|
||||
peer_err("error %d while calling nvidia_p2p_get_pages() with NULL callback\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * "nc" (no-callback) peer-memory client: uses nv_mem_get_pages_nc, which
 * pins pages with a NULL free callback, and is registered without an
 * invalidation callback.
 */
static struct peer_memory_client nv_mem_client_nc = {
    .acquire = nv_mem_acquire,
    .get_pages = nv_mem_get_pages_nc,
    .dma_map = nv_dma_map,
    .dma_unmap = nv_dma_unmap,
    .put_pages = nv_mem_put_pages,
    .get_page_size = nv_mem_get_page_size,
    .release = nv_mem_release,
};
|
||||
|
||||
#endif /* NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT */
|
||||
|
||||
static int nv_mem_param_conf_check(void)
|
||||
{
|
||||
int rc = 0;
|
||||
switch (peerdirect_support) {
|
||||
case NV_MEM_PEERDIRECT_SUPPORT_DEFAULT:
|
||||
case NV_MEM_PEERDIRECT_SUPPORT_LEGACY:
|
||||
break;
|
||||
default:
|
||||
peer_err("invalid peerdirect_support param value %d\n", peerdirect_support);
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * nv_mem_client_init - module init: register the peer-memory clients.
 *
 * Validates the module parameters, then (when the MLNX peer-memory symbols
 * are available) registers two clients with the InfiniBand core:
 *   - the "traditional" extended client (nv_mem_client_ex), with an
 *     invalidation callback;
 *   - the "nc" client (nv_mem_client_nc), without one.
 *
 * Returns 0 on success, a negative errno otherwise. If either registration
 * fails, any registration that did succeed is rolled back. When the MLNX
 * peer-memory symbols are not present, always returns -EINVAL.
 */
static int __init nv_mem_client_init(void)
{
    int rc;
    rc = nv_mem_param_conf_check();
    if (rc) {
        return rc;
    }

#if defined (NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
    int status = 0;

    // off by one, to leave space for the trailing '1' which is flagging
    // the new client type
    BUG_ON(strlen(DRV_NAME) > IB_PEER_MEMORY_NAME_MAX-1);
    strcpy(nv_mem_client_ex.client.name, DRV_NAME);

    // [VER_MAX-1]=1 <-- last byte is used as flag
    // [VER_MAX-2]=0 <-- version string terminator
    BUG_ON(strlen(DRV_VERSION) > IB_PEER_MEMORY_VER_MAX-2);
    strcpy(nv_mem_client_ex.client.version, DRV_VERSION);

    // Flag byte marking this as an extended (peer_memory_client_ex) client.
    nv_mem_client_ex.client.version[IB_PEER_MEMORY_VER_MAX-1] = 1;

    if (peerdirect_support != NV_MEM_PEERDIRECT_SUPPORT_LEGACY) {
        nv_mem_client_ex.ex_size = sizeof(struct peer_memory_client_ex);
        // PEER_MEM_INVALIDATE_UNMAPS allow clients to opt out of
        // unmap/put_pages during invalidation, i.e. the client tells the
        // infiniband layer that it does not need to call
        // unmap/put_pages in the invalidation callback
        nv_mem_client_ex.flags = PEER_MEM_INVALIDATE_UNMAPS;
    } else {
        // Legacy mode: advertise no extended fields and no flags.
        nv_mem_client_ex.ex_size = 0;
        nv_mem_client_ex.flags = 0;
    }

    reg_handle = ib_register_peer_memory_client(&nv_mem_client_ex.client,
                                                &mem_invalidate_callback);
    if (!reg_handle) {
        peer_err("nv_mem_client_init -- error while registering traditional client\n");
        status = -EINVAL;
        goto out;
    }

    // The nc client enables support for persistent pages.
    // Thanks to this check, nvidia-peermem requires the new symbol from nvidia.ko, which
    // prevents users to unintentionally load this module with unsupported nvidia.ko.
    BUG_ON(!nvidia_p2p_cap_persistent_pages);
    strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
    strcpy(nv_mem_client_nc.version, DRV_VERSION);
    reg_handle_nc = ib_register_peer_memory_client(&nv_mem_client_nc, NULL);
    if (!reg_handle_nc) {
        peer_err("nv_mem_client_init -- error while registering nc client\n");
        status = -EINVAL;
        goto out;
    }

out:
    // On failure, unwind whichever registrations succeeded so the module
    // never loads half-registered.
    if (status) {
        if (reg_handle) {
            ib_unregister_peer_memory_client(reg_handle);
            reg_handle = NULL;
        }

        if (reg_handle_nc) {
            ib_unregister_peer_memory_client(reg_handle_nc);
            reg_handle_nc = NULL;
        }
    }

    return status;
#else
    return -EINVAL;
#endif
}
|
||||
|
||||
/*
 * nv_mem_client_cleanup - module exit: unregister any registered clients.
 *
 * Both handles may legitimately be NULL (registration never happened or
 * was rolled back at init time), so each one is checked before use.
 */
static void __exit nv_mem_client_cleanup(void)
{
#if defined (NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
    if (reg_handle != NULL)
        ib_unregister_peer_memory_client(reg_handle);

    if (reg_handle_nc != NULL)
        ib_unregister_peer_memory_client(reg_handle_nc);
#endif
}
|
||||
|
||||
/* Module entry and exit points. */
module_init(nv_mem_client_init);
module_exit(nv_mem_client_cleanup);
|
||||
196
kernel-open/nvidia-peermem/peer_mem.h
Normal file
196
kernel-open/nvidia-peermem/peer_mem.h
Normal file
@@ -0,0 +1,196 @@
|
||||
/* SPDX-License-Identifier: Linux-OpenIB */
|
||||
/*
|
||||
* Copyright (c) 2014-2020, Mellanox Technologies. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
#ifndef RDMA_PEER_MEM_H
#define RDMA_PEER_MEM_H

#include <linux/scatterlist.h>

/* Sizes of the name and version buffers in struct peer_memory_client. */
#define IB_PEER_MEMORY_NAME_MAX 64
#define IB_PEER_MEMORY_VER_MAX 16

/*
 * Prior versions used a void * for core_context, at some point this was
 * switched to use u64. Be careful if compiling this as 32 bit. To help,
 * the value of core_context is limited to u32 so it should work OK
 * despite the type change.
 */
#define PEER_MEM_U64_CORE_CONTEXT

struct device;
|
||||
|
||||
/**
 * struct peer_memory_client - registration information for user virtual
 * memory handlers
 *
 * The peer_memory_client scheme allows a driver to register with the ib_umem
 * system that it has the ability to understand user virtual address ranges
 * that are not compatible with get_user_pages(). For instance VMAs created
 * with io_remap_pfn_range(), or other driver special VMA.
 *
 * For ranges the interface understands it can provide a DMA mapped sg_table
 * for use by the ib_umem, allowing user virtual ranges that cannot be
 * supported by get_user_pages() to be used as umems.
 */
struct peer_memory_client {
	char name[IB_PEER_MEMORY_NAME_MAX];
	char version[IB_PEER_MEMORY_VER_MAX];

	/**
	 * acquire - Begin working with a user space virtual address range
	 *
	 * @addr - Virtual address to be checked whether belongs to peer.
	 * @size - Length of the virtual memory area starting at addr.
	 * @peer_mem_private_data - Obsolete, always NULL
	 * @peer_mem_name - Obsolete, always NULL
	 * @client_context - Returns an opaque value for this acquire use in
	 *                   other APIs
	 *
	 * Returns 1 if the peer_memory_client supports the entire virtual
	 * address range, 0 or -ERRNO otherwise. If 1 is returned then
	 * release() will be called to release the acquire().
	 */
	int (*acquire)(unsigned long addr, size_t size,
		       void *peer_mem_private_data, char *peer_mem_name,
		       void **client_context);
	/**
	 * get_pages - Fill in the first part of a sg_table for a virtual
	 * address range
	 *
	 * @addr - Virtual address to be checked whether belongs to peer.
	 * @size - Length of the virtual memory area starting at addr.
	 * @write - Always 1
	 * @force - 1 if write is required
	 * @sg_head - Obsolete, always NULL
	 * @client_context - Value returned by acquire()
	 * @core_context - Value to be passed to invalidate_peer_memory for
	 *                 this get
	 *
	 * addr/size are passed as the raw virtual address range requested by
	 * the user, it is not aligned to any page size. get_pages() is always
	 * followed by dma_map().
	 *
	 * Upon return the caller can call the invalidate_callback().
	 *
	 * Returns 0 on success, -ERRNO on failure. After success put_pages()
	 * will be called to return the pages.
	 */
	int (*get_pages)(unsigned long addr, size_t size, int write, int force,
			 struct sg_table *sg_head, void *client_context,
			 u64 core_context);
	/**
	 * dma_map - Create a DMA mapped sg_table
	 *
	 * @sg_head - The sg_table to allocate
	 * @client_context - Value returned by acquire()
	 * @dma_device - The device that will be doing DMA from these addresses
	 * @dmasync - Obsolete, always 0
	 * @nmap - Returns the number of dma mapped entries in the sg_head
	 *
	 * Must be called after get_pages(). This must fill in the sg_head with
	 * DMA mapped SGLs for dma_device. Each SGL start and end must meet a
	 * minimum alignment of at least PAGE_SIZE, though individual sgls can
	 * be multiples of PAGE_SIZE, in any mixture. Since the user virtual
	 * address/size are not page aligned, the implementation must increase
	 * it to the logical alignment when building the SGLs.
	 *
	 * Returns 0 on success, -ERRNO on failure. After success dma_unmap()
	 * will be called to unmap the pages. On failure sg_head must be left
	 * untouched or point to a valid sg_table.
	 */
	int (*dma_map)(struct sg_table *sg_head, void *client_context,
		       struct device *dma_device, int dmasync, int *nmap);
	/**
	 * dma_unmap - Unmap a DMA mapped sg_table
	 *
	 * @sg_head - The sg_table to unmap
	 * @client_context - Value returned by acquire()
	 * @dma_device - The device that will be doing DMA from these addresses
	 *
	 * sg_head will not be touched after this function returns.
	 *
	 * Must return 0.
	 */
	int (*dma_unmap)(struct sg_table *sg_head, void *client_context,
			 struct device *dma_device);
	/**
	 * put_pages - Unpin a SGL
	 *
	 * @sg_head - The sg_table to unpin
	 * @client_context - Value returned by acquire()
	 *
	 * sg_head must be freed on return.
	 */
	void (*put_pages)(struct sg_table *sg_head, void *client_context);
	/* Obsolete, not used */
	unsigned long (*get_page_size)(void *client_context);
	/**
	 * release - Undo acquire
	 *
	 * @client_context - Value returned by acquire()
	 *
	 * If acquire() returns 1 then release() must be called. All
	 * get_pages() and dma_map()'s must be undone before calling this
	 * function.
	 */
	void (*release)(void *client_context);
};
|
||||
|
||||
/* Flag bits for peer_memory_client_ex.flags. */
enum {
	/* The client does not require dma_unmap/put_pages to be called
	 * from within the invalidation callback. */
	PEER_MEM_INVALIDATE_UNMAPS = 1 << 0,
};
|
||||
|
||||
/*
 * Extended client registration record: embeds the base client and adds
 * capability flags. A client advertising the extended interface sets
 * ex_size to sizeof(struct peer_memory_client_ex).
 */
struct peer_memory_client_ex {
	struct peer_memory_client client;
	size_t ex_size; /* size of this structure as seen by the client */
	u32 flags;      /* PEER_MEM_* bits, e.g. PEER_MEM_INVALIDATE_UNMAPS */
};
|
||||
|
||||
/*
 * If invalidate_callback() is non-NULL then the client will only support
 * umems which can be invalidated. The caller may call the
 * invalidate_callback() after acquire(); on return the range will no longer
 * have DMA active, and release() will have been called.
 *
 * Note: The implementation locking must ensure that get_pages(), and
 * dma_map() do not have locking dependencies with invalidate_callback(). The
 * ib_core will wait until any concurrent get_pages() or dma_map() completes
 * before returning.
 *
 * Similarly, this can call dma_unmap(), put_pages() and release() from within
 * the callback, or will wait for another thread doing those operations to
 * complete.
 *
 * For these reasons the user of invalidate_callback() must be careful with
 * locking.
 */
typedef int (*invalidate_peer_memory)(void *reg_handle, u64 core_context);
|
||||
|
||||
/*
 * Register a peer-memory client with the ib_umem system.
 *
 * @peer_client - the client's callback table; name/version must be set.
 * @invalidate_callback - if non-NULL, receives the function the client
 *     must call to invalidate a range (see invalidate_peer_memory above).
 *
 * Returns an opaque registration handle, or NULL on failure (callers in
 * this module check for NULL). Pass the handle to
 * ib_unregister_peer_memory_client() to undo the registration.
 */
void *
ib_register_peer_memory_client(const struct peer_memory_client *peer_client,
			       invalidate_peer_memory *invalidate_callback);
void ib_unregister_peer_memory_client(void *reg_handle);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user