diff options
Diffstat (limited to 'include/rdma')
-rw-r--r-- | include/rdma/ib_umem.h | 34 | ||||
-rw-r--r-- | include/rdma/ib_umem_odp.h | 160 | ||||
-rw-r--r-- | include/rdma/ib_verbs.h | 54 |
3 files changed, 243 insertions, 5 deletions
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index a2bf41e0bde..2d83cfd7e6c 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -38,11 +38,12 @@ #include <linux/workqueue.h> struct ib_ucontext; +struct ib_umem_odp; struct ib_umem { struct ib_ucontext *context; size_t length; - int offset; + unsigned long address; int page_size; int writable; int hugetlb; @@ -50,17 +51,43 @@ struct ib_umem { struct pid *pid; struct mm_struct *mm; unsigned long diff; + struct ib_umem_odp *odp_data; struct sg_table sg_head; int nmap; int npages; }; +/* Returns the offset of the umem start relative to the first page. */ +static inline int ib_umem_offset(struct ib_umem *umem) +{ + return umem->address & ((unsigned long)umem->page_size - 1); +} + +/* Returns the first page of an ODP umem. */ +static inline unsigned long ib_umem_start(struct ib_umem *umem) +{ + return umem->address - ib_umem_offset(umem); +} + +/* Returns the address of the page after the last one of an ODP umem. */ +static inline unsigned long ib_umem_end(struct ib_umem *umem) +{ + return PAGE_ALIGN(umem->address + umem->length); +} + +static inline size_t ib_umem_num_pages(struct ib_umem *umem) +{ + return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT; +} + #ifdef CONFIG_INFINIBAND_USER_MEM struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size, int access, int dmasync); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); +int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, + size_t length); #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -73,7 +100,10 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context, } static inline void ib_umem_release(struct ib_umem *umem) { } static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; } - +static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, + size_t length) { + return -EINVAL; +} #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h new file mode 100644 index 00000000000..3da0b167041 --- /dev/null +++ b/include/rdma/ib_umem_odp.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2014 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_UMEM_ODP_H +#define IB_UMEM_ODP_H + +#include <rdma/ib_umem.h> +#include <rdma/ib_verbs.h> +#include <linux/interval_tree.h> + +struct umem_odp_node { + u64 __subtree_last; + struct rb_node rb; +}; + +struct ib_umem_odp { + /* + * An array of the pages included in the on-demand paging umem. + * Indices of pages that are currently not mapped into the device will + * contain NULL. + */ + struct page **page_list; + /* + * An array of the same size as page_list, with DMA addresses mapped + * for pages the pages in page_list. The lower two bits designate + * access permissions. See ODP_READ_ALLOWED_BIT and + * ODP_WRITE_ALLOWED_BIT. + */ + dma_addr_t *dma_list; + /* + * The umem_mutex protects the page_list and dma_list fields of an ODP + * umem, allowing only a single thread to map/unmap pages. The mutex + * also protects access to the mmu notifier counters. + */ + struct mutex umem_mutex; + void *private; /* for the HW driver to use. */ + + /* When false, use the notifier counter in the ucontext struct. */ + bool mn_counters_active; + int notifiers_seq; + int notifiers_count; + + /* A linked list of umems that don't have private mmu notifier + * counters yet. */ + struct list_head no_private_counters; + struct ib_umem *umem; + + /* Tree tracking */ + struct umem_odp_node interval_tree; + + struct completion notifier_completion; + int dying; +}; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + +int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem); + +void ib_umem_odp_release(struct ib_umem *umem); + +/* + * The lower 2 bits of the DMA address signal the R/W permissions for + * the entry. To upgrade the permissions, provide the appropriate + * bitmask to the map_dma_pages function. + * + * Be aware that upgrading a mapped address might result in change of + * the DMA address for the page. + */ +#define ODP_READ_ALLOWED_BIT (1<<0ULL) +#define ODP_WRITE_ALLOWED_BIT (1<<1ULL) + +#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) + +int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, + u64 access_mask, unsigned long current_seq); + +void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, + u64 bound); + +void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root); +void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root); +typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, + void *cookie); +/* + * Call the callback on each ib_umem in the range. Returns the logical or of + * the return values of the functions called. + */ +int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end, + umem_call_back cb, void *cookie); + +struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root, + u64 start, u64 last); +struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node, + u64 start, u64 last); + +static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, + unsigned long mmu_seq) +{ + /* + * This code is strongly based on the KVM code from + * mmu_notifier_retry. Should be called with + * the relevant locks taken (item->odp_data->umem_mutex + * and the ucontext umem_mutex semaphore locked for read). + */ + + /* Do not allow page faults while the new ib_umem hasn't seen a state + * with zero notifiers yet, and doesn't have its own valid set of + * private counters. */ + if (!item->odp_data->mn_counters_active) + return 1; + + if (unlikely(item->odp_data->notifiers_count)) + return 1; + if (item->odp_data->notifiers_seq != mmu_seq) + return 1; + return 0; +} + +#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ + +static inline int ib_umem_odp_get(struct ib_ucontext *context, + struct ib_umem *umem) +{ + return -EINVAL; +} + +static inline void ib_umem_odp_release(struct ib_umem *umem) {} + +#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ + +#endif /* IB_UMEM_ODP_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 470a011d6fa..0d74f1de99a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -51,6 +51,7 @@ #include <uapi/linux/if_ether.h> #include <linux/atomic.h> +#include <linux/mmu_notifier.h> #include <asm/uaccess.h> extern struct workqueue_struct *ib_wq; @@ -123,7 +124,8 @@ enum ib_device_cap_flags { IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), - IB_DEVICE_SIGNATURE_HANDOVER = (1<<30) + IB_DEVICE_SIGNATURE_HANDOVER = (1<<30), + IB_DEVICE_ON_DEMAND_PAGING = (1<<31), }; enum ib_signature_prot_cap { @@ -143,6 +145,27 @@ enum ib_atomic_cap { IB_ATOMIC_GLOB }; +enum ib_odp_general_cap_bits { + IB_ODP_SUPPORT = 1 << 0, +}; + +enum ib_odp_transport_cap_bits { + IB_ODP_SUPPORT_SEND = 1 << 0, + IB_ODP_SUPPORT_RECV = 1 << 1, + IB_ODP_SUPPORT_WRITE = 1 << 2, + IB_ODP_SUPPORT_READ = 1 << 3, + IB_ODP_SUPPORT_ATOMIC = 1 << 4, +}; + +struct ib_odp_caps { + uint64_t general_caps; + struct { + uint32_t rc_odp_caps; + uint32_t uc_odp_caps; + uint32_t ud_odp_caps; + } per_transport_caps; +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; @@ -186,6 +209,7 @@ struct ib_device_attr { u8 local_ca_ack_delay; int sig_prot_cap; int sig_guard_cap; + struct ib_odp_caps odp_caps; }; enum ib_mtu { @@ -1073,7 +1097,8 @@ enum ib_access_flags { IB_ACCESS_REMOTE_READ = (1<<2), IB_ACCESS_REMOTE_ATOMIC = (1<<3), IB_ACCESS_MW_BIND = (1<<4), - IB_ZERO_BASED = (1<<5) + IB_ZERO_BASED = (1<<5), + IB_ACCESS_ON_DEMAND = (1<<6), }; struct ib_phys_buf { @@ -1115,6 +1140,8 @@ struct ib_fmr_attr { u8 page_shift; }; +struct ib_umem; + struct ib_ucontext { struct ib_device *device; struct list_head pd_list; @@ -1127,6 +1154,24 @@ struct ib_ucontext { struct list_head xrcd_list; struct list_head rule_list; int closing; + + struct pid *tgid; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + struct rb_root umem_tree; + /* + * Protects .umem_rbroot and tree, as well as odp_mrs_count and + * mmu notifiers registration. + */ + struct rw_semaphore umem_rwsem; + void (*invalidate_range)(struct ib_umem *umem, + unsigned long start, unsigned long end); + + struct mmu_notifier mn; + atomic_t notifier_count; + /* A list of umems that don't have private mmu notifier counters yet. */ + struct list_head no_private_counters; + int odp_mrs_count; +#endif }; struct ib_uobject { @@ -1662,7 +1707,10 @@ static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) { - return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; + size_t copy_sz; + + copy_sz = min_t(size_t, len, udata->outlen); + return copy_to_user(udata->outbuf, src, copy_sz) ? -EFAULT : 0; } /** |