From 765d67748bcf802c4642a49cd0139787d0d80783 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 17 Feb 2016 08:15:41 -0800 Subject: IB: new common API for draining queues Add provider-specific drain_sq/drain_rq functions for providers needing special drain logic. Add static functions __ib_drain_sq() and __ib_drain_rq() which post noop WRs to the SQ or RQ and block until their completions are processed. This ensures the applications completions for work requests posted prior to the drain work request have all been processed. Add API functions ib_drain_sq(), ib_drain_rq(), and ib_drain_qp(). For the drain logic to work, the caller must: ensure there is room in the CQ(s) and QP for the drain work request and completion. allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be IB_POLL_DIRECT. ensure that there are no other contexts that are posting WRs concurrently. Otherwise the drain is not guaranteed. Reviewed-by: Chuck Lever Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c8fea4..68b7e978a27d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1846,6 +1846,8 @@ struct ib_device { int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); + void (*drain_rq)(struct ib_qp *qp); + void (*drain_sq)(struct ib_qp *qp); struct ib_dma_mapping_ops *dma_ops; @@ -3094,4 +3096,7 @@ int ib_sg_to_pages(struct ib_mr *mr, int sg_nents, int (*set_page)(struct ib_mr *, u64)); +void ib_drain_rq(struct ib_qp *qp); +void ib_drain_sq(struct ib_qp *qp); +void ib_drain_qp(struct ib_qp *qp); #endif /* IB_VERBS_H */ -- cgit v1.2.3 From a3100a78794175d7f2488a3155d247da3d7390e4 Mon Sep 17 00:00:00 2001 From: Marina Varshaver Date: Thu, 18 Feb 2016 18:31:05 +0200 Subject: IB/core: Add don't 
trap flag to flow creation Don't trap flag (i.e. IB_FLOW_ATTR_FLAGS_DONT_TRAP) indicates that QP will receive traffic, but will not steal it. When a packet matches a flow steering rule that was created with the don't trap flag, the QPs assigned to this rule will get this packet, but matching will continue to other equal/lower priority rules. This will let other QPs assigned to those rules to get the packet too. If both don't trap rule and other rules have the same priority and match the same packet, the behavior is undefined. The don't trap flag can't be set with default rule types (i.e. IB_FLOW_ATTR_ALL_DEFAULT, IB_FLOW_ATTR_MC_DEFAULT) as default rules don't have rules after them and don't trap has no meaning here. Signed-off-by: Marina Varshaver Reviewed-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c8fea4..514223f522c8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1487,6 +1487,11 @@ enum ib_flow_domain { IB_FLOW_DOMAIN_NUM /* Must be last */ }; +enum ib_flow_flags { + IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */ + IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */ +}; + struct ib_flow_eth_filter { u8 dst_mac[6]; u8 src_mac[6]; -- cgit v1.2.3 From b2a239df4e65fe35240ddf3e5f9f31335c90589b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 29 Feb 2016 18:05:29 +0200 Subject: IB/core: Add vendor's specific data to alloc mw Passing udata to the vendor's driver in order to pass data from the user-space driver to the kernel-space driver. This data will be used in downstream patches. 
Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c8fea4..3f79070de547 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1808,7 +1808,8 @@ struct ib_device { struct scatterlist *sg, int sg_nents); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, - enum ib_mw_type type); + enum ib_mw_type type, + struct ib_udata *udata); int (*dealloc_mw)(struct ib_mw *mw); struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, -- cgit v1.2.3 From 5a30247bf09eeffaa46c00d59a62359aeb7d0462 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Tue, 5 Jan 2016 13:52:55 -0500 Subject: IB/core: Documentation fix in the MAD header file In ib_mad.h, ib_mad_snoop_handler uses send_buf rather than send_wr Signed-off-by: Hal Rosenstock Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/ib_mad.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 0ff049bd9ad4..37dd534cbeab 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -424,11 +424,11 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, /** * ib_mad_snoop_handler - Callback handler for snooping sent MADs. * @mad_agent: MAD agent that snooped the MAD. - * @send_wr: Work request information on the sent MAD. + * @send_buf: send MAD data buffer. * @mad_send_wc: Work completion information on the sent MAD. Valid * only for snooping that occurs on a send completion. * - * Clients snooping MADs should not modify data referenced by the @send_wr + * Clients snooping MADs should not modify data referenced by the @send_buf * or @mad_send_wc. 
*/ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, -- cgit v1.2.3 From f5aa9159a418726d74b67c8815ffd2739afb4c7a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 29 Feb 2016 19:07:32 +0200 Subject: IB/core: Add arbitrary sg_list support Devices that are capable of registering SG lists with gaps can now expose it in the core to ULPs using a new device capability IB_DEVICE_SG_GAPS_REG (in a new field device_cap_flags_ex in the device attributes as we ran out of bits), and a new mr_type IB_MR_TYPE_SG_GAPS which allocates a memory region which is capable of handling SG lists with gaps. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3f79070de547..bcd5b242e6b1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -212,6 +212,7 @@ enum ib_device_cap_flags { IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), + IB_DEVICE_SG_GAPS_REG = (1ULL << 32), }; enum ib_signature_prot_cap { @@ -662,10 +663,15 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); * @IB_MR_TYPE_SIGNATURE: memory region that is used for * signature operations (data-integrity * capable regions) + * @IB_MR_TYPE_SG_GAPS: memory region that is capable of + * registering arbitrary sg lists (without + * the normal mr constraints - see + * ib_map_mr_sg) */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, IB_MR_TYPE_SIGNATURE, + IB_MR_TYPE_SG_GAPS, }; /** -- cgit v1.2.3 From 0194621b225348428c212f330c26d194fc77bd15 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:50:24 -0800 Subject: IB/rdmavt: Create module framework and handle driver registration This patch introduces the basics for a new module called rdma_vt. 
This new driver is a software implementation of the InfiniBand verbs and aims to replace the multiple implementations that exist and duplicate each other's code. While the call to actually register the device with the IB core happens in rdma_vt, most of the work is still done in the drivers themselves. This will be changing in a follow-on patch; this is just laying the groundwork for this infrastructure. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 include/rdma/rdma_vt.h (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h new file mode 100644 index 000000000000..0438bf229306 --- /dev/null +++ b/include/rdma/rdma_vt.h @@ -0,0 +1,70 @@ +#ifndef DEF_RDMA_VT_H +#define DEF_RDMA_VT_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Structure that low level drivers will populate in order to register with the + * rdmavt layer. + */ + +#include "ib_verbs.h" +struct rvt_dev_info { + struct ib_device ibdev; + int (*port_callback)(struct ib_device *, u8, struct kobject *); + + /* + * TODO: + * need to reflect module parameters that may vary by dev + */ +}; + +int rvt_register_device(struct rvt_dev_info *rvd); +void rvt_unregister_device(struct rvt_dev_info *rvd); + +#endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From 8afd32eb58b6885fc3e268c69b1b1b627aa2afaf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:51:48 -0800 Subject: IB/rdmavt: Add protection domain to rdmavt. Add datastructure for and allocation/deallocation of protection domains for RDMAVT. 
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 0438bf229306..6bf5fd40081d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -54,16 +54,42 @@ */ #include "ib_verbs.h" + +/* + * Things that are driver specific, module parameters in hfi1 and qib + */ +struct rvt_driver_params { + int max_pds; +}; + +/* Protection domain */ +struct rvt_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + struct rvt_dev_info { struct ib_device ibdev; + + /* Driver specific */ + struct rvt_driver_params dparms; int (*port_callback)(struct ib_device *, u8, struct kobject *); - /* - * TODO: - * need to reflect module parameters that may vary by dev - */ + /* Internal use */ + int n_pds_allocated; + spinlock_t n_pds_lock; /* Protect pd allocated count */ }; +static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct rvt_pd, ibpd); +} + +static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) +{ + return container_of(ibdev, struct rvt_dev_info, ibdev); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); -- cgit v1.2.3 From b1070a7a4d304e680eb6c1158d76645cf5a923f1 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:52:19 -0800 Subject: IB/rdmavt: Add ib core device attributes to rvt driver params list Instead of trying to handle each parameter separately, add ib_device_attr to rvt_driver_params. This means drivers will fill this in and pass to the rvt registration function. 
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 6bf5fd40081d..2990e03bdd9e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -59,7 +59,45 @@ * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { - int max_pds; + /* + * driver required fields: + * node_guid + * phys_port_cnt + * dma_device + * owner + * driver optional fields (rvt will provide generic value if blank): + * name + * node_desc + * rvt fields, driver value ignored: + * uverbs_abi_ver + * node_type + * num_comp_vectors + * uverbs_cmd_mask + */ + struct ib_device_attr props; + + /* + * Drivers will need to support a number of notifications to rvt in + * accordance with certain events. This structure should contain a mask + * of the supported events. Such events that the rvt may need to know + * about include: + * port errors + * port active + * lid change + * sm change + * client reregister + * pkey change + * + * There may also be other events that the rvt layers needs to know + * about this is not an exhaustive list. Some events though rvt does not + * need to rely on the driver for such as completion queue error. + */ + int rvt_signal_supported; + + /* + * Anything driver specific that is not covered by props + * For instance special module parameters. Goes here. + */ }; /* Protection domain */ @@ -69,10 +107,25 @@ struct rvt_pd { }; struct rvt_dev_info { + /* + * Prior to calling for registration the driver will be responsible for + * allocating space for this structure. 
+ * + * The driver will also be responsible for filling in certain members of + * dparms.props + */ + struct ib_device ibdev; - /* Driver specific */ + /* Driver specific properties */ struct rvt_driver_params dparms; + + /* + * The work to create port files in /sys/class Infiniband is different + * depending on the driver. This should not be extracted away and + * instead drivers are responsible for setting the correct callback for + * this. + */ int (*port_callback)(struct ib_device *, u8, struct kobject *); /* Internal use */ -- cgit v1.2.3 From 30588643f95e1bb1239e2568de7a653722832a5e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:54:16 -0800 Subject: IB/rdmavt: Add pkey query stub The pkey table will reside in the rvt structure but it will be modified only when the driver requests then rvt will simply read the value to return in the query. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2990e03bdd9e..bf072a436a34 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -114,12 +114,13 @@ struct rvt_dev_info { * The driver will also be responsible for filling in certain members of * dparms.props */ - struct ib_device ibdev; /* Driver specific properties */ struct rvt_driver_params dparms; + /* PKey Table goes here */ + /* * The work to create port files in /sys/class Infiniband is different * depending on the driver. This should not be extracted away and -- cgit v1.2.3 From b92a7568037e2a28f61c3f79c2320431bb24dfab Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:01:42 -0800 Subject: IB/rdmavt: Move MR datastructures into rvt This patch adds the MR datastructures based on hfi1 into rvt. 
For now the data structures are defined in include/rdma/rdma_vt.h but once all MR functionality has been moved from the drivers into rvt these should move to rdmavt/mr.h. Reviewed-by: Ira Weiny Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index bf072a436a34..f232e39a5d69 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,56 @@ #include "ib_verbs.h" +/* + * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once + * drivers no longer need access to the MR directly. + */ + +/* + * A segment is a linear region of low physical memory. + * Used by the verbs layer. + */ +struct rvt_seg { + void *vaddr; + size_t length; +}; + +/* The number of rvt_segs that fit in a page. */ +#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) + +struct rvt_segarray { + struct rvt_seg segs[RVT_SEGSZ]; +}; + +struct rvt_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of rvt_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non uniform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct rvt_segarray *map[0]; /* the segments */ +}; + +#define RVT_MAX_LKEY_TABLE_BITS 23 + +struct rvt_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct 
rvt_mregion __rcu **table; +}; + +/* End Memory Region */ + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -119,6 +169,9 @@ struct rvt_dev_info { /* Driver specific properties */ struct rvt_driver_params dparms; + struct rvt_mregion __rcu *dma_mr; + struct rvt_lkey_table lkey_table; + /* PKey Table goes here */ /* -- cgit v1.2.3 From ca889e8ad3af9f1dfeb827356bc9839fb20f32be Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:41 -0800 Subject: IB/rdmavt: Add queue pair data structure to rdmavt Add queue pair data structure as well as supporting structures to rdmavt. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 233 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index f232e39a5d69..9baa7f04e8d0 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -105,6 +105,239 @@ struct rvt_lkey_table { /* End Memory Region */ +/* + * Things needed for the Queue Pair definition. Like the MR stuff above the + * following should probably get moved to qp.h once drivers stop trying to make + * and manipulate their own QPs. For the few instances where a driver may need + * to look into a queue pair there should be a pointer to a driver private data + * structure that they can look at. + */ + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct rvt_sge { + struct rvt_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* + * Send work request queue entry. 
+ * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge. + */ +struct rvt_swqe { + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct rvt_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct rvt_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct rvt_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct rvt_rwqe wq[0]; +}; + +struct rvt_rq { + struct rvt_rwq *wq; + u32 size; /* size of RWQE array */ + u8 max_sge; + /* protect changes in this struct */ + spinlock_t lock ____cacheline_aligned_in_smp; +}; + +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +#define RVT_MAX_RDMA_ATOMIC 16 + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. 
+ */ +struct rvt_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct rvt_sge rdma_sge; + u64 atomic_data; + }; +}; + +struct rvt_sge_state { + struct rvt_sge *sg_list; /* next SGE to be used if any */ + struct rvt_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. + */ +struct rvt_qp { + struct ib_qp ibqp; + void *priv; /* Driver private data */ + /* read mostly fields above and below */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct rvt_qp __rcu *next; /* link list for QPN hash table */ + struct rvt_swqe *s_wq; /* send work queue */ + struct rvt_mmap_info *ip; + + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + int srate_mbps; /* s_srate (below) converted to Mbit/s */ + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + + u8 state; /* QP state */ + u8 allowed_ops; /* high order bits of allowed opcodes */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + 
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct rvt_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waiting to respond */ + + struct rvt_sge_state r_sge; /* current receive data */ + struct rvt_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; + struct rvt_sge_state *s_cur_sge; + u32 s_flags; + struct rvt_swqe *s_wqe; + struct rvt_sge_state s_sge; /* current send request data */ + struct rvt_mregion *s_rdma_mr; + struct sdma_engine *s_sde; /* current sde */ + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 
s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 s_rdma_ack_cnt; + s8 s_ahgidx; + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + + struct rvt_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + + /* + * This sge list MUST be last. Do not add anything below here. + */ + struct rvt_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; +}; + +/* End QP section */ + /* * Things that are driver specific, module parameters in hfi1 and qib */ -- cgit v1.2.3 From aec5778775ac03ee6cfd6480adbbf6b05513d77b Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:52 -0800 Subject: IB/rdmavt: Move driver helper functions to a common structure Drivers are going to need to provide multiple functions for rdmavt to call in to. We already have one, so go ahead and push this into a data structure designated for driver supplied functions. 
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9baa7f04e8d0..e0beedc6110e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -383,6 +383,19 @@ struct rvt_driver_params { */ }; +/* + * Functions that drivers are required to support + */ +struct rvt_driver_provided { + /* + * The work to create port files in /sys/class Infiniband is different + * depending on the driver. This should not be extracted away and + * instead drivers are responsible for setting the correct callback for + * this. + */ + int (*port_callback)(struct ib_device *, u8, struct kobject *); +}; + /* Protection domain */ struct rvt_pd { struct ib_pd ibpd; @@ -407,13 +420,8 @@ struct rvt_dev_info { /* PKey Table goes here */ - /* - * The work to create port files in /sys/class Infiniband is different - * depending on the driver. This should not be extracted away and - * instead drivers are responsible for setting the correct callback for - * this. - */ - int (*port_callback)(struct ib_device *, u8, struct kobject *); + /* Driver specific helper functions */ + struct rvt_driver_provided driver_f; /* Internal use */ int n_pds_allocated; -- cgit v1.2.3 From b534875d5ab348fb9193692589e2ee82ae768e3a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:59 -0800 Subject: IB/rdmavt: Add device specific info prints Follow hfi1's example for printing information about the driver and incorporate into rdmavt. This requires two new functions to be provided by the driver, one to get_card_name and one to get_pci_dev. 
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e0beedc6110e..4b83770bc312 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -386,6 +386,7 @@ struct rvt_driver_params { /* * Functions that drivers are required to support */ +struct rvt_dev_info; struct rvt_driver_provided { /* * The work to create port files in /sys/class Infiniband is different @@ -394,6 +395,8 @@ struct rvt_driver_provided { * this. */ int (*port_callback)(struct ib_device *, u8, struct kobject *); + const char * (*get_card_name)(struct rvt_dev_info *rdi); + struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); }; /* Protection domain */ -- cgit v1.2.3 From 0b8a8aae02abfbd724186cffe400fbdbf0cb41d6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:03:07 -0800 Subject: IB/rdmavt: Add the start of capability flags Drivers will need a set of flags to dictate behavior to rdmavt. This patch adds a placeholder and a spot for it to live, as well as a few flags that will be used. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4b83770bc312..b44ac176217b 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,16 @@ #include "ib_verbs.h" +/* + * For some of the IBTA objects there will likely be some + * initializations required. We need flags to determine whether it is OK + * for rdmavt to do this or not. This does not imply any functions of a + * particular IBTA object are overridden. 
+ */ +#define RVT_FLAG_MR_INIT_DRIVER BIT(1) +#define RVT_FLAG_QP_INIT_DRIVER BIT(2) +#define RVT_FLAG_CQ_INIT_DRIVER BIT(3) + /* * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once * drivers no longer need access to the MR directly. @@ -429,6 +439,8 @@ struct rvt_dev_info { /* Internal use */ int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ + + int flags; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) -- cgit v1.2.3 From 7b1e2099adc8e66f78fee2dd2f10cb8a11362083 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:03:31 -0800 Subject: IB/rdmavt: Move memory registration into rdmavt Use the memory registration routines in hfi1 and move them to rdmavt. A follow on patch will address removing the duplicated code in the hfi1 and qib drivers. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index b44ac176217b..9a479575078f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -391,6 +391,7 @@ struct rvt_driver_params { * Anything driver specific that is not covered by props * For instance special module parameters. Goes here. */ + unsigned int lkey_table_size; }; /* @@ -416,6 +417,8 @@ struct rvt_pd { }; struct rvt_dev_info { + struct ib_device ibdev; /* Keep this first. Nothing above here */ + /* * Prior to calling for registration the driver will be responsible for * allocating space for this structure. 
@@ -423,7 +426,6 @@ struct rvt_dev_info { * The driver will also be responsible for filling in certain members of * dparms.props */ - struct ib_device ibdev; /* Driver specific properties */ struct rvt_driver_params dparms; @@ -453,7 +455,22 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } +static inline void rvt_put_mr(struct rvt_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + complete(&mr->comp); +} + +static inline void rvt_get_mr(struct rvt_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); +int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc); +int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, + struct rvt_sge *isge, struct ib_sge *sge, int acc); #endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From f2f342115ef2b0755abd73573831351e371f6242 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 6 Jan 2016 10:03:47 -0800 Subject: IB/rdmavt: Add common LID defines to rdmavt Original patch is from Kamal Heib. It has been split into separate patches. This patch adds RVT_PERMISSIVE_LID and RVT_MULTICAST_LID_BASE to rdmavt. Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9a479575078f..dbb45bcd1fea 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,9 @@ #include "ib_verbs.h" +#define RVT_MULTICAST_LID_BASE 0xC000 +#define RVT_PERMISSIVE_LID 0xFFFF + /* * For some of the IBTA objects there will likely be some * initializations required.
We need flags to determine whether it is OK -- cgit v1.2.3 From 119a8e708d16d38eedfa3d920b89b709dda41a8f Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 6 Jan 2016 10:03:59 -0800 Subject: IB/rdmavt: Add AH to rdmavt Original patch is from Kamal Heib. It has been split into three separate patches. This one is for rdmavt, a follow on for qib, and one for hfi1. Create a data structure for the address handle and implement the create/destroy/modify/query of address handle for rdmavt. Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index dbb45bcd1fea..36cced63af77 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -411,6 +411,7 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); + int (*check_ah)(struct ib_device *, struct ib_ah_attr *); }; /* Protection domain */ @@ -419,6 +420,13 @@ struct rvt_pd { int user; /* non-zero if created from user space */ }; +/* Address handle */ +struct rvt_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; +}; + struct rvt_dev_info { struct ib_device ibdev; /* Keep this first.
Nothing above here */ @@ -445,6 +453,9 @@ struct rvt_dev_info { int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ + int n_ahs_allocated; + spinlock_t n_ahs_lock; /* Protect ah allocated count */ + int flags; }; @@ -453,6 +464,11 @@ static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) return container_of(ibpd, struct rvt_pd, ibpd); } +static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah) +{ + return container_of(ibah, struct rvt_ah, ibah); +} + static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) { return container_of(ibdev, struct rvt_dev_info, ibdev); @@ -471,6 +487,7 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); +int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v1.2.3 From 70a1a351626073123ab79de24119977c4a297fdf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:06 -0800 Subject: IB/rdmavt: Move SRQ data structure into rdmavt Patch moves the srq data structure into rdmavt in preparation for removal from qib and hfi1 which will follow in subsequent patches. 
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 36cced63af77..fcf3ec05da70 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -349,6 +349,14 @@ struct rvt_qp { ____cacheline_aligned_in_smp; }; +struct rvt_srq { + struct ib_srq ibsrq; + struct rvt_rq rq; + struct rvt_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + /* End QP section */ /* @@ -485,6 +493,11 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) atomic_inc(&mr->refcount); } +static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct rvt_srq, ibsrq); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -- cgit v1.2.3 From f3d01bbcdc47a728336008a9254732c1652aeddd Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:13 -0800 Subject: IB/rdmavt: Add an ibport data structure to rdmavt Converge the ibport data structures of qib and hfi1 into a common ib port structure. Also provides a place to keep track of these ports in case rdmavt needs it. Along with this goes an attach and detach function for drivers to use to notify rdmavt of the ports. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fcf3ec05da70..a3d6a5bd0c02 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -53,6 +53,8 @@ * rdmavt layer. 
*/ +#include +#include #include "ib_verbs.h" #define RVT_MULTICAST_LID_BASE 0xC000 @@ -359,6 +361,65 @@ struct rvt_srq { /* End QP section */ +struct rvt_ibport { + struct rvt_qp __rcu *qp[2]; + struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ + struct rb_root mcast_tree; + spinlock_t lock; /* protect changes in this struct */ + + /* non-zero when timer is set */ + unsigned long mkey_lease_timeout; + unsigned long trap_timeout; + __be64 gid_prefix; /* in network order */ + __be64 mkey; + u64 tid; + u32 port_cap_flags; + u32 pma_sample_start; + u32 pma_sample_interval; + __be16 pma_counter_select[5]; + u16 pma_tag; + u16 mkey_lease_period; + u16 sm_lid; + u8 sm_sl; + u8 mkeyprot; + u8 subnet_timeout; + u8 vl_high_limit; + + /* + * Driver is expected to keep these up to date. These + * counters are informational only and not required to be + * completely accurate. + */ + u64 n_rc_resends; + u64 n_seq_naks; + u64 n_rdma_seq; + u64 n_rnr_naks; + u64 n_other_naks; + u64 n_loop_pkts; + u64 n_pkt_drops; + u64 n_vl15_dropped; + u64 n_rc_timeouts; + u64 n_dmawait; + u64 n_unaligned; + u64 n_rc_dupreq; + u64 n_rc_seqnak; + u16 pkey_violations; + u16 qkey_violations; + u16 mkey_violations; + + /* Hot-path per CPU counters to avoid cacheline trading to update */ + u64 z_rc_acks; + u64 z_rc_qacks; + u64 z_rc_delayed_comp; + u64 __percpu *rc_acks; + u64 __percpu *rc_qacks; + u64 __percpu *rc_delayed_comp; + + void *priv; /* driver private data */ + + /* TODO: Move sm_ah and smi_ah into here as well*/ +}; + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -403,6 +464,7 @@ struct rvt_driver_params { * For instance special module parameters. Goes here. 
*/ unsigned int lkey_table_size; + int nports; }; /* @@ -465,6 +527,7 @@ struct rvt_dev_info { spinlock_t n_ahs_lock; /* Protect ah allocated count */ int flags; + struct rvt_ibport **ports; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -501,9 +564,10 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); - #endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From b036db83c0ec8d1e81df19410a494be4cfe0b186 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:23 -0800 Subject: IB/rdmavt: Add driver notification for new AH Drivers may need to do some work once an address handle has been created. Add a driver function for this purpose. 
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index a3d6a5bd0c02..ef66d2b0ec37 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -467,9 +467,21 @@ struct rvt_driver_params { int nports; }; -/* - * Functions that drivers are required to support - */ +/* Protection domain */ +struct rvt_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + +/* Address handle */ +struct rvt_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; + u8 vl; + u8 log_pmtu; +}; + struct rvt_dev_info; struct rvt_driver_provided { /* @@ -478,23 +490,20 @@ struct rvt_driver_provided { * instead drivers are responsible for setting the correct callback for * this. 
*/ + + /* -------------------*/ + /* Required functions */ + /* -------------------*/ int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - int (*check_ah)(struct ib_device *, struct ib_ah_attr *); -}; -/* Protection domain */ -struct rvt_pd { - struct ib_pd ibpd; - int user; /* non-zero if created from user space */ -}; - -/* Address handle */ -struct rvt_ah { - struct ib_ah ibah; - struct ib_ah_attr attr; - atomic_t refcount; + /*--------------------*/ + /* Optional functions */ + /*--------------------*/ + int (*check_ah)(struct ib_device *, struct ib_ah_attr *); + void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, + struct rvt_ah *); }; struct rvt_dev_info { -- cgit v1.2.3 From b4e64397dabc946b83ffb1defa1215ede84c3b97 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:31 -0800 Subject: IB/rdmavt: Break rdma_vt main include header file up Until all functionality is moved over to rdmavt, drivers still need to access a number of fields in data structures that are predominantly meant to be used by rdmavt. Once these rdmavt_{mr,qp}.h header files are no longer being touched by drivers, their content should be moved to rdmavt/{mr,qp}.h. While here, move a couple of #defines over to more general IB verbs header files because they fit better.
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 + include/rdma/rdma_vt.h | 309 +---------------------------------------------- include/rdma/rdmavt_mr.h | 130 ++++++++++++++++++++ include/rdma/rdmavt_qp.h | 262 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 396 insertions(+), 306 deletions(-) create mode 100644 include/rdma/rdmavt_mr.h create mode 100644 include/rdma/rdmavt_qp.h (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c8fea4..d7d531cf00b7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -613,6 +613,7 @@ enum { }; #define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF) +#define IB_MULTICAST_LID_BASE cpu_to_be16(0xC000) enum ib_ah_flags { IB_AH_GRH = 1 diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index ef66d2b0ec37..79da8ee3e2b3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,10 +55,9 @@ #include #include -#include "ib_verbs.h" - -#define RVT_MULTICAST_LID_BASE 0xC000 -#define RVT_PERMISSIVE_LID 0xFFFF +#include +#include +#include /* * For some of the IBTA objects there will likely be some @@ -70,297 +69,6 @@ #define RVT_FLAG_QP_INIT_DRIVER BIT(2) #define RVT_FLAG_CQ_INIT_DRIVER BIT(3) -/* - * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once - * drivers no longer need access to the MR directly. - */ - -/* - * A segment is a linear region of low physical memory. - * Used by the verbs layer. - */ -struct rvt_seg { - void *vaddr; - size_t length; -}; - -/* The number of rvt_segs that fit in a page. 
*/ -#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) - -struct rvt_segarray { - struct rvt_seg segs[RVT_SEGSZ]; -}; - -struct rvt_mregion { - struct ib_pd *pd; /* shares refcnt of ibmr.pd */ - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of rvt_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - u8 page_shift; /* 0 - non unform/non powerof2 sizes */ - u8 lkey_published; /* in global table */ - struct completion comp; /* complete when refcount goes to zero */ - atomic_t refcount; - struct rvt_segarray *map[0]; /* the segments */ -}; - -#define RVT_MAX_LKEY_TABLE_BITS 23 - -struct rvt_lkey_table { - spinlock_t lock; /* protect changes in this struct */ - u32 next; /* next unused index (speeds search) */ - u32 gen; /* generation count */ - u32 max; /* size of the table */ - struct rvt_mregion __rcu **table; -}; - -/* End Memmory Region */ - -/* - * Things needed for the Queue Pair definition. Like the MR stuff above the - * following should probably get moved to qp.h once drivers stop trying to make - * and manipulate thier own QPs. For the few instnaces where a driver may need - * to look into a queue pair there should be a pointer to a driver priavte data - * structure that they can look at. - */ - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct rvt_sge { - struct rvt_mregion *mr; - void *vaddr; /* kernel virtual address of segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -/* - * Send work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->s_max_sge. 
- */ -struct rvt_swqe { - union { - struct ib_send_wr wr; /* don't use wr.sg_list */ - struct ib_ud_wr ud_wr; - struct ib_reg_wr reg_wr; - struct ib_rdma_wr rdma_wr; - struct ib_atomic_wr atomic_wr; - }; - u32 psn; /* first packet sequence number */ - u32 lpsn; /* last packet sequence number */ - u32 ssn; /* send sequence number */ - u32 length; /* total length of data in sg_list */ - struct rvt_sge sg_list[0]; -}; - -/* - * Receive work request queue entry. - * The size of the sg_list is determined when the QP (or SRQ) is created - * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). - */ -struct rvt_rwqe { - u64 wr_id; - u8 num_sge; - struct ib_sge sg_list[0]; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and receive work queue entries as a single memory allocation so - * it can be mmap'ed into user space. - * Note that the wq array elements are variable size so you can't - * just index into the array to get the N'th element; - * use get_rwqe_ptr() instead. - */ -struct rvt_rwq { - u32 head; /* new work requests posted to the head */ - u32 tail; /* receives pull requests from here. */ - struct rvt_rwqe wq[0]; -}; - -struct rvt_rq { - struct rvt_rwq *wq; - u32 size; /* size of RWQE array */ - u8 max_sge; - /* protect changes in this struct */ - spinlock_t lock ____cacheline_aligned_in_smp; -}; - -/* - * This structure is used by rvt_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct rvt_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -#define RVT_MAX_RDMA_ATOMIC 16 - -/* - * This structure holds the information that the send tasklet needs - * to send a RDMA read response or atomic operation. 
- */ -struct rvt_ack_entry { - u8 opcode; - u8 sent; - u32 psn; - u32 lpsn; - union { - struct rvt_sge rdma_sge; - u64 atomic_data; - }; -}; - -struct rvt_sge_state { - struct rvt_sge *sg_list; /* next SGE to be used if any */ - struct rvt_sge sge; /* progress state for the current SGE */ - u32 total_len; - u8 num_sge; -}; - -/* - * Variables prefixed with s_ are for the requester (sender). - * Variables prefixed with r_ are for the responder (receiver). - * Variables prefixed with ack_ are for responder replies. - * - * Common variables are protected by both r_rq.lock and s_lock in that order - * which only happens in modify_qp() or changing the QP 'state'. - */ -struct rvt_qp { - struct ib_qp ibqp; - void *priv; /* Driver private data */ - /* read mostly fields above and below */ - struct ib_ah_attr remote_ah_attr; - struct ib_ah_attr alt_ah_attr; - struct rvt_qp __rcu *next; /* link list for QPN hash table */ - struct rvt_swqe *s_wq; /* send work queue */ - struct rvt_mmap_info *ip; - - unsigned long timeout_jiffies; /* computed from timeout */ - - enum ib_mtu path_mtu; - int srate_mbps; /* s_srate (below) converted to Mbit/s */ - u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu */ - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 s_ahgpsn; /* set to the psn in the copy of the header */ - - u8 state; /* QP state */ - u8 allowed_ops; /* high order bits of allowed opcodes */ - u8 qp_access_flags; - u8 alt_timeout; /* Alternate path timeout for this QP */ - u8 timeout; /* Timeout for this QP */ - u8 s_srate; - u8 s_mig_state; - u8 port_num; - u8 s_pkey_index; /* PKEY index to use */ - u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; - 
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_draining; - - /* start of read/write fields */ - atomic_t refcount ____cacheline_aligned_in_smp; - wait_queue_head_t wait; - - struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] - ____cacheline_aligned_in_smp; - struct rvt_sge_state s_rdma_read_sge; - - spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ - unsigned long r_aflags; - u64 r_wr_id; /* ID for current receive WQE */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ - - u8 r_state; /* opcode of last packet received */ - u8 r_flags; - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - - struct list_head rspwait; /* link for waiting to respond */ - - struct rvt_sge_state r_sge; /* current receive data */ - struct rvt_rq r_rq; /* receive work queue */ - - spinlock_t s_lock ____cacheline_aligned_in_smp; - struct rvt_sge_state *s_cur_sge; - u32 s_flags; - struct rvt_swqe *s_wqe; - struct rvt_sge_state s_sge; /* current send request data */ - struct rvt_mregion *s_rdma_mr; - struct sdma_engine *s_sde; /* current sde */ - u32 s_cur_size; /* size of send packet in bytes */ - u32 s_len; /* total length of s_sge */ - u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ - u32 s_last_psn; /* last response PSN processed */ - u32 s_sending_psn; /* lowest PSN that is being sent */ - u32 s_sending_hpsn; /* highest PSN that is being sent */ - u32 s_psn; /* current packet sequence number */ - u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ - u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 
s_acked; /* last un-ACK'ed entry */ - u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ - u16 s_rdma_ack_cnt; - s8 s_ahgidx; - u8 s_state; /* opcode of last packet sent */ - u8 s_ack_state; /* opcode of packet to ACK */ - u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_nak_state; /* non-zero if NAK is pending */ - u8 s_retry; /* requester retry counter */ - u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ - u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - - struct rvt_sge_state s_ack_rdma_sge; - struct timer_list s_timer; - - /* - * This sge list MUST be last. Do not add anything below here. - */ - struct rvt_sge r_sg_list[0] /* verified SGEs */ - ____cacheline_aligned_in_smp; -}; - -struct rvt_srq { - struct ib_srq ibsrq; - struct rvt_rq rq; - struct rvt_mmap_info *ip; - /* send signal when number of RWQEs < limit */ - u32 limit; -}; - -/* End QP section */ - struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -554,17 +262,6 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } -static inline void rvt_put_mr(struct rvt_mregion *mr) -{ - if (unlikely(atomic_dec_and_test(&mr->refcount))) - complete(&mr->comp); -} - -static inline void rvt_get_mr(struct rvt_mregion *mr) -{ - atomic_inc(&mr->refcount); -} - static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct rvt_srq, ibsrq); diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h new file mode 100644 index 000000000000..ea60476c6b6b --- /dev/null +++ b/include/rdma/rdmavt_mr.h @@ -0,0 +1,130 @@ +#ifndef DEF_RDMAVT_INCMR_H +#define DEF_RDMAVT_INCMR_H + +/* + * Copyright(c) 2015 Intel Corporation. 
+ * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once + * drivers no longer need access to the MR directly. + */ + +/* + * A segment is a linear region of low physical memory. + * Used by the verbs layer. + */ +struct rvt_seg { + void *vaddr; + size_t length; +}; + +/* The number of rvt_segs that fit in a page. */ +#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) + +struct rvt_segarray { + struct rvt_seg segs[RVT_SEGSZ]; +}; + +struct rvt_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of rvt_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct rvt_segarray *map[0]; /* the segments */ +}; + +#define RVT_MAX_LKEY_TABLE_BITS 23 + +struct rvt_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct rvt_mregion __rcu **table; +}; + +/* + * These keep track of the copy progress within a memory region. 
+ * Used by the verbs layer. + */ +struct rvt_sge { + struct rvt_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +struct rvt_sge_state { + struct rvt_sge *sg_list; /* next SGE to be used if any */ + struct rvt_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +static inline void rvt_put_mr(struct rvt_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + complete(&mr->comp); +} + +static inline void rvt_get_mr(struct rvt_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + +#endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h new file mode 100644 index 000000000000..f33fbb0b3824 --- /dev/null +++ b/include/rdma/rdmavt_qp.h @@ -0,0 +1,262 @@ +#ifndef DEF_RDMAVT_INCQP_H +#define DEF_RDMAVT_INCQP_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge. + */ +struct rvt_swqe { + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct rvt_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). 
+ */ +struct rvt_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct rvt_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct rvt_rwqe wq[0]; +}; + +struct rvt_rq { + struct rvt_rwq *wq; + u32 size; /* size of RWQE array */ + u8 max_sge; + /* protect changes in this struct */ + spinlock_t lock ____cacheline_aligned_in_smp; +}; + +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +#define RVT_MAX_RDMA_ATOMIC 16 + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct rvt_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct rvt_sge rdma_sge; + u64 atomic_data; + }; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. 
+ */ +struct rvt_qp { + struct ib_qp ibqp; + void *priv; /* Driver private data */ + /* read mostly fields above and below */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct rvt_qp __rcu *next; /* link list for QPN hash table */ + struct rvt_swqe *s_wq; /* send work queue */ + struct rvt_mmap_info *ip; + + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + int srate_mbps; /* s_srate (below) converted to Mbit/s */ + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + + u8 state; /* QP state */ + u8 allowed_ops; /* high order bits of allowed opcodes */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct rvt_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 
r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waiting to respond */ + + struct rvt_sge_state r_sge; /* current receive data */ + struct rvt_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; + struct rvt_sge_state *s_cur_sge; + u32 s_flags; + struct rvt_swqe *s_wqe; + struct rvt_sge_state s_sge; /* current send request data */ + struct rvt_mregion *s_rdma_mr; + struct sdma_engine *s_sde; /* current sde */ + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 s_rdma_ack_cnt; + s8 s_ahgidx; + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + + 
struct rvt_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + + /* + * This sge list MUST be last. Do not add anything below here. + */ + struct rvt_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; +}; + +struct rvt_srq { + struct ib_srq ibsrq; + struct rvt_rq rq; + struct rvt_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + +#endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 0acb0cc7ecc1e4860b056368566c0c2c254ae281 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:46 -0800 Subject: IB/rdmavt: Initialize and teardown of qpn table Add table init as well as teardown for handling qpn maps. Drivers can still provide this functionality by setting the QP_INIT_DRIVER bit. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 9 +++++++++ include/rdma/rdmavt_qp.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 79da8ee3e2b3..950c2910e3f4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -172,7 +172,13 @@ struct rvt_driver_params { * For instance special module parameters. Goes here. 
*/ unsigned int lkey_table_size; + unsigned int qp_table_size; + int qpn_start; + int qpn_inc; + int qpn_res_start; + int qpn_res_end; int nports; + u8 qos_shift; }; /* Protection domain */ @@ -205,6 +211,7 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); + void (*free_all_qps)(struct rvt_dev_info *rdi); /*--------------------*/ /* Optional functions */ @@ -245,6 +252,8 @@ struct rvt_dev_info { int flags; struct rvt_ibport **ports; + + struct rvt_qp_ibdev *qp_dev; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f33fbb0b3824..e6a7d17dcd30 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -259,4 +259,37 @@ struct rvt_srq { u32 limit; }; +#define RVT_QPN_MAX BIT(24) +#define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) + +/* + * QPN-map pages start out as NULL, they get allocated upon + * first use and are never deallocated. This way, + * large bitmaps are not allocated unless large numbers of QPs are used. 
+ */ +struct rvt_qpn_map { + void *page; +}; + +struct rvt_qpn_table { + spinlock_t lock; /* protect changes to the qp table */ + unsigned flags; /* flags for QP0/1 allocated for each port */ + u32 last; /* last QP number allocated */ + u32 nmaps; /* size of the map table */ + u16 limit; + u8 incr; + /* bit map of free QP numbers other than 0/1 */ + struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES]; +}; + +struct rvt_qp_ibdev { + u32 qp_table_size; + u32 qp_table_bits; + struct rvt_qp __rcu **qp_table; + spinlock_t qpt_lock; /* qptable lock */ + struct rvt_qpn_table qpn_table; +}; + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 822514d75a9647662fff39d728c1f4636b75d904 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:57 -0800 Subject: IB/rdmavt: Add mmap related functions The mmap data structure was moved in a previous commit. This patch now pulls in the related functions. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 950c2910e3f4..fd25d2309ee3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -254,6 +254,12 @@ struct rvt_dev_info { struct rvt_ibport **ports; struct rvt_qp_ibdev *qp_dev; + + /* memory maps */ + struct list_head pending_mmaps; + spinlock_t mmap_offset_lock; /* protect mmap_offset */ + u32 mmap_offset; + spinlock_t pending_lock; /* protect pending mmap list */ }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -285,4 +291,13 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); +int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +void rvt_release_mmap_info(struct kref 
*ref); +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, + u32 size, + struct ib_ucontext *context, + void *obj); +void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, + u32 size, void *obj); + #endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From 38ce2c6f3ae8dda0ee42dc8474759ff949994bea Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:05:12 -0800 Subject: IB/rdmavt: Add pkey support Add pkey table in rdi per port data structure. Also bring in related pkey functions. Drivers will still be responsible for allocating and maintaining the pkey table. However they need to tell rdmavt where to find the pkey table. We can not move the pkey table up into rdmavt because drivers need to manipulate this long before registering with it. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fd25d2309ee3..3a78f20cbf2d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -69,6 +69,8 @@ #define RVT_FLAG_QP_INIT_DRIVER BIT(2) #define RVT_FLAG_CQ_INIT_DRIVER BIT(3) +#define RVT_MAX_PKEY_VALUES 16 + struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -125,6 +127,14 @@ struct rvt_ibport { void *priv; /* driver private data */ + /* + * The pkey table is allocated and maintained by the driver. Drivers + * need to have access to this before registering with rdmavt. However + * rdmavt will need access to it so drivers need to provide this during + * the attach port API call. 
+ */ + u16 *pkey_table; + /* TODO: Move sm_ah and smi_ah into here as well*/ }; @@ -178,6 +188,7 @@ struct rvt_driver_params { int qpn_res_start; int qpn_res_end; int nports; + int npkeys; u8 qos_shift; }; @@ -238,8 +249,6 @@ struct rvt_dev_info { struct rvt_mregion __rcu *dma_mr; struct rvt_lkey_table lkey_table; - /* PKey Table goes here */ - /* Driver specific helper functions */ struct rvt_driver_provided driver_f; @@ -282,11 +291,32 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct rvt_srq, ibsrq); } +static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) +{ + /* + * All ports have same number of pkeys. + */ + return rdi->dparms.npkeys; +} + +/* + * Return the indexed PKEY from the port PKEY table. + */ +static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, + int port_index, + unsigned index) +{ + if (index >= rvt_get_npkeys(rdi)) + return 0; + else + return rdi->ports[port_index]->pkey_table[index]; +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum); +int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum, u16 *pkey_table); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v1.2.3 From 050eb7fbe0ff2bcd95833ff180337116d5907483 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:11 -0800 Subject: IB/rdmavt: Add R and S flags for queue pairs Use the flags originally provided for hfi1 in the rdmavt driver. These will be made available to drivers in the qp header file. 
Reviewed-by: Harish Chegondi Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index e6a7d17dcd30..1aa8b5b40f9f 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -48,6 +48,86 @@ * */ +#include +/* + * Atomic bit definitions for r_aflags. + */ +#define RVT_R_WRID_VALID 0 +#define RVT_R_REWIND_SGE 1 + +/* + * Bit definitions for r_flags. + */ +#define RVT_R_REUSE_SGE 0x01 +#define RVT_R_RDMAR_SEQ 0x02 +#define RVT_R_RSP_NAK 0x04 +#define RVT_R_RSP_SEND 0x08 +#define RVT_R_COMM_EST 0x10 + +/* + * Bit definitions for s_flags. + * + * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled + * RVT_S_BUSY - send tasklet is processing the QP + * RVT_S_TIMER - the RC retry timer is active + * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics + * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete + * before processing the next SWQE + * RVT_S_WAIT_RNR - waiting for RNR timeout + * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA + * RVT_S_WAIT_PIO - waiting for a send buffer to be available + * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available + * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available + * RVT_S_WAIT_KMEM - waiting for kernel memory to be available + * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue + * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests + * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK + * 
RVT_S_ECN - a BECN was queued to the send engine + */ +#define RVT_S_SIGNAL_REQ_WR 0x0001 +#define RVT_S_BUSY 0x0002 +#define RVT_S_TIMER 0x0004 +#define RVT_S_RESP_PENDING 0x0008 +#define RVT_S_ACK_PENDING 0x0010 +#define RVT_S_WAIT_FENCE 0x0020 +#define RVT_S_WAIT_RDMAR 0x0040 +#define RVT_S_WAIT_RNR 0x0080 +#define RVT_S_WAIT_SSN_CREDIT 0x0100 +#define RVT_S_WAIT_DMA 0x0200 +#define RVT_S_WAIT_PIO 0x0400 +#define RVT_S_WAIT_TX 0x0800 +#define RVT_S_WAIT_DMA_DESC 0x1000 +#define RVT_S_WAIT_KMEM 0x2000 +#define RVT_S_WAIT_PSN 0x4000 +#define RVT_S_WAIT_ACK 0x8000 +#define RVT_S_SEND_ONE 0x10000 +#define RVT_S_UNLIMITED_CREDIT 0x20000 +#define RVT_S_AHG_VALID 0x40000 +#define RVT_S_AHG_CLEAR 0x80000 +#define RVT_S_ECN 0x100000 + +/* + * Wait flags that would prevent any packet type from being sent. + */ +#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ + RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) + +/* + * Wait flags that would prevent send work requests from making progress. + */ +#define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \ + RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \ + RVT_S_WAIT_PSN | RVT_S_WAIT_ACK) + +#define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) + +/* Number of bits to pay attention to in the opcode for checking qp type */ +#define RVT_OPCODE_QP_MASK 0xE0 + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored -- cgit v1.2.3 From 515667f8f8b48bdbcad61c5681291cb970e36ac3 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:17 -0800 Subject: IB/rdmavt: Add create queue pair functionality Add create queue pair verbs call as well as supporting functions. 
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3a78f20cbf2d..3bdeac7b9a48 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -222,7 +222,10 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - void (*free_all_qps)(struct rvt_dev_info *rdi); + unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void (*notify_qp_reset)(struct rvt_qp *qp); /*--------------------*/ /* Optional functions */ @@ -230,6 +233,8 @@ struct rvt_driver_provided { int (*check_ah)(struct ib_device *, struct ib_ah_attr *); void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); + int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port); }; struct rvt_dev_info { @@ -262,7 +267,10 @@ struct rvt_dev_info { int flags; struct rvt_ibport **ports; + /* QP */ struct rvt_qp_ibdev *qp_dev; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; /* keep track of number of qps */ /* memory maps */ struct list_head pending_mmaps; -- cgit v1.2.3 From 5a9cf6f27e36ece71cc8a192a4ca39b62a460807 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:24 -0800 Subject: IB/rdmavt: Export reset_qp in rdmavt Until all queue pair functionality is moved to rdmavt we need to provide access to the reset function. This is only temporary and will be reverted back to a static, non exported function in the end. 
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 +++ include/rdma/rdmavt_qp.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3bdeac7b9a48..e412e670e687 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -338,4 +338,7 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); +/* Temporary export */ +void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type); #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1aa8b5b40f9f..bce0a03a7c07 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -48,6 +48,7 @@ * */ +#include #include /* * Atomic bit definitions for r_aflags. -- cgit v1.2.3 From feaeb6e26fd15f6531e28f2900e0b59705bfc3d4 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:50:36 -0800 Subject: IB/rdmavt: Add support for rvt_query_device function With this commit, the drivers using rdmavt need not define query_device function. But they should fill in the IB device attributes structure rvt_dev_info.dparms.props Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e412e670e687..725778a6781d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -245,7 +245,10 @@ struct rvt_dev_info { * allocating space for this structure. * * The driver will also be responsible for filling in certain members of - * dparms.props + * dparms.props. The driver needs to fill in dparms exactly as it would + * want values reported to a ULP. 
This will be returned to the caller + * in rdmavt's device. The driver should also therefore refrain from + * modifying this directly after registration with rdmavt. */ /* Driver specific properties */ -- cgit v1.2.3 From d2b8d4da1ca5052b72e043d2ce68157abf3f2d24 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 22 Jan 2016 12:50:43 -0800 Subject: IB/rdmavt: Support creating qps with GFP_NOIO flag The current code is problematic when the QP creation and ipoib is used to support NFS and NFS desires to do IO for paging purposes. In that case, the GFP_KERNEL allocation within create_qp causes a deadlock in tight memory situations. This fix adds support to create queue pair with GFP_NOIO flag for connected mode only to cleanly fail the create queue pair in those situations. This was previously fixed in qib but needed to get ported to hfi1. This patch handles that for both hardwares in the new rdmavt common layer. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 725778a6781d..70a9596b859d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -223,7 +223,8 @@ struct rvt_driver_provided { const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); unsigned (*free_all_qps)(struct rvt_dev_info *rdi); - void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + gfp_t gfp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); @@ -234,7 +235,7 @@ struct rvt_driver_provided { void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type 
type, u8 port); + enum ib_qp_type type, u8 port, gfp_t gfp); }; struct rvt_dev_info { -- cgit v1.2.3 From 6f6387ae75c1499b315ddcd3c74402d44423e1cc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:15 -0800 Subject: IB/rdmavt: Add completion queue functions Brings in completion queue functionality. A kthread worker is added to the rvt_dev_info to serve as a worker for completion queues. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 9 +++++ include/rdma/rdmavt_cq.h | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 include/rdma/rdmavt_cq.h (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 70a9596b859d..79080e3b09f8 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -138,6 +138,8 @@ struct rvt_ibport { /* TODO: Move sm_ah and smi_ah into here as well*/ }; +#define RVT_CQN_MAX 16 /* maximum length of cq name */ + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -190,6 +192,8 @@ struct rvt_driver_params { int nports; int npkeys; u8 qos_shift; + char cq_name[RVT_CQN_MAX]; + int node; }; /* Protection domain */ @@ -281,6 +285,11 @@ struct rvt_dev_info { spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; spinlock_t pending_lock; /* protect pending mmap list */ + + /* CQ */ + struct kthread_worker *worker; /* per device cq worker */ + u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; /* protect count of in use cqs */ }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h new file mode 100644 index 000000000000..51fd00b243d0 --- /dev/null +++ b/include/rdma/rdmavt_cq.h @@ -0,0 +1,99 @@ +#ifndef DEF_RDMAVT_INCCQ_H +#define DEF_RDMAVT_INCCQ_H + +/* + * + * This file is 
provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <linux/kthread.h> +#include <rdma/ib_user_verbs.h> + +/* + * Define an ib_cq_notify value that is not valid so we know when CQ + * notifications are armed. + */ +#define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1) + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. + */ +struct rvt_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + union { + /* these are actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc uqueue[0]; + struct ib_wc kqueue[0]; + }; +}; + +/* + * The completion queue structure. + */ +struct rvt_cq { + struct ib_cq ibcq; + struct kthread_work comptask; + spinlock_t lock; /* protect changes in this struct */ + u8 notify; + u8 triggered; + struct rvt_dev_info *rdi; + struct rvt_cq_wc *queue; + struct rvt_mmap_info *ip; +}; + +static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct rvt_cq, ibcq); +} + +void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited); + +#endif /* DEF_RDMAVT_INCCQ_H */ -- cgit v1.2.3 From bfbac097b6e8023e10fdadab2527d0a1a3160d7e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:22 -0800 Subject: IB/rdmavt: Add post send to rdmavt Add in a post_send and post_one_send to rdmavt. The ULP will provide a WQE to rdmavt which will then walk and queue each element. 
Rdmavt will then queue the work to be done in the driver or kick the driver's progress routine. There needs to be a follow on patch which adds in another lock for the head of the queue so that it can be added to and read from in parallel. This will touch protocol handlers and require other changes in the drivers. This will be done separately. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 7 +++++++ include/rdma/rdmavt_qp.h | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 79080e3b09f8..36e4fb4c0df3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -231,6 +231,8 @@ struct rvt_driver_provided { gfp_t gfp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); + void (*schedule_send)(struct rvt_qp *qp); + void (*do_send)(struct rvt_qp *qp); /*--------------------*/ /* Optional functions */ @@ -312,6 +314,11 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct rvt_srq, ibsrq); } +static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct rvt_qp, ibqp); +} + static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index bce0a03a7c07..3189f195538c 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -129,6 +129,17 @@ /* Number of bits to pay attention to in the opcode for checking qp type */ #define RVT_OPCODE_QP_MASK 0xE0 +/* Flags for checking QP state (see ib_rvt_state_ops[]) */ +#define RVT_POST_SEND_OK 0x01 +#define RVT_POST_RECV_OK 0x02 +#define RVT_PROCESS_RECV_OK 0x04 +#define RVT_PROCESS_SEND_OK 0x08 +#define RVT_PROCESS_NEXT_SEND_OK 0x10 +#define RVT_FLUSH_SEND 0x20 +#define RVT_FLUSH_RECV 0x40 
+#define RVT_PROCESS_OR_FLUSH_SEND \ + (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND) + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored @@ -373,4 +384,19 @@ struct rvt_qp_ibdev { struct rvt_qpn_table qpn_table; }; +/* + * Since struct rvt_swqe is not a fixed size, we can't simply index into + * struct rvt_qp.s_wq. This function does the array index computation. + */ +static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, + unsigned n) +{ + return (struct rvt_swqe *)((char *)qp->s_wq + + (sizeof(struct rvt_swqe) + + qp->s_max_sge * + sizeof(struct rvt_sge)) * n); +} + +extern const int ib_rvt_state_ops[]; + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 3b0b3fb3c1bbf50a2f88ea7345448a41dcba3c57 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:35 -0800 Subject: IB/rdmavt: Add modify qp Add modify qp and supporting functions. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 42 ++++++++++++++++++++++++++++++++++++++++++ include/rdma/rdmavt_mr.h | 9 +++++++++ include/rdma/rdmavt_qp.h | 20 ++++++++++++++++++++ 3 files changed, 71 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 36e4fb4c0df3..1c7123ff3656 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,7 @@ #include #include +#include #include #include #include @@ -194,6 +195,10 @@ struct rvt_driver_params { u8 qos_shift; char cq_name[RVT_CQN_MAX]; int node; + int max_rdma_atomic; + int psn_mask; + int psn_shift; + int psn_modify_mask; }; /* Protection domain */ @@ -233,6 +238,15 @@ struct rvt_driver_provided { void (*notify_qp_reset)(struct rvt_qp *qp); void (*schedule_send)(struct rvt_qp *qp); void (*do_send)(struct rvt_qp *qp); + int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr); + void 
(*flush_qp_waiters)(struct rvt_qp *qp); + void (*stop_send_queue)(struct rvt_qp *qp); + void (*quiesce_qp)(struct rvt_qp *qp); + void (*notify_error_qp)(struct rvt_qp *qp); + u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + u32 pmtu); + int (*mtu_to_path_mtu)(u32 mtu); /*--------------------*/ /* Optional functions */ @@ -340,6 +354,34 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, return rdi->ports[port_index]->pkey_table[index]; } +/** + * rvt_lookup_qpn - return the QP with the given QPN + * @rvp: the ibport + * @qpn: the QP number to look up + * + * The caller must hold the rcu_read_lock(), and keep the lock until + * the returned qp is no longer in use. + */ +/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */ +static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, + struct rvt_ibport *rvp, + u32 qpn) __must_hold(RCU) +{ + struct rvt_qp *qp = NULL; + + if (unlikely(qpn <= 1)) { + qp = rcu_dereference(rvp->qp[qpn]); + } else { + u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); + + for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) + break; + } + return qp; +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index ea60476c6b6b..4aa81713b4f3 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -127,4 +127,13 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) atomic_inc(&mr->refcount); } +static inline void rvt_put_ss(struct rvt_sge_state *ss) +{ + while (ss->num_sge) { + rvt_put_mr(ss->sge.mr); + if (--ss->num_sge) + ss->sge = *ss->sg_list++; + } +} + #endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 3189f195538c..e66bcc96d273 100644 --- 
a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -355,6 +355,7 @@ struct rvt_srq { #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) +#define RVT_QPN_MASK 0xFFFFFF /* * QPN-map pages start out as NULL, they get allocated upon @@ -397,6 +398,25 @@ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, sizeof(struct rvt_sge)) * n); } +/* + * Since struct rvt_rwqe is not a fixed size, we can't simply index into + * struct rvt_rwq.wq. This function does the array index computation. + */ +static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) +{ + return (struct rvt_rwqe *) + ((char *)rq->wq->wq + + (sizeof(struct rvt_rwqe) + + rq->max_sge * sizeof(struct ib_sge)) * n); +} + extern const int ib_rvt_state_ops[]; +struct rvt_dev_info; +void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp); +void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends); +int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); +void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn); +void rvt_dec_qp_cnt(struct rvt_dev_info *rdi); + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 4e74080b248701c0c2d1af2764bf02f9c531020a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:55 -0800 Subject: IB/rdmavt: Add multicast functions This patch adds in the multicast add and remove functions as well as the ancillary infrastructure needed. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 8 ++++++++ include/rdma/rdmavt_qp.h | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1c7123ff3656..04e90192a50d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -306,6 +306,11 @@ struct rvt_dev_info { struct kthread_worker *worker; /* per device cq worker */ u32 n_cqs_allocated; /* number of CQs allocated for device */ spinlock_t n_cqs_lock; /* protect count of in use cqs */ + + /* Multicast */ + u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; + }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -399,8 +404,11 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, void *obj); void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); +int rvt_reg_mr(struct rvt_qp *qp, struct ib_reg_wr *wr); +struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); /* Temporary export */ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type); + #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index e66bcc96d273..a97b95ba893f 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -50,6 +50,7 @@ #include #include +#include /* * Atomic bit definitions for r_aflags. */ @@ -385,9 +386,28 @@ struct rvt_qp_ibdev { struct rvt_qpn_table qpn_table; }; +/* + * There is one struct rvt_mcast for each multicast GID. + * All attached QPs are then stored as a list of + * struct rvt_mcast_qp. 
+ */ +struct rvt_mcast_qp { + struct list_head list; + struct rvt_qp *qp; +}; + +struct rvt_mcast { + struct rb_node rb_node; + union ib_gid mgid; + struct list_head qp_list; + wait_queue_head_t wait; + atomic_t refcount; + int n_attached; +}; + /* * Since struct rvt_swqe is not a fixed size, we can't simply index into - * struct hfi1_qp.s_wq. This function does the array index computation. + * struct rvt_qp.s_wq. This function does the array index computation. */ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, unsigned n) -- cgit v1.2.3 From 2b047ea7a3ceef0322e666782e0a82e98424f6f1 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 22 Jan 2016 13:04:32 -0800 Subject: IB/rdmavt: Remove unused variable from Queue Pair s_sde should be in the low level driver QP private data. Remove the definition from rvt_qp. Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index a97b95ba893f..f0e24266bdb4 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -304,7 +304,6 @@ struct rvt_qp { struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; - struct sdma_engine *s_sde; /* current sde */ u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ -- cgit v1.2.3 From e85ec33d820e1f3f763a46f9fd41230ca0ce40c6 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 22 Jan 2016 13:04:38 -0800 Subject: IB/rdmavt: add modify queue pair driver helpers Low level drivers need to be able to check incoming attributes as well as be able to adjust their private data on queue pair modification. Add 2 driver callbacks, check_modify_qp and modify_qp, to facilitate this. 
Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 04e90192a50d..e382cca3fc4f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -256,6 +256,13 @@ struct rvt_driver_provided { struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port, gfp_t gfp); + /** + * Return 0 if modification is valid, -errno otherwise + */ + int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); }; struct rvt_dev_info { -- cgit v1.2.3 From ff6acd69518e0a84bd9c9b7f1bd4313f7076db97 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:04:45 -0800 Subject: IB/rdmavt: Add device structure allocation This patch adds rdmavt device structure allocation in rdmavt. The ib_device alloc is now done in rdmavt instead of the driver. Drivers need to tell rdmavt the number of ports when calling. A side effect of this patch is fixing a bug with port initialization where the device structure port array was allocated over top of an existing one.
Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e382cca3fc4f..7768e041f244 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -394,6 +394,7 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, return qp; } +struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -- cgit v1.2.3 From 3711baf27d78475436b063f33399908ba208a8f2 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:04:51 -0800 Subject: IB/rdmavt: Add mad agents to rdmavt This patch adds mad agent create and free to rdmavt. Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 7768e041f244..31f9e5a08da0 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -136,7 +136,8 @@ struct rvt_ibport { */ u16 *pkey_table; - /* TODO: Move sm_ah and smi_ah into here as well*/ + struct rvt_ah *sm_ah; + struct rvt_ah *smi_ah; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ @@ -263,6 +264,9 @@ struct rvt_driver_provided { int attr_mask, struct ib_udata *udata); void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + + void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); }; struct rvt_dev_info { -- cgit v1.2.3 From fe31419501ba133a967da7b7da0d32945ef21840 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 
2016 13:04:58 -0800 Subject: IB/rdmavt: Fix copyright date Update all files added by rdmavt which do not yet have 2016 as the copyright year. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 2 +- include/rdma/rdmavt_mr.h | 2 +- include/rdma/rdmavt_qp.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 31f9e5a08da0..f6569b24497f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -2,7 +2,7 @@ #define DEF_RDMA_VT_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index 4aa81713b4f3..5edffdca8c53 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCMR_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f0e24266bdb4..91f20fd91e00 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. -- cgit v1.2.3 From b8f881b913f34f712185b2ff7a41645dcad9a868 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Wed, 3 Feb 2016 14:14:36 -0800 Subject: IB/rdmavt: Add srq functionality to rdmavt Fill in srq function stubs with code derived from hfi1 and qib. 
Move necessary functions and data structure members as well. Reviewed-by: Dennis Dalessandro Reviewed-by: Harish Chegondi Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index f6569b24497f..1b770650cf60 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -299,6 +299,9 @@ struct rvt_dev_info { int n_ahs_allocated; spinlock_t n_ahs_lock; /* Protect ah allocated count */ + u32 n_srqs_allocated; + spinlock_t n_srqs_lock; /* Protect srqs allocated count */ + int flags; struct rvt_ibport **ports; -- cgit v1.2.3 From 60c30f572595e46c819503b5a8c3a8e2f922de7a Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 3 Feb 2016 14:14:45 -0800 Subject: IB/rdmavt: Add hardware driver send work request check Some hardware drivers require additional checks on send WRs. Create an optional callback to allow hardware drivers to reject a send WR. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1b770650cf60..52dfa9cf8621 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -265,6 +265,8 @@ struct rvt_driver_provided { void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + int (*check_send_wr)(struct rvt_qp *qp, struct ib_send_wr *wr); + void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); }; -- cgit v1.2.3 From f1badc716349cc2ac6e55ad50dcff598ef97bad5 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:15:02 -0800 Subject: IB/rdmavt: Clean up distinction between port number and index IB core uses 1 relative indexing for ports.
All of our data structures use 0 based indexing. Add an inline function that we can use whenever we need to validate a legal value and try to convert a port number to a port index at the entrance into rdmavt. Try to follow the policy that when we are talking about a port from the IB core point of view we refer to it as a port number. When a port is an index into our arrays, refer to it as a port index. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 52dfa9cf8621..5d1c694a2731 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -256,7 +256,7 @@ struct rvt_driver_provided { void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp); + enum ib_qp_type type, u8 port_num, gfp_t gfp); /** * Return 0 if modification is valid, -errno otherwise */ @@ -408,7 +408,7 @@ int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, - int portnum, u16 *pkey_table); + int port_index, u16 *pkey_table); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v1.2.3 From 1f024992ef05d1eb9b3a0becd1611ecfa21854a6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:15:11 -0800 Subject: IB/rdmavt: Add query gid support. Add query gid support. Rdmavt still relies on the driver to maintain the gid table. Rdmavt simply calls into the driver to retrieve the guid for a particular port.
Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 5d1c694a2731..dabf4d52b4fc 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -248,6 +248,8 @@ struct rvt_driver_provided { u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); int (*mtu_to_path_mtu)(u32 mtu); + int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, + int guid_index, __be64 *guid); /*--------------------*/ /* Optional functions */ -- cgit v1.2.3 From 61a650c14d728354b2d493bed3f1b0531f033dac Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 3 Feb 2016 14:15:20 -0800 Subject: IB/rdmavt: Add support for query_port, modify_port and get_port_immutable rvt_query_port calls into the driver through a call back function query_port_state to populate the rest of ib_port_attr elements. 
rvt_modify_port calls into the driver if needed through a call back function shut_down_port() Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index dabf4d52b4fc..4242fea9cf4e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -200,6 +200,8 @@ struct rvt_driver_params { int psn_mask; int psn_shift; int psn_modify_mask; + u32 core_cap_flags; + u32 max_mad_size; }; /* Protection domain */ @@ -250,6 +252,10 @@ struct rvt_driver_provided { int (*mtu_to_path_mtu)(u32 mtu); int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid); + int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, + struct ib_port_attr *props); + int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); + void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); /*--------------------*/ /* Optional functions */ -- cgit v1.2.3 From e1bf0d5ecdc49cd4e2014da0d60efa74f5714fba Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 3 Feb 2016 14:36:58 -0800 Subject: staging/rdma/hfi1, IB/core: Fix LinkDownReason define for consistency LinkDownReason LocalMediaNotInstalled lacked an underscore and was inconsistent with other defines in the same family. This patch fixes this. 
Reviewed-by: Ira Weiny Signed-off-by: Easwar Hariharan Signed-off-by: Doug Ledford --- include/rdma/opa_port_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index a0fa975cd1c1..2b95c2c336eb 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -97,7 +97,7 @@ #define OPA_LINKDOWN_REASON_WIDTH_POLICY 41 /* 42-48 reserved */ #define OPA_LINKDOWN_REASON_DISCONNECTED 49 -#define OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED 50 +#define OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED 50 #define OPA_LINKDOWN_REASON_NOT_INSTALLED 51 #define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52 /* 53 reserved */ -- cgit v1.2.3 From 066fad289408e7d48ce00b54a292e7a90602cb30 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 11:03:36 -0800 Subject: IB/rdmavt: remove unused qp field The field is a vestige from ipath. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 91f20fd91e00..aed13e13591c 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -251,7 +251,6 @@ struct rvt_qp { u32 pmtu; /* decoded from path_mtu */ u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 s_ahgpsn; /* set to the psn in the copy of the header */ u8 state; /* QP state */ -- cgit v1.2.3 From bfee5e32e701b98634b380a9eef8b5820feb7488 Mon Sep 17 00:00:00 2001 From: Vennila Megavannan Date: Tue, 9 Feb 2016 14:29:49 -0800 Subject: IB/rdmavt, staging/rdma/hfi1: use qps to dynamically scale timeout value A busy_jiffies variable is maintained and updated when rc qps are created and deleted. 
busy_jiffies is a scaled value of the number of rc qps in the device. busy_jiffies is incremented every rc qp scaling interval. busy_jiffies is added to the rc timeout in add_retry_timer and mod_retry_timer. The rc qp scaling interval is selected based on extensive performance evaluation of targeted workloads. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Vennila Megavannan Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 4 +++- include/rdma/rdmavt_qp.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4242fea9cf4e..5ccf683b28f1 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -318,7 +318,9 @@ struct rvt_dev_info { /* QP */ struct rvt_qp_ibdev *qp_dev; u32 n_qps_allocated; /* number of QPs allocated for device */ - spinlock_t n_qps_lock; /* keep track of number of qps */ + u32 n_rc_qps; /* number of RC QPs allocated for device */ + u32 busy_jiffies; /* timeout scaling based on RC QP count */ + spinlock_t n_qps_lock; /* protect qps, rc qps and busy jiffy counts */ /* memory maps */ struct list_head pending_mmaps; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index aed13e13591c..b3ea74579316 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -225,6 +225,8 @@ struct rvt_ack_entry { }; }; +#define RC_QP_SCALING_INTERVAL 5 + /* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). -- cgit v1.2.3 From 46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:10:04 -0800 Subject: IB/qib, staging/rdma/hfi1: add s_hlock for use in post send This patch adds an additional lock to reduce contention on the s_lock. 
This lock is used in post_send() so that the post_send is not serialized with the send engine and other send related processing. To do this the s_next_psn is now maintained on post_send() while post_send() related fields are moved to a new cache line. There is an s_avail maintained for the post_send() to mitigate trading cache lines with the send engine. The lock is released/acquired around releasing the just built packet to the egress mechanism. Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Harish Chegondi Signed-off-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 4 +++- include/rdma/rdmavt_qp.h | 13 +++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 5ccf683b28f1..aabd2e5bc5d7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -220,6 +220,7 @@ struct rvt_ah { }; struct rvt_dev_info; +struct rvt_swqe; struct rvt_driver_provided { /* * The work to create port files in /sys/class Infiniband is different @@ -240,6 +241,7 @@ struct rvt_driver_provided { void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); void (*schedule_send)(struct rvt_qp *qp); + void (*schedule_send_no_lock)(struct rvt_qp *qp); void (*do_send)(struct rvt_qp *qp); int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_attr *attr); @@ -273,7 +275,7 @@ struct rvt_driver_provided { void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); - int (*check_send_wr)(struct rvt_qp *qp, struct ib_send_wr *wr); + int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); diff --git a/include/rdma/rdmavt_qp.h 
b/include/rdma/rdmavt_qp.h index b3ea74579316..1066b5d1b4d2 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -250,11 +250,12 @@ struct rvt_qp { enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu */ u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ u32 s_ahgpsn; /* set to the psn in the copy of the header */ + u16 pmtu; /* decoded from path_mtu */ + u8 log_pmtu; /* shift for pmtu */ u8 state; /* QP state */ u8 allowed_ops; /* high order bits of allowed opcodes */ u8 qp_access_flags; @@ -299,6 +300,13 @@ struct rvt_qp { struct rvt_sge_state r_sge; /* current receive data */ struct rvt_rq r_rq; /* receive work queue */ + /* post send line */ + spinlock_t s_hlock ____cacheline_aligned_in_smp; + u32 s_head; /* new entries added here */ + u32 s_next_psn; /* PSN for next request */ + u32 s_avail; /* number of entries avail */ + u32 s_ssn; /* SSN of tail entry */ + spinlock_t s_lock ____cacheline_aligned_in_smp; struct rvt_sge_state *s_cur_sge; u32 s_flags; @@ -308,19 +316,16 @@ struct rvt_qp { u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ u32 s_last_psn; /* last response PSN processed */ u32 s_sending_psn; /* lowest PSN that is being sent */ u32 s_sending_hpsn; /* highest PSN that is being sent */ u32 s_psn; /* current packet sequence number */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ u32 s_tail; /* next entry to process */ u32 s_cur; /* current work queue entry */ u32 s_acked; /* last un-ACK'ed entry */ u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ u16 s_hdrwords; /* size of s_hdr in 32 bit 
words */ u16 s_rdma_ack_cnt; -- cgit v1.2.3 From 90793f7179478df19ac4b2244cfd9764b28e4b38 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:29 -0800 Subject: IB/rdmavt: Clean up comments and add more documentation Add, remove, and otherwise clean up existing comments that are leftover from the initial code postings of rdmavt. Many of the comments were added to provide an idea on the direction we were thinking of going. Now that the design is solidified make a pass over and clean everything up. Also add details where lacking. Ensure all non static functions have nano comments. Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index aabd2e5bc5d7..57c708dddab4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -146,21 +146,6 @@ struct rvt_ibport { * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { - /* - * driver required fields: - * node_guid - * phys_port_cnt - * dma_device - * owner - * driver optional fields (rvt will provide generic value if blank): - * name - * node_desc - * rvt fields, driver value ignored: - * uverbs_abi_ver - * node_type - * num_comp_vectors - * uverbs_cmd_mask - */ struct ib_device_attr props; /* -- cgit v1.2.3 From 1348d706f254fe7030221251a5e1685ff3d9f86a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:37 -0800 Subject: IB/rdmavt: Add per verb driver callback checking For each verb validate that all requirements for driver callbacks are met. If a function is called without checking for a valid pointer, it is a required function. Also document what each callback function does. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 123 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 109 insertions(+), 14 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 57c708dddab4..ec658d8bf34e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -208,62 +208,157 @@ struct rvt_dev_info; struct rvt_swqe; struct rvt_driver_provided { /* - * The work to create port files in /sys/class Infiniband is different - * depending on the driver. This should not be extracted away and - * instead drivers are responsible for setting the correct callback for - * this. + * Which functions are required depends on which verbs rdmavt is + * providing and which verbs the driver is overriding. See + * check_support() for details. */ - /* -------------------*/ - /* Required functions */ - /* -------------------*/ + /* Passed to ib core registration. Callback to create syfs files */ int (*port_callback)(struct ib_device *, u8, struct kobject *); + + /* + * Returns a string to represent the device for which is being + * registered. This is primarily used for error and debug messages on + * the console. + */ const char * (*get_card_name)(struct rvt_dev_info *rdi); + + /* + * Returns a pointer to the undelying hardware's PCI device. This is + * used to display information as to what hardware is being referenced + * in an output message + */ struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + + /* + * Allocate a private queue pair data structure for driver specific + * information which is opaque to rdmavt. + */ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); + + /* + * Free the driver's private qp structure. 
+ */ void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + + /* + * Inform the driver the particular qp in quesiton has been reset so + * that it can clean up anything it needs to. + */ void (*notify_qp_reset)(struct rvt_qp *qp); + + /* + * Give the driver a notice that there is send work to do. It is up to + * the driver to generally push the packets out, this just queues the + * work with the driver. There are two variants here. The no_lock + * version requires the s_lock not to be held. The other assumes the + * s_lock is held. + */ void (*schedule_send)(struct rvt_qp *qp); void (*schedule_send_no_lock)(struct rvt_qp *qp); + + /* + * Sometimes rdmavt needs to kick the driver's send progress. That is + * done by this call back. + */ void (*do_send)(struct rvt_qp *qp); + + /* + * Get a path mtu from the driver based on qp attributes. + */ int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_attr *attr); + + /* + * Notify driver that it needs to flush any outstanding IO requests that + * are waiting on a qp. + */ void (*flush_qp_waiters)(struct rvt_qp *qp); + + /* + * Notify driver to stop its queue of sending packets. Nothing else + * should be posted to the queue pair after this has been called. + */ void (*stop_send_queue)(struct rvt_qp *qp); + + /* + * Have the drivr drain any in progress operations + */ void (*quiesce_qp)(struct rvt_qp *qp); + + /* + * Inform the driver a qp has went to error state. + */ void (*notify_error_qp)(struct rvt_qp *qp); + + /* + * Get an MTU for a qp. + */ u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); + /* + * Convert an mtu to a path mtu + */ int (*mtu_to_path_mtu)(u32 mtu); + + /* + * Get the guid of a port in big endian byte order + */ int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid); + + /* + * Query driver for the state of the port. 
+ */ int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, struct ib_port_attr *props); + + /* + * Tell driver to shutdown a port + */ int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); + + /* Tell driver to send a trap for changed port capabilities */ void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); - /*--------------------*/ - /* Optional functions */ - /*--------------------*/ + /* + * The following functions can be safely ignored completely. Any use of + * these is checked for NULL before blindly calling. Rdmavt should also + * be functional if drivers omit these. + */ + + /* Called to inform the driver that all qps should now be freed. */ + unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + + /* Driver specific AH validation */ int (*check_ah)(struct ib_device *, struct ib_ah_attr *); + + /* Inform the driver a new AH has been created */ void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); + + /* Let the driver pick the next queue pair number*/ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port_num, gfp_t gfp); - /** - * Return 0 if modification is valid, -errno otherwise - */ + + /* Determine if its safe or allowed to modify the qp */ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + + /* Driver specific QP modification/notification-of */ void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); + /* Driver specific work request checking */ int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); + /* Notify driver a mad agent has been created */ void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + + /* Notify driver a mad agent has been removed */ void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + }; struct rvt_dev_info { -- cgit v1.2.3 From 4eadd8ff21bffcaf3338267dcca571accc612c8e Mon Sep 17 
00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:55 -0800 Subject: IB/qib,rdmavt: Move smi_ah to qib Rdmavt adopted an smi_ah from qib which is not needed by hfi1. Move this back to qib and get it out of the common library. Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index ec658d8bf34e..2c3a04c630fd 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -137,7 +137,6 @@ struct rvt_ibport { u16 *pkey_table; struct rvt_ah *sm_ah; - struct rvt_ah *smi_ah; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ -- cgit v1.2.3 From ce73fe25a6bd4a4deabed57e2553100e10fb8362 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:03 -0800 Subject: IB/rdmavt: Remove RVT_FLAGs While hfi1 and qib were still supporting bits and pieces of core verbs components there needed to be a way to convey if rdmavt should handle allocation and initialize of resources like the queue pair table. Now that all of this is moved into rdmavt there is no need for these flags. They are no longer used in the drivers. Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2c3a04c630fd..264c514c7da2 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -60,16 +60,6 @@ #include #include -/* - * For some of the IBTA objects there will likely be some - * initializations required. We need flags to determine whether it is OK - * for rdmavt to do this or not. This does not imply any functions of a - * partiuclar IBTA object are overridden. 
- */ -#define RVT_FLAG_MR_INIT_DRIVER BIT(1) -#define RVT_FLAG_QP_INIT_DRIVER BIT(2) -#define RVT_FLAG_CQ_INIT_DRIVER BIT(3) - #define RVT_MAX_PKEY_VALUES 16 struct rvt_ibport { -- cgit v1.2.3 From 0765b01b8e2da50ad56f6544f935f5eaef1389f2 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:12 -0800 Subject: IB/rdmavt: Remove signal_supported and comments Initially it was intended that rdmavt would support some signaling between the underlying driver and itself. However this turned out to be unnecessary for qib and hfi1. If we need to add something like this in later to support another driver we should do it then. As of now this is essentially dead code so remove it. Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 264c514c7da2..4c50bbb75d77 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -137,24 +137,6 @@ struct rvt_ibport { struct rvt_driver_params { struct ib_device_attr props; - /* - * Drivers will need to support a number of notifications to rvt in - * accordance with certain events. This structure should contain a mask - * of the supported events. Such events that the rvt may need to know - * about include: - * port errors - * port active - * lid change - * sm change - * client reregister - * pkey change - * - * There may also be other events that the rvt layers needs to know - * about this is not an exhaustive list. Some events though rvt does not - * need to rely on the driver for such as completion queue error. - */ - int rvt_signal_supported; - /* * Anything driver specific that is not covered by props * For instance special module parameters. Goes here.
-- cgit v1.2.3 From 79a225be38932b17707009767e85d6edf450e7cc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:20 -0800 Subject: IB/rdmavt: Remove unnecessary exported functions Remove exported functions which are no longer required as the functionality has moved into rdmavt. This also requires re-ordering some of the functions since their prototype no longer appears in a header file. Rather than add forward declarations it is just cleaner to re-order some of the functions. Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 13 ------------- include/rdma/rdmavt_qp.h | 4 ---- 2 files changed, 17 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4c50bbb75d77..a8696551abb1 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -476,19 +476,6 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); -int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -void rvt_release_mmap_info(struct kref *ref); -struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, - u32 size, - struct ib_ucontext *context, - void *obj); -void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, - u32 size, void *obj); -int rvt_reg_mr(struct rvt_qp *qp, struct ib_reg_wr *wr); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); -/* Temporary export */ -void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type); - #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1066b5d1b4d2..933f14f92da6 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -438,10 +438,6 @@ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct 
rvt_rq *rq, unsigned n) extern const int ib_rvt_state_ops[]; struct rvt_dev_info; -void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp); -void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends); int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); -void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn); -void rvt_dec_qp_cnt(struct rvt_dev_info *rdi); #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From d2421a82f6d8ad407d3f4acdbacedfb06d9f47f5 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:44:26 -0800 Subject: IB/rdmvt: close send engine struct holes pahole noted the wasted 4 bytes after s_lock and r_lock. Move s_flags and r_psn to fill the holes. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 933f14f92da6..5c307ed4d195 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -283,12 +283,12 @@ struct rvt_qp { struct rvt_sge_state s_rdma_read_sge; spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + u32 r_psn; /* expected rcv packet sequence number */ unsigned long r_aflags; u64 r_wr_id; /* ID for current receive WQE */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ u32 r_msn; /* message sequence number */ u8 r_state; /* opcode of last packet received */ @@ -308,8 +308,8 @@ struct rvt_qp { u32 s_ssn; /* SSN of tail entry */ spinlock_t s_lock ____cacheline_aligned_in_smp; - struct rvt_sge_state *s_cur_sge; u32 s_flags; + struct rvt_sge_state *s_cur_sge; struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; -- cgit v1.2.3 From 
14553ca11039732bcba3c160a26d702dbe71dd49 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:45:36 -0800 Subject: staging/rdma/hfi1: Adaptive PIO for short messages The change requires a new pio_busy field in the iowait structure to track the number of outstanding pios. The new counter together with the sdma counter serve as the basis for a packet by packet decision as to which egress mechanism to use. Since packets given to different egress mechanisms are not ordered, this scheme will preserve the order. The iowait drain/wait mechanisms are extended for a pio case. An additional qp wait flag is added for the PIO drain wait case. Currently the only pio wait is for buffers, so the no_bufs_available() routine name is changed to pio_wait() and a third argument is passed with one of the two pio wait flags to generalize the routine. A module parameter is added to hold a configurable threshold. For now, the module parameter is zero. A heuristic routine is added to return the func pointer of the proper egress routine to use. The heuristic is as follows: - SMI always uses pio - GSI,UD qps <= threshold use pio - UD qps > threshold use sdma o No coordination with sdma is required because order is not required and this qp pio count is not maintained for UD - RC/UC ONLY packets <= threshold choose as follows: o If sdmas pending, use SDMA o Otherwise use pio and enable the pio tracking count at the time the pio buffer is allocated - RC/UC ONLY packets > threshold use SDMA o If pio's are pending the pio_wait with the new wait flag is called to delay for pios to drain The threshold is potentially reduced by the QP's mtu. The sc_buffer_alloc() has two additional args (a callback, a void *) which are exploited by the RC/UC cases to pass a new complete routine and a qp *. When the shadow ring completes the credit associated with a packet, the new complete routine is called.
The verbs_pio_complete() will then decrement the busy count and trigger any drain waiters in qp destroy or reset. Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 5c307ed4d195..f2f4df023aaa 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -82,6 +82,7 @@ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating * next send completion entry not via send DMA * RVT_S_WAIT_PIO - waiting for a send buffer to be available + * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available * RVT_S_WAIT_KMEM - waiting for kernel memory to be available @@ -101,16 +102,17 @@ #define RVT_S_WAIT_SSN_CREDIT 0x0100 #define RVT_S_WAIT_DMA 0x0200 #define RVT_S_WAIT_PIO 0x0400 -#define RVT_S_WAIT_TX 0x0800 -#define RVT_S_WAIT_DMA_DESC 0x1000 -#define RVT_S_WAIT_KMEM 0x2000 -#define RVT_S_WAIT_PSN 0x4000 -#define RVT_S_WAIT_ACK 0x8000 -#define RVT_S_SEND_ONE 0x10000 -#define RVT_S_UNLIMITED_CREDIT 0x20000 -#define RVT_S_AHG_VALID 0x40000 -#define RVT_S_AHG_CLEAR 0x80000 -#define RVT_S_ECN 0x100000 +#define RVT_S_WAIT_PIO_DRAIN 0x0800 +#define RVT_S_WAIT_TX 0x1000 +#define RVT_S_WAIT_DMA_DESC 0x2000 +#define RVT_S_WAIT_KMEM 0x4000 +#define RVT_S_WAIT_PSN 0x8000 +#define RVT_S_WAIT_ACK 0x10000 +#define RVT_S_SEND_ONE 0x20000 +#define RVT_S_UNLIMITED_CREDIT 0x40000 +#define RVT_S_AHG_VALID 0x80000 +#define RVT_S_AHG_CLEAR 0x100000 +#define RVT_S_ECN 0x200000 /* * Wait flags that would prevent any packet type from being sent. 
-- cgit v1.2.3 From b493d91d333e867a043f7ff1397bcba6e2d0dda2 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Fri, 26 Feb 2016 09:18:00 -0600 Subject: iwcm: common code for port mapper moved port mapper related code from drivers into common code Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana E. Nikolova Signed-off-by: Faisal Latif Reviewed-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Doug Ledford --- include/rdma/iw_cm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 036bd2772662..6d0065c322b7 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -83,8 +83,10 @@ struct iw_cm_id { iw_cm_handler cm_handler; /* client callback function */ void *context; /* client cb context */ struct ib_device *device; - struct sockaddr_storage local_addr; + struct sockaddr_storage local_addr; /* local addr */ struct sockaddr_storage remote_addr; + struct sockaddr_storage m_local_addr; /* nmapped local addr */ + struct sockaddr_storage m_remote_addr; /* nmapped rem addr */ void *provider_data; /* provider private data */ iw_event_handler event_handler; /* cb for provider events */ @@ -92,6 +94,7 @@ struct iw_cm_id { void (*add_ref)(struct iw_cm_id *); void (*rem_ref)(struct iw_cm_id *); u8 tos; + bool mapped; }; struct iw_cm_conn_param { @@ -123,6 +126,7 @@ struct iw_cm_verbs { int backlog); int (*destroy_listen)(struct iw_cm_id *cm_id); + char ifname[IFNAMSIZ]; }; /** -- cgit v1.2.3 From ef086c0d5dd9a151578c72b6f257e5b0e77d65eb Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:08 -0800 Subject: IB/hfi1: Report pid in qp_stats to aid debug Tracking user/QP ownership is needed to debug issues with user ULPs like OpenMPI. 
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f2f4df023aaa..497e59065c2c 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -251,6 +251,7 @@ struct rvt_qp { enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ + pid_t pid; /* pid for user mode QPs */ u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ -- cgit v1.2.3 From fb532d6a79b96a4c8f678024d7ed3549ff0ca916 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 Feb 2016 10:25:25 +0200 Subject: IB/{core, ulp} Support above 32 possible device capability flags The old bitwise device_cap_flags variable was limited to u32 which has all bits already defined. In order to overcome it, we converted device_cap_flags variable to be u64 type. Signed-off-by: Leon Romanovsky Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3a03c1d18afa..c9b62344d22e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -274,7 +274,7 @@ struct ib_device_attr { u32 hw_ver; int max_qp; int max_qp_wr; - int device_cap_flags; + u64 device_cap_flags; int max_sge; int max_sge_rd; int max_cq; -- cgit v1.2.3 From fad61ad4e755f5dd13c7702a87cd907207392534 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:36 +0200 Subject: IB/core: Add subnet prefix to port info The subnet prefix is a part of the port_info MAD returned and should be available at the ib_port_attr struct. We define it here and provide a default implementation in case the hardware driver does not provide one. 
The subnet prefix is required when creating the address vector to access the SA in networks where GRH must be used. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c9b62344d22e..7239b9a4499e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -490,6 +490,7 @@ union rdma_protocol_stats { | RDMA_CORE_CAP_OPA_MAD) struct ib_port_attr { + u64 subnet_prefix; enum ib_port_state state; enum ib_mtu max_mtu; enum ib_mtu active_mtu; -- cgit v1.2.3 From a0c1b2a3508714281f604db818fa0cc83c2f9b6a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:37 +0200 Subject: IB/core: Support accessing SA in virtualized environment Per the ongoing standardisation process, when virtual HCAs are present in a network, traffic is routed based on a destination GID. In order to access the SA we use the well known SA GID. We also add a GRH required boolean field to the port attributes which is used to report to the verbs consumer whether this port is connected to a virtual network. We use this field to realize whether we need to create an address vector with GRH to access the subnet administrator. We clear the port attributes struct before calling the hardware driver to make sure the default remains that GRH is not required. 
Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7239b9a4499e..3a5a66b7a33f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -97,6 +97,11 @@ enum rdma_node_type { RDMA_NODE_USNIC_UDP, }; +enum { + /* set the local administered indication */ + IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2, +}; + enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, @@ -510,6 +515,7 @@ struct ib_port_attr { u8 active_width; u8 active_speed; u8 phys_state; + bool grh_required; }; enum ib_device_modify_flags { -- cgit v1.2.3 From 50174a7f2c24d13cdeec435ee1ba70b1e0b1318f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:38 +0200 Subject: IB/core: Add interfaces to control VF attributes Following the practice exercised for network devices which allow the PF net device to configure attributes of its virtual functions, we introduce the following functions to be used by IPoIB which is the network driver implementation for IB devices. ib_set_vf_link_state - set the policy for a VF link. More below. ib_get_vf_config - read configuration information of a VF ib_get_vf_stats - read VF statistics ib_set_vf_guid - set the node or port GUID of a VF Also add an indication in the device cap flags that indicates that this IB devices is based on a virtual function. A VF shares the physical port with the PF and other VFs. When setting the link state we have three options: 1. Auto - in this mode, the virtual port follows the state of the physical port and becomes active only if the physical port's state is active. In all other cases it remains in a Down state. 2. Down - sets the state of the virtual port to Down 3. Up - causes the virtual port to transition into Initialize state if it was not already in this state. 
A virtualization aware subnet manager can then bring the state of the port into the Active state. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3a5a66b7a33f..8a245a7f981a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -56,6 +56,7 @@ #include #include +#include #include #include #include @@ -218,6 +219,7 @@ enum ib_device_cap_flags { IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), + IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33), }; enum ib_signature_prot_cap { @@ -1867,6 +1869,14 @@ struct ib_device { void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); void (*drain_rq)(struct ib_qp *qp); void (*drain_sq)(struct ib_qp *qp); + int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, + int state); + int (*get_vf_config)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *ivf); + int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_stats *stats); + int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, + int type); struct ib_dma_mapping_ops *dma_ops; @@ -2310,6 +2320,15 @@ int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid, struct ib_gid_attr *attr); +int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, + int state); +int ib_get_vf_config(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *info); +int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, + struct ifla_vf_stats *stats); +int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, + int type); + int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); -- cgit v1.2.3