blob: 9fb46a63a17e486f6ba155e4c5c365c730065b38 [file] [log] [blame]
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * IB infrastructure:
 * Establish SMC-R as an Infiniband Client to be notified about added and
 * removed IB devices of type RDMA.
 * Determine device and port characteristics for these IB devices.
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
13
#include <linux/random.h>
#include <rdma/ib_verbs.h>

#include "smc_pnet.h"
#include "smc_ib.h"
#include "smc_core.h"
#include "smc_wr.h"
#include "smc.h"
22
23struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */
24 .lock = __SPIN_LOCK_UNLOCKED(smc_ib_devices.lock),
25 .list = LIST_HEAD_INIT(smc_ib_devices.list),
26};
27
28#define SMC_LOCAL_SYSTEMID_RESET "%%%%%%%"
29
30u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET; /* unique system
31 * identifier
32 */
33
Ursula Braunf38ba1792017-01-09 16:55:19 +010034void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
35{
36 ib_dealloc_pd(lnk->roce_pd);
37 lnk->roce_pd = NULL;
38}
39
40int smc_ib_create_protection_domain(struct smc_link *lnk)
41{
42 int rc;
43
44 lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
45 rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
46 if (IS_ERR(lnk->roce_pd))
47 lnk->roce_pd = NULL;
48 return rc;
49}
50
51static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
52{
53 switch (ibevent->event) {
54 case IB_EVENT_DEVICE_FATAL:
55 case IB_EVENT_GID_CHANGE:
56 case IB_EVENT_PORT_ERR:
57 case IB_EVENT_QP_ACCESS_ERR:
58 /* tbd in follow-on patch:
59 * abnormal close of corresponding connections
60 */
61 break;
62 default:
63 break;
64 }
65}
66
67void smc_ib_destroy_queue_pair(struct smc_link *lnk)
68{
69 ib_destroy_qp(lnk->roce_qp);
70 lnk->roce_qp = NULL;
71}
72
73/* create a queue pair within the protection domain for a link */
74int smc_ib_create_queue_pair(struct smc_link *lnk)
75{
76 struct ib_qp_init_attr qp_attr = {
77 .event_handler = smc_ib_qp_event_handler,
78 .qp_context = lnk,
79 .send_cq = lnk->smcibdev->roce_cq_send,
80 .recv_cq = lnk->smcibdev->roce_cq_recv,
81 .srq = NULL,
82 .cap = {
83 .max_send_wr = SMC_WR_BUF_CNT,
84 /* include unsolicited rdma_writes as well,
85 * there are max. 2 RDMA_WRITE per 1 WR_SEND
86 */
87 .max_recv_wr = SMC_WR_BUF_CNT * 3,
88 .max_send_sge = SMC_IB_MAX_SEND_SGE,
89 .max_recv_sge = 1,
90 .max_inline_data = SMC_WR_TX_SIZE,
91 },
92 .sq_sig_type = IB_SIGNAL_REQ_WR,
93 .qp_type = IB_QPT_RC,
94 };
95 int rc;
96
97 lnk->roce_qp = ib_create_qp(lnk->roce_pd, &qp_attr);
98 rc = PTR_ERR_OR_ZERO(lnk->roce_qp);
99 if (IS_ERR(lnk->roce_qp))
100 lnk->roce_qp = NULL;
101 else
102 smc_wr_remember_qp_attr(lnk);
103 return rc;
104}
105
Ursula Brauncd6851f2017-01-09 16:55:18 +0100106/* map a new TX or RX buffer to DMA */
107int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
108 struct smc_buf_desc *buf_slot,
109 enum dma_data_direction data_direction)
110{
111 int rc = 0;
112
113 if (buf_slot->dma_addr[SMC_SINGLE_LINK])
114 return rc; /* already mapped */
115 buf_slot->dma_addr[SMC_SINGLE_LINK] =
116 ib_dma_map_single(smcibdev->ibdev, buf_slot->cpu_addr,
117 buf_size, data_direction);
118 if (ib_dma_mapping_error(smcibdev->ibdev,
119 buf_slot->dma_addr[SMC_SINGLE_LINK]))
120 rc = -EIO;
121 return rc;
122}
123
Ursula Brauna4cf0442017-01-09 16:55:14 +0100124static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
125{
126 struct net_device *ndev;
127 int rc;
128
129 rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
130 &smcibdev->gid[ibport - 1], NULL);
131 /* the SMC protocol requires specification of the roce MAC address;
132 * if net_device cannot be determined, it can be derived from gid 0
133 */
134 ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
135 if (ndev) {
136 memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN);
137 } else if (!rc) {
138 memcpy(&smcibdev->mac[ibport - 1][0],
139 &smcibdev->gid[ibport - 1].raw[8], 3);
140 memcpy(&smcibdev->mac[ibport - 1][3],
141 &smcibdev->gid[ibport - 1].raw[13], 3);
142 smcibdev->mac[ibport - 1][0] &= ~0x02;
143 }
144 return rc;
145}
146
147/* Create an identifier unique for this instance of SMC-R.
148 * The MAC-address of the first active registered IB device
149 * plus a random 2-byte number is used to create this identifier.
150 * This name is delivered to the peer during connection initialization.
151 */
152static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
153 u8 ibport)
154{
155 memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
156 sizeof(smcibdev->mac[ibport - 1]));
157 get_random_bytes(&local_systemid[0], 2);
158}
159
160bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
161{
162 return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
163}
164
165int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
166{
167 int rc;
168
169 memset(&smcibdev->pattr[ibport - 1], 0,
170 sizeof(smcibdev->pattr[ibport - 1]));
171 rc = ib_query_port(smcibdev->ibdev, ibport,
172 &smcibdev->pattr[ibport - 1]);
173 if (rc)
174 goto out;
175 rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
176 if (rc)
177 goto out;
178 if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
179 sizeof(local_systemid)) &&
180 smc_ib_port_active(smcibdev, ibport))
181 /* create unique system identifier */
182 smc_ib_define_local_systemid(smcibdev, ibport);
183out:
184 return rc;
185}
186
187static struct ib_client smc_ib_client;
188
189/* callback function for ib_register_client() */
190static void smc_ib_add_dev(struct ib_device *ibdev)
191{
192 struct smc_ib_device *smcibdev;
193
194 if (ibdev->node_type != RDMA_NODE_IB_CA)
195 return;
196
197 smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL);
198 if (!smcibdev)
199 return;
200
201 smcibdev->ibdev = ibdev;
202
203 spin_lock(&smc_ib_devices.lock);
204 list_add_tail(&smcibdev->list, &smc_ib_devices.list);
205 spin_unlock(&smc_ib_devices.lock);
206 ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
207}
208
209/* callback function for ib_register_client() */
210static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
211{
212 struct smc_ib_device *smcibdev;
213
214 smcibdev = ib_get_client_data(ibdev, &smc_ib_client);
215 ib_set_client_data(ibdev, &smc_ib_client, NULL);
216 spin_lock(&smc_ib_devices.lock);
217 list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
218 spin_unlock(&smc_ib_devices.lock);
Thomas Richter6812baa2017-01-09 16:55:15 +0100219 smc_pnet_remove_by_ibdev(smcibdev);
Ursula Brauna4cf0442017-01-09 16:55:14 +0100220 kfree(smcibdev);
221}
222
223static struct ib_client smc_ib_client = {
224 .name = "smc_ib",
225 .add = smc_ib_add_dev,
226 .remove = smc_ib_remove_dev,
227};
228
229int __init smc_ib_register_client(void)
230{
231 return ib_register_client(&smc_ib_client);
232}
233
234void smc_ib_unregister_client(void)
235{
236 ib_unregister_client(&smc_ib_client);
237}