net/memif: support zero-copy slave
[dpdk.git] / drivers / net / memif / rte_eth_memif.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
3  */
4
5 #include <stdlib.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/socket.h>
10 #include <sys/un.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <linux/if_ether.h>
14 #include <errno.h>
15 #include <sys/eventfd.h>
16
17 #include <rte_version.h>
18 #include <rte_mbuf.h>
19 #include <rte_ether.h>
20 #include <rte_ethdev_driver.h>
21 #include <rte_ethdev_vdev.h>
22 #include <rte_malloc.h>
23 #include <rte_kvargs.h>
24 #include <rte_bus_vdev.h>
25 #include <rte_string_fns.h>
26 #include <rte_errno.h>
27 #include <rte_memory.h>
28 #include <rte_memzone.h>
29 #include <rte_eal_memconfig.h>
30
31 #include "rte_eth_memif.h"
32 #include "memif_socket.h"
33
/* Argument keys understood by this driver; all of them are listed in
 * valid_arguments below.
 */
#define ETH_MEMIF_ID_ARG		"id"
#define ETH_MEMIF_ROLE_ARG		"role"
#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG	"bsize"
#define ETH_MEMIF_RING_SIZE_ARG		"rsize"
#define ETH_MEMIF_SOCKET_ARG		"socket"
#define ETH_MEMIF_MAC_ARG		"mac"
#define ETH_MEMIF_ZC_ARG		"zero-copy"
#define ETH_MEMIF_SECRET_ARG		"secret"

/* NULL-terminated table of every accepted argument key. */
static const char * const valid_arguments[] = {
	ETH_MEMIF_ID_ARG, ETH_MEMIF_ROLE_ARG,
	ETH_MEMIF_PKT_BUFFER_SIZE_ARG, ETH_MEMIF_RING_SIZE_ARG,
	ETH_MEMIF_SOCKET_ARG, ETH_MEMIF_MAC_ARG,
	ETH_MEMIF_ZC_ARG, ETH_MEMIF_SECRET_ARG,
	NULL
};
54
/* Name of the multi-process message used to ask for a memory region. */
#define MEMIF_MP_SEND_REGION		"memif_mp_send_region"

/* Forward declaration: the function is handed to rte_memseg_walk() as a
 * callback before its definition appears in this file.
 */
static int
memif_region_init_zc(const struct rte_memseg_list *msl,
		     const struct rte_memseg *ms, void *arg);
60
61 const char *
62 memif_version(void)
63 {
64         return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
65 }
66
67 /* Message header to synchronize regions */
68 struct mp_region_msg {
69         char port_name[RTE_DEV_NAME_MAX_LEN];
70         memif_region_index_t idx;
71         memif_region_size_t size;
72 };
73
74 static int
75 memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
76 {
77         struct rte_eth_dev *dev;
78         struct pmd_process_private *proc_private;
79         const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
80         struct rte_mp_msg reply;
81         struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
82         uint16_t port_id;
83         int ret;
84
85         /* Get requested port */
86         ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
87         if (ret) {
88                 MIF_LOG(ERR, "Failed to get port id for %s",
89                         msg_param->port_name);
90                 return -1;
91         }
92         dev = &rte_eth_devices[port_id];
93         proc_private = dev->process_private;
94
95         memset(&reply, 0, sizeof(reply));
96         strlcpy(reply.name, msg->name, sizeof(reply.name));
97         reply_param->idx = msg_param->idx;
98         if (proc_private->regions[msg_param->idx] != NULL) {
99                 reply_param->size = proc_private->regions[msg_param->idx]->region_size;
100                 reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
101                 reply.num_fds = 1;
102         }
103         reply.len_param = sizeof(*reply_param);
104         if (rte_mp_reply(&reply, peer) < 0) {
105                 MIF_LOG(ERR, "Failed to reply to an add region request");
106                 return -1;
107         }
108
109         return 0;
110 }
111
112 /*
113  * Request regions
114  * Called by secondary process, when ports link status goes up.
115  */
116 static int
117 memif_mp_request_regions(struct rte_eth_dev *dev)
118 {
119         int ret, i;
120         struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
121         struct rte_mp_msg msg, *reply;
122         struct rte_mp_reply replies;
123         struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
124         struct mp_region_msg *reply_param;
125         struct memif_region *r;
126         struct pmd_process_private *proc_private = dev->process_private;
127         struct pmd_internals *pmd = dev->data->dev_private;
128         /* in case of zero-copy slave, only request region 0 */
129         uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
130                                    1 : ETH_MEMIF_MAX_REGION_NUM;
131
132         MIF_LOG(DEBUG, "Requesting memory regions");
133
134         for (i = 0; i < max_region_num; i++) {
135                 /* Prepare the message */
136                 memset(&msg, 0, sizeof(msg));
137                 strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
138                 strlcpy(msg_param->port_name, dev->data->name,
139                         sizeof(msg_param->port_name));
140                 msg_param->idx = i;
141                 msg.len_param = sizeof(*msg_param);
142
143                 /* Send message */
144                 ret = rte_mp_request_sync(&msg, &replies, &timeout);
145                 if (ret < 0 || replies.nb_received != 1) {
146                         MIF_LOG(ERR, "Failed to send mp msg: %d",
147                                 rte_errno);
148                         return -1;
149                 }
150
151                 reply = &replies.msgs[0];
152                 reply_param = (struct mp_region_msg *)reply->param;
153
154                 if (reply_param->size > 0) {
155                         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
156                         if (r == NULL) {
157                                 MIF_LOG(ERR, "Failed to alloc memif region.");
158                                 free(reply);
159                                 return -ENOMEM;
160                         }
161                         r->region_size = reply_param->size;
162                         if (reply->num_fds < 1) {
163                                 MIF_LOG(ERR, "Missing file descriptor.");
164                                 free(reply);
165                                 return -1;
166                         }
167                         r->fd = reply->fds[0];
168                         r->addr = NULL;
169
170                         proc_private->regions[reply_param->idx] = r;
171                         proc_private->regions_num++;
172                 }
173                 free(reply);
174         }
175
176         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
177                 ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
178                 if (ret < 0)
179                         return ret;
180         }
181
182         return memif_connect(dev);
183 }
184
185 static int
186 memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
187 {
188         dev_info->max_mac_addrs = 1;
189         dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
190         dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
191         dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
192         dev_info->min_rx_bufsize = 0;
193
194         return 0;
195 }
196
197 static memif_ring_t *
198 memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
199                memif_ring_type_t type, uint16_t ring_num)
200 {
201         /* rings only in region 0 */
202         void *p = proc_private->regions[0]->addr;
203         int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
204             (1 << pmd->run.log2_ring_size);
205
206         p = (uint8_t *)p + (ring_num + type * pmd->run.num_s2m_rings) * ring_size;
207
208         return (memif_ring_t *)p;
209 }
210
211 static memif_region_offset_t
212 memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
213                       memif_ring_type_t type, uint16_t num)
214 {
215         struct pmd_internals *pmd = dev->data->dev_private;
216         struct pmd_process_private *proc_private = dev->process_private;
217
218         return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
219                 (uint8_t *)proc_private->regions[mq->region]->addr);
220 }
221
222 static memif_ring_t *
223 memif_get_ring_from_queue(struct pmd_process_private *proc_private,
224                           struct memif_queue *mq)
225 {
226         struct memif_region *r;
227
228         r = proc_private->regions[mq->region];
229         if (r == NULL)
230                 return NULL;
231
232         return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
233 }
234
235 static void *
236 memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
237 {
238         return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
239 }
240
241 /* Free mbufs received by master */
242 static void
243 memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
244 {
245         uint16_t mask = (1 << mq->log2_ring_size) - 1;
246         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
247
248         /* FIXME: improve performance */
249         while (mq->last_tail != ring->tail) {
250                 RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
251                 /* Decrement refcnt and free mbuf. (current segment) */
252                 rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
253                 rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
254                 mq->last_tail++;
255         }
256 }
257
258 static int
259 memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
260                     struct rte_mbuf *tail)
261 {
262         /* Check for number-of-segments-overflow */
263         if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
264                 return -EOVERFLOW;
265
266         /* Chain 'tail' onto the old tail */
267         cur_tail->next = tail;
268
269         /* accumulate number of segments and total length. */
270         head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
271
272         tail->pkt_len = tail->data_len;
273         head->pkt_len += tail->pkt_len;
274
275         return 0;
276 }
277
278 static uint16_t
279 eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
280 {
281         struct memif_queue *mq = queue;
282         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
283         struct pmd_process_private *proc_private =
284                 rte_eth_devices[mq->in_port].process_private;
285         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
286         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
287         uint16_t n_rx_pkts = 0;
288         uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
289                 RTE_PKTMBUF_HEADROOM;
290         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
291         memif_ring_type_t type = mq->type;
292         memif_desc_t *d0;
293         struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
294         uint64_t b;
295         ssize_t size __rte_unused;
296         uint16_t head;
297         int ret;
298         struct rte_eth_link link;
299
300         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
301                 return 0;
302         if (unlikely(ring == NULL)) {
303                 /* Secondary process will attempt to request regions. */
304                 ret = rte_eth_link_get(mq->in_port, &link);
305                 if (ret < 0)
306                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
307                                 mq->in_port, rte_strerror(-ret));
308                 return 0;
309         }
310
311         /* consume interrupt */
312         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
313                 size = read(mq->intr_handle.fd, &b, sizeof(b));
314
315         ring_size = 1 << mq->log2_ring_size;
316         mask = ring_size - 1;
317
318         if (type == MEMIF_RING_S2M) {
319                 cur_slot = mq->last_head;
320                 last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
321         } else {
322                 cur_slot = mq->last_tail;
323                 last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
324         }
325
326         if (cur_slot == last_slot)
327                 goto refill;
328         n_slots = last_slot - cur_slot;
329
330         while (n_slots && n_rx_pkts < nb_pkts) {
331                 mbuf_head = rte_pktmbuf_alloc(mq->mempool);
332                 if (unlikely(mbuf_head == NULL))
333                         goto no_free_bufs;
334                 mbuf = mbuf_head;
335                 mbuf->port = mq->in_port;
336
337 next_slot:
338                 s0 = cur_slot & mask;
339                 d0 = &ring->desc[s0];
340
341                 src_len = d0->length;
342                 dst_off = 0;
343                 src_off = 0;
344
345                 do {
346                         dst_len = mbuf_size - dst_off;
347                         if (dst_len == 0) {
348                                 dst_off = 0;
349                                 dst_len = mbuf_size;
350
351                                 /* store pointer to tail */
352                                 mbuf_tail = mbuf;
353                                 mbuf = rte_pktmbuf_alloc(mq->mempool);
354                                 if (unlikely(mbuf == NULL))
355                                         goto no_free_bufs;
356                                 mbuf->port = mq->in_port;
357                                 ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
358                                 if (unlikely(ret < 0)) {
359                                         MIF_LOG(ERR, "number-of-segments-overflow");
360                                         rte_pktmbuf_free(mbuf);
361                                         goto no_free_bufs;
362                                 }
363                         }
364                         cp_len = RTE_MIN(dst_len, src_len);
365
366                         rte_pktmbuf_data_len(mbuf) += cp_len;
367                         rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
368                         if (mbuf != mbuf_head)
369                                 rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
370
371                         memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
372                                (uint8_t *)memif_get_buffer(proc_private, d0) + src_off,
373                                cp_len);
374
375                         src_off += cp_len;
376                         dst_off += cp_len;
377                         src_len -= cp_len;
378                 } while (src_len);
379
380                 cur_slot++;
381                 n_slots--;
382
383                 if (d0->flags & MEMIF_DESC_FLAG_NEXT)
384                         goto next_slot;
385
386                 mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
387                 *bufs++ = mbuf_head;
388                 n_rx_pkts++;
389         }
390
391 no_free_bufs:
392         if (type == MEMIF_RING_S2M) {
393                 __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
394                 mq->last_head = cur_slot;
395         } else {
396                 mq->last_tail = cur_slot;
397         }
398
399 refill:
400         if (type == MEMIF_RING_M2S) {
401                 head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
402                 n_slots = ring_size - head + mq->last_tail;
403
404                 while (n_slots--) {
405                         s0 = head++ & mask;
406                         d0 = &ring->desc[s0];
407                         d0->length = pmd->run.pkt_buffer_size;
408                 }
409                 __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
410         }
411
412         mq->n_pkts += n_rx_pkts;
413         return n_rx_pkts;
414 }
415
416 static uint16_t
417 eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
418 {
419         struct memif_queue *mq = queue;
420         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
421         struct pmd_process_private *proc_private =
422                 rte_eth_devices[mq->in_port].process_private;
423         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
424         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
425         uint16_t n_rx_pkts = 0;
426         memif_desc_t *d0;
427         struct rte_mbuf *mbuf, *mbuf_tail;
428         struct rte_mbuf *mbuf_head = NULL;
429         int ret;
430         struct rte_eth_link link;
431
432         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
433                 return 0;
434         if (unlikely(ring == NULL)) {
435                 /* Secondary process will attempt to request regions. */
436                 rte_eth_link_get(mq->in_port, &link);
437                 return 0;
438         }
439
440         /* consume interrupt */
441         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
442                 uint64_t b;
443                 ssize_t size __rte_unused;
444                 size = read(mq->intr_handle.fd, &b, sizeof(b));
445         }
446
447         ring_size = 1 << mq->log2_ring_size;
448         mask = ring_size - 1;
449
450         cur_slot = mq->last_tail;
451         last_slot = ring->tail;
452         if (cur_slot == last_slot)
453                 goto refill;
454         n_slots = last_slot - cur_slot;
455
456         while (n_slots && n_rx_pkts < nb_pkts) {
457                 s0 = cur_slot & mask;
458
459                 d0 = &ring->desc[s0];
460                 mbuf_head = mq->buffers[s0];
461                 mbuf = mbuf_head;
462
463 next_slot:
464                 /* prefetch next descriptor */
465                 if (n_rx_pkts + 1 < nb_pkts)
466                         rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);
467
468                 mbuf->port = mq->in_port;
469                 rte_pktmbuf_data_len(mbuf) = d0->length;
470                 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
471
472                 mq->n_bytes += rte_pktmbuf_data_len(mbuf);
473
474                 cur_slot++;
475                 n_slots--;
476                 if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
477                         s0 = cur_slot & mask;
478                         d0 = &ring->desc[s0];
479                         mbuf_tail = mbuf;
480                         mbuf = mq->buffers[s0];
481                         ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
482                         if (unlikely(ret < 0)) {
483                                 MIF_LOG(ERR, "number-of-segments-overflow");
484                                 goto refill;
485                         }
486                         goto next_slot;
487                 }
488
489                 *bufs++ = mbuf_head;
490                 n_rx_pkts++;
491         }
492
493         mq->last_tail = cur_slot;
494
495 /* Supply master with new buffers */
496 refill:
497         head = ring->head;
498         n_slots = ring_size - head + mq->last_tail;
499
500         if (n_slots < 32)
501                 goto no_free_mbufs;
502
503         ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
504         if (unlikely(ret < 0))
505                 goto no_free_mbufs;
506
507         while (n_slots--) {
508                 s0 = head++ & mask;
509                 if (n_slots > 0)
510                         rte_prefetch0(mq->buffers[head & mask]);
511                 d0 = &ring->desc[s0];
512                 /* store buffer header */
513                 mbuf = mq->buffers[s0];
514                 /* populate descriptor */
515                 d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
516                                 RTE_PKTMBUF_HEADROOM;
517                 d0->region = 1;
518                 d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
519                         (uint8_t *)proc_private->regions[d0->region]->addr;
520         }
521 no_free_mbufs:
522         rte_mb();
523         ring->head = head;
524
525         mq->n_pkts += n_rx_pkts;
526
527         return n_rx_pkts;
528 }
529
530 static uint16_t
531 eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
532 {
533         struct memif_queue *mq = queue;
534         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
535         struct pmd_process_private *proc_private =
536                 rte_eth_devices[mq->in_port].process_private;
537         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
538         uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
539         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
540         memif_ring_type_t type = mq->type;
541         memif_desc_t *d0;
542         struct rte_mbuf *mbuf;
543         struct rte_mbuf *mbuf_head;
544         uint64_t a;
545         ssize_t size;
546         struct rte_eth_link link;
547
548         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
549                 return 0;
550         if (unlikely(ring == NULL)) {
551                 int ret;
552
553                 /* Secondary process will attempt to request regions. */
554                 ret = rte_eth_link_get(mq->in_port, &link);
555                 if (ret < 0)
556                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
557                                 mq->in_port, rte_strerror(-ret));
558                 return 0;
559         }
560
561         ring_size = 1 << mq->log2_ring_size;
562         mask = ring_size - 1;
563
564         n_free = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE) - mq->last_tail;
565         mq->last_tail += n_free;
566
567         if (type == MEMIF_RING_S2M) {
568                 slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
569                 n_free = ring_size - slot + mq->last_tail;
570         } else {
571                 slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
572                 n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
573         }
574
575         while (n_tx_pkts < nb_pkts && n_free) {
576                 mbuf_head = *bufs++;
577                 mbuf = mbuf_head;
578
579                 saved_slot = slot;
580                 d0 = &ring->desc[slot & mask];
581                 dst_off = 0;
582                 dst_len = (type == MEMIF_RING_S2M) ?
583                         pmd->run.pkt_buffer_size : d0->length;
584
585 next_in_chain:
586                 src_off = 0;
587                 src_len = rte_pktmbuf_data_len(mbuf);
588
589                 while (src_len) {
590                         if (dst_len == 0) {
591                                 if (n_free) {
592                                         slot++;
593                                         n_free--;
594                                         d0->flags |= MEMIF_DESC_FLAG_NEXT;
595                                         d0 = &ring->desc[slot & mask];
596                                         dst_off = 0;
597                                         dst_len = (type == MEMIF_RING_S2M) ?
598                                             pmd->run.pkt_buffer_size : d0->length;
599                                         d0->flags = 0;
600                                 } else {
601                                         slot = saved_slot;
602                                         goto no_free_slots;
603                                 }
604                         }
605                         cp_len = RTE_MIN(dst_len, src_len);
606
607                         memcpy((uint8_t *)memif_get_buffer(proc_private, d0) + dst_off,
608                                rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
609                                cp_len);
610
611                         mq->n_bytes += cp_len;
612                         src_off += cp_len;
613                         dst_off += cp_len;
614                         src_len -= cp_len;
615                         dst_len -= cp_len;
616
617                         d0->length = dst_off;
618                 }
619
620                 if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
621                         mbuf = mbuf->next;
622                         goto next_in_chain;
623                 }
624
625                 n_tx_pkts++;
626                 slot++;
627                 n_free--;
628                 rte_pktmbuf_free(mbuf_head);
629         }
630
631 no_free_slots:
632         if (type == MEMIF_RING_S2M)
633                 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
634         else
635                 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
636
637         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
638                 a = 1;
639                 size = write(mq->intr_handle.fd, &a, sizeof(a));
640                 if (unlikely(size < 0)) {
641                         MIF_LOG(WARNING,
642                                 "Failed to send interrupt. %s", strerror(errno));
643                 }
644         }
645
646         mq->n_pkts += n_tx_pkts;
647         return n_tx_pkts;
648 }
649
650
651 static int
652 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
653                 memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
654                 uint16_t slot, uint16_t n_free)
655 {
656         memif_desc_t *d0;
657         int used_slots = 1;
658
659 next_in_chain:
660         /* store pointer to mbuf to free it later */
661         mq->buffers[slot & mask] = mbuf;
662         /* Increment refcnt to make sure the buffer is not freed before master
663          * receives it. (current segment)
664          */
665         rte_mbuf_refcnt_update(mbuf, 1);
666         /* populate descriptor */
667         d0 = &ring->desc[slot & mask];
668         d0->length = rte_pktmbuf_data_len(mbuf);
669         /* FIXME: get region index */
670         d0->region = 1;
671         d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
672                 (uint8_t *)proc_private->regions[d0->region]->addr;
673         d0->flags = 0;
674
675         /* check if buffer is chained */
676         if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
677                 if (n_free < 2)
678                         return 0;
679                 /* mark buffer as chained */
680                 d0->flags |= MEMIF_DESC_FLAG_NEXT;
681                 /* advance mbuf */
682                 mbuf = mbuf->next;
683                 /* update counters */
684                 used_slots++;
685                 slot++;
686                 n_free--;
687                 goto next_in_chain;
688         }
689         return used_slots;
690 }
691
692 static uint16_t
693 eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
694 {
695         struct memif_queue *mq = queue;
696         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
697         struct pmd_process_private *proc_private =
698                 rte_eth_devices[mq->in_port].process_private;
699         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
700         uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
701         memif_ring_type_t type = mq->type;
702         struct rte_eth_link link;
703
704         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
705                 return 0;
706         if (unlikely(ring == NULL)) {
707                 /* Secondary process will attempt to request regions. */
708                 rte_eth_link_get(mq->in_port, &link);
709                 return 0;
710         }
711
712         ring_size = 1 << mq->log2_ring_size;
713         mask = ring_size - 1;
714
715         /* free mbufs received by master */
716         memif_free_stored_mbufs(proc_private, mq);
717
718         /* ring type always MEMIF_RING_S2M */
719         slot = ring->head;
720         n_free = ring_size - ring->head + mq->last_tail;
721
722         int used_slots;
723
724         while (n_free && (n_tx_pkts < nb_pkts)) {
725                 while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
726                         if ((nb_pkts - n_tx_pkts) > 8) {
727                                 rte_prefetch0(*bufs + 4);
728                                 rte_prefetch0(*bufs + 5);
729                                 rte_prefetch0(*bufs + 6);
730                                 rte_prefetch0(*bufs + 7);
731                         }
732                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
733                                 mask, slot, n_free);
734                         if (unlikely(used_slots < 1))
735                                 goto no_free_slots;
736                         n_tx_pkts++;
737                         slot += used_slots;
738                         n_free -= used_slots;
739
740                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
741                                 mask, slot, n_free);
742                         if (unlikely(used_slots < 1))
743                                 goto no_free_slots;
744                         n_tx_pkts++;
745                         slot += used_slots;
746                         n_free -= used_slots;
747
748                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
749                                 mask, slot, n_free);
750                         if (unlikely(used_slots < 1))
751                                 goto no_free_slots;
752                         n_tx_pkts++;
753                         slot += used_slots;
754                         n_free -= used_slots;
755
756                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
757                                 mask, slot, n_free);
758                         if (unlikely(used_slots < 1))
759                                 goto no_free_slots;
760                         n_tx_pkts++;
761                         slot += used_slots;
762                         n_free -= used_slots;
763                 }
764                 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
765                         mask, slot, n_free);
766                 if (unlikely(used_slots < 1))
767                         goto no_free_slots;
768                 n_tx_pkts++;
769                 slot += used_slots;
770                 n_free -= used_slots;
771         }
772
773 no_free_slots:
774         rte_mb();
775         /* update ring pointers */
776         if (type == MEMIF_RING_S2M)
777                 ring->head = slot;
778         else
779                 ring->tail = slot;
780
781         /* Send interrupt, if enabled. */
782         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
783                 uint64_t a = 1;
784                 ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
785                 if (unlikely(size < 0)) {
786                         MIF_LOG(WARNING,
787                                 "Failed to send interrupt. %s", strerror(errno));
788                 }
789         }
790
791         /* increment queue counters */
792         mq->n_pkts += n_tx_pkts;
793
794         return n_tx_pkts;
795 }
796
797 void
798 memif_free_regions(struct rte_eth_dev *dev)
799 {
800         struct pmd_process_private *proc_private = dev->process_private;
801         struct pmd_internals *pmd = dev->data->dev_private;
802         int i;
803         struct memif_region *r;
804
805         /* regions are allocated contiguously, so it's
806          * enough to loop until 'proc_private->regions_num'
807          */
808         for (i = 0; i < proc_private->regions_num; i++) {
809                 r = proc_private->regions[i];
810                 if (r != NULL) {
811                         /* This is memzone */
812                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
813                                 r->addr = NULL;
814                                 if (r->fd > 0)
815                                         close(r->fd);
816                         }
817                         if (r->addr != NULL) {
818                                 munmap(r->addr, r->region_size);
819                                 if (r->fd > 0) {
820                                         close(r->fd);
821                                         r->fd = -1;
822                                 }
823                         }
824                         rte_free(r);
825                         proc_private->regions[i] = NULL;
826                 }
827         }
828         proc_private->regions_num = 0;
829 }
830
831 static int
832 memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
833                      void *arg)
834 {
835         struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
836         struct memif_region *r;
837
838         if (proc_private->regions_num < 1) {
839                 MIF_LOG(ERR, "Missing descriptor region");
840                 return -1;
841         }
842
843         r = proc_private->regions[proc_private->regions_num - 1];
844
845         if (r->addr != msl->base_va)
846                 r = proc_private->regions[++proc_private->regions_num - 1];
847
848         if (r == NULL) {
849                 r = rte_zmalloc("region", sizeof(struct memif_region), 0);
850                 if (r == NULL) {
851                         MIF_LOG(ERR, "Failed to alloc memif region.");
852                         return -ENOMEM;
853                 }
854
855                 r->addr = msl->base_va;
856                 r->region_size = ms->len;
857                 r->fd = rte_memseg_get_fd(ms);
858                 if (r->fd < 0)
859                         return -1;
860                 r->pkt_buffer_offset = 0;
861
862                 proc_private->regions[proc_private->regions_num - 1] = r;
863         } else {
864                 r->region_size += ms->len;
865         }
866
867         return 0;
868 }
869
870 static int
871 memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
872 {
873         struct pmd_internals *pmd = dev->data->dev_private;
874         struct pmd_process_private *proc_private = dev->process_private;
875         char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
876         int ret = 0;
877         struct memif_region *r;
878
879         if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
880                 MIF_LOG(ERR, "Too many regions.");
881                 return -1;
882         }
883
884         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
885         if (r == NULL) {
886                 MIF_LOG(ERR, "Failed to alloc memif region.");
887                 return -ENOMEM;
888         }
889
890         /* calculate buffer offset */
891         r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) *
892             (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
893             (1 << pmd->run.log2_ring_size));
894
895         r->region_size = r->pkt_buffer_offset;
896         /* if region has buffers, add buffers size to region_size */
897         if (has_buffers == 1)
898                 r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
899                         (1 << pmd->run.log2_ring_size) *
900                         (pmd->run.num_s2m_rings +
901                          pmd->run.num_m2s_rings));
902
903         memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
904         snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
905                  proc_private->regions_num);
906
907         r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
908         if (r->fd < 0) {
909                 MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
910                 ret = -1;
911                 goto error;
912         }
913
914         ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
915         if (ret < 0) {
916                 MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
917                 goto error;
918         }
919
920         ret = ftruncate(r->fd, r->region_size);
921         if (ret < 0) {
922                 MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
923                 goto error;
924         }
925
926         r->addr = mmap(NULL, r->region_size, PROT_READ |
927                        PROT_WRITE, MAP_SHARED, r->fd, 0);
928         if (r->addr == MAP_FAILED) {
929                 MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(ret));
930                 ret = -1;
931                 goto error;
932         }
933
934         proc_private->regions[proc_private->regions_num] = r;
935         proc_private->regions_num++;
936
937         return ret;
938
939 error:
940         if (r->fd > 0)
941                 close(r->fd);
942         r->fd = -1;
943
944         return ret;
945 }
946
947 static int
948 memif_regions_init(struct rte_eth_dev *dev)
949 {
950         struct pmd_internals *pmd = dev->data->dev_private;
951         int ret;
952
953         /*
954          * Zero-copy exposes dpdk memory.
955          * Each memseg list will be represented by memif region.
956          * Zero-copy regions indexing: memseg list idx + 1,
957          * as we already have region 0 reserved for descriptors.
958          */
959         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
960                 /* create region idx 0 containing descriptors */
961                 ret = memif_region_init_shm(dev, 0);
962                 if (ret < 0)
963                         return ret;
964                 ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
965                 if (ret < 0)
966                         return ret;
967         } else {
968                 /* create one memory region contaning rings and buffers */
969                 ret = memif_region_init_shm(dev, /* has buffers */ 1);
970                 if (ret < 0)
971                         return ret;
972         }
973
974         return 0;
975 }
976
977 static void
978 memif_init_rings(struct rte_eth_dev *dev)
979 {
980         struct pmd_internals *pmd = dev->data->dev_private;
981         struct pmd_process_private *proc_private = dev->process_private;
982         memif_ring_t *ring;
983         int i, j;
984         uint16_t slot;
985
986         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
987                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2M, i);
988                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
989                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
990                 ring->cookie = MEMIF_COOKIE;
991                 ring->flags = 0;
992
993                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
994                         continue;
995
996                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
997                         slot = i * (1 << pmd->run.log2_ring_size) + j;
998                         ring->desc[j].region = 0;
999                         ring->desc[j].offset =
1000                                 proc_private->regions[0]->pkt_buffer_offset +
1001                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1002                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1003                 }
1004         }
1005
1006         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1007                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_M2S, i);
1008                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1009                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1010                 ring->cookie = MEMIF_COOKIE;
1011                 ring->flags = 0;
1012
1013                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1014                         continue;
1015
1016                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1017                         slot = (i + pmd->run.num_s2m_rings) *
1018                             (1 << pmd->run.log2_ring_size) + j;
1019                         ring->desc[j].region = 0;
1020                         ring->desc[j].offset =
1021                                 proc_private->regions[0]->pkt_buffer_offset +
1022                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1023                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1024                 }
1025         }
1026 }
1027
1028 /* called only by slave */
1029 static int
1030 memif_init_queues(struct rte_eth_dev *dev)
1031 {
1032         struct pmd_internals *pmd = dev->data->dev_private;
1033         struct memif_queue *mq;
1034         int i;
1035
1036         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1037                 mq = dev->data->tx_queues[i];
1038                 mq->log2_ring_size = pmd->run.log2_ring_size;
1039                 /* queues located only in region 0 */
1040                 mq->region = 0;
1041                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2M, i);
1042                 mq->last_head = 0;
1043                 mq->last_tail = 0;
1044                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1045                 if (mq->intr_handle.fd < 0) {
1046                         MIF_LOG(WARNING,
1047                                 "Failed to create eventfd for tx queue %d: %s.", i,
1048                                 strerror(errno));
1049                 }
1050                 mq->buffers = NULL;
1051                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1052                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1053                                                   (1 << mq->log2_ring_size), 0);
1054                         if (mq->buffers == NULL)
1055                                 return -ENOMEM;
1056                 }
1057         }
1058
1059         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1060                 mq = dev->data->rx_queues[i];
1061                 mq->log2_ring_size = pmd->run.log2_ring_size;
1062                 /* queues located only in region 0 */
1063                 mq->region = 0;
1064                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_M2S, i);
1065                 mq->last_head = 0;
1066                 mq->last_tail = 0;
1067                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1068                 if (mq->intr_handle.fd < 0) {
1069                         MIF_LOG(WARNING,
1070                                 "Failed to create eventfd for rx queue %d: %s.", i,
1071                                 strerror(errno));
1072                 }
1073                 mq->buffers = NULL;
1074                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1075                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1076                                                   (1 << mq->log2_ring_size), 0);
1077                         if (mq->buffers == NULL)
1078                                 return -ENOMEM;
1079                 }
1080         }
1081         return 0;
1082 }
1083
/*
 * Set up shared memory for the interface: create the regions, initialize
 * the rings inside them and then bind the queues to the rings.
 *
 * @return 0 on success, the negative error of the failing step otherwise
 */
int
memif_init_regions_and_queues(struct rte_eth_dev *dev)
{
        int rc = memif_regions_init(dev);

        if (rc >= 0) {
                /* rings live in the regions, so regions come first */
                memif_init_rings(dev);
                rc = memif_init_queues(dev);
        }

        return (rc < 0) ? rc : 0;
}
1101
1102 int
1103 memif_connect(struct rte_eth_dev *dev)
1104 {
1105         struct pmd_internals *pmd = dev->data->dev_private;
1106         struct pmd_process_private *proc_private = dev->process_private;
1107         struct memif_region *mr;
1108         struct memif_queue *mq;
1109         memif_ring_t *ring;
1110         int i;
1111
1112         for (i = 0; i < proc_private->regions_num; i++) {
1113                 mr = proc_private->regions[i];
1114                 if (mr != NULL) {
1115                         if (mr->addr == NULL) {
1116                                 if (mr->fd < 0)
1117                                         return -1;
1118                                 mr->addr = mmap(NULL, mr->region_size,
1119                                                 PROT_READ | PROT_WRITE,
1120                                                 MAP_SHARED, mr->fd, 0);
1121                                 if (mr->addr == MAP_FAILED) {
1122                                         MIF_LOG(ERR, "mmap failed: %s\n",
1123                                                 strerror(errno));
1124                                         return -1;
1125                                 }
1126                         }
1127                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
1128                                 /* close memseg file */
1129                                 close(mr->fd);
1130                                 mr->fd = -1;
1131                         }
1132                 }
1133         }
1134
1135         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1136                 for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1137                         mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
1138                             dev->data->tx_queues[i] : dev->data->rx_queues[i];
1139                         ring = memif_get_ring_from_queue(proc_private, mq);
1140                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1141                                 MIF_LOG(ERR, "Wrong ring");
1142                                 return -1;
1143                         }
1144                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1145                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1146                         mq->last_head = 0;
1147                         mq->last_tail = 0;
1148                         /* enable polling mode */
1149                         if (pmd->role == MEMIF_ROLE_MASTER)
1150                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1151                 }
1152                 for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1153                         mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
1154                             dev->data->rx_queues[i] : dev->data->tx_queues[i];
1155                         ring = memif_get_ring_from_queue(proc_private, mq);
1156                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1157                                 MIF_LOG(ERR, "Wrong ring");
1158                                 return -1;
1159                         }
1160                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1161                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1162                         mq->last_head = 0;
1163                         mq->last_tail = 0;
1164                         /* enable polling mode */
1165                         if (pmd->role == MEMIF_ROLE_SLAVE)
1166                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1167                 }
1168
1169                 pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
1170                 pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
1171                 dev->data->dev_link.link_status = ETH_LINK_UP;
1172         }
1173         MIF_LOG(INFO, "Connected.");
1174         return 0;
1175 }
1176
1177 static int
1178 memif_dev_start(struct rte_eth_dev *dev)
1179 {
1180         struct pmd_internals *pmd = dev->data->dev_private;
1181         int ret = 0;
1182
1183         switch (pmd->role) {
1184         case MEMIF_ROLE_SLAVE:
1185                 ret = memif_connect_slave(dev);
1186                 break;
1187         case MEMIF_ROLE_MASTER:
1188                 ret = memif_connect_master(dev);
1189                 break;
1190         default:
1191                 MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
1192                 ret = -1;
1193                 break;
1194         }
1195
1196         return ret;
1197 }
1198
1199 static void
1200 memif_dev_close(struct rte_eth_dev *dev)
1201 {
1202         struct pmd_internals *pmd = dev->data->dev_private;
1203         int i;
1204
1205         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1206                 memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
1207                 memif_disconnect(dev);
1208
1209                 for (i = 0; i < dev->data->nb_rx_queues; i++)
1210                         (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]);
1211                 for (i = 0; i < dev->data->nb_tx_queues; i++)
1212                         (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]);
1213
1214                 memif_socket_remove_device(dev);
1215         } else {
1216                 memif_disconnect(dev);
1217         }
1218
1219         rte_free(dev->process_private);
1220 }
1221
1222 static int
1223 memif_dev_configure(struct rte_eth_dev *dev)
1224 {
1225         struct pmd_internals *pmd = dev->data->dev_private;
1226
1227         /*
1228          * SLAVE - TXQ
1229          * MASTER - RXQ
1230          */
1231         pmd->cfg.num_s2m_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
1232                                   dev->data->nb_tx_queues : dev->data->nb_rx_queues;
1233
1234         /*
1235          * SLAVE - RXQ
1236          * MASTER - TXQ
1237          */
1238         pmd->cfg.num_m2s_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
1239                                   dev->data->nb_rx_queues : dev->data->nb_tx_queues;
1240
1241         return 0;
1242 }
1243
1244 static int
1245 memif_tx_queue_setup(struct rte_eth_dev *dev,
1246                      uint16_t qid,
1247                      uint16_t nb_tx_desc __rte_unused,
1248                      unsigned int socket_id __rte_unused,
1249                      const struct rte_eth_txconf *tx_conf __rte_unused)
1250 {
1251         struct pmd_internals *pmd = dev->data->dev_private;
1252         struct memif_queue *mq;
1253
1254         mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
1255         if (mq == NULL) {
1256                 MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
1257                 return -ENOMEM;
1258         }
1259
1260         mq->type =
1261             (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S;
1262         mq->n_pkts = 0;
1263         mq->n_bytes = 0;
1264         mq->intr_handle.fd = -1;
1265         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1266         mq->in_port = dev->data->port_id;
1267         dev->data->tx_queues[qid] = mq;
1268
1269         return 0;
1270 }
1271
1272 static int
1273 memif_rx_queue_setup(struct rte_eth_dev *dev,
1274                      uint16_t qid,
1275                      uint16_t nb_rx_desc __rte_unused,
1276                      unsigned int socket_id __rte_unused,
1277                      const struct rte_eth_rxconf *rx_conf __rte_unused,
1278                      struct rte_mempool *mb_pool)
1279 {
1280         struct pmd_internals *pmd = dev->data->dev_private;
1281         struct memif_queue *mq;
1282
1283         mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
1284         if (mq == NULL) {
1285                 MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
1286                 return -ENOMEM;
1287         }
1288
1289         mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M;
1290         mq->n_pkts = 0;
1291         mq->n_bytes = 0;
1292         mq->intr_handle.fd = -1;
1293         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1294         mq->mempool = mb_pool;
1295         mq->in_port = dev->data->port_id;
1296         dev->data->rx_queues[qid] = mq;
1297
1298         return 0;
1299 }
1300
/*
 * Free a queue structure created by the rx/tx queue setup callbacks.
 * A NULL queue is ignored.
 */
static void
memif_queue_release(void *queue)
{
        struct memif_queue *mq = queue;

        if (mq != NULL)
                rte_free(mq);
}
1311
1312 static int
1313 memif_link_update(struct rte_eth_dev *dev,
1314                   int wait_to_complete __rte_unused)
1315 {
1316         struct pmd_process_private *proc_private;
1317
1318         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1319                 proc_private = dev->process_private;
1320                 if (dev->data->dev_link.link_status == ETH_LINK_UP &&
1321                                 proc_private->regions_num == 0) {
1322                         memif_mp_request_regions(dev);
1323                 } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN &&
1324                                 proc_private->regions_num > 0) {
1325                         memif_free_regions(dev);
1326                 }
1327         }
1328         return 0;
1329 }
1330
1331 static int
1332 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1333 {
1334         struct pmd_internals *pmd = dev->data->dev_private;
1335         struct memif_queue *mq;
1336         int i;
1337         uint8_t tmp, nq;
1338
1339         stats->ipackets = 0;
1340         stats->ibytes = 0;
1341         stats->opackets = 0;
1342         stats->obytes = 0;
1343
1344         tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings :
1345             pmd->run.num_m2s_rings;
1346         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1347             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1348
1349         /* RX stats */
1350         for (i = 0; i < nq; i++) {
1351                 mq = dev->data->rx_queues[i];
1352                 stats->q_ipackets[i] = mq->n_pkts;
1353                 stats->q_ibytes[i] = mq->n_bytes;
1354                 stats->ipackets += mq->n_pkts;
1355                 stats->ibytes += mq->n_bytes;
1356         }
1357
1358         tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings :
1359             pmd->run.num_s2m_rings;
1360         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1361             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1362
1363         /* TX stats */
1364         for (i = 0; i < nq; i++) {
1365                 mq = dev->data->tx_queues[i];
1366                 stats->q_opackets[i] = mq->n_pkts;
1367                 stats->q_obytes[i] = mq->n_bytes;
1368                 stats->opackets += mq->n_pkts;
1369                 stats->obytes += mq->n_bytes;
1370         }
1371         return 0;
1372 }
1373
1374 static int
1375 memif_stats_reset(struct rte_eth_dev *dev)
1376 {
1377         struct pmd_internals *pmd = dev->data->dev_private;
1378         int i;
1379         struct memif_queue *mq;
1380
1381         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1382                 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->tx_queues[i] :
1383                     dev->data->rx_queues[i];
1384                 mq->n_pkts = 0;
1385                 mq->n_bytes = 0;
1386         }
1387         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1388                 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->rx_queues[i] :
1389                     dev->data->tx_queues[i];
1390                 mq->n_pkts = 0;
1391                 mq->n_bytes = 0;
1392         }
1393
1394         return 0;
1395 }
1396
1397 static int
1398 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1399                            uint16_t qid __rte_unused)
1400 {
1401         MIF_LOG(WARNING, "Interrupt mode not supported.");
1402
1403         return -1;
1404 }
1405
1406 static int
1407 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1408 {
1409         struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1410
1411         return 0;
1412 }
1413
1414 static const struct eth_dev_ops ops = {
1415         .dev_start = memif_dev_start,
1416         .dev_close = memif_dev_close,
1417         .dev_infos_get = memif_dev_info,
1418         .dev_configure = memif_dev_configure,
1419         .tx_queue_setup = memif_tx_queue_setup,
1420         .rx_queue_setup = memif_rx_queue_setup,
1421         .rx_queue_release = memif_queue_release,
1422         .tx_queue_release = memif_queue_release,
1423         .rx_queue_intr_enable = memif_rx_queue_intr_enable,
1424         .rx_queue_intr_disable = memif_rx_queue_intr_disable,
1425         .link_update = memif_link_update,
1426         .stats_get = memif_stats_get,
1427         .stats_reset = memif_stats_reset,
1428 };
1429
1430 static int
1431 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1432              memif_interface_id_t id, uint32_t flags,
1433              const char *socket_filename,
1434              memif_log2_ring_size_t log2_ring_size,
1435              uint16_t pkt_buffer_size, const char *secret,
1436              struct rte_ether_addr *ether_addr)
1437 {
1438         int ret = 0;
1439         struct rte_eth_dev *eth_dev;
1440         struct rte_eth_dev_data *data;
1441         struct pmd_internals *pmd;
1442         struct pmd_process_private *process_private;
1443         const unsigned int numa_node = vdev->device.numa_node;
1444         const char *name = rte_vdev_device_name(vdev);
1445
1446         eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1447         if (eth_dev == NULL) {
1448                 MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1449                 return -1;
1450         }
1451
1452         process_private = (struct pmd_process_private *)
1453                 rte_zmalloc(name, sizeof(struct pmd_process_private),
1454                             RTE_CACHE_LINE_SIZE);
1455
1456         if (process_private == NULL) {
1457                 MIF_LOG(ERR, "Failed to alloc memory for process private");
1458                 return -1;
1459         }
1460         eth_dev->process_private = process_private;
1461
1462         pmd = eth_dev->data->dev_private;
1463         memset(pmd, 0, sizeof(*pmd));
1464
1465         pmd->id = id;
1466         pmd->flags = flags;
1467         pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1468         pmd->role = role;
1469         /* Zero-copy flag irelevant to master. */
1470         if (pmd->role == MEMIF_ROLE_MASTER)
1471                 pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1472
1473         ret = memif_socket_init(eth_dev, socket_filename);
1474         if (ret < 0)
1475                 return ret;
1476
1477         memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
1478         if (secret != NULL)
1479                 strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1480
1481         pmd->cfg.log2_ring_size = log2_ring_size;
1482         /* set in .dev_configure() */
1483         pmd->cfg.num_s2m_rings = 0;
1484         pmd->cfg.num_m2s_rings = 0;
1485
1486         pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1487
1488         data = eth_dev->data;
1489         data->dev_private = pmd;
1490         data->numa_node = numa_node;
1491         data->mac_addrs = ether_addr;
1492
1493         eth_dev->dev_ops = &ops;
1494         eth_dev->device = &vdev->device;
1495         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1496                 eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1497                 eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1498         } else {
1499                 eth_dev->rx_pkt_burst = eth_memif_rx;
1500                 eth_dev->tx_pkt_burst = eth_memif_tx;
1501         }
1502
1503
1504         eth_dev->data->dev_flags &= RTE_ETH_DEV_CLOSE_REMOVE;
1505
1506         rte_eth_dev_probing_finish(eth_dev);
1507
1508         return 0;
1509 }
1510
1511 static int
1512 memif_set_role(const char *key __rte_unused, const char *value,
1513                void *extra_args)
1514 {
1515         enum memif_role_t *role = (enum memif_role_t *)extra_args;
1516
1517         if (strstr(value, "master") != NULL) {
1518                 *role = MEMIF_ROLE_MASTER;
1519         } else if (strstr(value, "slave") != NULL) {
1520                 *role = MEMIF_ROLE_SLAVE;
1521         } else {
1522                 MIF_LOG(ERR, "Unknown role: %s.", value);
1523                 return -EINVAL;
1524         }
1525         return 0;
1526 }
1527
1528 static int
1529 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1530 {
1531         uint32_t *flags = (uint32_t *)extra_args;
1532
1533         if (strstr(value, "yes") != NULL) {
1534                 if (!rte_mcfg_get_single_file_segments()) {
1535                         MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1536                         return -ENOTSUP;
1537                 }
1538                 *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1539         } else if (strstr(value, "no") != NULL) {
1540                 *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1541         } else {
1542                 MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1543                 return -EINVAL;
1544         }
1545         return 0;
1546 }
1547
1548 static int
1549 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1550 {
1551         memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1552
1553         /* even if parsing fails, 0 is a valid id */
1554         *id = strtoul(value, NULL, 10);
1555         return 0;
1556 }
1557
1558 static int
1559 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1560 {
1561         unsigned long tmp;
1562         uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1563
1564         tmp = strtoul(value, NULL, 10);
1565         if (tmp == 0 || tmp > 0xFFFF) {
1566                 MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1567                 return -EINVAL;
1568         }
1569         *pkt_buffer_size = tmp;
1570         return 0;
1571 }
1572
1573 static int
1574 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1575 {
1576         unsigned long tmp;
1577         memif_log2_ring_size_t *log2_ring_size =
1578             (memif_log2_ring_size_t *)extra_args;
1579
1580         tmp = strtoul(value, NULL, 10);
1581         if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1582                 MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1583                         value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1584                 return -EINVAL;
1585         }
1586         *log2_ring_size = tmp;
1587         return 0;
1588 }
1589
1590 /* check if directory exists and if we have permission to read/write */
1591 static int
1592 memif_check_socket_filename(const char *filename)
1593 {
1594         char *dir = NULL, *tmp;
1595         uint32_t idx;
1596         int ret = 0;
1597
1598         if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1599                 MIF_LOG(ERR, "Unix socket address too long (max 108).");
1600                 return -1;
1601         }
1602
1603         tmp = strrchr(filename, '/');
1604         if (tmp != NULL) {
1605                 idx = tmp - filename;
1606                 dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1607                 if (dir == NULL) {
1608                         MIF_LOG(ERR, "Failed to allocate memory.");
1609                         return -1;
1610                 }
1611                 strlcpy(dir, filename, sizeof(char) * (idx + 1));
1612         }
1613
1614         if (dir == NULL || (faccessat(-1, dir, F_OK | R_OK |
1615                                         W_OK, AT_EACCESS) < 0)) {
1616                 MIF_LOG(ERR, "Invalid socket directory.");
1617                 ret = -EINVAL;
1618         }
1619
1620         if (dir != NULL)
1621                 rte_free(dir);
1622
1623         return ret;
1624 }
1625
1626 static int
1627 memif_set_socket_filename(const char *key __rte_unused, const char *value,
1628                           void *extra_args)
1629 {
1630         const char **socket_filename = (const char **)extra_args;
1631
1632         *socket_filename = value;
1633         return memif_check_socket_filename(*socket_filename);
1634 }
1635
1636 static int
1637 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1638 {
1639         struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1640
1641         if (rte_ether_unformat_addr(value, ether_addr) < 0)
1642                 MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1643         return 0;
1644 }
1645
1646 static int
1647 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1648 {
1649         const char **secret = (const char **)extra_args;
1650
1651         *secret = value;
1652         return 0;
1653 }
1654
1655 static int
1656 rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1657 {
1658         RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1659         RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1660         int ret = 0;
1661         struct rte_kvargs *kvlist;
1662         const char *name = rte_vdev_device_name(vdev);
1663         enum memif_role_t role = MEMIF_ROLE_SLAVE;
1664         memif_interface_id_t id = 0;
1665         uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1666         memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1667         const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1668         uint32_t flags = 0;
1669         const char *secret = NULL;
1670         struct rte_ether_addr *ether_addr = rte_zmalloc("",
1671                 sizeof(struct rte_ether_addr), 0);
1672         struct rte_eth_dev *eth_dev;
1673
1674         rte_eth_random_addr(ether_addr->addr_bytes);
1675
1676         MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1677
1678         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1679                 eth_dev = rte_eth_dev_attach_secondary(name);
1680                 if (!eth_dev) {
1681                         MIF_LOG(ERR, "Failed to probe %s", name);
1682                         return -1;
1683                 }
1684
1685                 eth_dev->dev_ops = &ops;
1686                 eth_dev->device = &vdev->device;
1687                 eth_dev->rx_pkt_burst = eth_memif_rx;
1688                 eth_dev->tx_pkt_burst = eth_memif_tx;
1689
1690                 if (!rte_eal_primary_proc_alive(NULL)) {
1691                         MIF_LOG(ERR, "Primary process is missing");
1692                         return -1;
1693                 }
1694
1695                 eth_dev->process_private = (struct pmd_process_private *)
1696                         rte_zmalloc(name,
1697                                 sizeof(struct pmd_process_private),
1698                                 RTE_CACHE_LINE_SIZE);
1699                 if (eth_dev->process_private == NULL) {
1700                         MIF_LOG(ERR,
1701                                 "Failed to alloc memory for process private");
1702                         return -1;
1703                 }
1704
1705                 rte_eth_dev_probing_finish(eth_dev);
1706
1707                 return 0;
1708         }
1709
1710         ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1711         /*
1712          * Primary process can continue probing, but secondary process won't
1713          * be able to get memory regions information
1714          */
1715         if (ret < 0 && rte_errno != EEXIST)
1716                 MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1717                         strerror(rte_errno));
1718
1719         kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1720
1721         /* parse parameters */
1722         if (kvlist != NULL) {
1723                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1724                                          &memif_set_role, &role);
1725                 if (ret < 0)
1726                         goto exit;
1727                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1728                                          &memif_set_id, &id);
1729                 if (ret < 0)
1730                         goto exit;
1731                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1732                                          &memif_set_bs, &pkt_buffer_size);
1733                 if (ret < 0)
1734                         goto exit;
1735                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1736                                          &memif_set_rs, &log2_ring_size);
1737                 if (ret < 0)
1738                         goto exit;
1739                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1740                                          &memif_set_socket_filename,
1741                                          (void *)(&socket_filename));
1742                 if (ret < 0)
1743                         goto exit;
1744                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1745                                          &memif_set_mac, ether_addr);
1746                 if (ret < 0)
1747                         goto exit;
1748                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1749                                          &memif_set_zc, &flags);
1750                 if (ret < 0)
1751                         goto exit;
1752                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1753                                          &memif_set_secret, (void *)(&secret));
1754                 if (ret < 0)
1755                         goto exit;
1756         }
1757
1758         /* create interface */
1759         ret = memif_create(vdev, role, id, flags, socket_filename,
1760                            log2_ring_size, pkt_buffer_size, secret, ether_addr);
1761
1762 exit:
1763         if (kvlist != NULL)
1764                 rte_kvargs_free(kvlist);
1765         return ret;
1766 }
1767
1768 static int
1769 rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1770 {
1771         struct rte_eth_dev *eth_dev;
1772
1773         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1774         if (eth_dev == NULL)
1775                 return 0;
1776
1777         rte_eth_dev_close(eth_dev->data->port_id);
1778
1779         return 0;
1780 }
1781
1782 static struct rte_vdev_driver pmd_memif_drv = {
1783         .probe = rte_pmd_memif_probe,
1784         .remove = rte_pmd_memif_remove,
1785 };
1786
1787 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1788
1789 RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1790                               ETH_MEMIF_ID_ARG "=<int>"
1791                               ETH_MEMIF_ROLE_ARG "=master|slave"
1792                               ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1793                               ETH_MEMIF_RING_SIZE_ARG "=<int>"
1794                               ETH_MEMIF_SOCKET_ARG "=<string>"
1795                               ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1796                               ETH_MEMIF_ZC_ARG "=yes|no"
1797                               ETH_MEMIF_SECRET_ARG "=<string>");
1798
1799 int memif_logtype;
1800
1801 RTE_INIT(memif_init_log)
1802 {
1803         memif_logtype = rte_log_register("pmd.net.memif");
1804         if (memif_logtype >= 0)
1805                 rte_log_set_level(memif_logtype, RTE_LOG_NOTICE);
1806 }