net/memif: fix Tx bps statistics for zero-copy
[dpdk.git] / drivers / net / memif / rte_eth_memif.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
3  */
4
5 #include <stdlib.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/socket.h>
10 #include <sys/un.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <linux/if_ether.h>
14 #include <errno.h>
15 #include <sys/eventfd.h>
16
17 #include <rte_version.h>
18 #include <rte_mbuf.h>
19 #include <rte_ether.h>
20 #include <ethdev_driver.h>
21 #include <ethdev_vdev.h>
22 #include <rte_malloc.h>
23 #include <rte_kvargs.h>
24 #include <rte_bus_vdev.h>
25 #include <rte_string_fns.h>
26 #include <rte_errno.h>
27 #include <rte_memory.h>
28 #include <rte_memzone.h>
29 #include <rte_eal_memconfig.h>
30
31 #include "rte_eth_memif.h"
32 #include "memif_socket.h"
33
/* Argument-name strings; each one is listed in valid_arguments[] below. */
#define ETH_MEMIF_ID_ARG		"id"
#define ETH_MEMIF_ROLE_ARG		"role"
#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG	"bsize"
#define ETH_MEMIF_RING_SIZE_ARG		"rsize"
#define ETH_MEMIF_SOCKET_ARG		"socket"
#define ETH_MEMIF_SOCKET_ABSTRACT_ARG	"socket-abstract"
#define ETH_MEMIF_MAC_ARG		"mac"
#define ETH_MEMIF_ZC_ARG		"zero-copy"
#define ETH_MEMIF_SECRET_ARG		"secret"
43
44 static const char * const valid_arguments[] = {
45         ETH_MEMIF_ID_ARG,
46         ETH_MEMIF_ROLE_ARG,
47         ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
48         ETH_MEMIF_RING_SIZE_ARG,
49         ETH_MEMIF_SOCKET_ARG,
50         ETH_MEMIF_SOCKET_ABSTRACT_ARG,
51         ETH_MEMIF_MAC_ARG,
52         ETH_MEMIF_ZC_ARG,
53         ETH_MEMIF_SECRET_ARG,
54         NULL
55 };
56
57 static const struct rte_eth_link pmd_link = {
58         .link_speed = ETH_SPEED_NUM_10G,
59         .link_duplex = ETH_LINK_FULL_DUPLEX,
60         .link_status = ETH_LINK_DOWN,
61         .link_autoneg = ETH_LINK_AUTONEG
62 };
63
/* Message name written into rte_mp_msg.name when requesting a region. */
#define MEMIF_MP_SEND_REGION		"memif_mp_send_region"

/*
 * Forward declaration: used as the rte_memseg_walk() callback in
 * memif_mp_request_regions() before its definition appears.
 */
static int memif_region_init_zc(const struct rte_memseg_list *msl,
				const struct rte_memseg *ms, void *arg);
69
70 const char *
71 memif_version(void)
72 {
73         return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
74 }
75
76 /* Message header to synchronize regions */
77 struct mp_region_msg {
78         char port_name[RTE_DEV_NAME_MAX_LEN];
79         memif_region_index_t idx;
80         memif_region_size_t size;
81 };
82
83 static int
84 memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
85 {
86         struct rte_eth_dev *dev;
87         struct pmd_process_private *proc_private;
88         const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
89         struct rte_mp_msg reply;
90         struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
91         uint16_t port_id;
92         int ret;
93
94         /* Get requested port */
95         ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
96         if (ret) {
97                 MIF_LOG(ERR, "Failed to get port id for %s",
98                         msg_param->port_name);
99                 return -1;
100         }
101         dev = &rte_eth_devices[port_id];
102         proc_private = dev->process_private;
103
104         memset(&reply, 0, sizeof(reply));
105         strlcpy(reply.name, msg->name, sizeof(reply.name));
106         reply_param->idx = msg_param->idx;
107         if (proc_private->regions[msg_param->idx] != NULL) {
108                 reply_param->size = proc_private->regions[msg_param->idx]->region_size;
109                 reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
110                 reply.num_fds = 1;
111         }
112         reply.len_param = sizeof(*reply_param);
113         if (rte_mp_reply(&reply, peer) < 0) {
114                 MIF_LOG(ERR, "Failed to reply to an add region request");
115                 return -1;
116         }
117
118         return 0;
119 }
120
121 /*
122  * Request regions
123  * Called by secondary process, when ports link status goes up.
124  */
125 static int
126 memif_mp_request_regions(struct rte_eth_dev *dev)
127 {
128         int ret, i;
129         struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
130         struct rte_mp_msg msg, *reply;
131         struct rte_mp_reply replies;
132         struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
133         struct mp_region_msg *reply_param;
134         struct memif_region *r;
135         struct pmd_process_private *proc_private = dev->process_private;
136         struct pmd_internals *pmd = dev->data->dev_private;
137         /* in case of zero-copy client, only request region 0 */
138         uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
139                                    1 : ETH_MEMIF_MAX_REGION_NUM;
140
141         MIF_LOG(DEBUG, "Requesting memory regions");
142
143         for (i = 0; i < max_region_num; i++) {
144                 /* Prepare the message */
145                 memset(&msg, 0, sizeof(msg));
146                 strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
147                 strlcpy(msg_param->port_name, dev->data->name,
148                         sizeof(msg_param->port_name));
149                 msg_param->idx = i;
150                 msg.len_param = sizeof(*msg_param);
151
152                 /* Send message */
153                 ret = rte_mp_request_sync(&msg, &replies, &timeout);
154                 if (ret < 0 || replies.nb_received != 1) {
155                         MIF_LOG(ERR, "Failed to send mp msg: %d",
156                                 rte_errno);
157                         return -1;
158                 }
159
160                 reply = &replies.msgs[0];
161                 reply_param = (struct mp_region_msg *)reply->param;
162
163                 if (reply_param->size > 0) {
164                         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
165                         if (r == NULL) {
166                                 MIF_LOG(ERR, "Failed to alloc memif region.");
167                                 free(reply);
168                                 return -ENOMEM;
169                         }
170                         r->region_size = reply_param->size;
171                         if (reply->num_fds < 1) {
172                                 MIF_LOG(ERR, "Missing file descriptor.");
173                                 free(reply);
174                                 return -1;
175                         }
176                         r->fd = reply->fds[0];
177                         r->addr = NULL;
178
179                         proc_private->regions[reply_param->idx] = r;
180                         proc_private->regions_num++;
181                 }
182                 free(reply);
183         }
184
185         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
186                 ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
187                 if (ret < 0)
188                         return ret;
189         }
190
191         return memif_connect(dev);
192 }
193
194 static int
195 memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
196 {
197         dev_info->max_mac_addrs = 1;
198         dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
199         dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
200         dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
201         dev_info->min_rx_bufsize = 0;
202
203         return 0;
204 }
205
206 static memif_ring_t *
207 memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
208                memif_ring_type_t type, uint16_t ring_num)
209 {
210         /* rings only in region 0 */
211         void *p = proc_private->regions[0]->addr;
212         int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
213             (1 << pmd->run.log2_ring_size);
214
215         p = (uint8_t *)p + (ring_num + type * pmd->run.num_c2s_rings) * ring_size;
216
217         return (memif_ring_t *)p;
218 }
219
220 static memif_region_offset_t
221 memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
222                       memif_ring_type_t type, uint16_t num)
223 {
224         struct pmd_internals *pmd = dev->data->dev_private;
225         struct pmd_process_private *proc_private = dev->process_private;
226
227         return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
228                 (uint8_t *)proc_private->regions[mq->region]->addr);
229 }
230
231 static memif_ring_t *
232 memif_get_ring_from_queue(struct pmd_process_private *proc_private,
233                           struct memif_queue *mq)
234 {
235         struct memif_region *r;
236
237         r = proc_private->regions[mq->region];
238         if (r == NULL)
239                 return NULL;
240
241         return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
242 }
243
244 static void *
245 memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
246 {
247         return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
248 }
249
250 /* Free mbufs received by server */
251 static void
252 memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
253 {
254         uint16_t cur_tail;
255         uint16_t mask = (1 << mq->log2_ring_size) - 1;
256         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
257
258         /* FIXME: improve performance */
259         /* The ring->tail acts as a guard variable between Tx and Rx
260          * threads, so using load-acquire pairs with store-release
261          * in function eth_memif_rx for C2S queues.
262          */
263         cur_tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
264         while (mq->last_tail != cur_tail) {
265                 RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
266                 /* Decrement refcnt and free mbuf. (current segment) */
267                 rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
268                 rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
269                 mq->last_tail++;
270         }
271 }
272
273 static int
274 memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
275                     struct rte_mbuf *tail)
276 {
277         /* Check for number-of-segments-overflow */
278         if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
279                 return -EOVERFLOW;
280
281         /* Chain 'tail' onto the old tail */
282         cur_tail->next = tail;
283
284         /* accumulate number of segments and total length. */
285         head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
286
287         tail->pkt_len = tail->data_len;
288         head->pkt_len += tail->pkt_len;
289
290         return 0;
291 }
292
293 static uint16_t
294 eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
295 {
296         struct memif_queue *mq = queue;
297         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
298         struct pmd_process_private *proc_private =
299                 rte_eth_devices[mq->in_port].process_private;
300         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
301         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
302         uint16_t n_rx_pkts = 0;
303         uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
304                 RTE_PKTMBUF_HEADROOM;
305         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
306         memif_ring_type_t type = mq->type;
307         memif_desc_t *d0;
308         struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
309         uint64_t b;
310         ssize_t size __rte_unused;
311         uint16_t head;
312         int ret;
313         struct rte_eth_link link;
314
315         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
316                 return 0;
317         if (unlikely(ring == NULL)) {
318                 /* Secondary process will attempt to request regions. */
319                 ret = rte_eth_link_get(mq->in_port, &link);
320                 if (ret < 0)
321                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
322                                 mq->in_port, rte_strerror(-ret));
323                 return 0;
324         }
325
326         /* consume interrupt */
327         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
328                 size = read(mq->intr_handle.fd, &b, sizeof(b));
329
330         ring_size = 1 << mq->log2_ring_size;
331         mask = ring_size - 1;
332
333         if (type == MEMIF_RING_C2S) {
334                 cur_slot = mq->last_head;
335                 last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
336         } else {
337                 cur_slot = mq->last_tail;
338                 last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
339         }
340
341         if (cur_slot == last_slot)
342                 goto refill;
343         n_slots = last_slot - cur_slot;
344
345         while (n_slots && n_rx_pkts < nb_pkts) {
346                 mbuf_head = rte_pktmbuf_alloc(mq->mempool);
347                 if (unlikely(mbuf_head == NULL))
348                         goto no_free_bufs;
349                 mbuf = mbuf_head;
350                 mbuf->port = mq->in_port;
351
352 next_slot:
353                 s0 = cur_slot & mask;
354                 d0 = &ring->desc[s0];
355
356                 src_len = d0->length;
357                 dst_off = 0;
358                 src_off = 0;
359
360                 do {
361                         dst_len = mbuf_size - dst_off;
362                         if (dst_len == 0) {
363                                 dst_off = 0;
364                                 dst_len = mbuf_size;
365
366                                 /* store pointer to tail */
367                                 mbuf_tail = mbuf;
368                                 mbuf = rte_pktmbuf_alloc(mq->mempool);
369                                 if (unlikely(mbuf == NULL))
370                                         goto no_free_bufs;
371                                 mbuf->port = mq->in_port;
372                                 ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
373                                 if (unlikely(ret < 0)) {
374                                         MIF_LOG(ERR, "number-of-segments-overflow");
375                                         rte_pktmbuf_free(mbuf);
376                                         goto no_free_bufs;
377                                 }
378                         }
379                         cp_len = RTE_MIN(dst_len, src_len);
380
381                         rte_pktmbuf_data_len(mbuf) += cp_len;
382                         rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
383                         if (mbuf != mbuf_head)
384                                 rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
385
386                         memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
387                                (uint8_t *)memif_get_buffer(proc_private, d0) + src_off,
388                                cp_len);
389
390                         src_off += cp_len;
391                         dst_off += cp_len;
392                         src_len -= cp_len;
393                 } while (src_len);
394
395                 cur_slot++;
396                 n_slots--;
397
398                 if (d0->flags & MEMIF_DESC_FLAG_NEXT)
399                         goto next_slot;
400
401                 mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
402                 *bufs++ = mbuf_head;
403                 n_rx_pkts++;
404         }
405
406 no_free_bufs:
407         if (type == MEMIF_RING_C2S) {
408                 __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
409                 mq->last_head = cur_slot;
410         } else {
411                 mq->last_tail = cur_slot;
412         }
413
414 refill:
415         if (type == MEMIF_RING_S2C) {
416                 /* ring->head is updated by the receiver and this function
417                  * is called in the context of receiver thread. The loads in
418                  * the receiver do not need to synchronize with its own stores.
419                  */
420                 head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
421                 n_slots = ring_size - head + mq->last_tail;
422
423                 while (n_slots--) {
424                         s0 = head++ & mask;
425                         d0 = &ring->desc[s0];
426                         d0->length = pmd->run.pkt_buffer_size;
427                 }
428                 __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
429         }
430
431         mq->n_pkts += n_rx_pkts;
432         return n_rx_pkts;
433 }
434
435 static uint16_t
436 eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
437 {
438         struct memif_queue *mq = queue;
439         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
440         struct pmd_process_private *proc_private =
441                 rte_eth_devices[mq->in_port].process_private;
442         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
443         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
444         uint16_t n_rx_pkts = 0;
445         memif_desc_t *d0;
446         struct rte_mbuf *mbuf, *mbuf_tail;
447         struct rte_mbuf *mbuf_head = NULL;
448         int ret;
449         struct rte_eth_link link;
450
451         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
452                 return 0;
453         if (unlikely(ring == NULL)) {
454                 /* Secondary process will attempt to request regions. */
455                 rte_eth_link_get(mq->in_port, &link);
456                 return 0;
457         }
458
459         /* consume interrupt */
460         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
461                 uint64_t b;
462                 ssize_t size __rte_unused;
463                 size = read(mq->intr_handle.fd, &b, sizeof(b));
464         }
465
466         ring_size = 1 << mq->log2_ring_size;
467         mask = ring_size - 1;
468
469         cur_slot = mq->last_tail;
470         /* The ring->tail acts as a guard variable between Tx and Rx
471          * threads, so using load-acquire pairs with store-release
472          * to synchronize it between threads.
473          */
474         last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
475         if (cur_slot == last_slot)
476                 goto refill;
477         n_slots = last_slot - cur_slot;
478
479         while (n_slots && n_rx_pkts < nb_pkts) {
480                 s0 = cur_slot & mask;
481
482                 d0 = &ring->desc[s0];
483                 mbuf_head = mq->buffers[s0];
484                 mbuf = mbuf_head;
485
486 next_slot:
487                 /* prefetch next descriptor */
488                 if (n_rx_pkts + 1 < nb_pkts)
489                         rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);
490
491                 mbuf->port = mq->in_port;
492                 rte_pktmbuf_data_len(mbuf) = d0->length;
493                 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
494
495                 mq->n_bytes += rte_pktmbuf_data_len(mbuf);
496
497                 cur_slot++;
498                 n_slots--;
499                 if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
500                         s0 = cur_slot & mask;
501                         d0 = &ring->desc[s0];
502                         mbuf_tail = mbuf;
503                         mbuf = mq->buffers[s0];
504                         ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
505                         if (unlikely(ret < 0)) {
506                                 MIF_LOG(ERR, "number-of-segments-overflow");
507                                 goto refill;
508                         }
509                         goto next_slot;
510                 }
511
512                 *bufs++ = mbuf_head;
513                 n_rx_pkts++;
514         }
515
516         mq->last_tail = cur_slot;
517
518 /* Supply server with new buffers */
519 refill:
520         /* ring->head is updated by the receiver and this function
521          * is called in the context of receiver thread. The loads in
522          * the receiver do not need to synchronize with its own stores.
523          */
524         head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
525         n_slots = ring_size - head + mq->last_tail;
526
527         if (n_slots < 32)
528                 goto no_free_mbufs;
529
530         ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
531         if (unlikely(ret < 0))
532                 goto no_free_mbufs;
533
534         while (n_slots--) {
535                 s0 = head++ & mask;
536                 if (n_slots > 0)
537                         rte_prefetch0(mq->buffers[head & mask]);
538                 d0 = &ring->desc[s0];
539                 /* store buffer header */
540                 mbuf = mq->buffers[s0];
541                 /* populate descriptor */
542                 d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
543                                 RTE_PKTMBUF_HEADROOM;
544                 d0->region = 1;
545                 d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
546                         (uint8_t *)proc_private->regions[d0->region]->addr;
547         }
548 no_free_mbufs:
549         /* The ring->head acts as a guard variable between Tx and Rx
550          * threads, so using store-release pairs with load-acquire
551          * in function eth_memif_tx.
552          */
553         __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
554
555         mq->n_pkts += n_rx_pkts;
556
557         return n_rx_pkts;
558 }
559
560 static uint16_t
561 eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
562 {
563         struct memif_queue *mq = queue;
564         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
565         struct pmd_process_private *proc_private =
566                 rte_eth_devices[mq->in_port].process_private;
567         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
568         uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
569         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
570         memif_ring_type_t type = mq->type;
571         memif_desc_t *d0;
572         struct rte_mbuf *mbuf;
573         struct rte_mbuf *mbuf_head;
574         uint64_t a;
575         ssize_t size;
576         struct rte_eth_link link;
577
578         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
579                 return 0;
580         if (unlikely(ring == NULL)) {
581                 int ret;
582
583                 /* Secondary process will attempt to request regions. */
584                 ret = rte_eth_link_get(mq->in_port, &link);
585                 if (ret < 0)
586                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
587                                 mq->in_port, rte_strerror(-ret));
588                 return 0;
589         }
590
591         ring_size = 1 << mq->log2_ring_size;
592         mask = ring_size - 1;
593
594         if (type == MEMIF_RING_C2S) {
595                 /* For C2S queues ring->head is updated by the sender and
596                  * this function is called in the context of sending thread.
597                  * The loads in the sender do not need to synchronize with
598                  * its own stores. Hence, the following load can be a
599                  * relaxed load.
600                  */
601                 slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
602                 n_free = ring_size - slot +
603                                 __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
604         } else {
605                 /* For S2C queues ring->tail is updated by the sender and
606                  * this function is called in the context of sending thread.
607                  * The loads in the sender do not need to synchronize with
608                  * its own stores. Hence, the following load can be a
609                  * relaxed load.
610                  */
611                 slot = __atomic_load_n(&ring->tail, __ATOMIC_RELAXED);
612                 n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
613         }
614
615         while (n_tx_pkts < nb_pkts && n_free) {
616                 mbuf_head = *bufs++;
617                 mbuf = mbuf_head;
618
619                 saved_slot = slot;
620                 d0 = &ring->desc[slot & mask];
621                 dst_off = 0;
622                 dst_len = (type == MEMIF_RING_C2S) ?
623                         pmd->run.pkt_buffer_size : d0->length;
624
625 next_in_chain:
626                 src_off = 0;
627                 src_len = rte_pktmbuf_data_len(mbuf);
628
629                 while (src_len) {
630                         if (dst_len == 0) {
631                                 if (n_free) {
632                                         slot++;
633                                         n_free--;
634                                         d0->flags |= MEMIF_DESC_FLAG_NEXT;
635                                         d0 = &ring->desc[slot & mask];
636                                         dst_off = 0;
637                                         dst_len = (type == MEMIF_RING_C2S) ?
638                                             pmd->run.pkt_buffer_size : d0->length;
639                                         d0->flags = 0;
640                                 } else {
641                                         slot = saved_slot;
642                                         goto no_free_slots;
643                                 }
644                         }
645                         cp_len = RTE_MIN(dst_len, src_len);
646
647                         memcpy((uint8_t *)memif_get_buffer(proc_private, d0) + dst_off,
648                                rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
649                                cp_len);
650
651                         mq->n_bytes += cp_len;
652                         src_off += cp_len;
653                         dst_off += cp_len;
654                         src_len -= cp_len;
655                         dst_len -= cp_len;
656
657                         d0->length = dst_off;
658                 }
659
660                 if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
661                         mbuf = mbuf->next;
662                         goto next_in_chain;
663                 }
664
665                 n_tx_pkts++;
666                 slot++;
667                 n_free--;
668                 rte_pktmbuf_free(mbuf_head);
669         }
670
671 no_free_slots:
672         if (type == MEMIF_RING_C2S)
673                 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
674         else
675                 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
676
677         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
678                 a = 1;
679                 size = write(mq->intr_handle.fd, &a, sizeof(a));
680                 if (unlikely(size < 0)) {
681                         MIF_LOG(WARNING,
682                                 "Failed to send interrupt. %s", strerror(errno));
683                 }
684         }
685
686         mq->n_pkts += n_tx_pkts;
687         return n_tx_pkts;
688 }
689
690
691 static int
692 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
693                 memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
694                 uint16_t slot, uint16_t n_free)
695 {
696         memif_desc_t *d0;
697         int used_slots = 1;
698
699 next_in_chain:
700         /* store pointer to mbuf to free it later */
701         mq->buffers[slot & mask] = mbuf;
702         /* Increment refcnt to make sure the buffer is not freed before server
703          * receives it. (current segment)
704          */
705         rte_mbuf_refcnt_update(mbuf, 1);
706         /* populate descriptor */
707         d0 = &ring->desc[slot & mask];
708         d0->length = rte_pktmbuf_data_len(mbuf);
709         mq->n_bytes += rte_pktmbuf_data_len(mbuf);
710         /* FIXME: get region index */
711         d0->region = 1;
712         d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
713                 (uint8_t *)proc_private->regions[d0->region]->addr;
714         d0->flags = 0;
715
716         /* check if buffer is chained */
717         if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
718                 if (n_free < 2)
719                         return 0;
720                 /* mark buffer as chained */
721                 d0->flags |= MEMIF_DESC_FLAG_NEXT;
722                 /* advance mbuf */
723                 mbuf = mbuf->next;
724                 /* update counters */
725                 used_slots++;
726                 slot++;
727                 n_free--;
728                 goto next_in_chain;
729         }
730         return used_slots;
731 }
732
733 static uint16_t
734 eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
735 {
736         struct memif_queue *mq = queue;
737         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
738         struct pmd_process_private *proc_private =
739                 rte_eth_devices[mq->in_port].process_private;
740         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
741         uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
742         struct rte_eth_link link;
743
744         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
745                 return 0;
746         if (unlikely(ring == NULL)) {
747                 /* Secondary process will attempt to request regions. */
748                 rte_eth_link_get(mq->in_port, &link);
749                 return 0;
750         }
751
752         ring_size = 1 << mq->log2_ring_size;
753         mask = ring_size - 1;
754
755         /* free mbufs received by server */
756         memif_free_stored_mbufs(proc_private, mq);
757
758         /* ring type always MEMIF_RING_C2S */
759         /* For C2S queues ring->head is updated by the sender and
760          * this function is called in the context of sending thread.
761          * The loads in the sender do not need to synchronize with
762          * its own stores. Hence, the following load can be a
763          * relaxed load.
764          */
765         slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
766         n_free = ring_size - slot + mq->last_tail;
767
768         int used_slots;
769
770         while (n_free && (n_tx_pkts < nb_pkts)) {
771                 while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
772                         if ((nb_pkts - n_tx_pkts) > 8) {
773                                 rte_prefetch0(*bufs + 4);
774                                 rte_prefetch0(*bufs + 5);
775                                 rte_prefetch0(*bufs + 6);
776                                 rte_prefetch0(*bufs + 7);
777                         }
778                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
779                                 mask, slot, n_free);
780                         if (unlikely(used_slots < 1))
781                                 goto no_free_slots;
782                         n_tx_pkts++;
783                         slot += used_slots;
784                         n_free -= used_slots;
785
786                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
787                                 mask, slot, n_free);
788                         if (unlikely(used_slots < 1))
789                                 goto no_free_slots;
790                         n_tx_pkts++;
791                         slot += used_slots;
792                         n_free -= used_slots;
793
794                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
795                                 mask, slot, n_free);
796                         if (unlikely(used_slots < 1))
797                                 goto no_free_slots;
798                         n_tx_pkts++;
799                         slot += used_slots;
800                         n_free -= used_slots;
801
802                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
803                                 mask, slot, n_free);
804                         if (unlikely(used_slots < 1))
805                                 goto no_free_slots;
806                         n_tx_pkts++;
807                         slot += used_slots;
808                         n_free -= used_slots;
809                 }
810                 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
811                         mask, slot, n_free);
812                 if (unlikely(used_slots < 1))
813                         goto no_free_slots;
814                 n_tx_pkts++;
815                 slot += used_slots;
816                 n_free -= used_slots;
817         }
818
819 no_free_slots:
820         /* ring type always MEMIF_RING_C2S */
821         /* The ring->head acts as a guard variable between Tx and Rx
822          * threads, so using store-release pairs with load-acquire
823          * in function eth_memif_rx for C2S rings.
824          */
825         __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
826
827         /* Send interrupt, if enabled. */
828         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
829                 uint64_t a = 1;
830                 ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
831                 if (unlikely(size < 0)) {
832                         MIF_LOG(WARNING,
833                                 "Failed to send interrupt. %s", strerror(errno));
834                 }
835         }
836
837         /* increment queue counters */
838         mq->n_pkts += n_tx_pkts;
839
840         return n_tx_pkts;
841 }
842
843 void
844 memif_free_regions(struct rte_eth_dev *dev)
845 {
846         struct pmd_process_private *proc_private = dev->process_private;
847         struct pmd_internals *pmd = dev->data->dev_private;
848         int i;
849         struct memif_region *r;
850
851         /* regions are allocated contiguously, so it's
852          * enough to loop until 'proc_private->regions_num'
853          */
854         for (i = 0; i < proc_private->regions_num; i++) {
855                 r = proc_private->regions[i];
856                 if (r != NULL) {
857                         /* This is memzone */
858                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
859                                 r->addr = NULL;
860                                 if (r->fd > 0)
861                                         close(r->fd);
862                         }
863                         if (r->addr != NULL) {
864                                 munmap(r->addr, r->region_size);
865                                 if (r->fd > 0) {
866                                         close(r->fd);
867                                         r->fd = -1;
868                                 }
869                         }
870                         rte_free(r);
871                         proc_private->regions[i] = NULL;
872                 }
873         }
874         proc_private->regions_num = 0;
875 }
876
877 static int
878 memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
879                      void *arg)
880 {
881         struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
882         struct memif_region *r;
883
884         if (proc_private->regions_num < 1) {
885                 MIF_LOG(ERR, "Missing descriptor region");
886                 return -1;
887         }
888
889         r = proc_private->regions[proc_private->regions_num - 1];
890
891         if (r->addr != msl->base_va)
892                 r = proc_private->regions[++proc_private->regions_num - 1];
893
894         if (r == NULL) {
895                 r = rte_zmalloc("region", sizeof(struct memif_region), 0);
896                 if (r == NULL) {
897                         MIF_LOG(ERR, "Failed to alloc memif region.");
898                         return -ENOMEM;
899                 }
900
901                 r->addr = msl->base_va;
902                 r->region_size = ms->len;
903                 r->fd = rte_memseg_get_fd(ms);
904                 if (r->fd < 0)
905                         return -1;
906                 r->pkt_buffer_offset = 0;
907
908                 proc_private->regions[proc_private->regions_num - 1] = r;
909         } else {
910                 r->region_size += ms->len;
911         }
912
913         return 0;
914 }
915
916 static int
917 memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
918 {
919         struct pmd_internals *pmd = dev->data->dev_private;
920         struct pmd_process_private *proc_private = dev->process_private;
921         char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
922         int ret = 0;
923         struct memif_region *r;
924
925         if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
926                 MIF_LOG(ERR, "Too many regions.");
927                 return -1;
928         }
929
930         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
931         if (r == NULL) {
932                 MIF_LOG(ERR, "Failed to alloc memif region.");
933                 return -ENOMEM;
934         }
935
936         /* calculate buffer offset */
937         r->pkt_buffer_offset = (pmd->run.num_c2s_rings + pmd->run.num_s2c_rings) *
938             (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
939             (1 << pmd->run.log2_ring_size));
940
941         r->region_size = r->pkt_buffer_offset;
942         /* if region has buffers, add buffers size to region_size */
943         if (has_buffers == 1)
944                 r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
945                         (1 << pmd->run.log2_ring_size) *
946                         (pmd->run.num_c2s_rings +
947                          pmd->run.num_s2c_rings));
948
949         memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
950         snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
951                  proc_private->regions_num);
952
953         r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
954         if (r->fd < 0) {
955                 MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
956                 ret = -1;
957                 goto error;
958         }
959
960         ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
961         if (ret < 0) {
962                 MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
963                 goto error;
964         }
965
966         ret = ftruncate(r->fd, r->region_size);
967         if (ret < 0) {
968                 MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
969                 goto error;
970         }
971
972         r->addr = mmap(NULL, r->region_size, PROT_READ |
973                        PROT_WRITE, MAP_SHARED, r->fd, 0);
974         if (r->addr == MAP_FAILED) {
975                 MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(ret));
976                 ret = -1;
977                 goto error;
978         }
979
980         proc_private->regions[proc_private->regions_num] = r;
981         proc_private->regions_num++;
982
983         return ret;
984
985 error:
986         if (r->fd > 0)
987                 close(r->fd);
988         r->fd = -1;
989
990         return ret;
991 }
992
993 static int
994 memif_regions_init(struct rte_eth_dev *dev)
995 {
996         struct pmd_internals *pmd = dev->data->dev_private;
997         int ret;
998
999         /*
1000          * Zero-copy exposes dpdk memory.
1001          * Each memseg list will be represented by memif region.
1002          * Zero-copy regions indexing: memseg list idx + 1,
1003          * as we already have region 0 reserved for descriptors.
1004          */
1005         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1006                 /* create region idx 0 containing descriptors */
1007                 ret = memif_region_init_shm(dev, 0);
1008                 if (ret < 0)
1009                         return ret;
1010                 ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
1011                 if (ret < 0)
1012                         return ret;
1013         } else {
1014                 /* create one memory region contaning rings and buffers */
1015                 ret = memif_region_init_shm(dev, /* has buffers */ 1);
1016                 if (ret < 0)
1017                         return ret;
1018         }
1019
1020         return 0;
1021 }
1022
1023 static void
1024 memif_init_rings(struct rte_eth_dev *dev)
1025 {
1026         struct pmd_internals *pmd = dev->data->dev_private;
1027         struct pmd_process_private *proc_private = dev->process_private;
1028         memif_ring_t *ring;
1029         int i, j;
1030         uint16_t slot;
1031
1032         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1033                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_C2S, i);
1034                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1035                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1036                 ring->cookie = MEMIF_COOKIE;
1037                 ring->flags = 0;
1038
1039                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1040                         continue;
1041
1042                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1043                         slot = i * (1 << pmd->run.log2_ring_size) + j;
1044                         ring->desc[j].region = 0;
1045                         ring->desc[j].offset =
1046                                 proc_private->regions[0]->pkt_buffer_offset +
1047                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1048                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1049                 }
1050         }
1051
1052         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1053                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2C, i);
1054                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1055                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1056                 ring->cookie = MEMIF_COOKIE;
1057                 ring->flags = 0;
1058
1059                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1060                         continue;
1061
1062                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1063                         slot = (i + pmd->run.num_c2s_rings) *
1064                             (1 << pmd->run.log2_ring_size) + j;
1065                         ring->desc[j].region = 0;
1066                         ring->desc[j].offset =
1067                                 proc_private->regions[0]->pkt_buffer_offset +
1068                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1069                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1070                 }
1071         }
1072 }
1073
1074 /* called only by client */
1075 static int
1076 memif_init_queues(struct rte_eth_dev *dev)
1077 {
1078         struct pmd_internals *pmd = dev->data->dev_private;
1079         struct memif_queue *mq;
1080         int i;
1081
1082         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1083                 mq = dev->data->tx_queues[i];
1084                 mq->log2_ring_size = pmd->run.log2_ring_size;
1085                 /* queues located only in region 0 */
1086                 mq->region = 0;
1087                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_C2S, i);
1088                 mq->last_head = 0;
1089                 mq->last_tail = 0;
1090                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1091                 if (mq->intr_handle.fd < 0) {
1092                         MIF_LOG(WARNING,
1093                                 "Failed to create eventfd for tx queue %d: %s.", i,
1094                                 strerror(errno));
1095                 }
1096                 mq->buffers = NULL;
1097                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1098                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1099                                                   (1 << mq->log2_ring_size), 0);
1100                         if (mq->buffers == NULL)
1101                                 return -ENOMEM;
1102                 }
1103         }
1104
1105         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1106                 mq = dev->data->rx_queues[i];
1107                 mq->log2_ring_size = pmd->run.log2_ring_size;
1108                 /* queues located only in region 0 */
1109                 mq->region = 0;
1110                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2C, i);
1111                 mq->last_head = 0;
1112                 mq->last_tail = 0;
1113                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1114                 if (mq->intr_handle.fd < 0) {
1115                         MIF_LOG(WARNING,
1116                                 "Failed to create eventfd for rx queue %d: %s.", i,
1117                                 strerror(errno));
1118                 }
1119                 mq->buffers = NULL;
1120                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1121                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1122                                                   (1 << mq->log2_ring_size), 0);
1123                         if (mq->buffers == NULL)
1124                                 return -ENOMEM;
1125                 }
1126         }
1127         return 0;
1128 }
1129
/*
 * Create the memory regions, initialize the rings inside them and bind the
 * device queues to those rings, in that order.
 *
 * Returns 0 on success or the negative value of the first failing step.
 */
int
memif_init_regions_and_queues(struct rte_eth_dev *dev)
{
        int rc = memif_regions_init(dev);

        if (rc < 0)
                return rc;

        memif_init_rings(dev);

        rc = memif_init_queues(dev);
        return (rc < 0) ? rc : 0;
}
1147
1148 int
1149 memif_connect(struct rte_eth_dev *dev)
1150 {
1151         struct pmd_internals *pmd = dev->data->dev_private;
1152         struct pmd_process_private *proc_private = dev->process_private;
1153         struct memif_region *mr;
1154         struct memif_queue *mq;
1155         memif_ring_t *ring;
1156         int i;
1157
1158         for (i = 0; i < proc_private->regions_num; i++) {
1159                 mr = proc_private->regions[i];
1160                 if (mr != NULL) {
1161                         if (mr->addr == NULL) {
1162                                 if (mr->fd < 0)
1163                                         return -1;
1164                                 mr->addr = mmap(NULL, mr->region_size,
1165                                                 PROT_READ | PROT_WRITE,
1166                                                 MAP_SHARED, mr->fd, 0);
1167                                 if (mr->addr == MAP_FAILED) {
1168                                         MIF_LOG(ERR, "mmap failed: %s\n",
1169                                                 strerror(errno));
1170                                         return -1;
1171                                 }
1172                         }
1173                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
1174                                 /* close memseg file */
1175                                 close(mr->fd);
1176                                 mr->fd = -1;
1177                         }
1178                 }
1179         }
1180
1181         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1182                 for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1183                         mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
1184                             dev->data->tx_queues[i] : dev->data->rx_queues[i];
1185                         ring = memif_get_ring_from_queue(proc_private, mq);
1186                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1187                                 MIF_LOG(ERR, "Wrong ring");
1188                                 return -1;
1189                         }
1190                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1191                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1192                         mq->last_head = 0;
1193                         mq->last_tail = 0;
1194                         /* enable polling mode */
1195                         if (pmd->role == MEMIF_ROLE_SERVER)
1196                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1197                 }
1198                 for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1199                         mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
1200                             dev->data->rx_queues[i] : dev->data->tx_queues[i];
1201                         ring = memif_get_ring_from_queue(proc_private, mq);
1202                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1203                                 MIF_LOG(ERR, "Wrong ring");
1204                                 return -1;
1205                         }
1206                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1207                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1208                         mq->last_head = 0;
1209                         mq->last_tail = 0;
1210                         /* enable polling mode */
1211                         if (pmd->role == MEMIF_ROLE_CLIENT)
1212                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1213                 }
1214
1215                 pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
1216                 pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
1217                 dev->data->dev_link.link_status = ETH_LINK_UP;
1218         }
1219         MIF_LOG(INFO, "Connected.");
1220         return 0;
1221 }
1222
1223 static int
1224 memif_dev_start(struct rte_eth_dev *dev)
1225 {
1226         struct pmd_internals *pmd = dev->data->dev_private;
1227         int ret = 0;
1228
1229         switch (pmd->role) {
1230         case MEMIF_ROLE_CLIENT:
1231                 ret = memif_connect_client(dev);
1232                 break;
1233         case MEMIF_ROLE_SERVER:
1234                 ret = memif_connect_server(dev);
1235                 break;
1236         default:
1237                 MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
1238                 ret = -1;
1239                 break;
1240         }
1241
1242         return ret;
1243 }
1244
1245 static int
1246 memif_dev_close(struct rte_eth_dev *dev)
1247 {
1248         struct pmd_internals *pmd = dev->data->dev_private;
1249         int i;
1250
1251         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1252                 memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
1253                 memif_disconnect(dev);
1254
1255                 for (i = 0; i < dev->data->nb_rx_queues; i++)
1256                         (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]);
1257                 for (i = 0; i < dev->data->nb_tx_queues; i++)
1258                         (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]);
1259
1260                 memif_socket_remove_device(dev);
1261         } else {
1262                 memif_disconnect(dev);
1263         }
1264
1265         rte_free(dev->process_private);
1266
1267         return 0;
1268 }
1269
1270 static int
1271 memif_dev_configure(struct rte_eth_dev *dev)
1272 {
1273         struct pmd_internals *pmd = dev->data->dev_private;
1274
1275         /*
1276          * CLIENT - TXQ
1277          * SERVER - RXQ
1278          */
1279         pmd->cfg.num_c2s_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
1280                                   dev->data->nb_tx_queues : dev->data->nb_rx_queues;
1281
1282         /*
1283          * CLIENT - RXQ
1284          * SERVER - TXQ
1285          */
1286         pmd->cfg.num_s2c_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
1287                                   dev->data->nb_rx_queues : dev->data->nb_tx_queues;
1288
1289         return 0;
1290 }
1291
1292 static int
1293 memif_tx_queue_setup(struct rte_eth_dev *dev,
1294                      uint16_t qid,
1295                      uint16_t nb_tx_desc __rte_unused,
1296                      unsigned int socket_id __rte_unused,
1297                      const struct rte_eth_txconf *tx_conf __rte_unused)
1298 {
1299         struct pmd_internals *pmd = dev->data->dev_private;
1300         struct memif_queue *mq;
1301
1302         mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
1303         if (mq == NULL) {
1304                 MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
1305                 return -ENOMEM;
1306         }
1307
1308         mq->type =
1309             (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_C2S : MEMIF_RING_S2C;
1310         mq->n_pkts = 0;
1311         mq->n_bytes = 0;
1312         mq->intr_handle.fd = -1;
1313         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1314         mq->in_port = dev->data->port_id;
1315         dev->data->tx_queues[qid] = mq;
1316
1317         return 0;
1318 }
1319
1320 static int
1321 memif_rx_queue_setup(struct rte_eth_dev *dev,
1322                      uint16_t qid,
1323                      uint16_t nb_rx_desc __rte_unused,
1324                      unsigned int socket_id __rte_unused,
1325                      const struct rte_eth_rxconf *rx_conf __rte_unused,
1326                      struct rte_mempool *mb_pool)
1327 {
1328         struct pmd_internals *pmd = dev->data->dev_private;
1329         struct memif_queue *mq;
1330
1331         mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
1332         if (mq == NULL) {
1333                 MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
1334                 return -ENOMEM;
1335         }
1336
1337         mq->type = (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_S2C : MEMIF_RING_C2S;
1338         mq->n_pkts = 0;
1339         mq->n_bytes = 0;
1340         mq->intr_handle.fd = -1;
1341         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1342         mq->mempool = mb_pool;
1343         mq->in_port = dev->data->port_id;
1344         dev->data->rx_queues[qid] = mq;
1345
1346         return 0;
1347 }
1348
/*
 * Rx/Tx queue release handler: free the queue structure.
 * A NULL queue is accepted and ignored.
 */
static void
memif_queue_release(void *queue)
{
        if (queue != NULL)
                rte_free(queue);
}
1359
1360 static int
1361 memif_link_update(struct rte_eth_dev *dev,
1362                   int wait_to_complete __rte_unused)
1363 {
1364         struct pmd_process_private *proc_private;
1365
1366         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1367                 proc_private = dev->process_private;
1368                 if (dev->data->dev_link.link_status == ETH_LINK_UP &&
1369                                 proc_private->regions_num == 0) {
1370                         memif_mp_request_regions(dev);
1371                 } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN &&
1372                                 proc_private->regions_num > 0) {
1373                         memif_free_regions(dev);
1374                 }
1375         }
1376         return 0;
1377 }
1378
1379 static int
1380 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1381 {
1382         struct pmd_internals *pmd = dev->data->dev_private;
1383         struct memif_queue *mq;
1384         int i;
1385         uint8_t tmp, nq;
1386
1387         stats->ipackets = 0;
1388         stats->ibytes = 0;
1389         stats->opackets = 0;
1390         stats->obytes = 0;
1391
1392         tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_c2s_rings :
1393             pmd->run.num_s2c_rings;
1394         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1395             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1396
1397         /* RX stats */
1398         for (i = 0; i < nq; i++) {
1399                 mq = dev->data->rx_queues[i];
1400                 stats->q_ipackets[i] = mq->n_pkts;
1401                 stats->q_ibytes[i] = mq->n_bytes;
1402                 stats->ipackets += mq->n_pkts;
1403                 stats->ibytes += mq->n_bytes;
1404         }
1405
1406         tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_s2c_rings :
1407             pmd->run.num_c2s_rings;
1408         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1409             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1410
1411         /* TX stats */
1412         for (i = 0; i < nq; i++) {
1413                 mq = dev->data->tx_queues[i];
1414                 stats->q_opackets[i] = mq->n_pkts;
1415                 stats->q_obytes[i] = mq->n_bytes;
1416                 stats->opackets += mq->n_pkts;
1417                 stats->obytes += mq->n_bytes;
1418         }
1419         return 0;
1420 }
1421
1422 static int
1423 memif_stats_reset(struct rte_eth_dev *dev)
1424 {
1425         struct pmd_internals *pmd = dev->data->dev_private;
1426         int i;
1427         struct memif_queue *mq;
1428
1429         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1430                 mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->tx_queues[i] :
1431                     dev->data->rx_queues[i];
1432                 mq->n_pkts = 0;
1433                 mq->n_bytes = 0;
1434         }
1435         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1436                 mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->rx_queues[i] :
1437                     dev->data->tx_queues[i];
1438                 mq->n_pkts = 0;
1439                 mq->n_bytes = 0;
1440         }
1441
1442         return 0;
1443 }
1444
1445 static int
1446 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1447                            uint16_t qid __rte_unused)
1448 {
1449         MIF_LOG(WARNING, "Interrupt mode not supported.");
1450
1451         return -1;
1452 }
1453
1454 static int
1455 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1456 {
1457         struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1458
1459         return 0;
1460 }
1461
1462 static const struct eth_dev_ops ops = {
1463         .dev_start = memif_dev_start,
1464         .dev_close = memif_dev_close,
1465         .dev_infos_get = memif_dev_info,
1466         .dev_configure = memif_dev_configure,
1467         .tx_queue_setup = memif_tx_queue_setup,
1468         .rx_queue_setup = memif_rx_queue_setup,
1469         .rx_queue_release = memif_queue_release,
1470         .tx_queue_release = memif_queue_release,
1471         .rx_queue_intr_enable = memif_rx_queue_intr_enable,
1472         .rx_queue_intr_disable = memif_rx_queue_intr_disable,
1473         .link_update = memif_link_update,
1474         .stats_get = memif_stats_get,
1475         .stats_reset = memif_stats_reset,
1476 };
1477
1478 static int
1479 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1480              memif_interface_id_t id, uint32_t flags,
1481              const char *socket_filename,
1482              memif_log2_ring_size_t log2_ring_size,
1483              uint16_t pkt_buffer_size, const char *secret,
1484              struct rte_ether_addr *ether_addr)
1485 {
1486         int ret = 0;
1487         struct rte_eth_dev *eth_dev;
1488         struct rte_eth_dev_data *data;
1489         struct pmd_internals *pmd;
1490         struct pmd_process_private *process_private;
1491         const unsigned int numa_node = vdev->device.numa_node;
1492         const char *name = rte_vdev_device_name(vdev);
1493
1494         eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1495         if (eth_dev == NULL) {
1496                 MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1497                 return -1;
1498         }
1499
1500         process_private = (struct pmd_process_private *)
1501                 rte_zmalloc(name, sizeof(struct pmd_process_private),
1502                             RTE_CACHE_LINE_SIZE);
1503
1504         if (process_private == NULL) {
1505                 MIF_LOG(ERR, "Failed to alloc memory for process private");
1506                 return -1;
1507         }
1508         eth_dev->process_private = process_private;
1509
1510         pmd = eth_dev->data->dev_private;
1511         memset(pmd, 0, sizeof(*pmd));
1512
1513         pmd->id = id;
1514         pmd->flags = flags;
1515         pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1516         pmd->role = role;
1517         /* Zero-copy flag irelevant to server. */
1518         if (pmd->role == MEMIF_ROLE_SERVER)
1519                 pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1520
1521         ret = memif_socket_init(eth_dev, socket_filename);
1522         if (ret < 0)
1523                 return ret;
1524
1525         memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
1526         if (secret != NULL)
1527                 strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1528
1529         pmd->cfg.log2_ring_size = log2_ring_size;
1530         /* set in .dev_configure() */
1531         pmd->cfg.num_c2s_rings = 0;
1532         pmd->cfg.num_s2c_rings = 0;
1533
1534         pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1535         rte_spinlock_init(&pmd->cc_lock);
1536
1537         data = eth_dev->data;
1538         data->dev_private = pmd;
1539         data->numa_node = numa_node;
1540         data->dev_link = pmd_link;
1541         data->mac_addrs = ether_addr;
1542         data->promiscuous = 1;
1543         data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1544
1545         eth_dev->dev_ops = &ops;
1546         eth_dev->device = &vdev->device;
1547         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1548                 eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1549                 eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1550         } else {
1551                 eth_dev->rx_pkt_burst = eth_memif_rx;
1552                 eth_dev->tx_pkt_burst = eth_memif_tx;
1553         }
1554
1555         rte_eth_dev_probing_finish(eth_dev);
1556
1557         return 0;
1558 }
1559
1560 static int
1561 memif_set_role(const char *key __rte_unused, const char *value,
1562                void *extra_args)
1563 {
1564         enum memif_role_t *role = (enum memif_role_t *)extra_args;
1565
1566         if (strstr(value, "server") != NULL) {
1567                 *role = MEMIF_ROLE_SERVER;
1568         } else if (strstr(value, "client") != NULL) {
1569                 *role = MEMIF_ROLE_CLIENT;
1570         } else if (strstr(value, "master") != NULL) {
1571                 MIF_LOG(NOTICE, "Role argument \"master\" is deprecated, use \"server\"");
1572                 *role = MEMIF_ROLE_SERVER;
1573         } else if (strstr(value, "slave") != NULL) {
1574                 MIF_LOG(NOTICE, "Role argument \"slave\" is deprecated, use \"client\"");
1575                 *role = MEMIF_ROLE_CLIENT;
1576         } else {
1577                 MIF_LOG(ERR, "Unknown role: %s.", value);
1578                 return -EINVAL;
1579         }
1580         return 0;
1581 }
1582
1583 static int
1584 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1585 {
1586         uint32_t *flags = (uint32_t *)extra_args;
1587
1588         if (strstr(value, "yes") != NULL) {
1589                 if (!rte_mcfg_get_single_file_segments()) {
1590                         MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1591                         return -ENOTSUP;
1592                 }
1593                 *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1594         } else if (strstr(value, "no") != NULL) {
1595                 *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1596         } else {
1597                 MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1598                 return -EINVAL;
1599         }
1600         return 0;
1601 }
1602
1603 static int
1604 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1605 {
1606         memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1607
1608         /* even if parsing fails, 0 is a valid id */
1609         *id = strtoul(value, NULL, 10);
1610         return 0;
1611 }
1612
1613 static int
1614 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1615 {
1616         unsigned long tmp;
1617         uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1618
1619         tmp = strtoul(value, NULL, 10);
1620         if (tmp == 0 || tmp > 0xFFFF) {
1621                 MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1622                 return -EINVAL;
1623         }
1624         *pkt_buffer_size = tmp;
1625         return 0;
1626 }
1627
1628 static int
1629 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1630 {
1631         unsigned long tmp;
1632         memif_log2_ring_size_t *log2_ring_size =
1633             (memif_log2_ring_size_t *)extra_args;
1634
1635         tmp = strtoul(value, NULL, 10);
1636         if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1637                 MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1638                         value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1639                 return -EINVAL;
1640         }
1641         *log2_ring_size = tmp;
1642         return 0;
1643 }
1644
1645 /* check if directory exists and if we have permission to read/write */
1646 static int
1647 memif_check_socket_filename(const char *filename)
1648 {
1649         char *dir = NULL, *tmp;
1650         uint32_t idx;
1651         int ret = 0;
1652
1653         if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1654                 MIF_LOG(ERR, "Unix socket address too long (max 108).");
1655                 return -1;
1656         }
1657
1658         tmp = strrchr(filename, '/');
1659         if (tmp != NULL) {
1660                 idx = tmp - filename;
1661                 dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1662                 if (dir == NULL) {
1663                         MIF_LOG(ERR, "Failed to allocate memory.");
1664                         return -1;
1665                 }
1666                 strlcpy(dir, filename, sizeof(char) * (idx + 1));
1667         }
1668
1669         if (dir == NULL || (faccessat(-1, dir, F_OK | R_OK |
1670                                         W_OK, AT_EACCESS) < 0)) {
1671                 MIF_LOG(ERR, "Invalid socket directory.");
1672                 ret = -EINVAL;
1673         }
1674
1675         if (dir != NULL)
1676                 rte_free(dir);
1677
1678         return ret;
1679 }
1680
1681 static int
1682 memif_set_socket_filename(const char *key __rte_unused, const char *value,
1683                           void *extra_args)
1684 {
1685         const char **socket_filename = (const char **)extra_args;
1686
1687         *socket_filename = value;
1688         return 0;
1689 }
1690
1691 static int
1692 memif_set_is_socket_abstract(const char *key __rte_unused, const char *value, void *extra_args)
1693 {
1694         uint32_t *flags = (uint32_t *)extra_args;
1695
1696         if (strstr(value, "yes") != NULL) {
1697                 *flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1698         } else if (strstr(value, "no") != NULL) {
1699                 *flags &= ~ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1700         } else {
1701                 MIF_LOG(ERR, "Failed to parse socket-abstract param: %s.", value);
1702                 return -EINVAL;
1703         }
1704         return 0;
1705 }
1706
1707 static int
1708 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1709 {
1710         struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1711
1712         if (rte_ether_unformat_addr(value, ether_addr) < 0)
1713                 MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1714         return 0;
1715 }
1716
1717 static int
1718 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1719 {
1720         const char **secret = (const char **)extra_args;
1721
1722         *secret = value;
1723         return 0;
1724 }
1725
1726 static int
1727 rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1728 {
1729         RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1730         RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1731         int ret = 0;
1732         struct rte_kvargs *kvlist;
1733         const char *name = rte_vdev_device_name(vdev);
1734         enum memif_role_t role = MEMIF_ROLE_CLIENT;
1735         memif_interface_id_t id = 0;
1736         uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1737         memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1738         const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1739         uint32_t flags = 0;
1740         const char *secret = NULL;
1741         struct rte_ether_addr *ether_addr = rte_zmalloc("",
1742                 sizeof(struct rte_ether_addr), 0);
1743         struct rte_eth_dev *eth_dev;
1744
1745         rte_eth_random_addr(ether_addr->addr_bytes);
1746
1747         MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1748
1749         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1750                 eth_dev = rte_eth_dev_attach_secondary(name);
1751                 if (!eth_dev) {
1752                         MIF_LOG(ERR, "Failed to probe %s", name);
1753                         return -1;
1754                 }
1755
1756                 eth_dev->dev_ops = &ops;
1757                 eth_dev->device = &vdev->device;
1758                 eth_dev->rx_pkt_burst = eth_memif_rx;
1759                 eth_dev->tx_pkt_burst = eth_memif_tx;
1760
1761                 if (!rte_eal_primary_proc_alive(NULL)) {
1762                         MIF_LOG(ERR, "Primary process is missing");
1763                         return -1;
1764                 }
1765
1766                 eth_dev->process_private = (struct pmd_process_private *)
1767                         rte_zmalloc(name,
1768                                 sizeof(struct pmd_process_private),
1769                                 RTE_CACHE_LINE_SIZE);
1770                 if (eth_dev->process_private == NULL) {
1771                         MIF_LOG(ERR,
1772                                 "Failed to alloc memory for process private");
1773                         return -1;
1774                 }
1775
1776                 rte_eth_dev_probing_finish(eth_dev);
1777
1778                 return 0;
1779         }
1780
1781         ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1782         /*
1783          * Primary process can continue probing, but secondary process won't
1784          * be able to get memory regions information
1785          */
1786         if (ret < 0 && rte_errno != EEXIST)
1787                 MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1788                         strerror(rte_errno));
1789
1790         /* use abstract address by default */
1791         flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1792
1793         kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1794
1795         /* parse parameters */
1796         if (kvlist != NULL) {
1797                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1798                                          &memif_set_role, &role);
1799                 if (ret < 0)
1800                         goto exit;
1801                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1802                                          &memif_set_id, &id);
1803                 if (ret < 0)
1804                         goto exit;
1805                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1806                                          &memif_set_bs, &pkt_buffer_size);
1807                 if (ret < 0)
1808                         goto exit;
1809                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1810                                          &memif_set_rs, &log2_ring_size);
1811                 if (ret < 0)
1812                         goto exit;
1813                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1814                                          &memif_set_socket_filename,
1815                                          (void *)(&socket_filename));
1816                 if (ret < 0)
1817                         goto exit;
1818                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ABSTRACT_ARG,
1819                                          &memif_set_is_socket_abstract, &flags);
1820                 if (ret < 0)
1821                         goto exit;
1822                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1823                                          &memif_set_mac, ether_addr);
1824                 if (ret < 0)
1825                         goto exit;
1826                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1827                                          &memif_set_zc, &flags);
1828                 if (ret < 0)
1829                         goto exit;
1830                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1831                                          &memif_set_secret, (void *)(&secret));
1832                 if (ret < 0)
1833                         goto exit;
1834         }
1835
1836         if (!(flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT)) {
1837                 ret = memif_check_socket_filename(socket_filename);
1838                 if (ret < 0)
1839                         goto exit;
1840         }
1841
1842         /* create interface */
1843         ret = memif_create(vdev, role, id, flags, socket_filename,
1844                            log2_ring_size, pkt_buffer_size, secret, ether_addr);
1845
1846 exit:
1847         if (kvlist != NULL)
1848                 rte_kvargs_free(kvlist);
1849         return ret;
1850 }
1851
1852 static int
1853 rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1854 {
1855         struct rte_eth_dev *eth_dev;
1856
1857         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1858         if (eth_dev == NULL)
1859                 return 0;
1860
1861         return rte_eth_dev_close(eth_dev->data->port_id);
1862 }
1863
1864 static struct rte_vdev_driver pmd_memif_drv = {
1865         .probe = rte_pmd_memif_probe,
1866         .remove = rte_pmd_memif_remove,
1867 };
1868
1869 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1870
1871 RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1872                               ETH_MEMIF_ID_ARG "=<int>"
1873                               ETH_MEMIF_ROLE_ARG "=server|client"
1874                               ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1875                               ETH_MEMIF_RING_SIZE_ARG "=<int>"
1876                               ETH_MEMIF_SOCKET_ARG "=<string>"
1877                                   ETH_MEMIF_SOCKET_ABSTRACT_ARG "=yes|no"
1878                               ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1879                               ETH_MEMIF_ZC_ARG "=yes|no"
1880                               ETH_MEMIF_SECRET_ARG "=<string>");
1881
1882 RTE_LOG_REGISTER_DEFAULT(memif_logtype, NOTICE);