[dpdk.git] / drivers / net / memif / rte_eth_memif.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
3  */
4
5 #include <stdlib.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/socket.h>
10 #include <sys/un.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <linux/if_ether.h>
14 #include <errno.h>
15 #include <sys/eventfd.h>
16
17 #include <rte_version.h>
18 #include <rte_mbuf.h>
19 #include <rte_ether.h>
20 #include <rte_ethdev_driver.h>
21 #include <rte_ethdev_vdev.h>
22 #include <rte_malloc.h>
23 #include <rte_kvargs.h>
24 #include <rte_bus_vdev.h>
25 #include <rte_string_fns.h>
26 #include <rte_errno.h>
27 #include <rte_memory.h>
28 #include <rte_memzone.h>
29 #include <rte_eal_memconfig.h>
30
31 #include "rte_eth_memif.h"
32 #include "memif_socket.h"
33
34 #define ETH_MEMIF_ID_ARG                "id"
35 #define ETH_MEMIF_ROLE_ARG              "role"
36 #define ETH_MEMIF_PKT_BUFFER_SIZE_ARG   "bsize"
37 #define ETH_MEMIF_RING_SIZE_ARG         "rsize"
38 #define ETH_MEMIF_SOCKET_ARG            "socket"
39 #define ETH_MEMIF_SOCKET_ABSTRACT_ARG   "socket-abstract"
40 #define ETH_MEMIF_MAC_ARG               "mac"
41 #define ETH_MEMIF_ZC_ARG                "zero-copy"
42 #define ETH_MEMIF_SECRET_ARG            "secret"
43
44 static const char * const valid_arguments[] = {
45         ETH_MEMIF_ID_ARG,
46         ETH_MEMIF_ROLE_ARG,
47         ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
48         ETH_MEMIF_RING_SIZE_ARG,
49         ETH_MEMIF_SOCKET_ARG,
50         ETH_MEMIF_SOCKET_ABSTRACT_ARG,
51         ETH_MEMIF_MAC_ARG,
52         ETH_MEMIF_ZC_ARG,
53         ETH_MEMIF_SECRET_ARG,
54         NULL
55 };
56
57 static const struct rte_eth_link pmd_link = {
58         .link_speed = ETH_SPEED_NUM_10G,
59         .link_duplex = ETH_LINK_FULL_DUPLEX,
60         .link_status = ETH_LINK_DOWN,
61         .link_autoneg = ETH_LINK_AUTONEG
62 };
63
64 #define MEMIF_MP_SEND_REGION            "memif_mp_send_region"
65
66
67 static int memif_region_init_zc(const struct rte_memseg_list *msl,
68                                 const struct rte_memseg *ms, void *arg);
69
70 const char *
71 memif_version(void)
72 {
73         return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
74 }
75
76 /* Message header to synchronize regions */
77 struct mp_region_msg {
78         char port_name[RTE_DEV_NAME_MAX_LEN];
79         memif_region_index_t idx;
80         memif_region_size_t size;
81 };
82
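/* Primary-process handler for MEMIF_MP_SEND_REGION requests: reply with the
 * requested region's size and file descriptor, if that region exists.
 */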
83 static int
84 memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
85 {
86         struct rte_eth_dev *dev;
87         struct pmd_process_private *proc_private;
88         const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
89         struct rte_mp_msg reply;
90         struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
91         uint16_t port_id;
92         int ret;
93
94         /* Get requested port */
95         ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
96         if (ret) {
97                 MIF_LOG(ERR, "Failed to get port id for %s",
98                         msg_param->port_name);
99                 return -1;
100         }
101         dev = &rte_eth_devices[port_id];
102         proc_private = dev->process_private;
103
104         memset(&reply, 0, sizeof(reply));
105         strlcpy(reply.name, msg->name, sizeof(reply.name));
106         reply_param->idx = msg_param->idx;
107         if (proc_private->regions[msg_param->idx] != NULL) {
108                 reply_param->size = proc_private->regions[msg_param->idx]->region_size;
109                 reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
110                 reply.num_fds = 1;
111         }
112         reply.len_param = sizeof(*reply_param);
113         if (rte_mp_reply(&reply, peer) < 0) {
114                 MIF_LOG(ERR, "Failed to reply to an add region request");
115                 return -1;
116         }
117
118         return 0;
119 }
120
121 /*
122  * Request regions
123  * Called by a secondary process when the port's link status goes up.
124  */
125 static int
126 memif_mp_request_regions(struct rte_eth_dev *dev)
127 {
128         int ret, i;
129         struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
130         struct rte_mp_msg msg, *reply;
131         struct rte_mp_reply replies;
132         struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
133         struct mp_region_msg *reply_param;
134         struct memif_region *r;
135         struct pmd_process_private *proc_private = dev->process_private;
136         struct pmd_internals *pmd = dev->data->dev_private;
137         /* in case of zero-copy slave, only request region 0 */
138         uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
139                                    1 : ETH_MEMIF_MAX_REGION_NUM;
140
141         MIF_LOG(DEBUG, "Requesting memory regions");
142
143         for (i = 0; i < max_region_num; i++) {
144                 /* Prepare the message */
145                 memset(&msg, 0, sizeof(msg));
146                 strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
147                 strlcpy(msg_param->port_name, dev->data->name,
148                         sizeof(msg_param->port_name));
149                 msg_param->idx = i;
150                 msg.len_param = sizeof(*msg_param);
151
152                 /* Send message */
153                 ret = rte_mp_request_sync(&msg, &replies, &timeout);
154                 if (ret < 0 || replies.nb_received != 1) {
155                         MIF_LOG(ERR, "Failed to send mp msg: %d",
156                                 rte_errno);
157                         return -1;
158                 }
159
160                 reply = &replies.msgs[0];
161                 reply_param = (struct mp_region_msg *)reply->param;
162
163                 if (reply_param->size > 0) {
164                         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
165                         if (r == NULL) {
166                                 MIF_LOG(ERR, "Failed to alloc memif region.");
167                                 free(reply);
168                                 return -ENOMEM;
169                         }
170                         r->region_size = reply_param->size;
171                         if (reply->num_fds < 1) {
172                                 MIF_LOG(ERR, "Missing file descriptor.");
173                                 free(reply);
174                                 return -1;
175                         }
176                         r->fd = reply->fds[0];
177                         r->addr = NULL;
178
179                         proc_private->regions[reply_param->idx] = r;
180                         proc_private->regions_num++;
181                 }
182                 free(reply);
183         }
184
185         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
186                 ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
187                 if (ret < 0)
188                         return ret;
189         }
190
191         return memif_connect(dev);
192 }
193
194 static int
195 memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
196 {
197         dev_info->max_mac_addrs = 1;
198         dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
199         dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
200         dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
201         dev_info->min_rx_bufsize = 0;
202
203         return 0;
204 }
205
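/* Rings are laid out at the start of region 0: all S2M rings first,
 * followed by all M2S rings.
 */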
206 static memif_ring_t *
207 memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
208                memif_ring_type_t type, uint16_t ring_num)
209 {
210         /* rings only in region 0 */
211         void *p = proc_private->regions[0]->addr;
212         int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
213             (1 << pmd->run.log2_ring_size);
214
215         p = (uint8_t *)p + (ring_num + type * pmd->run.num_s2m_rings) * ring_size;
216
217         return (memif_ring_t *)p;
218 }
219
220 static memif_region_offset_t
221 memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
222                       memif_ring_type_t type, uint16_t num)
223 {
224         struct pmd_internals *pmd = dev->data->dev_private;
225         struct pmd_process_private *proc_private = dev->process_private;
226
227         return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
228                 (uint8_t *)proc_private->regions[mq->region]->addr);
229 }
230
231 static memif_ring_t *
232 memif_get_ring_from_queue(struct pmd_process_private *proc_private,
233                           struct memif_queue *mq)
234 {
235         struct memif_region *r;
236
237         r = proc_private->regions[mq->region];
238         if (r == NULL)
239                 return NULL;
240
241         return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
242 }
243
244 static void *
245 memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
246 {
247         return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
248 }
249
250 /* Free mbufs received by master */
251 static void
252 memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
253 {
254         uint16_t cur_tail;
255         uint16_t mask = (1 << mq->log2_ring_size) - 1;
256         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
257
258         /* FIXME: improve performance */
259         /* The ring->tail acts as a guard variable between Tx and Rx
260          * threads, so using load-acquire pairs with store-release
261          * in function eth_memif_rx for S2M queues.
262          */
263         cur_tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
264         while (mq->last_tail != cur_tail) {
265                 RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
266                 /* Decrement refcnt and free mbuf. (current segment) */
267                 rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
268                 rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
269                 mq->last_tail++;
270         }
271 }
272
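/* Append 'tail' to the mbuf chain headed by 'head'; 'cur_tail' is the
 * current last segment of that chain.
 */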
273 static int
274 memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
275                     struct rte_mbuf *tail)
276 {
277         /* Check for number-of-segments-overflow */
278         if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
279                 return -EOVERFLOW;
280
281         /* Chain 'tail' onto the old tail */
282         cur_tail->next = tail;
283
284         /* accumulate number of segments and total length. */
285         head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
286
287         tail->pkt_len = tail->data_len;
288         head->pkt_len += tail->pkt_len;
289
290         return 0;
291 }
292
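/* Copy-mode receive burst: copy packet data from the shared-memory buffers
 * into newly allocated mbufs, chaining segments for multi-descriptor packets.
 */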
293 static uint16_t
294 eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
295 {
296         struct memif_queue *mq = queue;
297         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
298         struct pmd_process_private *proc_private =
299                 rte_eth_devices[mq->in_port].process_private;
300         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
301         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
302         uint16_t n_rx_pkts = 0;
303         uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
304                 RTE_PKTMBUF_HEADROOM;
305         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
306         memif_ring_type_t type = mq->type;
307         memif_desc_t *d0;
308         struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
309         uint64_t b;
310         ssize_t size __rte_unused;
311         uint16_t head;
312         int ret;
313         struct rte_eth_link link;
314
315         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
316                 return 0;
317         if (unlikely(ring == NULL)) {
318                 /* Secondary process will attempt to request regions. */
319                 ret = rte_eth_link_get(mq->in_port, &link);
320                 if (ret < 0)
321                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
322                                 mq->in_port, rte_strerror(-ret));
323                 return 0;
324         }
325
326         /* consume interrupt */
327         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
328                 size = read(mq->intr_handle.fd, &b, sizeof(b));
329
330         ring_size = 1 << mq->log2_ring_size;
331         mask = ring_size - 1;
332
333         if (type == MEMIF_RING_S2M) {
334                 cur_slot = mq->last_head;
335                 last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
336         } else {
337                 cur_slot = mq->last_tail;
338                 last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
339         }
340
341         if (cur_slot == last_slot)
342                 goto refill;
343         n_slots = last_slot - cur_slot;
344
345         while (n_slots && n_rx_pkts < nb_pkts) {
346                 mbuf_head = rte_pktmbuf_alloc(mq->mempool);
347                 if (unlikely(mbuf_head == NULL))
348                         goto no_free_bufs;
349                 mbuf = mbuf_head;
350                 mbuf->port = mq->in_port;
351
352 next_slot:
353                 s0 = cur_slot & mask;
354                 d0 = &ring->desc[s0];
355
356                 src_len = d0->length;
357                 dst_off = 0;
358                 src_off = 0;
359
360                 do {
361                         dst_len = mbuf_size - dst_off;
362                         if (dst_len == 0) {
363                                 dst_off = 0;
364                                 dst_len = mbuf_size;
365
366                                 /* store pointer to tail */
367                                 mbuf_tail = mbuf;
368                                 mbuf = rte_pktmbuf_alloc(mq->mempool);
369                                 if (unlikely(mbuf == NULL))
370                                         goto no_free_bufs;
371                                 mbuf->port = mq->in_port;
372                                 ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
373                                 if (unlikely(ret < 0)) {
374                                         MIF_LOG(ERR, "number-of-segments-overflow");
375                                         rte_pktmbuf_free(mbuf);
376                                         goto no_free_bufs;
377                                 }
378                         }
379                         cp_len = RTE_MIN(dst_len, src_len);
380
381                         rte_pktmbuf_data_len(mbuf) += cp_len;
382                         rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
383                         if (mbuf != mbuf_head)
384                                 rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
385
386                         memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
387                                (uint8_t *)memif_get_buffer(proc_private, d0) + src_off,
388                                cp_len);
389
390                         src_off += cp_len;
391                         dst_off += cp_len;
392                         src_len -= cp_len;
393                 } while (src_len);
394
395                 cur_slot++;
396                 n_slots--;
397
398                 if (d0->flags & MEMIF_DESC_FLAG_NEXT)
399                         goto next_slot;
400
401                 mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
402                 *bufs++ = mbuf_head;
403                 n_rx_pkts++;
404         }
405
406 no_free_bufs:
407         if (type == MEMIF_RING_S2M) {
408                 __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
409                 mq->last_head = cur_slot;
410         } else {
411                 mq->last_tail = cur_slot;
412         }
413
414 refill:
415         if (type == MEMIF_RING_M2S) {
416                 /* ring->head is updated by the receiver and this function
417                  * is called in the context of receiver thread. The loads in
418                  * the receiver do not need to synchronize with its own stores.
419                  */
420                 head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
421                 n_slots = ring_size - head + mq->last_tail;
422
423                 while (n_slots--) {
424                         s0 = head++ & mask;
425                         d0 = &ring->desc[s0];
426                         d0->length = pmd->run.pkt_buffer_size;
427                 }
428                 __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
429         }
430
431         mq->n_pkts += n_rx_pkts;
432         return n_rx_pkts;
433 }
434
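/* Zero-copy receive burst: descriptors already point into mbuf data, so the
 * stored mbufs are handed to the application directly and the ring is then
 * refilled with fresh mbufs from the queue's mempool.
 */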
435 static uint16_t
436 eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
437 {
438         struct memif_queue *mq = queue;
439         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
440         struct pmd_process_private *proc_private =
441                 rte_eth_devices[mq->in_port].process_private;
442         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
443         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
444         uint16_t n_rx_pkts = 0;
445         memif_desc_t *d0;
446         struct rte_mbuf *mbuf, *mbuf_tail;
447         struct rte_mbuf *mbuf_head = NULL;
448         int ret;
449         struct rte_eth_link link;
450
451         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
452                 return 0;
453         if (unlikely(ring == NULL)) {
454                 /* Secondary process will attempt to request regions. */
455                 rte_eth_link_get(mq->in_port, &link);
456                 return 0;
457         }
458
459         /* consume interrupt */
460         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
461                 uint64_t b;
462                 ssize_t size __rte_unused;
463                 size = read(mq->intr_handle.fd, &b, sizeof(b));
464         }
465
466         ring_size = 1 << mq->log2_ring_size;
467         mask = ring_size - 1;
468
469         cur_slot = mq->last_tail;
470         /* The ring->tail acts as a guard variable between Tx and Rx
471          * threads, so using load-acquire pairs with store-release
472          * to synchronize it between threads.
473          */
474         last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
475         if (cur_slot == last_slot)
476                 goto refill;
477         n_slots = last_slot - cur_slot;
478
479         while (n_slots && n_rx_pkts < nb_pkts) {
480                 s0 = cur_slot & mask;
481
482                 d0 = &ring->desc[s0];
483                 mbuf_head = mq->buffers[s0];
484                 mbuf = mbuf_head;
485
486 next_slot:
487                 /* prefetch next descriptor */
488                 if (n_rx_pkts + 1 < nb_pkts)
489                         rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);
490
491                 mbuf->port = mq->in_port;
492                 rte_pktmbuf_data_len(mbuf) = d0->length;
493                 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
494
495                 mq->n_bytes += rte_pktmbuf_data_len(mbuf);
496
497                 cur_slot++;
498                 n_slots--;
499                 if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
500                         s0 = cur_slot & mask;
501                         d0 = &ring->desc[s0];
502                         mbuf_tail = mbuf;
503                         mbuf = mq->buffers[s0];
504                         ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
505                         if (unlikely(ret < 0)) {
506                                 MIF_LOG(ERR, "number-of-segments-overflow");
507                                 goto refill;
508                         }
509                         goto next_slot;
510                 }
511
512                 *bufs++ = mbuf_head;
513                 n_rx_pkts++;
514         }
515
516         mq->last_tail = cur_slot;
517
518 /* Supply master with new buffers */
519 refill:
520         /* ring->head is updated by the receiver and this function
521          * is called in the context of receiver thread. The loads in
522          * the receiver do not need to synchronize with its own stores.
523          */
524         head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
525         n_slots = ring_size - head + mq->last_tail;
526
527         if (n_slots < 32)
528                 goto no_free_mbufs;
529
530         ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
531         if (unlikely(ret < 0))
532                 goto no_free_mbufs;
533
534         while (n_slots--) {
535                 s0 = head++ & mask;
536                 if (n_slots > 0)
537                         rte_prefetch0(mq->buffers[head & mask]);
538                 d0 = &ring->desc[s0];
539                 /* newly allocated mbuf backing this slot */
540                 mbuf = mq->buffers[s0];
541                 /* populate descriptor */
542                 d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
543                                 RTE_PKTMBUF_HEADROOM;
544                 d0->region = 1;
545                 d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
546                         (uint8_t *)proc_private->regions[d0->region]->addr;
547         }
548 no_free_mbufs:
549         /* The ring->head acts as a guard variable between Tx and Rx
550          * threads, so using store-release pairs with load-acquire
551          * in function eth_memif_tx.
552          */
553         __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
554
555         mq->n_pkts += n_rx_pkts;
556
557         return n_rx_pkts;
558 }
559
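/* Copy-mode transmit burst: copy mbuf data into the shared-memory buffers
 * described by the ring, then signal the peer unless interrupts are masked.
 */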
560 static uint16_t
561 eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
562 {
563         struct memif_queue *mq = queue;
564         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
565         struct pmd_process_private *proc_private =
566                 rte_eth_devices[mq->in_port].process_private;
567         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
568         uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
569         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
570         memif_ring_type_t type = mq->type;
571         memif_desc_t *d0;
572         struct rte_mbuf *mbuf;
573         struct rte_mbuf *mbuf_head;
574         uint64_t a;
575         ssize_t size;
576         struct rte_eth_link link;
577
578         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
579                 return 0;
580         if (unlikely(ring == NULL)) {
581                 int ret;
582
583                 /* Secondary process will attempt to request regions. */
584                 ret = rte_eth_link_get(mq->in_port, &link);
585                 if (ret < 0)
586                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
587                                 mq->in_port, rte_strerror(-ret));
588                 return 0;
589         }
590
591         ring_size = 1 << mq->log2_ring_size;
592         mask = ring_size - 1;
593
594         if (type == MEMIF_RING_S2M) {
595                 /* For S2M queues ring->head is updated by the sender and
596                  * this function is called in the context of sending thread.
597                  * The loads in the sender do not need to synchronize with
598                  * its own stores. Hence, the following load can be a
599                  * relaxed load.
600                  */
601                 slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
602                 n_free = ring_size - slot +
603                                 __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
604         } else {
605                 /* For M2S queues ring->tail is updated by the sender and
606                  * this function is called in the context of sending thread.
607                  * The loads in the sender do not need to synchronize with
608                  * its own stores. Hence, the following load can be a
609                  * relaxed load.
610                  */
611                 slot = __atomic_load_n(&ring->tail, __ATOMIC_RELAXED);
612                 n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
613         }
614
615         while (n_tx_pkts < nb_pkts && n_free) {
616                 mbuf_head = *bufs++;
617                 mbuf = mbuf_head;
618
619                 saved_slot = slot;
620                 d0 = &ring->desc[slot & mask];
621                 dst_off = 0;
622                 dst_len = (type == MEMIF_RING_S2M) ?
623                         pmd->run.pkt_buffer_size : d0->length;
624
625 next_in_chain:
626                 src_off = 0;
627                 src_len = rte_pktmbuf_data_len(mbuf);
628
629                 while (src_len) {
630                         if (dst_len == 0) {
631                                 if (n_free) {
632                                         slot++;
633                                         n_free--;
634                                         d0->flags |= MEMIF_DESC_FLAG_NEXT;
635                                         d0 = &ring->desc[slot & mask];
636                                         dst_off = 0;
637                                         dst_len = (type == MEMIF_RING_S2M) ?
638                                             pmd->run.pkt_buffer_size : d0->length;
639                                         d0->flags = 0;
640                                 } else {
641                                         slot = saved_slot;
642                                         goto no_free_slots;
643                                 }
644                         }
645                         cp_len = RTE_MIN(dst_len, src_len);
646
647                         memcpy((uint8_t *)memif_get_buffer(proc_private, d0) + dst_off,
648                                rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
649                                cp_len);
650
651                         mq->n_bytes += cp_len;
652                         src_off += cp_len;
653                         dst_off += cp_len;
654                         src_len -= cp_len;
655                         dst_len -= cp_len;
656
657                         d0->length = dst_off;
658                 }
659
660                 if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
661                         mbuf = mbuf->next;
662                         goto next_in_chain;
663                 }
664
665                 n_tx_pkts++;
666                 slot++;
667                 n_free--;
668                 rte_pktmbuf_free(mbuf_head);
669         }
670
671 no_free_slots:
672         if (type == MEMIF_RING_S2M)
673                 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
674         else
675                 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
676
677         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
678                 a = 1;
679                 size = write(mq->intr_handle.fd, &a, sizeof(a));
680                 if (unlikely(size < 0)) {
681                         MIF_LOG(WARNING,
682                                 "Failed to send interrupt. %s", strerror(errno));
683                 }
684         }
685
686         mq->n_pkts += n_tx_pkts;
687         return n_tx_pkts;
688 }
689
690
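/* Enqueue one (possibly multi-segment) mbuf on a zero-copy S2M ring.
 * Returns the number of slots used, or 0 if the chain does not fit.
 */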
691 static int
692 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
693                 memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
694                 uint16_t slot, uint16_t n_free)
695 {
696         memif_desc_t *d0;
697         int used_slots = 1;
698
699 next_in_chain:
700         /* store pointer to mbuf to free it later */
701         mq->buffers[slot & mask] = mbuf;
702         /* Increment refcnt to make sure the buffer is not freed before master
703          * receives it. (current segment)
704          */
705         rte_mbuf_refcnt_update(mbuf, 1);
706         /* populate descriptor */
707         d0 = &ring->desc[slot & mask];
708         d0->length = rte_pktmbuf_data_len(mbuf);
709         /* FIXME: get region index */
710         d0->region = 1;
711         d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
712                 (uint8_t *)proc_private->regions[d0->region]->addr;
713         d0->flags = 0;
714
715         /* check if buffer is chained */
716         if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
717                 if (n_free < 2)
718                         return 0;
719                 /* mark buffer as chained */
720                 d0->flags |= MEMIF_DESC_FLAG_NEXT;
721                 /* advance mbuf */
722                 mbuf = mbuf->next;
723                 /* update counters */
724                 used_slots++;
725                 slot++;
726                 n_free--;
727                 goto next_in_chain;
728         }
729         return used_slots;
730 }
731
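/* Zero-copy transmit burst: point ring descriptors directly at the mbuf data
 * and hold an extra reference on each mbuf until the master returns the slot
 * (see memif_free_stored_mbufs()).
 */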
732 static uint16_t
733 eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
734 {
735         struct memif_queue *mq = queue;
736         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
737         struct pmd_process_private *proc_private =
738                 rte_eth_devices[mq->in_port].process_private;
739         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
740         uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
741         struct rte_eth_link link;
742
743         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
744                 return 0;
745         if (unlikely(ring == NULL)) {
746                 /* Secondary process will attempt to request regions. */
747                 rte_eth_link_get(mq->in_port, &link);
748                 return 0;
749         }
750
751         ring_size = 1 << mq->log2_ring_size;
752         mask = ring_size - 1;
753
754         /* free mbufs received by master */
755         memif_free_stored_mbufs(proc_private, mq);
756
757         /* ring type always MEMIF_RING_S2M */
758         /* For S2M queues ring->head is updated by the sender and
759          * this function is called in the context of sending thread.
760          * The loads in the sender do not need to synchronize with
761          * its own stores. Hence, the following load can be a
762          * relaxed load.
763          */
764         slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
765         n_free = ring_size - slot + mq->last_tail;
766
767         int used_slots;
768
769         while (n_free && (n_tx_pkts < nb_pkts)) {
770                 while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
771                         if ((nb_pkts - n_tx_pkts) > 8) {
772                                 rte_prefetch0(*bufs + 4);
773                                 rte_prefetch0(*bufs + 5);
774                                 rte_prefetch0(*bufs + 6);
775                                 rte_prefetch0(*bufs + 7);
776                         }
777                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
778                                 mask, slot, n_free);
779                         if (unlikely(used_slots < 1))
780                                 goto no_free_slots;
781                         n_tx_pkts++;
782                         slot += used_slots;
783                         n_free -= used_slots;
784
785                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
786                                 mask, slot, n_free);
787                         if (unlikely(used_slots < 1))
788                                 goto no_free_slots;
789                         n_tx_pkts++;
790                         slot += used_slots;
791                         n_free -= used_slots;
792
793                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
794                                 mask, slot, n_free);
795                         if (unlikely(used_slots < 1))
796                                 goto no_free_slots;
797                         n_tx_pkts++;
798                         slot += used_slots;
799                         n_free -= used_slots;
800
801                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
802                                 mask, slot, n_free);
803                         if (unlikely(used_slots < 1))
804                                 goto no_free_slots;
805                         n_tx_pkts++;
806                         slot += used_slots;
807                         n_free -= used_slots;
808                 }
809                 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
810                         mask, slot, n_free);
811                 if (unlikely(used_slots < 1))
812                         goto no_free_slots;
813                 n_tx_pkts++;
814                 slot += used_slots;
815                 n_free -= used_slots;
816         }
817
818 no_free_slots:
819         /* ring type always MEMIF_RING_S2M */
820         /* The ring->head acts as a guard variable between Tx and Rx
821          * threads, so using store-release pairs with load-acquire
822          * in function eth_memif_rx for S2M rings.
823          */
824         __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
825
826         /* Send interrupt, if enabled. */
827         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
828                 uint64_t a = 1;
829                 ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
830                 if (unlikely(size < 0)) {
831                         MIF_LOG(WARNING,
832                                 "Failed to send interrupt. %s", strerror(errno));
833                 }
834         }
835
836         /* increment queue counters */
837         mq->n_pkts += n_tx_pkts;
838
839         return n_tx_pkts;
840 }
841
842 void
843 memif_free_regions(struct rte_eth_dev *dev)
844 {
845         struct pmd_process_private *proc_private = dev->process_private;
846         struct pmd_internals *pmd = dev->data->dev_private;
847         int i;
848         struct memif_region *r;
849
850         /* regions are allocated contiguously, so it's
851          * enough to loop until 'proc_private->regions_num'
852          */
853         for (i = 0; i < proc_private->regions_num; i++) {
854                 r = proc_private->regions[i];
855                 if (r != NULL) {
856                         /* Zero-copy regions (idx > 0) map DPDK memory; do not unmap it. */
857                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
858                                 r->addr = NULL;
859                                 if (r->fd > 0)
860                                         close(r->fd);
861                         }
862                         if (r->addr != NULL) {
863                                 munmap(r->addr, r->region_size);
864                                 if (r->fd > 0) {
865                                         close(r->fd);
866                                         r->fd = -1;
867                                 }
868                         }
869                         rte_free(r);
870                         proc_private->regions[i] = NULL;
871                 }
872         }
873         proc_private->regions_num = 0;
874 }
875
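/* rte_memseg_walk() callback for zero-copy mode: expose DPDK memory as memif
 * regions, one region per memseg list, growing it as segments are walked.
 */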
876 static int
877 memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
878                      void *arg)
879 {
880         struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
881         struct memif_region *r;
882
883         if (proc_private->regions_num < 1) {
884                 MIF_LOG(ERR, "Missing descriptor region");
885                 return -1;
886         }
887
888         r = proc_private->regions[proc_private->regions_num - 1];
889
890         if (r->addr != msl->base_va)
891                 r = proc_private->regions[++proc_private->regions_num - 1];
892
893         if (r == NULL) {
894                 r = rte_zmalloc("region", sizeof(struct memif_region), 0);
895                 if (r == NULL) {
896                         MIF_LOG(ERR, "Failed to alloc memif region.");
897                         return -ENOMEM;
898                 }
899
900                 r->addr = msl->base_va;
901                 r->region_size = ms->len;
902                 r->fd = rte_memseg_get_fd(ms);
903                 if (r->fd < 0)
904                         return -1;
905                 r->pkt_buffer_offset = 0;
906
907                 proc_private->regions[proc_private->regions_num - 1] = r;
908         } else {
909                 r->region_size += ms->len;
910         }
911
912         return 0;
913 }
914
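/* Create a shared-memory region backed by a sealed memfd, sized to hold the
 * rings and, when 'has_buffers' is set, the packet buffers as well.
 */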
915 static int
916 memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
917 {
918         struct pmd_internals *pmd = dev->data->dev_private;
919         struct pmd_process_private *proc_private = dev->process_private;
920         char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
921         int ret = 0;
922         struct memif_region *r;
923
924         if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
925                 MIF_LOG(ERR, "Too many regions.");
926                 return -1;
927         }
928
929         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
930         if (r == NULL) {
931                 MIF_LOG(ERR, "Failed to alloc memif region.");
932                 return -ENOMEM;
933         }
934
935         /* calculate buffer offset */
936         r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) *
937             (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
938             (1 << pmd->run.log2_ring_size));
939
940         r->region_size = r->pkt_buffer_offset;
941         /* if region has buffers, add buffers size to region_size */
942         if (has_buffers == 1)
943                 r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
944                         (1 << pmd->run.log2_ring_size) *
945                         (pmd->run.num_s2m_rings +
946                          pmd->run.num_m2s_rings));
947
948         memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
949         snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
950                  proc_private->regions_num);
951
952         r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
953         if (r->fd < 0) {
954                 MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
955                 ret = -1;
956                 goto error;
957         }
958
959         ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
960         if (ret < 0) {
961                 MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
962                 goto error;
963         }
964
965         ret = ftruncate(r->fd, r->region_size);
966         if (ret < 0) {
967                 MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
968                 goto error;
969         }
970
971         r->addr = mmap(NULL, r->region_size, PROT_READ |
972                        PROT_WRITE, MAP_SHARED, r->fd, 0);
973         if (r->addr == MAP_FAILED) {
974                 MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(errno));
975                 ret = -1;
976                 goto error;
977         }
978
979         proc_private->regions[proc_private->regions_num] = r;
980         proc_private->regions_num++;
981
982         return ret;
983
984 error:
985         if (r->fd > 0)
986                 close(r->fd);
987         r->fd = -1;
988
989         return ret;
990 }
991
992 static int
993 memif_regions_init(struct rte_eth_dev *dev)
994 {
995         struct pmd_internals *pmd = dev->data->dev_private;
996         int ret;
997
998         /*
999          * Zero-copy exposes DPDK memory.
1000          * Each memseg list will be represented by a memif region.
1001          * Zero-copy regions indexing: memseg list idx + 1,
1002          * as we already have region 0 reserved for descriptors.
1003          */
1004         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1005                 /* create region idx 0 containing descriptors */
1006                 ret = memif_region_init_shm(dev, 0);
1007                 if (ret < 0)
1008                         return ret;
1009                 ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
1010                 if (ret < 0)
1011                         return ret;
1012         } else {
1013                 /* create one memory region containing rings and buffers */
1014                 ret = memif_region_init_shm(dev, /* has buffers */ 1);
1015                 if (ret < 0)
1016                         return ret;
1017         }
1018
1019         return 0;
1020 }
1021
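/* Initialize ring headers in region 0 and, when not in zero-copy mode,
 * pre-populate the descriptors with fixed packet-buffer offsets.
 */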
1022 static void
1023 memif_init_rings(struct rte_eth_dev *dev)
1024 {
1025         struct pmd_internals *pmd = dev->data->dev_private;
1026         struct pmd_process_private *proc_private = dev->process_private;
1027         memif_ring_t *ring;
1028         int i, j;
1029         uint16_t slot;
1030
1031         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1032                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2M, i);
1033                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1034                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1035                 ring->cookie = MEMIF_COOKIE;
1036                 ring->flags = 0;
1037
1038                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1039                         continue;
1040
1041                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1042                         slot = i * (1 << pmd->run.log2_ring_size) + j;
1043                         ring->desc[j].region = 0;
1044                         ring->desc[j].offset =
1045                                 proc_private->regions[0]->pkt_buffer_offset +
1046                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1047                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1048                 }
1049         }
1050
1051         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1052                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_M2S, i);
1053                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1054                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1055                 ring->cookie = MEMIF_COOKIE;
1056                 ring->flags = 0;
1057
1058                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1059                         continue;
1060
1061                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1062                         slot = (i + pmd->run.num_s2m_rings) *
1063                             (1 << pmd->run.log2_ring_size) + j;
1064                         ring->desc[j].region = 0;
1065                         ring->desc[j].offset =
1066                                 proc_private->regions[0]->pkt_buffer_offset +
1067                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1068                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1069                 }
1070         }
1071 }
1072
1073 /* called only by slave */
1074 static int
1075 memif_init_queues(struct rte_eth_dev *dev)
1076 {
1077         struct pmd_internals *pmd = dev->data->dev_private;
1078         struct memif_queue *mq;
1079         int i;
1080
1081         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1082                 mq = dev->data->tx_queues[i];
1083                 mq->log2_ring_size = pmd->run.log2_ring_size;
1084                 /* queues located only in region 0 */
1085                 mq->region = 0;
1086                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2M, i);
1087                 mq->last_head = 0;
1088                 mq->last_tail = 0;
1089                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1090                 if (mq->intr_handle.fd < 0) {
1091                         MIF_LOG(WARNING,
1092                                 "Failed to create eventfd for tx queue %d: %s.", i,
1093                                 strerror(errno));
1094                 }
1095                 mq->buffers = NULL;
1096                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1097                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1098                                                   (1 << mq->log2_ring_size), 0);
1099                         if (mq->buffers == NULL)
1100                                 return -ENOMEM;
1101                 }
1102         }
1103
1104         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1105                 mq = dev->data->rx_queues[i];
1106                 mq->log2_ring_size = pmd->run.log2_ring_size;
1107                 /* queues located only in region 0 */
1108                 mq->region = 0;
1109                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_M2S, i);
1110                 mq->last_head = 0;
1111                 mq->last_tail = 0;
1112                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1113                 if (mq->intr_handle.fd < 0) {
1114                         MIF_LOG(WARNING,
1115                                 "Failed to create eventfd for rx queue %d: %s.", i,
1116                                 strerror(errno));
1117                 }
1118                 mq->buffers = NULL;
1119                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1120                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1121                                                   (1 << mq->log2_ring_size), 0);
1122                         if (mq->buffers == NULL)
1123                                 return -ENOMEM;
1124                 }
1125         }
1126         return 0;
1127 }
1128
1129 int
1130 memif_init_regions_and_queues(struct rte_eth_dev *dev)
1131 {
1132         int ret;
1133
1134         ret = memif_regions_init(dev);
1135         if (ret < 0)
1136                 return ret;
1137
1138         memif_init_rings(dev);
1139
1140         ret = memif_init_queues(dev);
1141         if (ret < 0)
1142                 return ret;
1143
1144         return 0;
1145 }
1146
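/* Map any regions that are not mapped yet, reset ring state and, in the
 * primary process, mark the device as connected (link up).
 */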
1147 int
1148 memif_connect(struct rte_eth_dev *dev)
1149 {
1150         struct pmd_internals *pmd = dev->data->dev_private;
1151         struct pmd_process_private *proc_private = dev->process_private;
1152         struct memif_region *mr;
1153         struct memif_queue *mq;
1154         memif_ring_t *ring;
1155         int i;
1156
1157         for (i = 0; i < proc_private->regions_num; i++) {
1158                 mr = proc_private->regions[i];
1159                 if (mr != NULL) {
1160                         if (mr->addr == NULL) {
1161                                 if (mr->fd < 0)
1162                                         return -1;
1163                                 mr->addr = mmap(NULL, mr->region_size,
1164                                                 PROT_READ | PROT_WRITE,
1165                                                 MAP_SHARED, mr->fd, 0);
1166                                 if (mr->addr == MAP_FAILED) {
1167                                         MIF_LOG(ERR, "mmap failed: %s",
1168                                                 strerror(errno));
1169                                         return -1;
1170                                 }
1171                         }
1172                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
1173                                 /* close memseg file */
1174                                 close(mr->fd);
1175                                 mr->fd = -1;
1176                         }
1177                 }
1178         }
1179
1180         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1181                 for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1182                         mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
1183                             dev->data->tx_queues[i] : dev->data->rx_queues[i];
1184                         ring = memif_get_ring_from_queue(proc_private, mq);
1185                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1186                                 MIF_LOG(ERR, "Wrong ring");
1187                                 return -1;
1188                         }
1189                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1190                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1191                         mq->last_head = 0;
1192                         mq->last_tail = 0;
1193                         /* enable polling mode */
1194                         if (pmd->role == MEMIF_ROLE_MASTER)
1195                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1196                 }
1197                 for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1198                         mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
1199                             dev->data->rx_queues[i] : dev->data->tx_queues[i];
1200                         ring = memif_get_ring_from_queue(proc_private, mq);
1201                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1202                                 MIF_LOG(ERR, "Wrong ring");
1203                                 return -1;
1204                         }
1205                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1206                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1207                         mq->last_head = 0;
1208                         mq->last_tail = 0;
1209                         /* enable polling mode */
1210                         if (pmd->role == MEMIF_ROLE_SLAVE)
1211                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1212                 }
1213
1214                 pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
1215                 pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
1216                 dev->data->dev_link.link_status = ETH_LINK_UP;
1217         }
1218         MIF_LOG(INFO, "Connected.");
1219         return 0;
1220 }
1221
1222 static int
1223 memif_dev_start(struct rte_eth_dev *dev)
1224 {
1225         struct pmd_internals *pmd = dev->data->dev_private;
1226         int ret = 0;
1227
1228         switch (pmd->role) {
1229         case MEMIF_ROLE_SLAVE:
1230                 ret = memif_connect_slave(dev);
1231                 break;
1232         case MEMIF_ROLE_MASTER:
1233                 ret = memif_connect_master(dev);
1234                 break;
1235         default:
1236                 MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
1237                 ret = -1;
1238                 break;
1239         }
1240
1241         return ret;
1242 }
1243
1244 static int
1245 memif_dev_close(struct rte_eth_dev *dev)
1246 {
1247         struct pmd_internals *pmd = dev->data->dev_private;
1248         int i;
1249
1250         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1251                 memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
1252                 memif_disconnect(dev);
1253
1254                 for (i = 0; i < dev->data->nb_rx_queues; i++)
1255                         (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]);
1256                 for (i = 0; i < dev->data->nb_tx_queues; i++)
1257                         (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]);
1258
1259                 memif_socket_remove_device(dev);
1260         } else {
1261                 memif_disconnect(dev);
1262         }
1263
1264         rte_free(dev->process_private);
1265
1266         return 0;
1267 }
1268
1269 static int
1270 memif_dev_configure(struct rte_eth_dev *dev)
1271 {
1272         struct pmd_internals *pmd = dev->data->dev_private;
1273
1274         /*
1275          * SLAVE - TXQ
1276          * MASTER - RXQ
1277          */
1278         pmd->cfg.num_s2m_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
1279                                   dev->data->nb_tx_queues : dev->data->nb_rx_queues;
1280
1281         /*
1282          * SLAVE - RXQ
1283          * MASTER - TXQ
1284          */
1285         pmd->cfg.num_m2s_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
1286                                   dev->data->nb_rx_queues : dev->data->nb_tx_queues;
1287
1288         return 0;
1289 }
1290
1291 static int
1292 memif_tx_queue_setup(struct rte_eth_dev *dev,
1293                      uint16_t qid,
1294                      uint16_t nb_tx_desc __rte_unused,
1295                      unsigned int socket_id __rte_unused,
1296                      const struct rte_eth_txconf *tx_conf __rte_unused)
1297 {
1298         struct pmd_internals *pmd = dev->data->dev_private;
1299         struct memif_queue *mq;
1300
1301         mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
1302         if (mq == NULL) {
1303                 MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
1304                 return -ENOMEM;
1305         }
1306
1307         mq->type =
1308             (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S;
1309         mq->n_pkts = 0;
1310         mq->n_bytes = 0;
1311         mq->intr_handle.fd = -1;
1312         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1313         mq->in_port = dev->data->port_id;
1314         dev->data->tx_queues[qid] = mq;
1315
1316         return 0;
1317 }
1318
1319 static int
1320 memif_rx_queue_setup(struct rte_eth_dev *dev,
1321                      uint16_t qid,
1322                      uint16_t nb_rx_desc __rte_unused,
1323                      unsigned int socket_id __rte_unused,
1324                      const struct rte_eth_rxconf *rx_conf __rte_unused,
1325                      struct rte_mempool *mb_pool)
1326 {
1327         struct pmd_internals *pmd = dev->data->dev_private;
1328         struct memif_queue *mq;
1329
1330         mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
1331         if (mq == NULL) {
1332                 MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
1333                 return -ENOMEM;
1334         }
1335
1336         mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M;
1337         mq->n_pkts = 0;
1338         mq->n_bytes = 0;
1339         mq->intr_handle.fd = -1;
1340         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1341         mq->mempool = mb_pool;
1342         mq->in_port = dev->data->port_id;
1343         dev->data->rx_queues[qid] = mq;
1344
1345         return 0;
1346 }
1347
1348 static void
1349 memif_queue_release(void *queue)
1350 {
1351         struct memif_queue *mq = (struct memif_queue *)queue;
1352
1353         if (!mq)
1354                 return;
1355
1356         rte_free(mq);
1357 }
1358
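/* In a secondary process, a link-status change triggers requesting (link up)
 * or freeing (link down) of the shared memory regions.
 */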
1359 static int
1360 memif_link_update(struct rte_eth_dev *dev,
1361                   int wait_to_complete __rte_unused)
1362 {
1363         struct pmd_process_private *proc_private;
1364
1365         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1366                 proc_private = dev->process_private;
1367                 if (dev->data->dev_link.link_status == ETH_LINK_UP &&
1368                                 proc_private->regions_num == 0) {
1369                         memif_mp_request_regions(dev);
1370                 } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN &&
1371                                 proc_private->regions_num > 0) {
1372                         memif_free_regions(dev);
1373                 }
1374         }
1375         return 0;
1376 }
1377
1378 static int
1379 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1380 {
1381         struct pmd_internals *pmd = dev->data->dev_private;
1382         struct memif_queue *mq;
1383         int i;
1384         uint8_t tmp, nq;
1385
1386         stats->ipackets = 0;
1387         stats->ibytes = 0;
1388         stats->opackets = 0;
1389         stats->obytes = 0;
1390
1391         tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings :
1392             pmd->run.num_m2s_rings;
1393         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1394             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1395
1396         /* RX stats */
1397         for (i = 0; i < nq; i++) {
1398                 mq = dev->data->rx_queues[i];
1399                 stats->q_ipackets[i] = mq->n_pkts;
1400                 stats->q_ibytes[i] = mq->n_bytes;
1401                 stats->ipackets += mq->n_pkts;
1402                 stats->ibytes += mq->n_bytes;
1403         }
1404
1405         tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings :
1406             pmd->run.num_s2m_rings;
1407         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1408             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1409
1410         /* TX stats */
1411         for (i = 0; i < nq; i++) {
1412                 mq = dev->data->tx_queues[i];
1413                 stats->q_opackets[i] = mq->n_pkts;
1414                 stats->q_obytes[i] = mq->n_bytes;
1415                 stats->opackets += mq->n_pkts;
1416                 stats->obytes += mq->n_bytes;
1417         }
1418         return 0;
1419 }
1420
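/* Clear the software counters on every queue in both ring directions. */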
1421 static int
1422 memif_stats_reset(struct rte_eth_dev *dev)
1423 {
1424         struct pmd_internals *pmd = dev->data->dev_private;
1425         int i;
1426         struct memif_queue *mq;
1427
1428         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1429                 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->tx_queues[i] :
1430                     dev->data->rx_queues[i];
1431                 mq->n_pkts = 0;
1432                 mq->n_bytes = 0;
1433         }
1434         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1435                 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->rx_queues[i] :
1436                     dev->data->tx_queues[i];
1437                 mq->n_pkts = 0;
1438                 mq->n_bytes = 0;
1439         }
1440
1441         return 0;
1442 }
1443
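/*
 * Rx interrupt mode is not supported: enabling it fails, disabling it
 * succeeds without doing anything.
 */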
1444 static int
1445 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1446                            uint16_t qid __rte_unused)
1447 {
1448         MIF_LOG(WARNING, "Interrupt mode not supported.");
1449
1450         return -1;
1451 }
1452
1453 static int
1454 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1455 {
1456         struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1457
1458         return 0;
1459 }
1460
1461 static const struct eth_dev_ops ops = {
1462         .dev_start = memif_dev_start,
1463         .dev_close = memif_dev_close,
1464         .dev_infos_get = memif_dev_info,
1465         .dev_configure = memif_dev_configure,
1466         .tx_queue_setup = memif_tx_queue_setup,
1467         .rx_queue_setup = memif_rx_queue_setup,
1468         .rx_queue_release = memif_queue_release,
1469         .tx_queue_release = memif_queue_release,
1470         .rx_queue_intr_enable = memif_rx_queue_intr_enable,
1471         .rx_queue_intr_disable = memif_rx_queue_intr_disable,
1472         .link_update = memif_link_update,
1473         .stats_get = memif_stats_get,
1474         .stats_reset = memif_stats_reset,
1475 };
1476
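/*
 * Allocate the ethdev and its process-private data, register the device
 * with its control-channel socket, store the devargs-derived configuration
 * and select the copy or zero-copy Rx/Tx burst handlers.
 */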
1477 static int
1478 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1479              memif_interface_id_t id, uint32_t flags,
1480              const char *socket_filename,
1481              memif_log2_ring_size_t log2_ring_size,
1482              uint16_t pkt_buffer_size, const char *secret,
1483              struct rte_ether_addr *ether_addr)
1484 {
1485         int ret = 0;
1486         struct rte_eth_dev *eth_dev;
1487         struct rte_eth_dev_data *data;
1488         struct pmd_internals *pmd;
1489         struct pmd_process_private *process_private;
1490         const unsigned int numa_node = vdev->device.numa_node;
1491         const char *name = rte_vdev_device_name(vdev);
1492
1493         eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1494         if (eth_dev == NULL) {
1495                 MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1496                 return -1;
1497         }
1498
1499         process_private = (struct pmd_process_private *)
1500                 rte_zmalloc(name, sizeof(struct pmd_process_private),
1501                             RTE_CACHE_LINE_SIZE);
1502
1503         if (process_private == NULL) {
1504                 MIF_LOG(ERR, "Failed to alloc memory for process private");
1505                 return -1;
1506         }
1507         eth_dev->process_private = process_private;
1508
1509         pmd = eth_dev->data->dev_private;
1510         memset(pmd, 0, sizeof(*pmd));
1511
1512         pmd->id = id;
1513         pmd->flags = flags;
1514         pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1515         pmd->role = role;
1516         /* Zero-copy flag irrelevant to master. */
1517         if (pmd->role == MEMIF_ROLE_MASTER)
1518                 pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1519
1520         ret = memif_socket_init(eth_dev, socket_filename);
1521         if (ret < 0)
1522                 return ret;
1523
1524         memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
1525         if (secret != NULL)
1526                 strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1527
1528         pmd->cfg.log2_ring_size = log2_ring_size;
1529         /* set in .dev_configure() */
1530         pmd->cfg.num_s2m_rings = 0;
1531         pmd->cfg.num_m2s_rings = 0;
1532
1533         pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1534         rte_spinlock_init(&pmd->cc_lock);
1535
1536         data = eth_dev->data;
1537         data->dev_private = pmd;
1538         data->numa_node = numa_node;
1539         data->dev_link = pmd_link;
1540         data->mac_addrs = ether_addr;
1541         data->promiscuous = 1;
1542         data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1543
1544         eth_dev->dev_ops = &ops;
1545         eth_dev->device = &vdev->device;
1546         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1547                 eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1548                 eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1549         } else {
1550                 eth_dev->rx_pkt_burst = eth_memif_rx;
1551                 eth_dev->tx_pkt_burst = eth_memif_tx;
1552         }
1553
1554         rte_eth_dev_probing_finish(eth_dev);
1555
1556         return 0;
1557 }
1558
1559 static int
1560 memif_set_role(const char *key __rte_unused, const char *value,
1561                void *extra_args)
1562 {
1563         enum memif_role_t *role = (enum memif_role_t *)extra_args;
1564
1565         if (strstr(value, "master") != NULL) {
1566                 *role = MEMIF_ROLE_MASTER;
1567         } else if (strstr(value, "slave") != NULL) {
1568                 *role = MEMIF_ROLE_SLAVE;
1569         } else {
1570                 MIF_LOG(ERR, "Unknown role: %s.", value);
1571                 return -EINVAL;
1572         }
1573         return 0;
1574 }
1575
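/*
 * Parse the "zero-copy" devarg. Zero-copy shares whole memory segments
 * with the peer, so it is only allowed when the EAL runs with single-file
 * memory segments.
 */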
1576 static int
1577 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1578 {
1579         uint32_t *flags = (uint32_t *)extra_args;
1580
1581         if (strstr(value, "yes") != NULL) {
1582                 if (!rte_mcfg_get_single_file_segments()) {
1583                         MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1584                         return -ENOTSUP;
1585                 }
1586                 *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1587         } else if (strstr(value, "no") != NULL) {
1588                 *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1589         } else {
1590                 MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1591                 return -EINVAL;
1592         }
1593         return 0;
1594 }
1595
1596 static int
1597 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1598 {
1599         memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1600
1601         /* even if parsing fails, 0 is a valid id */
1602         *id = strtoul(value, NULL, 10);
1603         return 0;
1604 }
1605
1606 static int
1607 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1608 {
1609         unsigned long tmp;
1610         uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1611
1612         tmp = strtoul(value, NULL, 10);
1613         if (tmp == 0 || tmp > 0xFFFF) {
1614                 MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1615                 return -EINVAL;
1616         }
1617         *pkt_buffer_size = tmp;
1618         return 0;
1619 }
1620
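/*
 * Parse the "rsize" devarg: it is the log2 of the ring size, i.e. the ring
 * holds 2^rsize descriptors, up to ETH_MEMIF_MAX_LOG2_RING_SIZE.
 */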
1621 static int
1622 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1623 {
1624         unsigned long tmp;
1625         memif_log2_ring_size_t *log2_ring_size =
1626             (memif_log2_ring_size_t *)extra_args;
1627
1628         tmp = strtoul(value, NULL, 10);
1629         if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1630                 MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1631                         value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1632                 return -EINVAL;
1633         }
1634         *log2_ring_size = tmp;
1635         return 0;
1636 }
1637
1638 /* check if directory exists and if we have permission to read/write */
1639 static int
1640 memif_check_socket_filename(const char *filename)
1641 {
1642         char *dir = NULL, *tmp;
1643         uint32_t idx;
1644         int ret = 0;
1645
1646         if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1647                 MIF_LOG(ERR, "Unix socket address too long (max 108).");
1648                 return -1;
1649         }
1650
1651         tmp = strrchr(filename, '/');
1652         if (tmp != NULL) {
1653                 idx = tmp - filename;
1654                 dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1655                 if (dir == NULL) {
1656                         MIF_LOG(ERR, "Failed to allocate memory.");
1657                         return -1;
1658                 }
1659                 strlcpy(dir, filename, sizeof(char) * (idx + 1));
1660         }
1661
1662         if (dir == NULL || (faccessat(AT_FDCWD, dir, F_OK | R_OK |
1663                                         W_OK, AT_EACCESS) < 0)) {
1664                 MIF_LOG(ERR, "Invalid socket directory.");
1665                 ret = -EINVAL;
1666         }
1667
1668         if (dir != NULL)
1669                 rte_free(dir);
1670
1671         return ret;
1672 }
1673
1674 static int
1675 memif_set_socket_filename(const char *key __rte_unused, const char *value,
1676                           void *extra_args)
1677 {
1678         const char **socket_filename = (const char **)extra_args;
1679
1680         *socket_filename = value;
1681         return 0;
1682 }
1683
1684 static int
1685 memif_set_is_socket_abstract(const char *key __rte_unused, const char *value, void *extra_args)
1686 {
1687         uint32_t *flags = (uint32_t *)extra_args;
1688
1689         if (strstr(value, "yes") != NULL) {
1690                 *flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1691         } else if (strstr(value, "no") != NULL) {
1692                 *flags &= ~ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1693         } else {
1694                 MIF_LOG(ERR, "Failed to parse socket-abstract param: %s.", value);
1695                 return -EINVAL;
1696         }
1697         return 0;
1698 }
1699
1700 static int
1701 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1702 {
1703         struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1704
1705         if (rte_ether_unformat_addr(value, ether_addr) < 0)
1706                 MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1707         return 0;
1708 }
1709
1710 static int
1711 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1712 {
1713         const char **secret = (const char **)extra_args;
1714
1715         *secret = value;
1716         return 0;
1717 }
1718
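/*
 * Probe entry point. A secondary process only attaches to the already
 * created device and allocates its own process-private data; the primary
 * process registers the region-sharing mp action, parses the devargs and
 * creates the interface.
 */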
1719 static int
1720 rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1721 {
1722         RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1723         RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1724         int ret = 0;
1725         struct rte_kvargs *kvlist;
1726         const char *name = rte_vdev_device_name(vdev);
1727         enum memif_role_t role = MEMIF_ROLE_SLAVE;
1728         memif_interface_id_t id = 0;
1729         uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1730         memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1731         const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1732         uint32_t flags = 0;
1733         const char *secret = NULL;
1734         struct rte_ether_addr *ether_addr = rte_zmalloc("",
1735                 sizeof(struct rte_ether_addr), 0);
1736         struct rte_eth_dev *eth_dev;
1737
             /* rte_zmalloc() can fail; bail out before the address is used. */
             if (ether_addr == NULL) {
                     MIF_LOG(ERR, "Failed to allocate mac address.");
                     return -1;
             }

1738         rte_eth_random_addr(ether_addr->addr_bytes);
1739
1740         MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1741
1742         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1743                 eth_dev = rte_eth_dev_attach_secondary(name);
1744                 if (!eth_dev) {
1745                         MIF_LOG(ERR, "Failed to probe %s", name);
1746                         return -1;
1747                 }
1748
1749                 eth_dev->dev_ops = &ops;
1750                 eth_dev->device = &vdev->device;
1751                 eth_dev->rx_pkt_burst = eth_memif_rx;
1752                 eth_dev->tx_pkt_burst = eth_memif_tx;
1753
1754                 if (!rte_eal_primary_proc_alive(NULL)) {
1755                         MIF_LOG(ERR, "Primary process is missing");
1756                         return -1;
1757                 }
1758
1759                 eth_dev->process_private = (struct pmd_process_private *)
1760                         rte_zmalloc(name,
1761                                 sizeof(struct pmd_process_private),
1762                                 RTE_CACHE_LINE_SIZE);
1763                 if (eth_dev->process_private == NULL) {
1764                         MIF_LOG(ERR,
1765                                 "Failed to alloc memory for process private");
1766                         return -1;
1767                 }
1768
1769                 rte_eth_dev_probing_finish(eth_dev);
1770
1771                 return 0;
1772         }
1773
1774         ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1775         /*
1776          * The primary process can continue probing, but secondary processes
1777          * won't be able to get memory region information.
1778          */
1779         if (ret < 0 && rte_errno != EEXIST)
1780                 MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1781                         strerror(rte_errno));
1782
1783         /* use abstract address by default */
1784         flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1785
1786         kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1787
1788         /* parse parameters */
1789         if (kvlist != NULL) {
1790                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1791                                          &memif_set_role, &role);
1792                 if (ret < 0)
1793                         goto exit;
1794                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1795                                          &memif_set_id, &id);
1796                 if (ret < 0)
1797                         goto exit;
1798                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1799                                          &memif_set_bs, &pkt_buffer_size);
1800                 if (ret < 0)
1801                         goto exit;
1802                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1803                                          &memif_set_rs, &log2_ring_size);
1804                 if (ret < 0)
1805                         goto exit;
1806                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1807                                          &memif_set_socket_filename,
1808                                          (void *)(&socket_filename));
1809                 if (ret < 0)
1810                         goto exit;
1811                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ABSTRACT_ARG,
1812                                          &memif_set_is_socket_abstract, &flags);
1813                 if (ret < 0)
1814                         goto exit;
1815                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1816                                          &memif_set_mac, ether_addr);
1817                 if (ret < 0)
1818                         goto exit;
1819                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1820                                          &memif_set_zc, &flags);
1821                 if (ret < 0)
1822                         goto exit;
1823                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1824                                          &memif_set_secret, (void *)(&secret));
1825                 if (ret < 0)
1826                         goto exit;
1827         }
1828
1829         if (!(flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT)) {
1830                 ret = memif_check_socket_filename(socket_filename);
1831                 if (ret < 0)
1832                         goto exit;
1833         }
1834
1835         /* create interface */
1836         ret = memif_create(vdev, role, id, flags, socket_filename,
1837                            log2_ring_size, pkt_buffer_size, secret, ether_addr);
1838
1839 exit:
1840         if (kvlist != NULL)
1841                 rte_kvargs_free(kvlist);
1842         return ret;
1843 }
1844
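/* Unplug: close the port if this driver created it. */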
1845 static int
1846 rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1847 {
1848         struct rte_eth_dev *eth_dev;
1849
1850         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1851         if (eth_dev == NULL)
1852                 return 0;
1853
1854         return rte_eth_dev_close(eth_dev->data->port_id);
1855 }
1856
1857 static struct rte_vdev_driver pmd_memif_drv = {
1858         .probe = rte_pmd_memif_probe,
1859         .remove = rte_pmd_memif_remove,
1860 };
1861
1862 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1863
1864 RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1865                               ETH_MEMIF_ID_ARG "=<int>"
1866                               ETH_MEMIF_ROLE_ARG "=master|slave"
1867                               ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1868                               ETH_MEMIF_RING_SIZE_ARG "=<int>"
1869                               ETH_MEMIF_SOCKET_ARG "=<string>"
1870                               ETH_MEMIF_SOCKET_ABSTRACT_ARG "=yes|no"
1871                               ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1872                               ETH_MEMIF_ZC_ARG "=yes|no"
1873                               ETH_MEMIF_SECRET_ARG "=<string>");
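
/*
 * Illustrative devargs (example names and values, not taken from the
 * source): a pair of interfaces talking over the default abstract socket
 * could be created with
 *   --vdev=net_memif0,role=master,id=0
 *   --vdev=net_memif1,role=slave,id=0,zero-copy=yes,bsize=2048,rsize=11
 */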
1874
1875 RTE_LOG_REGISTER(memif_logtype, pmd.net.memif, NOTICE);