/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
 */

#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/if_ether.h>
#include <errno.h>
#include <sys/eventfd.h>

#include <rte_version.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
#include <ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_eal_memconfig.h>

#include "rte_eth_memif.h"
#include "memif_socket.h"

#define ETH_MEMIF_ID_ARG		"id"
#define ETH_MEMIF_ROLE_ARG		"role"
#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG	"bsize"
#define ETH_MEMIF_RING_SIZE_ARG		"rsize"
#define ETH_MEMIF_SOCKET_ARG		"socket"
#define ETH_MEMIF_SOCKET_ABSTRACT_ARG	"socket-abstract"
#define ETH_MEMIF_MAC_ARG		"mac"
#define ETH_MEMIF_ZC_ARG		"zero-copy"
#define ETH_MEMIF_SECRET_ARG		"secret"

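/*
 * Example vdev argument string using the options above (values are
 * illustrative, not defaults):
 *   --vdev=net_memif0,role=server,id=0,bsize=2048,rsize=10,
 *     socket=/run/memif.sock,mac=aa:bb:cc:dd:ee:ff,zero-copy=no
 */
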
static const char * const valid_arguments[] = {
	ETH_MEMIF_ID_ARG,
	ETH_MEMIF_ROLE_ARG,
	ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
	ETH_MEMIF_RING_SIZE_ARG,
	ETH_MEMIF_SOCKET_ARG,
	ETH_MEMIF_SOCKET_ABSTRACT_ARG,
	ETH_MEMIF_MAC_ARG,
	ETH_MEMIF_ZC_ARG,
	ETH_MEMIF_SECRET_ARG,
	NULL
};

static const struct rte_eth_link pmd_link = {
	.link_speed = ETH_SPEED_NUM_10G,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN,
	.link_autoneg = ETH_LINK_AUTONEG
};

#define MEMIF_MP_SEND_REGION		"memif_mp_send_region"

static int memif_region_init_zc(const struct rte_memseg_list *msl,
				const struct rte_memseg *ms, void *arg);

const char *
memif_version(void)
{
	return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
}

/* Message header to synchronize regions */
struct mp_region_msg {
	char port_name[RTE_DEV_NAME_MAX_LEN];
	memif_region_index_t idx;
	memif_region_size_t size;
};

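/*
 * Multi-process IPC handler (primary side): reply to a secondary
 * process' region request with the size and file descriptor of the
 * region identified by idx.
 */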
static int
memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
{
	struct rte_eth_dev *dev;
	struct pmd_process_private *proc_private;
	const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
	struct rte_mp_msg reply;
	struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
	uint16_t port_id;
	int ret;

	/* Get requested port */
	ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
	if (ret) {
		MIF_LOG(ERR, "Failed to get port id for %s",
			msg_param->port_name);
		return -1;
	}
	dev = &rte_eth_devices[port_id];
	proc_private = dev->process_private;

	memset(&reply, 0, sizeof(reply));
	strlcpy(reply.name, msg->name, sizeof(reply.name));
	reply_param->idx = msg_param->idx;
	if (proc_private->regions[msg_param->idx] != NULL) {
		reply_param->size = proc_private->regions[msg_param->idx]->region_size;
		reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
		reply.num_fds = 1;
	}
	reply.len_param = sizeof(*reply_param);
	if (rte_mp_reply(&reply, peer) < 0) {
		MIF_LOG(ERR, "Failed to reply to an add region request");
		return -1;
	}

	return 0;
}

/*
 * Request regions
 * Called by the secondary process when the port's link status goes up.
 */
static int
memif_mp_request_regions(struct rte_eth_dev *dev)
{
	int ret, i;
	struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
	struct rte_mp_msg msg, *reply;
	struct rte_mp_reply replies;
	struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
	struct mp_region_msg *reply_param;
	struct memif_region *r;
	struct pmd_process_private *proc_private = dev->process_private;
	struct pmd_internals *pmd = dev->data->dev_private;
	/* in case of zero-copy client, only request region 0 */
	uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
				   1 : ETH_MEMIF_MAX_REGION_NUM;

	MIF_LOG(DEBUG, "Requesting memory regions");

	for (i = 0; i < max_region_num; i++) {
		/* Prepare the message */
		memset(&msg, 0, sizeof(msg));
		strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
		strlcpy(msg_param->port_name, dev->data->name,
			sizeof(msg_param->port_name));
		msg_param->idx = i;
		msg.len_param = sizeof(*msg_param);

		/* Send message */
		ret = rte_mp_request_sync(&msg, &replies, &timeout);
		if (ret < 0 || replies.nb_received != 1) {
			MIF_LOG(ERR, "Failed to send mp msg: %d",
				rte_errno);
			return -1;
		}

		reply = &replies.msgs[0];
		reply_param = (struct mp_region_msg *)reply->param;

		if (reply_param->size > 0) {
			r = rte_zmalloc("region", sizeof(struct memif_region), 0);
			if (r == NULL) {
				MIF_LOG(ERR, "Failed to alloc memif region.");
				free(reply);
				return -ENOMEM;
			}
			r->region_size = reply_param->size;
			if (reply->num_fds < 1) {
				MIF_LOG(ERR, "Missing file descriptor.");
				free(reply);
				return -1;
			}
			r->fd = reply->fds[0];
			r->addr = NULL;

			proc_private->regions[reply_param->idx] = r;
			proc_private->regions_num++;
		}
		free(reply);
	}

	if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
		ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
		if (ret < 0)
			return ret;
	}

	return memif_connect(dev);
}

static int
memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
{
	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
	dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
	dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
	dev_info->min_rx_bufsize = 0;

	return 0;
}

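/*
 * Rings live at the start of region 0: all C2S rings first, then all
 * S2C rings, each sized for 2^log2_ring_size descriptors.
 */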
static memif_ring_t *
memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
	       memif_ring_type_t type, uint16_t ring_num)
{
	/* rings only in region 0 */
	void *p = proc_private->regions[0]->addr;
	int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
	    (1 << pmd->run.log2_ring_size);

	p = (uint8_t *)p + (ring_num + type * pmd->run.num_c2s_rings) * ring_size;

	return (memif_ring_t *)p;
}

static memif_region_offset_t
memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
		      memif_ring_type_t type, uint16_t num)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct pmd_process_private *proc_private = dev->process_private;

	return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
		(uint8_t *)proc_private->regions[mq->region]->addr);
}

static memif_ring_t *
memif_get_ring_from_queue(struct pmd_process_private *proc_private,
			  struct memif_queue *mq)
{
	struct memif_region *r;

	r = proc_private->regions[mq->region];
	if (r == NULL)
		return NULL;

	return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
}

static void *
memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
{
	return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
}

/* Free mbufs received by server */
static void
memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
{
	uint16_t cur_tail;
	uint16_t mask = (1 << mq->log2_ring_size) - 1;
	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);

	/* FIXME: improve performance */
	/* The ring->tail acts as a guard variable between Tx and Rx
	 * threads, so using load-acquire pairs with store-release
	 * in function eth_memif_rx for C2S queues.
	 */
	cur_tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
	while (mq->last_tail != cur_tail) {
		RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
		/* Decrement refcnt and free mbuf. (current segment) */
		rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
		rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
		mq->last_tail++;
	}
}

static int
memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
		    struct rte_mbuf *tail)
{
	/* Check for number-of-segments-overflow */
	if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
		return -EOVERFLOW;

	/* Chain 'tail' onto the old tail */
	cur_tail->next = tail;

	/* accumulate number of segments and total length. */
	head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);

	tail->pkt_len = tail->data_len;
	head->pkt_len += tail->pkt_len;

	return 0;
}

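/*
 * Receive in copy mode: copy packet data from shared-memory buffers into
 * freshly allocated mbufs, chaining extra mbufs when a packet spans
 * several descriptors (MEMIF_DESC_FLAG_NEXT).
 */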
static uint16_t
eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct memif_queue *mq = queue;
	struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
	struct pmd_process_private *proc_private =
		rte_eth_devices[mq->in_port].process_private;
	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
	uint16_t n_rx_pkts = 0;
	uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
		RTE_PKTMBUF_HEADROOM;
	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
	memif_ring_type_t type = mq->type;
	memif_desc_t *d0;
	struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
	uint64_t b;
	ssize_t size __rte_unused;
	uint16_t head;
	int ret;
	struct rte_eth_link link;

	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
		return 0;
	if (unlikely(ring == NULL)) {
		/* Secondary process will attempt to request regions. */
		ret = rte_eth_link_get(mq->in_port, &link);
		if (ret < 0)
			MIF_LOG(ERR, "Failed to get port %u link info: %s",
				mq->in_port, rte_strerror(-ret));
		return 0;
	}

	/* consume interrupt */
	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
		size = read(mq->intr_handle.fd, &b, sizeof(b));

	ring_size = 1 << mq->log2_ring_size;
	mask = ring_size - 1;

	if (type == MEMIF_RING_C2S) {
		cur_slot = mq->last_head;
		last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
	} else {
		cur_slot = mq->last_tail;
		last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
	}

	if (cur_slot == last_slot)
		goto refill;
	n_slots = last_slot - cur_slot;

	while (n_slots && n_rx_pkts < nb_pkts) {
		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
		if (unlikely(mbuf_head == NULL))
			goto no_free_bufs;
		mbuf = mbuf_head;
		mbuf->port = mq->in_port;

next_slot:
		s0 = cur_slot & mask;
		d0 = &ring->desc[s0];

		src_len = d0->length;
		dst_off = 0;
		src_off = 0;

		do {
			dst_len = mbuf_size - dst_off;
			if (dst_len == 0) {
				dst_off = 0;
				dst_len = mbuf_size;

				/* store pointer to tail */
				mbuf_tail = mbuf;
				mbuf = rte_pktmbuf_alloc(mq->mempool);
				if (unlikely(mbuf == NULL))
					goto no_free_bufs;
				mbuf->port = mq->in_port;
				ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
				if (unlikely(ret < 0)) {
					MIF_LOG(ERR, "number-of-segments-overflow");
					rte_pktmbuf_free(mbuf);
					goto no_free_bufs;
				}
			}
			cp_len = RTE_MIN(dst_len, src_len);

			rte_pktmbuf_data_len(mbuf) += cp_len;
			rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
			if (mbuf != mbuf_head)
				rte_pktmbuf_pkt_len(mbuf_head) += cp_len;

			rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
							   dst_off),
				(uint8_t *)memif_get_buffer(proc_private, d0) +
				src_off, cp_len);

			src_off += cp_len;
			dst_off += cp_len;
			src_len -= cp_len;
		} while (src_len);

		cur_slot++;
		n_slots--;

		if (d0->flags & MEMIF_DESC_FLAG_NEXT)
			goto next_slot;

		mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
		*bufs++ = mbuf_head;
		n_rx_pkts++;
	}

no_free_bufs:
	if (type == MEMIF_RING_C2S) {
		__atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
		mq->last_head = cur_slot;
	} else {
		mq->last_tail = cur_slot;
	}

refill:
	if (type == MEMIF_RING_S2C) {
		/* ring->head is updated by the receiver and this function
		 * is called in the context of receiver thread. The loads in
		 * the receiver do not need to synchronize with its own stores.
		 */
		head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
		n_slots = ring_size - head + mq->last_tail;

		while (n_slots--) {
			s0 = head++ & mask;
			d0 = &ring->desc[s0];
			d0->length = pmd->run.pkt_buffer_size;
		}
		__atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
	}

	mq->n_pkts += n_rx_pkts;
	return n_rx_pkts;
}

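/*
 * Receive in zero-copy mode: mbufs handed to the server during refill come
 * back filled with packet data, so they are passed to the application
 * without copying.
 */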
static uint16_t
eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct memif_queue *mq = queue;
	struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
	struct pmd_process_private *proc_private =
		rte_eth_devices[mq->in_port].process_private;
	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
	uint16_t n_rx_pkts = 0;
	memif_desc_t *d0;
	struct rte_mbuf *mbuf, *mbuf_tail;
	struct rte_mbuf *mbuf_head = NULL;
	int ret;
	struct rte_eth_link link;

	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
		return 0;
	if (unlikely(ring == NULL)) {
		/* Secondary process will attempt to request regions. */
		rte_eth_link_get(mq->in_port, &link);
		return 0;
	}

	/* consume interrupt */
	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
		uint64_t b;
		ssize_t size __rte_unused;
		size = read(mq->intr_handle.fd, &b, sizeof(b));
	}

	ring_size = 1 << mq->log2_ring_size;
	mask = ring_size - 1;

	cur_slot = mq->last_tail;
	/* The ring->tail acts as a guard variable between Tx and Rx
	 * threads, so using load-acquire pairs with store-release
	 * to synchronize it between threads.
	 */
	last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
	if (cur_slot == last_slot)
		goto refill;
	n_slots = last_slot - cur_slot;

	while (n_slots && n_rx_pkts < nb_pkts) {
		s0 = cur_slot & mask;

		d0 = &ring->desc[s0];
		mbuf_head = mq->buffers[s0];
		mbuf = mbuf_head;

next_slot:
		/* prefetch next descriptor */
		if (n_rx_pkts + 1 < nb_pkts)
			rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);

		mbuf->port = mq->in_port;
		rte_pktmbuf_data_len(mbuf) = d0->length;
		rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);

		mq->n_bytes += rte_pktmbuf_data_len(mbuf);

		cur_slot++;
		n_slots--;
		if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
			s0 = cur_slot & mask;
			d0 = &ring->desc[s0];
			mbuf_tail = mbuf;
			mbuf = mq->buffers[s0];
			ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
			if (unlikely(ret < 0)) {
				MIF_LOG(ERR, "number-of-segments-overflow");
				goto refill;
			}
			goto next_slot;
		}

		*bufs++ = mbuf_head;
		n_rx_pkts++;
	}

	mq->last_tail = cur_slot;

/* Supply server with new buffers */
refill:
	/* ring->head is updated by the receiver and this function
	 * is called in the context of receiver thread. The loads in
	 * the receiver do not need to synchronize with its own stores.
	 */
	head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
	n_slots = ring_size - head + mq->last_tail;

	if (n_slots < 32)
		goto no_free_mbufs;

	ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
	if (unlikely(ret < 0))
		goto no_free_mbufs;

	while (n_slots--) {
		s0 = head++ & mask;
		if (n_slots > 0)
			rte_prefetch0(mq->buffers[head & mask]);
		d0 = &ring->desc[s0];
		/* store buffer header */
		mbuf = mq->buffers[s0];
		/* populate descriptor */
		d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
				RTE_PKTMBUF_HEADROOM;
		d0->region = 1;
		d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
			(uint8_t *)proc_private->regions[d0->region]->addr;
	}
no_free_mbufs:
	/* The ring->head acts as a guard variable between Tx and Rx
	 * threads, so using store-release pairs with load-acquire
	 * in function eth_memif_tx.
	 */
	__atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);

	mq->n_pkts += n_rx_pkts;

	return n_rx_pkts;
}

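/*
 * Transmit in copy mode: copy packet data from the mbuf chain into
 * shared-memory buffers, splitting packets across descriptors as needed.
 */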
static uint16_t
eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct memif_queue *mq = queue;
	struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
	struct pmd_process_private *proc_private =
		rte_eth_devices[mq->in_port].process_private;
	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
	uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
	memif_ring_type_t type = mq->type;
	memif_desc_t *d0;
	struct rte_mbuf *mbuf;
	struct rte_mbuf *mbuf_head;
	uint64_t a;
	ssize_t size;
	struct rte_eth_link link;

	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
		return 0;
	if (unlikely(ring == NULL)) {
		int ret;

		/* Secondary process will attempt to request regions. */
		ret = rte_eth_link_get(mq->in_port, &link);
		if (ret < 0)
			MIF_LOG(ERR, "Failed to get port %u link info: %s",
				mq->in_port, rte_strerror(-ret));
		return 0;
	}

	ring_size = 1 << mq->log2_ring_size;
	mask = ring_size - 1;

	if (type == MEMIF_RING_C2S) {
		/* For C2S queues ring->head is updated by the sender and
		 * this function is called in the context of sending thread.
		 * The loads in the sender do not need to synchronize with
		 * its own stores. Hence, the following load can be a
		 * relaxed load.
		 */
		slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
		n_free = ring_size - slot +
				__atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
	} else {
		/* For S2C queues ring->tail is updated by the sender and
		 * this function is called in the context of sending thread.
		 * The loads in the sender do not need to synchronize with
		 * its own stores. Hence, the following load can be a
		 * relaxed load.
		 */
		slot = __atomic_load_n(&ring->tail, __ATOMIC_RELAXED);
		n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
	}

	while (n_tx_pkts < nb_pkts && n_free) {
		mbuf_head = *bufs++;
		mbuf = mbuf_head;

		saved_slot = slot;
		d0 = &ring->desc[slot & mask];
		dst_off = 0;
		dst_len = (type == MEMIF_RING_C2S) ?
			pmd->run.pkt_buffer_size : d0->length;

next_in_chain:
		src_off = 0;
		src_len = rte_pktmbuf_data_len(mbuf);

		while (src_len) {
			if (dst_len == 0) {
				if (n_free) {
					slot++;
					n_free--;
					d0->flags |= MEMIF_DESC_FLAG_NEXT;
					d0 = &ring->desc[slot & mask];
					dst_off = 0;
					dst_len = (type == MEMIF_RING_C2S) ?
					    pmd->run.pkt_buffer_size : d0->length;
					d0->flags = 0;
				} else {
					slot = saved_slot;
					goto no_free_slots;
				}
			}
			cp_len = RTE_MIN(dst_len, src_len);

			rte_memcpy((uint8_t *)memif_get_buffer(proc_private,
							       d0) + dst_off,
				rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
				cp_len);

			mq->n_bytes += cp_len;
			src_off += cp_len;
			dst_off += cp_len;
			src_len -= cp_len;
			dst_len -= cp_len;

			d0->length = dst_off;
		}

		if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
			mbuf = mbuf->next;
			goto next_in_chain;
		}

		n_tx_pkts++;
		slot++;
		n_free--;
		rte_pktmbuf_free(mbuf_head);
	}

no_free_slots:
	if (type == MEMIF_RING_C2S)
		__atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
	else
		__atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);

	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
		a = 1;
		size = write(mq->intr_handle.fd, &a, sizeof(a));
		if (unlikely(size < 0)) {
			MIF_LOG(WARNING,
				"Failed to send interrupt. %s", strerror(errno));
		}
	}

	mq->n_pkts += n_tx_pkts;
	return n_tx_pkts;
}

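/*
 * Enqueue a single packet in zero-copy mode: remember the mbuf so it can
 * be freed once the server returns the slot, and point the descriptor at
 * the mbuf data. Returns the number of slots used, or 0 when a chained
 * packet does not fit into the remaining free slots.
 */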
static int
memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
		memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
		uint16_t slot, uint16_t n_free)
{
	memif_desc_t *d0;
	int used_slots = 1;

next_in_chain:
	/* store pointer to mbuf to free it later */
	mq->buffers[slot & mask] = mbuf;
	/* Increment refcnt to make sure the buffer is not freed before server
	 * receives it. (current segment)
	 */
	rte_mbuf_refcnt_update(mbuf, 1);
	/* populate descriptor */
	d0 = &ring->desc[slot & mask];
	d0->length = rte_pktmbuf_data_len(mbuf);
	mq->n_bytes += rte_pktmbuf_data_len(mbuf);
	/* FIXME: get region index */
	d0->region = 1;
	d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
		(uint8_t *)proc_private->regions[d0->region]->addr;
	d0->flags = 0;

	/* check if buffer is chained */
	if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
		if (n_free < 2)
			return 0;
		/* mark buffer as chained */
		d0->flags |= MEMIF_DESC_FLAG_NEXT;
		/* advance mbuf */
		mbuf = mbuf->next;
		/* update counters */
		used_slots++;
		slot++;
		n_free--;
		goto next_in_chain;
	}
	return used_slots;
}

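/*
 * Transmit in zero-copy mode (C2S rings only). The inner loop is unrolled
 * four ways and prefetches upcoming mbufs to hide memory latency.
 */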
static uint16_t
eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct memif_queue *mq = queue;
	struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
	struct pmd_process_private *proc_private =
		rte_eth_devices[mq->in_port].process_private;
	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
	uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
	struct rte_eth_link link;

	if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
		return 0;
	if (unlikely(ring == NULL)) {
		/* Secondary process will attempt to request regions. */
		rte_eth_link_get(mq->in_port, &link);
		return 0;
	}

	ring_size = 1 << mq->log2_ring_size;
	mask = ring_size - 1;

	/* free mbufs received by server */
	memif_free_stored_mbufs(proc_private, mq);

	/* ring type always MEMIF_RING_C2S */
	/* For C2S queues ring->head is updated by the sender and
	 * this function is called in the context of sending thread.
	 * The loads in the sender do not need to synchronize with
	 * its own stores. Hence, the following load can be a
	 * relaxed load.
	 */
	slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
	n_free = ring_size - slot + mq->last_tail;

	int used_slots;

	while (n_free && (n_tx_pkts < nb_pkts)) {
		while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
			if ((nb_pkts - n_tx_pkts) > 8) {
				rte_prefetch0(*bufs + 4);
				rte_prefetch0(*bufs + 5);
				rte_prefetch0(*bufs + 6);
				rte_prefetch0(*bufs + 7);
			}
			used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
				mask, slot, n_free);
			if (unlikely(used_slots < 1))
				goto no_free_slots;
			n_tx_pkts++;
			slot += used_slots;
			n_free -= used_slots;

			used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
				mask, slot, n_free);
			if (unlikely(used_slots < 1))
				goto no_free_slots;
			n_tx_pkts++;
			slot += used_slots;
			n_free -= used_slots;

			used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
				mask, slot, n_free);
			if (unlikely(used_slots < 1))
				goto no_free_slots;
			n_tx_pkts++;
			slot += used_slots;
			n_free -= used_slots;

			used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
				mask, slot, n_free);
			if (unlikely(used_slots < 1))
				goto no_free_slots;
			n_tx_pkts++;
			slot += used_slots;
			n_free -= used_slots;
		}
		used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
			mask, slot, n_free);
		if (unlikely(used_slots < 1))
			goto no_free_slots;
		n_tx_pkts++;
		slot += used_slots;
		n_free -= used_slots;
	}

no_free_slots:
	/* ring type always MEMIF_RING_C2S */
	/* The ring->head acts as a guard variable between Tx and Rx
	 * threads, so using store-release pairs with load-acquire
	 * in function eth_memif_rx for C2S rings.
	 */
	__atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);

	/* Send interrupt, if enabled. */
	if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
		uint64_t a = 1;
		ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
		if (unlikely(size < 0)) {
			MIF_LOG(WARNING,
				"Failed to send interrupt. %s", strerror(errno));
		}
	}

	/* increment queue counters */
	mq->n_pkts += n_tx_pkts;

	return n_tx_pkts;
}

void
memif_free_regions(struct rte_eth_dev *dev)
{
	struct pmd_process_private *proc_private = dev->process_private;
	struct pmd_internals *pmd = dev->data->dev_private;
	int i;
	struct memif_region *r;

	/* regions are allocated contiguously, so it's
	 * enough to loop until 'proc_private->regions_num'
	 */
	for (i = 0; i < proc_private->regions_num; i++) {
		r = proc_private->regions[i];
		if (r != NULL) {
			/* This is memzone */
			if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
				r->addr = NULL;
				if (r->fd > 0)
					close(r->fd);
			}
			if (r->addr != NULL) {
				munmap(r->addr, r->region_size);
				if (r->fd > 0) {
					close(r->fd);
					r->fd = -1;
				}
			}
			rte_free(r);
			proc_private->regions[i] = NULL;
		}
	}
	proc_private->regions_num = 0;
}

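/*
 * rte_memseg_walk() callback: expose DPDK memsegs as memif regions,
 * extending the current region while memsegs share a memseg-list base
 * address and starting a new region otherwise.
 */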
static int
memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		     void *arg)
{
	struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
	struct memif_region *r;

	if (proc_private->regions_num < 1) {
		MIF_LOG(ERR, "Missing descriptor region");
		return -1;
	}

	r = proc_private->regions[proc_private->regions_num - 1];

	if (r->addr != msl->base_va)
		r = proc_private->regions[++proc_private->regions_num - 1];

	if (r == NULL) {
		r = rte_zmalloc("region", sizeof(struct memif_region), 0);
		if (r == NULL) {
			MIF_LOG(ERR, "Failed to alloc memif region.");
			return -ENOMEM;
		}

		r->addr = msl->base_va;
		r->region_size = ms->len;
		r->fd = rte_memseg_get_fd(ms);
		if (r->fd < 0)
			return -1;
		r->pkt_buffer_offset = 0;

		proc_private->regions[proc_private->regions_num - 1] = r;
	} else {
		r->region_size += ms->len;
	}

	return 0;
}

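/*
 * Create one shared-memory region backed by a sealed memfd. The region
 * starts with all rings and their descriptors; when has_buffers is set,
 * packet buffers follow at pkt_buffer_offset.
 */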
static int
memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct pmd_process_private *proc_private = dev->process_private;
	char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
	int ret = 0;
	struct memif_region *r;

	if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
		MIF_LOG(ERR, "Too many regions.");
		return -1;
	}

	r = rte_zmalloc("region", sizeof(struct memif_region), 0);
	if (r == NULL) {
		MIF_LOG(ERR, "Failed to alloc memif region.");
		return -ENOMEM;
	}

	/* calculate buffer offset */
	r->pkt_buffer_offset = (pmd->run.num_c2s_rings + pmd->run.num_s2c_rings) *
	    (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
	    (1 << pmd->run.log2_ring_size));

	r->region_size = r->pkt_buffer_offset;
	/* if region has buffers, add buffers size to region_size */
	if (has_buffers == 1)
		r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
			(1 << pmd->run.log2_ring_size) *
			(pmd->run.num_c2s_rings +
			 pmd->run.num_s2c_rings));

	memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
	snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
		 proc_private->regions_num);

	r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
	if (r->fd < 0) {
		MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
		ret = -1;
		goto error;
	}

	ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
	if (ret < 0) {
		MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
		goto error;
	}

	ret = ftruncate(r->fd, r->region_size);
	if (ret < 0) {
		MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
		goto error;
	}

	r->addr = mmap(NULL, r->region_size, PROT_READ |
		       PROT_WRITE, MAP_SHARED, r->fd, 0);
	if (r->addr == MAP_FAILED) {
		MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(errno));
		ret = -1;
		goto error;
	}

	proc_private->regions[proc_private->regions_num] = r;
	proc_private->regions_num++;

	return ret;

error:
	if (r->fd > 0)
		close(r->fd);
	r->fd = -1;

	return ret;
}

static int
memif_regions_init(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	int ret;

	/*
	 * Zero-copy exposes DPDK memory.
	 * Each memseg list will be represented by a memif region.
	 * Zero-copy regions indexing: memseg list idx + 1,
	 * as we already have region 0 reserved for descriptors.
	 */
	if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
		/* create region idx 0 containing descriptors */
		ret = memif_region_init_shm(dev, 0);
		if (ret < 0)
			return ret;
		ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
		if (ret < 0)
			return ret;
	} else {
		/* create one memory region containing rings and buffers */
		ret = memif_region_init_shm(dev, /* has buffers */ 1);
		if (ret < 0)
			return ret;
	}

	return 0;
}

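/*
 * Initialize all rings in region 0. In copy mode every descriptor is
 * statically bound to its packet buffer; in zero-copy mode descriptors
 * are filled in at refill/transmit time instead.
 */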
static void
memif_init_rings(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct pmd_process_private *proc_private = dev->process_private;
	memif_ring_t *ring;
	int i, j;
	uint16_t slot;

	for (i = 0; i < pmd->run.num_c2s_rings; i++) {
		ring = memif_get_ring(pmd, proc_private, MEMIF_RING_C2S, i);
		__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
		__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
		ring->cookie = MEMIF_COOKIE;
		ring->flags = 0;

		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
			continue;

		for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
			slot = i * (1 << pmd->run.log2_ring_size) + j;
			ring->desc[j].region = 0;
			ring->desc[j].offset =
				proc_private->regions[0]->pkt_buffer_offset +
				(uint32_t)(slot * pmd->run.pkt_buffer_size);
			ring->desc[j].length = pmd->run.pkt_buffer_size;
		}
	}

	for (i = 0; i < pmd->run.num_s2c_rings; i++) {
		ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2C, i);
		__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
		__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
		ring->cookie = MEMIF_COOKIE;
		ring->flags = 0;

		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
			continue;

		for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
			slot = (i + pmd->run.num_c2s_rings) *
			    (1 << pmd->run.log2_ring_size) + j;
			ring->desc[j].region = 0;
			ring->desc[j].offset =
				proc_private->regions[0]->pkt_buffer_offset +
				(uint32_t)(slot * pmd->run.pkt_buffer_size);
			ring->desc[j].length = pmd->run.pkt_buffer_size;
		}
	}
}

/* called only by client */
static int
memif_init_queues(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct memif_queue *mq;
	int i;

	for (i = 0; i < pmd->run.num_c2s_rings; i++) {
		mq = dev->data->tx_queues[i];
		mq->log2_ring_size = pmd->run.log2_ring_size;
		/* queues located only in region 0 */
		mq->region = 0;
		mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_C2S, i);
		mq->last_head = 0;
		mq->last_tail = 0;
		mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
		if (mq->intr_handle.fd < 0) {
			MIF_LOG(WARNING,
				"Failed to create eventfd for tx queue %d: %s.", i,
				strerror(errno));
		}
		mq->buffers = NULL;
		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
			mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
						  (1 << mq->log2_ring_size), 0);
			if (mq->buffers == NULL)
				return -ENOMEM;
		}
	}

	for (i = 0; i < pmd->run.num_s2c_rings; i++) {
		mq = dev->data->rx_queues[i];
		mq->log2_ring_size = pmd->run.log2_ring_size;
		/* queues located only in region 0 */
		mq->region = 0;
		mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2C, i);
		mq->last_head = 0;
		mq->last_tail = 0;
		mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
		if (mq->intr_handle.fd < 0) {
			MIF_LOG(WARNING,
				"Failed to create eventfd for rx queue %d: %s.", i,
				strerror(errno));
		}
		mq->buffers = NULL;
		if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
			mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
						  (1 << mq->log2_ring_size), 0);
			if (mq->buffers == NULL)
				return -ENOMEM;
		}
	}
	return 0;
}

int
memif_init_regions_and_queues(struct rte_eth_dev *dev)
{
	int ret;

	ret = memif_regions_init(dev);
	if (ret < 0)
		return ret;

	memif_init_rings(dev);

	ret = memif_init_queues(dev);
	if (ret < 0)
		return ret;

	return 0;
}

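/*
 * Finalize the connection: map any regions received over the control
 * channel or multi-process IPC, then reset all rings to a clean state.
 */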
int
memif_connect(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct pmd_process_private *proc_private = dev->process_private;
	struct memif_region *mr;
	struct memif_queue *mq;
	memif_ring_t *ring;
	int i;

	for (i = 0; i < proc_private->regions_num; i++) {
		mr = proc_private->regions[i];
		if (mr != NULL) {
			if (mr->addr == NULL) {
				if (mr->fd < 0)
					return -1;
				mr->addr = mmap(NULL, mr->region_size,
						PROT_READ | PROT_WRITE,
						MAP_SHARED, mr->fd, 0);
				if (mr->addr == MAP_FAILED) {
					MIF_LOG(ERR, "mmap failed: %s",
						strerror(errno));
					return -1;
				}
			}
			if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
				/* close memseg file */
				close(mr->fd);
				mr->fd = -1;
			}
		}
	}

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		for (i = 0; i < pmd->run.num_c2s_rings; i++) {
			mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
			    dev->data->tx_queues[i] : dev->data->rx_queues[i];
			ring = memif_get_ring_from_queue(proc_private, mq);
			if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
				MIF_LOG(ERR, "Wrong ring");
				return -1;
			}
			__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
			__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
			mq->last_head = 0;
			mq->last_tail = 0;
			/* enable polling mode */
			if (pmd->role == MEMIF_ROLE_SERVER)
				ring->flags = MEMIF_RING_FLAG_MASK_INT;
		}
		for (i = 0; i < pmd->run.num_s2c_rings; i++) {
			mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
			    dev->data->rx_queues[i] : dev->data->tx_queues[i];
			ring = memif_get_ring_from_queue(proc_private, mq);
			if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
				MIF_LOG(ERR, "Wrong ring");
				return -1;
			}
			__atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
			__atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
			mq->last_head = 0;
			mq->last_tail = 0;
			/* enable polling mode */
			if (pmd->role == MEMIF_ROLE_CLIENT)
				ring->flags = MEMIF_RING_FLAG_MASK_INT;
		}

		pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
		pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
		dev->data->dev_link.link_status = ETH_LINK_UP;
	}
	MIF_LOG(INFO, "Connected.");
	return 0;
}

static int
memif_dev_start(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	int ret = 0;

	switch (pmd->role) {
	case MEMIF_ROLE_CLIENT:
		ret = memif_connect_client(dev);
		break;
	case MEMIF_ROLE_SERVER:
		ret = memif_connect_server(dev);
		break;
	default:
		MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
		ret = -1;
		break;
	}

	return ret;
}

static int
memif_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	int i;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
		memif_disconnect(dev);

		for (i = 0; i < dev->data->nb_rx_queues; i++)
			(*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]);
		for (i = 0; i < dev->data->nb_tx_queues; i++)
			(*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]);

		memif_socket_remove_device(dev);
	} else {
		memif_disconnect(dev);
	}

	rte_free(dev->process_private);

	return 0;
}

static int
memif_dev_configure(struct rte_eth_dev *dev)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	/*
	 * CLIENT - TXQ
	 * SERVER - RXQ
	 */
	pmd->cfg.num_c2s_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
				  dev->data->nb_tx_queues : dev->data->nb_rx_queues;

	/*
	 * CLIENT - RXQ
	 * SERVER - TXQ
	 */
	pmd->cfg.num_s2c_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
				  dev->data->nb_rx_queues : dev->data->nb_tx_queues;

	return 0;
}

static int
memif_tx_queue_setup(struct rte_eth_dev *dev,
		     uint16_t qid,
		     uint16_t nb_tx_desc __rte_unused,
		     unsigned int socket_id __rte_unused,
		     const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct memif_queue *mq;

	mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
	if (mq == NULL) {
		MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
		return -ENOMEM;
	}

	mq->type =
	    (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_C2S : MEMIF_RING_S2C;
	mq->n_pkts = 0;
	mq->n_bytes = 0;
	mq->intr_handle.fd = -1;
	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
	mq->in_port = dev->data->port_id;
	dev->data->tx_queues[qid] = mq;

	return 0;
}

static int
memif_rx_queue_setup(struct rte_eth_dev *dev,
		     uint16_t qid,
		     uint16_t nb_rx_desc __rte_unused,
		     unsigned int socket_id __rte_unused,
		     const struct rte_eth_rxconf *rx_conf __rte_unused,
		     struct rte_mempool *mb_pool)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct memif_queue *mq;

	mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
	if (mq == NULL) {
		MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
		return -ENOMEM;
	}

	mq->type = (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_S2C : MEMIF_RING_C2S;
	mq->n_pkts = 0;
	mq->n_bytes = 0;
	mq->intr_handle.fd = -1;
	mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
	mq->mempool = mb_pool;
	mq->in_port = dev->data->port_id;
	dev->data->rx_queues[qid] = mq;

	return 0;
}

static void
memif_queue_release(void *queue)
{
	struct memif_queue *mq = (struct memif_queue *)queue;

	if (!mq)
		return;

	rte_free(mq);
}

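/*
 * In a secondary process, use link-status transitions to attach to
 * (link up) or release (link down) the shared-memory regions.
 */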
1362 static int
1363 memif_link_update(struct rte_eth_dev *dev,
1364                   int wait_to_complete __rte_unused)
1365 {
1366         struct pmd_process_private *proc_private;
1367
1368         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1369                 proc_private = dev->process_private;
1370                 if (dev->data->dev_link.link_status == ETH_LINK_UP &&
1371                                 proc_private->regions_num == 0) {
1372                         memif_mp_request_regions(dev);
1373                 } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN &&
1374                                 proc_private->regions_num > 0) {
1375                         memif_free_regions(dev);
1376                 }
1377         }
1378         return 0;
1379 }
1380
1381 static int
1382 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1383 {
1384         struct pmd_internals *pmd = dev->data->dev_private;
1385         struct memif_queue *mq;
1386         int i;
1387         uint8_t tmp, nq;
1388
1389         stats->ipackets = 0;
1390         stats->ibytes = 0;
1391         stats->opackets = 0;
1392         stats->obytes = 0;
1393
1394         tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_c2s_rings :
1395             pmd->run.num_s2c_rings;
1396         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1397             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1398
1399         /* RX stats */
1400         for (i = 0; i < nq; i++) {
1401                 mq = dev->data->rx_queues[i];
1402                 stats->q_ipackets[i] = mq->n_pkts;
1403                 stats->q_ibytes[i] = mq->n_bytes;
1404                 stats->ipackets += mq->n_pkts;
1405                 stats->ibytes += mq->n_bytes;
1406         }
1407
1408         tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_s2c_rings :
1409             pmd->run.num_c2s_rings;
1410         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1411             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1412
1413         /* TX stats */
1414         for (i = 0; i < nq; i++) {
1415                 mq = dev->data->tx_queues[i];
1416                 stats->q_opackets[i] = mq->n_pkts;
1417                 stats->q_obytes[i] = mq->n_bytes;
1418                 stats->opackets += mq->n_pkts;
1419                 stats->obytes += mq->n_bytes;
1420         }
1421         return 0;
1422 }
1423
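     /*
      * Ring direction is fixed (C2S or S2C) while queue direction depends
      * on the role: a C2S ring backs a TX queue on the client and an RX
      * queue on the server, which is why the loops below select the queue
      * array by role.
      */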
1424 static int
1425 memif_stats_reset(struct rte_eth_dev *dev)
1426 {
1427         struct pmd_internals *pmd = dev->data->dev_private;
1428         int i;
1429         struct memif_queue *mq;
1430
1431         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1432                 mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->tx_queues[i] :
1433                     dev->data->rx_queues[i];
1434                 mq->n_pkts = 0;
1435                 mq->n_bytes = 0;
1436         }
1437         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1438                 mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->rx_queues[i] :
1439                     dev->data->tx_queues[i];
1440                 mq->n_pkts = 0;
1441                 mq->n_bytes = 0;
1442         }
1443
1444         return 0;
1445 }
1446
1447 static int
1448 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1449                            uint16_t qid __rte_unused)
1450 {
1451         MIF_LOG(WARNING, "Interrupt mode not supported.");
1452
1453         return -1;
1454 }
1455
1456 static int
1457 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1458 {
1459         struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1460
1461         return 0;
1462 }
1463
1464 static const struct eth_dev_ops ops = {
1465         .dev_start = memif_dev_start,
1466         .dev_close = memif_dev_close,
1467         .dev_infos_get = memif_dev_info,
1468         .dev_configure = memif_dev_configure,
1469         .tx_queue_setup = memif_tx_queue_setup,
1470         .rx_queue_setup = memif_rx_queue_setup,
1471         .rx_queue_release = memif_queue_release,
1472         .tx_queue_release = memif_queue_release,
1473         .rx_queue_intr_enable = memif_rx_queue_intr_enable,
1474         .rx_queue_intr_disable = memif_rx_queue_intr_disable,
1475         .link_update = memif_link_update,
1476         .stats_get = memif_stats_get,
1477         .stats_reset = memif_stats_reset,
1478 };
1479
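     /*
      * The rx/tx burst handlers are not part of eth_dev_ops; memif_create()
      * below installs them directly on the ethdev, choosing the zero-copy
      * or copying variants based on ETH_MEMIF_FLAG_ZERO_COPY.
      */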
1480 static int
1481 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1482              memif_interface_id_t id, uint32_t flags,
1483              const char *socket_filename,
1484              memif_log2_ring_size_t log2_ring_size,
1485              uint16_t pkt_buffer_size, const char *secret,
1486              struct rte_ether_addr *ether_addr)
1487 {
1488         int ret = 0;
1489         struct rte_eth_dev *eth_dev;
1490         struct rte_eth_dev_data *data;
1491         struct pmd_internals *pmd;
1492         struct pmd_process_private *process_private;
1493         const unsigned int numa_node = vdev->device.numa_node;
1494         const char *name = rte_vdev_device_name(vdev);
1495
1496         eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1497         if (eth_dev == NULL) {
1498                 MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1499                 return -1;
1500         }
1501
1502         process_private = (struct pmd_process_private *)
1503                 rte_zmalloc(name, sizeof(struct pmd_process_private),
1504                             RTE_CACHE_LINE_SIZE);
1505
1506         if (process_private == NULL) {
1507                 MIF_LOG(ERR, "Failed to alloc memory for process private");
                     rte_eth_dev_release_port(eth_dev);
1508                 return -1;
1509         }
1510         eth_dev->process_private = process_private;
1511
1512         pmd = eth_dev->data->dev_private;
1513         memset(pmd, 0, sizeof(*pmd));
1514
1515         pmd->id = id;
1516         pmd->flags = flags;
1517         pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1518         pmd->role = role;
1519         /* Zero-copy flag is irrelevant to the server. */
1520         if (pmd->role == MEMIF_ROLE_SERVER)
1521                 pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1522
1523         ret = memif_socket_init(eth_dev, socket_filename);
1524         if (ret < 0)
1525                 return ret;
1526
1527         memset(pmd->secret, 0, sizeof(pmd->secret));
1528         if (secret != NULL)
1529                 strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1530
1531         pmd->cfg.log2_ring_size = log2_ring_size;
1532         /* set in .dev_configure() */
1533         pmd->cfg.num_c2s_rings = 0;
1534         pmd->cfg.num_s2c_rings = 0;
1535
1536         pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1537         rte_spinlock_init(&pmd->cc_lock);
1538
1539         data = eth_dev->data;
1540         data->dev_private = pmd;
1541         data->numa_node = numa_node;
1542         data->dev_link = pmd_link;
1543         data->mac_addrs = ether_addr;
1544         data->promiscuous = 1;
1545         data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1546
1547         eth_dev->dev_ops = &ops;
1548         eth_dev->device = &vdev->device;
1549         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1550                 eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1551                 eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1552         } else {
1553                 eth_dev->rx_pkt_burst = eth_memif_rx;
1554                 eth_dev->tx_pkt_burst = eth_memif_tx;
1555         }
1556
1557         rte_eth_dev_probing_finish(eth_dev);
1558
1559         return 0;
1560 }
1561
1562 static int
1563 memif_set_role(const char *key __rte_unused, const char *value,
1564                void *extra_args)
1565 {
1566         enum memif_role_t *role = (enum memif_role_t *)extra_args;
1567
1568         if (strstr(value, "server") != NULL) {
1569                 *role = MEMIF_ROLE_SERVER;
1570         } else if (strstr(value, "client") != NULL) {
1571                 *role = MEMIF_ROLE_CLIENT;
1572         } else if (strstr(value, "master") != NULL) {
1573                 MIF_LOG(NOTICE, "Role argument \"master\" is deprecated, use \"server\"");
1574                 *role = MEMIF_ROLE_SERVER;
1575         } else if (strstr(value, "slave") != NULL) {
1576                 MIF_LOG(NOTICE, "Role argument \"slave\" is deprecated, use \"client\"");
1577                 *role = MEMIF_ROLE_CLIENT;
1578         } else {
1579                 MIF_LOG(ERR, "Unknown role: %s.", value);
1580                 return -EINVAL;
1581         }
1582         return 0;
1583 }
1584
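     /*
      * Zero-copy mode shares DPDK memory segments with the peer directly,
      * so it is only allowed when the EAL manages hugepage memory as
      * single-file segments (EAL option --single-file-segments); hence the
      * rte_mcfg_get_single_file_segments() check below.
      */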
1585 static int
1586 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1587 {
1588         uint32_t *flags = (uint32_t *)extra_args;
1589
1590         if (strstr(value, "yes") != NULL) {
1591                 if (!rte_mcfg_get_single_file_segments()) {
1592                         MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1593                         return -ENOTSUP;
1594                 }
1595                 *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1596         } else if (strstr(value, "no") != NULL) {
1597                 *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1598         } else {
1599                 MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1600                 return -EINVAL;
1601         }
1602         return 0;
1603 }
1604
1605 static int
1606 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1607 {
1608         memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1609
1610         /* even if parsing fails, 0 is a valid id */
1611         *id = strtoul(value, NULL, 10);
1612         return 0;
1613 }
1614
1615 static int
1616 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1617 {
1618         unsigned long tmp;
1619         uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1620
1621         tmp = strtoul(value, NULL, 10);
1622         if (tmp == 0 || tmp > 0xFFFF) {
1623                 MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1624                 return -EINVAL;
1625         }
1626         *pkt_buffer_size = tmp;
1627         return 0;
1628 }
1629
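     /*
      * The "rsize" devarg is a log2 value: e.g. rsize=10 requests rings of
      * 2^10 = 1024 descriptors, up to ETH_MEMIF_MAX_LOG2_RING_SIZE.
      */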
1630 static int
1631 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1632 {
1633         unsigned long tmp;
1634         memif_log2_ring_size_t *log2_ring_size =
1635             (memif_log2_ring_size_t *)extra_args;
1636
1637         tmp = strtoul(value, NULL, 10);
1638         if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1639                 MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1640                         value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1641                 return -EINVAL;
1642         }
1643         *log2_ring_size = tmp;
1644         return 0;
1645 }
1646
1647 /* check if directory exists and if we have permission to read/write */
1648 static int
1649 memif_check_socket_filename(const char *filename)
1650 {
1651         char *dir = NULL, *tmp;
1652         uint32_t idx;
1653         int ret = 0;
1654
1655         if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1656                 MIF_LOG(ERR, "Unix socket address too long (max 108).");
1657                 return -1;
1658         }
1659
1660         tmp = strrchr(filename, '/');
1661         if (tmp != NULL) {
1662                 idx = tmp - filename;
1663                 dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1664                 if (dir == NULL) {
1665                         MIF_LOG(ERR, "Failed to allocate memory.");
1666                         return -1;
1667                 }
1668                 strlcpy(dir, filename, sizeof(char) * (idx + 1));
1669         }
1670
1671         if (dir == NULL || (faccessat(AT_FDCWD, dir, F_OK | R_OK |
1672                                         W_OK, AT_EACCESS) < 0)) {
1673                 MIF_LOG(ERR, "Invalid socket directory.");
1674                 ret = -EINVAL;
1675         }
1676
1677         if (dir != NULL)
1678                 rte_free(dir);
1679
1680         return ret;
1681 }
1682
1683 static int
1684 memif_set_socket_filename(const char *key __rte_unused, const char *value,
1685                           void *extra_args)
1686 {
1687         const char **socket_filename = (const char **)extra_args;
1688
1689         *socket_filename = value;
1690         return 0;
1691 }
1692
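     /*
      * With "socket-abstract=yes" (the default set in probe), the socket
      * name lives in the Linux abstract namespace, so no filesystem path
      * check is performed; see the ETH_MEMIF_FLAG_SOCKET_ABSTRACT handling
      * in rte_pmd_memif_probe().
      */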
1693 static int
1694 memif_set_is_socket_abstract(const char *key __rte_unused, const char *value, void *extra_args)
1695 {
1696         uint32_t *flags = (uint32_t *)extra_args;
1697
1698         if (strstr(value, "yes") != NULL) {
1699                 *flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1700         } else if (strstr(value, "no") != NULL) {
1701                 *flags &= ~ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1702         } else {
1703                 MIF_LOG(ERR, "Failed to parse socket-abstract param: %s.", value);
1704                 return -EINVAL;
1705         }
1706         return 0;
1707 }
1708
1709 static int
1710 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1711 {
1712         struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1713
1714         if (rte_ether_unformat_addr(value, ether_addr) < 0)
1715                 MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1716         return 0;
1717 }
1718
1719 static int
1720 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1721 {
1722         const char **secret = (const char **)extra_args;
1723
1724         *secret = value;
1725         return 0;
1726 }
1727
1728 static int
1729 rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1730 {
1731         RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1732         RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1733         int ret = 0;
1734         struct rte_kvargs *kvlist;
1735         const char *name = rte_vdev_device_name(vdev);
1736         enum memif_role_t role = MEMIF_ROLE_CLIENT;
1737         memif_interface_id_t id = 0;
1738         uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1739         memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1740         const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1741         uint32_t flags = 0;
1742         const char *secret = NULL;
1743         struct rte_ether_addr *ether_addr = rte_zmalloc("",
1744                 sizeof(struct rte_ether_addr), 0);
1745         struct rte_eth_dev *eth_dev;
1746
             if (ether_addr == NULL) {
                     MIF_LOG(ERR, "Failed to allocate MAC address.");
                     return -ENOMEM;
             }
1747         rte_eth_random_addr(ether_addr->addr_bytes);
1748
1749         MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1750
1751         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1752                 eth_dev = rte_eth_dev_attach_secondary(name);
1753                 if (!eth_dev) {
1754                         MIF_LOG(ERR, "Failed to probe %s", name);
1755                         return -1;
1756                 }
1757
1758                 eth_dev->dev_ops = &ops;
1759                 eth_dev->device = &vdev->device;
1760                 eth_dev->rx_pkt_burst = eth_memif_rx;
1761                 eth_dev->tx_pkt_burst = eth_memif_tx;
1762
1763                 if (!rte_eal_primary_proc_alive(NULL)) {
1764                         MIF_LOG(ERR, "Primary process is missing");
1765                         return -1;
1766                 }
1767
1768                 eth_dev->process_private = (struct pmd_process_private *)
1769                         rte_zmalloc(name,
1770                                 sizeof(struct pmd_process_private),
1771                                 RTE_CACHE_LINE_SIZE);
1772                 if (eth_dev->process_private == NULL) {
1773                         MIF_LOG(ERR,
1774                                 "Failed to alloc memory for process private");
1775                         return -1;
1776                 }
1777
1778                 rte_eth_dev_probing_finish(eth_dev);
1779
1780                 return 0;
1781         }
1782
1783         ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1784         /*
1785          * The primary process can continue probing, but secondary processes
1786          * won't be able to request memory region information.
1787          */
1788         if (ret < 0 && rte_errno != EEXIST)
1789                 MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1790                         strerror(rte_errno));
1791
1792         /* use abstract address by default */
1793         flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1794
1795         kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1796
1797         /* parse parameters */
1798         if (kvlist != NULL) {
1799                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1800                                          &memif_set_role, &role);
1801                 if (ret < 0)
1802                         goto exit;
1803                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1804                                          &memif_set_id, &id);
1805                 if (ret < 0)
1806                         goto exit;
1807                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1808                                          &memif_set_bs, &pkt_buffer_size);
1809                 if (ret < 0)
1810                         goto exit;
1811                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1812                                          &memif_set_rs, &log2_ring_size);
1813                 if (ret < 0)
1814                         goto exit;
1815                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1816                                          &memif_set_socket_filename,
1817                                          (void *)(&socket_filename));
1818                 if (ret < 0)
1819                         goto exit;
1820                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ABSTRACT_ARG,
1821                                          &memif_set_is_socket_abstract, &flags);
1822                 if (ret < 0)
1823                         goto exit;
1824                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1825                                          &memif_set_mac, ether_addr);
1826                 if (ret < 0)
1827                         goto exit;
1828                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1829                                          &memif_set_zc, &flags);
1830                 if (ret < 0)
1831                         goto exit;
1832                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1833                                          &memif_set_secret, (void *)(&secret));
1834                 if (ret < 0)
1835                         goto exit;
1836         }
1837
1838         if (!(flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT)) {
1839                 ret = memif_check_socket_filename(socket_filename);
1840                 if (ret < 0)
1841                         goto exit;
1842         }
1843
1844         /* create interface */
1845         ret = memif_create(vdev, role, id, flags, socket_filename,
1846                            log2_ring_size, pkt_buffer_size, secret, ether_addr);
1847
1848 exit:
1849         if (kvlist != NULL)
1850                 rte_kvargs_free(kvlist);
1851         return ret;
1852 }
1853
1854 static int
1855 rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1856 {
1857         struct rte_eth_dev *eth_dev;
1858
1859         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1860         if (eth_dev == NULL)
1861                 return 0;
1862
1863         return rte_eth_dev_close(eth_dev->data->port_id);
1864 }
1865
1866 static struct rte_vdev_driver pmd_memif_drv = {
1867         .probe = rte_pmd_memif_probe,
1868         .remove = rte_pmd_memif_remove,
1869 };
1870
1871 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1872
1873 RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1874                               ETH_MEMIF_ID_ARG "=<int>"
1875                               ETH_MEMIF_ROLE_ARG "=server|client"
1876                               ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1877                               ETH_MEMIF_RING_SIZE_ARG "=<int>"
1878                               ETH_MEMIF_SOCKET_ARG "=<string>"
1879                               ETH_MEMIF_SOCKET_ABSTRACT_ARG "=yes|no"
1880                               ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1881                               ETH_MEMIF_ZC_ARG "=yes|no"
1882                               ETH_MEMIF_SECRET_ARG "=<string>");
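
     /*
      * Illustrative invocation (not part of the original sources): the
      * devargs above map to an EAL vdev string such as
      *
      *   --vdev=net_memif0,role=server,id=0,rsize=10,bsize=2048,\
      *       socket=/run/memif.sock,socket-abstract=no
      *
      * creating a server-side memif on a filesystem (non-abstract) socket.
      */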
1883
1884 RTE_LOG_REGISTER_DEFAULT(memif_logtype, NOTICE);