9deb7a5f1360d58989ed921a6f18c2b04ce1970a
[dpdk.git] / drivers / net / memif / rte_eth_memif.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
3  */
4
5 #include <stdlib.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/socket.h>
10 #include <sys/un.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <linux/if_ether.h>
14 #include <errno.h>
15 #include <sys/eventfd.h>
16
17 #include <rte_version.h>
18 #include <rte_mbuf.h>
19 #include <rte_ether.h>
20 #include <ethdev_driver.h>
21 #include <ethdev_vdev.h>
22 #include <rte_malloc.h>
23 #include <rte_kvargs.h>
24 #include <rte_bus_vdev.h>
25 #include <rte_string_fns.h>
26 #include <rte_errno.h>
27 #include <rte_memory.h>
28 #include <rte_memzone.h>
29 #include <rte_eal_memconfig.h>
30
31 #include "rte_eth_memif.h"
32 #include "memif_socket.h"
33
/*
 * Names of the device arguments understood by this driver.
 * Each macro is the textual key of one argument; all of them are collected
 * in valid_arguments[] below.
 */
34 #define ETH_MEMIF_ID_ARG                "id"
35 #define ETH_MEMIF_ROLE_ARG              "role"
36 #define ETH_MEMIF_PKT_BUFFER_SIZE_ARG   "bsize"
37 #define ETH_MEMIF_RING_SIZE_ARG         "rsize"
38 #define ETH_MEMIF_SOCKET_ARG            "socket"
39 #define ETH_MEMIF_SOCKET_ABSTRACT_ARG   "socket-abstract"
40 #define ETH_MEMIF_MAC_ARG               "mac"
41 #define ETH_MEMIF_ZC_ARG                "zero-copy"
42 #define ETH_MEMIF_SECRET_ARG            "secret"
43
/*
 * NULL-terminated list of every accepted argument key.
 * NOTE(review): presumably handed to the kvargs parser when a device is
 * probed; that code is not visible in this part of the file.
 */
44 static const char * const valid_arguments[] = {
45         ETH_MEMIF_ID_ARG,
46         ETH_MEMIF_ROLE_ARG,
47         ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
48         ETH_MEMIF_RING_SIZE_ARG,
49         ETH_MEMIF_SOCKET_ARG,
50         ETH_MEMIF_SOCKET_ABSTRACT_ARG,
51         ETH_MEMIF_MAC_ARG,
52         ETH_MEMIF_ZC_ARG,
53         ETH_MEMIF_SECRET_ARG,
54         NULL
55 };
56
/*
 * Constant link description for a memif port: 10 Gbps, full duplex,
 * autonegotiation flag set, link status down.
 * NOTE(review): presumably the initial link state given to new ports; the
 * place where it is used is not visible in this part of the file.
 */
57 static const struct rte_eth_link pmd_link = {
58         .link_speed = RTE_ETH_SPEED_NUM_10G,
59         .link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
60         .link_status = RTE_ETH_LINK_DOWN,
61         .link_autoneg = RTE_ETH_LINK_AUTONEG
62 };
63
/*
 * Name of the multi-process message used to ask another process for a memory
 * region; memif_mp_request_regions() puts it into the request it sends.
 */
64 #define MEMIF_MP_SEND_REGION            "memif_mp_send_region"
65
66
/*
 * Forward declaration: the function is defined later in the file but is
 * already needed by memif_mp_request_regions(), which passes it to
 * rte_memseg_walk() as the per-segment callback.
 */
67 static int memif_region_init_zc(const struct rte_memseg_list *msl,
68                                 const struct rte_memseg *ms, void *arg);
69
70 const char *
71 memif_version(void)
72 {
73         return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
74 }
75
76 /*
 * Message header to synchronize regions.
 * Carried in the param area of both the region request
 * (memif_mp_request_regions) and its reply (memif_mp_send_region).
 */
77 struct mp_region_msg {
78         char port_name[RTE_DEV_NAME_MAX_LEN]; /* name of the port the region belongs to */
79         memif_region_index_t idx;             /* index of the requested region */
80         memif_region_size_t size;             /* region size; left 0 in a reply when the region does not exist */
81 };
82
83 static int
84 memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
85 {
86         struct rte_eth_dev *dev;
87         struct pmd_process_private *proc_private;
88         const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
89         struct rte_mp_msg reply;
90         struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
91         uint16_t port_id;
92         int ret;
93
94         /* Get requested port */
95         ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
96         if (ret) {
97                 MIF_LOG(ERR, "Failed to get port id for %s",
98                         msg_param->port_name);
99                 return -1;
100         }
101         dev = &rte_eth_devices[port_id];
102         proc_private = dev->process_private;
103
104         memset(&reply, 0, sizeof(reply));
105         strlcpy(reply.name, msg->name, sizeof(reply.name));
106         reply_param->idx = msg_param->idx;
107         if (proc_private->regions[msg_param->idx] != NULL) {
108                 reply_param->size = proc_private->regions[msg_param->idx]->region_size;
109                 reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
110                 reply.num_fds = 1;
111         }
112         reply.len_param = sizeof(*reply_param);
113         if (rte_mp_reply(&reply, peer) < 0) {
114                 MIF_LOG(ERR, "Failed to reply to an add region request");
115                 return -1;
116         }
117
118         return 0;
119 }
120
121 /*
122  * Request regions
123  * Called by secondary process, when ports link status goes up.
124  */
125 static int
126 memif_mp_request_regions(struct rte_eth_dev *dev)
127 {
128         int ret, i;
129         struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
130         struct rte_mp_msg msg, *reply;
131         struct rte_mp_reply replies;
132         struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
133         struct mp_region_msg *reply_param;
134         struct memif_region *r;
135         struct pmd_process_private *proc_private = dev->process_private;
136         struct pmd_internals *pmd = dev->data->dev_private;
137         /* in case of zero-copy client, only request region 0 */
138         uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
139                                    1 : ETH_MEMIF_MAX_REGION_NUM;
140
141         MIF_LOG(DEBUG, "Requesting memory regions");
142
143         for (i = 0; i < max_region_num; i++) {
144                 /* Prepare the message */
145                 memset(&msg, 0, sizeof(msg));
146                 strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
147                 strlcpy(msg_param->port_name, dev->data->name,
148                         sizeof(msg_param->port_name));
149                 msg_param->idx = i;
150                 msg.len_param = sizeof(*msg_param);
151
152                 /* Send message */
153                 ret = rte_mp_request_sync(&msg, &replies, &timeout);
154                 if (ret < 0 || replies.nb_received != 1) {
155                         MIF_LOG(ERR, "Failed to send mp msg: %d",
156                                 rte_errno);
157                         return -1;
158                 }
159
160                 reply = &replies.msgs[0];
161                 reply_param = (struct mp_region_msg *)reply->param;
162
163                 if (reply_param->size > 0) {
164                         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
165                         if (r == NULL) {
166                                 MIF_LOG(ERR, "Failed to alloc memif region.");
167                                 free(reply);
168                                 return -ENOMEM;
169                         }
170                         r->region_size = reply_param->size;
171                         if (reply->num_fds < 1) {
172                                 MIF_LOG(ERR, "Missing file descriptor.");
173                                 free(reply);
174                                 return -1;
175                         }
176                         r->fd = reply->fds[0];
177                         r->addr = NULL;
178
179                         proc_private->regions[reply_param->idx] = r;
180                         proc_private->regions_num++;
181                 }
182                 free(reply);
183         }
184
185         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
186                 ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
187                 if (ret < 0)
188                         return ret;
189         }
190
191         return memif_connect(dev);
192 }
193
194 static int
195 memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
196 {
197         dev_info->max_mac_addrs = 1;
198         dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
199         dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
200         dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
201         dev_info->min_rx_bufsize = 0;
202         dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
203
204         return 0;
205 }
206
207 static memif_ring_t *
208 memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
209                memif_ring_type_t type, uint16_t ring_num)
210 {
211         /* rings only in region 0 */
212         void *p = proc_private->regions[0]->addr;
213         int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
214             (1 << pmd->run.log2_ring_size);
215
216         p = (uint8_t *)p + (ring_num + type * pmd->run.num_c2s_rings) * ring_size;
217
218         return (memif_ring_t *)p;
219 }
220
221 static memif_region_offset_t
222 memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
223                       memif_ring_type_t type, uint16_t num)
224 {
225         struct pmd_internals *pmd = dev->data->dev_private;
226         struct pmd_process_private *proc_private = dev->process_private;
227
228         return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
229                 (uint8_t *)proc_private->regions[mq->region]->addr);
230 }
231
232 static memif_ring_t *
233 memif_get_ring_from_queue(struct pmd_process_private *proc_private,
234                           struct memif_queue *mq)
235 {
236         struct memif_region *r;
237
238         r = proc_private->regions[mq->region];
239         if (r == NULL)
240                 return NULL;
241
242         return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
243 }
244
245 static void *
246 memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
247 {
248         return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
249 }
250
251 /* Free mbufs received by server */
252 static void
253 memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
254 {
255         uint16_t cur_tail;
256         uint16_t mask = (1 << mq->log2_ring_size) - 1;
257         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
258
259         /* FIXME: improve performance */
260         /* The ring->tail acts as a guard variable between Tx and Rx
261          * threads, so using load-acquire pairs with store-release
262          * in function eth_memif_rx for C2S queues.
263          */
264         cur_tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
265         while (mq->last_tail != cur_tail) {
266                 RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
267                 /* Decrement refcnt and free mbuf. (current segment) */
268                 rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
269                 rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
270                 mq->last_tail++;
271         }
272 }
273
274 static int
275 memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
276                     struct rte_mbuf *tail)
277 {
278         /* Check for number-of-segments-overflow */
279         if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
280                 return -EOVERFLOW;
281
282         /* Chain 'tail' onto the old tail */
283         cur_tail->next = tail;
284
285         /* accumulate number of segments and total length. */
286         head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
287
288         tail->pkt_len = tail->data_len;
289         head->pkt_len += tail->pkt_len;
290
291         return 0;
292 }
293
294 static uint16_t
295 eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
296 {
297         struct memif_queue *mq = queue;
298         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
299         struct pmd_process_private *proc_private =
300                 rte_eth_devices[mq->in_port].process_private;
301         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
302         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
303         uint16_t n_rx_pkts = 0;
304         uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
305                 RTE_PKTMBUF_HEADROOM;
306         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
307         memif_ring_type_t type = mq->type;
308         memif_desc_t *d0;
309         struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
310         uint64_t b;
311         ssize_t size __rte_unused;
312         uint16_t head;
313         int ret;
314         struct rte_eth_link link;
315
316         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
317                 return 0;
318         if (unlikely(ring == NULL)) {
319                 /* Secondary process will attempt to request regions. */
320                 ret = rte_eth_link_get(mq->in_port, &link);
321                 if (ret < 0)
322                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
323                                 mq->in_port, rte_strerror(-ret));
324                 return 0;
325         }
326
327         /* consume interrupt */
328         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
329                 size = read(mq->intr_handle.fd, &b, sizeof(b));
330
331         ring_size = 1 << mq->log2_ring_size;
332         mask = ring_size - 1;
333
334         if (type == MEMIF_RING_C2S) {
335                 cur_slot = mq->last_head;
336                 last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
337         } else {
338                 cur_slot = mq->last_tail;
339                 last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
340         }
341
342         if (cur_slot == last_slot)
343                 goto refill;
344         n_slots = last_slot - cur_slot;
345
346         while (n_slots && n_rx_pkts < nb_pkts) {
347                 mbuf_head = rte_pktmbuf_alloc(mq->mempool);
348                 if (unlikely(mbuf_head == NULL))
349                         goto no_free_bufs;
350                 mbuf = mbuf_head;
351                 mbuf->port = mq->in_port;
352
353 next_slot:
354                 s0 = cur_slot & mask;
355                 d0 = &ring->desc[s0];
356
357                 src_len = d0->length;
358                 dst_off = 0;
359                 src_off = 0;
360
361                 do {
362                         dst_len = mbuf_size - dst_off;
363                         if (dst_len == 0) {
364                                 dst_off = 0;
365                                 dst_len = mbuf_size;
366
367                                 /* store pointer to tail */
368                                 mbuf_tail = mbuf;
369                                 mbuf = rte_pktmbuf_alloc(mq->mempool);
370                                 if (unlikely(mbuf == NULL))
371                                         goto no_free_bufs;
372                                 mbuf->port = mq->in_port;
373                                 ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
374                                 if (unlikely(ret < 0)) {
375                                         MIF_LOG(ERR, "number-of-segments-overflow");
376                                         rte_pktmbuf_free(mbuf);
377                                         goto no_free_bufs;
378                                 }
379                         }
380                         cp_len = RTE_MIN(dst_len, src_len);
381
382                         rte_pktmbuf_data_len(mbuf) += cp_len;
383                         rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
384                         if (mbuf != mbuf_head)
385                                 rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
386
387                         rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
388                                                            dst_off),
389                                 (uint8_t *)memif_get_buffer(proc_private, d0) +
390                                 src_off, cp_len);
391
392                         src_off += cp_len;
393                         dst_off += cp_len;
394                         src_len -= cp_len;
395                 } while (src_len);
396
397                 cur_slot++;
398                 n_slots--;
399
400                 if (d0->flags & MEMIF_DESC_FLAG_NEXT)
401                         goto next_slot;
402
403                 mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
404                 *bufs++ = mbuf_head;
405                 n_rx_pkts++;
406         }
407
408 no_free_bufs:
409         if (type == MEMIF_RING_C2S) {
410                 __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
411                 mq->last_head = cur_slot;
412         } else {
413                 mq->last_tail = cur_slot;
414         }
415
416 refill:
417         if (type == MEMIF_RING_S2C) {
418                 /* ring->head is updated by the receiver and this function
419                  * is called in the context of receiver thread. The loads in
420                  * the receiver do not need to synchronize with its own stores.
421                  */
422                 head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
423                 n_slots = ring_size - head + mq->last_tail;
424
425                 while (n_slots--) {
426                         s0 = head++ & mask;
427                         d0 = &ring->desc[s0];
428                         d0->length = pmd->run.pkt_buffer_size;
429                 }
430                 __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
431         }
432
433         mq->n_pkts += n_rx_pkts;
434         return n_rx_pkts;
435 }
436
437 static uint16_t
438 eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
439 {
440         struct memif_queue *mq = queue;
441         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
442         struct pmd_process_private *proc_private =
443                 rte_eth_devices[mq->in_port].process_private;
444         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
445         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
446         uint16_t n_rx_pkts = 0;
447         memif_desc_t *d0;
448         struct rte_mbuf *mbuf, *mbuf_tail;
449         struct rte_mbuf *mbuf_head = NULL;
450         int ret;
451         struct rte_eth_link link;
452
453         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
454                 return 0;
455         if (unlikely(ring == NULL)) {
456                 /* Secondary process will attempt to request regions. */
457                 rte_eth_link_get(mq->in_port, &link);
458                 return 0;
459         }
460
461         /* consume interrupt */
462         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
463                 uint64_t b;
464                 ssize_t size __rte_unused;
465                 size = read(mq->intr_handle.fd, &b, sizeof(b));
466         }
467
468         ring_size = 1 << mq->log2_ring_size;
469         mask = ring_size - 1;
470
471         cur_slot = mq->last_tail;
472         /* The ring->tail acts as a guard variable between Tx and Rx
473          * threads, so using load-acquire pairs with store-release
474          * to synchronize it between threads.
475          */
476         last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
477         if (cur_slot == last_slot)
478                 goto refill;
479         n_slots = last_slot - cur_slot;
480
481         while (n_slots && n_rx_pkts < nb_pkts) {
482                 s0 = cur_slot & mask;
483
484                 d0 = &ring->desc[s0];
485                 mbuf_head = mq->buffers[s0];
486                 mbuf = mbuf_head;
487
488 next_slot:
489                 /* prefetch next descriptor */
490                 if (n_rx_pkts + 1 < nb_pkts)
491                         rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);
492
493                 mbuf->port = mq->in_port;
494                 rte_pktmbuf_data_len(mbuf) = d0->length;
495                 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
496
497                 mq->n_bytes += rte_pktmbuf_data_len(mbuf);
498
499                 cur_slot++;
500                 n_slots--;
501                 if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
502                         s0 = cur_slot & mask;
503                         d0 = &ring->desc[s0];
504                         mbuf_tail = mbuf;
505                         mbuf = mq->buffers[s0];
506                         ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
507                         if (unlikely(ret < 0)) {
508                                 MIF_LOG(ERR, "number-of-segments-overflow");
509                                 goto refill;
510                         }
511                         goto next_slot;
512                 }
513
514                 *bufs++ = mbuf_head;
515                 n_rx_pkts++;
516         }
517
518         mq->last_tail = cur_slot;
519
520 /* Supply server with new buffers */
521 refill:
522         /* ring->head is updated by the receiver and this function
523          * is called in the context of receiver thread. The loads in
524          * the receiver do not need to synchronize with its own stores.
525          */
526         head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
527         n_slots = ring_size - head + mq->last_tail;
528
529         if (n_slots < 32)
530                 goto no_free_mbufs;
531
532         ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
533         if (unlikely(ret < 0))
534                 goto no_free_mbufs;
535
536         while (n_slots--) {
537                 s0 = head++ & mask;
538                 if (n_slots > 0)
539                         rte_prefetch0(mq->buffers[head & mask]);
540                 d0 = &ring->desc[s0];
541                 /* store buffer header */
542                 mbuf = mq->buffers[s0];
543                 /* populate descriptor */
544                 d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
545                                 RTE_PKTMBUF_HEADROOM;
546                 d0->region = 1;
547                 d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
548                         (uint8_t *)proc_private->regions[d0->region]->addr;
549         }
550 no_free_mbufs:
551         /* The ring->head acts as a guard variable between Tx and Rx
552          * threads, so using store-release pairs with load-acquire
553          * in function eth_memif_tx.
554          */
555         __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
556
557         mq->n_pkts += n_rx_pkts;
558
559         return n_rx_pkts;
560 }
561
562 static uint16_t
563 eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
564 {
565         struct memif_queue *mq = queue;
566         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
567         struct pmd_process_private *proc_private =
568                 rte_eth_devices[mq->in_port].process_private;
569         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
570         uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
571         uint16_t src_len, src_off, dst_len, dst_off, cp_len, nb_segs;
572         memif_ring_type_t type = mq->type;
573         memif_desc_t *d0;
574         struct rte_mbuf *mbuf;
575         struct rte_mbuf *mbuf_head;
576         uint64_t a;
577         ssize_t size;
578         struct rte_eth_link link;
579
580         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
581                 return 0;
582         if (unlikely(ring == NULL)) {
583                 int ret;
584
585                 /* Secondary process will attempt to request regions. */
586                 ret = rte_eth_link_get(mq->in_port, &link);
587                 if (ret < 0)
588                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
589                                 mq->in_port, rte_strerror(-ret));
590                 return 0;
591         }
592
593         ring_size = 1 << mq->log2_ring_size;
594         mask = ring_size - 1;
595
596         if (type == MEMIF_RING_C2S) {
597                 /* For C2S queues ring->head is updated by the sender and
598                  * this function is called in the context of sending thread.
599                  * The loads in the sender do not need to synchronize with
600                  * its own stores. Hence, the following load can be a
601                  * relaxed load.
602                  */
603                 slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
604                 n_free = ring_size - slot +
605                                 __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
606         } else {
607                 /* For S2C queues ring->tail is updated by the sender and
608                  * this function is called in the context of sending thread.
609                  * The loads in the sender do not need to synchronize with
610                  * its own stores. Hence, the following load can be a
611                  * relaxed load.
612                  */
613                 slot = __atomic_load_n(&ring->tail, __ATOMIC_RELAXED);
614                 n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
615         }
616
617         while (n_tx_pkts < nb_pkts && n_free) {
618                 mbuf_head = *bufs++;
619                 nb_segs = mbuf_head->nb_segs;
620                 mbuf = mbuf_head;
621
622                 saved_slot = slot;
623                 d0 = &ring->desc[slot & mask];
624                 dst_off = 0;
625                 dst_len = (type == MEMIF_RING_C2S) ?
626                         pmd->run.pkt_buffer_size : d0->length;
627
628 next_in_chain:
629                 src_off = 0;
630                 src_len = rte_pktmbuf_data_len(mbuf);
631
632                 while (src_len) {
633                         if (dst_len == 0) {
634                                 if (n_free) {
635                                         slot++;
636                                         n_free--;
637                                         d0->flags |= MEMIF_DESC_FLAG_NEXT;
638                                         d0 = &ring->desc[slot & mask];
639                                         dst_off = 0;
640                                         dst_len = (type == MEMIF_RING_C2S) ?
641                                             pmd->run.pkt_buffer_size : d0->length;
642                                         d0->flags = 0;
643                                 } else {
644                                         slot = saved_slot;
645                                         goto no_free_slots;
646                                 }
647                         }
648                         cp_len = RTE_MIN(dst_len, src_len);
649
650                         rte_memcpy((uint8_t *)memif_get_buffer(proc_private,
651                                                                d0) + dst_off,
652                                 rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
653                                 cp_len);
654
655                         mq->n_bytes += cp_len;
656                         src_off += cp_len;
657                         dst_off += cp_len;
658                         src_len -= cp_len;
659                         dst_len -= cp_len;
660
661                         d0->length = dst_off;
662                 }
663
664                 if (--nb_segs > 0) {
665                         mbuf = mbuf->next;
666                         goto next_in_chain;
667                 }
668
669                 n_tx_pkts++;
670                 slot++;
671                 n_free--;
672                 rte_pktmbuf_free(mbuf_head);
673         }
674
675 no_free_slots:
676         if (type == MEMIF_RING_C2S)
677                 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
678         else
679                 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
680
681         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
682                 a = 1;
683                 size = write(mq->intr_handle.fd, &a, sizeof(a));
684                 if (unlikely(size < 0)) {
685                         MIF_LOG(WARNING,
686                                 "Failed to send interrupt. %s", strerror(errno));
687                 }
688         }
689
690         mq->n_pkts += n_tx_pkts;
691         return n_tx_pkts;
692 }
693
694
695 static int
696 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
697                 memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
698                 uint16_t slot, uint16_t n_free)
699 {
700         memif_desc_t *d0;
701         uint16_t nb_segs = mbuf->nb_segs;
702         int used_slots = 1;
703
704 next_in_chain:
705         /* store pointer to mbuf to free it later */
706         mq->buffers[slot & mask] = mbuf;
707         /* Increment refcnt to make sure the buffer is not freed before server
708          * receives it. (current segment)
709          */
710         rte_mbuf_refcnt_update(mbuf, 1);
711         /* populate descriptor */
712         d0 = &ring->desc[slot & mask];
713         d0->length = rte_pktmbuf_data_len(mbuf);
714         mq->n_bytes += rte_pktmbuf_data_len(mbuf);
715         /* FIXME: get region index */
716         d0->region = 1;
717         d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
718                 (uint8_t *)proc_private->regions[d0->region]->addr;
719         d0->flags = 0;
720
721         /* check if buffer is chained */
722         if (--nb_segs > 0) {
723                 if (n_free < 2)
724                         return 0;
725                 /* mark buffer as chained */
726                 d0->flags |= MEMIF_DESC_FLAG_NEXT;
727                 /* advance mbuf */
728                 mbuf = mbuf->next;
729                 /* update counters */
730                 used_slots++;
731                 slot++;
732                 n_free--;
733                 goto next_in_chain;
734         }
735         return used_slots;
736 }
737
738 static uint16_t
739 eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
740 {
741         struct memif_queue *mq = queue;
742         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
743         struct pmd_process_private *proc_private =
744                 rte_eth_devices[mq->in_port].process_private;
745         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
746         uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
747         struct rte_eth_link link;
748
749         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
750                 return 0;
751         if (unlikely(ring == NULL)) {
752                 /* Secondary process will attempt to request regions. */
753                 rte_eth_link_get(mq->in_port, &link);
754                 return 0;
755         }
756
757         ring_size = 1 << mq->log2_ring_size;
758         mask = ring_size - 1;
759
760         /* free mbufs received by server */
761         memif_free_stored_mbufs(proc_private, mq);
762
763         /* ring type always MEMIF_RING_C2S */
764         /* For C2S queues ring->head is updated by the sender and
765          * this function is called in the context of sending thread.
766          * The loads in the sender do not need to synchronize with
767          * its own stores. Hence, the following load can be a
768          * relaxed load.
769          */
770         slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
771         n_free = ring_size - slot + mq->last_tail;
772
773         int used_slots;
774
775         while (n_free && (n_tx_pkts < nb_pkts)) {
776                 while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
777                         if ((nb_pkts - n_tx_pkts) > 8) {
778                                 rte_prefetch0(*bufs + 4);
779                                 rte_prefetch0(*bufs + 5);
780                                 rte_prefetch0(*bufs + 6);
781                                 rte_prefetch0(*bufs + 7);
782                         }
783                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
784                                 mask, slot, n_free);
785                         if (unlikely(used_slots < 1))
786                                 goto no_free_slots;
787                         n_tx_pkts++;
788                         slot += used_slots;
789                         n_free -= used_slots;
790
791                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
792                                 mask, slot, n_free);
793                         if (unlikely(used_slots < 1))
794                                 goto no_free_slots;
795                         n_tx_pkts++;
796                         slot += used_slots;
797                         n_free -= used_slots;
798
799                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
800                                 mask, slot, n_free);
801                         if (unlikely(used_slots < 1))
802                                 goto no_free_slots;
803                         n_tx_pkts++;
804                         slot += used_slots;
805                         n_free -= used_slots;
806
807                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
808                                 mask, slot, n_free);
809                         if (unlikely(used_slots < 1))
810                                 goto no_free_slots;
811                         n_tx_pkts++;
812                         slot += used_slots;
813                         n_free -= used_slots;
814                 }
815                 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
816                         mask, slot, n_free);
817                 if (unlikely(used_slots < 1))
818                         goto no_free_slots;
819                 n_tx_pkts++;
820                 slot += used_slots;
821                 n_free -= used_slots;
822         }
823
824 no_free_slots:
825         /* ring type always MEMIF_RING_C2S */
826         /* The ring->head acts as a guard variable between Tx and Rx
827          * threads, so using store-release pairs with load-acquire
828          * in function eth_memif_rx for C2S rings.
829          */
830         __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
831
832         /* Send interrupt, if enabled. */
833         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
834                 uint64_t a = 1;
835                 ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
836                 if (unlikely(size < 0)) {
837                         MIF_LOG(WARNING,
838                                 "Failed to send interrupt. %s", strerror(errno));
839                 }
840         }
841
842         /* increment queue counters */
843         mq->n_pkts += n_tx_pkts;
844
845         return n_tx_pkts;
846 }
847
848 void
849 memif_free_regions(struct rte_eth_dev *dev)
850 {
851         struct pmd_process_private *proc_private = dev->process_private;
852         struct pmd_internals *pmd = dev->data->dev_private;
853         int i;
854         struct memif_region *r;
855
856         /* regions are allocated contiguously, so it's
857          * enough to loop until 'proc_private->regions_num'
858          */
859         for (i = 0; i < proc_private->regions_num; i++) {
860                 r = proc_private->regions[i];
861                 if (r != NULL) {
862                         /* This is memzone */
863                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
864                                 r->addr = NULL;
865                                 if (r->fd > 0)
866                                         close(r->fd);
867                         }
868                         if (r->addr != NULL) {
869                                 munmap(r->addr, r->region_size);
870                                 if (r->fd > 0) {
871                                         close(r->fd);
872                                         r->fd = -1;
873                                 }
874                         }
875                         rte_free(r);
876                         proc_private->regions[i] = NULL;
877                 }
878         }
879         proc_private->regions_num = 0;
880 }
881
882 static int
883 memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
884                      void *arg)
885 {
886         struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
887         struct memif_region *r;
888
889         if (proc_private->regions_num < 1) {
890                 MIF_LOG(ERR, "Missing descriptor region");
891                 return -1;
892         }
893
894         r = proc_private->regions[proc_private->regions_num - 1];
895
896         if (r->addr != msl->base_va)
897                 r = proc_private->regions[++proc_private->regions_num - 1];
898
899         if (r == NULL) {
900                 r = rte_zmalloc("region", sizeof(struct memif_region), 0);
901                 if (r == NULL) {
902                         MIF_LOG(ERR, "Failed to alloc memif region.");
903                         return -ENOMEM;
904                 }
905
906                 r->addr = msl->base_va;
907                 r->region_size = ms->len;
908                 r->fd = rte_memseg_get_fd(ms);
909                 if (r->fd < 0)
910                         return -1;
911                 r->pkt_buffer_offset = 0;
912
913                 proc_private->regions[proc_private->regions_num - 1] = r;
914         } else {
915                 r->region_size += ms->len;
916         }
917
918         return 0;
919 }
920
921 static int
922 memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
923 {
924         struct pmd_internals *pmd = dev->data->dev_private;
925         struct pmd_process_private *proc_private = dev->process_private;
926         char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
927         int ret = 0;
928         struct memif_region *r;
929
930         if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
931                 MIF_LOG(ERR, "Too many regions.");
932                 return -1;
933         }
934
935         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
936         if (r == NULL) {
937                 MIF_LOG(ERR, "Failed to alloc memif region.");
938                 return -ENOMEM;
939         }
940
941         /* calculate buffer offset */
942         r->pkt_buffer_offset = (pmd->run.num_c2s_rings + pmd->run.num_s2c_rings) *
943             (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
944             (1 << pmd->run.log2_ring_size));
945
946         r->region_size = r->pkt_buffer_offset;
947         /* if region has buffers, add buffers size to region_size */
948         if (has_buffers == 1)
949                 r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
950                         (1 << pmd->run.log2_ring_size) *
951                         (pmd->run.num_c2s_rings +
952                          pmd->run.num_s2c_rings));
953
954         memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
955         snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
956                  proc_private->regions_num);
957
958         r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
959         if (r->fd < 0) {
960                 MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
961                 ret = -1;
962                 goto error;
963         }
964
965         ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
966         if (ret < 0) {
967                 MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
968                 goto error;
969         }
970
971         ret = ftruncate(r->fd, r->region_size);
972         if (ret < 0) {
973                 MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
974                 goto error;
975         }
976
977         r->addr = mmap(NULL, r->region_size, PROT_READ |
978                        PROT_WRITE, MAP_SHARED, r->fd, 0);
979         if (r->addr == MAP_FAILED) {
980                 MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(ret));
981                 ret = -1;
982                 goto error;
983         }
984
985         proc_private->regions[proc_private->regions_num] = r;
986         proc_private->regions_num++;
987
988         return ret;
989
990 error:
991         if (r->fd > 0)
992                 close(r->fd);
993         r->fd = -1;
994
995         return ret;
996 }
997
998 static int
999 memif_regions_init(struct rte_eth_dev *dev)
1000 {
1001         struct pmd_internals *pmd = dev->data->dev_private;
1002         int ret;
1003
1004         /*
1005          * Zero-copy exposes dpdk memory.
1006          * Each memseg list will be represented by memif region.
1007          * Zero-copy regions indexing: memseg list idx + 1,
1008          * as we already have region 0 reserved for descriptors.
1009          */
1010         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1011                 /* create region idx 0 containing descriptors */
1012                 ret = memif_region_init_shm(dev, 0);
1013                 if (ret < 0)
1014                         return ret;
1015                 ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
1016                 if (ret < 0)
1017                         return ret;
1018         } else {
1019                 /* create one memory region contaning rings and buffers */
1020                 ret = memif_region_init_shm(dev, /* has buffers */ 1);
1021                 if (ret < 0)
1022                         return ret;
1023         }
1024
1025         return 0;
1026 }
1027
1028 static void
1029 memif_init_rings(struct rte_eth_dev *dev)
1030 {
1031         struct pmd_internals *pmd = dev->data->dev_private;
1032         struct pmd_process_private *proc_private = dev->process_private;
1033         memif_ring_t *ring;
1034         int i, j;
1035         uint16_t slot;
1036
1037         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1038                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_C2S, i);
1039                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1040                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1041                 ring->cookie = MEMIF_COOKIE;
1042                 ring->flags = 0;
1043
1044                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1045                         continue;
1046
1047                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1048                         slot = i * (1 << pmd->run.log2_ring_size) + j;
1049                         ring->desc[j].region = 0;
1050                         ring->desc[j].offset =
1051                                 proc_private->regions[0]->pkt_buffer_offset +
1052                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1053                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1054                 }
1055         }
1056
1057         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1058                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2C, i);
1059                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1060                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1061                 ring->cookie = MEMIF_COOKIE;
1062                 ring->flags = 0;
1063
1064                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1065                         continue;
1066
1067                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1068                         slot = (i + pmd->run.num_c2s_rings) *
1069                             (1 << pmd->run.log2_ring_size) + j;
1070                         ring->desc[j].region = 0;
1071                         ring->desc[j].offset =
1072                                 proc_private->regions[0]->pkt_buffer_offset +
1073                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1074                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1075                 }
1076         }
1077 }
1078
1079 /* called only by client */
1080 static int
1081 memif_init_queues(struct rte_eth_dev *dev)
1082 {
1083         struct pmd_internals *pmd = dev->data->dev_private;
1084         struct memif_queue *mq;
1085         int i;
1086
1087         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1088                 mq = dev->data->tx_queues[i];
1089                 mq->log2_ring_size = pmd->run.log2_ring_size;
1090                 /* queues located only in region 0 */
1091                 mq->region = 0;
1092                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_C2S, i);
1093                 mq->last_head = 0;
1094                 mq->last_tail = 0;
1095                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1096                 if (mq->intr_handle.fd < 0) {
1097                         MIF_LOG(WARNING,
1098                                 "Failed to create eventfd for tx queue %d: %s.", i,
1099                                 strerror(errno));
1100                 }
1101                 mq->buffers = NULL;
1102                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1103                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1104                                                   (1 << mq->log2_ring_size), 0);
1105                         if (mq->buffers == NULL)
1106                                 return -ENOMEM;
1107                 }
1108         }
1109
1110         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1111                 mq = dev->data->rx_queues[i];
1112                 mq->log2_ring_size = pmd->run.log2_ring_size;
1113                 /* queues located only in region 0 */
1114                 mq->region = 0;
1115                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2C, i);
1116                 mq->last_head = 0;
1117                 mq->last_tail = 0;
1118                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1119                 if (mq->intr_handle.fd < 0) {
1120                         MIF_LOG(WARNING,
1121                                 "Failed to create eventfd for rx queue %d: %s.", i,
1122                                 strerror(errno));
1123                 }
1124                 mq->buffers = NULL;
1125                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1126                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1127                                                   (1 << mq->log2_ring_size), 0);
1128                         if (mq->buffers == NULL)
1129                                 return -ENOMEM;
1130                 }
1131         }
1132         return 0;
1133 }
1134
/*
 * Create the memory regions, initialize the rings inside them and then
 * set up the queues, in that order.
 *
 * Returns 0 on success, or the negative value reported by the failing
 * step.
 */
int
memif_init_regions_and_queues(struct rte_eth_dev *dev)
{
	int rc = memif_regions_init(dev);

	if (rc < 0)
		return rc;

	memif_init_rings(dev);

	rc = memif_init_queues(dev);

	return (rc < 0) ? rc : 0;
}
1152
1153 int
1154 memif_connect(struct rte_eth_dev *dev)
1155 {
1156         struct pmd_internals *pmd = dev->data->dev_private;
1157         struct pmd_process_private *proc_private = dev->process_private;
1158         struct memif_region *mr;
1159         struct memif_queue *mq;
1160         memif_ring_t *ring;
1161         int i;
1162
1163         for (i = 0; i < proc_private->regions_num; i++) {
1164                 mr = proc_private->regions[i];
1165                 if (mr != NULL) {
1166                         if (mr->addr == NULL) {
1167                                 if (mr->fd < 0)
1168                                         return -1;
1169                                 mr->addr = mmap(NULL, mr->region_size,
1170                                                 PROT_READ | PROT_WRITE,
1171                                                 MAP_SHARED, mr->fd, 0);
1172                                 if (mr->addr == MAP_FAILED) {
1173                                         MIF_LOG(ERR, "mmap failed: %s\n",
1174                                                 strerror(errno));
1175                                         return -1;
1176                                 }
1177                         }
1178                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
1179                                 /* close memseg file */
1180                                 close(mr->fd);
1181                                 mr->fd = -1;
1182                         }
1183                 }
1184         }
1185
1186         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1187                 for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1188                         mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
1189                             dev->data->tx_queues[i] : dev->data->rx_queues[i];
1190                         ring = memif_get_ring_from_queue(proc_private, mq);
1191                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1192                                 MIF_LOG(ERR, "Wrong ring");
1193                                 return -1;
1194                         }
1195                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1196                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1197                         mq->last_head = 0;
1198                         mq->last_tail = 0;
1199                         /* enable polling mode */
1200                         if (pmd->role == MEMIF_ROLE_SERVER)
1201                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1202                 }
1203                 for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1204                         mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
1205                             dev->data->rx_queues[i] : dev->data->tx_queues[i];
1206                         ring = memif_get_ring_from_queue(proc_private, mq);
1207                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1208                                 MIF_LOG(ERR, "Wrong ring");
1209                                 return -1;
1210                         }
1211                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1212                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1213                         mq->last_head = 0;
1214                         mq->last_tail = 0;
1215                         /* enable polling mode */
1216                         if (pmd->role == MEMIF_ROLE_CLIENT)
1217                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1218                 }
1219
1220                 pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
1221                 pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
1222                 dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
1223         }
1224         MIF_LOG(INFO, "Connected.");
1225         return 0;
1226 }
1227
1228 static int
1229 memif_dev_start(struct rte_eth_dev *dev)
1230 {
1231         struct pmd_internals *pmd = dev->data->dev_private;
1232         int ret = 0;
1233
1234         switch (pmd->role) {
1235         case MEMIF_ROLE_CLIENT:
1236                 ret = memif_connect_client(dev);
1237                 break;
1238         case MEMIF_ROLE_SERVER:
1239                 ret = memif_connect_server(dev);
1240                 break;
1241         default:
1242                 MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
1243                 ret = -1;
1244                 break;
1245         }
1246
1247         return ret;
1248 }
1249
1250 static int
1251 memif_dev_close(struct rte_eth_dev *dev)
1252 {
1253         struct pmd_internals *pmd = dev->data->dev_private;
1254         int i;
1255
1256         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1257                 memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
1258                 memif_disconnect(dev);
1259
1260                 for (i = 0; i < dev->data->nb_rx_queues; i++)
1261                         (*dev->dev_ops->rx_queue_release)(dev, i);
1262                 for (i = 0; i < dev->data->nb_tx_queues; i++)
1263                         (*dev->dev_ops->tx_queue_release)(dev, i);
1264
1265                 memif_socket_remove_device(dev);
1266         } else {
1267                 memif_disconnect(dev);
1268         }
1269
1270         rte_free(dev->process_private);
1271
1272         return 0;
1273 }
1274
1275 static int
1276 memif_dev_configure(struct rte_eth_dev *dev)
1277 {
1278         struct pmd_internals *pmd = dev->data->dev_private;
1279
1280         /*
1281          * CLIENT - TXQ
1282          * SERVER - RXQ
1283          */
1284         pmd->cfg.num_c2s_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
1285                                   dev->data->nb_tx_queues : dev->data->nb_rx_queues;
1286
1287         /*
1288          * CLIENT - RXQ
1289          * SERVER - TXQ
1290          */
1291         pmd->cfg.num_s2c_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
1292                                   dev->data->nb_rx_queues : dev->data->nb_tx_queues;
1293
1294         return 0;
1295 }
1296
1297 static int
1298 memif_tx_queue_setup(struct rte_eth_dev *dev,
1299                      uint16_t qid,
1300                      uint16_t nb_tx_desc __rte_unused,
1301                      unsigned int socket_id __rte_unused,
1302                      const struct rte_eth_txconf *tx_conf __rte_unused)
1303 {
1304         struct pmd_internals *pmd = dev->data->dev_private;
1305         struct memif_queue *mq;
1306
1307         mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
1308         if (mq == NULL) {
1309                 MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
1310                 return -ENOMEM;
1311         }
1312
1313         mq->type =
1314             (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_C2S : MEMIF_RING_S2C;
1315         mq->n_pkts = 0;
1316         mq->n_bytes = 0;
1317         mq->intr_handle.fd = -1;
1318         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1319         mq->in_port = dev->data->port_id;
1320         dev->data->tx_queues[qid] = mq;
1321
1322         return 0;
1323 }
1324
1325 static int
1326 memif_rx_queue_setup(struct rte_eth_dev *dev,
1327                      uint16_t qid,
1328                      uint16_t nb_rx_desc __rte_unused,
1329                      unsigned int socket_id __rte_unused,
1330                      const struct rte_eth_rxconf *rx_conf __rte_unused,
1331                      struct rte_mempool *mb_pool)
1332 {
1333         struct pmd_internals *pmd = dev->data->dev_private;
1334         struct memif_queue *mq;
1335
1336         mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
1337         if (mq == NULL) {
1338                 MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
1339                 return -ENOMEM;
1340         }
1341
1342         mq->type = (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_S2C : MEMIF_RING_C2S;
1343         mq->n_pkts = 0;
1344         mq->n_bytes = 0;
1345         mq->intr_handle.fd = -1;
1346         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1347         mq->mempool = mb_pool;
1348         mq->in_port = dev->data->port_id;
1349         dev->data->rx_queues[qid] = mq;
1350
1351         return 0;
1352 }
1353
1354 static void
1355 memif_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1356 {
1357         struct memif_queue *mq = dev->data->rx_queues[qid];
1358
1359         if (!mq)
1360                 return;
1361
1362         rte_free(mq);
1363 }
1364
1365 static void
1366 memif_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1367 {
1368         struct memif_queue *mq = dev->data->tx_queues[qid];
1369
1370         if (!mq)
1371                 return;
1372
1373         rte_free(mq);
1374 }
1375
1376 static int
1377 memif_link_update(struct rte_eth_dev *dev,
1378                   int wait_to_complete __rte_unused)
1379 {
1380         struct pmd_process_private *proc_private;
1381
1382         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1383                 proc_private = dev->process_private;
1384                 if (dev->data->dev_link.link_status == RTE_ETH_LINK_UP &&
1385                                 proc_private->regions_num == 0) {
1386                         memif_mp_request_regions(dev);
1387                 } else if (dev->data->dev_link.link_status == RTE_ETH_LINK_DOWN &&
1388                                 proc_private->regions_num > 0) {
1389                         memif_free_regions(dev);
1390                 }
1391         }
1392         return 0;
1393 }
1394
1395 static int
1396 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1397 {
1398         struct pmd_internals *pmd = dev->data->dev_private;
1399         struct memif_queue *mq;
1400         int i;
1401         uint8_t tmp, nq;
1402
1403         stats->ipackets = 0;
1404         stats->ibytes = 0;
1405         stats->opackets = 0;
1406         stats->obytes = 0;
1407
1408         tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_c2s_rings :
1409             pmd->run.num_s2c_rings;
1410         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1411             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1412
1413         /* RX stats */
1414         for (i = 0; i < nq; i++) {
1415                 mq = dev->data->rx_queues[i];
1416                 stats->q_ipackets[i] = mq->n_pkts;
1417                 stats->q_ibytes[i] = mq->n_bytes;
1418                 stats->ipackets += mq->n_pkts;
1419                 stats->ibytes += mq->n_bytes;
1420         }
1421
1422         tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_s2c_rings :
1423             pmd->run.num_c2s_rings;
1424         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1425             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1426
1427         /* TX stats */
1428         for (i = 0; i < nq; i++) {
1429                 mq = dev->data->tx_queues[i];
1430                 stats->q_opackets[i] = mq->n_pkts;
1431                 stats->q_obytes[i] = mq->n_bytes;
1432                 stats->opackets += mq->n_pkts;
1433                 stats->obytes += mq->n_bytes;
1434         }
1435         return 0;
1436 }
1437
1438 static int
1439 memif_stats_reset(struct rte_eth_dev *dev)
1440 {
1441         struct pmd_internals *pmd = dev->data->dev_private;
1442         int i;
1443         struct memif_queue *mq;
1444
1445         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1446                 mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->tx_queues[i] :
1447                     dev->data->rx_queues[i];
1448                 mq->n_pkts = 0;
1449                 mq->n_bytes = 0;
1450         }
1451         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1452                 mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->rx_queues[i] :
1453                     dev->data->tx_queues[i];
1454                 mq->n_pkts = 0;
1455                 mq->n_bytes = 0;
1456         }
1457
1458         return 0;
1459 }
1460
1461 static int
1462 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1463                            uint16_t qid __rte_unused)
1464 {
1465         MIF_LOG(WARNING, "Interrupt mode not supported.");
1466
1467         return -1;
1468 }
1469
1470 static int
1471 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1472 {
1473         struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1474
1475         return 0;
1476 }
1477
1478 static const struct eth_dev_ops ops = {
1479         .dev_start = memif_dev_start,
1480         .dev_close = memif_dev_close,
1481         .dev_infos_get = memif_dev_info,
1482         .dev_configure = memif_dev_configure,
1483         .tx_queue_setup = memif_tx_queue_setup,
1484         .rx_queue_setup = memif_rx_queue_setup,
1485         .rx_queue_release = memif_rx_queue_release,
1486         .tx_queue_release = memif_tx_queue_release,
1487         .rx_queue_intr_enable = memif_rx_queue_intr_enable,
1488         .rx_queue_intr_disable = memif_rx_queue_intr_disable,
1489         .link_update = memif_link_update,
1490         .stats_get = memif_stats_get,
1491         .stats_reset = memif_stats_reset,
1492 };
1493
1494 static int
1495 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1496              memif_interface_id_t id, uint32_t flags,
1497              const char *socket_filename,
1498              memif_log2_ring_size_t log2_ring_size,
1499              uint16_t pkt_buffer_size, const char *secret,
1500              struct rte_ether_addr *ether_addr)
1501 {
1502         int ret = 0;
1503         struct rte_eth_dev *eth_dev;
1504         struct rte_eth_dev_data *data;
1505         struct pmd_internals *pmd;
1506         struct pmd_process_private *process_private;
1507         const unsigned int numa_node = vdev->device.numa_node;
1508         const char *name = rte_vdev_device_name(vdev);
1509
1510         eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1511         if (eth_dev == NULL) {
1512                 MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1513                 return -1;
1514         }
1515
1516         process_private = (struct pmd_process_private *)
1517                 rte_zmalloc(name, sizeof(struct pmd_process_private),
1518                             RTE_CACHE_LINE_SIZE);
1519
1520         if (process_private == NULL) {
1521                 MIF_LOG(ERR, "Failed to alloc memory for process private");
1522                 return -1;
1523         }
1524         eth_dev->process_private = process_private;
1525
1526         pmd = eth_dev->data->dev_private;
1527         memset(pmd, 0, sizeof(*pmd));
1528
1529         pmd->id = id;
1530         pmd->flags = flags;
1531         pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1532         pmd->role = role;
1533         /* Zero-copy flag irelevant to server. */
1534         if (pmd->role == MEMIF_ROLE_SERVER)
1535                 pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1536
1537         ret = memif_socket_init(eth_dev, socket_filename);
1538         if (ret < 0)
1539                 return ret;
1540
1541         memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
1542         if (secret != NULL)
1543                 strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1544
1545         pmd->cfg.log2_ring_size = log2_ring_size;
1546         /* set in .dev_configure() */
1547         pmd->cfg.num_c2s_rings = 0;
1548         pmd->cfg.num_s2c_rings = 0;
1549
1550         pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1551         rte_spinlock_init(&pmd->cc_lock);
1552
1553         data = eth_dev->data;
1554         data->dev_private = pmd;
1555         data->numa_node = numa_node;
1556         data->dev_link = pmd_link;
1557         data->mac_addrs = ether_addr;
1558         data->promiscuous = 1;
1559         data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1560
1561         eth_dev->dev_ops = &ops;
1562         eth_dev->device = &vdev->device;
1563         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1564                 eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1565                 eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1566         } else {
1567                 eth_dev->rx_pkt_burst = eth_memif_rx;
1568                 eth_dev->tx_pkt_burst = eth_memif_tx;
1569         }
1570
1571         rte_eth_dev_probing_finish(eth_dev);
1572
1573         return 0;
1574 }
1575
1576 static int
1577 memif_set_role(const char *key __rte_unused, const char *value,
1578                void *extra_args)
1579 {
1580         enum memif_role_t *role = (enum memif_role_t *)extra_args;
1581
1582         if (strstr(value, "server") != NULL) {
1583                 *role = MEMIF_ROLE_SERVER;
1584         } else if (strstr(value, "client") != NULL) {
1585                 *role = MEMIF_ROLE_CLIENT;
1586         } else if (strstr(value, "master") != NULL) {
1587                 MIF_LOG(NOTICE, "Role argument \"master\" is deprecated, use \"server\"");
1588                 *role = MEMIF_ROLE_SERVER;
1589         } else if (strstr(value, "slave") != NULL) {
1590                 MIF_LOG(NOTICE, "Role argument \"slave\" is deprecated, use \"client\"");
1591                 *role = MEMIF_ROLE_CLIENT;
1592         } else {
1593                 MIF_LOG(ERR, "Unknown role: %s.", value);
1594                 return -EINVAL;
1595         }
1596         return 0;
1597 }
1598
1599 static int
1600 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1601 {
1602         uint32_t *flags = (uint32_t *)extra_args;
1603
1604         if (strstr(value, "yes") != NULL) {
1605                 if (!rte_mcfg_get_single_file_segments()) {
1606                         MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1607                         return -ENOTSUP;
1608                 }
1609                 *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1610         } else if (strstr(value, "no") != NULL) {
1611                 *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1612         } else {
1613                 MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1614                 return -EINVAL;
1615         }
1616         return 0;
1617 }
1618
1619 static int
1620 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1621 {
1622         memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1623
1624         /* even if parsing fails, 0 is a valid id */
1625         *id = strtoul(value, NULL, 10);
1626         return 0;
1627 }
1628
1629 static int
1630 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1631 {
1632         unsigned long tmp;
1633         uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1634
1635         tmp = strtoul(value, NULL, 10);
1636         if (tmp == 0 || tmp > 0xFFFF) {
1637                 MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1638                 return -EINVAL;
1639         }
1640         *pkt_buffer_size = tmp;
1641         return 0;
1642 }
1643
1644 static int
1645 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1646 {
1647         unsigned long tmp;
1648         memif_log2_ring_size_t *log2_ring_size =
1649             (memif_log2_ring_size_t *)extra_args;
1650
1651         tmp = strtoul(value, NULL, 10);
1652         if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1653                 MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1654                         value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1655                 return -EINVAL;
1656         }
1657         *log2_ring_size = tmp;
1658         return 0;
1659 }
1660
1661 /* check if directory exists and if we have permission to read/write */
1662 static int
1663 memif_check_socket_filename(const char *filename)
1664 {
1665         char *dir = NULL, *tmp;
1666         uint32_t idx;
1667         int ret = 0;
1668
1669         if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1670                 MIF_LOG(ERR, "Unix socket address too long (max 108).");
1671                 return -1;
1672         }
1673
1674         tmp = strrchr(filename, '/');
1675         if (tmp != NULL) {
1676                 idx = tmp - filename;
1677                 dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1678                 if (dir == NULL) {
1679                         MIF_LOG(ERR, "Failed to allocate memory.");
1680                         return -1;
1681                 }
1682                 strlcpy(dir, filename, sizeof(char) * (idx + 1));
1683         }
1684
1685         if (dir == NULL || (faccessat(-1, dir, F_OK | R_OK |
1686                                         W_OK, AT_EACCESS) < 0)) {
1687                 MIF_LOG(ERR, "Invalid socket directory.");
1688                 ret = -EINVAL;
1689         }
1690
1691         if (dir != NULL)
1692                 rte_free(dir);
1693
1694         return ret;
1695 }
1696
1697 static int
1698 memif_set_socket_filename(const char *key __rte_unused, const char *value,
1699                           void *extra_args)
1700 {
1701         const char **socket_filename = (const char **)extra_args;
1702
1703         *socket_filename = value;
1704         return 0;
1705 }
1706
1707 static int
1708 memif_set_is_socket_abstract(const char *key __rte_unused, const char *value, void *extra_args)
1709 {
1710         uint32_t *flags = (uint32_t *)extra_args;
1711
1712         if (strstr(value, "yes") != NULL) {
1713                 *flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1714         } else if (strstr(value, "no") != NULL) {
1715                 *flags &= ~ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1716         } else {
1717                 MIF_LOG(ERR, "Failed to parse socket-abstract param: %s.", value);
1718                 return -EINVAL;
1719         }
1720         return 0;
1721 }
1722
1723 static int
1724 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1725 {
1726         struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1727
1728         if (rte_ether_unformat_addr(value, ether_addr) < 0)
1729                 MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1730         return 0;
1731 }
1732
1733 static int
1734 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1735 {
1736         const char **secret = (const char **)extra_args;
1737
1738         *secret = value;
1739         return 0;
1740 }
1741
1742 static int
1743 rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1744 {
1745         RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1746         RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1747         int ret = 0;
1748         struct rte_kvargs *kvlist;
1749         const char *name = rte_vdev_device_name(vdev);
1750         enum memif_role_t role = MEMIF_ROLE_CLIENT;
1751         memif_interface_id_t id = 0;
1752         uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1753         memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1754         const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1755         uint32_t flags = 0;
1756         const char *secret = NULL;
1757         struct rte_ether_addr *ether_addr = rte_zmalloc("",
1758                 sizeof(struct rte_ether_addr), 0);
1759         struct rte_eth_dev *eth_dev;
1760
1761         rte_eth_random_addr(ether_addr->addr_bytes);
1762
1763         MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1764
1765         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1766                 eth_dev = rte_eth_dev_attach_secondary(name);
1767                 if (!eth_dev) {
1768                         MIF_LOG(ERR, "Failed to probe %s", name);
1769                         return -1;
1770                 }
1771
1772                 eth_dev->dev_ops = &ops;
1773                 eth_dev->device = &vdev->device;
1774                 eth_dev->rx_pkt_burst = eth_memif_rx;
1775                 eth_dev->tx_pkt_burst = eth_memif_tx;
1776
1777                 if (!rte_eal_primary_proc_alive(NULL)) {
1778                         MIF_LOG(ERR, "Primary process is missing");
1779                         return -1;
1780                 }
1781
1782                 eth_dev->process_private = (struct pmd_process_private *)
1783                         rte_zmalloc(name,
1784                                 sizeof(struct pmd_process_private),
1785                                 RTE_CACHE_LINE_SIZE);
1786                 if (eth_dev->process_private == NULL) {
1787                         MIF_LOG(ERR,
1788                                 "Failed to alloc memory for process private");
1789                         return -1;
1790                 }
1791
1792                 rte_eth_dev_probing_finish(eth_dev);
1793
1794                 return 0;
1795         }
1796
1797         ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1798         /*
1799          * Primary process can continue probing, but secondary process won't
1800          * be able to get memory regions information
1801          */
1802         if (ret < 0 && rte_errno != EEXIST)
1803                 MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1804                         strerror(rte_errno));
1805
1806         /* use abstract address by default */
1807         flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1808
1809         kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1810
1811         /* parse parameters */
1812         if (kvlist != NULL) {
1813                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1814                                          &memif_set_role, &role);
1815                 if (ret < 0)
1816                         goto exit;
1817                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1818                                          &memif_set_id, &id);
1819                 if (ret < 0)
1820                         goto exit;
1821                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1822                                          &memif_set_bs, &pkt_buffer_size);
1823                 if (ret < 0)
1824                         goto exit;
1825                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1826                                          &memif_set_rs, &log2_ring_size);
1827                 if (ret < 0)
1828                         goto exit;
1829                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1830                                          &memif_set_socket_filename,
1831                                          (void *)(&socket_filename));
1832                 if (ret < 0)
1833                         goto exit;
1834                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ABSTRACT_ARG,
1835                                          &memif_set_is_socket_abstract, &flags);
1836                 if (ret < 0)
1837                         goto exit;
1838                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1839                                          &memif_set_mac, ether_addr);
1840                 if (ret < 0)
1841                         goto exit;
1842                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1843                                          &memif_set_zc, &flags);
1844                 if (ret < 0)
1845                         goto exit;
1846                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1847                                          &memif_set_secret, (void *)(&secret));
1848                 if (ret < 0)
1849                         goto exit;
1850         }
1851
1852         if (!(flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT)) {
1853                 ret = memif_check_socket_filename(socket_filename);
1854                 if (ret < 0)
1855                         goto exit;
1856         }
1857
1858         /* create interface */
1859         ret = memif_create(vdev, role, id, flags, socket_filename,
1860                            log2_ring_size, pkt_buffer_size, secret, ether_addr);
1861
1862 exit:
1863         if (kvlist != NULL)
1864                 rte_kvargs_free(kvlist);
1865         return ret;
1866 }
1867
1868 static int
1869 rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1870 {
1871         struct rte_eth_dev *eth_dev;
1872
1873         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1874         if (eth_dev == NULL)
1875                 return 0;
1876
1877         return rte_eth_dev_close(eth_dev->data->port_id);
1878 }
1879
1880 static struct rte_vdev_driver pmd_memif_drv = {
1881         .probe = rte_pmd_memif_probe,
1882         .remove = rte_pmd_memif_remove,
1883 };
1884
1885 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1886
1887 RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1888                               ETH_MEMIF_ID_ARG "=<int>"
1889                               ETH_MEMIF_ROLE_ARG "=server|client"
1890                               ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1891                               ETH_MEMIF_RING_SIZE_ARG "=<int>"
1892                               ETH_MEMIF_SOCKET_ARG "=<string>"
1893                                   ETH_MEMIF_SOCKET_ABSTRACT_ARG "=yes|no"
1894                               ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1895                               ETH_MEMIF_ZC_ARG "=yes|no"
1896                               ETH_MEMIF_SECRET_ARG "=<string>");
1897
/* Register the memif_logtype log type; NOTICE is passed as its default level. */
RTE_LOG_REGISTER_DEFAULT(memif_logtype, NOTICE);