net/ngbe: support MAC filters
[dpdk.git] / drivers / net / memif / rte_eth_memif.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
3  */
4
5 #include <stdlib.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/socket.h>
10 #include <sys/un.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <linux/if_ether.h>
14 #include <errno.h>
15 #include <sys/eventfd.h>
16
17 #include <rte_version.h>
18 #include <rte_mbuf.h>
19 #include <rte_ether.h>
20 #include <ethdev_driver.h>
21 #include <ethdev_vdev.h>
22 #include <rte_malloc.h>
23 #include <rte_kvargs.h>
24 #include <rte_bus_vdev.h>
25 #include <rte_string_fns.h>
26 #include <rte_errno.h>
27 #include <rte_memory.h>
28 #include <rte_memzone.h>
29 #include <rte_eal_memconfig.h>
30
31 #include "rte_eth_memif.h"
32 #include "memif_socket.h"
33
/* Key strings of the device arguments this driver refers to. */
#define ETH_MEMIF_ID_ARG                "id"
#define ETH_MEMIF_ROLE_ARG              "role"
#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG   "bsize"
#define ETH_MEMIF_RING_SIZE_ARG         "rsize"
#define ETH_MEMIF_SOCKET_ARG            "socket"
#define ETH_MEMIF_SOCKET_ABSTRACT_ARG   "socket-abstract"
#define ETH_MEMIF_MAC_ARG               "mac"
#define ETH_MEMIF_ZC_ARG                "zero-copy"
#define ETH_MEMIF_SECRET_ARG            "secret"

/*
 * NULL-terminated table listing every ETH_MEMIF_*_ARG key defined above.
 * NOTE(review): presumably handed to the kvargs parser as the list of
 * accepted keys; the call site is outside this view -- confirm.
 */
static const char * const valid_arguments[] = {
        ETH_MEMIF_ID_ARG,
        ETH_MEMIF_ROLE_ARG,
        ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
        ETH_MEMIF_RING_SIZE_ARG,
        ETH_MEMIF_SOCKET_ARG,
        ETH_MEMIF_SOCKET_ABSTRACT_ARG,
        ETH_MEMIF_MAC_ARG,
        ETH_MEMIF_ZC_ARG,
        ETH_MEMIF_SECRET_ARG,
        NULL
};
56
/*
 * Constant link description: 10G, full duplex, autonegotiation flag set,
 * link status down.
 * NOTE(review): presumably used as the initial link state of a new port;
 * the use site is outside this view -- confirm.
 */
static const struct rte_eth_link pmd_link = {
        .link_speed = RTE_ETH_SPEED_NUM_10G,
        .link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
        .link_status = RTE_ETH_LINK_DOWN,
        .link_autoneg = RTE_ETH_LINK_AUTONEG
};
63
/* Name of the multi-process message used to request a memory region. */
#define MEMIF_MP_SEND_REGION            "memif_mp_send_region"


/*
 * Forward declaration: memif_mp_request_regions() passes this function to
 * rte_memseg_walk() before its definition appears in the file.
 */
static int memif_region_init_zc(const struct rte_memseg_list *msl,
                                const struct rte_memseg *ms, void *arg);
69
70 const char *
71 memif_version(void)
72 {
73         return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
74 }
75
/*
 * Message header to synchronize regions.
 *
 * Carried in the param area of an rte_mp_msg, both in the request sent by
 * memif_mp_request_regions() and in the reply built by
 * memif_mp_send_region().
 */
struct mp_region_msg {
        /* Name of the ethdev port the region belongs to. */
        char port_name[RTE_DEV_NAME_MAX_LEN];
        /* Index of the region in the per-process regions[] table. */
        memif_region_index_t idx;
        /* Region size; left 0 in a reply when the region does not exist. */
        memif_region_size_t size;
};
82
83 static int
84 memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
85 {
86         struct rte_eth_dev *dev;
87         struct pmd_process_private *proc_private;
88         const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
89         struct rte_mp_msg reply;
90         struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
91         uint16_t port_id;
92         int ret;
93
94         /* Get requested port */
95         ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
96         if (ret) {
97                 MIF_LOG(ERR, "Failed to get port id for %s",
98                         msg_param->port_name);
99                 return -1;
100         }
101         dev = &rte_eth_devices[port_id];
102         proc_private = dev->process_private;
103
104         memset(&reply, 0, sizeof(reply));
105         strlcpy(reply.name, msg->name, sizeof(reply.name));
106         reply_param->idx = msg_param->idx;
107         if (proc_private->regions[msg_param->idx] != NULL) {
108                 reply_param->size = proc_private->regions[msg_param->idx]->region_size;
109                 reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
110                 reply.num_fds = 1;
111         }
112         reply.len_param = sizeof(*reply_param);
113         if (rte_mp_reply(&reply, peer) < 0) {
114                 MIF_LOG(ERR, "Failed to reply to an add region request");
115                 return -1;
116         }
117
118         return 0;
119 }
120
121 /*
122  * Request regions
123  * Called by secondary process, when ports link status goes up.
124  */
125 static int
126 memif_mp_request_regions(struct rte_eth_dev *dev)
127 {
128         int ret, i;
129         struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
130         struct rte_mp_msg msg, *reply;
131         struct rte_mp_reply replies;
132         struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
133         struct mp_region_msg *reply_param;
134         struct memif_region *r;
135         struct pmd_process_private *proc_private = dev->process_private;
136         struct pmd_internals *pmd = dev->data->dev_private;
137         /* in case of zero-copy client, only request region 0 */
138         uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
139                                    1 : ETH_MEMIF_MAX_REGION_NUM;
140
141         MIF_LOG(DEBUG, "Requesting memory regions");
142
143         for (i = 0; i < max_region_num; i++) {
144                 /* Prepare the message */
145                 memset(&msg, 0, sizeof(msg));
146                 strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
147                 strlcpy(msg_param->port_name, dev->data->name,
148                         sizeof(msg_param->port_name));
149                 msg_param->idx = i;
150                 msg.len_param = sizeof(*msg_param);
151
152                 /* Send message */
153                 ret = rte_mp_request_sync(&msg, &replies, &timeout);
154                 if (ret < 0 || replies.nb_received != 1) {
155                         MIF_LOG(ERR, "Failed to send mp msg: %d",
156                                 rte_errno);
157                         return -1;
158                 }
159
160                 reply = &replies.msgs[0];
161                 reply_param = (struct mp_region_msg *)reply->param;
162
163                 if (reply_param->size > 0) {
164                         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
165                         if (r == NULL) {
166                                 MIF_LOG(ERR, "Failed to alloc memif region.");
167                                 free(reply);
168                                 return -ENOMEM;
169                         }
170                         r->region_size = reply_param->size;
171                         if (reply->num_fds < 1) {
172                                 MIF_LOG(ERR, "Missing file descriptor.");
173                                 free(reply);
174                                 return -1;
175                         }
176                         r->fd = reply->fds[0];
177                         r->addr = NULL;
178
179                         proc_private->regions[reply_param->idx] = r;
180                         proc_private->regions_num++;
181                 }
182                 free(reply);
183         }
184
185         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
186                 ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
187                 if (ret < 0)
188                         return ret;
189         }
190
191         return memif_connect(dev);
192 }
193
194 static int
195 memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
196 {
197         dev_info->max_mac_addrs = 1;
198         dev_info->max_rx_pktlen = RTE_ETHER_MAX_LEN;
199         dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
200         dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
201         dev_info->min_rx_bufsize = 0;
202         dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
203
204         return 0;
205 }
206
207 static memif_ring_t *
208 memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
209                memif_ring_type_t type, uint16_t ring_num)
210 {
211         /* rings only in region 0 */
212         void *p = proc_private->regions[0]->addr;
213         int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
214             (1 << pmd->run.log2_ring_size);
215
216         p = (uint8_t *)p + (ring_num + type * pmd->run.num_c2s_rings) * ring_size;
217
218         return (memif_ring_t *)p;
219 }
220
221 static memif_region_offset_t
222 memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
223                       memif_ring_type_t type, uint16_t num)
224 {
225         struct pmd_internals *pmd = dev->data->dev_private;
226         struct pmd_process_private *proc_private = dev->process_private;
227
228         return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
229                 (uint8_t *)proc_private->regions[mq->region]->addr);
230 }
231
232 static memif_ring_t *
233 memif_get_ring_from_queue(struct pmd_process_private *proc_private,
234                           struct memif_queue *mq)
235 {
236         struct memif_region *r;
237
238         r = proc_private->regions[mq->region];
239         if (r == NULL)
240                 return NULL;
241
242         return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
243 }
244
245 static void *
246 memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
247 {
248         return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
249 }
250
251 /* Free mbufs received by server */
252 static void
253 memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
254 {
255         uint16_t cur_tail;
256         uint16_t mask = (1 << mq->log2_ring_size) - 1;
257         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
258
259         /* FIXME: improve performance */
260         /* The ring->tail acts as a guard variable between Tx and Rx
261          * threads, so using load-acquire pairs with store-release
262          * in function eth_memif_rx for C2S queues.
263          */
264         cur_tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
265         while (mq->last_tail != cur_tail) {
266                 RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
267                 /* Decrement refcnt and free mbuf. (current segment) */
268                 rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
269                 rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
270                 mq->last_tail++;
271         }
272 }
273
274 static int
275 memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
276                     struct rte_mbuf *tail)
277 {
278         /* Check for number-of-segments-overflow */
279         if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
280                 return -EOVERFLOW;
281
282         /* Chain 'tail' onto the old tail */
283         cur_tail->next = tail;
284
285         /* accumulate number of segments and total length. */
286         head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
287
288         tail->pkt_len = tail->data_len;
289         head->pkt_len += tail->pkt_len;
290
291         return 0;
292 }
293
294 static uint16_t
295 eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
296 {
297         struct memif_queue *mq = queue;
298         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
299         struct pmd_process_private *proc_private =
300                 rte_eth_devices[mq->in_port].process_private;
301         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
302         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
303         uint16_t n_rx_pkts = 0;
304         uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
305                 RTE_PKTMBUF_HEADROOM;
306         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
307         memif_ring_type_t type = mq->type;
308         memif_desc_t *d0;
309         struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
310         uint64_t b;
311         ssize_t size __rte_unused;
312         uint16_t head;
313         int ret;
314         struct rte_eth_link link;
315
316         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
317                 return 0;
318         if (unlikely(ring == NULL)) {
319                 /* Secondary process will attempt to request regions. */
320                 ret = rte_eth_link_get(mq->in_port, &link);
321                 if (ret < 0)
322                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
323                                 mq->in_port, rte_strerror(-ret));
324                 return 0;
325         }
326
327         /* consume interrupt */
328         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
329                 size = read(rte_intr_fd_get(mq->intr_handle), &b,
330                             sizeof(b));
331
332         ring_size = 1 << mq->log2_ring_size;
333         mask = ring_size - 1;
334
335         if (type == MEMIF_RING_C2S) {
336                 cur_slot = mq->last_head;
337                 last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
338         } else {
339                 cur_slot = mq->last_tail;
340                 last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
341         }
342
343         if (cur_slot == last_slot)
344                 goto refill;
345         n_slots = last_slot - cur_slot;
346
347         while (n_slots && n_rx_pkts < nb_pkts) {
348                 mbuf_head = rte_pktmbuf_alloc(mq->mempool);
349                 if (unlikely(mbuf_head == NULL))
350                         goto no_free_bufs;
351                 mbuf = mbuf_head;
352                 mbuf->port = mq->in_port;
353
354 next_slot:
355                 s0 = cur_slot & mask;
356                 d0 = &ring->desc[s0];
357
358                 src_len = d0->length;
359                 dst_off = 0;
360                 src_off = 0;
361
362                 do {
363                         dst_len = mbuf_size - dst_off;
364                         if (dst_len == 0) {
365                                 dst_off = 0;
366                                 dst_len = mbuf_size;
367
368                                 /* store pointer to tail */
369                                 mbuf_tail = mbuf;
370                                 mbuf = rte_pktmbuf_alloc(mq->mempool);
371                                 if (unlikely(mbuf == NULL))
372                                         goto no_free_bufs;
373                                 mbuf->port = mq->in_port;
374                                 ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
375                                 if (unlikely(ret < 0)) {
376                                         MIF_LOG(ERR, "number-of-segments-overflow");
377                                         rte_pktmbuf_free(mbuf);
378                                         goto no_free_bufs;
379                                 }
380                         }
381                         cp_len = RTE_MIN(dst_len, src_len);
382
383                         rte_pktmbuf_data_len(mbuf) += cp_len;
384                         rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
385                         if (mbuf != mbuf_head)
386                                 rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
387
388                         rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
389                                                            dst_off),
390                                 (uint8_t *)memif_get_buffer(proc_private, d0) +
391                                 src_off, cp_len);
392
393                         src_off += cp_len;
394                         dst_off += cp_len;
395                         src_len -= cp_len;
396                 } while (src_len);
397
398                 cur_slot++;
399                 n_slots--;
400
401                 if (d0->flags & MEMIF_DESC_FLAG_NEXT)
402                         goto next_slot;
403
404                 mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
405                 *bufs++ = mbuf_head;
406                 n_rx_pkts++;
407         }
408
409 no_free_bufs:
410         if (type == MEMIF_RING_C2S) {
411                 __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
412                 mq->last_head = cur_slot;
413         } else {
414                 mq->last_tail = cur_slot;
415         }
416
417 refill:
418         if (type == MEMIF_RING_S2C) {
419                 /* ring->head is updated by the receiver and this function
420                  * is called in the context of receiver thread. The loads in
421                  * the receiver do not need to synchronize with its own stores.
422                  */
423                 head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
424                 n_slots = ring_size - head + mq->last_tail;
425
426                 while (n_slots--) {
427                         s0 = head++ & mask;
428                         d0 = &ring->desc[s0];
429                         d0->length = pmd->run.pkt_buffer_size;
430                 }
431                 __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
432         }
433
434         mq->n_pkts += n_rx_pkts;
435         return n_rx_pkts;
436 }
437
438 static uint16_t
439 eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
440 {
441         struct memif_queue *mq = queue;
442         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
443         struct pmd_process_private *proc_private =
444                 rte_eth_devices[mq->in_port].process_private;
445         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
446         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
447         uint16_t n_rx_pkts = 0;
448         memif_desc_t *d0;
449         struct rte_mbuf *mbuf, *mbuf_tail;
450         struct rte_mbuf *mbuf_head = NULL;
451         int ret;
452         struct rte_eth_link link;
453
454         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
455                 return 0;
456         if (unlikely(ring == NULL)) {
457                 /* Secondary process will attempt to request regions. */
458                 rte_eth_link_get(mq->in_port, &link);
459                 return 0;
460         }
461
462         /* consume interrupt */
463         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
464                 uint64_t b;
465                 ssize_t size __rte_unused;
466                 size = read(rte_intr_fd_get(mq->intr_handle), &b,
467                             sizeof(b));
468         }
469
470         ring_size = 1 << mq->log2_ring_size;
471         mask = ring_size - 1;
472
473         cur_slot = mq->last_tail;
474         /* The ring->tail acts as a guard variable between Tx and Rx
475          * threads, so using load-acquire pairs with store-release
476          * to synchronize it between threads.
477          */
478         last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
479         if (cur_slot == last_slot)
480                 goto refill;
481         n_slots = last_slot - cur_slot;
482
483         while (n_slots && n_rx_pkts < nb_pkts) {
484                 s0 = cur_slot & mask;
485
486                 d0 = &ring->desc[s0];
487                 mbuf_head = mq->buffers[s0];
488                 mbuf = mbuf_head;
489
490 next_slot:
491                 /* prefetch next descriptor */
492                 if (n_rx_pkts + 1 < nb_pkts)
493                         rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);
494
495                 mbuf->port = mq->in_port;
496                 rte_pktmbuf_data_len(mbuf) = d0->length;
497                 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
498
499                 mq->n_bytes += rte_pktmbuf_data_len(mbuf);
500
501                 cur_slot++;
502                 n_slots--;
503                 if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
504                         s0 = cur_slot & mask;
505                         d0 = &ring->desc[s0];
506                         mbuf_tail = mbuf;
507                         mbuf = mq->buffers[s0];
508                         ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
509                         if (unlikely(ret < 0)) {
510                                 MIF_LOG(ERR, "number-of-segments-overflow");
511                                 goto refill;
512                         }
513                         goto next_slot;
514                 }
515
516                 *bufs++ = mbuf_head;
517                 n_rx_pkts++;
518         }
519
520         mq->last_tail = cur_slot;
521
522 /* Supply server with new buffers */
523 refill:
524         /* ring->head is updated by the receiver and this function
525          * is called in the context of receiver thread. The loads in
526          * the receiver do not need to synchronize with its own stores.
527          */
528         head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
529         n_slots = ring_size - head + mq->last_tail;
530
531         if (n_slots < 32)
532                 goto no_free_mbufs;
533
534         ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
535         if (unlikely(ret < 0))
536                 goto no_free_mbufs;
537
538         while (n_slots--) {
539                 s0 = head++ & mask;
540                 if (n_slots > 0)
541                         rte_prefetch0(mq->buffers[head & mask]);
542                 d0 = &ring->desc[s0];
543                 /* store buffer header */
544                 mbuf = mq->buffers[s0];
545                 /* populate descriptor */
546                 d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
547                                 RTE_PKTMBUF_HEADROOM;
548                 d0->region = 1;
549                 d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
550                         (uint8_t *)proc_private->regions[d0->region]->addr;
551         }
552 no_free_mbufs:
553         /* The ring->head acts as a guard variable between Tx and Rx
554          * threads, so using store-release pairs with load-acquire
555          * in function eth_memif_tx.
556          */
557         __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
558
559         mq->n_pkts += n_rx_pkts;
560
561         return n_rx_pkts;
562 }
563
564 static uint16_t
565 eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
566 {
567         struct memif_queue *mq = queue;
568         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
569         struct pmd_process_private *proc_private =
570                 rte_eth_devices[mq->in_port].process_private;
571         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
572         uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
573         uint16_t src_len, src_off, dst_len, dst_off, cp_len, nb_segs;
574         memif_ring_type_t type = mq->type;
575         memif_desc_t *d0;
576         struct rte_mbuf *mbuf;
577         struct rte_mbuf *mbuf_head;
578         uint64_t a;
579         ssize_t size;
580         struct rte_eth_link link;
581
582         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
583                 return 0;
584         if (unlikely(ring == NULL)) {
585                 int ret;
586
587                 /* Secondary process will attempt to request regions. */
588                 ret = rte_eth_link_get(mq->in_port, &link);
589                 if (ret < 0)
590                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
591                                 mq->in_port, rte_strerror(-ret));
592                 return 0;
593         }
594
595         ring_size = 1 << mq->log2_ring_size;
596         mask = ring_size - 1;
597
598         if (type == MEMIF_RING_C2S) {
599                 /* For C2S queues ring->head is updated by the sender and
600                  * this function is called in the context of sending thread.
601                  * The loads in the sender do not need to synchronize with
602                  * its own stores. Hence, the following load can be a
603                  * relaxed load.
604                  */
605                 slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
606                 n_free = ring_size - slot +
607                                 __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
608         } else {
609                 /* For S2C queues ring->tail is updated by the sender and
610                  * this function is called in the context of sending thread.
611                  * The loads in the sender do not need to synchronize with
612                  * its own stores. Hence, the following load can be a
613                  * relaxed load.
614                  */
615                 slot = __atomic_load_n(&ring->tail, __ATOMIC_RELAXED);
616                 n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
617         }
618
619         while (n_tx_pkts < nb_pkts && n_free) {
620                 mbuf_head = *bufs++;
621                 nb_segs = mbuf_head->nb_segs;
622                 mbuf = mbuf_head;
623
624                 saved_slot = slot;
625                 d0 = &ring->desc[slot & mask];
626                 dst_off = 0;
627                 dst_len = (type == MEMIF_RING_C2S) ?
628                         pmd->run.pkt_buffer_size : d0->length;
629
630 next_in_chain:
631                 src_off = 0;
632                 src_len = rte_pktmbuf_data_len(mbuf);
633
634                 while (src_len) {
635                         if (dst_len == 0) {
636                                 if (n_free) {
637                                         slot++;
638                                         n_free--;
639                                         d0->flags |= MEMIF_DESC_FLAG_NEXT;
640                                         d0 = &ring->desc[slot & mask];
641                                         dst_off = 0;
642                                         dst_len = (type == MEMIF_RING_C2S) ?
643                                             pmd->run.pkt_buffer_size : d0->length;
644                                         d0->flags = 0;
645                                 } else {
646                                         slot = saved_slot;
647                                         goto no_free_slots;
648                                 }
649                         }
650                         cp_len = RTE_MIN(dst_len, src_len);
651
652                         rte_memcpy((uint8_t *)memif_get_buffer(proc_private,
653                                                                d0) + dst_off,
654                                 rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
655                                 cp_len);
656
657                         mq->n_bytes += cp_len;
658                         src_off += cp_len;
659                         dst_off += cp_len;
660                         src_len -= cp_len;
661                         dst_len -= cp_len;
662
663                         d0->length = dst_off;
664                 }
665
666                 if (--nb_segs > 0) {
667                         mbuf = mbuf->next;
668                         goto next_in_chain;
669                 }
670
671                 n_tx_pkts++;
672                 slot++;
673                 n_free--;
674                 rte_pktmbuf_free(mbuf_head);
675         }
676
677 no_free_slots:
678         if (type == MEMIF_RING_C2S)
679                 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
680         else
681                 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
682
683         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
684                 a = 1;
685                 size = write(rte_intr_fd_get(mq->intr_handle), &a,
686                              sizeof(a));
687                 if (unlikely(size < 0)) {
688                         MIF_LOG(WARNING,
689                                 "Failed to send interrupt. %s", strerror(errno));
690                 }
691         }
692
693         mq->n_pkts += n_tx_pkts;
694         return n_tx_pkts;
695 }
696
697
698 static int
699 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
700                 memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
701                 uint16_t slot, uint16_t n_free)
702 {
703         memif_desc_t *d0;
704         uint16_t nb_segs = mbuf->nb_segs;
705         int used_slots = 1;
706
707 next_in_chain:
708         /* store pointer to mbuf to free it later */
709         mq->buffers[slot & mask] = mbuf;
710         /* Increment refcnt to make sure the buffer is not freed before server
711          * receives it. (current segment)
712          */
713         rte_mbuf_refcnt_update(mbuf, 1);
714         /* populate descriptor */
715         d0 = &ring->desc[slot & mask];
716         d0->length = rte_pktmbuf_data_len(mbuf);
717         mq->n_bytes += rte_pktmbuf_data_len(mbuf);
718         /* FIXME: get region index */
719         d0->region = 1;
720         d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
721                 (uint8_t *)proc_private->regions[d0->region]->addr;
722         d0->flags = 0;
723
724         /* check if buffer is chained */
725         if (--nb_segs > 0) {
726                 if (n_free < 2)
727                         return 0;
728                 /* mark buffer as chained */
729                 d0->flags |= MEMIF_DESC_FLAG_NEXT;
730                 /* advance mbuf */
731                 mbuf = mbuf->next;
732                 /* update counters */
733                 used_slots++;
734                 slot++;
735                 n_free--;
736                 goto next_in_chain;
737         }
738         return used_slots;
739 }
740
741 static uint16_t
742 eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
743 {
744         struct memif_queue *mq = queue;
745         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
746         struct pmd_process_private *proc_private =
747                 rte_eth_devices[mq->in_port].process_private;
748         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
749         uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
750         struct rte_eth_link link;
751
752         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
753                 return 0;
754         if (unlikely(ring == NULL)) {
755                 /* Secondary process will attempt to request regions. */
756                 rte_eth_link_get(mq->in_port, &link);
757                 return 0;
758         }
759
760         ring_size = 1 << mq->log2_ring_size;
761         mask = ring_size - 1;
762
763         /* free mbufs received by server */
764         memif_free_stored_mbufs(proc_private, mq);
765
766         /* ring type always MEMIF_RING_C2S */
767         /* For C2S queues ring->head is updated by the sender and
768          * this function is called in the context of sending thread.
769          * The loads in the sender do not need to synchronize with
770          * its own stores. Hence, the following load can be a
771          * relaxed load.
772          */
773         slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
774         n_free = ring_size - slot + mq->last_tail;
775
776         int used_slots;
777
778         while (n_free && (n_tx_pkts < nb_pkts)) {
779                 while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
780                         if ((nb_pkts - n_tx_pkts) > 8) {
781                                 rte_prefetch0(*bufs + 4);
782                                 rte_prefetch0(*bufs + 5);
783                                 rte_prefetch0(*bufs + 6);
784                                 rte_prefetch0(*bufs + 7);
785                         }
786                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
787                                 mask, slot, n_free);
788                         if (unlikely(used_slots < 1))
789                                 goto no_free_slots;
790                         n_tx_pkts++;
791                         slot += used_slots;
792                         n_free -= used_slots;
793
794                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
795                                 mask, slot, n_free);
796                         if (unlikely(used_slots < 1))
797                                 goto no_free_slots;
798                         n_tx_pkts++;
799                         slot += used_slots;
800                         n_free -= used_slots;
801
802                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
803                                 mask, slot, n_free);
804                         if (unlikely(used_slots < 1))
805                                 goto no_free_slots;
806                         n_tx_pkts++;
807                         slot += used_slots;
808                         n_free -= used_slots;
809
810                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
811                                 mask, slot, n_free);
812                         if (unlikely(used_slots < 1))
813                                 goto no_free_slots;
814                         n_tx_pkts++;
815                         slot += used_slots;
816                         n_free -= used_slots;
817                 }
818                 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
819                         mask, slot, n_free);
820                 if (unlikely(used_slots < 1))
821                         goto no_free_slots;
822                 n_tx_pkts++;
823                 slot += used_slots;
824                 n_free -= used_slots;
825         }
826
827 no_free_slots:
828         /* ring type always MEMIF_RING_C2S */
829         /* The ring->head acts as a guard variable between Tx and Rx
830          * threads, so using store-release pairs with load-acquire
831          * in function eth_memif_rx for C2S rings.
832          */
833         __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
834
835         /* Send interrupt, if enabled. */
836         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
837                 uint64_t a = 1;
838                 ssize_t size = write(rte_intr_fd_get(mq->intr_handle),
839                                      &a, sizeof(a));
840                 if (unlikely(size < 0)) {
841                         MIF_LOG(WARNING,
842                                 "Failed to send interrupt. %s", strerror(errno));
843                 }
844         }
845
846         /* increment queue counters */
847         mq->n_pkts += n_tx_pkts;
848
849         return n_tx_pkts;
850 }
851
852 void
853 memif_free_regions(struct rte_eth_dev *dev)
854 {
855         struct pmd_process_private *proc_private = dev->process_private;
856         struct pmd_internals *pmd = dev->data->dev_private;
857         int i;
858         struct memif_region *r;
859
860         /* regions are allocated contiguously, so it's
861          * enough to loop until 'proc_private->regions_num'
862          */
863         for (i = 0; i < proc_private->regions_num; i++) {
864                 r = proc_private->regions[i];
865                 if (r != NULL) {
866                         /* This is memzone */
867                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
868                                 r->addr = NULL;
869                                 if (r->fd > 0)
870                                         close(r->fd);
871                         }
872                         if (r->addr != NULL) {
873                                 munmap(r->addr, r->region_size);
874                                 if (r->fd > 0) {
875                                         close(r->fd);
876                                         r->fd = -1;
877                                 }
878                         }
879                         rte_free(r);
880                         proc_private->regions[i] = NULL;
881                 }
882         }
883         proc_private->regions_num = 0;
884 }
885
886 static int
887 memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
888                      void *arg)
889 {
890         struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
891         struct memif_region *r;
892
893         if (proc_private->regions_num < 1) {
894                 MIF_LOG(ERR, "Missing descriptor region");
895                 return -1;
896         }
897
898         r = proc_private->regions[proc_private->regions_num - 1];
899
900         if (r->addr != msl->base_va)
901                 r = proc_private->regions[++proc_private->regions_num - 1];
902
903         if (r == NULL) {
904                 r = rte_zmalloc("region", sizeof(struct memif_region), 0);
905                 if (r == NULL) {
906                         MIF_LOG(ERR, "Failed to alloc memif region.");
907                         return -ENOMEM;
908                 }
909
910                 r->addr = msl->base_va;
911                 r->region_size = ms->len;
912                 r->fd = rte_memseg_get_fd(ms);
913                 if (r->fd < 0)
914                         return -1;
915                 r->pkt_buffer_offset = 0;
916
917                 proc_private->regions[proc_private->regions_num - 1] = r;
918         } else {
919                 r->region_size += ms->len;
920         }
921
922         return 0;
923 }
924
925 static int
926 memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
927 {
928         struct pmd_internals *pmd = dev->data->dev_private;
929         struct pmd_process_private *proc_private = dev->process_private;
930         char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
931         int ret = 0;
932         struct memif_region *r;
933
934         if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
935                 MIF_LOG(ERR, "Too many regions.");
936                 return -1;
937         }
938
939         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
940         if (r == NULL) {
941                 MIF_LOG(ERR, "Failed to alloc memif region.");
942                 return -ENOMEM;
943         }
944
945         /* calculate buffer offset */
946         r->pkt_buffer_offset = (pmd->run.num_c2s_rings + pmd->run.num_s2c_rings) *
947             (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
948             (1 << pmd->run.log2_ring_size));
949
950         r->region_size = r->pkt_buffer_offset;
951         /* if region has buffers, add buffers size to region_size */
952         if (has_buffers == 1)
953                 r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
954                         (1 << pmd->run.log2_ring_size) *
955                         (pmd->run.num_c2s_rings +
956                          pmd->run.num_s2c_rings));
957
958         memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
959         snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
960                  proc_private->regions_num);
961
962         r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
963         if (r->fd < 0) {
964                 MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
965                 ret = -1;
966                 goto error;
967         }
968
969         ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
970         if (ret < 0) {
971                 MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
972                 goto error;
973         }
974
975         ret = ftruncate(r->fd, r->region_size);
976         if (ret < 0) {
977                 MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
978                 goto error;
979         }
980
981         r->addr = mmap(NULL, r->region_size, PROT_READ |
982                        PROT_WRITE, MAP_SHARED, r->fd, 0);
983         if (r->addr == MAP_FAILED) {
984                 MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(ret));
985                 ret = -1;
986                 goto error;
987         }
988
989         proc_private->regions[proc_private->regions_num] = r;
990         proc_private->regions_num++;
991
992         return ret;
993
994 error:
995         if (r->fd > 0)
996                 close(r->fd);
997         r->fd = -1;
998
999         return ret;
1000 }
1001
1002 static int
1003 memif_regions_init(struct rte_eth_dev *dev)
1004 {
1005         struct pmd_internals *pmd = dev->data->dev_private;
1006         int ret;
1007
1008         /*
1009          * Zero-copy exposes dpdk memory.
1010          * Each memseg list will be represented by memif region.
1011          * Zero-copy regions indexing: memseg list idx + 1,
1012          * as we already have region 0 reserved for descriptors.
1013          */
1014         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1015                 /* create region idx 0 containing descriptors */
1016                 ret = memif_region_init_shm(dev, 0);
1017                 if (ret < 0)
1018                         return ret;
1019                 ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
1020                 if (ret < 0)
1021                         return ret;
1022         } else {
1023                 /* create one memory region contaning rings and buffers */
1024                 ret = memif_region_init_shm(dev, /* has buffers */ 1);
1025                 if (ret < 0)
1026                         return ret;
1027         }
1028
1029         return 0;
1030 }
1031
1032 static void
1033 memif_init_rings(struct rte_eth_dev *dev)
1034 {
1035         struct pmd_internals *pmd = dev->data->dev_private;
1036         struct pmd_process_private *proc_private = dev->process_private;
1037         memif_ring_t *ring;
1038         int i, j;
1039         uint16_t slot;
1040
1041         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1042                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_C2S, i);
1043                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1044                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1045                 ring->cookie = MEMIF_COOKIE;
1046                 ring->flags = 0;
1047
1048                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1049                         continue;
1050
1051                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1052                         slot = i * (1 << pmd->run.log2_ring_size) + j;
1053                         ring->desc[j].region = 0;
1054                         ring->desc[j].offset =
1055                                 proc_private->regions[0]->pkt_buffer_offset +
1056                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1057                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1058                 }
1059         }
1060
1061         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1062                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2C, i);
1063                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1064                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1065                 ring->cookie = MEMIF_COOKIE;
1066                 ring->flags = 0;
1067
1068                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1069                         continue;
1070
1071                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1072                         slot = (i + pmd->run.num_c2s_rings) *
1073                             (1 << pmd->run.log2_ring_size) + j;
1074                         ring->desc[j].region = 0;
1075                         ring->desc[j].offset =
1076                                 proc_private->regions[0]->pkt_buffer_offset +
1077                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1078                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1079                 }
1080         }
1081 }
1082
1083 /* called only by client */
1084 static int
1085 memif_init_queues(struct rte_eth_dev *dev)
1086 {
1087         struct pmd_internals *pmd = dev->data->dev_private;
1088         struct memif_queue *mq;
1089         int i;
1090
1091         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1092                 mq = dev->data->tx_queues[i];
1093                 mq->log2_ring_size = pmd->run.log2_ring_size;
1094                 /* queues located only in region 0 */
1095                 mq->region = 0;
1096                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_C2S, i);
1097                 mq->last_head = 0;
1098                 mq->last_tail = 0;
1099                 if (rte_intr_fd_set(mq->intr_handle, eventfd(0, EFD_NONBLOCK)))
1100                         return -rte_errno;
1101
1102                 if (rte_intr_fd_get(mq->intr_handle) < 0) {
1103                         MIF_LOG(WARNING,
1104                                 "Failed to create eventfd for tx queue %d: %s.", i,
1105                                 strerror(errno));
1106                 }
1107                 mq->buffers = NULL;
1108                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1109                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1110                                                   (1 << mq->log2_ring_size), 0);
1111                         if (mq->buffers == NULL)
1112                                 return -ENOMEM;
1113                 }
1114         }
1115
1116         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1117                 mq = dev->data->rx_queues[i];
1118                 mq->log2_ring_size = pmd->run.log2_ring_size;
1119                 /* queues located only in region 0 */
1120                 mq->region = 0;
1121                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2C, i);
1122                 mq->last_head = 0;
1123                 mq->last_tail = 0;
1124                 if (rte_intr_fd_set(mq->intr_handle, eventfd(0, EFD_NONBLOCK)))
1125                         return -rte_errno;
1126                 if (rte_intr_fd_get(mq->intr_handle) < 0) {
1127                         MIF_LOG(WARNING,
1128                                 "Failed to create eventfd for rx queue %d: %s.", i,
1129                                 strerror(errno));
1130                 }
1131                 mq->buffers = NULL;
1132                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1133                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1134                                                   (1 << mq->log2_ring_size), 0);
1135                         if (mq->buffers == NULL)
1136                                 return -ENOMEM;
1137                 }
1138         }
1139         return 0;
1140 }
1141
/**
 * Create the memory regions, then initialize the rings and the queues.
 *
 * @param dev
 *   Device to initialize.
 * @return
 *   0 on success, the negative value reported by the failing step otherwise.
 */
int
memif_init_regions_and_queues(struct rte_eth_dev *dev)
{
	int rc = memif_regions_init(dev);

	if (rc < 0)
		return rc;

	/* rings live inside the regions created above */
	memif_init_rings(dev);

	rc = memif_init_queues(dev);

	return (rc < 0) ? rc : 0;
}
1159
1160 int
1161 memif_connect(struct rte_eth_dev *dev)
1162 {
1163         struct pmd_internals *pmd = dev->data->dev_private;
1164         struct pmd_process_private *proc_private = dev->process_private;
1165         struct memif_region *mr;
1166         struct memif_queue *mq;
1167         memif_ring_t *ring;
1168         int i;
1169
1170         for (i = 0; i < proc_private->regions_num; i++) {
1171                 mr = proc_private->regions[i];
1172                 if (mr != NULL) {
1173                         if (mr->addr == NULL) {
1174                                 if (mr->fd < 0)
1175                                         return -1;
1176                                 mr->addr = mmap(NULL, mr->region_size,
1177                                                 PROT_READ | PROT_WRITE,
1178                                                 MAP_SHARED, mr->fd, 0);
1179                                 if (mr->addr == MAP_FAILED) {
1180                                         MIF_LOG(ERR, "mmap failed: %s\n",
1181                                                 strerror(errno));
1182                                         return -1;
1183                                 }
1184                         }
1185                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
1186                                 /* close memseg file */
1187                                 close(mr->fd);
1188                                 mr->fd = -1;
1189                         }
1190                 }
1191         }
1192
1193         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1194                 for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1195                         mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
1196                             dev->data->tx_queues[i] : dev->data->rx_queues[i];
1197                         ring = memif_get_ring_from_queue(proc_private, mq);
1198                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1199                                 MIF_LOG(ERR, "Wrong ring");
1200                                 return -1;
1201                         }
1202                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1203                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1204                         mq->last_head = 0;
1205                         mq->last_tail = 0;
1206                         /* enable polling mode */
1207                         if (pmd->role == MEMIF_ROLE_SERVER)
1208                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1209                 }
1210                 for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1211                         mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
1212                             dev->data->rx_queues[i] : dev->data->tx_queues[i];
1213                         ring = memif_get_ring_from_queue(proc_private, mq);
1214                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1215                                 MIF_LOG(ERR, "Wrong ring");
1216                                 return -1;
1217                         }
1218                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1219                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1220                         mq->last_head = 0;
1221                         mq->last_tail = 0;
1222                         /* enable polling mode */
1223                         if (pmd->role == MEMIF_ROLE_CLIENT)
1224                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1225                 }
1226
1227                 pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
1228                 pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
1229                 dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
1230         }
1231         MIF_LOG(INFO, "Connected.");
1232         return 0;
1233 }
1234
1235 static int
1236 memif_dev_start(struct rte_eth_dev *dev)
1237 {
1238         struct pmd_internals *pmd = dev->data->dev_private;
1239         int ret = 0;
1240
1241         switch (pmd->role) {
1242         case MEMIF_ROLE_CLIENT:
1243                 ret = memif_connect_client(dev);
1244                 break;
1245         case MEMIF_ROLE_SERVER:
1246                 ret = memif_connect_server(dev);
1247                 break;
1248         default:
1249                 MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
1250                 ret = -1;
1251                 break;
1252         }
1253
1254         return ret;
1255 }
1256
1257 static int
1258 memif_dev_close(struct rte_eth_dev *dev)
1259 {
1260         struct pmd_internals *pmd = dev->data->dev_private;
1261         int i;
1262
1263         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1264                 memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
1265                 memif_disconnect(dev);
1266
1267                 for (i = 0; i < dev->data->nb_rx_queues; i++)
1268                         (*dev->dev_ops->rx_queue_release)(dev, i);
1269                 for (i = 0; i < dev->data->nb_tx_queues; i++)
1270                         (*dev->dev_ops->tx_queue_release)(dev, i);
1271
1272                 memif_socket_remove_device(dev);
1273         } else {
1274                 memif_disconnect(dev);
1275         }
1276
1277         rte_free(dev->process_private);
1278
1279         return 0;
1280 }
1281
1282 static int
1283 memif_dev_configure(struct rte_eth_dev *dev)
1284 {
1285         struct pmd_internals *pmd = dev->data->dev_private;
1286
1287         /*
1288          * CLIENT - TXQ
1289          * SERVER - RXQ
1290          */
1291         pmd->cfg.num_c2s_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
1292                                   dev->data->nb_tx_queues : dev->data->nb_rx_queues;
1293
1294         /*
1295          * CLIENT - RXQ
1296          * SERVER - TXQ
1297          */
1298         pmd->cfg.num_s2c_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
1299                                   dev->data->nb_rx_queues : dev->data->nb_tx_queues;
1300
1301         return 0;
1302 }
1303
1304 static int
1305 memif_tx_queue_setup(struct rte_eth_dev *dev,
1306                      uint16_t qid,
1307                      uint16_t nb_tx_desc __rte_unused,
1308                      unsigned int socket_id __rte_unused,
1309                      const struct rte_eth_txconf *tx_conf __rte_unused)
1310 {
1311         struct pmd_internals *pmd = dev->data->dev_private;
1312         struct memif_queue *mq;
1313
1314         mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
1315         if (mq == NULL) {
1316                 MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
1317                 return -ENOMEM;
1318         }
1319
1320         /* Allocate interrupt instance */
1321         mq->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
1322         if (mq->intr_handle == NULL) {
1323                 MIF_LOG(ERR, "Failed to allocate intr handle");
1324                 return -ENOMEM;
1325         }
1326
1327         mq->type =
1328             (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_C2S : MEMIF_RING_S2C;
1329         mq->n_pkts = 0;
1330         mq->n_bytes = 0;
1331
1332         if (rte_intr_fd_set(mq->intr_handle, -1))
1333                 return -rte_errno;
1334
1335         if (rte_intr_type_set(mq->intr_handle, RTE_INTR_HANDLE_EXT))
1336                 return -rte_errno;
1337
1338         mq->in_port = dev->data->port_id;
1339         dev->data->tx_queues[qid] = mq;
1340
1341         return 0;
1342 }
1343
1344 static int
1345 memif_rx_queue_setup(struct rte_eth_dev *dev,
1346                      uint16_t qid,
1347                      uint16_t nb_rx_desc __rte_unused,
1348                      unsigned int socket_id __rte_unused,
1349                      const struct rte_eth_rxconf *rx_conf __rte_unused,
1350                      struct rte_mempool *mb_pool)
1351 {
1352         struct pmd_internals *pmd = dev->data->dev_private;
1353         struct memif_queue *mq;
1354
1355         mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
1356         if (mq == NULL) {
1357                 MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
1358                 return -ENOMEM;
1359         }
1360
1361         /* Allocate interrupt instance */
1362         mq->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
1363         if (mq->intr_handle == NULL) {
1364                 MIF_LOG(ERR, "Failed to allocate intr handle");
1365                 return -ENOMEM;
1366         }
1367
1368         mq->type = (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_S2C : MEMIF_RING_C2S;
1369         mq->n_pkts = 0;
1370         mq->n_bytes = 0;
1371
1372         if (rte_intr_fd_set(mq->intr_handle, -1))
1373                 return -rte_errno;
1374
1375         if (rte_intr_type_set(mq->intr_handle, RTE_INTR_HANDLE_EXT))
1376                 return -rte_errno;
1377
1378         mq->mempool = mb_pool;
1379         mq->in_port = dev->data->port_id;
1380         dev->data->rx_queues[qid] = mq;
1381
1382         return 0;
1383 }
1384
1385 static void
1386 memif_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1387 {
1388         struct memif_queue *mq = dev->data->rx_queues[qid];
1389
1390         if (!mq)
1391                 return;
1392
1393         rte_intr_instance_free(mq->intr_handle);
1394         rte_free(mq);
1395 }
1396
1397 static void
1398 memif_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1399 {
1400         struct memif_queue *mq = dev->data->tx_queues[qid];
1401
1402         if (!mq)
1403                 return;
1404
1405         rte_free(mq);
1406 }
1407
1408 static int
1409 memif_link_update(struct rte_eth_dev *dev,
1410                   int wait_to_complete __rte_unused)
1411 {
1412         struct pmd_process_private *proc_private;
1413
1414         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1415                 proc_private = dev->process_private;
1416                 if (dev->data->dev_link.link_status == RTE_ETH_LINK_UP &&
1417                                 proc_private->regions_num == 0) {
1418                         memif_mp_request_regions(dev);
1419                 } else if (dev->data->dev_link.link_status == RTE_ETH_LINK_DOWN &&
1420                                 proc_private->regions_num > 0) {
1421                         memif_free_regions(dev);
1422                 }
1423         }
1424         return 0;
1425 }
1426
1427 static int
1428 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1429 {
1430         struct pmd_internals *pmd = dev->data->dev_private;
1431         struct memif_queue *mq;
1432         int i;
1433         uint8_t tmp, nq;
1434
1435         stats->ipackets = 0;
1436         stats->ibytes = 0;
1437         stats->opackets = 0;
1438         stats->obytes = 0;
1439
1440         tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_c2s_rings :
1441             pmd->run.num_s2c_rings;
1442         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1443             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1444
1445         /* RX stats */
1446         for (i = 0; i < nq; i++) {
1447                 mq = dev->data->rx_queues[i];
1448                 stats->q_ipackets[i] = mq->n_pkts;
1449                 stats->q_ibytes[i] = mq->n_bytes;
1450                 stats->ipackets += mq->n_pkts;
1451                 stats->ibytes += mq->n_bytes;
1452         }
1453
1454         tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_s2c_rings :
1455             pmd->run.num_c2s_rings;
1456         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1457             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1458
1459         /* TX stats */
1460         for (i = 0; i < nq; i++) {
1461                 mq = dev->data->tx_queues[i];
1462                 stats->q_opackets[i] = mq->n_pkts;
1463                 stats->q_obytes[i] = mq->n_bytes;
1464                 stats->opackets += mq->n_pkts;
1465                 stats->obytes += mq->n_bytes;
1466         }
1467         return 0;
1468 }
1469
1470 static int
1471 memif_stats_reset(struct rte_eth_dev *dev)
1472 {
1473         struct pmd_internals *pmd = dev->data->dev_private;
1474         int i;
1475         struct memif_queue *mq;
1476
1477         for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1478                 mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->tx_queues[i] :
1479                     dev->data->rx_queues[i];
1480                 mq->n_pkts = 0;
1481                 mq->n_bytes = 0;
1482         }
1483         for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1484                 mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->rx_queues[i] :
1485                     dev->data->tx_queues[i];
1486                 mq->n_pkts = 0;
1487                 mq->n_bytes = 0;
1488         }
1489
1490         return 0;
1491 }
1492
1493 static int
1494 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1495                            uint16_t qid __rte_unused)
1496 {
1497         MIF_LOG(WARNING, "Interrupt mode not supported.");
1498
1499         return -1;
1500 }
1501
1502 static int
1503 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1504 {
1505         struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1506
1507         return 0;
1508 }
1509
1510 static const struct eth_dev_ops ops = {
1511         .dev_start = memif_dev_start,
1512         .dev_close = memif_dev_close,
1513         .dev_infos_get = memif_dev_info,
1514         .dev_configure = memif_dev_configure,
1515         .tx_queue_setup = memif_tx_queue_setup,
1516         .rx_queue_setup = memif_rx_queue_setup,
1517         .rx_queue_release = memif_rx_queue_release,
1518         .tx_queue_release = memif_tx_queue_release,
1519         .rx_queue_intr_enable = memif_rx_queue_intr_enable,
1520         .rx_queue_intr_disable = memif_rx_queue_intr_disable,
1521         .link_update = memif_link_update,
1522         .stats_get = memif_stats_get,
1523         .stats_reset = memif_stats_reset,
1524 };
1525
1526 static int
1527 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1528              memif_interface_id_t id, uint32_t flags,
1529              const char *socket_filename,
1530              memif_log2_ring_size_t log2_ring_size,
1531              uint16_t pkt_buffer_size, const char *secret,
1532              struct rte_ether_addr *ether_addr)
1533 {
1534         int ret = 0;
1535         struct rte_eth_dev *eth_dev;
1536         struct rte_eth_dev_data *data;
1537         struct pmd_internals *pmd;
1538         struct pmd_process_private *process_private;
1539         const unsigned int numa_node = vdev->device.numa_node;
1540         const char *name = rte_vdev_device_name(vdev);
1541
1542         eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1543         if (eth_dev == NULL) {
1544                 MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1545                 return -1;
1546         }
1547
1548         process_private = (struct pmd_process_private *)
1549                 rte_zmalloc(name, sizeof(struct pmd_process_private),
1550                             RTE_CACHE_LINE_SIZE);
1551
1552         if (process_private == NULL) {
1553                 MIF_LOG(ERR, "Failed to alloc memory for process private");
1554                 return -1;
1555         }
1556         eth_dev->process_private = process_private;
1557
1558         pmd = eth_dev->data->dev_private;
1559         memset(pmd, 0, sizeof(*pmd));
1560
1561         pmd->id = id;
1562         pmd->flags = flags;
1563         pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1564         pmd->role = role;
1565         /* Zero-copy flag irelevant to server. */
1566         if (pmd->role == MEMIF_ROLE_SERVER)
1567                 pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1568
1569         ret = memif_socket_init(eth_dev, socket_filename);
1570         if (ret < 0)
1571                 return ret;
1572
1573         memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
1574         if (secret != NULL)
1575                 strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1576
1577         pmd->cfg.log2_ring_size = log2_ring_size;
1578         /* set in .dev_configure() */
1579         pmd->cfg.num_c2s_rings = 0;
1580         pmd->cfg.num_s2c_rings = 0;
1581
1582         pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1583         rte_spinlock_init(&pmd->cc_lock);
1584
1585         data = eth_dev->data;
1586         data->dev_private = pmd;
1587         data->numa_node = numa_node;
1588         data->dev_link = pmd_link;
1589         data->mac_addrs = ether_addr;
1590         data->promiscuous = 1;
1591         data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1592
1593         eth_dev->dev_ops = &ops;
1594         eth_dev->device = &vdev->device;
1595         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1596                 eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1597                 eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1598         } else {
1599                 eth_dev->rx_pkt_burst = eth_memif_rx;
1600                 eth_dev->tx_pkt_burst = eth_memif_tx;
1601         }
1602
1603         rte_eth_dev_probing_finish(eth_dev);
1604
1605         return 0;
1606 }
1607
1608 static int
1609 memif_set_role(const char *key __rte_unused, const char *value,
1610                void *extra_args)
1611 {
1612         enum memif_role_t *role = (enum memif_role_t *)extra_args;
1613
1614         if (strstr(value, "server") != NULL) {
1615                 *role = MEMIF_ROLE_SERVER;
1616         } else if (strstr(value, "client") != NULL) {
1617                 *role = MEMIF_ROLE_CLIENT;
1618         } else if (strstr(value, "master") != NULL) {
1619                 MIF_LOG(NOTICE, "Role argument \"master\" is deprecated, use \"server\"");
1620                 *role = MEMIF_ROLE_SERVER;
1621         } else if (strstr(value, "slave") != NULL) {
1622                 MIF_LOG(NOTICE, "Role argument \"slave\" is deprecated, use \"client\"");
1623                 *role = MEMIF_ROLE_CLIENT;
1624         } else {
1625                 MIF_LOG(ERR, "Unknown role: %s.", value);
1626                 return -EINVAL;
1627         }
1628         return 0;
1629 }
1630
1631 static int
1632 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1633 {
1634         uint32_t *flags = (uint32_t *)extra_args;
1635
1636         if (strstr(value, "yes") != NULL) {
1637                 if (!rte_mcfg_get_single_file_segments()) {
1638                         MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1639                         return -ENOTSUP;
1640                 }
1641                 *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1642         } else if (strstr(value, "no") != NULL) {
1643                 *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1644         } else {
1645                 MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1646                 return -EINVAL;
1647         }
1648         return 0;
1649 }
1650
1651 static int
1652 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1653 {
1654         memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1655
1656         /* even if parsing fails, 0 is a valid id */
1657         *id = strtoul(value, NULL, 10);
1658         return 0;
1659 }
1660
1661 static int
1662 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1663 {
1664         unsigned long tmp;
1665         uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1666
1667         tmp = strtoul(value, NULL, 10);
1668         if (tmp == 0 || tmp > 0xFFFF) {
1669                 MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1670                 return -EINVAL;
1671         }
1672         *pkt_buffer_size = tmp;
1673         return 0;
1674 }
1675
1676 static int
1677 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1678 {
1679         unsigned long tmp;
1680         memif_log2_ring_size_t *log2_ring_size =
1681             (memif_log2_ring_size_t *)extra_args;
1682
1683         tmp = strtoul(value, NULL, 10);
1684         if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1685                 MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1686                         value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1687                 return -EINVAL;
1688         }
1689         *log2_ring_size = tmp;
1690         return 0;
1691 }
1692
1693 /* check if directory exists and if we have permission to read/write */
1694 static int
1695 memif_check_socket_filename(const char *filename)
1696 {
1697         char *dir = NULL, *tmp;
1698         uint32_t idx;
1699         int ret = 0;
1700
1701         if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1702                 MIF_LOG(ERR, "Unix socket address too long (max 108).");
1703                 return -1;
1704         }
1705
1706         tmp = strrchr(filename, '/');
1707         if (tmp != NULL) {
1708                 idx = tmp - filename;
1709                 dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1710                 if (dir == NULL) {
1711                         MIF_LOG(ERR, "Failed to allocate memory.");
1712                         return -1;
1713                 }
1714                 strlcpy(dir, filename, sizeof(char) * (idx + 1));
1715         }
1716
1717         if (dir == NULL || (faccessat(-1, dir, F_OK | R_OK |
1718                                         W_OK, AT_EACCESS) < 0)) {
1719                 MIF_LOG(ERR, "Invalid socket directory.");
1720                 ret = -EINVAL;
1721         }
1722
1723         if (dir != NULL)
1724                 rte_free(dir);
1725
1726         return ret;
1727 }
1728
1729 static int
1730 memif_set_socket_filename(const char *key __rte_unused, const char *value,
1731                           void *extra_args)
1732 {
1733         const char **socket_filename = (const char **)extra_args;
1734
1735         *socket_filename = value;
1736         return 0;
1737 }
1738
1739 static int
1740 memif_set_is_socket_abstract(const char *key __rte_unused, const char *value, void *extra_args)
1741 {
1742         uint32_t *flags = (uint32_t *)extra_args;
1743
1744         if (strstr(value, "yes") != NULL) {
1745                 *flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1746         } else if (strstr(value, "no") != NULL) {
1747                 *flags &= ~ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1748         } else {
1749                 MIF_LOG(ERR, "Failed to parse socket-abstract param: %s.", value);
1750                 return -EINVAL;
1751         }
1752         return 0;
1753 }
1754
1755 static int
1756 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1757 {
1758         struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1759
1760         if (rte_ether_unformat_addr(value, ether_addr) < 0)
1761                 MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1762         return 0;
1763 }
1764
1765 static int
1766 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1767 {
1768         const char **secret = (const char **)extra_args;
1769
1770         *secret = value;
1771         return 0;
1772 }
1773
1774 static int
1775 rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1776 {
1777         RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1778         RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1779         int ret = 0;
1780         struct rte_kvargs *kvlist;
1781         const char *name = rte_vdev_device_name(vdev);
1782         enum memif_role_t role = MEMIF_ROLE_CLIENT;
1783         memif_interface_id_t id = 0;
1784         uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1785         memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1786         const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1787         uint32_t flags = 0;
1788         const char *secret = NULL;
1789         struct rte_ether_addr *ether_addr = rte_zmalloc("",
1790                 sizeof(struct rte_ether_addr), 0);
1791         struct rte_eth_dev *eth_dev;
1792
1793         rte_eth_random_addr(ether_addr->addr_bytes);
1794
1795         MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1796
1797         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1798                 eth_dev = rte_eth_dev_attach_secondary(name);
1799                 if (!eth_dev) {
1800                         MIF_LOG(ERR, "Failed to probe %s", name);
1801                         return -1;
1802                 }
1803
1804                 eth_dev->dev_ops = &ops;
1805                 eth_dev->device = &vdev->device;
1806                 eth_dev->rx_pkt_burst = eth_memif_rx;
1807                 eth_dev->tx_pkt_burst = eth_memif_tx;
1808
1809                 if (!rte_eal_primary_proc_alive(NULL)) {
1810                         MIF_LOG(ERR, "Primary process is missing");
1811                         return -1;
1812                 }
1813
1814                 eth_dev->process_private = (struct pmd_process_private *)
1815                         rte_zmalloc(name,
1816                                 sizeof(struct pmd_process_private),
1817                                 RTE_CACHE_LINE_SIZE);
1818                 if (eth_dev->process_private == NULL) {
1819                         MIF_LOG(ERR,
1820                                 "Failed to alloc memory for process private");
1821                         return -1;
1822                 }
1823
1824                 rte_eth_dev_probing_finish(eth_dev);
1825
1826                 return 0;
1827         }
1828
1829         ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1830         /*
1831          * Primary process can continue probing, but secondary process won't
1832          * be able to get memory regions information
1833          */
1834         if (ret < 0 && rte_errno != EEXIST)
1835                 MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1836                         strerror(rte_errno));
1837
1838         /* use abstract address by default */
1839         flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1840
1841         kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1842
1843         /* parse parameters */
1844         if (kvlist != NULL) {
1845                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1846                                          &memif_set_role, &role);
1847                 if (ret < 0)
1848                         goto exit;
1849                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1850                                          &memif_set_id, &id);
1851                 if (ret < 0)
1852                         goto exit;
1853                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1854                                          &memif_set_bs, &pkt_buffer_size);
1855                 if (ret < 0)
1856                         goto exit;
1857                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1858                                          &memif_set_rs, &log2_ring_size);
1859                 if (ret < 0)
1860                         goto exit;
1861                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1862                                          &memif_set_socket_filename,
1863                                          (void *)(&socket_filename));
1864                 if (ret < 0)
1865                         goto exit;
1866                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ABSTRACT_ARG,
1867                                          &memif_set_is_socket_abstract, &flags);
1868                 if (ret < 0)
1869                         goto exit;
1870                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1871                                          &memif_set_mac, ether_addr);
1872                 if (ret < 0)
1873                         goto exit;
1874                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1875                                          &memif_set_zc, &flags);
1876                 if (ret < 0)
1877                         goto exit;
1878                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1879                                          &memif_set_secret, (void *)(&secret));
1880                 if (ret < 0)
1881                         goto exit;
1882         }
1883
1884         if (!(flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT)) {
1885                 ret = memif_check_socket_filename(socket_filename);
1886                 if (ret < 0)
1887                         goto exit;
1888         }
1889
1890         /* create interface */
1891         ret = memif_create(vdev, role, id, flags, socket_filename,
1892                            log2_ring_size, pkt_buffer_size, secret, ether_addr);
1893
1894 exit:
1895         if (kvlist != NULL)
1896                 rte_kvargs_free(kvlist);
1897         return ret;
1898 }
1899
1900 static int
1901 rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1902 {
1903         struct rte_eth_dev *eth_dev;
1904
1905         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1906         if (eth_dev == NULL)
1907                 return 0;
1908
1909         return rte_eth_dev_close(eth_dev->data->port_id);
1910 }
1911
1912 static struct rte_vdev_driver pmd_memif_drv = {
1913         .probe = rte_pmd_memif_probe,
1914         .remove = rte_pmd_memif_remove,
1915 };
1916
1917 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1918
1919 RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1920                               ETH_MEMIF_ID_ARG "=<int>"
1921                               ETH_MEMIF_ROLE_ARG "=server|client"
1922                               ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1923                               ETH_MEMIF_RING_SIZE_ARG "=<int>"
1924                               ETH_MEMIF_SOCKET_ARG "=<string>"
1925                                   ETH_MEMIF_SOCKET_ABSTRACT_ARG "=yes|no"
1926                               ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1927                               ETH_MEMIF_ZC_ARG "=yes|no"
1928                               ETH_MEMIF_SECRET_ARG "=<string>");
1929
1930 RTE_LOG_REGISTER_DEFAULT(memif_logtype, NOTICE);