1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
3  */
4
5 #include <stdlib.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/socket.h>
10 #include <sys/un.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <linux/if_ether.h>
14 #include <errno.h>
15 #include <sys/eventfd.h>
16
17 #include <rte_version.h>
18 #include <rte_mbuf.h>
19 #include <rte_ether.h>
20 #include <rte_ethdev_driver.h>
21 #include <rte_ethdev_vdev.h>
22 #include <rte_malloc.h>
23 #include <rte_kvargs.h>
24 #include <rte_bus_vdev.h>
25 #include <rte_string_fns.h>
26 #include <rte_errno.h>
27 #include <rte_memory.h>
28 #include <rte_memzone.h>
29 #include <rte_eal_memconfig.h>
30
31 #include "rte_eth_memif.h"
32 #include "memif_socket.h"
33
34 #define ETH_MEMIF_ID_ARG                "id"
35 #define ETH_MEMIF_ROLE_ARG              "role"
36 #define ETH_MEMIF_PKT_BUFFER_SIZE_ARG   "bsize"
37 #define ETH_MEMIF_RING_SIZE_ARG         "rsize"
38 #define ETH_MEMIF_SOCKET_ARG            "socket"
39 #define ETH_MEMIF_MAC_ARG               "mac"
40 #define ETH_MEMIF_ZC_ARG                "zero-copy"
41 #define ETH_MEMIF_SECRET_ARG            "secret"
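/*
 * These keys are parsed from the EAL --vdev argument string; an
 * illustrative invocation (values are examples only) could look like:
 *   --vdev=net_memif0,role=slave,id=0,rsize=10,bsize=2048,socket=/tmp/memif.sock
 * where "rsize" is the log2 of the ring size and "bsize" the packet buffer size.
 */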
42
43 static const char * const valid_arguments[] = {
44         ETH_MEMIF_ID_ARG,
45         ETH_MEMIF_ROLE_ARG,
46         ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
47         ETH_MEMIF_RING_SIZE_ARG,
48         ETH_MEMIF_SOCKET_ARG,
49         ETH_MEMIF_MAC_ARG,
50         ETH_MEMIF_ZC_ARG,
51         ETH_MEMIF_SECRET_ARG,
52         NULL
53 };
54
55 static const struct rte_eth_link pmd_link = {
56         .link_speed = ETH_SPEED_NUM_10G,
57         .link_duplex = ETH_LINK_FULL_DUPLEX,
58         .link_status = ETH_LINK_DOWN,
59         .link_autoneg = ETH_LINK_AUTONEG
60 };
61
62 #define MEMIF_MP_SEND_REGION            "memif_mp_send_region"
63
64
65 static int memif_region_init_zc(const struct rte_memseg_list *msl,
66                                 const struct rte_memseg *ms, void *arg);
67
68 const char *
69 memif_version(void)
70 {
71         return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
72 }
73
74 /* Message header to synchronize regions */
75 struct mp_region_msg {
76         char port_name[RTE_DEV_NAME_MAX_LEN];
77         memif_region_index_t idx;
78         memif_region_size_t size;
79 };
80
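/*
 * Multi-process IPC handler, served by the primary process: look up the
 * port by name and reply with the size and memfd of the requested region,
 * so a secondary process can mmap the same shared memory.
 */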
81 static int
82 memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
83 {
84         struct rte_eth_dev *dev;
85         struct pmd_process_private *proc_private;
86         const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
87         struct rte_mp_msg reply;
88         struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
89         uint16_t port_id;
90         int ret;
91
92         /* Get requested port */
93         ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
94         if (ret) {
95                 MIF_LOG(ERR, "Failed to get port id for %s",
96                         msg_param->port_name);
97                 return -1;
98         }
99         dev = &rte_eth_devices[port_id];
100         proc_private = dev->process_private;
101
102         memset(&reply, 0, sizeof(reply));
103         strlcpy(reply.name, msg->name, sizeof(reply.name));
104         reply_param->idx = msg_param->idx;
105         if (proc_private->regions[msg_param->idx] != NULL) {
106                 reply_param->size = proc_private->regions[msg_param->idx]->region_size;
107                 reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
108                 reply.num_fds = 1;
109         }
110         reply.len_param = sizeof(*reply_param);
111         if (rte_mp_reply(&reply, peer) < 0) {
112                 MIF_LOG(ERR, "Failed to reply to an add region request");
113                 return -1;
114         }
115
116         return 0;
117 }
118
119 /*
120  * Request regions
121  * Called by a secondary process when the port's link status goes up.
122  */
123 static int
124 memif_mp_request_regions(struct rte_eth_dev *dev)
125 {
126         int ret, i;
127         struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
128         struct rte_mp_msg msg, *reply;
129         struct rte_mp_reply replies;
130         struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
131         struct mp_region_msg *reply_param;
132         struct memif_region *r;
133         struct pmd_process_private *proc_private = dev->process_private;
134         struct pmd_internals *pmd = dev->data->dev_private;
135         /* in case of zero-copy slave, only request region 0 */
136         uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
137                                    1 : ETH_MEMIF_MAX_REGION_NUM;
138
139         MIF_LOG(DEBUG, "Requesting memory regions");
140
141         for (i = 0; i < max_region_num; i++) {
142                 /* Prepare the message */
143                 memset(&msg, 0, sizeof(msg));
144                 strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
145                 strlcpy(msg_param->port_name, dev->data->name,
146                         sizeof(msg_param->port_name));
147                 msg_param->idx = i;
148                 msg.len_param = sizeof(*msg_param);
149
150                 /* Send message */
151                 ret = rte_mp_request_sync(&msg, &replies, &timeout);
152                 if (ret < 0 || replies.nb_received != 1) {
153                         MIF_LOG(ERR, "Failed to send mp msg: %d",
154                                 rte_errno);
155                         return -1;
156                 }
157
158                 reply = &replies.msgs[0];
159                 reply_param = (struct mp_region_msg *)reply->param;
160
161                 if (reply_param->size > 0) {
162                         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
163                         if (r == NULL) {
164                                 MIF_LOG(ERR, "Failed to alloc memif region.");
165                                 free(reply);
166                                 return -ENOMEM;
167                         }
168                         r->region_size = reply_param->size;
169                         if (reply->num_fds < 1) {
170                                 MIF_LOG(ERR, "Missing file descriptor.");
171                                 free(reply);
172                                 return -1;
173                         }
174                         r->fd = reply->fds[0];
175                         r->addr = NULL;
176
177                         proc_private->regions[reply_param->idx] = r;
178                         proc_private->regions_num++;
179                 }
180                 free(reply);
181         }
182
183         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
184                 ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
185                 if (ret < 0)
186                         return ret;
187         }
188
189         return memif_connect(dev);
190 }
191
192 static int
193 memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
194 {
195         dev_info->max_mac_addrs = 1;
196         dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
197         dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
198         dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
199         dev_info->min_rx_bufsize = 0;
200
201         return 0;
202 }
203
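/*
 * Rings live back to back at the start of region 0: all S2M
 * (slave-to-master) rings first, then the M2S rings, each occupying
 * sizeof(memif_ring_t) plus (1 << log2_ring_size) descriptors.
 */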
204 static memif_ring_t *
205 memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
206                memif_ring_type_t type, uint16_t ring_num)
207 {
208         /* rings only in region 0 */
209         void *p = proc_private->regions[0]->addr;
210         int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
211             (1 << pmd->run.log2_ring_size);
212
213         p = (uint8_t *)p + (ring_num + type * pmd->run.num_s2m_rings) * ring_size;
214
215         return (memif_ring_t *)p;
216 }
217
218 static memif_region_offset_t
219 memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
220                       memif_ring_type_t type, uint16_t num)
221 {
222         struct pmd_internals *pmd = dev->data->dev_private;
223         struct pmd_process_private *proc_private = dev->process_private;
224
225         return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
226                 (uint8_t *)proc_private->regions[mq->region]->addr);
227 }
228
229 static memif_ring_t *
230 memif_get_ring_from_queue(struct pmd_process_private *proc_private,
231                           struct memif_queue *mq)
232 {
233         struct memif_region *r;
234
235         r = proc_private->regions[mq->region];
236         if (r == NULL)
237                 return NULL;
238
239         return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
240 }
241
242 static void *
243 memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
244 {
245         return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
246 }
247
248 /* Free mbufs received by master */
249 static void
250 memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
251 {
252         uint16_t mask = (1 << mq->log2_ring_size) - 1;
253         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
254
255         /* FIXME: improve performance */
256         /* The ring->tail acts as a guard variable between the Tx and Rx
257          * threads, so the load-acquire here pairs with the store-release
258          * on the writing side to synchronize it between threads.
259          */
260         while (mq->last_tail != __atomic_load_n(&ring->tail,
261                                                 __ATOMIC_ACQUIRE)) {
262                 RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
263                 /* Decrement refcnt and free mbuf. (current segment) */
264                 rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
265                 rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
266                 mq->last_tail++;
267         }
268 }
269
270 static int
271 memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
272                     struct rte_mbuf *tail)
273 {
274         /* Check for number-of-segments-overflow */
275         if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
276                 return -EOVERFLOW;
277
278         /* Chain 'tail' onto the old tail */
279         cur_tail->next = tail;
280
281         /* accumulate number of segments and total length. */
282         head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
283
284         tail->pkt_len = tail->data_len;
285         head->pkt_len += tail->pkt_len;
286
287         return 0;
288 }
289
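/*
 * Copy-mode receive: copy completed descriptors from the shared ring into
 * freshly allocated mbufs, chaining extra segments when a packet spans
 * several descriptors or exceeds the mbuf data room. On M2S rings the
 * receiver also refills ring->head so the peer can keep transmitting.
 */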
290 static uint16_t
291 eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
292 {
293         struct memif_queue *mq = queue;
294         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
295         struct pmd_process_private *proc_private =
296                 rte_eth_devices[mq->in_port].process_private;
297         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
298         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
299         uint16_t n_rx_pkts = 0;
300         uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
301                 RTE_PKTMBUF_HEADROOM;
302         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
303         memif_ring_type_t type = mq->type;
304         memif_desc_t *d0;
305         struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
306         uint64_t b;
307         ssize_t size __rte_unused;
308         uint16_t head;
309         int ret;
310         struct rte_eth_link link;
311
312         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
313                 return 0;
314         if (unlikely(ring == NULL)) {
315                 /* Secondary process will attempt to request regions. */
316                 ret = rte_eth_link_get(mq->in_port, &link);
317                 if (ret < 0)
318                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
319                                 mq->in_port, rte_strerror(-ret));
320                 return 0;
321         }
322
323         /* consume interrupt */
324         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
325                 size = read(mq->intr_handle.fd, &b, sizeof(b));
326
327         ring_size = 1 << mq->log2_ring_size;
328         mask = ring_size - 1;
329
330         if (type == MEMIF_RING_S2M) {
331                 cur_slot = mq->last_head;
332                 last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
333         } else {
334                 cur_slot = mq->last_tail;
335                 last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
336         }
337
338         if (cur_slot == last_slot)
339                 goto refill;
340         n_slots = last_slot - cur_slot;
341
342         while (n_slots && n_rx_pkts < nb_pkts) {
343                 mbuf_head = rte_pktmbuf_alloc(mq->mempool);
344                 if (unlikely(mbuf_head == NULL))
345                         goto no_free_bufs;
346                 mbuf = mbuf_head;
347                 mbuf->port = mq->in_port;
348
349 next_slot:
350                 s0 = cur_slot & mask;
351                 d0 = &ring->desc[s0];
352
353                 src_len = d0->length;
354                 dst_off = 0;
355                 src_off = 0;
356
357                 do {
358                         dst_len = mbuf_size - dst_off;
359                         if (dst_len == 0) {
360                                 dst_off = 0;
361                                 dst_len = mbuf_size;
362
363                                 /* store pointer to tail */
364                                 mbuf_tail = mbuf;
365                                 mbuf = rte_pktmbuf_alloc(mq->mempool);
366                                 if (unlikely(mbuf == NULL))
367                                         goto no_free_bufs;
368                                 mbuf->port = mq->in_port;
369                                 ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
370                                 if (unlikely(ret < 0)) {
371                                         MIF_LOG(ERR, "number-of-segments-overflow");
372                                         rte_pktmbuf_free(mbuf);
373                                         goto no_free_bufs;
374                                 }
375                         }
376                         cp_len = RTE_MIN(dst_len, src_len);
377
378                         rte_pktmbuf_data_len(mbuf) += cp_len;
379                         rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
380                         if (mbuf != mbuf_head)
381                                 rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
382
383                         memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
384                                (uint8_t *)memif_get_buffer(proc_private, d0) + src_off,
385                                cp_len);
386
387                         src_off += cp_len;
388                         dst_off += cp_len;
389                         src_len -= cp_len;
390                 } while (src_len);
391
392                 cur_slot++;
393                 n_slots--;
394
395                 if (d0->flags & MEMIF_DESC_FLAG_NEXT)
396                         goto next_slot;
397
398                 mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
399                 *bufs++ = mbuf_head;
400                 n_rx_pkts++;
401         }
402
403 no_free_bufs:
404         if (type == MEMIF_RING_S2M) {
405                 __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
406                 mq->last_head = cur_slot;
407         } else {
408                 mq->last_tail = cur_slot;
409         }
410
411 refill:
412         if (type == MEMIF_RING_M2S) {
413                 head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
414                 n_slots = ring_size - head + mq->last_tail;
415
416                 while (n_slots--) {
417                         s0 = head++ & mask;
418                         d0 = &ring->desc[s0];
419                         d0->length = pmd->run.pkt_buffer_size;
420                 }
421                 __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
422         }
423
424         mq->n_pkts += n_rx_pkts;
425         return n_rx_pkts;
426 }
427
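/*
 * Zero-copy receive: descriptors already point into mbufs previously lent
 * to the peer, so completed slots are handed to the application without a
 * copy; the refill stage then bulk-allocates fresh mbufs and publishes
 * their (region, offset) pairs through ring->head.
 */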
428 static uint16_t
429 eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
430 {
431         struct memif_queue *mq = queue;
432         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
433         struct pmd_process_private *proc_private =
434                 rte_eth_devices[mq->in_port].process_private;
435         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
436         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
437         uint16_t n_rx_pkts = 0;
438         memif_desc_t *d0;
439         struct rte_mbuf *mbuf, *mbuf_tail;
440         struct rte_mbuf *mbuf_head = NULL;
441         int ret;
442         struct rte_eth_link link;
443
444         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
445                 return 0;
446         if (unlikely(ring == NULL)) {
447                 /* Secondary process will attempt to request regions. */
448                 rte_eth_link_get(mq->in_port, &link);
449                 return 0;
450         }
451
452         /* consume interrupt */
453         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
454                 uint64_t b;
455                 ssize_t size __rte_unused;
456                 size = read(mq->intr_handle.fd, &b, sizeof(b));
457         }
458
459         ring_size = 1 << mq->log2_ring_size;
460         mask = ring_size - 1;
461
462         cur_slot = mq->last_tail;
463         /* The ring->tail acts as a guard variable between the Tx and Rx
464          * threads, so the load-acquire here pairs with the store-release
465          * on the writing side to synchronize it between threads.
466          */
467         last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
468         if (cur_slot == last_slot)
469                 goto refill;
470         n_slots = last_slot - cur_slot;
471
472         while (n_slots && n_rx_pkts < nb_pkts) {
473                 s0 = cur_slot & mask;
474
475                 d0 = &ring->desc[s0];
476                 mbuf_head = mq->buffers[s0];
477                 mbuf = mbuf_head;
478
479 next_slot:
480                 /* prefetch next descriptor */
481                 if (n_rx_pkts + 1 < nb_pkts)
482                         rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);
483
484                 mbuf->port = mq->in_port;
485                 rte_pktmbuf_data_len(mbuf) = d0->length;
486                 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
487
488                 mq->n_bytes += rte_pktmbuf_data_len(mbuf);
489
490                 cur_slot++;
491                 n_slots--;
492                 if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
493                         s0 = cur_slot & mask;
494                         d0 = &ring->desc[s0];
495                         mbuf_tail = mbuf;
496                         mbuf = mq->buffers[s0];
497                         ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
498                         if (unlikely(ret < 0)) {
499                                 MIF_LOG(ERR, "number-of-segments-overflow");
500                                 goto refill;
501                         }
502                         goto next_slot;
503                 }
504
505                 *bufs++ = mbuf_head;
506                 n_rx_pkts++;
507         }
508
509         mq->last_tail = cur_slot;
510
511 /* Supply master with new buffers */
512 refill:
513         /* The ring->head acts as a guard variable between the Tx and Rx
514          * threads, so the load-acquire here pairs with the store-release
515          * on the writing side to synchronize it between threads.
516          */
517         head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
518         n_slots = ring_size - head + mq->last_tail;
519
520         if (n_slots < 32)
521                 goto no_free_mbufs;
522
523         ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
524         if (unlikely(ret < 0))
525                 goto no_free_mbufs;
526
527         while (n_slots--) {
528                 s0 = head++ & mask;
529                 if (n_slots > 0)
530                         rte_prefetch0(mq->buffers[head & mask]);
531                 d0 = &ring->desc[s0];
532                 /* store buffer header */
533                 mbuf = mq->buffers[s0];
534                 /* populate descriptor */
535                 d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
536                                 RTE_PKTMBUF_HEADROOM;
537                 d0->region = 1;
538                 d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
539                         (uint8_t *)proc_private->regions[d0->region]->addr;
540         }
541 no_free_mbufs:
542         __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
543
544         mq->n_pkts += n_rx_pkts;
545
546         return n_rx_pkts;
547 }
548
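/*
 * Copy-mode transmit: copy each mbuf chain into consecutive descriptors
 * (S2M rings use the negotiated packet buffer size, M2S rings the length
 * advertised by the peer), publish the new slot index with a store-release
 * and write the queue's interrupt eventfd unless interrupts are masked.
 */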
549 static uint16_t
550 eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
551 {
552         struct memif_queue *mq = queue;
553         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
554         struct pmd_process_private *proc_private =
555                 rte_eth_devices[mq->in_port].process_private;
556         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
557         uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
558         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
559         memif_ring_type_t type = mq->type;
560         memif_desc_t *d0;
561         struct rte_mbuf *mbuf;
562         struct rte_mbuf *mbuf_head;
563         uint64_t a;
564         ssize_t size;
565         struct rte_eth_link link;
566
567         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
568                 return 0;
569         if (unlikely(ring == NULL)) {
570                 int ret;
571
572                 /* Secondary process will attempt to request regions. */
573                 ret = rte_eth_link_get(mq->in_port, &link);
574                 if (ret < 0)
575                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
576                                 mq->in_port, rte_strerror(-ret));
577                 return 0;
578         }
579
580         ring_size = 1 << mq->log2_ring_size;
581         mask = ring_size - 1;
582
583         if (type == MEMIF_RING_S2M) {
584                 slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
585                 n_free = ring_size - slot +
586                                 __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
587         } else {
588                 /* For M2S queues ring->tail is updated by the sender and
589                  * this function is called in the context of sending thread.
590                  * The loads in the sender do not need to synchronize with
591                  * its own stores. Hence, the following load can be a
592                  * relaxed load.
593                  */
594                 slot = __atomic_load_n(&ring->tail, __ATOMIC_RELAXED);
595                 n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
596         }
597
598         while (n_tx_pkts < nb_pkts && n_free) {
599                 mbuf_head = *bufs++;
600                 mbuf = mbuf_head;
601
602                 saved_slot = slot;
603                 d0 = &ring->desc[slot & mask];
604                 dst_off = 0;
605                 dst_len = (type == MEMIF_RING_S2M) ?
606                         pmd->run.pkt_buffer_size : d0->length;
607
608 next_in_chain:
609                 src_off = 0;
610                 src_len = rte_pktmbuf_data_len(mbuf);
611
612                 while (src_len) {
613                         if (dst_len == 0) {
614                                 if (n_free) {
615                                         slot++;
616                                         n_free--;
617                                         d0->flags |= MEMIF_DESC_FLAG_NEXT;
618                                         d0 = &ring->desc[slot & mask];
619                                         dst_off = 0;
620                                         dst_len = (type == MEMIF_RING_S2M) ?
621                                             pmd->run.pkt_buffer_size : d0->length;
622                                         d0->flags = 0;
623                                 } else {
624                                         slot = saved_slot;
625                                         goto no_free_slots;
626                                 }
627                         }
628                         cp_len = RTE_MIN(dst_len, src_len);
629
630                         memcpy((uint8_t *)memif_get_buffer(proc_private, d0) + dst_off,
631                                rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
632                                cp_len);
633
634                         mq->n_bytes += cp_len;
635                         src_off += cp_len;
636                         dst_off += cp_len;
637                         src_len -= cp_len;
638                         dst_len -= cp_len;
639
640                         d0->length = dst_off;
641                 }
642
643                 if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
644                         mbuf = mbuf->next;
645                         goto next_in_chain;
646                 }
647
648                 n_tx_pkts++;
649                 slot++;
650                 n_free--;
651                 rte_pktmbuf_free(mbuf_head);
652         }
653
654 no_free_slots:
655         if (type == MEMIF_RING_S2M)
656                 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
657         else
658                 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
659
660         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
661                 a = 1;
662                 size = write(mq->intr_handle.fd, &a, sizeof(a));
663                 if (unlikely(size < 0)) {
664                         MIF_LOG(WARNING,
665                                 "Failed to send interrupt. %s", strerror(errno));
666                 }
667         }
668
669         mq->n_pkts += n_tx_pkts;
670         return n_tx_pkts;
671 }
672
673
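/*
 * Zero-copy transmit of a single mbuf chain: each segment is recorded in
 * mq->buffers, its refcount is bumped so it survives until the peer
 * returns the slot, and its data pointer is translated into a
 * (region, offset) pair. Returns the number of descriptors consumed,
 * or 0 if the chain does not fit into the free slots.
 */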
674 static int
675 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
676                 memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
677                 uint16_t slot, uint16_t n_free)
678 {
679         memif_desc_t *d0;
680         int used_slots = 1;
681
682 next_in_chain:
683         /* store pointer to mbuf to free it later */
684         mq->buffers[slot & mask] = mbuf;
685         /* Increment refcnt to make sure the buffer is not freed before master
686          * receives it. (current segment)
687          */
688         rte_mbuf_refcnt_update(mbuf, 1);
689         /* populate descriptor */
690         d0 = &ring->desc[slot & mask];
691         d0->length = rte_pktmbuf_data_len(mbuf);
692         /* FIXME: get region index */
693         d0->region = 1;
694         d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
695                 (uint8_t *)proc_private->regions[d0->region]->addr;
696         d0->flags = 0;
697
698         /* check if buffer is chained */
699         if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
700                 if (n_free < 2)
701                         return 0;
702                 /* mark buffer as chained */
703                 d0->flags |= MEMIF_DESC_FLAG_NEXT;
704                 /* advance mbuf */
705                 mbuf = mbuf->next;
706                 /* update counters */
707                 used_slots++;
708                 slot++;
709                 n_free--;
710                 goto next_in_chain;
711         }
712         return used_slots;
713 }
714
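/*
 * Zero-copy transmit burst: first reclaim mbufs the peer has already
 * consumed (tracked via ring->tail), then enqueue packets in unrolled
 * batches of four, publishing the new head with a store-release and
 * writing the queue's interrupt eventfd if interrupts are enabled.
 */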
715 static uint16_t
716 eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
717 {
718         struct memif_queue *mq = queue;
719         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
720         struct pmd_process_private *proc_private =
721                 rte_eth_devices[mq->in_port].process_private;
722         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
723         uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
724         memif_ring_type_t type = mq->type;
725         struct rte_eth_link link;
726
727         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
728                 return 0;
729         if (unlikely(ring == NULL)) {
730                 /* Secondary process will attempt to request regions. */
731                 rte_eth_link_get(mq->in_port, &link);
732                 return 0;
733         }
734
735         ring_size = 1 << mq->log2_ring_size;
736         mask = ring_size - 1;
737
738         /* free mbufs received by master */
739         memif_free_stored_mbufs(proc_private, mq);
740
741         /* ring type always MEMIF_RING_S2M */
742         /* The ring->head acts as a guard variable between the Tx and Rx
743          * threads, so the load-acquire here pairs with the store-release
744          * on the writing side to synchronize it between threads.
745          */
746         slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
747         n_free = ring_size - slot + mq->last_tail;
748
749         int used_slots;
750
751         while (n_free && (n_tx_pkts < nb_pkts)) {
752                 while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
753                         if ((nb_pkts - n_tx_pkts) > 8) {
754                                 rte_prefetch0(*bufs + 4);
755                                 rte_prefetch0(*bufs + 5);
756                                 rte_prefetch0(*bufs + 6);
757                                 rte_prefetch0(*bufs + 7);
758                         }
759                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
760                                 mask, slot, n_free);
761                         if (unlikely(used_slots < 1))
762                                 goto no_free_slots;
763                         n_tx_pkts++;
764                         slot += used_slots;
765                         n_free -= used_slots;
766
767                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
768                                 mask, slot, n_free);
769                         if (unlikely(used_slots < 1))
770                                 goto no_free_slots;
771                         n_tx_pkts++;
772                         slot += used_slots;
773                         n_free -= used_slots;
774
775                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
776                                 mask, slot, n_free);
777                         if (unlikely(used_slots < 1))
778                                 goto no_free_slots;
779                         n_tx_pkts++;
780                         slot += used_slots;
781                         n_free -= used_slots;
782
783                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
784                                 mask, slot, n_free);
785                         if (unlikely(used_slots < 1))
786                                 goto no_free_slots;
787                         n_tx_pkts++;
788                         slot += used_slots;
789                         n_free -= used_slots;
790                 }
791                 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
792                         mask, slot, n_free);
793                 if (unlikely(used_slots < 1))
794                         goto no_free_slots;
795                 n_tx_pkts++;
796                 slot += used_slots;
797                 n_free -= used_slots;
798         }
799
800 no_free_slots:
801         /* update ring pointers */
802         if (type == MEMIF_RING_S2M)
803                 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
804         else
805                 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
806
807         /* Send interrupt, if enabled. */
808         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
809                 uint64_t a = 1;
810                 ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
811                 if (unlikely(size < 0)) {
812                         MIF_LOG(WARNING,
813                                 "Failed to send interrupt. %s", strerror(errno));
814                 }
815         }
816
817         /* increment queue counters */
818         mq->n_pkts += n_tx_pkts;
819
820         return n_tx_pkts;
821 }
822
823 void
824 memif_free_regions(struct rte_eth_dev *dev)
825 {
826         struct pmd_process_private *proc_private = dev->process_private;
827         struct pmd_internals *pmd = dev->data->dev_private;
828         int i;
829         struct memif_region *r;
830
831         /* regions are allocated contiguously, so it's
832          * enough to loop until 'proc_private->regions_num'
833          */
834         for (i = 0; i < proc_private->regions_num; i++) {
835                 r = proc_private->regions[i];
836                 if (r != NULL) {
837                         /* Zero-copy regions (idx > 0) map DPDK memsegs: do not munmap */
838                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
839                                 r->addr = NULL;
840                                 if (r->fd > 0)
841                                         close(r->fd);
842                         }
843                         if (r->addr != NULL) {
844                                 munmap(r->addr, r->region_size);
845                                 if (r->fd > 0) {
846                                         close(r->fd);
847                                         r->fd = -1;
848                                 }
849                         }
850                         rte_free(r);
851                         proc_private->regions[i] = NULL;
852                 }
853         }
854         proc_private->regions_num = 0;
855 }
856
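/*
 * rte_memseg_walk() callback for zero-copy mode: expose each memseg list
 * as a memif region (region 0 is reserved for the descriptor rings),
 * growing the current region while the walk stays within the same list
 * and opening a new one, backed by the memseg fd, when the base VA changes.
 */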
857 static int
858 memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
859                      void *arg)
860 {
861         struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
862         struct memif_region *r;
863
864         if (proc_private->regions_num < 1) {
865                 MIF_LOG(ERR, "Missing descriptor region");
866                 return -1;
867         }
868
869         r = proc_private->regions[proc_private->regions_num - 1];
870
871         if (r->addr != msl->base_va)
872                 r = proc_private->regions[++proc_private->regions_num - 1];
873
874         if (r == NULL) {
875                 r = rte_zmalloc("region", sizeof(struct memif_region), 0);
876                 if (r == NULL) {
877                         MIF_LOG(ERR, "Failed to alloc memif region.");
878                         return -ENOMEM;
879                 }
880
881                 r->addr = msl->base_va;
882                 r->region_size = ms->len;
883                 r->fd = rte_memseg_get_fd(ms);
884                 if (r->fd < 0)
885                         return -1;
886                 r->pkt_buffer_offset = 0;
887
888                 proc_private->regions[proc_private->regions_num - 1] = r;
889         } else {
890                 r->region_size += ms->len;
891         }
892
893         return 0;
894 }
895
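/*
 * Create one shared-memory region backed by a sealed memfd: the first
 * pkt_buffer_offset bytes hold every ring and its descriptors, optionally
 * followed by the packet buffers themselves (copy mode only).
 */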
896 static int
897 memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
898 {
899         struct pmd_internals *pmd = dev->data->dev_private;
900         struct pmd_process_private *proc_private = dev->process_private;
901         char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
902         int ret = 0;
903         struct memif_region *r;
904
905         if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
906                 MIF_LOG(ERR, "Too many regions.");
907                 return -1;
908         }
909
910         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
911         if (r == NULL) {
912                 MIF_LOG(ERR, "Failed to alloc memif region.");
913                 return -ENOMEM;
914         }
915
916         /* calculate buffer offset */
917         r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) *
918             (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
919             (1 << pmd->run.log2_ring_size));
920
921         r->region_size = r->pkt_buffer_offset;
922         /* if region has buffers, add buffers size to region_size */
923         if (has_buffers == 1)
924                 r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
925                         (1 << pmd->run.log2_ring_size) *
926                         (pmd->run.num_s2m_rings +
927                          pmd->run.num_m2s_rings));
928
929         memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
930         snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
931                  proc_private->regions_num);
932
933         r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
934         if (r->fd < 0) {
935                 MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
936                 ret = -1;
937                 goto error;
938         }
939
940         ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
941         if (ret < 0) {
942                 MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
943                 goto error;
944         }
945
946         ret = ftruncate(r->fd, r->region_size);
947         if (ret < 0) {
948                 MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
949                 goto error;
950         }
951
952         r->addr = mmap(NULL, r->region_size, PROT_READ |
953                        PROT_WRITE, MAP_SHARED, r->fd, 0);
954         if (r->addr == MAP_FAILED) {
955                 MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(errno));
956                 ret = -1;
957                 goto error;
958         }
959
960         proc_private->regions[proc_private->regions_num] = r;
961         proc_private->regions_num++;
962
963         return ret;
964
965 error:
966         if (r->fd > 0)
967                 close(r->fd);
968         r->fd = -1;
969
970         return ret;
971 }
972
973 static int
974 memif_regions_init(struct rte_eth_dev *dev)
975 {
976         struct pmd_internals *pmd = dev->data->dev_private;
977         int ret;
978
979         /*
980          * Zero-copy exposes DPDK memory to the peer.
981          * Each memseg list will be represented by a memif region.
982          * Zero-copy regions are indexed as memseg list idx + 1,
983          * as region 0 is already reserved for descriptors.
984          */
985         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
986                 /* create region idx 0 containing descriptors */
987                 ret = memif_region_init_shm(dev, 0);
988                 if (ret < 0)
989                         return ret;
990                 ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
991                 if (ret < 0)
992                         return ret;
993         } else {
994                 /* create one memory region containing rings and buffers */
995                 ret = memif_region_init_shm(dev, /* has buffers */ 1);
996                 if (ret < 0)
997                         return ret;
998         }
999
1000         return 0;
1001 }
1002
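/*
 * Reset every ring header in region 0 and, in copy mode, point each
 * descriptor at its dedicated packet buffer slot behind pkt_buffer_offset.
 * In zero-copy mode descriptors are filled in later, when mbufs are
 * attached at run time.
 */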
1003 static void
1004 memif_init_rings(struct rte_eth_dev *dev)
1005 {
1006         struct pmd_internals *pmd = dev->data->dev_private;
1007         struct pmd_process_private *proc_private = dev->process_private;
1008         memif_ring_t *ring;
1009         int i, j;
1010         uint16_t slot;
1011
1012         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1013                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2M, i);
1014                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1015                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1016                 ring->cookie = MEMIF_COOKIE;
1017                 ring->flags = 0;
1018
1019                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1020                         continue;
1021
1022                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1023                         slot = i * (1 << pmd->run.log2_ring_size) + j;
1024                         ring->desc[j].region = 0;
1025                         ring->desc[j].offset =
1026                                 proc_private->regions[0]->pkt_buffer_offset +
1027                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1028                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1029                 }
1030         }
1031
1032         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1033                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_M2S, i);
1034                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1035                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1036                 ring->cookie = MEMIF_COOKIE;
1037                 ring->flags = 0;
1038
1039                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1040                         continue;
1041
1042                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1043                         slot = (i + pmd->run.num_s2m_rings) *
1044                             (1 << pmd->run.log2_ring_size) + j;
1045                         ring->desc[j].region = 0;
1046                         ring->desc[j].offset =
1047                                 proc_private->regions[0]->pkt_buffer_offset +
1048                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1049                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1050                 }
1051         }
1052 }
1053
1054 /* called only by slave */
1055 static int
1056 memif_init_queues(struct rte_eth_dev *dev)
1057 {
1058         struct pmd_internals *pmd = dev->data->dev_private;
1059         struct memif_queue *mq;
1060         int i;
1061
1062         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1063                 mq = dev->data->tx_queues[i];
1064                 mq->log2_ring_size = pmd->run.log2_ring_size;
1065                 /* queues located only in region 0 */
1066                 mq->region = 0;
1067                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2M, i);
1068                 mq->last_head = 0;
1069                 mq->last_tail = 0;
1070                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1071                 if (mq->intr_handle.fd < 0) {
1072                         MIF_LOG(WARNING,
1073                                 "Failed to create eventfd for tx queue %d: %s.", i,
1074                                 strerror(errno));
1075                 }
1076                 mq->buffers = NULL;
1077                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1078                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1079                                                   (1 << mq->log2_ring_size), 0);
1080                         if (mq->buffers == NULL)
1081                                 return -ENOMEM;
1082                 }
1083         }
1084
1085         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1086                 mq = dev->data->rx_queues[i];
1087                 mq->log2_ring_size = pmd->run.log2_ring_size;
1088                 /* queues located only in region 0 */
1089                 mq->region = 0;
1090                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_M2S, i);
1091                 mq->last_head = 0;
1092                 mq->last_tail = 0;
1093                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1094                 if (mq->intr_handle.fd < 0) {
1095                         MIF_LOG(WARNING,
1096                                 "Failed to create eventfd for rx queue %d: %s.", i,
1097                                 strerror(errno));
1098                 }
1099                 mq->buffers = NULL;
1100                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1101                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1102                                                   (1 << mq->log2_ring_size), 0);
1103                         if (mq->buffers == NULL)
1104                                 return -ENOMEM;
1105                 }
1106         }
1107         return 0;
1108 }
1109
1110 int
1111 memif_init_regions_and_queues(struct rte_eth_dev *dev)
1112 {
1113         int ret;
1114
1115         ret = memif_regions_init(dev);
1116         if (ret < 0)
1117                 return ret;
1118
1119         memif_init_rings(dev);
1120
1121         ret = memif_init_queues(dev);
1122         if (ret < 0)
1123                 return ret;
1124
1125         return 0;
1126 }
1127
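/*
 * Finalize the connection: mmap any regions that were received as file
 * descriptors but are not mapped yet, and (in the primary process) reset
 * all ring pointers before marking the link up. The receiving side of
 * each ring masks its interrupts, i.e. it polls instead.
 */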
1128 int
1129 memif_connect(struct rte_eth_dev *dev)
1130 {
1131         struct pmd_internals *pmd = dev->data->dev_private;
1132         struct pmd_process_private *proc_private = dev->process_private;
1133         struct memif_region *mr;
1134         struct memif_queue *mq;
1135         memif_ring_t *ring;
1136         int i;
1137
1138         for (i = 0; i < proc_private->regions_num; i++) {
1139                 mr = proc_private->regions[i];
1140                 if (mr != NULL) {
1141                         if (mr->addr == NULL) {
1142                                 if (mr->fd < 0)
1143                                         return -1;
1144                                 mr->addr = mmap(NULL, mr->region_size,
1145                                                 PROT_READ | PROT_WRITE,
1146                                                 MAP_SHARED, mr->fd, 0);
1147                                 if (mr->addr == MAP_FAILED) {
1148                                         MIF_LOG(ERR, "mmap failed: %s",
1149                                                 strerror(errno));
1150                                         return -1;
1151                                 }
1152                         }
1153                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
1154                                 /* close memseg file */
1155                                 close(mr->fd);
1156                                 mr->fd = -1;
1157                         }
1158                 }
1159         }
1160
1161         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1162                 for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1163                         mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
1164                             dev->data->tx_queues[i] : dev->data->rx_queues[i];
1165                         ring = memif_get_ring_from_queue(proc_private, mq);
1166                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1167                                 MIF_LOG(ERR, "Wrong ring");
1168                                 return -1;
1169                         }
1170                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1171                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1172                         mq->last_head = 0;
1173                         mq->last_tail = 0;
1174                         /* enable polling mode */
1175                         if (pmd->role == MEMIF_ROLE_MASTER)
1176                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1177                 }
1178                 for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1179                         mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
1180                             dev->data->rx_queues[i] : dev->data->tx_queues[i];
1181                         ring = memif_get_ring_from_queue(proc_private, mq);
1182                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1183                                 MIF_LOG(ERR, "Wrong ring");
1184                                 return -1;
1185                         }
1186                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1187                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1188                         mq->last_head = 0;
1189                         mq->last_tail = 0;
1190                         /* enable polling mode */
1191                         if (pmd->role == MEMIF_ROLE_SLAVE)
1192                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1193                 }
1194
1195                 pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
1196                 pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
1197                 dev->data->dev_link.link_status = ETH_LINK_UP;
1198         }
1199         MIF_LOG(INFO, "Connected.");
1200         return 0;
1201 }
1202
1203 static int
1204 memif_dev_start(struct rte_eth_dev *dev)
1205 {
1206         struct pmd_internals *pmd = dev->data->dev_private;
1207         int ret = 0;
1208
1209         switch (pmd->role) {
1210         case MEMIF_ROLE_SLAVE:
1211                 ret = memif_connect_slave(dev);
1212                 break;
1213         case MEMIF_ROLE_MASTER:
1214                 ret = memif_connect_master(dev);
1215                 break;
1216         default:
1217                 MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
1218                 ret = -1;
1219                 break;
1220         }
1221
1222         return ret;
1223 }
1224
1225 static int
1226 memif_dev_close(struct rte_eth_dev *dev)
1227 {
1228         struct pmd_internals *pmd = dev->data->dev_private;
1229         int i;
1230
1231         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1232                 memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
1233                 memif_disconnect(dev);
1234
1235                 for (i = 0; i < dev->data->nb_rx_queues; i++)
1236                         (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]);
1237                 for (i = 0; i < dev->data->nb_tx_queues; i++)
1238                         (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]);
1239
1240                 memif_socket_remove_device(dev);
1241         } else {
1242                 memif_disconnect(dev);
1243         }
1244
1245         rte_free(dev->process_private);
1246
1247         return 0;
1248 }
1249
1250 static int
1251 memif_dev_configure(struct rte_eth_dev *dev)
1252 {
1253         struct pmd_internals *pmd = dev->data->dev_private;
1254
1255         /*
1256          * SLAVE - TXQ
1257          * MASTER - RXQ
1258          */
1259         pmd->cfg.num_s2m_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
1260                                   dev->data->nb_tx_queues : dev->data->nb_rx_queues;
1261
1262         /*
1263          * SLAVE - RXQ
1264          * MASTER - TXQ
1265          */
1266         pmd->cfg.num_m2s_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
1267                                   dev->data->nb_rx_queues : dev->data->nb_tx_queues;
1268
1269         return 0;
1270 }
1271
1272 static int
1273 memif_tx_queue_setup(struct rte_eth_dev *dev,
1274                      uint16_t qid,
1275                      uint16_t nb_tx_desc __rte_unused,
1276                      unsigned int socket_id __rte_unused,
1277                      const struct rte_eth_txconf *tx_conf __rte_unused)
1278 {
1279         struct pmd_internals *pmd = dev->data->dev_private;
1280         struct memif_queue *mq;
1281
1282         mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
1283         if (mq == NULL) {
1284                 MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
1285                 return -ENOMEM;
1286         }
1287
1288         mq->type =
1289             (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S;
1290         mq->n_pkts = 0;
1291         mq->n_bytes = 0;
1292         mq->intr_handle.fd = -1;
1293         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1294         mq->in_port = dev->data->port_id;
1295         dev->data->tx_queues[qid] = mq;
1296
1297         return 0;
1298 }
1299
1300 static int
1301 memif_rx_queue_setup(struct rte_eth_dev *dev,
1302                      uint16_t qid,
1303                      uint16_t nb_rx_desc __rte_unused,
1304                      unsigned int socket_id __rte_unused,
1305                      const struct rte_eth_rxconf *rx_conf __rte_unused,
1306                      struct rte_mempool *mb_pool)
1307 {
1308         struct pmd_internals *pmd = dev->data->dev_private;
1309         struct memif_queue *mq;
1310
1311         mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
1312         if (mq == NULL) {
1313                 MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
1314                 return -ENOMEM;
1315         }
1316
1317         mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M;
1318         mq->n_pkts = 0;
1319         mq->n_bytes = 0;
1320         mq->intr_handle.fd = -1;
1321         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1322         mq->mempool = mb_pool;
1323         mq->in_port = dev->data->port_id;
1324         dev->data->rx_queues[qid] = mq;
1325
1326         return 0;
1327 }
1328
1329 static void
1330 memif_queue_release(void *queue)
1331 {
1332         struct memif_queue *mq = (struct memif_queue *)queue;
1333
1334         if (!mq)
1335                 return;
1336
1337         rte_free(mq);
1338 }
1339
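/*
 * In a secondary process the shared regions are attached lazily: the first
 * link_update after the link comes up triggers the IPC region request, and
 * the regions are released again once the link goes down.
 */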
1340 static int
1341 memif_link_update(struct rte_eth_dev *dev,
1342                   int wait_to_complete __rte_unused)
1343 {
1344         struct pmd_process_private *proc_private;
1345
1346         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1347                 proc_private = dev->process_private;
1348                 if (dev->data->dev_link.link_status == ETH_LINK_UP &&
1349                                 proc_private->regions_num == 0) {
1350                         memif_mp_request_regions(dev);
1351                 } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN &&
1352                                 proc_private->regions_num > 0) {
1353                         memif_free_regions(dev);
1354                 }
1355         }
1356         return 0;
1357 }
1358
1359 static int
1360 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1361 {
1362         struct pmd_internals *pmd = dev->data->dev_private;
1363         struct memif_queue *mq;
1364         int i;
1365         uint8_t tmp, nq;
1366
1367         stats->ipackets = 0;
1368         stats->ibytes = 0;
1369         stats->opackets = 0;
1370         stats->obytes = 0;
1371
1372         tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings :
1373             pmd->run.num_m2s_rings;
1374         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1375             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1376
1377         /* RX stats */
1378         for (i = 0; i < nq; i++) {
1379                 mq = dev->data->rx_queues[i];
1380                 stats->q_ipackets[i] = mq->n_pkts;
1381                 stats->q_ibytes[i] = mq->n_bytes;
1382                 stats->ipackets += mq->n_pkts;
1383                 stats->ibytes += mq->n_bytes;
1384         }
1385
1386         tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings :
1387             pmd->run.num_s2m_rings;
1388         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1389             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1390
1391         /* TX stats */
1392         for (i = 0; i < nq; i++) {
1393                 mq = dev->data->tx_queues[i];
1394                 stats->q_opackets[i] = mq->n_pkts;
1395                 stats->q_obytes[i] = mq->n_bytes;
1396                 stats->opackets += mq->n_pkts;
1397                 stats->obytes += mq->n_bytes;
1398         }
1399         return 0;
1400 }
1401
1402 static int
1403 memif_stats_reset(struct rte_eth_dev *dev)
1404 {
1405         struct pmd_internals *pmd = dev->data->dev_private;
1406         int i;
1407         struct memif_queue *mq;
1408
1409         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1410                 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->tx_queues[i] :
1411                     dev->data->rx_queues[i];
1412                 mq->n_pkts = 0;
1413                 mq->n_bytes = 0;
1414         }
1415         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1416                 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->rx_queues[i] :
1417                     dev->data->tx_queues[i];
1418                 mq->n_pkts = 0;
1419                 mq->n_bytes = 0;
1420         }
1421
1422         return 0;
1423 }
1424
1425 static int
1426 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1427                            uint16_t qid __rte_unused)
1428 {
1429         MIF_LOG(WARNING, "Interrupt mode not supported.");
1430
1431         return -1;
1432 }
1433
1434 static int
1435 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1436 {
1437         struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1438
1439         return 0;
1440 }
1441
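/* memif callbacks registered with the ethdev layer. */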
1442 static const struct eth_dev_ops ops = {
1443         .dev_start = memif_dev_start,
1444         .dev_close = memif_dev_close,
1445         .dev_infos_get = memif_dev_info,
1446         .dev_configure = memif_dev_configure,
1447         .tx_queue_setup = memif_tx_queue_setup,
1448         .rx_queue_setup = memif_rx_queue_setup,
1449         .rx_queue_release = memif_queue_release,
1450         .tx_queue_release = memif_queue_release,
1451         .rx_queue_intr_enable = memif_rx_queue_intr_enable,
1452         .rx_queue_intr_disable = memif_rx_queue_intr_disable,
1453         .link_update = memif_link_update,
1454         .stats_get = memif_stats_get,
1455         .stats_reset = memif_stats_reset,
1456 };
1457
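/*
 * Allocate and initialize an ethdev for a memif interface: set up the
 * per-process private data, register with the control socket and select
 * the zero-copy or copy-mode Rx/Tx burst functions.
 */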
1458 static int
1459 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1460              memif_interface_id_t id, uint32_t flags,
1461              const char *socket_filename,
1462              memif_log2_ring_size_t log2_ring_size,
1463              uint16_t pkt_buffer_size, const char *secret,
1464              struct rte_ether_addr *ether_addr)
1465 {
1466         int ret = 0;
1467         struct rte_eth_dev *eth_dev;
1468         struct rte_eth_dev_data *data;
1469         struct pmd_internals *pmd;
1470         struct pmd_process_private *process_private;
1471         const unsigned int numa_node = vdev->device.numa_node;
1472         const char *name = rte_vdev_device_name(vdev);
1473
1474         eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1475         if (eth_dev == NULL) {
1476                 MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1477                 return -1;
1478         }
1479
1480         process_private = (struct pmd_process_private *)
1481                 rte_zmalloc(name, sizeof(struct pmd_process_private),
1482                             RTE_CACHE_LINE_SIZE);
1483
1484         if (process_private == NULL) {
1485                 MIF_LOG(ERR, "Failed to alloc memory for process private");
1486                 return -1;
1487         }
1488         eth_dev->process_private = process_private;
1489
1490         pmd = eth_dev->data->dev_private;
1491         memset(pmd, 0, sizeof(*pmd));
1492
1493         pmd->id = id;
1494         pmd->flags = flags;
1495         pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1496         pmd->role = role;
1497         /* Zero-copy flag is irrelevant to the master role. */
1498         if (pmd->role == MEMIF_ROLE_MASTER)
1499                 pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1500
1501         ret = memif_socket_init(eth_dev, socket_filename);
1502         if (ret < 0)
1503                 return ret;
1504
1505         memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
1506         if (secret != NULL)
1507                 strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1508
1509         pmd->cfg.log2_ring_size = log2_ring_size;
1510         /* set in .dev_configure() */
1511         pmd->cfg.num_s2m_rings = 0;
1512         pmd->cfg.num_m2s_rings = 0;
1513
1514         pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1515         rte_spinlock_init(&pmd->cc_lock);
1516
1517         data = eth_dev->data;
1518         data->dev_private = pmd;
1519         data->numa_node = numa_node;
1520         data->dev_link = pmd_link;
1521         data->mac_addrs = ether_addr;
1522         data->promiscuous = 1;
1523
1524         eth_dev->dev_ops = &ops;
1525         eth_dev->device = &vdev->device;
1526         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1527                 eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1528                 eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1529         } else {
1530                 eth_dev->rx_pkt_burst = eth_memif_rx;
1531                 eth_dev->tx_pkt_burst = eth_memif_tx;
1532         }
1533
1534         rte_eth_dev_probing_finish(eth_dev);
1535
1536         return 0;
1537 }
1538
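/* Devargs parsing callbacks, used with rte_kvargs_process() in the probe path below. */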
1539 static int
1540 memif_set_role(const char *key __rte_unused, const char *value,
1541                void *extra_args)
1542 {
1543         enum memif_role_t *role = (enum memif_role_t *)extra_args;
1544
1545         if (strstr(value, "master") != NULL) {
1546                 *role = MEMIF_ROLE_MASTER;
1547         } else if (strstr(value, "slave") != NULL) {
1548                 *role = MEMIF_ROLE_SLAVE;
1549         } else {
1550                 MIF_LOG(ERR, "Unknown role: %s.", value);
1551                 return -EINVAL;
1552         }
1553         return 0;
1554 }
1555
1556 static int
1557 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1558 {
1559         uint32_t *flags = (uint32_t *)extra_args;
1560
1561         if (strstr(value, "yes") != NULL) {
1562                 if (!rte_mcfg_get_single_file_segments()) {
1563                         MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1564                         return -ENOTSUP;
1565                 }
1566                 *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1567         } else if (strstr(value, "no") != NULL) {
1568                 *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1569         } else {
1570                 MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1571                 return -EINVAL;
1572         }
1573         return 0;
1574 }
1575
1576 static int
1577 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1578 {
1579         memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1580
1581         /* even if parsing fails, 0 is a valid id */
1582         *id = strtoul(value, NULL, 10);
1583         return 0;
1584 }
1585
1586 static int
1587 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1588 {
1589         unsigned long tmp;
1590         uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1591
1592         tmp = strtoul(value, NULL, 10);
1593         if (tmp == 0 || tmp > 0xFFFF) {
1594                 MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1595                 return -EINVAL;
1596         }
1597         *pkt_buffer_size = tmp;
1598         return 0;
1599 }
1600
1601 static int
1602 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1603 {
1604         unsigned long tmp;
1605         memif_log2_ring_size_t *log2_ring_size =
1606             (memif_log2_ring_size_t *)extra_args;
1607
1608         tmp = strtoul(value, NULL, 10);
1609         if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1610                 MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1611                         value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1612                 return -EINVAL;
1613         }
1614         *log2_ring_size = tmp;
1615         return 0;
1616 }
1617
1618 /* Check that the socket directory exists and is readable/writable. */
1619 static int
1620 memif_check_socket_filename(const char *filename)
1621 {
1622         char *dir = NULL, *tmp;
1623         uint32_t idx;
1624         int ret = 0;
1625
1626         if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1627                 MIF_LOG(ERR, "Unix socket address too long (max 108).");
1628                 return -1;
1629         }
1630
1631         tmp = strrchr(filename, '/');
1632         if (tmp != NULL) {
1633                 idx = tmp - filename;
1634                 dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1635                 if (dir == NULL) {
1636                         MIF_LOG(ERR, "Failed to allocate memory.");
1637                         return -1;
1638                 }
1639                 strlcpy(dir, filename, sizeof(char) * (idx + 1));
1640         }
1641
1642         if (dir == NULL || (faccessat(-1, dir, F_OK | R_OK |
1643                                         W_OK, AT_EACCESS) < 0)) {
1644                 MIF_LOG(ERR, "Invalid socket directory.");
1645                 ret = -EINVAL;
1646         }
1647
1648         if (dir != NULL)
1649                 rte_free(dir);
1650
1651         return ret;
1652 }
1653
1654 static int
1655 memif_set_socket_filename(const char *key __rte_unused, const char *value,
1656                           void *extra_args)
1657 {
1658         const char **socket_filename = (const char **)extra_args;
1659
1660         *socket_filename = value;
1661         return memif_check_socket_filename(*socket_filename);
1662 }
1663
1664 static int
1665 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1666 {
1667         struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1668
1669         if (rte_ether_unformat_addr(value, ether_addr) < 0)
1670                 MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1671         return 0;
1672 }
1673
1674 static int
1675 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1676 {
1677         const char **secret = (const char **)extra_args;
1678
1679         *secret = value;
1680         return 0;
1681 }
1682
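/*
 * vdev probe: verify the shared-memory ABI sizes at build time, handle
 * secondary-process attach, register the region-sharing multi-process
 * action, parse devargs and create the interface.
 */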
1683 static int
1684 rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1685 {
1686         RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1687         RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1688         int ret = 0;
1689         struct rte_kvargs *kvlist;
1690         const char *name = rte_vdev_device_name(vdev);
1691         enum memif_role_t role = MEMIF_ROLE_SLAVE;
1692         memif_interface_id_t id = 0;
1693         uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1694         memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1695         const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1696         uint32_t flags = 0;
1697         const char *secret = NULL;
1698         struct rte_ether_addr *ether_addr = rte_zmalloc("",
1699                 sizeof(struct rte_ether_addr), 0);
1700         struct rte_eth_dev *eth_dev;
1701
1702         rte_eth_random_addr(ether_addr->addr_bytes);
1703
1704         MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1705
1706         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1707                 eth_dev = rte_eth_dev_attach_secondary(name);
1708                 if (!eth_dev) {
1709                         MIF_LOG(ERR, "Failed to probe %s", name);
1710                         return -1;
1711                 }
1712
1713                 eth_dev->dev_ops = &ops;
1714                 eth_dev->device = &vdev->device;
1715                 eth_dev->rx_pkt_burst = eth_memif_rx;
1716                 eth_dev->tx_pkt_burst = eth_memif_tx;
1717
1718                 if (!rte_eal_primary_proc_alive(NULL)) {
1719                         MIF_LOG(ERR, "Primary process is missing");
1720                         return -1;
1721                 }
1722
1723                 eth_dev->process_private = (struct pmd_process_private *)
1724                         rte_zmalloc(name,
1725                                 sizeof(struct pmd_process_private),
1726                                 RTE_CACHE_LINE_SIZE);
1727                 if (eth_dev->process_private == NULL) {
1728                         MIF_LOG(ERR,
1729                                 "Failed to alloc memory for process private");
1730                         return -1;
1731                 }
1732
1733                 rte_eth_dev_probing_finish(eth_dev);
1734
1735                 return 0;
1736         }
1737
1738         ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1739         /*
1740          * The primary process can continue probing, but a secondary process
1741          * will not be able to obtain memory region information.
1742          */
1743         if (ret < 0 && rte_errno != EEXIST)
1744                 MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1745                         strerror(rte_errno));
1746
1747         kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1748
1749         /* parse parameters */
1750         if (kvlist != NULL) {
1751                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1752                                          &memif_set_role, &role);
1753                 if (ret < 0)
1754                         goto exit;
1755                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1756                                          &memif_set_id, &id);
1757                 if (ret < 0)
1758                         goto exit;
1759                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1760                                          &memif_set_bs, &pkt_buffer_size);
1761                 if (ret < 0)
1762                         goto exit;
1763                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1764                                          &memif_set_rs, &log2_ring_size);
1765                 if (ret < 0)
1766                         goto exit;
1767                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1768                                          &memif_set_socket_filename,
1769                                          (void *)(&socket_filename));
1770                 if (ret < 0)
1771                         goto exit;
1772                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1773                                          &memif_set_mac, ether_addr);
1774                 if (ret < 0)
1775                         goto exit;
1776                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1777                                          &memif_set_zc, &flags);
1778                 if (ret < 0)
1779                         goto exit;
1780                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1781                                          &memif_set_secret, (void *)(&secret));
1782                 if (ret < 0)
1783                         goto exit;
1784         }
1785
1786         /* create interface */
1787         ret = memif_create(vdev, role, id, flags, socket_filename,
1788                            log2_ring_size, pkt_buffer_size, secret, ether_addr);
1789
1790 exit:
1791         if (kvlist != NULL)
1792                 rte_kvargs_free(kvlist);
1793         return ret;
1794 }
1795
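/* vdev remove: close the ethdev associated with this vdev, if any. */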
1796 static int
1797 rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1798 {
1799         struct rte_eth_dev *eth_dev;
1800
1801         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1802         if (eth_dev == NULL)
1803                 return 0;
1804
1805         rte_eth_dev_close(eth_dev->data->port_id);
1806
1807         return 0;
1808 }
1809
1810 static struct rte_vdev_driver pmd_memif_drv = {
1811         .probe = rte_pmd_memif_probe,
1812         .remove = rte_pmd_memif_remove,
1813 };
1814
1815 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1816
1817 RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1818                               ETH_MEMIF_ID_ARG "=<int>"
1819                               ETH_MEMIF_ROLE_ARG "=master|slave"
1820                               ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1821                               ETH_MEMIF_RING_SIZE_ARG "=<int>"
1822                               ETH_MEMIF_SOCKET_ARG "=<string>"
1823                               ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1824                               ETH_MEMIF_ZC_ARG "=yes|no"
1825                               ETH_MEMIF_SECRET_ARG "=<string>");
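/*
 * Example devargs string (illustrative only; the vdev name, socket path and
 * values below are arbitrary, not mandated by this driver):
 *   --vdev=net_memif0,role=slave,id=0,bsize=2048,rsize=10,\
 *     socket=/run/memif.sock,mac=02:00:00:00:00:01,zero-copy=no
 */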
1826
1827 RTE_LOG_REGISTER(memif_logtype, pmd.net.memif, NOTICE);