net/bnxt: fix using RSS config struct
[dpdk.git] / drivers / net / memif / rte_eth_memif.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
3  */
4
5 #include <stdlib.h>
6 #include <fcntl.h>
7 #include <unistd.h>
8 #include <sys/types.h>
9 #include <sys/socket.h>
10 #include <sys/un.h>
11 #include <sys/ioctl.h>
12 #include <sys/mman.h>
13 #include <linux/if_ether.h>
14 #include <errno.h>
15 #include <sys/eventfd.h>
16
17 #include <rte_version.h>
18 #include <rte_mbuf.h>
19 #include <rte_ether.h>
20 #include <rte_ethdev_driver.h>
21 #include <rte_ethdev_vdev.h>
22 #include <rte_malloc.h>
23 #include <rte_kvargs.h>
24 #include <rte_bus_vdev.h>
25 #include <rte_string_fns.h>
26 #include <rte_errno.h>
27 #include <rte_memory.h>
28 #include <rte_memzone.h>
29 #include <rte_eal_memconfig.h>
30
31 #include "rte_eth_memif.h"
32 #include "memif_socket.h"
33
/* Keys of the device arguments understood by the memif vdev. */
#define ETH_MEMIF_ID_ARG                "id"
#define ETH_MEMIF_ROLE_ARG              "role"
#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG   "bsize"
#define ETH_MEMIF_RING_SIZE_ARG         "rsize"
#define ETH_MEMIF_SOCKET_ARG            "socket"
#define ETH_MEMIF_MAC_ARG               "mac"
#define ETH_MEMIF_ZC_ARG                "zero-copy"
#define ETH_MEMIF_SECRET_ARG            "secret"

/*
 * NULL-terminated table listing every accepted device argument key.
 * NOTE(review): presumably handed to the kvargs parser; the consumer is
 * outside this part of the file.
 */
static const char * const valid_arguments[] = {
        ETH_MEMIF_ID_ARG,               /* "id" */
        ETH_MEMIF_ROLE_ARG,             /* "role" */
        ETH_MEMIF_PKT_BUFFER_SIZE_ARG,  /* "bsize" */
        ETH_MEMIF_RING_SIZE_ARG,        /* "rsize" */
        ETH_MEMIF_SOCKET_ARG,           /* "socket" */
        ETH_MEMIF_MAC_ARG,              /* "mac" */
        ETH_MEMIF_ZC_ARG,               /* "zero-copy" */
        ETH_MEMIF_SECRET_ARG,           /* "secret" */
        NULL                            /* terminator */
};
54
55 static const struct rte_eth_link pmd_link = {
56         .link_speed = ETH_SPEED_NUM_10G,
57         .link_duplex = ETH_LINK_FULL_DUPLEX,
58         .link_status = ETH_LINK_DOWN,
59         .link_autoneg = ETH_LINK_AUTONEG
60 };
61
/* Message name used for the multi-process "send me region N" request. */
#define MEMIF_MP_SEND_REGION            "memif_mp_send_region"

/*
 * Forward declaration: rte_memseg_walk() callback used to set up
 * zero-copy regions; defined further down in this file.
 */
static int memif_region_init_zc(const struct rte_memseg_list *msl,
                                const struct rte_memseg *ms,
                                void *arg);
67
68 const char *
69 memif_version(void)
70 {
71         return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
72 }
73
74 /* Message header to synchronize regions */
75 struct mp_region_msg {
76         char port_name[RTE_DEV_NAME_MAX_LEN];
77         memif_region_index_t idx;
78         memif_region_size_t size;
79 };
80
81 static int
82 memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
83 {
84         struct rte_eth_dev *dev;
85         struct pmd_process_private *proc_private;
86         const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
87         struct rte_mp_msg reply;
88         struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
89         uint16_t port_id;
90         int ret;
91
92         /* Get requested port */
93         ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
94         if (ret) {
95                 MIF_LOG(ERR, "Failed to get port id for %s",
96                         msg_param->port_name);
97                 return -1;
98         }
99         dev = &rte_eth_devices[port_id];
100         proc_private = dev->process_private;
101
102         memset(&reply, 0, sizeof(reply));
103         strlcpy(reply.name, msg->name, sizeof(reply.name));
104         reply_param->idx = msg_param->idx;
105         if (proc_private->regions[msg_param->idx] != NULL) {
106                 reply_param->size = proc_private->regions[msg_param->idx]->region_size;
107                 reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
108                 reply.num_fds = 1;
109         }
110         reply.len_param = sizeof(*reply_param);
111         if (rte_mp_reply(&reply, peer) < 0) {
112                 MIF_LOG(ERR, "Failed to reply to an add region request");
113                 return -1;
114         }
115
116         return 0;
117 }
118
119 /*
120  * Request regions
121  * Called by secondary process, when ports link status goes up.
122  */
123 static int
124 memif_mp_request_regions(struct rte_eth_dev *dev)
125 {
126         int ret, i;
127         struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
128         struct rte_mp_msg msg, *reply;
129         struct rte_mp_reply replies;
130         struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
131         struct mp_region_msg *reply_param;
132         struct memif_region *r;
133         struct pmd_process_private *proc_private = dev->process_private;
134         struct pmd_internals *pmd = dev->data->dev_private;
135         /* in case of zero-copy slave, only request region 0 */
136         uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
137                                    1 : ETH_MEMIF_MAX_REGION_NUM;
138
139         MIF_LOG(DEBUG, "Requesting memory regions");
140
141         for (i = 0; i < max_region_num; i++) {
142                 /* Prepare the message */
143                 memset(&msg, 0, sizeof(msg));
144                 strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
145                 strlcpy(msg_param->port_name, dev->data->name,
146                         sizeof(msg_param->port_name));
147                 msg_param->idx = i;
148                 msg.len_param = sizeof(*msg_param);
149
150                 /* Send message */
151                 ret = rte_mp_request_sync(&msg, &replies, &timeout);
152                 if (ret < 0 || replies.nb_received != 1) {
153                         MIF_LOG(ERR, "Failed to send mp msg: %d",
154                                 rte_errno);
155                         return -1;
156                 }
157
158                 reply = &replies.msgs[0];
159                 reply_param = (struct mp_region_msg *)reply->param;
160
161                 if (reply_param->size > 0) {
162                         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
163                         if (r == NULL) {
164                                 MIF_LOG(ERR, "Failed to alloc memif region.");
165                                 free(reply);
166                                 return -ENOMEM;
167                         }
168                         r->region_size = reply_param->size;
169                         if (reply->num_fds < 1) {
170                                 MIF_LOG(ERR, "Missing file descriptor.");
171                                 free(reply);
172                                 return -1;
173                         }
174                         r->fd = reply->fds[0];
175                         r->addr = NULL;
176
177                         proc_private->regions[reply_param->idx] = r;
178                         proc_private->regions_num++;
179                 }
180                 free(reply);
181         }
182
183         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
184                 ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
185                 if (ret < 0)
186                         return ret;
187         }
188
189         return memif_connect(dev);
190 }
191
192 static int
193 memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
194 {
195         dev_info->max_mac_addrs = 1;
196         dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
197         dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
198         dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
199         dev_info->min_rx_bufsize = 0;
200
201         return 0;
202 }
203
204 static memif_ring_t *
205 memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
206                memif_ring_type_t type, uint16_t ring_num)
207 {
208         /* rings only in region 0 */
209         void *p = proc_private->regions[0]->addr;
210         int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
211             (1 << pmd->run.log2_ring_size);
212
213         p = (uint8_t *)p + (ring_num + type * pmd->run.num_s2m_rings) * ring_size;
214
215         return (memif_ring_t *)p;
216 }
217
218 static memif_region_offset_t
219 memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
220                       memif_ring_type_t type, uint16_t num)
221 {
222         struct pmd_internals *pmd = dev->data->dev_private;
223         struct pmd_process_private *proc_private = dev->process_private;
224
225         return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
226                 (uint8_t *)proc_private->regions[mq->region]->addr);
227 }
228
229 static memif_ring_t *
230 memif_get_ring_from_queue(struct pmd_process_private *proc_private,
231                           struct memif_queue *mq)
232 {
233         struct memif_region *r;
234
235         r = proc_private->regions[mq->region];
236         if (r == NULL)
237                 return NULL;
238
239         return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
240 }
241
242 static void *
243 memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
244 {
245         return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
246 }
247
248 /* Free mbufs received by master */
249 static void
250 memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
251 {
252         uint16_t mask = (1 << mq->log2_ring_size) - 1;
253         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
254
255         /* FIXME: improve performance */
256         while (mq->last_tail != ring->tail) {
257                 RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
258                 /* Decrement refcnt and free mbuf. (current segment) */
259                 rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
260                 rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
261                 mq->last_tail++;
262         }
263 }
264
265 static int
266 memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
267                     struct rte_mbuf *tail)
268 {
269         /* Check for number-of-segments-overflow */
270         if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
271                 return -EOVERFLOW;
272
273         /* Chain 'tail' onto the old tail */
274         cur_tail->next = tail;
275
276         /* accumulate number of segments and total length. */
277         head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
278
279         tail->pkt_len = tail->data_len;
280         head->pkt_len += tail->pkt_len;
281
282         return 0;
283 }
284
285 static uint16_t
286 eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
287 {
288         struct memif_queue *mq = queue;
289         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
290         struct pmd_process_private *proc_private =
291                 rte_eth_devices[mq->in_port].process_private;
292         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
293         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
294         uint16_t n_rx_pkts = 0;
295         uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
296                 RTE_PKTMBUF_HEADROOM;
297         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
298         memif_ring_type_t type = mq->type;
299         memif_desc_t *d0;
300         struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
301         uint64_t b;
302         ssize_t size __rte_unused;
303         uint16_t head;
304         int ret;
305         struct rte_eth_link link;
306
307         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
308                 return 0;
309         if (unlikely(ring == NULL)) {
310                 /* Secondary process will attempt to request regions. */
311                 ret = rte_eth_link_get(mq->in_port, &link);
312                 if (ret < 0)
313                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
314                                 mq->in_port, rte_strerror(-ret));
315                 return 0;
316         }
317
318         /* consume interrupt */
319         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
320                 size = read(mq->intr_handle.fd, &b, sizeof(b));
321
322         ring_size = 1 << mq->log2_ring_size;
323         mask = ring_size - 1;
324
325         if (type == MEMIF_RING_S2M) {
326                 cur_slot = mq->last_head;
327                 last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
328         } else {
329                 cur_slot = mq->last_tail;
330                 last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
331         }
332
333         if (cur_slot == last_slot)
334                 goto refill;
335         n_slots = last_slot - cur_slot;
336
337         while (n_slots && n_rx_pkts < nb_pkts) {
338                 mbuf_head = rte_pktmbuf_alloc(mq->mempool);
339                 if (unlikely(mbuf_head == NULL))
340                         goto no_free_bufs;
341                 mbuf = mbuf_head;
342                 mbuf->port = mq->in_port;
343
344 next_slot:
345                 s0 = cur_slot & mask;
346                 d0 = &ring->desc[s0];
347
348                 src_len = d0->length;
349                 dst_off = 0;
350                 src_off = 0;
351
352                 do {
353                         dst_len = mbuf_size - dst_off;
354                         if (dst_len == 0) {
355                                 dst_off = 0;
356                                 dst_len = mbuf_size;
357
358                                 /* store pointer to tail */
359                                 mbuf_tail = mbuf;
360                                 mbuf = rte_pktmbuf_alloc(mq->mempool);
361                                 if (unlikely(mbuf == NULL))
362                                         goto no_free_bufs;
363                                 mbuf->port = mq->in_port;
364                                 ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
365                                 if (unlikely(ret < 0)) {
366                                         MIF_LOG(ERR, "number-of-segments-overflow");
367                                         rte_pktmbuf_free(mbuf);
368                                         goto no_free_bufs;
369                                 }
370                         }
371                         cp_len = RTE_MIN(dst_len, src_len);
372
373                         rte_pktmbuf_data_len(mbuf) += cp_len;
374                         rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
375                         if (mbuf != mbuf_head)
376                                 rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
377
378                         memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
379                                (uint8_t *)memif_get_buffer(proc_private, d0) + src_off,
380                                cp_len);
381
382                         src_off += cp_len;
383                         dst_off += cp_len;
384                         src_len -= cp_len;
385                 } while (src_len);
386
387                 cur_slot++;
388                 n_slots--;
389
390                 if (d0->flags & MEMIF_DESC_FLAG_NEXT)
391                         goto next_slot;
392
393                 mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
394                 *bufs++ = mbuf_head;
395                 n_rx_pkts++;
396         }
397
398 no_free_bufs:
399         if (type == MEMIF_RING_S2M) {
400                 __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
401                 mq->last_head = cur_slot;
402         } else {
403                 mq->last_tail = cur_slot;
404         }
405
406 refill:
407         if (type == MEMIF_RING_M2S) {
408                 head = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
409                 n_slots = ring_size - head + mq->last_tail;
410
411                 while (n_slots--) {
412                         s0 = head++ & mask;
413                         d0 = &ring->desc[s0];
414                         d0->length = pmd->run.pkt_buffer_size;
415                 }
416                 __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
417         }
418
419         mq->n_pkts += n_rx_pkts;
420         return n_rx_pkts;
421 }
422
423 static uint16_t
424 eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
425 {
426         struct memif_queue *mq = queue;
427         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
428         struct pmd_process_private *proc_private =
429                 rte_eth_devices[mq->in_port].process_private;
430         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
431         uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
432         uint16_t n_rx_pkts = 0;
433         memif_desc_t *d0;
434         struct rte_mbuf *mbuf, *mbuf_tail;
435         struct rte_mbuf *mbuf_head = NULL;
436         int ret;
437         struct rte_eth_link link;
438
439         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
440                 return 0;
441         if (unlikely(ring == NULL)) {
442                 /* Secondary process will attempt to request regions. */
443                 rte_eth_link_get(mq->in_port, &link);
444                 return 0;
445         }
446
447         /* consume interrupt */
448         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
449                 uint64_t b;
450                 ssize_t size __rte_unused;
451                 size = read(mq->intr_handle.fd, &b, sizeof(b));
452         }
453
454         ring_size = 1 << mq->log2_ring_size;
455         mask = ring_size - 1;
456
457         cur_slot = mq->last_tail;
458         last_slot = ring->tail;
459         if (cur_slot == last_slot)
460                 goto refill;
461         n_slots = last_slot - cur_slot;
462
463         while (n_slots && n_rx_pkts < nb_pkts) {
464                 s0 = cur_slot & mask;
465
466                 d0 = &ring->desc[s0];
467                 mbuf_head = mq->buffers[s0];
468                 mbuf = mbuf_head;
469
470 next_slot:
471                 /* prefetch next descriptor */
472                 if (n_rx_pkts + 1 < nb_pkts)
473                         rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);
474
475                 mbuf->port = mq->in_port;
476                 rte_pktmbuf_data_len(mbuf) = d0->length;
477                 rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
478
479                 mq->n_bytes += rte_pktmbuf_data_len(mbuf);
480
481                 cur_slot++;
482                 n_slots--;
483                 if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
484                         s0 = cur_slot & mask;
485                         d0 = &ring->desc[s0];
486                         mbuf_tail = mbuf;
487                         mbuf = mq->buffers[s0];
488                         ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
489                         if (unlikely(ret < 0)) {
490                                 MIF_LOG(ERR, "number-of-segments-overflow");
491                                 goto refill;
492                         }
493                         goto next_slot;
494                 }
495
496                 *bufs++ = mbuf_head;
497                 n_rx_pkts++;
498         }
499
500         mq->last_tail = cur_slot;
501
502 /* Supply master with new buffers */
503 refill:
504         head = ring->head;
505         n_slots = ring_size - head + mq->last_tail;
506
507         if (n_slots < 32)
508                 goto no_free_mbufs;
509
510         ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
511         if (unlikely(ret < 0))
512                 goto no_free_mbufs;
513
514         while (n_slots--) {
515                 s0 = head++ & mask;
516                 if (n_slots > 0)
517                         rte_prefetch0(mq->buffers[head & mask]);
518                 d0 = &ring->desc[s0];
519                 /* store buffer header */
520                 mbuf = mq->buffers[s0];
521                 /* populate descriptor */
522                 d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
523                                 RTE_PKTMBUF_HEADROOM;
524                 d0->region = 1;
525                 d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
526                         (uint8_t *)proc_private->regions[d0->region]->addr;
527         }
528 no_free_mbufs:
529         rte_mb();
530         ring->head = head;
531
532         mq->n_pkts += n_rx_pkts;
533
534         return n_rx_pkts;
535 }
536
537 static uint16_t
538 eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
539 {
540         struct memif_queue *mq = queue;
541         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
542         struct pmd_process_private *proc_private =
543                 rte_eth_devices[mq->in_port].process_private;
544         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
545         uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
546         uint16_t src_len, src_off, dst_len, dst_off, cp_len;
547         memif_ring_type_t type = mq->type;
548         memif_desc_t *d0;
549         struct rte_mbuf *mbuf;
550         struct rte_mbuf *mbuf_head;
551         uint64_t a;
552         ssize_t size;
553         struct rte_eth_link link;
554
555         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
556                 return 0;
557         if (unlikely(ring == NULL)) {
558                 int ret;
559
560                 /* Secondary process will attempt to request regions. */
561                 ret = rte_eth_link_get(mq->in_port, &link);
562                 if (ret < 0)
563                         MIF_LOG(ERR, "Failed to get port %u link info: %s",
564                                 mq->in_port, rte_strerror(-ret));
565                 return 0;
566         }
567
568         ring_size = 1 << mq->log2_ring_size;
569         mask = ring_size - 1;
570
571         n_free = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE) - mq->last_tail;
572         mq->last_tail += n_free;
573
574         if (type == MEMIF_RING_S2M) {
575                 slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
576                 n_free = ring_size - slot + mq->last_tail;
577         } else {
578                 slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
579                 n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
580         }
581
582         while (n_tx_pkts < nb_pkts && n_free) {
583                 mbuf_head = *bufs++;
584                 mbuf = mbuf_head;
585
586                 saved_slot = slot;
587                 d0 = &ring->desc[slot & mask];
588                 dst_off = 0;
589                 dst_len = (type == MEMIF_RING_S2M) ?
590                         pmd->run.pkt_buffer_size : d0->length;
591
592 next_in_chain:
593                 src_off = 0;
594                 src_len = rte_pktmbuf_data_len(mbuf);
595
596                 while (src_len) {
597                         if (dst_len == 0) {
598                                 if (n_free) {
599                                         slot++;
600                                         n_free--;
601                                         d0->flags |= MEMIF_DESC_FLAG_NEXT;
602                                         d0 = &ring->desc[slot & mask];
603                                         dst_off = 0;
604                                         dst_len = (type == MEMIF_RING_S2M) ?
605                                             pmd->run.pkt_buffer_size : d0->length;
606                                         d0->flags = 0;
607                                 } else {
608                                         slot = saved_slot;
609                                         goto no_free_slots;
610                                 }
611                         }
612                         cp_len = RTE_MIN(dst_len, src_len);
613
614                         memcpy((uint8_t *)memif_get_buffer(proc_private, d0) + dst_off,
615                                rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
616                                cp_len);
617
618                         mq->n_bytes += cp_len;
619                         src_off += cp_len;
620                         dst_off += cp_len;
621                         src_len -= cp_len;
622                         dst_len -= cp_len;
623
624                         d0->length = dst_off;
625                 }
626
627                 if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
628                         mbuf = mbuf->next;
629                         goto next_in_chain;
630                 }
631
632                 n_tx_pkts++;
633                 slot++;
634                 n_free--;
635                 rte_pktmbuf_free(mbuf_head);
636         }
637
638 no_free_slots:
639         if (type == MEMIF_RING_S2M)
640                 __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
641         else
642                 __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);
643
644         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
645                 a = 1;
646                 size = write(mq->intr_handle.fd, &a, sizeof(a));
647                 if (unlikely(size < 0)) {
648                         MIF_LOG(WARNING,
649                                 "Failed to send interrupt. %s", strerror(errno));
650                 }
651         }
652
653         mq->n_pkts += n_tx_pkts;
654         return n_tx_pkts;
655 }
656
657
658 static int
659 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
660                 memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
661                 uint16_t slot, uint16_t n_free)
662 {
663         memif_desc_t *d0;
664         int used_slots = 1;
665
666 next_in_chain:
667         /* store pointer to mbuf to free it later */
668         mq->buffers[slot & mask] = mbuf;
669         /* Increment refcnt to make sure the buffer is not freed before master
670          * receives it. (current segment)
671          */
672         rte_mbuf_refcnt_update(mbuf, 1);
673         /* populate descriptor */
674         d0 = &ring->desc[slot & mask];
675         d0->length = rte_pktmbuf_data_len(mbuf);
676         /* FIXME: get region index */
677         d0->region = 1;
678         d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
679                 (uint8_t *)proc_private->regions[d0->region]->addr;
680         d0->flags = 0;
681
682         /* check if buffer is chained */
683         if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
684                 if (n_free < 2)
685                         return 0;
686                 /* mark buffer as chained */
687                 d0->flags |= MEMIF_DESC_FLAG_NEXT;
688                 /* advance mbuf */
689                 mbuf = mbuf->next;
690                 /* update counters */
691                 used_slots++;
692                 slot++;
693                 n_free--;
694                 goto next_in_chain;
695         }
696         return used_slots;
697 }
698
699 static uint16_t
700 eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
701 {
702         struct memif_queue *mq = queue;
703         struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
704         struct pmd_process_private *proc_private =
705                 rte_eth_devices[mq->in_port].process_private;
706         memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
707         uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
708         memif_ring_type_t type = mq->type;
709         struct rte_eth_link link;
710
711         if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
712                 return 0;
713         if (unlikely(ring == NULL)) {
714                 /* Secondary process will attempt to request regions. */
715                 rte_eth_link_get(mq->in_port, &link);
716                 return 0;
717         }
718
719         ring_size = 1 << mq->log2_ring_size;
720         mask = ring_size - 1;
721
722         /* free mbufs received by master */
723         memif_free_stored_mbufs(proc_private, mq);
724
725         /* ring type always MEMIF_RING_S2M */
726         slot = ring->head;
727         n_free = ring_size - ring->head + mq->last_tail;
728
729         int used_slots;
730
731         while (n_free && (n_tx_pkts < nb_pkts)) {
732                 while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
733                         if ((nb_pkts - n_tx_pkts) > 8) {
734                                 rte_prefetch0(*bufs + 4);
735                                 rte_prefetch0(*bufs + 5);
736                                 rte_prefetch0(*bufs + 6);
737                                 rte_prefetch0(*bufs + 7);
738                         }
739                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
740                                 mask, slot, n_free);
741                         if (unlikely(used_slots < 1))
742                                 goto no_free_slots;
743                         n_tx_pkts++;
744                         slot += used_slots;
745                         n_free -= used_slots;
746
747                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
748                                 mask, slot, n_free);
749                         if (unlikely(used_slots < 1))
750                                 goto no_free_slots;
751                         n_tx_pkts++;
752                         slot += used_slots;
753                         n_free -= used_slots;
754
755                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
756                                 mask, slot, n_free);
757                         if (unlikely(used_slots < 1))
758                                 goto no_free_slots;
759                         n_tx_pkts++;
760                         slot += used_slots;
761                         n_free -= used_slots;
762
763                         used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
764                                 mask, slot, n_free);
765                         if (unlikely(used_slots < 1))
766                                 goto no_free_slots;
767                         n_tx_pkts++;
768                         slot += used_slots;
769                         n_free -= used_slots;
770                 }
771                 used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
772                         mask, slot, n_free);
773                 if (unlikely(used_slots < 1))
774                         goto no_free_slots;
775                 n_tx_pkts++;
776                 slot += used_slots;
777                 n_free -= used_slots;
778         }
779
780 no_free_slots:
781         rte_mb();
782         /* update ring pointers */
783         if (type == MEMIF_RING_S2M)
784                 ring->head = slot;
785         else
786                 ring->tail = slot;
787
788         /* Send interrupt, if enabled. */
789         if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
790                 uint64_t a = 1;
791                 ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
792                 if (unlikely(size < 0)) {
793                         MIF_LOG(WARNING,
794                                 "Failed to send interrupt. %s", strerror(errno));
795                 }
796         }
797
798         /* increment queue counters */
799         mq->n_pkts += n_tx_pkts;
800
801         return n_tx_pkts;
802 }
803
804 void
805 memif_free_regions(struct rte_eth_dev *dev)
806 {
807         struct pmd_process_private *proc_private = dev->process_private;
808         struct pmd_internals *pmd = dev->data->dev_private;
809         int i;
810         struct memif_region *r;
811
812         /* regions are allocated contiguously, so it's
813          * enough to loop until 'proc_private->regions_num'
814          */
815         for (i = 0; i < proc_private->regions_num; i++) {
816                 r = proc_private->regions[i];
817                 if (r != NULL) {
818                         /* This is memzone */
819                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
820                                 r->addr = NULL;
821                                 if (r->fd > 0)
822                                         close(r->fd);
823                         }
824                         if (r->addr != NULL) {
825                                 munmap(r->addr, r->region_size);
826                                 if (r->fd > 0) {
827                                         close(r->fd);
828                                         r->fd = -1;
829                                 }
830                         }
831                         rte_free(r);
832                         proc_private->regions[i] = NULL;
833                 }
834         }
835         proc_private->regions_num = 0;
836 }
837
838 static int
839 memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
840                      void *arg)
841 {
842         struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
843         struct memif_region *r;
844
845         if (proc_private->regions_num < 1) {
846                 MIF_LOG(ERR, "Missing descriptor region");
847                 return -1;
848         }
849
850         r = proc_private->regions[proc_private->regions_num - 1];
851
852         if (r->addr != msl->base_va)
853                 r = proc_private->regions[++proc_private->regions_num - 1];
854
855         if (r == NULL) {
856                 r = rte_zmalloc("region", sizeof(struct memif_region), 0);
857                 if (r == NULL) {
858                         MIF_LOG(ERR, "Failed to alloc memif region.");
859                         return -ENOMEM;
860                 }
861
862                 r->addr = msl->base_va;
863                 r->region_size = ms->len;
864                 r->fd = rte_memseg_get_fd(ms);
865                 if (r->fd < 0)
866                         return -1;
867                 r->pkt_buffer_offset = 0;
868
869                 proc_private->regions[proc_private->regions_num - 1] = r;
870         } else {
871                 r->region_size += ms->len;
872         }
873
874         return 0;
875 }
876
877 static int
878 memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
879 {
880         struct pmd_internals *pmd = dev->data->dev_private;
881         struct pmd_process_private *proc_private = dev->process_private;
882         char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
883         int ret = 0;
884         struct memif_region *r;
885
886         if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
887                 MIF_LOG(ERR, "Too many regions.");
888                 return -1;
889         }
890
891         r = rte_zmalloc("region", sizeof(struct memif_region), 0);
892         if (r == NULL) {
893                 MIF_LOG(ERR, "Failed to alloc memif region.");
894                 return -ENOMEM;
895         }
896
897         /* calculate buffer offset */
898         r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) *
899             (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
900             (1 << pmd->run.log2_ring_size));
901
902         r->region_size = r->pkt_buffer_offset;
903         /* if region has buffers, add buffers size to region_size */
904         if (has_buffers == 1)
905                 r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
906                         (1 << pmd->run.log2_ring_size) *
907                         (pmd->run.num_s2m_rings +
908                          pmd->run.num_m2s_rings));
909
910         memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
911         snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
912                  proc_private->regions_num);
913
914         r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
915         if (r->fd < 0) {
916                 MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
917                 ret = -1;
918                 goto error;
919         }
920
921         ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
922         if (ret < 0) {
923                 MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
924                 goto error;
925         }
926
927         ret = ftruncate(r->fd, r->region_size);
928         if (ret < 0) {
929                 MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
930                 goto error;
931         }
932
933         r->addr = mmap(NULL, r->region_size, PROT_READ |
934                        PROT_WRITE, MAP_SHARED, r->fd, 0);
935         if (r->addr == MAP_FAILED) {
936                 MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(ret));
937                 ret = -1;
938                 goto error;
939         }
940
941         proc_private->regions[proc_private->regions_num] = r;
942         proc_private->regions_num++;
943
944         return ret;
945
946 error:
947         if (r->fd > 0)
948                 close(r->fd);
949         r->fd = -1;
950
951         return ret;
952 }
953
954 static int
955 memif_regions_init(struct rte_eth_dev *dev)
956 {
957         struct pmd_internals *pmd = dev->data->dev_private;
958         int ret;
959
960         /*
961          * Zero-copy exposes dpdk memory.
962          * Each memseg list will be represented by memif region.
963          * Zero-copy regions indexing: memseg list idx + 1,
964          * as we already have region 0 reserved for descriptors.
965          */
966         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
967                 /* create region idx 0 containing descriptors */
968                 ret = memif_region_init_shm(dev, 0);
969                 if (ret < 0)
970                         return ret;
971                 ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
972                 if (ret < 0)
973                         return ret;
974         } else {
975                 /* create one memory region contaning rings and buffers */
976                 ret = memif_region_init_shm(dev, /* has buffers */ 1);
977                 if (ret < 0)
978                         return ret;
979         }
980
981         return 0;
982 }
983
984 static void
985 memif_init_rings(struct rte_eth_dev *dev)
986 {
987         struct pmd_internals *pmd = dev->data->dev_private;
988         struct pmd_process_private *proc_private = dev->process_private;
989         memif_ring_t *ring;
990         int i, j;
991         uint16_t slot;
992
993         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
994                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2M, i);
995                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
996                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
997                 ring->cookie = MEMIF_COOKIE;
998                 ring->flags = 0;
999
1000                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1001                         continue;
1002
1003                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1004                         slot = i * (1 << pmd->run.log2_ring_size) + j;
1005                         ring->desc[j].region = 0;
1006                         ring->desc[j].offset =
1007                                 proc_private->regions[0]->pkt_buffer_offset +
1008                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1009                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1010                 }
1011         }
1012
1013         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1014                 ring = memif_get_ring(pmd, proc_private, MEMIF_RING_M2S, i);
1015                 __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1016                 __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1017                 ring->cookie = MEMIF_COOKIE;
1018                 ring->flags = 0;
1019
1020                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
1021                         continue;
1022
1023                 for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
1024                         slot = (i + pmd->run.num_s2m_rings) *
1025                             (1 << pmd->run.log2_ring_size) + j;
1026                         ring->desc[j].region = 0;
1027                         ring->desc[j].offset =
1028                                 proc_private->regions[0]->pkt_buffer_offset +
1029                                 (uint32_t)(slot * pmd->run.pkt_buffer_size);
1030                         ring->desc[j].length = pmd->run.pkt_buffer_size;
1031                 }
1032         }
1033 }
1034
1035 /* called only by slave */
1036 static int
1037 memif_init_queues(struct rte_eth_dev *dev)
1038 {
1039         struct pmd_internals *pmd = dev->data->dev_private;
1040         struct memif_queue *mq;
1041         int i;
1042
1043         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1044                 mq = dev->data->tx_queues[i];
1045                 mq->log2_ring_size = pmd->run.log2_ring_size;
1046                 /* queues located only in region 0 */
1047                 mq->region = 0;
1048                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2M, i);
1049                 mq->last_head = 0;
1050                 mq->last_tail = 0;
1051                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1052                 if (mq->intr_handle.fd < 0) {
1053                         MIF_LOG(WARNING,
1054                                 "Failed to create eventfd for tx queue %d: %s.", i,
1055                                 strerror(errno));
1056                 }
1057                 mq->buffers = NULL;
1058                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1059                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1060                                                   (1 << mq->log2_ring_size), 0);
1061                         if (mq->buffers == NULL)
1062                                 return -ENOMEM;
1063                 }
1064         }
1065
1066         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1067                 mq = dev->data->rx_queues[i];
1068                 mq->log2_ring_size = pmd->run.log2_ring_size;
1069                 /* queues located only in region 0 */
1070                 mq->region = 0;
1071                 mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_M2S, i);
1072                 mq->last_head = 0;
1073                 mq->last_tail = 0;
1074                 mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
1075                 if (mq->intr_handle.fd < 0) {
1076                         MIF_LOG(WARNING,
1077                                 "Failed to create eventfd for rx queue %d: %s.", i,
1078                                 strerror(errno));
1079                 }
1080                 mq->buffers = NULL;
1081                 if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1082                         mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
1083                                                   (1 << mq->log2_ring_size), 0);
1084                         if (mq->buffers == NULL)
1085                                 return -ENOMEM;
1086                 }
1087         }
1088         return 0;
1089 }
1090
/*
 * Create the memory regions, initialize the rings inside them and bind
 * the device queues to those rings, in that order.
 *
 * @return 0 on success, or the negative value returned by the failing step
 */
int
memif_init_regions_and_queues(struct rte_eth_dev *dev)
{
	int rc = memif_regions_init(dev);

	if (rc < 0)
		return rc;

	memif_init_rings(dev);

	rc = memif_init_queues(dev);
	return (rc < 0) ? rc : 0;
}
1108
1109 int
1110 memif_connect(struct rte_eth_dev *dev)
1111 {
1112         struct pmd_internals *pmd = dev->data->dev_private;
1113         struct pmd_process_private *proc_private = dev->process_private;
1114         struct memif_region *mr;
1115         struct memif_queue *mq;
1116         memif_ring_t *ring;
1117         int i;
1118
1119         for (i = 0; i < proc_private->regions_num; i++) {
1120                 mr = proc_private->regions[i];
1121                 if (mr != NULL) {
1122                         if (mr->addr == NULL) {
1123                                 if (mr->fd < 0)
1124                                         return -1;
1125                                 mr->addr = mmap(NULL, mr->region_size,
1126                                                 PROT_READ | PROT_WRITE,
1127                                                 MAP_SHARED, mr->fd, 0);
1128                                 if (mr->addr == MAP_FAILED) {
1129                                         MIF_LOG(ERR, "mmap failed: %s\n",
1130                                                 strerror(errno));
1131                                         return -1;
1132                                 }
1133                         }
1134                         if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
1135                                 /* close memseg file */
1136                                 close(mr->fd);
1137                                 mr->fd = -1;
1138                         }
1139                 }
1140         }
1141
1142         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1143                 for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1144                         mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
1145                             dev->data->tx_queues[i] : dev->data->rx_queues[i];
1146                         ring = memif_get_ring_from_queue(proc_private, mq);
1147                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1148                                 MIF_LOG(ERR, "Wrong ring");
1149                                 return -1;
1150                         }
1151                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1152                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1153                         mq->last_head = 0;
1154                         mq->last_tail = 0;
1155                         /* enable polling mode */
1156                         if (pmd->role == MEMIF_ROLE_MASTER)
1157                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1158                 }
1159                 for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1160                         mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
1161                             dev->data->rx_queues[i] : dev->data->tx_queues[i];
1162                         ring = memif_get_ring_from_queue(proc_private, mq);
1163                         if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
1164                                 MIF_LOG(ERR, "Wrong ring");
1165                                 return -1;
1166                         }
1167                         __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
1168                         __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
1169                         mq->last_head = 0;
1170                         mq->last_tail = 0;
1171                         /* enable polling mode */
1172                         if (pmd->role == MEMIF_ROLE_SLAVE)
1173                                 ring->flags = MEMIF_RING_FLAG_MASK_INT;
1174                 }
1175
1176                 pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
1177                 pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
1178                 dev->data->dev_link.link_status = ETH_LINK_UP;
1179         }
1180         MIF_LOG(INFO, "Connected.");
1181         return 0;
1182 }
1183
1184 static int
1185 memif_dev_start(struct rte_eth_dev *dev)
1186 {
1187         struct pmd_internals *pmd = dev->data->dev_private;
1188         int ret = 0;
1189
1190         switch (pmd->role) {
1191         case MEMIF_ROLE_SLAVE:
1192                 ret = memif_connect_slave(dev);
1193                 break;
1194         case MEMIF_ROLE_MASTER:
1195                 ret = memif_connect_master(dev);
1196                 break;
1197         default:
1198                 MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
1199                 ret = -1;
1200                 break;
1201         }
1202
1203         return ret;
1204 }
1205
1206 static void
1207 memif_dev_close(struct rte_eth_dev *dev)
1208 {
1209         struct pmd_internals *pmd = dev->data->dev_private;
1210         int i;
1211
1212         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1213                 memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
1214                 memif_disconnect(dev);
1215
1216                 for (i = 0; i < dev->data->nb_rx_queues; i++)
1217                         (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]);
1218                 for (i = 0; i < dev->data->nb_tx_queues; i++)
1219                         (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]);
1220
1221                 memif_socket_remove_device(dev);
1222         } else {
1223                 memif_disconnect(dev);
1224         }
1225
1226         rte_free(dev->process_private);
1227 }
1228
1229 static int
1230 memif_dev_configure(struct rte_eth_dev *dev)
1231 {
1232         struct pmd_internals *pmd = dev->data->dev_private;
1233
1234         /*
1235          * SLAVE - TXQ
1236          * MASTER - RXQ
1237          */
1238         pmd->cfg.num_s2m_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
1239                                   dev->data->nb_tx_queues : dev->data->nb_rx_queues;
1240
1241         /*
1242          * SLAVE - RXQ
1243          * MASTER - TXQ
1244          */
1245         pmd->cfg.num_m2s_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
1246                                   dev->data->nb_rx_queues : dev->data->nb_tx_queues;
1247
1248         return 0;
1249 }
1250
1251 static int
1252 memif_tx_queue_setup(struct rte_eth_dev *dev,
1253                      uint16_t qid,
1254                      uint16_t nb_tx_desc __rte_unused,
1255                      unsigned int socket_id __rte_unused,
1256                      const struct rte_eth_txconf *tx_conf __rte_unused)
1257 {
1258         struct pmd_internals *pmd = dev->data->dev_private;
1259         struct memif_queue *mq;
1260
1261         mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
1262         if (mq == NULL) {
1263                 MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
1264                 return -ENOMEM;
1265         }
1266
1267         mq->type =
1268             (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S;
1269         mq->n_pkts = 0;
1270         mq->n_bytes = 0;
1271         mq->intr_handle.fd = -1;
1272         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1273         mq->in_port = dev->data->port_id;
1274         dev->data->tx_queues[qid] = mq;
1275
1276         return 0;
1277 }
1278
1279 static int
1280 memif_rx_queue_setup(struct rte_eth_dev *dev,
1281                      uint16_t qid,
1282                      uint16_t nb_rx_desc __rte_unused,
1283                      unsigned int socket_id __rte_unused,
1284                      const struct rte_eth_rxconf *rx_conf __rte_unused,
1285                      struct rte_mempool *mb_pool)
1286 {
1287         struct pmd_internals *pmd = dev->data->dev_private;
1288         struct memif_queue *mq;
1289
1290         mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
1291         if (mq == NULL) {
1292                 MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
1293                 return -ENOMEM;
1294         }
1295
1296         mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M;
1297         mq->n_pkts = 0;
1298         mq->n_bytes = 0;
1299         mq->intr_handle.fd = -1;
1300         mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
1301         mq->mempool = mb_pool;
1302         mq->in_port = dev->data->port_id;
1303         dev->data->rx_queues[qid] = mq;
1304
1305         return 0;
1306 }
1307
/*
 * .rx_queue_release / .tx_queue_release handler: free a queue structure.
 * A NULL queue is accepted and ignored (rte_free() does nothing on NULL).
 */
static void
memif_queue_release(void *queue)
{
	rte_free(queue);
}
1318
1319 static int
1320 memif_link_update(struct rte_eth_dev *dev,
1321                   int wait_to_complete __rte_unused)
1322 {
1323         struct pmd_process_private *proc_private;
1324
1325         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1326                 proc_private = dev->process_private;
1327                 if (dev->data->dev_link.link_status == ETH_LINK_UP &&
1328                                 proc_private->regions_num == 0) {
1329                         memif_mp_request_regions(dev);
1330                 } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN &&
1331                                 proc_private->regions_num > 0) {
1332                         memif_free_regions(dev);
1333                 }
1334         }
1335         return 0;
1336 }
1337
1338 static int
1339 memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1340 {
1341         struct pmd_internals *pmd = dev->data->dev_private;
1342         struct memif_queue *mq;
1343         int i;
1344         uint8_t tmp, nq;
1345
1346         stats->ipackets = 0;
1347         stats->ibytes = 0;
1348         stats->opackets = 0;
1349         stats->obytes = 0;
1350
1351         tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings :
1352             pmd->run.num_m2s_rings;
1353         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1354             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1355
1356         /* RX stats */
1357         for (i = 0; i < nq; i++) {
1358                 mq = dev->data->rx_queues[i];
1359                 stats->q_ipackets[i] = mq->n_pkts;
1360                 stats->q_ibytes[i] = mq->n_bytes;
1361                 stats->ipackets += mq->n_pkts;
1362                 stats->ibytes += mq->n_bytes;
1363         }
1364
1365         tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings :
1366             pmd->run.num_s2m_rings;
1367         nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1368             RTE_ETHDEV_QUEUE_STAT_CNTRS;
1369
1370         /* TX stats */
1371         for (i = 0; i < nq; i++) {
1372                 mq = dev->data->tx_queues[i];
1373                 stats->q_opackets[i] = mq->n_pkts;
1374                 stats->q_obytes[i] = mq->n_bytes;
1375                 stats->opackets += mq->n_pkts;
1376                 stats->obytes += mq->n_bytes;
1377         }
1378         return 0;
1379 }
1380
1381 static int
1382 memif_stats_reset(struct rte_eth_dev *dev)
1383 {
1384         struct pmd_internals *pmd = dev->data->dev_private;
1385         int i;
1386         struct memif_queue *mq;
1387
1388         for (i = 0; i < pmd->run.num_s2m_rings; i++) {
1389                 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->tx_queues[i] :
1390                     dev->data->rx_queues[i];
1391                 mq->n_pkts = 0;
1392                 mq->n_bytes = 0;
1393         }
1394         for (i = 0; i < pmd->run.num_m2s_rings; i++) {
1395                 mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->rx_queues[i] :
1396                     dev->data->tx_queues[i];
1397                 mq->n_pkts = 0;
1398                 mq->n_bytes = 0;
1399         }
1400
1401         return 0;
1402 }
1403
1404 static int
1405 memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1406                            uint16_t qid __rte_unused)
1407 {
1408         MIF_LOG(WARNING, "Interrupt mode not supported.");
1409
1410         return -1;
1411 }
1412
1413 static int
1414 memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1415 {
1416         struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1417
1418         return 0;
1419 }
1420
1421 static const struct eth_dev_ops ops = {
1422         .dev_start = memif_dev_start,
1423         .dev_close = memif_dev_close,
1424         .dev_infos_get = memif_dev_info,
1425         .dev_configure = memif_dev_configure,
1426         .tx_queue_setup = memif_tx_queue_setup,
1427         .rx_queue_setup = memif_rx_queue_setup,
1428         .rx_queue_release = memif_queue_release,
1429         .tx_queue_release = memif_queue_release,
1430         .rx_queue_intr_enable = memif_rx_queue_intr_enable,
1431         .rx_queue_intr_disable = memif_rx_queue_intr_disable,
1432         .link_update = memif_link_update,
1433         .stats_get = memif_stats_get,
1434         .stats_reset = memif_stats_reset,
1435 };
1436
1437 static int
1438 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1439              memif_interface_id_t id, uint32_t flags,
1440              const char *socket_filename,
1441              memif_log2_ring_size_t log2_ring_size,
1442              uint16_t pkt_buffer_size, const char *secret,
1443              struct rte_ether_addr *ether_addr)
1444 {
1445         int ret = 0;
1446         struct rte_eth_dev *eth_dev;
1447         struct rte_eth_dev_data *data;
1448         struct pmd_internals *pmd;
1449         struct pmd_process_private *process_private;
1450         const unsigned int numa_node = vdev->device.numa_node;
1451         const char *name = rte_vdev_device_name(vdev);
1452
1453         eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1454         if (eth_dev == NULL) {
1455                 MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1456                 return -1;
1457         }
1458
1459         process_private = (struct pmd_process_private *)
1460                 rte_zmalloc(name, sizeof(struct pmd_process_private),
1461                             RTE_CACHE_LINE_SIZE);
1462
1463         if (process_private == NULL) {
1464                 MIF_LOG(ERR, "Failed to alloc memory for process private");
1465                 return -1;
1466         }
1467         eth_dev->process_private = process_private;
1468
1469         pmd = eth_dev->data->dev_private;
1470         memset(pmd, 0, sizeof(*pmd));
1471
1472         pmd->id = id;
1473         pmd->flags = flags;
1474         pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1475         pmd->role = role;
1476         /* Zero-copy flag irelevant to master. */
1477         if (pmd->role == MEMIF_ROLE_MASTER)
1478                 pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1479
1480         ret = memif_socket_init(eth_dev, socket_filename);
1481         if (ret < 0)
1482                 return ret;
1483
1484         memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
1485         if (secret != NULL)
1486                 strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1487
1488         pmd->cfg.log2_ring_size = log2_ring_size;
1489         /* set in .dev_configure() */
1490         pmd->cfg.num_s2m_rings = 0;
1491         pmd->cfg.num_m2s_rings = 0;
1492
1493         pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1494         rte_spinlock_init(&pmd->cc_lock);
1495
1496         data = eth_dev->data;
1497         data->dev_private = pmd;
1498         data->numa_node = numa_node;
1499         data->dev_link = pmd_link;
1500         data->mac_addrs = ether_addr;
1501         data->promiscuous = 1;
1502
1503         eth_dev->dev_ops = &ops;
1504         eth_dev->device = &vdev->device;
1505         if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1506                 eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1507                 eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1508         } else {
1509                 eth_dev->rx_pkt_burst = eth_memif_rx;
1510                 eth_dev->tx_pkt_burst = eth_memif_tx;
1511         }
1512
1513
1514         eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
1515
1516         rte_eth_dev_probing_finish(eth_dev);
1517
1518         return 0;
1519 }
1520
1521 static int
1522 memif_set_role(const char *key __rte_unused, const char *value,
1523                void *extra_args)
1524 {
1525         enum memif_role_t *role = (enum memif_role_t *)extra_args;
1526
1527         if (strstr(value, "master") != NULL) {
1528                 *role = MEMIF_ROLE_MASTER;
1529         } else if (strstr(value, "slave") != NULL) {
1530                 *role = MEMIF_ROLE_SLAVE;
1531         } else {
1532                 MIF_LOG(ERR, "Unknown role: %s.", value);
1533                 return -EINVAL;
1534         }
1535         return 0;
1536 }
1537
1538 static int
1539 memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1540 {
1541         uint32_t *flags = (uint32_t *)extra_args;
1542
1543         if (strstr(value, "yes") != NULL) {
1544                 if (!rte_mcfg_get_single_file_segments()) {
1545                         MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1546                         return -ENOTSUP;
1547                 }
1548                 *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1549         } else if (strstr(value, "no") != NULL) {
1550                 *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1551         } else {
1552                 MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1553                 return -EINVAL;
1554         }
1555         return 0;
1556 }
1557
1558 static int
1559 memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1560 {
1561         memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1562
1563         /* even if parsing fails, 0 is a valid id */
1564         *id = strtoul(value, NULL, 10);
1565         return 0;
1566 }
1567
1568 static int
1569 memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1570 {
1571         unsigned long tmp;
1572         uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1573
1574         tmp = strtoul(value, NULL, 10);
1575         if (tmp == 0 || tmp > 0xFFFF) {
1576                 MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1577                 return -EINVAL;
1578         }
1579         *pkt_buffer_size = tmp;
1580         return 0;
1581 }
1582
1583 static int
1584 memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1585 {
1586         unsigned long tmp;
1587         memif_log2_ring_size_t *log2_ring_size =
1588             (memif_log2_ring_size_t *)extra_args;
1589
1590         tmp = strtoul(value, NULL, 10);
1591         if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1592                 MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1593                         value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1594                 return -EINVAL;
1595         }
1596         *log2_ring_size = tmp;
1597         return 0;
1598 }
1599
1600 /* check if directory exists and if we have permission to read/write */
1601 static int
1602 memif_check_socket_filename(const char *filename)
1603 {
1604         char *dir = NULL, *tmp;
1605         uint32_t idx;
1606         int ret = 0;
1607
1608         if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1609                 MIF_LOG(ERR, "Unix socket address too long (max 108).");
1610                 return -1;
1611         }
1612
1613         tmp = strrchr(filename, '/');
1614         if (tmp != NULL) {
1615                 idx = tmp - filename;
1616                 dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1617                 if (dir == NULL) {
1618                         MIF_LOG(ERR, "Failed to allocate memory.");
1619                         return -1;
1620                 }
1621                 strlcpy(dir, filename, sizeof(char) * (idx + 1));
1622         }
1623
1624         if (dir == NULL || (faccessat(-1, dir, F_OK | R_OK |
1625                                         W_OK, AT_EACCESS) < 0)) {
1626                 MIF_LOG(ERR, "Invalid socket directory.");
1627                 ret = -EINVAL;
1628         }
1629
1630         if (dir != NULL)
1631                 rte_free(dir);
1632
1633         return ret;
1634 }
1635
1636 static int
1637 memif_set_socket_filename(const char *key __rte_unused, const char *value,
1638                           void *extra_args)
1639 {
1640         const char **socket_filename = (const char **)extra_args;
1641
1642         *socket_filename = value;
1643         return memif_check_socket_filename(*socket_filename);
1644 }
1645
1646 static int
1647 memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1648 {
1649         struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1650
1651         if (rte_ether_unformat_addr(value, ether_addr) < 0)
1652                 MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1653         return 0;
1654 }
1655
1656 static int
1657 memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1658 {
1659         const char **secret = (const char **)extra_args;
1660
1661         *secret = value;
1662         return 0;
1663 }
1664
1665 static int
1666 rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1667 {
1668         RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1669         RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1670         int ret = 0;
1671         struct rte_kvargs *kvlist;
1672         const char *name = rte_vdev_device_name(vdev);
1673         enum memif_role_t role = MEMIF_ROLE_SLAVE;
1674         memif_interface_id_t id = 0;
1675         uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1676         memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1677         const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1678         uint32_t flags = 0;
1679         const char *secret = NULL;
1680         struct rte_ether_addr *ether_addr = rte_zmalloc("",
1681                 sizeof(struct rte_ether_addr), 0);
1682         struct rte_eth_dev *eth_dev;
1683
1684         rte_eth_random_addr(ether_addr->addr_bytes);
1685
1686         MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1687
1688         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1689                 eth_dev = rte_eth_dev_attach_secondary(name);
1690                 if (!eth_dev) {
1691                         MIF_LOG(ERR, "Failed to probe %s", name);
1692                         return -1;
1693                 }
1694
1695                 eth_dev->dev_ops = &ops;
1696                 eth_dev->device = &vdev->device;
1697                 eth_dev->rx_pkt_burst = eth_memif_rx;
1698                 eth_dev->tx_pkt_burst = eth_memif_tx;
1699
1700                 if (!rte_eal_primary_proc_alive(NULL)) {
1701                         MIF_LOG(ERR, "Primary process is missing");
1702                         return -1;
1703                 }
1704
1705                 eth_dev->process_private = (struct pmd_process_private *)
1706                         rte_zmalloc(name,
1707                                 sizeof(struct pmd_process_private),
1708                                 RTE_CACHE_LINE_SIZE);
1709                 if (eth_dev->process_private == NULL) {
1710                         MIF_LOG(ERR,
1711                                 "Failed to alloc memory for process private");
1712                         return -1;
1713                 }
1714
1715                 rte_eth_dev_probing_finish(eth_dev);
1716
1717                 return 0;
1718         }
1719
1720         ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1721         /*
1722          * Primary process can continue probing, but secondary process won't
1723          * be able to get memory regions information
1724          */
1725         if (ret < 0 && rte_errno != EEXIST)
1726                 MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1727                         strerror(rte_errno));
1728
1729         kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1730
1731         /* parse parameters */
1732         if (kvlist != NULL) {
1733                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1734                                          &memif_set_role, &role);
1735                 if (ret < 0)
1736                         goto exit;
1737                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1738                                          &memif_set_id, &id);
1739                 if (ret < 0)
1740                         goto exit;
1741                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1742                                          &memif_set_bs, &pkt_buffer_size);
1743                 if (ret < 0)
1744                         goto exit;
1745                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1746                                          &memif_set_rs, &log2_ring_size);
1747                 if (ret < 0)
1748                         goto exit;
1749                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1750                                          &memif_set_socket_filename,
1751                                          (void *)(&socket_filename));
1752                 if (ret < 0)
1753                         goto exit;
1754                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1755                                          &memif_set_mac, ether_addr);
1756                 if (ret < 0)
1757                         goto exit;
1758                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1759                                          &memif_set_zc, &flags);
1760                 if (ret < 0)
1761                         goto exit;
1762                 ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1763                                          &memif_set_secret, (void *)(&secret));
1764                 if (ret < 0)
1765                         goto exit;
1766         }
1767
1768         /* create interface */
1769         ret = memif_create(vdev, role, id, flags, socket_filename,
1770                            log2_ring_size, pkt_buffer_size, secret, ether_addr);
1771
1772 exit:
1773         if (kvlist != NULL)
1774                 rte_kvargs_free(kvlist);
1775         return ret;
1776 }
1777
1778 static int
1779 rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1780 {
1781         struct rte_eth_dev *eth_dev;
1782
1783         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1784         if (eth_dev == NULL)
1785                 return 0;
1786
1787         rte_eth_dev_close(eth_dev->data->port_id);
1788
1789         return 0;
1790 }
1791
1792 static struct rte_vdev_driver pmd_memif_drv = {
1793         .probe = rte_pmd_memif_probe,
1794         .remove = rte_pmd_memif_remove,
1795 };
1796
1797 RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1798
1799 RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1800                               ETH_MEMIF_ID_ARG "=<int>"
1801                               ETH_MEMIF_ROLE_ARG "=master|slave"
1802                               ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1803                               ETH_MEMIF_RING_SIZE_ARG "=<int>"
1804                               ETH_MEMIF_SOCKET_ARG "=<string>"
1805                               ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1806                               ETH_MEMIF_ZC_ARG "=yes|no"
1807                               ETH_MEMIF_SECRET_ARG "=<string>");
1808
1809 int memif_logtype;
1810
1811 RTE_INIT(memif_init_log)
1812 {
1813         memif_logtype = rte_log_register("pmd.net.memif");
1814         if (memif_logtype >= 0)
1815                 rte_log_set_level(memif_logtype, RTE_LOG_NOTICE);
1816 }