net/memif: introduce memory interface PMD
drivers/net/memif/rte_eth_memif.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
 */

#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/if_ether.h>
#include <errno.h>
#include <sys/eventfd.h>

#include <rte_version.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_string_fns.h>

#include "rte_eth_memif.h"
#include "memif_socket.h"

#define ETH_MEMIF_ID_ARG                "id"
#define ETH_MEMIF_ROLE_ARG              "role"
#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG   "bsize"
#define ETH_MEMIF_RING_SIZE_ARG         "rsize"
#define ETH_MEMIF_SOCKET_ARG            "socket"
#define ETH_MEMIF_MAC_ARG               "mac"
#define ETH_MEMIF_ZC_ARG                "zero-copy"
#define ETH_MEMIF_SECRET_ARG            "secret"

static const char * const valid_arguments[] = {
        ETH_MEMIF_ID_ARG,
        ETH_MEMIF_ROLE_ARG,
        ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
        ETH_MEMIF_RING_SIZE_ARG,
        ETH_MEMIF_SOCKET_ARG,
        ETH_MEMIF_MAC_ARG,
        ETH_MEMIF_ZC_ARG,
        ETH_MEMIF_SECRET_ARG,
        NULL
};

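/*
 * Illustrative devargs using these parameters; the socket path, MAC and
 * sizes below are placeholders, not documented defaults:
 *
 *   --vdev=net_memif0,role=master,id=0,bsize=2048,rsize=10,
 *     socket=/tmp/memif.sock,mac=aa:bb:cc:dd:ee:ff,zero-copy=no
 */
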
const char *
memif_version(void)
{
        return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
}

static void
memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
{
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
        dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
        dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
        dev_info->min_rx_bufsize = 0;
}

static memif_ring_t *
memif_get_ring(struct pmd_internals *pmd, memif_ring_type_t type, uint16_t ring_num)
{
        /* rings only in region 0 */
        void *p = pmd->regions[0]->addr;
        int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
            (1 << pmd->run.log2_ring_size);

        p = (uint8_t *)p + (ring_num + type * pmd->run.num_s2m_rings) * ring_size;

        return (memif_ring_t *)p;
}
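
/*
 * A sketch of the region 0 layout that the pointer arithmetic above
 * assumes (sizes illustrative; memif_desc_t is 16 bytes, enforced by the
 * build-time assert in rte_pmd_memif_probe()):
 *
 *   ring_size = sizeof(memif_ring_t) + (1 << log2_ring_size) * 16
 *   S2M rings 0..num_s2m_rings-1, then the M2S rings, each ring_size
 *   bytes apart, followed by the packet buffers starting at
 *   pkt_buffer_offset (see memif_region_init_shm()).
 */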

static void *
memif_get_buffer(struct pmd_internals *pmd, memif_desc_t *d)
{
        return ((uint8_t *)pmd->regions[d->region]->addr + d->offset);
}

static int
memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
                    struct rte_mbuf *tail)
{
        /* Check for number-of-segments-overflow */
        if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
                return -EOVERFLOW;

        /* Chain 'tail' onto the old tail */
        cur_tail->next = tail;

        /* accumulate number of segments and total length. */
        head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);

        tail->pkt_len = tail->data_len;
        head->pkt_len += tail->pkt_len;

        return 0;
}
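
/*
 * Note: unlike rte_pktmbuf_chain(), the caller passes the current tail
 * explicitly, so the helper does not have to walk the segment list to
 * find it on every append.
 */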

static uint16_t
eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct memif_queue *mq = queue;
        struct pmd_internals *pmd = mq->pmd;
        memif_ring_t *ring = mq->ring;
        uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
        uint16_t n_rx_pkts = 0;
        uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
                RTE_PKTMBUF_HEADROOM;
        uint16_t src_len, src_off, dst_len, dst_off, cp_len;
        memif_ring_type_t type = mq->type;
        memif_desc_t *d0;
        struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
        uint64_t b;
        ssize_t size __rte_unused;
        uint16_t head;
        int ret;

        if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
                return 0;
        if (unlikely(ring == NULL))
                return 0;

        /* consume interrupt */
        if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
                size = read(mq->intr_handle.fd, &b, sizeof(b));

        ring_size = 1 << mq->log2_ring_size;
        mask = ring_size - 1;

        cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail;
        last_slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
        if (cur_slot == last_slot)
                goto refill;
        n_slots = last_slot - cur_slot;

        while (n_slots && n_rx_pkts < nb_pkts) {
                mbuf_head = rte_pktmbuf_alloc(mq->mempool);
                if (unlikely(mbuf_head == NULL))
                        goto no_free_bufs;
                mbuf = mbuf_head;
                mbuf->port = mq->in_port;

next_slot:
                s0 = cur_slot & mask;
                d0 = &ring->desc[s0];

                src_len = d0->length;
                dst_off = 0;
                src_off = 0;

                do {
                        dst_len = mbuf_size - dst_off;
                        if (dst_len == 0) {
                                dst_off = 0;
                                dst_len = mbuf_size;

                                /* store pointer to tail */
                                mbuf_tail = mbuf;
                                mbuf = rte_pktmbuf_alloc(mq->mempool);
                                if (unlikely(mbuf == NULL))
                                        goto no_free_bufs;
                                mbuf->port = mq->in_port;
                                ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
                                if (unlikely(ret < 0)) {
                                        MIF_LOG(ERR, "%s: number-of-segments-overflow",
                                                rte_vdev_device_name(pmd->vdev));
                                        rte_pktmbuf_free(mbuf);
                                        goto no_free_bufs;
                                }
                        }
                        cp_len = RTE_MIN(dst_len, src_len);

                        rte_pktmbuf_data_len(mbuf) += cp_len;
                        rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
                        if (mbuf != mbuf_head)
                                rte_pktmbuf_pkt_len(mbuf_head) += cp_len;

                        memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
                               (uint8_t *)memif_get_buffer(pmd, d0) + src_off, cp_len);

                        src_off += cp_len;
                        dst_off += cp_len;
                        src_len -= cp_len;
                } while (src_len);

                cur_slot++;
                n_slots--;

                if (d0->flags & MEMIF_DESC_FLAG_NEXT)
                        goto next_slot;

                mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
                *bufs++ = mbuf_head;
                n_rx_pkts++;
        }

no_free_bufs:
        if (type == MEMIF_RING_S2M) {
                rte_mb();
                ring->tail = cur_slot;
                mq->last_head = cur_slot;
        } else {
                mq->last_tail = cur_slot;
        }

refill:
        if (type == MEMIF_RING_M2S) {
                head = ring->head;
                n_slots = ring_size - head + mq->last_tail;

                while (n_slots--) {
                        s0 = head++ & mask;
                        d0 = &ring->desc[s0];
                        d0->length = pmd->run.pkt_buffer_size;
                }
                rte_mb();
                ring->head = head;
        }

        mq->n_pkts += n_rx_pkts;
        return n_rx_pkts;
}
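
/*
 * From the application's point of view this is an ordinary rx burst; a
 * minimal polling loop might look like the following (hypothetical port
 * and queue ids):
 *
 *   struct rte_mbuf *pkts[32];
 *   uint16_t i, n = rte_eth_rx_burst(port_id, 0, pkts, 32);
 *   for (i = 0; i < n; i++)
 *           rte_pktmbuf_free(pkts[i]);
 */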

static uint16_t
eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct memif_queue *mq = queue;
        struct pmd_internals *pmd = mq->pmd;
        memif_ring_t *ring = mq->ring;
        uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
        uint16_t src_len, src_off, dst_len, dst_off, cp_len;
        memif_ring_type_t type = mq->type;
        memif_desc_t *d0;
        struct rte_mbuf *mbuf;
        struct rte_mbuf *mbuf_head;
        uint64_t a;
        ssize_t size;

        if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
                return 0;
        if (unlikely(ring == NULL))
                return 0;

        ring_size = 1 << mq->log2_ring_size;
        mask = ring_size - 1;

        n_free = ring->tail - mq->last_tail;
        mq->last_tail += n_free;
        slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;

        if (type == MEMIF_RING_S2M)
                n_free = ring_size - ring->head + mq->last_tail;
        else
                n_free = ring->head - ring->tail;

        while (n_tx_pkts < nb_pkts && n_free) {
                mbuf_head = *bufs++;
                mbuf = mbuf_head;

                saved_slot = slot;
                d0 = &ring->desc[slot & mask];
                dst_off = 0;
                dst_len = (type == MEMIF_RING_S2M) ?
                        pmd->run.pkt_buffer_size : d0->length;

next_in_chain:
                src_off = 0;
                src_len = rte_pktmbuf_data_len(mbuf);

                while (src_len) {
                        if (dst_len == 0) {
                                if (n_free) {
                                        slot++;
                                        n_free--;
                                        d0->flags |= MEMIF_DESC_FLAG_NEXT;
                                        d0 = &ring->desc[slot & mask];
                                        dst_off = 0;
                                        dst_len = (type == MEMIF_RING_S2M) ?
                                            pmd->run.pkt_buffer_size : d0->length;
                                        d0->flags = 0;
                                } else {
                                        slot = saved_slot;
                                        goto no_free_slots;
                                }
                        }
                        cp_len = RTE_MIN(dst_len, src_len);

                        memcpy((uint8_t *)memif_get_buffer(pmd, d0) + dst_off,
                               rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
                               cp_len);

                        mq->n_bytes += cp_len;
                        src_off += cp_len;
                        dst_off += cp_len;
                        src_len -= cp_len;
                        dst_len -= cp_len;

                        d0->length = dst_off;
                }

                if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
                        mbuf = mbuf->next;
                        goto next_in_chain;
                }

                n_tx_pkts++;
                slot++;
                n_free--;
                rte_pktmbuf_free(mbuf_head);
        }

no_free_slots:
        rte_mb();
        if (type == MEMIF_RING_S2M)
                ring->head = slot;
        else
                ring->tail = slot;

        if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
                a = 1;
                size = write(mq->intr_handle.fd, &a, sizeof(a));
                if (unlikely(size < 0)) {
                        MIF_LOG(WARNING,
                                "%s: Failed to send interrupt. %s",
                                rte_vdev_device_name(pmd->vdev), strerror(errno));
                }
        }

        mq->n_err += nb_pkts - n_tx_pkts;
        mq->n_pkts += n_tx_pkts;
        return n_tx_pkts;
}
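
/*
 * Worked example of the free-slot arithmetic above, assuming a 1024-slot
 * S2M ring (the uint16_t indices are free-running and wrap naturally):
 * with ring->head == 10 and mq->last_tail == 2, the producer may still
 * fill 1024 - 10 + 2 = 1016 slots before catching up with the consumer.
 */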

void
memif_free_regions(struct pmd_internals *pmd)
{
        int i;
        struct memif_region *r;

        /* regions are allocated contiguously, so it is enough
         * to loop over the first 'pmd->regions_num' entries
         */
        for (i = 0; i < pmd->regions_num; i++) {
                r = pmd->regions[i];
                if (r != NULL) {
                        if (r->addr != NULL) {
                                munmap(r->addr, r->region_size);
                                if (r->fd > 0) {
                                        close(r->fd);
                                        r->fd = -1;
                                }
                        }
                        rte_free(r);
                        pmd->regions[i] = NULL;
                }
        }
        pmd->regions_num = 0;
}

static int
memif_region_init_shm(struct pmd_internals *pmd, uint8_t has_buffers)
{
        char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
        int ret = 0;
        struct memif_region *r;

        if (pmd->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
                MIF_LOG(ERR, "%s: Too many regions.", rte_vdev_device_name(pmd->vdev));
                return -1;
        }

        r = rte_zmalloc("region", sizeof(struct memif_region), 0);
        if (r == NULL) {
                MIF_LOG(ERR, "%s: Failed to alloc memif region.",
                        rte_vdev_device_name(pmd->vdev));
                return -ENOMEM;
        }

        /* calculate buffer offset */
        r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) *
            (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
            (1 << pmd->run.log2_ring_size));

        r->region_size = r->pkt_buffer_offset;
        /* if region has buffers, add buffers size to region_size */
        if (has_buffers == 1)
                r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
                        (1 << pmd->run.log2_ring_size) *
                        (pmd->run.num_s2m_rings +
                         pmd->run.num_m2s_rings));

        memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
        snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
                 pmd->regions_num);

        r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
        if (r->fd < 0) {
                MIF_LOG(ERR, "%s: Failed to create shm file: %s.",
                        rte_vdev_device_name(pmd->vdev),
                        strerror(errno));
                ret = -1;
                goto error;
        }

        ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
        if (ret < 0) {
                MIF_LOG(ERR, "%s: Failed to add seals to shm file: %s.",
                        rte_vdev_device_name(pmd->vdev),
                        strerror(errno));
                goto error;
        }

        ret = ftruncate(r->fd, r->region_size);
        if (ret < 0) {
                MIF_LOG(ERR, "%s: Failed to truncate shm file: %s.",
                        rte_vdev_device_name(pmd->vdev),
                        strerror(errno));
                goto error;
        }

        r->addr = mmap(NULL, r->region_size, PROT_READ |
                       PROT_WRITE, MAP_SHARED, r->fd, 0);
        if (r->addr == MAP_FAILED) {
                MIF_LOG(ERR, "%s: Failed to mmap shm region: %s.",
                        rte_vdev_device_name(pmd->vdev),
                        strerror(errno));
                ret = -1;
                goto error;
        }

        pmd->regions[pmd->regions_num] = r;
        pmd->regions_num++;

        return ret;

error:
        if (r->fd > 0)
                close(r->fd);
        r->fd = -1;

        return ret;
}
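
/*
 * Example sizing, assuming one S2M and one M2S ring, log2_ring_size = 10
 * and a 2048-byte packet buffer: pkt_buffer_offset covers
 * 2 * (sizeof(memif_ring_t) + 1024 * 16) bytes of ring metadata, and the
 * buffer area adds 2 * 1024 * 2048 = 4 MiB to region_size.
 */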

static int
memif_regions_init(struct pmd_internals *pmd)
{
        int ret;

        /* create one buffer region */
        ret = memif_region_init_shm(pmd, /* has buffer */ 1);
        if (ret < 0)
                return ret;

        return 0;
}

static void
memif_init_rings(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        memif_ring_t *ring;
        int i, j;
        uint16_t slot;

        for (i = 0; i < pmd->run.num_s2m_rings; i++) {
                ring = memif_get_ring(pmd, MEMIF_RING_S2M, i);
                ring->head = 0;
                ring->tail = 0;
                ring->cookie = MEMIF_COOKIE;
                ring->flags = 0;
                for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
                        slot = i * (1 << pmd->run.log2_ring_size) + j;
                        ring->desc[j].region = 0;
                        ring->desc[j].offset = pmd->regions[0]->pkt_buffer_offset +
                                (uint32_t)(slot * pmd->run.pkt_buffer_size);
                        ring->desc[j].length = pmd->run.pkt_buffer_size;
                }
        }

        for (i = 0; i < pmd->run.num_m2s_rings; i++) {
                ring = memif_get_ring(pmd, MEMIF_RING_M2S, i);
                ring->head = 0;
                ring->tail = 0;
                ring->cookie = MEMIF_COOKIE;
                ring->flags = 0;
                for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
                        slot = (i + pmd->run.num_s2m_rings) *
                            (1 << pmd->run.log2_ring_size) + j;
                        ring->desc[j].region = 0;
                        ring->desc[j].offset = pmd->regions[0]->pkt_buffer_offset +
                                (uint32_t)(slot * pmd->run.pkt_buffer_size);
                        ring->desc[j].length = pmd->run.pkt_buffer_size;
                }
        }
}
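
/*
 * Each slot owns a fixed buffer: rings are numbered with all S2M rings
 * first, so slot j of M2S ring i maps to global slot
 * (i + num_s2m_rings) * (1 << log2_ring_size) + j, and its buffer lives
 * at pkt_buffer_offset + slot * pkt_buffer_size within region 0 (the
 * numbers mirror the initialization loops above).
 */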

/* called only by slave */
static void
memif_init_queues(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct memif_queue *mq;
        int i;

        for (i = 0; i < pmd->run.num_s2m_rings; i++) {
                mq = dev->data->tx_queues[i];
                mq->ring = memif_get_ring(pmd, MEMIF_RING_S2M, i);
                mq->log2_ring_size = pmd->run.log2_ring_size;
                /* queues located only in region 0 */
                mq->region = 0;
                mq->ring_offset = (uint8_t *)mq->ring - (uint8_t *)pmd->regions[0]->addr;
                mq->last_head = 0;
                mq->last_tail = 0;
                mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
                if (mq->intr_handle.fd < 0) {
                        MIF_LOG(WARNING,
                                "%s: Failed to create eventfd for tx queue %d: %s.",
                                rte_vdev_device_name(pmd->vdev), i,
                                strerror(errno));
                }
        }

        for (i = 0; i < pmd->run.num_m2s_rings; i++) {
                mq = dev->data->rx_queues[i];
                mq->ring = memif_get_ring(pmd, MEMIF_RING_M2S, i);
                mq->log2_ring_size = pmd->run.log2_ring_size;
                /* queues located only in region 0 */
                mq->region = 0;
                mq->ring_offset = (uint8_t *)mq->ring - (uint8_t *)pmd->regions[0]->addr;
                mq->last_head = 0;
                mq->last_tail = 0;
                mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
                if (mq->intr_handle.fd < 0) {
                        MIF_LOG(WARNING,
                                "%s: Failed to create eventfd for rx queue %d: %s.",
                                rte_vdev_device_name(pmd->vdev), i,
                                strerror(errno));
                }
        }
}

int
memif_init_regions_and_queues(struct rte_eth_dev *dev)
{
        int ret;

        ret = memif_regions_init(dev->data->dev_private);
        if (ret < 0)
                return ret;

        memif_init_rings(dev);

        memif_init_queues(dev);

        return 0;
}

int
memif_connect(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct memif_region *mr;
        struct memif_queue *mq;
        int i;

        for (i = 0; i < pmd->regions_num; i++) {
                mr = pmd->regions[i];
                if (mr != NULL) {
                        if (mr->addr == NULL) {
                                if (mr->fd < 0)
                                        return -1;
                                mr->addr = mmap(NULL, mr->region_size,
                                                PROT_READ | PROT_WRITE,
                                                MAP_SHARED, mr->fd, 0);
                                if (mr->addr == MAP_FAILED)
                                        return -1;
                        }
                }
        }

        for (i = 0; i < pmd->run.num_s2m_rings; i++) {
                mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
                    dev->data->tx_queues[i] : dev->data->rx_queues[i];
                mq->ring = (memif_ring_t *)((uint8_t *)pmd->regions[mq->region]->addr +
                            mq->ring_offset);
                if (mq->ring->cookie != MEMIF_COOKIE) {
                        MIF_LOG(ERR, "%s: Wrong cookie",
                                rte_vdev_device_name(pmd->vdev));
                        return -1;
                }
                mq->ring->head = 0;
                mq->ring->tail = 0;
                mq->last_head = 0;
                mq->last_tail = 0;
                /* enable polling mode */
                if (pmd->role == MEMIF_ROLE_MASTER)
                        mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
        }
        for (i = 0; i < pmd->run.num_m2s_rings; i++) {
                mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
                    dev->data->rx_queues[i] : dev->data->tx_queues[i];
                mq->ring = (memif_ring_t *)((uint8_t *)pmd->regions[mq->region]->addr +
                            mq->ring_offset);
                if (mq->ring->cookie != MEMIF_COOKIE) {
                        MIF_LOG(ERR, "%s: Wrong cookie",
                                rte_vdev_device_name(pmd->vdev));
                        return -1;
                }
                mq->ring->head = 0;
                mq->ring->tail = 0;
                mq->last_head = 0;
                mq->last_tail = 0;
                /* enable polling mode */
                if (pmd->role == MEMIF_ROLE_SLAVE)
                        mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
        }

        pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
        pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
        dev->data->dev_link.link_status = ETH_LINK_UP;
        MIF_LOG(INFO, "%s: Connected.", rte_vdev_device_name(pmd->vdev));
        return 0;
}

static int
memif_dev_start(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        int ret = 0;

        switch (pmd->role) {
        case MEMIF_ROLE_SLAVE:
                ret = memif_connect_slave(dev);
                break;
        case MEMIF_ROLE_MASTER:
                ret = memif_connect_master(dev);
                break;
        default:
                MIF_LOG(ERR, "%s: Unknown role: %d.",
                        rte_vdev_device_name(pmd->vdev), pmd->role);
                ret = -1;
                break;
        }

        return ret;
}

static void
memif_dev_close(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        int i;

        memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
        memif_disconnect(dev);

        for (i = 0; i < dev->data->nb_rx_queues; i++)
                (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]);
        for (i = 0; i < dev->data->nb_tx_queues; i++)
                (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]);

        memif_socket_remove_device(dev);
}

static int
memif_dev_configure(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;

        /*
         * SLAVE - TXQ
         * MASTER - RXQ
         */
        pmd->cfg.num_s2m_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
                                  dev->data->nb_tx_queues : dev->data->nb_rx_queues;

        /*
         * SLAVE - RXQ
         * MASTER - TXQ
         */
        pmd->cfg.num_m2s_rings = (pmd->role == MEMIF_ROLE_SLAVE) ?
                                  dev->data->nb_rx_queues : dev->data->nb_tx_queues;

        return 0;
}

static int
memif_tx_queue_setup(struct rte_eth_dev *dev,
                     uint16_t qid,
                     uint16_t nb_tx_desc __rte_unused,
                     unsigned int socket_id __rte_unused,
                     const struct rte_eth_txconf *tx_conf __rte_unused)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct memif_queue *mq;

        mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
        if (mq == NULL) {
                MIF_LOG(ERR, "%s: Failed to allocate tx queue id: %u",
                        rte_vdev_device_name(pmd->vdev), qid);
                return -ENOMEM;
        }

        mq->type =
            (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S;
        mq->n_pkts = 0;
        mq->n_bytes = 0;
        mq->n_err = 0;
        mq->intr_handle.fd = -1;
        mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
        mq->pmd = pmd;
        dev->data->tx_queues[qid] = mq;

        return 0;
}

static int
memif_rx_queue_setup(struct rte_eth_dev *dev,
                     uint16_t qid,
                     uint16_t nb_rx_desc __rte_unused,
                     unsigned int socket_id __rte_unused,
                     const struct rte_eth_rxconf *rx_conf __rte_unused,
                     struct rte_mempool *mb_pool)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct memif_queue *mq;

        mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
        if (mq == NULL) {
                MIF_LOG(ERR, "%s: Failed to allocate rx queue id: %u",
                        rte_vdev_device_name(pmd->vdev), qid);
                return -ENOMEM;
        }

        mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M;
        mq->n_pkts = 0;
        mq->n_bytes = 0;
        mq->n_err = 0;
        mq->intr_handle.fd = -1;
        mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
        mq->mempool = mb_pool;
        mq->in_port = dev->data->port_id;
        mq->pmd = pmd;
        dev->data->rx_queues[qid] = mq;

        return 0;
}
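
/*
 * Typical application-side setup driving these callbacks (a sketch with
 * hypothetical ids, config and mempool; error handling omitted):
 *
 *   rte_eth_dev_configure(port_id, 1, 1, &port_conf);
 *   rte_eth_rx_queue_setup(port_id, 0, 128, rte_socket_id(), NULL, pool);
 *   rte_eth_tx_queue_setup(port_id, 0, 128, rte_socket_id(), NULL);
 *   rte_eth_dev_start(port_id);
 */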

static void
memif_queue_release(void *queue)
{
        struct memif_queue *mq = (struct memif_queue *)queue;

        if (!mq)
                return;

        rte_free(mq);
}

static int
memif_link_update(struct rte_eth_dev *dev __rte_unused,
                  int wait_to_complete __rte_unused)
{
        return 0;
}

static int
memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct memif_queue *mq;
        int i;
        uint8_t tmp, nq;

        stats->ipackets = 0;
        stats->ibytes = 0;
        stats->opackets = 0;
        stats->obytes = 0;
        stats->oerrors = 0;

        tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings :
            pmd->run.num_m2s_rings;
        nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
            RTE_ETHDEV_QUEUE_STAT_CNTRS;

        /* RX stats */
        for (i = 0; i < nq; i++) {
                mq = dev->data->rx_queues[i];
                stats->q_ipackets[i] = mq->n_pkts;
                stats->q_ibytes[i] = mq->n_bytes;
                stats->ipackets += mq->n_pkts;
                stats->ibytes += mq->n_bytes;
        }

        tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings :
            pmd->run.num_s2m_rings;
        nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
            RTE_ETHDEV_QUEUE_STAT_CNTRS;

        /* TX stats */
        for (i = 0; i < nq; i++) {
                mq = dev->data->tx_queues[i];
                stats->q_opackets[i] = mq->n_pkts;
                stats->q_obytes[i] = mq->n_bytes;
                stats->opackets += mq->n_pkts;
                stats->obytes += mq->n_bytes;
                stats->oerrors += mq->n_err;
        }
        return 0;
}

static void
memif_stats_reset(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        int i;
        struct memif_queue *mq;

        for (i = 0; i < pmd->run.num_s2m_rings; i++) {
                mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->tx_queues[i] :
                    dev->data->rx_queues[i];
                mq->n_pkts = 0;
                mq->n_bytes = 0;
                mq->n_err = 0;
        }
        for (i = 0; i < pmd->run.num_m2s_rings; i++) {
                mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->rx_queues[i] :
                    dev->data->tx_queues[i];
                mq->n_pkts = 0;
                mq->n_bytes = 0;
                mq->n_err = 0;
        }
}

static int
memif_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
{
        struct pmd_internals *pmd = dev->data->dev_private;

        MIF_LOG(WARNING, "%s: Interrupt mode not supported.",
                rte_vdev_device_name(pmd->vdev));

        return -1;
}

static int
memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
{
        struct pmd_internals *pmd __rte_unused = dev->data->dev_private;

        return 0;
}

static const struct eth_dev_ops ops = {
        .dev_start = memif_dev_start,
        .dev_close = memif_dev_close,
        .dev_infos_get = memif_dev_info,
        .dev_configure = memif_dev_configure,
        .tx_queue_setup = memif_tx_queue_setup,
        .rx_queue_setup = memif_rx_queue_setup,
        .rx_queue_release = memif_queue_release,
        .tx_queue_release = memif_queue_release,
        .rx_queue_intr_enable = memif_rx_queue_intr_enable,
        .rx_queue_intr_disable = memif_rx_queue_intr_disable,
        .link_update = memif_link_update,
        .stats_get = memif_stats_get,
        .stats_reset = memif_stats_reset,
};

static int
memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
             memif_interface_id_t id, uint32_t flags,
             const char *socket_filename,
             memif_log2_ring_size_t log2_ring_size,
             uint16_t pkt_buffer_size, const char *secret,
             struct rte_ether_addr *ether_addr)
{
        int ret = 0;
        struct rte_eth_dev *eth_dev;
        struct rte_eth_dev_data *data;
        struct pmd_internals *pmd;
        const unsigned int numa_node = vdev->device.numa_node;
        const char *name = rte_vdev_device_name(vdev);

        if (flags & ETH_MEMIF_FLAG_ZERO_COPY) {
                MIF_LOG(ERR, "Zero-copy slave not supported.");
                return -1;
        }

        eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
        if (eth_dev == NULL) {
                MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
                return -1;
        }

        pmd = eth_dev->data->dev_private;
        memset(pmd, 0, sizeof(*pmd));

        pmd->vdev = vdev;
        pmd->id = id;
        pmd->flags = flags;
        pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
        pmd->role = role;

        ret = memif_socket_init(eth_dev, socket_filename);
        if (ret < 0)
                return ret;

        memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
        if (secret != NULL)
                strlcpy(pmd->secret, secret, sizeof(pmd->secret));

        pmd->cfg.log2_ring_size = log2_ring_size;
        /* set in .dev_configure() */
        pmd->cfg.num_s2m_rings = 0;
        pmd->cfg.num_m2s_rings = 0;

        pmd->cfg.pkt_buffer_size = pkt_buffer_size;

        data = eth_dev->data;
        data->dev_private = pmd;
        data->numa_node = numa_node;
        data->mac_addrs = ether_addr;

        eth_dev->dev_ops = &ops;
        eth_dev->device = &vdev->device;
        eth_dev->rx_pkt_burst = eth_memif_rx;
        eth_dev->tx_pkt_burst = eth_memif_tx;

        eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;

        rte_eth_dev_probing_finish(eth_dev);

        return 0;
}

static int
memif_set_role(const char *key __rte_unused, const char *value,
               void *extra_args)
{
        enum memif_role_t *role = (enum memif_role_t *)extra_args;

        if (strstr(value, "master") != NULL) {
                *role = MEMIF_ROLE_MASTER;
        } else if (strstr(value, "slave") != NULL) {
                *role = MEMIF_ROLE_SLAVE;
        } else {
                MIF_LOG(ERR, "Unknown role: %s.", value);
                return -EINVAL;
        }
        return 0;
}

static int
memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
{
        uint32_t *flags = (uint32_t *)extra_args;

        if (strstr(value, "yes") != NULL) {
                *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
        } else if (strstr(value, "no") != NULL) {
                *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
        } else {
                MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
                return -EINVAL;
        }
        return 0;
}

static int
memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
{
        memif_interface_id_t *id = (memif_interface_id_t *)extra_args;

        /* even if parsing fails, 0 is a valid id */
        *id = strtoul(value, NULL, 10);
        return 0;
}

static int
memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
{
        unsigned long tmp;
        uint16_t *pkt_buffer_size = (uint16_t *)extra_args;

        tmp = strtoul(value, NULL, 10);
        if (tmp == 0 || tmp > 0xFFFF) {
                MIF_LOG(ERR, "Invalid buffer size: %s.", value);
                return -EINVAL;
        }
        *pkt_buffer_size = tmp;
        return 0;
}

static int
memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
{
        unsigned long tmp;
        memif_log2_ring_size_t *log2_ring_size =
            (memif_log2_ring_size_t *)extra_args;

        tmp = strtoul(value, NULL, 10);
        if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
                MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
                        value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
                return -EINVAL;
        }
        *log2_ring_size = tmp;
        return 0;
}

/* check if directory exists and if we have permission to read/write */
static int
memif_check_socket_filename(const char *filename)
{
        char *dir = NULL, *tmp;
        uint32_t idx;
        int ret = 0;

        tmp = strrchr(filename, '/');
        if (tmp != NULL) {
                idx = tmp - filename;
                dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
                if (dir == NULL) {
                        MIF_LOG(ERR, "Failed to allocate memory.");
                        return -1;
                }
                strlcpy(dir, filename, sizeof(char) * (idx + 1));
        }

        if (dir == NULL || (faccessat(AT_FDCWD, dir, F_OK | R_OK |
                                        W_OK, AT_EACCESS) < 0)) {
                MIF_LOG(ERR, "Invalid socket directory.");
                ret = -EINVAL;
        }

        if (dir != NULL)
                rte_free(dir);

        return ret;
}

static int
memif_set_socket_filename(const char *key __rte_unused, const char *value,
                          void *extra_args)
{
        const char **socket_filename = (const char **)extra_args;

        *socket_filename = value;
        return memif_check_socket_filename(*socket_filename);
}

static int
memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
{
        struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
        int ret = 0;

        ret = sscanf(value, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
               &ether_addr->addr_bytes[0], &ether_addr->addr_bytes[1],
               &ether_addr->addr_bytes[2], &ether_addr->addr_bytes[3],
               &ether_addr->addr_bytes[4], &ether_addr->addr_bytes[5]);
        if (ret != 6)
                MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
        return 0;
}

static int
memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
{
        const char **secret = (const char **)extra_args;

        *secret = value;
        return 0;
}

static int
rte_pmd_memif_probe(struct rte_vdev_device *vdev)
{
        RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
        RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
        int ret = 0;
        struct rte_kvargs *kvlist;
        const char *name = rte_vdev_device_name(vdev);
        enum memif_role_t role = MEMIF_ROLE_SLAVE;
        memif_interface_id_t id = 0;
        uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
        memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
        const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
        uint32_t flags = 0;
        const char *secret = NULL;
        struct rte_ether_addr *ether_addr = rte_zmalloc("",
                sizeof(struct rte_ether_addr), 0);
        if (ether_addr == NULL)
                return -ENOMEM;

        rte_eth_random_addr(ether_addr->addr_bytes);

        MIF_LOG(INFO, "Initialize MEMIF: %s.", name);

        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                MIF_LOG(ERR, "Multi-processing not supported for memif.");
                /* TODO:
                 * Request connection information.
                 *
                 * Once memif in the primary process is connected,
                 * broadcast connection information.
                 */
                return -1;
        }

        kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);

        /* parse parameters */
        if (kvlist != NULL) {
                ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
                                         &memif_set_role, &role);
                if (ret < 0)
                        goto exit;
                ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
                                         &memif_set_id, &id);
                if (ret < 0)
                        goto exit;
                ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
                                         &memif_set_bs, &pkt_buffer_size);
                if (ret < 0)
                        goto exit;
                ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
                                         &memif_set_rs, &log2_ring_size);
                if (ret < 0)
                        goto exit;
                ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
                                         &memif_set_socket_filename,
                                         (void *)(&socket_filename));
                if (ret < 0)
                        goto exit;
                ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
                                         &memif_set_mac, ether_addr);
                if (ret < 0)
                        goto exit;
                ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
                                         &memif_set_zc, &flags);
                if (ret < 0)
                        goto exit;
                ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
                                         &memif_set_secret, (void *)(&secret));
                if (ret < 0)
                        goto exit;
        }

        /* create interface */
        ret = memif_create(vdev, role, id, flags, socket_filename,
                           log2_ring_size, pkt_buffer_size, secret, ether_addr);

exit:
        if (kvlist != NULL)
                rte_kvargs_free(kvlist);
        return ret;
}

static int
rte_pmd_memif_remove(struct rte_vdev_device *vdev)
{
        struct rte_eth_dev *eth_dev;

        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
        if (eth_dev == NULL)
                return 0;

        rte_eth_dev_close(eth_dev->data->port_id);

        return 0;
}

static struct rte_vdev_driver pmd_memif_drv = {
        .probe = rte_pmd_memif_probe,
        .remove = rte_pmd_memif_remove,
};

RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);

RTE_PMD_REGISTER_PARAM_STRING(net_memif,
                              ETH_MEMIF_ID_ARG "=<int>"
                              ETH_MEMIF_ROLE_ARG "=master|slave"
                              ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
                              ETH_MEMIF_RING_SIZE_ARG "=<int>"
                              ETH_MEMIF_SOCKET_ARG "=<string>"
                              ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
                              ETH_MEMIF_ZC_ARG "=yes|no"
                              ETH_MEMIF_SECRET_ARG "=<string>");
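
/*
 * Example invocation wiring two memif endpoints together (illustrative;
 * the socket path is a placeholder and any DPDK application can sit on
 * either side):
 *
 *   app1: --vdev=net_memif0,role=master,socket=/tmp/memif.sock
 *   app2: --vdev=net_memif0,role=slave,socket=/tmp/memif.sock
 */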

int memif_logtype;

RTE_INIT(memif_init_log)
{
        memif_logtype = rte_log_register("pmd.net.memif");
        if (memif_logtype >= 0)
                rte_log_set_level(memif_logtype, RTE_LOG_NOTICE);
}