net/pcap: move handler to process private
[dpdk.git] / drivers / net / pcap / rte_eth_pcap.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright(c) 2014 6WIND S.A.
4  * All rights reserved.
5  */
6
7 #include <time.h>
8
9 #include <net/if.h>
10 #include <sys/socket.h>
11 #include <sys/ioctl.h>
12 #include <unistd.h>
13
14 #if defined(RTE_EXEC_ENV_BSDAPP)
15 #include <sys/sysctl.h>
16 #include <net/if_dl.h>
17 #endif
18
19 #include <pcap.h>
20
21 #include <rte_cycles.h>
22 #include <rte_ethdev_driver.h>
23 #include <rte_ethdev_vdev.h>
24 #include <rte_kvargs.h>
25 #include <rte_malloc.h>
26 #include <rte_mbuf.h>
27 #include <rte_bus_vdev.h>
28 #include <rte_string_fns.h>
29
/* Snapshot length handed to pcap_open_dead() when creating a tx dumper. */
#define RTE_ETH_PCAP_SNAPSHOT_LEN 65535
/* Snapshot length for live captures; also sizes the tx gather buffer. */
#define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN
/* Promiscuous flag passed to pcap_open_live(). */
#define RTE_ETH_PCAP_PROMISC 1
/*
 * Read timeout passed to pcap_open_live(). Parenthesized so the negative
 * literal cannot combine with an operator next to the expansion site.
 */
#define RTE_ETH_PCAP_TIMEOUT (-1)

/* devargs keys understood by this driver. */
#define ETH_PCAP_RX_PCAP_ARG  "rx_pcap"
#define ETH_PCAP_TX_PCAP_ARG  "tx_pcap"
#define ETH_PCAP_RX_IFACE_ARG "rx_iface"
#define ETH_PCAP_RX_IFACE_IN_ARG "rx_iface_in"
#define ETH_PCAP_TX_IFACE_ARG "tx_iface"
#define ETH_PCAP_IFACE_ARG    "iface"
#define ETH_PCAP_PHY_MAC_ARG  "phy_mac"

/* Size of the per-queue buffer that stores the devargs key ("type"). */
#define ETH_PCAP_ARG_MAXLEN     64

/* Upper bound on rx queues and on tx queues per port. */
#define RTE_PMD_PCAP_MAX_QUEUES 16
46
47 static char errbuf[PCAP_ERRBUF_SIZE];
48 static unsigned char tx_pcap_data[RTE_ETH_PCAP_SNAPLEN];
49 static struct timeval start_time;
50 static uint64_t start_cycles;
51 static uint64_t hz;
52 static uint8_t iface_idx;
53
/* Traffic counters kept for one rx or tx queue. */
struct queue_stat {
	/* Packets successfully received or transmitted. */
	volatile unsigned long pkts;
	/* Bytes carried by those packets. */
	volatile unsigned long bytes;
	/* Packets of a burst that were not transmitted (updated by tx paths). */
	volatile unsigned long err_pkts;
};
59
60 struct pcap_rx_queue {
61         uint16_t port_id;
62         uint16_t queue_id;
63         struct rte_mempool *mb_pool;
64         struct queue_stat rx_stat;
65         char name[PATH_MAX];
66         char type[ETH_PCAP_ARG_MAXLEN];
67 };
68
69 struct pcap_tx_queue {
70         uint16_t port_id;
71         uint16_t queue_id;
72         struct queue_stat tx_stat;
73         char name[PATH_MAX];
74         char type[ETH_PCAP_ARG_MAXLEN];
75 };
76
77 struct pmd_internals {
78         struct pcap_rx_queue rx_queue[RTE_PMD_PCAP_MAX_QUEUES];
79         struct pcap_tx_queue tx_queue[RTE_PMD_PCAP_MAX_QUEUES];
80         struct ether_addr eth_addr;
81         int if_index;
82         int single_iface;
83         int phy_mac;
84 };
85
86 struct pmd_process_private {
87         pcap_t *rx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
88         pcap_t *tx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
89         pcap_dumper_t *tx_dumper[RTE_PMD_PCAP_MAX_QUEUES];
90 };
91
92 struct pmd_devargs {
93         unsigned int num_of_queue;
94         struct devargs_queue {
95                 pcap_dumper_t *dumper;
96                 pcap_t *pcap;
97                 const char *name;
98                 const char *type;
99         } queue[RTE_PMD_PCAP_MAX_QUEUES];
100         int phy_mac;
101 };
102
103 static const char *valid_arguments[] = {
104         ETH_PCAP_RX_PCAP_ARG,
105         ETH_PCAP_TX_PCAP_ARG,
106         ETH_PCAP_RX_IFACE_ARG,
107         ETH_PCAP_RX_IFACE_IN_ARG,
108         ETH_PCAP_TX_IFACE_ARG,
109         ETH_PCAP_IFACE_ARG,
110         ETH_PCAP_PHY_MAC_ARG,
111         NULL
112 };
113
114 static struct rte_eth_link pmd_link = {
115                 .link_speed = ETH_SPEED_NUM_10G,
116                 .link_duplex = ETH_LINK_FULL_DUPLEX,
117                 .link_status = ETH_LINK_DOWN,
118                 .link_autoneg = ETH_LINK_FIXED,
119 };
120
/* Log type id used by every message this driver emits. */
static int eth_pcap_logtype;

/*
 * Driver logging helper: prefixes the message with the calling function's
 * name and appends the trailing newline, so callers must not add their own.
 */
#define PMD_LOG(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, eth_pcap_logtype, \
		"%s(): " fmt "\n", __func__, ##__VA_ARGS__)
126
127 static int
128 eth_pcap_rx_jumbo(struct rte_mempool *mb_pool, struct rte_mbuf *mbuf,
129                 const u_char *data, uint16_t data_len)
130 {
131         /* Copy the first segment. */
132         uint16_t len = rte_pktmbuf_tailroom(mbuf);
133         struct rte_mbuf *m = mbuf;
134
135         rte_memcpy(rte_pktmbuf_append(mbuf, len), data, len);
136         data_len -= len;
137         data += len;
138
139         while (data_len > 0) {
140                 /* Allocate next mbuf and point to that. */
141                 m->next = rte_pktmbuf_alloc(mb_pool);
142
143                 if (unlikely(!m->next))
144                         return -1;
145
146                 m = m->next;
147
148                 /* Headroom is not needed in chained mbufs. */
149                 rte_pktmbuf_prepend(m, rte_pktmbuf_headroom(m));
150                 m->pkt_len = 0;
151                 m->data_len = 0;
152
153                 /* Copy next segment. */
154                 len = RTE_MIN(rte_pktmbuf_tailroom(m), data_len);
155                 rte_memcpy(rte_pktmbuf_append(m, len), data, len);
156
157                 mbuf->nb_segs++;
158                 data_len -= len;
159                 data += len;
160         }
161
162         return mbuf->nb_segs;
163 }
164
165 /* Copy data from mbuf chain to a buffer suitable for writing to a PCAP file. */
166 static void
167 eth_pcap_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
168 {
169         uint16_t data_len = 0;
170
171         while (mbuf) {
172                 rte_memcpy(data + data_len, rte_pktmbuf_mtod(mbuf, void *),
173                         mbuf->data_len);
174
175                 data_len += mbuf->data_len;
176                 mbuf = mbuf->next;
177         }
178 }
179
180 static uint16_t
181 eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
182 {
183         unsigned int i;
184         struct pcap_pkthdr header;
185         struct pmd_process_private *pp;
186         const u_char *packet;
187         struct rte_mbuf *mbuf;
188         struct pcap_rx_queue *pcap_q = queue;
189         uint16_t num_rx = 0;
190         uint16_t buf_size;
191         uint32_t rx_bytes = 0;
192         pcap_t *pcap;
193
194         pp = rte_eth_devices[pcap_q->port_id].process_private;
195         pcap = pp->rx_pcap[pcap_q->queue_id];
196
197         if (unlikely(pcap == NULL || nb_pkts == 0))
198                 return 0;
199
200         /* Reads the given number of packets from the pcap file one by one
201          * and copies the packet data into a newly allocated mbuf to return.
202          */
203         for (i = 0; i < nb_pkts; i++) {
204                 /* Get the next PCAP packet */
205                 packet = pcap_next(pcap, &header);
206                 if (unlikely(packet == NULL))
207                         break;
208
209                 mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
210                 if (unlikely(mbuf == NULL))
211                         break;
212
213                 /* Now get the space available for data in the mbuf */
214                 buf_size = rte_pktmbuf_data_room_size(pcap_q->mb_pool) -
215                                 RTE_PKTMBUF_HEADROOM;
216
217                 if (header.caplen <= buf_size) {
218                         /* pcap packet will fit in the mbuf, can copy it */
219                         rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet,
220                                         header.caplen);
221                         mbuf->data_len = (uint16_t)header.caplen;
222                 } else {
223                         /* Try read jumbo frame into multi mbufs. */
224                         if (unlikely(eth_pcap_rx_jumbo(pcap_q->mb_pool,
225                                                        mbuf,
226                                                        packet,
227                                                        header.caplen) == -1)) {
228                                 rte_pktmbuf_free(mbuf);
229                                 break;
230                         }
231                 }
232
233                 mbuf->pkt_len = (uint16_t)header.caplen;
234                 mbuf->port = pcap_q->port_id;
235                 bufs[num_rx] = mbuf;
236                 num_rx++;
237                 rx_bytes += header.caplen;
238         }
239         pcap_q->rx_stat.pkts += num_rx;
240         pcap_q->rx_stat.bytes += rx_bytes;
241
242         return num_rx;
243 }
244
245 static inline void
246 calculate_timestamp(struct timeval *ts) {
247         uint64_t cycles;
248         struct timeval cur_time;
249
250         cycles = rte_get_timer_cycles() - start_cycles;
251         cur_time.tv_sec = cycles / hz;
252         cur_time.tv_usec = (cycles % hz) * 1e6 / hz;
253         timeradd(&start_time, &cur_time, ts);
254 }
255
256 /*
257  * Callback to handle writing packets to a pcap file.
258  */
259 static uint16_t
260 eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
261 {
262         unsigned int i;
263         struct rte_mbuf *mbuf;
264         struct pmd_process_private *pp;
265         struct pcap_tx_queue *dumper_q = queue;
266         uint16_t num_tx = 0;
267         uint32_t tx_bytes = 0;
268         struct pcap_pkthdr header;
269         pcap_dumper_t *dumper;
270
271         pp = rte_eth_devices[dumper_q->port_id].process_private;
272         dumper = pp->tx_dumper[dumper_q->queue_id];
273
274         if (dumper == NULL || nb_pkts == 0)
275                 return 0;
276
277         /* writes the nb_pkts packets to the previously opened pcap file
278          * dumper */
279         for (i = 0; i < nb_pkts; i++) {
280                 mbuf = bufs[i];
281                 calculate_timestamp(&header.ts);
282                 header.len = mbuf->pkt_len;
283                 header.caplen = header.len;
284
285                 if (likely(mbuf->nb_segs == 1)) {
286                         pcap_dump((u_char *)dumper, &header,
287                                   rte_pktmbuf_mtod(mbuf, void*));
288                 } else {
289                         if (mbuf->pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN) {
290                                 eth_pcap_gather_data(tx_pcap_data, mbuf);
291                                 pcap_dump((u_char *)dumper, &header,
292                                           tx_pcap_data);
293                         } else {
294                                 PMD_LOG(ERR,
295                                         "Dropping PCAP packet. Size (%d) > max jumbo size (%d).",
296                                         mbuf->pkt_len,
297                                         ETHER_MAX_JUMBO_FRAME_LEN);
298
299                                 rte_pktmbuf_free(mbuf);
300                                 break;
301                         }
302                 }
303
304                 num_tx++;
305                 tx_bytes += mbuf->pkt_len;
306                 rte_pktmbuf_free(mbuf);
307         }
308
309         /*
310          * Since there's no place to hook a callback when the forwarding
311          * process stops and to make sure the pcap file is actually written,
312          * we flush the pcap dumper within each burst.
313          */
314         pcap_dump_flush(dumper);
315         dumper_q->tx_stat.pkts += num_tx;
316         dumper_q->tx_stat.bytes += tx_bytes;
317         dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
318
319         return num_tx;
320 }
321
322 /*
323  * Callback to handle sending packets through a real NIC.
324  */
325 static uint16_t
326 eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
327 {
328         unsigned int i;
329         int ret;
330         struct rte_mbuf *mbuf;
331         struct pmd_process_private *pp;
332         struct pcap_tx_queue *tx_queue = queue;
333         uint16_t num_tx = 0;
334         uint32_t tx_bytes = 0;
335         pcap_t *pcap;
336
337         pp = rte_eth_devices[tx_queue->port_id].process_private;
338         pcap = pp->tx_pcap[tx_queue->queue_id];
339
340         if (unlikely(nb_pkts == 0 || pcap == NULL))
341                 return 0;
342
343         for (i = 0; i < nb_pkts; i++) {
344                 mbuf = bufs[i];
345
346                 if (likely(mbuf->nb_segs == 1)) {
347                         ret = pcap_sendpacket(pcap,
348                                         rte_pktmbuf_mtod(mbuf, u_char *),
349                                         mbuf->pkt_len);
350                 } else {
351                         if (mbuf->pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN) {
352                                 eth_pcap_gather_data(tx_pcap_data, mbuf);
353                                 ret = pcap_sendpacket(pcap,
354                                                 tx_pcap_data, mbuf->pkt_len);
355                         } else {
356                                 PMD_LOG(ERR,
357                                         "Dropping PCAP packet. Size (%d) > max jumbo size (%d).",
358                                         mbuf->pkt_len,
359                                         ETHER_MAX_JUMBO_FRAME_LEN);
360
361                                 rte_pktmbuf_free(mbuf);
362                                 break;
363                         }
364                 }
365
366                 if (unlikely(ret != 0))
367                         break;
368                 num_tx++;
369                 tx_bytes += mbuf->pkt_len;
370                 rte_pktmbuf_free(mbuf);
371         }
372
373         tx_queue->tx_stat.pkts += num_tx;
374         tx_queue->tx_stat.bytes += tx_bytes;
375         tx_queue->tx_stat.err_pkts += nb_pkts - num_tx;
376
377         return num_tx;
378 }
379
380 /*
381  * pcap_open_live wrapper function
382  */
383 static inline int
384 open_iface_live(const char *iface, pcap_t **pcap) {
385         *pcap = pcap_open_live(iface, RTE_ETH_PCAP_SNAPLEN,
386                         RTE_ETH_PCAP_PROMISC, RTE_ETH_PCAP_TIMEOUT, errbuf);
387
388         if (*pcap == NULL) {
389                 PMD_LOG(ERR, "Couldn't open %s: %s", iface, errbuf);
390                 return -1;
391         }
392
393         return 0;
394 }
395
396 static int
397 open_single_iface(const char *iface, pcap_t **pcap)
398 {
399         if (open_iface_live(iface, pcap) < 0) {
400                 PMD_LOG(ERR, "Couldn't open interface %s", iface);
401                 return -1;
402         }
403
404         return 0;
405 }
406
407 static int
408 open_single_tx_pcap(const char *pcap_filename, pcap_dumper_t **dumper)
409 {
410         pcap_t *tx_pcap;
411
412         /*
413          * We need to create a dummy empty pcap_t to use it
414          * with pcap_dump_open(). We create big enough an Ethernet
415          * pcap holder.
416          */
417         tx_pcap = pcap_open_dead(DLT_EN10MB, RTE_ETH_PCAP_SNAPSHOT_LEN);
418         if (tx_pcap == NULL) {
419                 PMD_LOG(ERR, "Couldn't create dead pcap");
420                 return -1;
421         }
422
423         /* The dumper is created using the previous pcap_t reference */
424         *dumper = pcap_dump_open(tx_pcap, pcap_filename);
425         if (*dumper == NULL) {
426                 pcap_close(tx_pcap);
427                 PMD_LOG(ERR, "Couldn't open %s for writing.",
428                         pcap_filename);
429                 return -1;
430         }
431
432         pcap_close(tx_pcap);
433         return 0;
434 }
435
436 static int
437 open_single_rx_pcap(const char *pcap_filename, pcap_t **pcap)
438 {
439         *pcap = pcap_open_offline(pcap_filename, errbuf);
440         if (*pcap == NULL) {
441                 PMD_LOG(ERR, "Couldn't open %s: %s", pcap_filename,
442                         errbuf);
443                 return -1;
444         }
445
446         return 0;
447 }
448
449 static int
450 eth_dev_start(struct rte_eth_dev *dev)
451 {
452         unsigned int i;
453         struct pmd_internals *internals = dev->data->dev_private;
454         struct pmd_process_private *pp = dev->process_private;
455         struct pcap_tx_queue *tx;
456         struct pcap_rx_queue *rx;
457
458         /* Special iface case. Single pcap is open and shared between tx/rx. */
459         if (internals->single_iface) {
460                 tx = &internals->tx_queue[0];
461                 rx = &internals->rx_queue[0];
462
463                 if (!pp->tx_pcap[0] &&
464                         strcmp(tx->type, ETH_PCAP_IFACE_ARG) == 0) {
465                         if (open_single_iface(tx->name, &pp->tx_pcap[0]) < 0)
466                                 return -1;
467                         pp->rx_pcap[0] = pp->tx_pcap[0];
468                 }
469
470                 goto status_up;
471         }
472
473         /* If not open already, open tx pcaps/dumpers */
474         for (i = 0; i < dev->data->nb_tx_queues; i++) {
475                 tx = &internals->tx_queue[i];
476
477                 if (!pp->tx_dumper[i] &&
478                                 strcmp(tx->type, ETH_PCAP_TX_PCAP_ARG) == 0) {
479                         if (open_single_tx_pcap(tx->name,
480                                 &pp->tx_dumper[i]) < 0)
481                                 return -1;
482                 } else if (!pp->tx_pcap[i] &&
483                                 strcmp(tx->type, ETH_PCAP_TX_IFACE_ARG) == 0) {
484                         if (open_single_iface(tx->name, &pp->tx_pcap[i]) < 0)
485                                 return -1;
486                 }
487         }
488
489         /* If not open already, open rx pcaps */
490         for (i = 0; i < dev->data->nb_rx_queues; i++) {
491                 rx = &internals->rx_queue[i];
492
493                 if (pp->rx_pcap[i] != NULL)
494                         continue;
495
496                 if (strcmp(rx->type, ETH_PCAP_RX_PCAP_ARG) == 0) {
497                         if (open_single_rx_pcap(rx->name, &pp->rx_pcap[i]) < 0)
498                                 return -1;
499                 } else if (strcmp(rx->type, ETH_PCAP_RX_IFACE_ARG) == 0) {
500                         if (open_single_iface(rx->name, &pp->rx_pcap[i]) < 0)
501                                 return -1;
502                 }
503         }
504
505 status_up:
506         for (i = 0; i < dev->data->nb_rx_queues; i++)
507                 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
508
509         for (i = 0; i < dev->data->nb_tx_queues; i++)
510                 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
511
512         dev->data->dev_link.link_status = ETH_LINK_UP;
513
514         return 0;
515 }
516
517 /*
518  * This function gets called when the current port gets stopped.
519  * Is the only place for us to close all the tx streams dumpers.
520  * If not called the dumpers will be flushed within each tx burst.
521  */
522 static void
523 eth_dev_stop(struct rte_eth_dev *dev)
524 {
525         unsigned int i;
526         struct pmd_internals *internals = dev->data->dev_private;
527         struct pmd_process_private *pp = dev->process_private;
528
529         /* Special iface case. Single pcap is open and shared between tx/rx. */
530         if (internals->single_iface) {
531                 pcap_close(pp->tx_pcap[0]);
532                 pp->tx_pcap[0] = NULL;
533                 pp->rx_pcap[0] = NULL;
534                 goto status_down;
535         }
536
537         for (i = 0; i < dev->data->nb_tx_queues; i++) {
538                 if (pp->tx_dumper[i] != NULL) {
539                         pcap_dump_close(pp->tx_dumper[i]);
540                         pp->tx_dumper[i] = NULL;
541                 }
542
543                 if (pp->tx_pcap[i] != NULL) {
544                         pcap_close(pp->tx_pcap[i]);
545                         pp->tx_pcap[i] = NULL;
546                 }
547         }
548
549         for (i = 0; i < dev->data->nb_rx_queues; i++) {
550                 if (pp->rx_pcap[i] != NULL) {
551                         pcap_close(pp->rx_pcap[i]);
552                         pp->rx_pcap[i] = NULL;
553                 }
554         }
555
556 status_down:
557         for (i = 0; i < dev->data->nb_rx_queues; i++)
558                 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
559
560         for (i = 0; i < dev->data->nb_tx_queues; i++)
561                 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
562
563         dev->data->dev_link.link_status = ETH_LINK_DOWN;
564 }
565
566 static int
567 eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
568 {
569         return 0;
570 }
571
572 static void
573 eth_dev_info(struct rte_eth_dev *dev,
574                 struct rte_eth_dev_info *dev_info)
575 {
576         struct pmd_internals *internals = dev->data->dev_private;
577
578         dev_info->if_index = internals->if_index;
579         dev_info->max_mac_addrs = 1;
580         dev_info->max_rx_pktlen = (uint32_t) -1;
581         dev_info->max_rx_queues = dev->data->nb_rx_queues;
582         dev_info->max_tx_queues = dev->data->nb_tx_queues;
583         dev_info->min_rx_bufsize = 0;
584 }
585
586 static int
587 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
588 {
589         unsigned int i;
590         unsigned long rx_packets_total = 0, rx_bytes_total = 0;
591         unsigned long tx_packets_total = 0, tx_bytes_total = 0;
592         unsigned long tx_packets_err_total = 0;
593         const struct pmd_internals *internal = dev->data->dev_private;
594
595         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
596                         i < dev->data->nb_rx_queues; i++) {
597                 stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
598                 stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
599                 rx_packets_total += stats->q_ipackets[i];
600                 rx_bytes_total += stats->q_ibytes[i];
601         }
602
603         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
604                         i < dev->data->nb_tx_queues; i++) {
605                 stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
606                 stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
607                 stats->q_errors[i] = internal->tx_queue[i].tx_stat.err_pkts;
608                 tx_packets_total += stats->q_opackets[i];
609                 tx_bytes_total += stats->q_obytes[i];
610                 tx_packets_err_total += stats->q_errors[i];
611         }
612
613         stats->ipackets = rx_packets_total;
614         stats->ibytes = rx_bytes_total;
615         stats->opackets = tx_packets_total;
616         stats->obytes = tx_bytes_total;
617         stats->oerrors = tx_packets_err_total;
618
619         return 0;
620 }
621
622 static void
623 eth_stats_reset(struct rte_eth_dev *dev)
624 {
625         unsigned int i;
626         struct pmd_internals *internal = dev->data->dev_private;
627
628         for (i = 0; i < dev->data->nb_rx_queues; i++) {
629                 internal->rx_queue[i].rx_stat.pkts = 0;
630                 internal->rx_queue[i].rx_stat.bytes = 0;
631         }
632
633         for (i = 0; i < dev->data->nb_tx_queues; i++) {
634                 internal->tx_queue[i].tx_stat.pkts = 0;
635                 internal->tx_queue[i].tx_stat.bytes = 0;
636                 internal->tx_queue[i].tx_stat.err_pkts = 0;
637         }
638 }
639
640 static void
641 eth_dev_close(struct rte_eth_dev *dev __rte_unused)
642 {
643 }
644
645 static void
646 eth_queue_release(void *q __rte_unused)
647 {
648 }
649
650 static int
651 eth_link_update(struct rte_eth_dev *dev __rte_unused,
652                 int wait_to_complete __rte_unused)
653 {
654         return 0;
655 }
656
657 static int
658 eth_rx_queue_setup(struct rte_eth_dev *dev,
659                 uint16_t rx_queue_id,
660                 uint16_t nb_rx_desc __rte_unused,
661                 unsigned int socket_id __rte_unused,
662                 const struct rte_eth_rxconf *rx_conf __rte_unused,
663                 struct rte_mempool *mb_pool)
664 {
665         struct pmd_internals *internals = dev->data->dev_private;
666         struct pcap_rx_queue *pcap_q = &internals->rx_queue[rx_queue_id];
667
668         pcap_q->mb_pool = mb_pool;
669         pcap_q->port_id = dev->data->port_id;
670         pcap_q->queue_id = rx_queue_id;
671         dev->data->rx_queues[rx_queue_id] = pcap_q;
672
673         return 0;
674 }
675
676 static int
677 eth_tx_queue_setup(struct rte_eth_dev *dev,
678                 uint16_t tx_queue_id,
679                 uint16_t nb_tx_desc __rte_unused,
680                 unsigned int socket_id __rte_unused,
681                 const struct rte_eth_txconf *tx_conf __rte_unused)
682 {
683         struct pmd_internals *internals = dev->data->dev_private;
684         struct pcap_tx_queue *pcap_q = &internals->tx_queue[tx_queue_id];
685
686         pcap_q->port_id = dev->data->port_id;
687         pcap_q->queue_id = tx_queue_id;
688         dev->data->tx_queues[tx_queue_id] = pcap_q;
689
690         return 0;
691 }
692
693 static int
694 eth_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
695 {
696         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
697
698         return 0;
699 }
700
701 static int
702 eth_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
703 {
704         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
705
706         return 0;
707 }
708
709 static int
710 eth_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
711 {
712         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
713
714         return 0;
715 }
716
717 static int
718 eth_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
719 {
720         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
721
722         return 0;
723 }
724
725 static const struct eth_dev_ops ops = {
726         .dev_start = eth_dev_start,
727         .dev_stop = eth_dev_stop,
728         .dev_close = eth_dev_close,
729         .dev_configure = eth_dev_configure,
730         .dev_infos_get = eth_dev_info,
731         .rx_queue_setup = eth_rx_queue_setup,
732         .tx_queue_setup = eth_tx_queue_setup,
733         .rx_queue_start = eth_rx_queue_start,
734         .tx_queue_start = eth_tx_queue_start,
735         .rx_queue_stop = eth_rx_queue_stop,
736         .tx_queue_stop = eth_tx_queue_stop,
737         .rx_queue_release = eth_queue_release,
738         .tx_queue_release = eth_queue_release,
739         .link_update = eth_link_update,
740         .stats_get = eth_stats_get,
741         .stats_reset = eth_stats_reset,
742 };
743
744 static int
745 add_queue(struct pmd_devargs *pmd, const char *name, const char *type,
746                 pcap_t *pcap, pcap_dumper_t *dumper)
747 {
748         if (pmd->num_of_queue >= RTE_PMD_PCAP_MAX_QUEUES)
749                 return -1;
750         if (pcap)
751                 pmd->queue[pmd->num_of_queue].pcap = pcap;
752         if (dumper)
753                 pmd->queue[pmd->num_of_queue].dumper = dumper;
754         pmd->queue[pmd->num_of_queue].name = name;
755         pmd->queue[pmd->num_of_queue].type = type;
756         pmd->num_of_queue++;
757         return 0;
758 }
759
760 /*
761  * Function handler that opens the pcap file for reading a stores a
762  * reference of it for use it later on.
763  */
764 static int
765 open_rx_pcap(const char *key, const char *value, void *extra_args)
766 {
767         const char *pcap_filename = value;
768         struct pmd_devargs *rx = extra_args;
769         pcap_t *pcap = NULL;
770
771         if (open_single_rx_pcap(pcap_filename, &pcap) < 0)
772                 return -1;
773
774         if (add_queue(rx, pcap_filename, key, pcap, NULL) < 0) {
775                 pcap_close(pcap);
776                 return -1;
777         }
778
779         return 0;
780 }
781
782 /*
783  * Opens a pcap file for writing and stores a reference to it
784  * for use it later on.
785  */
786 static int
787 open_tx_pcap(const char *key, const char *value, void *extra_args)
788 {
789         const char *pcap_filename = value;
790         struct pmd_devargs *dumpers = extra_args;
791         pcap_dumper_t *dumper;
792
793         if (open_single_tx_pcap(pcap_filename, &dumper) < 0)
794                 return -1;
795
796         if (add_queue(dumpers, pcap_filename, key, NULL, dumper) < 0) {
797                 pcap_dump_close(dumper);
798                 return -1;
799         }
800
801         return 0;
802 }
803
804 /*
805  * Opens an interface for reading and writing
806  */
807 static inline int
808 open_rx_tx_iface(const char *key, const char *value, void *extra_args)
809 {
810         const char *iface = value;
811         struct pmd_devargs *tx = extra_args;
812         pcap_t *pcap = NULL;
813
814         if (open_single_iface(iface, &pcap) < 0)
815                 return -1;
816
817         tx->queue[0].pcap = pcap;
818         tx->queue[0].name = iface;
819         tx->queue[0].type = key;
820
821         return 0;
822 }
823
824 static inline int
825 set_iface_direction(const char *iface, pcap_t *pcap,
826                 pcap_direction_t direction)
827 {
828         const char *direction_str = (direction == PCAP_D_IN) ? "IN" : "OUT";
829         if (pcap_setdirection(pcap, direction) < 0) {
830                 PMD_LOG(ERR, "Setting %s pcap direction %s failed - %s\n",
831                                 iface, direction_str, pcap_geterr(pcap));
832                 return -1;
833         }
834         PMD_LOG(INFO, "Setting %s pcap direction %s\n",
835                         iface, direction_str);
836         return 0;
837 }
838
839 static inline int
840 open_iface(const char *key, const char *value, void *extra_args)
841 {
842         const char *iface = value;
843         struct pmd_devargs *pmd = extra_args;
844         pcap_t *pcap = NULL;
845
846         if (open_single_iface(iface, &pcap) < 0)
847                 return -1;
848         if (add_queue(pmd, iface, key, pcap, NULL) < 0) {
849                 pcap_close(pcap);
850                 return -1;
851         }
852
853         return 0;
854 }
855
856 /*
857  * Opens a NIC for reading packets from it
858  */
859 static inline int
860 open_rx_iface(const char *key, const char *value, void *extra_args)
861 {
862         int ret = open_iface(key, value, extra_args);
863         if (ret < 0)
864                 return ret;
865         if (strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0) {
866                 struct pmd_devargs *pmd = extra_args;
867                 unsigned int qid = pmd->num_of_queue - 1;
868
869                 set_iface_direction(pmd->queue[qid].name,
870                                 pmd->queue[qid].pcap,
871                                 PCAP_D_IN);
872         }
873
874         return 0;
875 }
876
877 static inline int
878 rx_iface_args_process(const char *key, const char *value, void *extra_args)
879 {
880         if (strcmp(key, ETH_PCAP_RX_IFACE_ARG) == 0 ||
881                         strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0)
882                 return open_rx_iface(key, value, extra_args);
883
884         return 0;
885 }
886
/*
 * devargs handler: open a NIC for transmitting. Simply delegates to
 * open_iface() and returns its result.
 */
static int
open_tx_iface(const char *key, const char *value, void *extra_args)
{
	int rc = open_iface(key, value, extra_args);

	return rc;
}
895
896 static int
897 select_phy_mac(const char *key __rte_unused, const char *value,
898                 void *extra_args)
899 {
900         if (extra_args) {
901                 const int phy_mac = atoi(value);
902                 int *enable_phy_mac = extra_args;
903
904                 if (phy_mac)
905                         *enable_phy_mac = 1;
906         }
907         return 0;
908 }
909
910 static struct rte_vdev_driver pmd_pcap_drv;
911
912 static int
913 pmd_init_internals(struct rte_vdev_device *vdev,
914                 const unsigned int nb_rx_queues,
915                 const unsigned int nb_tx_queues,
916                 struct pmd_internals **internals,
917                 struct rte_eth_dev **eth_dev)
918 {
919         struct rte_eth_dev_data *data;
920         struct pmd_process_private *pp;
921         unsigned int numa_node = vdev->device.numa_node;
922
923         PMD_LOG(INFO, "Creating pcap-backed ethdev on numa socket %d",
924                 numa_node);
925
926         pp = (struct pmd_process_private *)
927                 rte_zmalloc(NULL, sizeof(struct pmd_process_private),
928                                 RTE_CACHE_LINE_SIZE);
929
930         if (pp == NULL) {
931                 PMD_LOG(ERR,
932                         "Failed to allocate memory for process private");
933                 return -1;
934         }
935
936         /* reserve an ethdev entry */
937         *eth_dev = rte_eth_vdev_allocate(vdev, sizeof(**internals));
938         if (!(*eth_dev)) {
939                 rte_free(pp);
940                 return -1;
941         }
942         (*eth_dev)->process_private = pp;
943         /* now put it all together
944          * - store queue data in internals,
945          * - store numa_node info in eth_dev
946          * - point eth_dev_data to internals
947          * - and point eth_dev structure to new eth_dev_data structure
948          */
949         *internals = (*eth_dev)->data->dev_private;
950         /*
951          * Interface MAC = 02:70:63:61:70:<iface_idx>
952          * derived from: 'locally administered':'p':'c':'a':'p':'iface_idx'
953          * where the middle 4 characters are converted to hex.
954          */
955         (*internals)->eth_addr = (struct ether_addr) {
956                 .addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
957         };
958         (*internals)->phy_mac = 0;
959         data = (*eth_dev)->data;
960         data->nb_rx_queues = (uint16_t)nb_rx_queues;
961         data->nb_tx_queues = (uint16_t)nb_tx_queues;
962         data->dev_link = pmd_link;
963         data->mac_addrs = &(*internals)->eth_addr;
964
965         /*
966          * NOTE: we'll replace the data element, of originally allocated
967          * eth_dev so the rings are local per-process
968          */
969         (*eth_dev)->dev_ops = &ops;
970
971         return 0;
972 }
973
/*
 * Point eth_dev->data->mac_addrs at a newly allocated ETHER_ADDR_LEN-byte
 * buffer (allocated on 'numa_node') holding the hardware address of the
 * interface 'if_name'. The previous mac_addrs pointer is overwritten, not
 * freed.
 *
 * Linux reads the address with the SIOCGIFHWADDR ioctl, BSD with the
 * NET_RT_IFLIST sysctl. Any other execution environment always fails.
 *
 * Returns 0 on success and -1 on any failure.
 */
static int
eth_pcap_update_mac(const char *if_name, struct rte_eth_dev *eth_dev,
                const unsigned int numa_node)
{
#if defined(RTE_EXEC_ENV_LINUXAPP)
        struct ifreq ifr;
        void *mac_addrs;
        int status = -1;
        int if_fd;

        if_fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (if_fd == -1)
                return -1;

        rte_strscpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name));
        if (ioctl(if_fd, SIOCGIFHWADDR, &ifr))
                goto close_fd;

        mac_addrs = rte_zmalloc_socket(NULL, ETHER_ADDR_LEN, 0, numa_node);
        if (!mac_addrs)
                goto close_fd;

        PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
        eth_dev->data->mac_addrs = mac_addrs;
        rte_memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
                        ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
        status = 0;

close_fd:
        /* the socket is only needed for the ioctl, close it on every path */
        close(if_fd);
        return status;

#elif defined(RTE_EXEC_ENV_BSDAPP)
        int mib[6] = { CTL_NET, AF_ROUTE, 0, AF_LINK, NET_RT_IFLIST, 0 };
        struct sockaddr_dl *sdl;
        struct if_msghdr *ifm;
        void *mac_addrs;
        int status = -1;
        size_t len = 0;
        char *buf;

        mib[5] = if_nametoindex(if_name);

        /* first call only reports how large the reply buffer must be */
        if (sysctl(mib, 6, NULL, &len, NULL, 0) < 0)
                return -1;

        if (len == 0)
                return -1;

        buf = rte_malloc(NULL, len, 0);
        if (!buf)
                return -1;

        if (sysctl(mib, 6, buf, &len, NULL, 0) < 0)
                goto free_buf;

        /* the link-level sockaddr follows the message header directly */
        ifm = (struct if_msghdr *)buf;
        sdl = (struct sockaddr_dl *)(ifm + 1);

        mac_addrs = rte_zmalloc_socket(NULL, ETHER_ADDR_LEN, 0, numa_node);
        if (!mac_addrs)
                goto free_buf;

        PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
        eth_dev->data->mac_addrs = mac_addrs;
        rte_memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
                        LLADDR(sdl), ETHER_ADDR_LEN);
        status = 0;

free_buf:
        rte_free(buf);
        return status;
#else
        return -1;
#endif
}
1057
1058 static int
1059 eth_from_pcaps_common(struct rte_vdev_device *vdev,
1060                 struct pmd_devargs *rx_queues, const unsigned int nb_rx_queues,
1061                 struct pmd_devargs *tx_queues, const unsigned int nb_tx_queues,
1062                 struct pmd_internals **internals, struct rte_eth_dev **eth_dev)
1063 {
1064         struct pmd_process_private *pp;
1065         unsigned int i;
1066
1067         /* do some parameter checking */
1068         if (rx_queues == NULL && nb_rx_queues > 0)
1069                 return -1;
1070         if (tx_queues == NULL && nb_tx_queues > 0)
1071                 return -1;
1072
1073         if (pmd_init_internals(vdev, nb_rx_queues, nb_tx_queues, internals,
1074                         eth_dev) < 0)
1075                 return -1;
1076
1077         pp = (*eth_dev)->process_private;
1078         for (i = 0; i < nb_rx_queues; i++) {
1079                 struct pcap_rx_queue *rx = &(*internals)->rx_queue[i];
1080                 struct devargs_queue *queue = &rx_queues->queue[i];
1081
1082                 pp->rx_pcap[i] = queue->pcap;
1083                 snprintf(rx->name, sizeof(rx->name), "%s", queue->name);
1084                 snprintf(rx->type, sizeof(rx->type), "%s", queue->type);
1085         }
1086
1087         for (i = 0; i < nb_tx_queues; i++) {
1088                 struct pcap_tx_queue *tx = &(*internals)->tx_queue[i];
1089                 struct devargs_queue *queue = &tx_queues->queue[i];
1090
1091                 pp->tx_dumper[i] = queue->dumper;
1092                 pp->tx_pcap[i] = queue->pcap;
1093                 snprintf(tx->name, sizeof(tx->name), "%s", queue->name);
1094                 snprintf(tx->type, sizeof(tx->type), "%s", queue->type);
1095         }
1096
1097         return 0;
1098 }
1099
1100 static int
1101 eth_from_pcaps(struct rte_vdev_device *vdev,
1102                 struct pmd_devargs *rx_queues, const unsigned int nb_rx_queues,
1103                 struct pmd_devargs *tx_queues, const unsigned int nb_tx_queues,
1104                 int single_iface, unsigned int using_dumpers)
1105 {
1106         struct pmd_internals *internals = NULL;
1107         struct rte_eth_dev *eth_dev = NULL;
1108         int ret;
1109
1110         ret = eth_from_pcaps_common(vdev, rx_queues, nb_rx_queues,
1111                 tx_queues, nb_tx_queues, &internals, &eth_dev);
1112
1113         if (ret < 0)
1114                 return ret;
1115
1116         /* store weather we are using a single interface for rx/tx or not */
1117         internals->single_iface = single_iface;
1118
1119         if (single_iface) {
1120                 internals->if_index = if_nametoindex(rx_queues->queue[0].name);
1121
1122                 /* phy_mac arg is applied only only if "iface" devarg is provided */
1123                 if (rx_queues->phy_mac) {
1124                         int ret = eth_pcap_update_mac(rx_queues->queue[0].name,
1125                                         eth_dev, vdev->device.numa_node);
1126                         if (ret == 0)
1127                                 internals->phy_mac = 1;
1128                 }
1129         }
1130
1131         eth_dev->rx_pkt_burst = eth_pcap_rx;
1132
1133         if (using_dumpers)
1134                 eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
1135         else
1136                 eth_dev->tx_pkt_burst = eth_pcap_tx;
1137
1138         rte_eth_dev_probing_finish(eth_dev);
1139         return 0;
1140 }
1141
1142 static int
1143 pmd_pcap_probe(struct rte_vdev_device *dev)
1144 {
1145         const char *name;
1146         unsigned int is_rx_pcap = 0, is_tx_pcap = 0;
1147         struct rte_kvargs *kvlist;
1148         struct pmd_devargs pcaps = {0};
1149         struct pmd_devargs dumpers = {0};
1150         struct rte_eth_dev *eth_dev;
1151         int single_iface = 0;
1152         int ret;
1153
1154         name = rte_vdev_device_name(dev);
1155         PMD_LOG(INFO, "Initializing pmd_pcap for %s", name);
1156
1157         gettimeofday(&start_time, NULL);
1158         start_cycles = rte_get_timer_cycles();
1159         hz = rte_get_timer_hz();
1160
1161         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1162                 eth_dev = rte_eth_dev_attach_secondary(name);
1163                 if (!eth_dev) {
1164                         PMD_LOG(ERR, "Failed to probe %s", name);
1165                         return -1;
1166                 }
1167                 /* TODO: request info from primary to set up Rx and Tx */
1168                 eth_dev->dev_ops = &ops;
1169                 eth_dev->device = &dev->device;
1170                 rte_eth_dev_probing_finish(eth_dev);
1171                 return 0;
1172         }
1173
1174         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
1175         if (kvlist == NULL)
1176                 return -1;
1177
1178         /*
1179          * If iface argument is passed we open the NICs and use them for
1180          * reading / writing
1181          */
1182         if (rte_kvargs_count(kvlist, ETH_PCAP_IFACE_ARG) == 1) {
1183
1184                 ret = rte_kvargs_process(kvlist, ETH_PCAP_IFACE_ARG,
1185                                 &open_rx_tx_iface, &pcaps);
1186                 if (ret < 0)
1187                         goto free_kvlist;
1188
1189                 dumpers.queue[0] = pcaps.queue[0];
1190
1191                 ret = rte_kvargs_process(kvlist, ETH_PCAP_PHY_MAC_ARG,
1192                                 &select_phy_mac, &pcaps.phy_mac);
1193                 if (ret < 0)
1194                         goto free_kvlist;
1195
1196                 dumpers.phy_mac = pcaps.phy_mac;
1197
1198                 single_iface = 1;
1199                 pcaps.num_of_queue = 1;
1200                 dumpers.num_of_queue = 1;
1201
1202                 goto create_eth;
1203         }
1204
1205         /*
1206          * We check whether we want to open a RX stream from a real NIC or a
1207          * pcap file
1208          */
1209         is_rx_pcap = rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1 : 0;
1210         pcaps.num_of_queue = 0;
1211
1212         if (is_rx_pcap) {
1213                 ret = rte_kvargs_process(kvlist, ETH_PCAP_RX_PCAP_ARG,
1214                                 &open_rx_pcap, &pcaps);
1215         } else {
1216                 ret = rte_kvargs_process(kvlist, NULL,
1217                                 &rx_iface_args_process, &pcaps);
1218         }
1219
1220         if (ret < 0)
1221                 goto free_kvlist;
1222
1223         /*
1224          * We check whether we want to open a TX stream to a real NIC or a
1225          * pcap file
1226          */
1227         is_tx_pcap = rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) ? 1 : 0;
1228         dumpers.num_of_queue = 0;
1229
1230         if (is_tx_pcap)
1231                 ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_PCAP_ARG,
1232                                 &open_tx_pcap, &dumpers);
1233         else
1234                 ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_IFACE_ARG,
1235                                 &open_tx_iface, &dumpers);
1236
1237         if (ret < 0)
1238                 goto free_kvlist;
1239
1240 create_eth:
1241         ret = eth_from_pcaps(dev, &pcaps, pcaps.num_of_queue, &dumpers,
1242                 dumpers.num_of_queue, single_iface, is_tx_pcap);
1243
1244 free_kvlist:
1245         rte_kvargs_free(kvlist);
1246
1247         return ret;
1248 }
1249
1250 static int
1251 pmd_pcap_remove(struct rte_vdev_device *dev)
1252 {
1253         struct pmd_internals *internals = NULL;
1254         struct rte_eth_dev *eth_dev = NULL;
1255
1256         PMD_LOG(INFO, "Closing pcap ethdev on numa socket %d",
1257                         rte_socket_id());
1258
1259         if (!dev)
1260                 return -1;
1261
1262         /* reserve an ethdev entry */
1263         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
1264         if (eth_dev == NULL)
1265                 return -1;
1266
1267         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1268                 internals = eth_dev->data->dev_private;
1269                 if (internals != NULL && internals->phy_mac == 0)
1270                         /* not dynamically allocated, must not be freed */
1271                         eth_dev->data->mac_addrs = NULL;
1272         }
1273
1274         rte_free(eth_dev->process_private);
1275         rte_eth_dev_release_port(eth_dev);
1276
1277         return 0;
1278 }
1279
1280 static struct rte_vdev_driver pmd_pcap_drv = {
1281         .probe = pmd_pcap_probe,
1282         .remove = pmd_pcap_remove,
1283 };
1284
1285 RTE_PMD_REGISTER_VDEV(net_pcap, pmd_pcap_drv);
1286 RTE_PMD_REGISTER_ALIAS(net_pcap, eth_pcap);
1287 RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
1288         ETH_PCAP_RX_PCAP_ARG "=<string> "
1289         ETH_PCAP_TX_PCAP_ARG "=<string> "
1290         ETH_PCAP_RX_IFACE_ARG "=<ifc> "
1291         ETH_PCAP_RX_IFACE_IN_ARG "=<ifc> "
1292         ETH_PCAP_TX_IFACE_ARG "=<ifc> "
1293         ETH_PCAP_IFACE_ARG "=<ifc> "
1294         ETH_PCAP_PHY_MAC_ARG "=<int>");
1295
1296 RTE_INIT(eth_pcap_init_log)
1297 {
1298         eth_pcap_logtype = rte_log_register("pmd.net.pcap");
1299         if (eth_pcap_logtype >= 0)
1300                 rte_log_set_level(eth_pcap_logtype, RTE_LOG_NOTICE);
1301 }