drivers/net/pcap/rte_eth_pcap.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation.
 * Copyright(c) 2014 6WIND S.A.
 * All rights reserved.
 */

#include <time.h>

#include <net/if.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <unistd.h>

#if defined(RTE_EXEC_ENV_FREEBSD)
#include <sys/sysctl.h>
#include <net/if_dl.h>
#endif

#include <pcap.h>

#include <rte_cycles.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_kvargs.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_bus_vdev.h>
#include <rte_string_fns.h>

#define RTE_ETH_PCAP_SNAPSHOT_LEN 65535
#define RTE_ETH_PCAP_SNAPLEN RTE_ETHER_MAX_JUMBO_FRAME_LEN
#define RTE_ETH_PCAP_PROMISC 1
#define RTE_ETH_PCAP_TIMEOUT -1

#define ETH_PCAP_RX_PCAP_ARG  "rx_pcap"
#define ETH_PCAP_TX_PCAP_ARG  "tx_pcap"
#define ETH_PCAP_RX_IFACE_ARG "rx_iface"
#define ETH_PCAP_RX_IFACE_IN_ARG "rx_iface_in"
#define ETH_PCAP_TX_IFACE_ARG "tx_iface"
#define ETH_PCAP_IFACE_ARG    "iface"
#define ETH_PCAP_PHY_MAC_ARG  "phy_mac"
#define ETH_PCAP_INFINITE_RX_ARG  "infinite_rx"
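
/*
 * Example vdev strings (illustrative; the full syntax is in the pcap PMD
 * guide), e.g. with testpmd:
 *   --vdev 'net_pcap0,rx_pcap=input.pcap,tx_pcap=output.pcap'
 *   --vdev 'net_pcap1,iface=eth0,phy_mac=1'
 */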

#define ETH_PCAP_ARG_MAXLEN     64

#define RTE_PMD_PCAP_MAX_QUEUES 16

static char errbuf[PCAP_ERRBUF_SIZE];
static unsigned char tx_pcap_data[RTE_ETH_PCAP_SNAPLEN];
static struct timeval start_time;
static uint64_t start_cycles;
static uint64_t hz;
static uint8_t iface_idx;

struct queue_stat {
        volatile unsigned long pkts;
        volatile unsigned long bytes;
        volatile unsigned long err_pkts;
};

struct pcap_rx_queue {
        uint16_t port_id;
        uint16_t queue_id;
        struct rte_mempool *mb_pool;
        struct queue_stat rx_stat;
        char name[PATH_MAX];
        char type[ETH_PCAP_ARG_MAXLEN];

        /* Contains pre-generated packets to be looped through */
        struct rte_ring *pkts;
};

struct pcap_tx_queue {
        uint16_t port_id;
        uint16_t queue_id;
        struct queue_stat tx_stat;
        char name[PATH_MAX];
        char type[ETH_PCAP_ARG_MAXLEN];
};

struct pmd_internals {
        struct pcap_rx_queue rx_queue[RTE_PMD_PCAP_MAX_QUEUES];
        struct pcap_tx_queue tx_queue[RTE_PMD_PCAP_MAX_QUEUES];
        char devargs[ETH_PCAP_ARG_MAXLEN];
        struct rte_ether_addr eth_addr;
        int if_index;
        int single_iface;
        int phy_mac;
        unsigned int infinite_rx;
};

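/*
 * pcap handles wrap process-local resources (file descriptors/FILE
 * pointers), so they are kept in per-process private data rather than in
 * the shared dev_private area.
 */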
struct pmd_process_private {
        pcap_t *rx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
        pcap_t *tx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
        pcap_dumper_t *tx_dumper[RTE_PMD_PCAP_MAX_QUEUES];
};

struct pmd_devargs {
        unsigned int num_of_queue;
        struct devargs_queue {
                pcap_dumper_t *dumper;
                pcap_t *pcap;
                const char *name;
                const char *type;
        } queue[RTE_PMD_PCAP_MAX_QUEUES];
        int phy_mac;
};

struct pmd_devargs_all {
        struct pmd_devargs rx_queues;
        struct pmd_devargs tx_queues;
        int single_iface;
        unsigned int is_tx_pcap;
        unsigned int is_tx_iface;
        unsigned int is_rx_pcap;
        unsigned int is_rx_iface;
        unsigned int infinite_rx;
};

static const char *valid_arguments[] = {
        ETH_PCAP_RX_PCAP_ARG,
        ETH_PCAP_TX_PCAP_ARG,
        ETH_PCAP_RX_IFACE_ARG,
        ETH_PCAP_RX_IFACE_IN_ARG,
        ETH_PCAP_TX_IFACE_ARG,
        ETH_PCAP_IFACE_ARG,
        ETH_PCAP_PHY_MAC_ARG,
        ETH_PCAP_INFINITE_RX_ARG,
        NULL
};

static struct rte_eth_link pmd_link = {
                .link_speed = ETH_SPEED_NUM_10G,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
                .link_status = ETH_LINK_DOWN,
                .link_autoneg = ETH_LINK_FIXED,
};

static int eth_pcap_logtype;

#define PMD_LOG(level, fmt, args...) \
        rte_log(RTE_LOG_ ## level, eth_pcap_logtype, \
                "%s(): " fmt "\n", __func__, ##args)

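/*
 * Copy a captured frame that does not fit in a single mbuf into a chain of
 * mbufs. Returns the resulting number of segments, or -1 if an mbuf
 * allocation fails.
 */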
static int
eth_pcap_rx_jumbo(struct rte_mempool *mb_pool, struct rte_mbuf *mbuf,
                const u_char *data, uint16_t data_len)
{
        /* Copy the first segment. */
        uint16_t len = rte_pktmbuf_tailroom(mbuf);
        struct rte_mbuf *m = mbuf;

        rte_memcpy(rte_pktmbuf_append(mbuf, len), data, len);
        data_len -= len;
        data += len;

        while (data_len > 0) {
                /* Allocate next mbuf and point to that. */
                m->next = rte_pktmbuf_alloc(mb_pool);

                if (unlikely(!m->next))
                        return -1;

                m = m->next;

                /* Headroom is not needed in chained mbufs. */
                rte_pktmbuf_prepend(m, rte_pktmbuf_headroom(m));
                m->pkt_len = 0;
                m->data_len = 0;

                /* Copy next segment. */
                len = RTE_MIN(rte_pktmbuf_tailroom(m), data_len);
                rte_memcpy(rte_pktmbuf_append(m, len), data, len);

                mbuf->nb_segs++;
                data_len -= len;
                data += len;
        }

        return mbuf->nb_segs;
}

/* Copy data from mbuf chain to a buffer suitable for writing to a PCAP file. */
static void
eth_pcap_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
{
        uint16_t data_len = 0;

        while (mbuf) {
                rte_memcpy(data + data_len, rte_pktmbuf_mtod(mbuf, void *),
                        mbuf->data_len);

                data_len += mbuf->data_len;
                mbuf = mbuf->next;
        }
}

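/*
 * Rx burst for infinite_rx mode: packet data is copied out of a pre-filled
 * ring and each source mbuf is re-enqueued immediately, so the capture is
 * replayed indefinitely.
 */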
static uint16_t
eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        int i;
        struct pcap_rx_queue *pcap_q = queue;
        uint32_t rx_bytes = 0;

        if (unlikely(nb_pkts == 0))
                return 0;

        if (rte_pktmbuf_alloc_bulk(pcap_q->mb_pool, bufs, nb_pkts) != 0)
                return 0;

        for (i = 0; i < nb_pkts; i++) {
                struct rte_mbuf *pcap_buf;
                int err = rte_ring_dequeue(pcap_q->pkts, (void **)&pcap_buf);
                if (err)
                        return i;

                rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
                                rte_pktmbuf_mtod(pcap_buf, void *),
                                pcap_buf->data_len);
                bufs[i]->data_len = pcap_buf->data_len;
                bufs[i]->pkt_len = pcap_buf->pkt_len;
                bufs[i]->port = pcap_q->port_id;
                rx_bytes += pcap_buf->data_len;

                /* Enqueue packet back on ring to allow infinite rx. */
                rte_ring_enqueue(pcap_q->pkts, pcap_buf);
        }

        pcap_q->rx_stat.pkts += i;
        pcap_q->rx_stat.bytes += rx_bytes;

        return i;
}

static uint16_t
eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        unsigned int i;
        struct pcap_pkthdr header;
        struct pmd_process_private *pp;
        const u_char *packet;
        struct rte_mbuf *mbuf;
        struct pcap_rx_queue *pcap_q = queue;
        uint16_t num_rx = 0;
        uint16_t buf_size;
        uint32_t rx_bytes = 0;
        pcap_t *pcap;

        pp = rte_eth_devices[pcap_q->port_id].process_private;
        pcap = pp->rx_pcap[pcap_q->queue_id];

        if (unlikely(pcap == NULL || nb_pkts == 0))
                return 0;

        /* Reads the given number of packets from the pcap file one by one
         * and copies the packet data into a newly allocated mbuf to return.
         */
        for (i = 0; i < nb_pkts; i++) {
                /* Get the next PCAP packet */
                packet = pcap_next(pcap, &header);
                if (unlikely(packet == NULL))
                        break;

                mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
                if (unlikely(mbuf == NULL))
                        break;

                /* Now get the space available for data in the mbuf */
                buf_size = rte_pktmbuf_data_room_size(pcap_q->mb_pool) -
                                RTE_PKTMBUF_HEADROOM;

                if (header.caplen <= buf_size) {
                        /* pcap packet will fit in the mbuf, can copy it */
                        rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet,
                                        header.caplen);
                        mbuf->data_len = (uint16_t)header.caplen;
                } else {
                        /* Try read jumbo frame into multi mbufs. */
                        if (unlikely(eth_pcap_rx_jumbo(pcap_q->mb_pool,
                                                       mbuf,
                                                       packet,
                                                       header.caplen) == -1)) {
                                rte_pktmbuf_free(mbuf);
                                break;
                        }
                }

                mbuf->pkt_len = (uint16_t)header.caplen;
                mbuf->port = pcap_q->port_id;
                bufs[num_rx] = mbuf;
                num_rx++;
                rx_bytes += header.caplen;
        }
        pcap_q->rx_stat.pkts += num_rx;
        pcap_q->rx_stat.bytes += rx_bytes;

        return num_rx;
}

static uint16_t
eth_null_rx(void *queue __rte_unused,
                struct rte_mbuf **bufs __rte_unused,
                uint16_t nb_pkts __rte_unused)
{
        return 0;
}

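/*
 * Derive a packet timestamp from the TSC: elapsed cycles since probe time
 * are converted to seconds/microseconds and added to the wall-clock time
 * sampled at probe (start_time).
 */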
static inline void
calculate_timestamp(struct timeval *ts) {
        uint64_t cycles;
        struct timeval cur_time;

        cycles = rte_get_timer_cycles() - start_cycles;
        cur_time.tv_sec = cycles / hz;
        cur_time.tv_usec = (cycles % hz) * 1e6 / hz;
        timeradd(&start_time, &cur_time, ts);
}

/*
 * Callback to handle writing packets to a pcap file.
 */
static uint16_t
eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        unsigned int i;
        struct rte_mbuf *mbuf;
        struct pmd_process_private *pp;
        struct pcap_tx_queue *dumper_q = queue;
        uint16_t num_tx = 0;
        uint32_t tx_bytes = 0;
        struct pcap_pkthdr header;
        pcap_dumper_t *dumper;

        pp = rte_eth_devices[dumper_q->port_id].process_private;
        dumper = pp->tx_dumper[dumper_q->queue_id];

        if (dumper == NULL || nb_pkts == 0)
                return 0;

        /* Write nb_pkts packets to the previously opened pcap dumper. */
        for (i = 0; i < nb_pkts; i++) {
                mbuf = bufs[i];
                calculate_timestamp(&header.ts);
                header.len = mbuf->pkt_len;
                header.caplen = header.len;

                if (likely(mbuf->nb_segs == 1)) {
                        pcap_dump((u_char *)dumper, &header,
                                  rte_pktmbuf_mtod(mbuf, void*));
                } else {
                        if (mbuf->pkt_len <= RTE_ETHER_MAX_JUMBO_FRAME_LEN) {
                                eth_pcap_gather_data(tx_pcap_data, mbuf);
                                pcap_dump((u_char *)dumper, &header,
                                          tx_pcap_data);
                        } else {
                                PMD_LOG(ERR,
                                        "Dropping PCAP packet. Size (%d) > max jumbo size (%d).",
                                        mbuf->pkt_len,
                                        RTE_ETHER_MAX_JUMBO_FRAME_LEN);

                                break;
                        }
                }

                num_tx++;
                tx_bytes += mbuf->pkt_len;
                rte_pktmbuf_free(mbuf);
        }

        /*
         * Since there's no place to hook a callback when the forwarding
         * process stops and to make sure the pcap file is actually written,
         * we flush the pcap dumper within each burst.
         */
        pcap_dump_flush(dumper);
        dumper_q->tx_stat.pkts += num_tx;
        dumper_q->tx_stat.bytes += tx_bytes;
        dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;

        return num_tx;
}

/*
 * Callback to handle dropping packets in the infinite rx case.
 */
static uint16_t
eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        unsigned int i;
        uint32_t tx_bytes = 0;
        struct pcap_tx_queue *tx_queue = queue;

        if (unlikely(nb_pkts == 0))
                return 0;

        for (i = 0; i < nb_pkts; i++) {
                tx_bytes += bufs[i]->data_len;
                rte_pktmbuf_free(bufs[i]);
        }

        tx_queue->tx_stat.pkts += nb_pkts;
        tx_queue->tx_stat.bytes += tx_bytes;

        return i;
}

/*
 * Callback to handle sending packets through a real NIC.
 */
static uint16_t
eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        unsigned int i;
        int ret;
        struct rte_mbuf *mbuf;
        struct pmd_process_private *pp;
        struct pcap_tx_queue *tx_queue = queue;
        uint16_t num_tx = 0;
        uint32_t tx_bytes = 0;
        pcap_t *pcap;

        pp = rte_eth_devices[tx_queue->port_id].process_private;
        pcap = pp->tx_pcap[tx_queue->queue_id];

        if (unlikely(nb_pkts == 0 || pcap == NULL))
                return 0;

        for (i = 0; i < nb_pkts; i++) {
                mbuf = bufs[i];

                if (likely(mbuf->nb_segs == 1)) {
                        ret = pcap_sendpacket(pcap,
                                        rte_pktmbuf_mtod(mbuf, u_char *),
                                        mbuf->pkt_len);
                } else {
                        if (mbuf->pkt_len <= RTE_ETHER_MAX_JUMBO_FRAME_LEN) {
                                eth_pcap_gather_data(tx_pcap_data, mbuf);
                                ret = pcap_sendpacket(pcap,
                                                tx_pcap_data, mbuf->pkt_len);
                        } else {
                                PMD_LOG(ERR,
                                        "Dropping PCAP packet. Size (%d) > max jumbo size (%d).",
                                        mbuf->pkt_len,
                                        RTE_ETHER_MAX_JUMBO_FRAME_LEN);

                                break;
                        }
                }

                if (unlikely(ret != 0))
                        break;
                num_tx++;
                tx_bytes += mbuf->pkt_len;
                rte_pktmbuf_free(mbuf);
        }

        tx_queue->tx_stat.pkts += num_tx;
        tx_queue->tx_stat.bytes += tx_bytes;
        tx_queue->tx_stat.err_pkts += nb_pkts - num_tx;

        return num_tx;
}

/*
 * pcap_open_live wrapper function
 */
static inline int
open_iface_live(const char *iface, pcap_t **pcap) {
        *pcap = pcap_open_live(iface, RTE_ETH_PCAP_SNAPLEN,
                        RTE_ETH_PCAP_PROMISC, RTE_ETH_PCAP_TIMEOUT, errbuf);

        if (*pcap == NULL) {
                PMD_LOG(ERR, "Couldn't open %s: %s", iface, errbuf);
                return -1;
        }

        return 0;
}

static int
open_single_iface(const char *iface, pcap_t **pcap)
{
        if (open_iface_live(iface, pcap) < 0) {
                PMD_LOG(ERR, "Couldn't open interface %s", iface);
                return -1;
        }

        return 0;
}

static int
open_single_tx_pcap(const char *pcap_filename, pcap_dumper_t **dumper)
{
        pcap_t *tx_pcap;

        /*
         * pcap_dump_open() needs a pcap_t, so create a dummy "dead"
         * Ethernet handle with a large enough snapshot length.
         */
        tx_pcap = pcap_open_dead(DLT_EN10MB, RTE_ETH_PCAP_SNAPSHOT_LEN);
        if (tx_pcap == NULL) {
                PMD_LOG(ERR, "Couldn't create dead pcap");
                return -1;
        }

        /* The dumper is created using the previous pcap_t reference */
        *dumper = pcap_dump_open(tx_pcap, pcap_filename);
        if (*dumper == NULL) {
                pcap_close(tx_pcap);
                PMD_LOG(ERR, "Couldn't open %s for writing.",
                        pcap_filename);
                return -1;
        }

        pcap_close(tx_pcap);
        return 0;
}

static int
open_single_rx_pcap(const char *pcap_filename, pcap_t **pcap)
{
        *pcap = pcap_open_offline(pcap_filename, errbuf);
        if (*pcap == NULL) {
                PMD_LOG(ERR, "Couldn't open %s: %s", pcap_filename,
                        errbuf);
                return -1;
        }

        return 0;
}

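/*
 * Walk the pcap once to count its packets, then reopen it so the handle is
 * rewound for normal use.
 */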
static uint64_t
count_packets_in_pcap(pcap_t **pcap, struct pcap_rx_queue *pcap_q)
{
        const u_char *packet;
        struct pcap_pkthdr header;
        uint64_t pcap_pkt_count = 0;

        while ((packet = pcap_next(*pcap, &header)))
                pcap_pkt_count++;

        /* The pcap is reopened so it can be used as normal later. */
        pcap_close(*pcap);
        *pcap = NULL;
        open_single_rx_pcap(pcap_q->name, pcap);

        return pcap_pkt_count;
}

static int
eth_dev_start(struct rte_eth_dev *dev)
{
        unsigned int i;
        struct pmd_internals *internals = dev->data->dev_private;
        struct pmd_process_private *pp = dev->process_private;
        struct pcap_tx_queue *tx;
        struct pcap_rx_queue *rx;

        /* Special iface case. Single pcap is open and shared between tx/rx. */
        if (internals->single_iface) {
                tx = &internals->tx_queue[0];
                rx = &internals->rx_queue[0];

                if (!pp->tx_pcap[0] &&
                        strcmp(tx->type, ETH_PCAP_IFACE_ARG) == 0) {
                        if (open_single_iface(tx->name, &pp->tx_pcap[0]) < 0)
                                return -1;
                        pp->rx_pcap[0] = pp->tx_pcap[0];
                }

                goto status_up;
        }

        /* If not open already, open tx pcaps/dumpers */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                tx = &internals->tx_queue[i];

                if (!pp->tx_dumper[i] &&
                                strcmp(tx->type, ETH_PCAP_TX_PCAP_ARG) == 0) {
                        if (open_single_tx_pcap(tx->name,
                                &pp->tx_dumper[i]) < 0)
                                return -1;
                } else if (!pp->tx_pcap[i] &&
                                strcmp(tx->type, ETH_PCAP_TX_IFACE_ARG) == 0) {
                        if (open_single_iface(tx->name, &pp->tx_pcap[i]) < 0)
                                return -1;
                }
        }

        /* If not open already, open rx pcaps */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rx = &internals->rx_queue[i];

                if (pp->rx_pcap[i] != NULL)
                        continue;

                if (strcmp(rx->type, ETH_PCAP_RX_PCAP_ARG) == 0) {
                        if (open_single_rx_pcap(rx->name, &pp->rx_pcap[i]) < 0)
                                return -1;
                } else if (strcmp(rx->type, ETH_PCAP_RX_IFACE_ARG) == 0) {
                        if (open_single_iface(rx->name, &pp->rx_pcap[i]) < 0)
                                return -1;
                }
        }

status_up:
        for (i = 0; i < dev->data->nb_rx_queues; i++)
                dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

        dev->data->dev_link.link_status = ETH_LINK_UP;

        return 0;
}

/*
 * This function gets called when the current port gets stopped. It is the
 * only place we can close all the tx stream dumpers; if it is not called,
 * the dumpers are only flushed within each tx burst.
 */
static void
eth_dev_stop(struct rte_eth_dev *dev)
{
        unsigned int i;
        struct pmd_internals *internals = dev->data->dev_private;
        struct pmd_process_private *pp = dev->process_private;

        /* Special iface case. Single pcap is open and shared between tx/rx. */
        if (internals->single_iface) {
                pcap_close(pp->tx_pcap[0]);
                pp->tx_pcap[0] = NULL;
                pp->rx_pcap[0] = NULL;
                goto status_down;
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                if (pp->tx_dumper[i] != NULL) {
                        pcap_dump_close(pp->tx_dumper[i]);
                        pp->tx_dumper[i] = NULL;
                }

                if (pp->tx_pcap[i] != NULL) {
                        pcap_close(pp->tx_pcap[i]);
                        pp->tx_pcap[i] = NULL;
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                if (pp->rx_pcap[i] != NULL) {
                        pcap_close(pp->rx_pcap[i]);
                        pp->rx_pcap[i] = NULL;
                }
        }

status_down:
        for (i = 0; i < dev->data->nb_rx_queues; i++)
                dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

        dev->data->dev_link.link_status = ETH_LINK_DOWN;
}

static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
        return 0;
}

static void
eth_dev_info(struct rte_eth_dev *dev,
                struct rte_eth_dev_info *dev_info)
{
        struct pmd_internals *internals = dev->data->dev_private;

        dev_info->if_index = internals->if_index;
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t) -1;
        dev_info->max_rx_queues = dev->data->nb_rx_queues;
        dev_info->max_tx_queues = dev->data->nb_tx_queues;
        dev_info->min_rx_bufsize = 0;
}

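/* Aggregate the per-queue software counters into the device-level stats. */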
static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        unsigned int i;
        unsigned long rx_packets_total = 0, rx_bytes_total = 0;
        unsigned long tx_packets_total = 0, tx_bytes_total = 0;
        unsigned long tx_packets_err_total = 0;
        const struct pmd_internals *internal = dev->data->dev_private;

        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
                        i < dev->data->nb_rx_queues; i++) {
                stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
                stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
                rx_packets_total += stats->q_ipackets[i];
                rx_bytes_total += stats->q_ibytes[i];
        }

        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
                        i < dev->data->nb_tx_queues; i++) {
                stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
                stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
                tx_packets_total += stats->q_opackets[i];
                tx_bytes_total += stats->q_obytes[i];
                tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
        }

        stats->ipackets = rx_packets_total;
        stats->ibytes = rx_bytes_total;
        stats->opackets = tx_packets_total;
        stats->obytes = tx_bytes_total;
        stats->oerrors = tx_packets_err_total;

        return 0;
}

static void
eth_stats_reset(struct rte_eth_dev *dev)
{
        unsigned int i;
        struct pmd_internals *internal = dev->data->dev_private;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                internal->rx_queue[i].rx_stat.pkts = 0;
                internal->rx_queue[i].rx_stat.bytes = 0;
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                internal->tx_queue[i].tx_stat.pkts = 0;
                internal->tx_queue[i].tx_stat.bytes = 0;
                internal->tx_queue[i].tx_stat.err_pkts = 0;
        }
}

static void
eth_dev_close(struct rte_eth_dev *dev)
{
        unsigned int i;
        struct pmd_internals *internals = dev->data->dev_private;

        /* Device wide flag, but cleanup must be performed per queue. */
        if (internals->infinite_rx) {
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        struct pcap_rx_queue *pcap_q = &internals->rx_queue[i];
                        struct rte_mbuf *pcap_buf;

                        while (!rte_ring_dequeue(pcap_q->pkts,
                                        (void **)&pcap_buf))
                                rte_pktmbuf_free(pcap_buf);

                        rte_ring_free(pcap_q->pkts);
                }
        }

}

static void
eth_queue_release(void *q __rte_unused)
{
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
                int wait_to_complete __rte_unused)
{
        return 0;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev,
                uint16_t rx_queue_id,
                uint16_t nb_rx_desc __rte_unused,
                unsigned int socket_id __rte_unused,
                const struct rte_eth_rxconf *rx_conf __rte_unused,
                struct rte_mempool *mb_pool)
{
        struct pmd_internals *internals = dev->data->dev_private;
        struct pcap_rx_queue *pcap_q = &internals->rx_queue[rx_queue_id];

        pcap_q->mb_pool = mb_pool;
        pcap_q->port_id = dev->data->port_id;
        pcap_q->queue_id = rx_queue_id;
        dev->data->rx_queues[rx_queue_id] = pcap_q;

        if (internals->infinite_rx) {
                struct pmd_process_private *pp;
                char ring_name[NAME_MAX];
                static uint32_t ring_number;
                uint64_t pcap_pkt_count = 0;
                struct rte_mbuf *bufs[1];
                pcap_t **pcap;

                pp = rte_eth_devices[pcap_q->port_id].process_private;
                pcap = &pp->rx_pcap[pcap_q->queue_id];

                if (unlikely(*pcap == NULL))
                        return -ENOENT;

                pcap_pkt_count = count_packets_in_pcap(pcap, pcap_q);

                snprintf(ring_name, sizeof(ring_name), "PCAP_RING%" PRIu16,
                                ring_number);
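
                /*
                 * Ring sizes must be a power of two; round up so every
                 * packet in the pcap (plus one free slot) fits.
                 */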
                pcap_q->pkts = rte_ring_create(ring_name,
                                rte_align64pow2(pcap_pkt_count + 1), 0,
                                RING_F_SP_ENQ | RING_F_SC_DEQ);
                ring_number++;
                if (!pcap_q->pkts)
                        return -ENOENT;

                /* Fill ring with packets from PCAP file one by one. */
                while (eth_pcap_rx(pcap_q, bufs, 1)) {
                        /* Check for multiseg mbufs. */
                        if (bufs[0]->nb_segs != 1) {
                                rte_pktmbuf_free(*bufs);

                                while (!rte_ring_dequeue(pcap_q->pkts,
                                                (void **)bufs))
                                        rte_pktmbuf_free(*bufs);

                                rte_ring_free(pcap_q->pkts);
                                PMD_LOG(ERR, "Multiseg mbufs are not supported in infinite_rx "
                                                "mode.");
                                return -EINVAL;
                        }

                        rte_ring_enqueue_bulk(pcap_q->pkts,
                                        (void * const *)bufs, 1, NULL);
                }
                /*
                 * Reset the stats for this queue since eth_pcap_rx calls above
                 * didn't result in the application receiving packets.
                 */
                pcap_q->rx_stat.pkts = 0;
                pcap_q->rx_stat.bytes = 0;
        }

        return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev,
                uint16_t tx_queue_id,
                uint16_t nb_tx_desc __rte_unused,
                unsigned int socket_id __rte_unused,
                const struct rte_eth_txconf *tx_conf __rte_unused)
{
        struct pmd_internals *internals = dev->data->dev_private;
        struct pcap_tx_queue *pcap_q = &internals->tx_queue[tx_queue_id];

        pcap_q->port_id = dev->data->port_id;
        pcap_q->queue_id = tx_queue_id;
        dev->data->tx_queues[tx_queue_id] = pcap_q;

        return 0;
}

static int
eth_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

        return 0;
}

static int
eth_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

        return 0;
}

static int
eth_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

        return 0;
}

static int
eth_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

        return 0;
}

static const struct eth_dev_ops ops = {
        .dev_start = eth_dev_start,
        .dev_stop = eth_dev_stop,
        .dev_close = eth_dev_close,
        .dev_configure = eth_dev_configure,
        .dev_infos_get = eth_dev_info,
        .rx_queue_setup = eth_rx_queue_setup,
        .tx_queue_setup = eth_tx_queue_setup,
        .rx_queue_start = eth_rx_queue_start,
        .tx_queue_start = eth_tx_queue_start,
        .rx_queue_stop = eth_rx_queue_stop,
        .tx_queue_stop = eth_tx_queue_stop,
        .rx_queue_release = eth_queue_release,
        .tx_queue_release = eth_queue_release,
        .link_update = eth_link_update,
        .stats_get = eth_stats_get,
        .stats_reset = eth_stats_reset,
};

static int
add_queue(struct pmd_devargs *pmd, const char *name, const char *type,
                pcap_t *pcap, pcap_dumper_t *dumper)
{
        if (pmd->num_of_queue >= RTE_PMD_PCAP_MAX_QUEUES)
                return -1;
        if (pcap)
                pmd->queue[pmd->num_of_queue].pcap = pcap;
        if (dumper)
                pmd->queue[pmd->num_of_queue].dumper = dumper;
        pmd->queue[pmd->num_of_queue].name = name;
        pmd->queue[pmd->num_of_queue].type = type;
        pmd->num_of_queue++;
        return 0;
}

/*
 * Function handler that opens the pcap file for reading and stores a
 * reference to it for later use.
 */
static int
open_rx_pcap(const char *key, const char *value, void *extra_args)
{
        const char *pcap_filename = value;
        struct pmd_devargs *rx = extra_args;
        pcap_t *pcap = NULL;

        if (open_single_rx_pcap(pcap_filename, &pcap) < 0)
                return -1;

        if (add_queue(rx, pcap_filename, key, pcap, NULL) < 0) {
                pcap_close(pcap);
                return -1;
        }

        return 0;
}

/*
 * Opens a pcap file for writing and stores a reference to it
 * for later use.
 */
static int
open_tx_pcap(const char *key, const char *value, void *extra_args)
{
        const char *pcap_filename = value;
        struct pmd_devargs *dumpers = extra_args;
        pcap_dumper_t *dumper;

        if (open_single_tx_pcap(pcap_filename, &dumper) < 0)
                return -1;

        if (add_queue(dumpers, pcap_filename, key, NULL, dumper) < 0) {
                pcap_dump_close(dumper);
                return -1;
        }

        return 0;
}

/*
 * Opens an interface for reading and writing
 */
static inline int
open_rx_tx_iface(const char *key, const char *value, void *extra_args)
{
        const char *iface = value;
        struct pmd_devargs *tx = extra_args;
        pcap_t *pcap = NULL;

        if (open_single_iface(iface, &pcap) < 0)
                return -1;

        tx->queue[0].pcap = pcap;
        tx->queue[0].name = iface;
        tx->queue[0].type = key;

        return 0;
}

static inline int
set_iface_direction(const char *iface, pcap_t *pcap,
                pcap_direction_t direction)
{
        const char *direction_str = (direction == PCAP_D_IN) ? "IN" : "OUT";
        if (pcap_setdirection(pcap, direction) < 0) {
                PMD_LOG(ERR, "Setting %s pcap direction %s failed - %s",
                                iface, direction_str, pcap_geterr(pcap));
                return -1;
        }
        PMD_LOG(INFO, "Setting %s pcap direction %s",
                        iface, direction_str);
        return 0;
}

static inline int
open_iface(const char *key, const char *value, void *extra_args)
{
        const char *iface = value;
        struct pmd_devargs *pmd = extra_args;
        pcap_t *pcap = NULL;

        if (open_single_iface(iface, &pcap) < 0)
                return -1;
        if (add_queue(pmd, iface, key, pcap, NULL) < 0) {
                pcap_close(pcap);
                return -1;
        }

        return 0;
}

/*
 * Opens a NIC for reading packets from it
 */
static inline int
open_rx_iface(const char *key, const char *value, void *extra_args)
{
        int ret = open_iface(key, value, extra_args);
        if (ret < 0)
                return ret;
        if (strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0) {
                struct pmd_devargs *pmd = extra_args;
                unsigned int qid = pmd->num_of_queue - 1;

                set_iface_direction(pmd->queue[qid].name,
                                pmd->queue[qid].pcap,
                                PCAP_D_IN);
        }

        return 0;
}

static inline int
rx_iface_args_process(const char *key, const char *value, void *extra_args)
{
        if (strcmp(key, ETH_PCAP_RX_IFACE_ARG) == 0 ||
                        strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0)
                return open_rx_iface(key, value, extra_args);

        return 0;
}

/*
 * Opens a NIC for writing packets to it
 */
static int
open_tx_iface(const char *key, const char *value, void *extra_args)
{
        return open_iface(key, value, extra_args);
}

static int
select_phy_mac(const char *key __rte_unused, const char *value,
                void *extra_args)
{
        if (extra_args) {
                const int phy_mac = atoi(value);
                int *enable_phy_mac = extra_args;

                if (phy_mac)
                        *enable_phy_mac = 1;
        }
        return 0;
}

static int
get_infinite_rx_arg(const char *key __rte_unused,
                const char *value, void *extra_args)
{
        if (extra_args) {
                const int infinite_rx = atoi(value);
                int *enable_infinite_rx = extra_args;

                if (infinite_rx > 0)
                        *enable_infinite_rx = 1;
        }
        return 0;
}

static int
pmd_init_internals(struct rte_vdev_device *vdev,
                const unsigned int nb_rx_queues,
                const unsigned int nb_tx_queues,
                struct pmd_internals **internals,
                struct rte_eth_dev **eth_dev)
{
        struct rte_eth_dev_data *data;
        struct pmd_process_private *pp;
        unsigned int numa_node = vdev->device.numa_node;

        PMD_LOG(INFO, "Creating pcap-backed ethdev on numa socket %d",
                numa_node);

        pp = (struct pmd_process_private *)
                rte_zmalloc(NULL, sizeof(struct pmd_process_private),
                                RTE_CACHE_LINE_SIZE);

        if (pp == NULL) {
                PMD_LOG(ERR,
                        "Failed to allocate memory for process private");
                return -1;
        }

        /* reserve an ethdev entry */
        *eth_dev = rte_eth_vdev_allocate(vdev, sizeof(**internals));
        if (!(*eth_dev)) {
                rte_free(pp);
                return -1;
        }
        (*eth_dev)->process_private = pp;
        /* now put it all together
         * - store queue data in internals,
         * - store numa_node info in eth_dev
         * - point eth_dev_data to internals
         * - and point eth_dev structure to new eth_dev_data structure
         */
        *internals = (*eth_dev)->data->dev_private;
        /*
         * Interface MAC = 02:70:63:61:70:<iface_idx>
         * derived from: 'locally administered':'p':'c':'a':'p':'iface_idx'
         * where the middle 4 characters are converted to hex.
         */
        (*internals)->eth_addr = (struct rte_ether_addr) {
                .addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
        };
        (*internals)->phy_mac = 0;
        data = (*eth_dev)->data;
        data->nb_rx_queues = (uint16_t)nb_rx_queues;
        data->nb_tx_queues = (uint16_t)nb_tx_queues;
        data->dev_link = pmd_link;
        data->mac_addrs = &(*internals)->eth_addr;

        /*
         * NOTE: we'll replace the data element of the originally allocated
         * eth_dev so the rings are local per-process
         */
        (*eth_dev)->dev_ops = &ops;

        strlcpy((*internals)->devargs, rte_vdev_device_args(vdev),
                        ETH_PCAP_ARG_MAXLEN);

        return 0;
}

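/*
 * Use the underlying interface's real MAC instead of the synthetic
 * 02:70:63:61:70:xx address (Linux: SIOCGIFHWADDR ioctl; FreeBSD:
 * NET_RT_IFLIST sysctl).
 */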
static int
eth_pcap_update_mac(const char *if_name, struct rte_eth_dev *eth_dev,
                const unsigned int numa_node)
{
#if defined(RTE_EXEC_ENV_LINUX)
        void *mac_addrs;
        struct ifreq ifr;
        int if_fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (if_fd == -1)
                return -1;

        rte_strscpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name));
        if (ioctl(if_fd, SIOCGIFHWADDR, &ifr)) {
                close(if_fd);
                return -1;
        }

        mac_addrs = rte_zmalloc_socket(NULL, RTE_ETHER_ADDR_LEN, 0, numa_node);
        if (!mac_addrs) {
                close(if_fd);
                return -1;
        }

        PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
        eth_dev->data->mac_addrs = mac_addrs;
        rte_memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
                        ifr.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);

        close(if_fd);

        return 0;

#elif defined(RTE_EXEC_ENV_FREEBSD)
        void *mac_addrs;
        struct if_msghdr *ifm;
        struct sockaddr_dl *sdl;
        int mib[6];
        size_t len = 0;
        char *buf;

        mib[0] = CTL_NET;
        mib[1] = AF_ROUTE;
        mib[2] = 0;
        mib[3] = AF_LINK;
        mib[4] = NET_RT_IFLIST;
        mib[5] = if_nametoindex(if_name);

        if (sysctl(mib, 6, NULL, &len, NULL, 0) < 0)
                return -1;

        if (len == 0)
                return -1;

        buf = rte_malloc(NULL, len, 0);
        if (!buf)
                return -1;

        if (sysctl(mib, 6, buf, &len, NULL, 0) < 0) {
                rte_free(buf);
                return -1;
        }
        ifm = (struct if_msghdr *)buf;
        sdl = (struct sockaddr_dl *)(ifm + 1);

        mac_addrs = rte_zmalloc_socket(NULL, RTE_ETHER_ADDR_LEN, 0, numa_node);
        if (!mac_addrs) {
                rte_free(buf);
                return -1;
        }

        PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
        eth_dev->data->mac_addrs = mac_addrs;
        rte_memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
                        LLADDR(sdl), RTE_ETHER_ADDR_LEN);

        rte_free(buf);

        return 0;
#else
        return -1;
#endif
}

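/*
 * Allocate the ethdev and copy the queues parsed from the devargs into the
 * internals (names/types) and the per-process data (pcap/dumper handles).
 */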
static int
eth_from_pcaps_common(struct rte_vdev_device *vdev,
                struct pmd_devargs_all *devargs_all,
                struct pmd_internals **internals, struct rte_eth_dev **eth_dev)
{
        struct pmd_process_private *pp;
        struct pmd_devargs *rx_queues = &devargs_all->rx_queues;
        struct pmd_devargs *tx_queues = &devargs_all->tx_queues;
        const unsigned int nb_rx_queues = rx_queues->num_of_queue;
        const unsigned int nb_tx_queues = tx_queues->num_of_queue;
        unsigned int i;

        /* do some parameter checking */
        if (rx_queues == NULL && nb_rx_queues > 0)
                return -1;
        if (tx_queues == NULL && nb_tx_queues > 0)
                return -1;

        if (pmd_init_internals(vdev, nb_rx_queues, nb_tx_queues, internals,
                        eth_dev) < 0)
                return -1;

        pp = (*eth_dev)->process_private;
        for (i = 0; i < nb_rx_queues; i++) {
                struct pcap_rx_queue *rx = &(*internals)->rx_queue[i];
                struct devargs_queue *queue = &rx_queues->queue[i];

                pp->rx_pcap[i] = queue->pcap;
                strlcpy(rx->name, queue->name, sizeof(rx->name));
                strlcpy(rx->type, queue->type, sizeof(rx->type));
        }

        for (i = 0; i < nb_tx_queues; i++) {
                struct pcap_tx_queue *tx = &(*internals)->tx_queue[i];
                struct devargs_queue *queue = &tx_queues->queue[i];

                pp->tx_dumper[i] = queue->dumper;
                pp->tx_pcap[i] = queue->pcap;
                strlcpy(tx->name, queue->name, sizeof(tx->name));
                strlcpy(tx->type, queue->type, sizeof(tx->type));
        }

        return 0;
}

static int
eth_from_pcaps(struct rte_vdev_device *vdev,
                struct pmd_devargs_all *devargs_all)
{
        struct pmd_internals *internals = NULL;
        struct rte_eth_dev *eth_dev = NULL;
        struct pmd_devargs *rx_queues = &devargs_all->rx_queues;
        int single_iface = devargs_all->single_iface;
        unsigned int infinite_rx = devargs_all->infinite_rx;
        int ret;

        ret = eth_from_pcaps_common(vdev, devargs_all, &internals, &eth_dev);

        if (ret < 0)
                return ret;

        /* store whether we are using a single interface for rx/tx or not */
        internals->single_iface = single_iface;

        if (single_iface) {
                internals->if_index = if_nametoindex(rx_queues->queue[0].name);

                /* phy_mac arg is applied only if "iface" devarg is provided */
                if (rx_queues->phy_mac) {
                        int ret = eth_pcap_update_mac(rx_queues->queue[0].name,
                                        eth_dev, vdev->device.numa_node);
                        if (ret == 0)
                                internals->phy_mac = 1;
                }
        }

        internals->infinite_rx = infinite_rx;
        /* Assign rx ops. */
        if (infinite_rx)
                eth_dev->rx_pkt_burst = eth_pcap_rx_infinite;
        else if (devargs_all->is_rx_pcap || devargs_all->is_rx_iface ||
                        single_iface)
                eth_dev->rx_pkt_burst = eth_pcap_rx;
        else
                eth_dev->rx_pkt_burst = eth_null_rx;

        /* Assign tx ops. */
        if (devargs_all->is_tx_pcap)
                eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
        else if (devargs_all->is_tx_iface || single_iface)
                eth_dev->tx_pkt_burst = eth_pcap_tx;
        else
                eth_dev->tx_pkt_burst = eth_tx_drop;

        rte_eth_dev_probing_finish(eth_dev);
        return 0;
}

static int
pmd_pcap_probe(struct rte_vdev_device *dev)
{
        const char *name;
        struct rte_kvargs *kvlist;
        struct pmd_devargs pcaps = {0};
        struct pmd_devargs dumpers = {0};
        struct rte_eth_dev *eth_dev = NULL;
        struct pmd_internals *internal;
        int ret = 0;

        struct pmd_devargs_all devargs_all = {
                .single_iface = 0,
                .is_tx_pcap = 0,
                .is_tx_iface = 0,
                .infinite_rx = 0,
        };

        name = rte_vdev_device_name(dev);
        PMD_LOG(INFO, "Initializing pmd_pcap for %s", name);

        gettimeofday(&start_time, NULL);
        start_cycles = rte_get_timer_cycles();
        hz = rte_get_timer_hz();

        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
                if (!eth_dev) {
                        PMD_LOG(ERR, "Failed to probe %s", name);
                        return -1;
                }

                internal = eth_dev->data->dev_private;

                kvlist = rte_kvargs_parse(internal->devargs, valid_arguments);
                if (kvlist == NULL)
                        return -1;
        } else {
                kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
                                valid_arguments);
                if (kvlist == NULL)
                        return -1;
        }

        /*
         * If iface argument is passed we open the NICs and use them for
         * reading / writing
         */
        if (rte_kvargs_count(kvlist, ETH_PCAP_IFACE_ARG) == 1) {

                ret = rte_kvargs_process(kvlist, ETH_PCAP_IFACE_ARG,
                                &open_rx_tx_iface, &pcaps);
                if (ret < 0)
                        goto free_kvlist;

                dumpers.queue[0] = pcaps.queue[0];

                ret = rte_kvargs_process(kvlist, ETH_PCAP_PHY_MAC_ARG,
                                &select_phy_mac, &pcaps.phy_mac);
                if (ret < 0)
                        goto free_kvlist;

                dumpers.phy_mac = pcaps.phy_mac;

                devargs_all.single_iface = 1;
                pcaps.num_of_queue = 1;
                dumpers.num_of_queue = 1;

                goto create_eth;
        }

        /*
         * We check whether we want to open a RX stream from a real NIC, a
         * pcap file or open a dummy RX stream
         */
        devargs_all.is_rx_pcap =
                rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1 : 0;
        devargs_all.is_rx_iface =
                rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_ARG) ? 1 : 0;
        pcaps.num_of_queue = 0;

        devargs_all.is_tx_pcap =
                rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) ? 1 : 0;
        devargs_all.is_tx_iface =
                rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG) ? 1 : 0;
        dumpers.num_of_queue = 0;

        if (devargs_all.is_rx_pcap) {
                /*
                 * We check whether we want to infinitely rx the pcap file.
                 */
                unsigned int infinite_rx_arg_cnt = rte_kvargs_count(kvlist,
                                ETH_PCAP_INFINITE_RX_ARG);

                if (infinite_rx_arg_cnt == 1) {
                        ret = rte_kvargs_process(kvlist,
                                        ETH_PCAP_INFINITE_RX_ARG,
                                        &get_infinite_rx_arg,
                                        &devargs_all.infinite_rx);
                        if (ret < 0)
                                goto free_kvlist;
                        PMD_LOG(INFO, "infinite_rx has been %s for %s",
                                        devargs_all.infinite_rx ? "enabled" : "disabled",
                                        name);

                } else if (infinite_rx_arg_cnt > 1) {
                        PMD_LOG(WARNING, "infinite_rx has not been enabled since the "
                                        "argument has been provided more than once "
                                        "for %s", name);
                }

                ret = rte_kvargs_process(kvlist, ETH_PCAP_RX_PCAP_ARG,
                                &open_rx_pcap, &pcaps);
        } else if (devargs_all.is_rx_iface) {
                ret = rte_kvargs_process(kvlist, NULL,
                                &rx_iface_args_process, &pcaps);
        } else if (devargs_all.is_tx_iface || devargs_all.is_tx_pcap) {
                unsigned int i;

                /* Count number of tx queue args passed before dummy rx queue
                 * creation so a dummy rx queue can be created for each tx queue
                 */
                unsigned int num_tx_queues =
                        (rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) +
                        rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG));

                PMD_LOG(INFO, "Creating null rx queue since no rx queues were provided.");

                /* Creating a dummy rx queue for each tx queue passed */
                for (i = 0; i < num_tx_queues; i++)
                        ret = add_queue(&pcaps, "dummy_rx", "rx_null", NULL,
                                        NULL);
        } else {
                PMD_LOG(ERR, "Error - No rx or tx queues provided");
                ret = -ENOENT;
        }
        if (ret < 0)
                goto free_kvlist;

        /*
         * We check whether we want to open a TX stream to a real NIC,
         * a pcap file, or drop packets on tx
         */
        if (devargs_all.is_tx_pcap) {
                ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_PCAP_ARG,
                                &open_tx_pcap, &dumpers);
        } else if (devargs_all.is_tx_iface) {
                ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_IFACE_ARG,
                                &open_tx_iface, &dumpers);
        } else {
                unsigned int i;

                PMD_LOG(INFO, "Dropping packets on tx since no tx queues were provided.");

                /* Add 1 dummy queue per rxq which counts and drops packets. */
                for (i = 0; i < pcaps.num_of_queue; i++)
                        ret = add_queue(&dumpers, "dummy_tx", "tx_drop", NULL,
                                        NULL);
        }

        if (ret < 0)
                goto free_kvlist;

create_eth:
        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                struct pmd_process_private *pp;
                unsigned int i;

                internal = eth_dev->data->dev_private;
                pp = (struct pmd_process_private *)
                        rte_zmalloc(NULL,
                                sizeof(struct pmd_process_private),
                                RTE_CACHE_LINE_SIZE);

                if (pp == NULL) {
                        PMD_LOG(ERR,
                                "Failed to allocate memory for process private");
                        ret = -1;
                        goto free_kvlist;
                }

                eth_dev->dev_ops = &ops;
                eth_dev->device = &dev->device;

                /* setup process private */
                for (i = 0; i < pcaps.num_of_queue; i++)
                        pp->rx_pcap[i] = pcaps.queue[i].pcap;

                for (i = 0; i < dumpers.num_of_queue; i++) {
                        pp->tx_dumper[i] = dumpers.queue[i].dumper;
                        pp->tx_pcap[i] = dumpers.queue[i].pcap;
                }

                eth_dev->process_private = pp;
                eth_dev->rx_pkt_burst = eth_pcap_rx;
                if (devargs_all.is_tx_pcap)
                        eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
                else
                        eth_dev->tx_pkt_burst = eth_pcap_tx;

                rte_eth_dev_probing_finish(eth_dev);
                goto free_kvlist;
        }

        devargs_all.rx_queues = pcaps;
        devargs_all.tx_queues = dumpers;

        ret = eth_from_pcaps(dev, &devargs_all);

free_kvlist:
        rte_kvargs_free(kvlist);

        return ret;
}

static int
pmd_pcap_remove(struct rte_vdev_device *dev)
{
        struct pmd_internals *internals = NULL;
        struct rte_eth_dev *eth_dev = NULL;

        PMD_LOG(INFO, "Closing pcap ethdev on numa socket %d",
                        rte_socket_id());

        if (!dev)
                return -1;

        /* retrieve the ethdev entry */
        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
        if (eth_dev == NULL)
                return -1;

        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
                internals = eth_dev->data->dev_private;
                if (internals != NULL && internals->phy_mac == 0)
                        /* not dynamically allocated, must not be freed */
                        eth_dev->data->mac_addrs = NULL;
        }

        eth_dev_close(eth_dev);

        rte_free(eth_dev->process_private);
        rte_eth_dev_release_port(eth_dev);

        return 0;
}

static struct rte_vdev_driver pmd_pcap_drv = {
        .probe = pmd_pcap_probe,
        .remove = pmd_pcap_remove,
};

RTE_PMD_REGISTER_VDEV(net_pcap, pmd_pcap_drv);
RTE_PMD_REGISTER_ALIAS(net_pcap, eth_pcap);
RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
        ETH_PCAP_RX_PCAP_ARG "=<string> "
        ETH_PCAP_TX_PCAP_ARG "=<string> "
        ETH_PCAP_RX_IFACE_ARG "=<ifc> "
        ETH_PCAP_RX_IFACE_IN_ARG "=<ifc> "
        ETH_PCAP_TX_IFACE_ARG "=<ifc> "
        ETH_PCAP_IFACE_ARG "=<ifc> "
        ETH_PCAP_PHY_MAC_ARG "=<int> "
        ETH_PCAP_INFINITE_RX_ARG "=<0|1>");

RTE_INIT(eth_pcap_init_log)
{
        eth_pcap_logtype = rte_log_register("pmd.net.pcap");
        if (eth_pcap_logtype >= 0)
                rte_log_set_level(eth_pcap_logtype, RTE_LOG_NOTICE);
}