1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2019 Microsoft Corporation
14 #include <rte_common.h>
15 #include <rte_cycles.h>
17 #include <rte_errno.h>
18 #include <rte_ethdev.h>
19 #include <rte_ether.h>
21 #include <rte_pcapng.h>
22 #include <rte_reciprocal.h>
25 #include "pcapng_proto.h"
27 /* conversion from DPDK speed to PCAPNG */
28 #define PCAPNG_MBPS_SPEED 1000000ull
30 /* Format of the capture file handle */
32 int outfd; /* output file descriptor; blocks are emitted on it with write()/writev() */
33 /* DPDK port id to interface index in file; assigned while the interface blocks are written */
34 uint32_t port_index[RTE_MAX_ETHPORTS];
37 /* For converting TSC cycles to PCAPNG ns format */
38 static struct pcapng_time {
42 struct rte_reciprocal_u64 tsc_hz_inverse; /* reciprocal of tsc_hz, consumed by rte_reciprocal_divide_u64() */
/*
 * Pair a TSC reading with a CLOCK_REALTIME reading: the TSC is sampled
 * before and after clock_gettime() and the midpoint is kept as the cycle
 * count that corresponds to the wall-clock value stored in ns.
 */
50 pcapng_time.cycles = rte_get_tsc_cycles();
51 clock_gettime(CLOCK_REALTIME, &ts);
52 pcapng_time.cycles = (pcapng_time.cycles + rte_get_tsc_cycles()) / 2;
53 pcapng_time.ns = rte_timespec_to_ns(&ts);
/* Cache the TSC frequency and its reciprocal for the cycles-to-ns conversion. */
55 pcapng_time.tsc_hz = rte_get_tsc_hz();
56 pcapng_time.tsc_hz_inverse = rte_reciprocal_value_u64(pcapng_time.tsc_hz);
59 /* PCAPNG timestamps are in nanoseconds.
 * Converts a TSC cycle count to nanoseconds on the time base held in
 * pcapng_time. Side effect: when a whole second or more has elapsed,
 * pcapng_time.cycles and pcapng_time.ns are advanced by the whole seconds.
 * NOTE(review): pcapng_time is updated without visible locking - confirm
 * callers on different lcores cannot race here.
 */
60 static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
64 if (!pcapng_time.tsc_hz)
67 /* In essence the calculation is:
68 * delta = (cycles - pcapng_time.cycles) * NSEC_PER_SEC / rte_get_tsc_hz()
69 * but this overflows within 4 to 8 seconds depending on TSC frequency.
70 * Instead, if delta >= pcapng_time.tsc_hz:
71 * Increase pcapng_time.ns and pcapng_time.cycles by the number of
72 * whole seconds in delta and reduce delta accordingly.
73 * delta will therefore always lie in the interval [0, pcapng_time.tsc_hz),
74 * which will not overflow when multiplied by NSEC_PER_SEC provided the
75 * TSC frequency < approx 18.4GHz.
77 * Currently all TSCs operate below 5GHz.
79 delta = cycles - pcapng_time.cycles;
80 if (unlikely(delta >= pcapng_time.tsc_hz)) {
81 if (likely(delta < pcapng_time.tsc_hz * 2)) {
82 delta -= pcapng_time.tsc_hz;
83 pcapng_time.cycles += pcapng_time.tsc_hz;
84 pcapng_time.ns += NSEC_PER_SEC;
86 secs = rte_reciprocal_divide_u64(delta, &pcapng_time.tsc_hz_inverse);
87 delta -= secs * pcapng_time.tsc_hz;
88 pcapng_time.cycles += secs * pcapng_time.tsc_hz;
89 pcapng_time.ns += secs * NSEC_PER_SEC;
93 return pcapng_time.ns + rte_reciprocal_divide_u64(delta * NSEC_PER_SEC,
94 &pcapng_time.tsc_hz_inverse);
97 /* length of option including padding: the option header plus len payload bytes, rounded up with RTE_ALIGN.
 * NOTE(review): len is uint16_t while several callers pass strlen() results - confirm those strings are bounded.
 */
98 static uint16_t pcapng_optlen(uint16_t len)
100 return RTE_ALIGN(sizeof(struct pcapng_option) + len,
104 /* build TLV option and return location of next */
/*
 * popt is where the option is written, code is the option code and
 * data/len is the payload copied into popt->data. The returned pointer is
 * popt advanced by the padded option length from pcapng_optlen().
 * NOTE(review): the OPT_END callers pass data == NULL with len == 0;
 * confirm the memcpy() below is acceptable (or guarded) for that case.
 */
105 static struct pcapng_option *
106 pcapng_add_option(struct pcapng_option *popt, uint16_t code,
107 const void *data, uint16_t len)
111 memcpy(popt->data, data, len);
113 return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len));
117 * Write required initial section header describing the capture
120 pcapng_section_block(rte_pcapng_t *self,
121 const char *os, const char *hw,
122 const char *app, const char *comment)
124 struct pcapng_section_header *hdr;
125 struct pcapng_option *opt;
/* First pass: add up the padded size of each string option. */
132 len += pcapng_optlen(strlen(hw));
134 len += pcapng_optlen(strlen(os));
136 len += pcapng_optlen(strlen(app));
138 len += pcapng_optlen(strlen(comment));
140 /* reserve space for OPT_END */
141 len += pcapng_optlen(0);
142 len += sizeof(uint32_t); /* trailing copy of block_length */
/* zero-filled buffer, so bytes not explicitly written (option padding) stay zero */
144 buf = calloc(1, len);
148 hdr = (struct pcapng_section_header *)buf;
149 *hdr = (struct pcapng_section_header) {
150 .block_type = PCAPNG_SECTION_BLOCK,
152 .byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
153 .major_version = PCAPNG_MAJOR_VERS,
154 .minor_version = PCAPNG_MINOR_VERS,
155 .section_length = UINT64_MAX, /* presumably "length not specified" - confirm against the pcapng spec */
158 /* After the section header insert variable length options. */
159 opt = (struct pcapng_option *)(hdr + 1);
/* Second pass: emit the options, comment first, each call returning the next write position. */
161 opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
162 comment, strlen(comment));
164 opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE,
167 opt = pcapng_add_option(opt, PCAPNG_SHB_OS,
170 opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL,
173 /* The standard requires last option to be OPT_END */
174 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
176 /* clone block_length after option: the last 32-bit word of the block repeats its length */
177 memcpy(opt, &hdr->block_length, sizeof(uint32_t));
/* the whole block goes out in a single write() call */
179 cc = write(self->outfd, buf, len);
185 /* Write an interface block for a DPDK port */
187 pcapng_add_interface(rte_pcapng_t *self, uint16_t port)
189 struct pcapng_interface_block *hdr;
190 struct rte_eth_dev_info dev_info;
191 struct rte_ether_addr *ea, macaddr;
192 const struct rte_device *dev;
193 struct rte_eth_link link;
194 struct pcapng_option *opt;
195 const uint8_t tsresol = 9; /* nanosecond resolution; written as the PCAPNG_IFB_TSRESOL option */
198 char ifname[IF_NAMESIZE];
202 if (rte_eth_dev_info_get(port, &dev_info) < 0)
205 /* make something like an interface name: the kernel name when if_index resolves, otherwise "dpdk:<port>" */
206 if (if_indextoname(dev_info.if_index, ifname) == NULL)
207 snprintf(ifname, IF_NAMESIZE, "dpdk:%u", port);
209 /* make a useful device hardware string of the form <bus name>-<device name> */
210 dev = dev_info.device;
212 snprintf(ifhw, sizeof(ifhw),
213 "%s-%s", dev->bus->name, dev->name);
215 /* DPDK reports in units of Mbps; scaled by PCAPNG_MBPS_SPEED (1000000) and only taken when the link is up */
216 if (rte_eth_link_get(port, &link) == 0 &&
217 link.link_status == RTE_ETH_LINK_UP)
218 speed = link.link_speed * PCAPNG_MBPS_SPEED;
220 if (rte_eth_macaddr_get(port, &macaddr) < 0)
225 /* Compute length of interface block options */
228 len += pcapng_optlen(sizeof(tsresol)); /* timestamp */
229 len += pcapng_optlen(strlen(ifname)); /* ifname */
232 len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */
234 len += pcapng_optlen(sizeof(uint64_t)); /* speed */
236 len += pcapng_optlen(strlen(ifhw)); /* hardware string */
238 len += pcapng_optlen(0); /* OPT_END */
239 len += sizeof(uint32_t); /* trailing copy of block_length */
245 hdr = (struct pcapng_interface_block *)buf;
246 *hdr = (struct pcapng_interface_block) {
247 .block_type = PCAPNG_INTERFACE_BLOCK,
248 .link_type = 1, /* DLT_EN10MB - Ethernet */
/* Options are written directly behind the fixed header. */
252 opt = (struct pcapng_option *)(hdr + 1);
253 opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL,
254 &tsresol, sizeof(tsresol));
255 opt = pcapng_add_option(opt, PCAPNG_IFB_NAME,
256 ifname, strlen(ifname));
258 opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR,
259 ea, RTE_ETHER_ADDR_LEN);
261 opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED,
262 &speed, sizeof(uint64_t));
264 opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE,
266 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
268 /* clone block_length after options */
269 memcpy(opt, &hdr->block_length, sizeof(uint32_t));
/* the result of write() is handed back to the caller unchanged */
271 return write(self->outfd, buf, len);
275 * Write the list of possible interfaces at the start
279 pcapng_interfaces(rte_pcapng_t *self)
284 RTE_ETH_FOREACH_DEV(port_id) {
285 /* The list of ports in pcapng needs to be contiguous, so each DPDK port id is mapped to the next sequential index */
286 self->port_index[port_id] = index++;
287 if (pcapng_add_interface(self, port_id) < 0)
294 * Write an Interface statistics block at the end of capture.
297 rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
299 uint64_t start_time, uint64_t end_time,
300 uint64_t ifrecv, uint64_t ifdrop)
302 struct pcapng_statistics *hdr;
303 struct pcapng_option *opt;
304 uint32_t optlen, len;
308 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
/* UINT64_MAX acts as "not supplied": that counter's option is neither sized here nor written below */
312 if (ifrecv != UINT64_MAX)
313 optlen += pcapng_optlen(sizeof(ifrecv));
314 if (ifdrop != UINT64_MAX)
315 optlen += pcapng_optlen(sizeof(ifdrop));
317 optlen += pcapng_optlen(sizeof(start_time));
319 optlen += pcapng_optlen(sizeof(end_time));
321 optlen += pcapng_optlen(strlen(comment));
323 optlen += pcapng_optlen(0); /* OPT_END */
325 len = sizeof(*hdr) + optlen + sizeof(uint32_t); /* fixed header + options + trailing copy of the length */
330 hdr = (struct pcapng_statistics *)buf;
331 opt = (struct pcapng_option *)(hdr + 1);
/* Options are emitted in this order: comment, start time, end time, received count, dropped count, end marker. */
334 opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
335 comment, strlen(comment));
337 opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
338 &start_time, sizeof(start_time));
340 opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME,
341 &end_time, sizeof(end_time));
342 if (ifrecv != UINT64_MAX)
343 opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
344 &ifrecv, sizeof(ifrecv));
345 if (ifdrop != UINT64_MAX)
346 opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP,
347 &ifdrop, sizeof(ifdrop));
349 opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
351 hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK;
352 hdr->block_length = len;
353 hdr->interface_id = self->port_index[port_id]; /* file interface index, not the DPDK port id */
/* The block is stamped with the current TSC converted to nanoseconds, stored as two 32-bit halves. */
355 ns = pcapng_tsc_to_ns(rte_get_tsc_cycles());
356 hdr->timestamp_hi = ns >> 32;
357 hdr->timestamp_lo = (uint32_t)ns;
359 /* clone block_length after option: opt now points just past OPT_END */
360 memcpy(opt, &len, sizeof(uint32_t));
362 return write(self->outfd, buf, len);
366 rte_pcapng_mbuf_size(uint32_t length)
368 /* The VLAN and EPB header must fit in the mbuf headroom. */
369 RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) +
370 sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM);
372 /* The flags and queue information are added at the end. The EPB header is not part of this sum; it is expected to go into the headroom checked above. */
373 return sizeof(struct rte_mbuf)
374 + RTE_ALIGN(length, sizeof(uint32_t)) /* packet data rounded up to 32 bits */
375 + pcapng_optlen(sizeof(uint32_t)) /* flag option */
376 + pcapng_optlen(sizeof(uint32_t)) /* queue option */
377 + sizeof(uint32_t); /* trailing block length */
380 /* More generalized version of rte_vlan_insert(): the tag's ether type is a parameter (callers pass RTE_ETHER_TYPE_VLAN or RTE_ETHER_TYPE_QINQ) */
382 pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
384 struct rte_ether_hdr *nh, *oh;
385 struct rte_vlan_hdr *vh;
/* Preconditions: the mbuf must be direct and not shared, and must hold at least a full Ethernet header. */
387 if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
390 if (rte_pktmbuf_data_len(m) < sizeof(*oh))
/* oh is the current start of the frame; nh is the new start after opening a gap of one VLAN header in front. */
393 oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
394 nh = (struct rte_ether_hdr *)
395 rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr));
/* Move both MAC addresses to the new start; the regions overlap, hence memmove(). */
399 memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN);
400 nh->ether_type = rte_cpu_to_be_16(ether_type); /* tag type, stored big-endian */
/* The VLAN header sits directly behind the relocated Ethernet header. */
402 vh = (struct rte_vlan_hdr *) (nh + 1);
403 vh->vlan_tci = rte_cpu_to_be_16(tci); /* tag control information, stored big-endian */
409 * The mbufs created use the Pcapng standard enhanced packet block.
412 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
413 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
414 * 0 | Block Type = 0x00000006 |
415 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
416 * 4 | Block Total Length |
417 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
419 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
420 * 12 | Timestamp (High) |
421 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
422 * 16 | Timestamp (Low) |
423 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
424 * 20 | Captured Packet Length |
425 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
426 * 24 | Original Packet Length |
427 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
430 * / variable length, padded to 32 bits /
432 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
433 * | Option Code = 0x0002 | Option Length = 0x004 |
434 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
435 * | Flags (direction) |
436 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
437 * | Option Code = 0x0006 | Option Length = 0x002 |
438 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
440 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
441 * | Block Total Length |
442 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
445 /* Make a copy of original mbuf with pcapng header and options */
447 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
448 const struct rte_mbuf *md,
449 struct rte_mempool *mp,
450 uint32_t length, uint64_t cycles,
451 enum rte_pcapng_direction direction)
453 struct pcapng_enhance_packet_block *epb;
454 uint32_t orig_len, data_len, padding, flags;
455 struct pcapng_option *opt;
456 const uint16_t optlen = pcapng_optlen(sizeof(flags)) + pcapng_optlen(sizeof(queue)); /* room for the flags and queue options only */
460 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
461 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
463 ns = pcapng_tsc_to_ns(cycles);
/* Length of the source packet, read before anything is copied or re-tagged. */
465 orig_len = rte_pktmbuf_pkt_len(md);
467 /* Take snapshot of the data: up to length bytes from offset 0, allocated from mp */
468 mc = rte_pktmbuf_copy(md, mp, 0, length);
469 if (unlikely(mc == NULL))
472 /* Expand any offloaded VLAN information: re-insert a tag flagged as stripped on RX or requested on TX */
473 if ((direction == RTE_PCAPNG_DIRECTION_IN &&
474 (md->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED)) ||
475 (direction == RTE_PCAPNG_DIRECTION_OUT &&
476 (md->ol_flags & RTE_MBUF_F_TX_VLAN))) {
477 if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
482 if ((direction == RTE_PCAPNG_DIRECTION_IN &&
483 (md->ol_flags & RTE_MBUF_F_RX_QINQ_STRIPPED)) ||
484 (direction == RTE_PCAPNG_DIRECTION_OUT &&
485 (md->ol_flags & RTE_MBUF_F_TX_QINQ))) {
486 if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
487 md->vlan_tci_outer) != 0)
491 /* pad the packet to 32 bit boundary; data_len is taken after any tag insertion and before padding, and becomes capture_length */
492 data_len = rte_pktmbuf_data_len(mc);
493 padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len;
495 void *tail = rte_pktmbuf_append(mc, padding);
499 memset(tail, 0, padding);
502 /* reserve trailing options and block length */
503 opt = (struct pcapng_option *)
504 rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
505 if (unlikely(opt == NULL))
509 case RTE_PCAPNG_DIRECTION_IN:
510 flags = PCAPNG_IFB_INBOUND;
512 case RTE_PCAPNG_DIRECTION_OUT:
513 flags = PCAPNG_IFB_OUTBOUND;
519 opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
520 &flags, sizeof(flags));
522 opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
523 &queue, sizeof(queue));
525 /* Note: END_OPT necessary here. Wireshark doesn't do it.
 * NOTE(review): no OPT_END is written and optlen reserves no room for
 * one, so this note probably means it is NOT necessary - confirm.
 */
527 /* Add PCAPNG packet header */
528 epb = (struct pcapng_enhance_packet_block *)
529 rte_pktmbuf_prepend(mc, sizeof(*epb));
530 if (unlikely(epb == NULL))
533 epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
/* NOTE(review): data_len covers the first segment only - confirm the copy is always a single segment. */
534 epb->block_length = rte_pktmbuf_data_len(mc);
536 /* Interface index is filled in later during write */
539 epb->timestamp_hi = ns >> 32;
540 epb->timestamp_lo = (uint32_t)ns;
/* NOTE(review): orig_len was read before any tag was re-inserted, so capture_length can exceed original_length by the tag size - confirm readers accept this. */
541 epb->capture_length = data_len;
542 epb->original_length = orig_len;
544 /* set trailer of block length: opt now points just past the last option */
545 *(uint32_t *)opt = epb->block_length;
550 rte_pktmbuf_free(mc);
554 /* Count how many segments are in this array of mbufs: the sum of nb_segs over the n packets */
556 mbuf_burst_segs(struct rte_mbuf *pkts[], unsigned int n)
558 unsigned int i, iovcnt;
560 for (iovcnt = 0, i = 0; i < n; i++) {
561 const struct rte_mbuf *m = pkts[i];
563 __rte_mbuf_sanity_check(m, 1);
565 iovcnt += m->nb_segs;
570 /* Write pre-formatted packets to file. */
572 rte_pcapng_write_packets(rte_pcapng_t *self,
573 struct rte_mbuf *pkts[], uint16_t nb_pkts)
575 int iovcnt = mbuf_burst_segs(pkts, nb_pkts);
576 struct iovec iov[iovcnt]; /* stack VLA, one entry per segment; NOTE(review): size follows the burst - confirm bursts stay small */
580 for (i = cnt = 0; i < nb_pkts; i++) {
581 struct rte_mbuf *m = pkts[i];
582 struct pcapng_enhance_packet_block *epb;
584 /* sanity check that this is really a pcapng mbuf: it must start with an EPB whose block_length equals the first segment's data_len */
585 epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
586 if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK ||
587 epb->block_length != rte_pktmbuf_data_len(m))) {
593 * The DPDK port is recorded during pcapng_copy.
594 * Map that to PCAPNG interface in file.
596 epb->interface_id = self->port_index[m->port];
/* One iovec entry is filled per segment while walking the chain through m->next. */
598 iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *);
599 iov[cnt].iov_len = rte_pktmbuf_data_len(m);
601 } while ((m = m->next));
/* NOTE(review): writev() is given iovcnt rather than cnt, which relies on every packet having been added above - confirm the sanity-check failure path cannot fall through to here. */
604 ret = writev(self->outfd, iov, iovcnt);
605 if (unlikely(ret < 0))
610 /* Create new pcapng writer handle: allocate it, then write the section header block followed by the interface blocks */
612 rte_pcapng_fdopen(int fd,
613 const char *osname, const char *hardware,
614 const char *appname, const char *comment)
618 self = malloc(sizeof(*self));
626 if (pcapng_section_block(self, osname, hardware, appname, comment) < 0)
629 if (pcapng_interfaces(self) < 0)
639 rte_pcapng_close(rte_pcapng_t *self)