pcapng: add new library for writing pcapng files
authorStephen Hemminger <stephen@networkplumber.org>
Wed, 20 Oct 2021 21:42:26 +0000 (14:42 -0700)
committerThomas Monjalon <thomas@monjalon.net>
Fri, 22 Oct 2021 15:19:07 +0000 (17:19 +0200)
This is a utility library for writing pcapng format files
used by Wireshark family of utilities. Older tcpdump
also knows how to read (but not write) this format.

See
  https://github.com/pcapng/pcapng/

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Reshma Pattan <reshma.pattan@intel.com>
Acked-by: Ray Kinsella <mdr@ashroe.eu>
13 files changed:
MAINTAINERS
doc/api/doxy-api-index.md
doc/api/doxy-api.conf.in
doc/guides/howto/packet_capture_framework.rst
doc/guides/prog_guide/index.rst
doc/guides/prog_guide/pcapng_lib.rst [new file with mode: 0644]
doc/guides/rel_notes/release_21_11.rst
lib/meson.build
lib/pcapng/meson.build [new file with mode: 0644]
lib/pcapng/pcapng_proto.h [new file with mode: 0644]
lib/pcapng/rte_pcapng.c [new file with mode: 0644]
lib/pcapng/rte_pcapng.h [new file with mode: 0644]
lib/pcapng/version.map [new file with mode: 0644]

index fb3f123..3f9bb0f 100644 (file)
@@ -1431,9 +1431,12 @@ F: doc/guides/sample_app_ug/qos_scheduler.rst
 
 Packet capture
 M: Reshma Pattan <reshma.pattan@intel.com>
+M: Stephen Hemminger <stephen@networkplumber.org>
 F: lib/pdump/
 F: doc/guides/prog_guide/pdump_lib.rst
 F: app/test/test_pdump.*
+F: lib/pcapng/
+F: doc/guides/prog_guide/pcapng_lib.rst
 F: app/pdump/
 F: doc/guides/tools/pdump.rst
 
index 2939050..7685606 100644 (file)
@@ -210,6 +210,7 @@ The public API headers are grouped by topics:
 - **debug**:
   [jobstats]           (@ref rte_jobstats.h),
   [telemetry]          (@ref rte_telemetry.h),
+  [pcapng]             (@ref rte_pcapng.h),
   [pdump]              (@ref rte_pdump.h),
   [hexdump]            (@ref rte_hexdump.h),
   [debug]              (@ref rte_debug.h),
index 109ec1f..096ebba 100644 (file)
@@ -59,6 +59,7 @@ INPUT                   = @TOPDIR@/doc/api/doxy-api-index.md \
                           @TOPDIR@/lib/metrics \
                           @TOPDIR@/lib/node \
                           @TOPDIR@/lib/net \
+                          @TOPDIR@/lib/pcapng \
                           @TOPDIR@/lib/pci \
                           @TOPDIR@/lib/pdump \
                           @TOPDIR@/lib/pipeline \
index c31bac5..9fc0dc9 100644 (file)
@@ -1,32 +1,32 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
-    Copyright(c) 2017 Intel Corporation.
+    Copyright(c) 2017-2021 Intel Corporation.
 
-DPDK pdump Library and pdump Tool
-=================================
+DPDK packet capture libraries and tools
+=======================================
 
 This document describes how the Data Plane Development Kit (DPDK) Packet
 Capture Framework is used for capturing packets on DPDK ports. It is intended
 for users of DPDK who want to know more about the Packet Capture feature and
 for those who want to monitor traffic on DPDK-controlled devices.
 
-The DPDK packet capture framework was introduced in DPDK v16.07. The DPDK
-packet capture framework consists of the DPDK pdump library and DPDK pdump
-tool.
-
+The DPDK packet capture framework was introduced in DPDK v16.07
+and enhanced in 21.11.
+The DPDK packet capture framework consists of the libraries
+for collecting packets ``librte_pdump``
+and writing packets to a file ``librte_pcapng``.
+There is an application: ``dpdk-pdump``.
 
 Introduction
 ------------
 
-The :ref:`librte_pdump <pdump_library>` library provides the APIs required to
-allow users to initialize the packet capture framework and to enable or
-disable packet capture. The library works on a multi process communication model and its
-usage is recommended for debugging purposes.
+The :doc:`librte_pdump <../prog_guide/pdump_lib>` library provides the API
+required to allow users to initialize the packet capture framework
+and to enable or disable packet capture.
+The library works on a multi-process communication model
+and its usage is recommended for debugging purposes.
 
-The :ref:`dpdk-pdump <pdump_tool>` tool is developed based on the
-``librte_pdump`` library.  It runs as a DPDK secondary process and is capable
-of enabling or disabling packet capture on DPDK ports. The ``dpdk-pdump`` tool
-provides command-line options with which users can request enabling or
-disabling of the packet capture on DPDK ports.
+The :doc:`librte_pcapng <../prog_guide/pcapng_lib>` library provides the API
+to format packets and write them to a file in Pcapng format.
 
 The application which initializes the packet capture framework will be a primary process
 and the application that enables or disables the packet capture will
index 89af28d..a8e8e75 100644 (file)
@@ -44,6 +44,7 @@ Programmer's Guide
     ip_fragment_reassembly_lib
     generic_receive_offload_lib
     generic_segmentation_offload_lib
+    pcapng_lib
     pdump_lib
     multi_proc_support
     kernel_nic_interface
diff --git a/doc/guides/prog_guide/pcapng_lib.rst b/doc/guides/prog_guide/pcapng_lib.rst
new file mode 100644 (file)
index 0000000..cc3eccc
--- /dev/null
@@ -0,0 +1,47 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2021 Microsoft Corporation
+
+Packet Capture Next Generation Library
+======================================
+
+Exchanging packet traces becomes more and more critical every day.
+The de facto standard for this is the format defined by libpcap;
+but that format is rather old and is lacking in functionality
+for more modern applications.
+The `Pcapng file format`_ is the default capture file format
+for modern network capture processing tools
+such as `wireshark`_ (can also be read by `tcpdump`_).
+
+The Pcapng library is an API for formatting packet data
+into a Pcapng file.
+The format conforms to the current `Pcapng RFC`_ standard.
+It is designed to be integrated with the packet capture library.
+
+Usage
+-----
+
+Before the library can be used, the function ``rte_pcapng_init``
+should be called once to initialize timestamp computation.
+
+The output stream is created with ``rte_pcapng_fdopen``,
+and should be closed with ``rte_pcapng_close``.
+
+The library requires a DPDK mempool to allocate mbufs.
+The mbufs need to be able to accommodate additional space
+for the pcapng packet format header and trailer information;
+the function ``rte_pcapng_mbuf_size`` should be used
+to determine the lower bound based on MTU.
+
+Collecting packets is done in two parts.
+The function ``rte_pcapng_copy`` is used to format and copy mbuf data
+and ``rte_pcapng_write_packets`` writes a burst of packets to the output file.
+
+The function ``rte_pcapng_write_stats`` can be used
+to write statistics information into the output file.
+The summary statistics information is automatically added
+by ``rte_pcapng_close``.
+
+.. _Tcpdump: https://tcpdump.org/
+.. _Wireshark: https://wireshark.org/
+.. _Pcapng file format: https://github.com/pcapng/pcapng/
+.. _Pcapng RFC: https://datatracker.ietf.org/doc/html/draft-tuexen-opsawg-pcapng
index a0ad309..0d2e8a6 100644 (file)
@@ -240,6 +240,10 @@ New Features
   * Added tests to verify tunnel header verification in IPsec inbound.
   * Added tests to verify inner checksum.
 
+* **Revised packet capture framework.**
+
+  * New library for writing pcapng packet capture files.
+
 
 Removed Items
 -------------
index 5aa1be5..484b1da 100644 (file)
@@ -41,6 +41,7 @@ libraries = [
         'latencystats',
         'lpm',
         'member',
+        'pcapng',
         'power',
         'pdump',
         'rawdev',
diff --git a/lib/pcapng/meson.build b/lib/pcapng/meson.build
new file mode 100644 (file)
index 0000000..4549925
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2019 Microsoft Corporation
+
+sources = files('rte_pcapng.c')
+headers = files('rte_pcapng.h')
+
+deps += ['ethdev']
diff --git a/lib/pcapng/pcapng_proto.h b/lib/pcapng/pcapng_proto.h
new file mode 100644 (file)
index 0000000..47161d8
--- /dev/null
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019-2020 Microsoft Corporation
+ *
+ * PCAP Next Generation Capture File writer
+ *
+ * See: https://github.com/pcapng/pcapng/ for the file format.
+ */
+
+/*
+ * Block type codes, stored in the block_type field that starts
+ * every block header structure defined in this file.
+ */
+enum pcapng_block_types {
+       PCAPNG_INTERFACE_BLOCK          = 1,
+       PCAPNG_PACKET_BLOCK,            /* Obsolete */
+       PCAPNG_SIMPLE_PACKET_BLOCK,
+       PCAPNG_NAME_RESOLUTION_BLOCK,
+       PCAPNG_INTERFACE_STATS_BLOCK,
+       PCAPNG_ENHANCED_PACKET_BLOCK,
+
+       PCAPNG_SECTION_BLOCK            = 0x0A0D0D0A,
+};
+
+/*
+ * Header of a single option (type-length-value).
+ * The value bytes follow the header directly.
+ */
+struct pcapng_option {
+       uint16_t code;          /* option code, meaning depends on block */
+       uint16_t length;        /* length of data[] */
+       uint8_t data[];         /* option value */
+};
+
+#define PCAPNG_BYTE_ORDER_MAGIC 0x1A2B3C4D
+#define PCAPNG_MAJOR_VERS 1
+#define PCAPNG_MINOR_VERS 0
+
+/* Option codes that are not specific to one block type. */
+enum pcapng_opt {
+       PCAPNG_OPT_END  = 0,    /* terminates a list of options */
+       PCAPNG_OPT_COMMENT = 1,
+};
+
+/* Fixed-size leading part of a section header block. */
+struct pcapng_section_header {
+       uint32_t block_type;            /* PCAPNG_SECTION_BLOCK */
+       uint32_t block_length;
+       uint32_t byte_order_magic;      /* PCAPNG_BYTE_ORDER_MAGIC */
+       uint16_t major_version;
+       uint16_t minor_version;
+       uint64_t section_length;
+};
+
+/* Option codes specific to the section header block. */
+enum pcapng_section_opt {
+       PCAPNG_SHB_HARDWARE = 2,
+       PCAPNG_SHB_OS       = 3,
+       PCAPNG_SHB_USERAPPL = 4,
+};
+
+/* Fixed-size leading part of an interface description block. */
+struct pcapng_interface_block {
+       uint32_t block_type;    /* 1 */
+       uint32_t block_length;
+       uint16_t link_type;
+       uint16_t reserved;
+       uint32_t snap_len;
+};
+
+/*
+ * Option codes specific to the interface description block.
+ * Values are consecutive, starting at 2.
+ */
+enum pcapng_interface_options {
+       PCAPNG_IFB_NAME  = 2,
+       PCAPNG_IFB_DESCRIPTION,
+       PCAPNG_IFB_IPV4ADDR,
+       PCAPNG_IFB_IPV6ADDR,
+       PCAPNG_IFB_MACADDR,
+       PCAPNG_IFB_EUIADDR,
+       PCAPNG_IFB_SPEED,
+       PCAPNG_IFB_TSRESOL,
+       PCAPNG_IFB_TZONE,
+       PCAPNG_IFB_FILTER,
+       PCAPNG_IFB_OS,
+       PCAPNG_IFB_FCSLEN,
+       PCAPNG_IFB_TSOFFSET,
+       PCAPNG_IFB_HARDWARE,
+};
+
+/* Fixed-size leading part of an enhanced packet block. */
+struct pcapng_enhance_packet_block {
+       uint32_t block_type;    /* 6 */
+       uint32_t block_length;
+       uint32_t interface_id;
+       uint32_t timestamp_hi;  /* upper 32 bits of the timestamp */
+       uint32_t timestamp_lo;  /* lower 32 bits of the timestamp */
+       uint32_t capture_length;
+       uint32_t original_length;
+};
+
+/*
+ * Flags values (direction bits).
+ * Written in hexadecimal: the 0b binary-literal prefix is a compiler
+ * extension prior to C23.
+ */
+#define PCAPNG_IFB_INBOUND   0x1
+#define PCAPNG_IFB_OUTBOUND  0x2
+
+/*
+ * Option codes specific to the enhanced packet block.
+ * Values are consecutive, starting at 2.
+ */
+enum pcapng_epb_options {
+       PCAPNG_EPB_FLAGS = 2,
+       PCAPNG_EPB_HASH,
+       PCAPNG_EPB_DROPCOUNT,
+       PCAPNG_EPB_PACKETID,
+       PCAPNG_EPB_QUEUE,
+       PCAPNG_EPB_VERDICT,
+};
+
+/*
+ * Hash algorithm identifiers.
+ * NOTE(review): presumably used as the first byte of a PCAPNG_EPB_HASH
+ * option value - confirm against the pcapng specification.
+ */
+enum pcapng_epb_hash {
+       PCAPNG_HASH_2COMP = 0,
+       PCAPNG_HASH_XOR,
+       PCAPNG_HASH_CRC32,
+       PCAPNG_HASH_MD5,
+       PCAPNG_HASH_SHA1,
+       PCAPNG_HASH_TOEPLITZ,
+};
+
+/* Fixed-size leading part of a simple packet block. */
+struct pcapng_simple_packet {
+       uint32_t block_type;    /* 3 */
+       uint32_t block_length;
+       uint32_t packet_length;
+};
+
+/* Fixed-size leading part of an interface statistics block. */
+struct pcapng_statistics {
+       uint32_t block_type;    /* 5 */
+       uint32_t block_length;
+       uint32_t interface_id;
+       uint32_t timestamp_hi;  /* upper 32 bits of the timestamp */
+       uint32_t timestamp_lo;  /* lower 32 bits of the timestamp */
+};
+
+/*
+ * Option codes specific to the interface statistics block.
+ * Values are consecutive, starting at 2.
+ */
+enum pcapng_isb_options {
+       PCAPNG_ISB_STARTTIME = 2,
+       PCAPNG_ISB_ENDTIME,
+       PCAPNG_ISB_IFRECV,
+       PCAPNG_ISB_IFDROP,
+       PCAPNG_ISB_FILTERACCEPT,
+       PCAPNG_ISB_OSDROP,
+       PCAPNG_ISB_USRDELIV,
+};
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
new file mode 100644 (file)
index 0000000..3a399de
--- /dev/null
@@ -0,0 +1,607 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Microsoft Corporation
+ */
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_dev.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_mbuf.h>
+#include <rte_pcapng.h>
+#include <rte_time.h>
+
+#include "pcapng_proto.h"
+
+/* conversion from DPDK speed to PCAPNG */
+#define PCAPNG_MBPS_SPEED 1000000ull
+
+/*
+ * Format of the capture file handle.
+ * Allocated by rte_pcapng_fdopen() and released by rte_pcapng_close().
+ */
+struct rte_pcapng {
+       int  outfd;             /* output file */
+       /*
+        * DPDK port id to interface index in file.
+        * Only the entries of ports enumerated by pcapng_interfaces()
+        * are assigned.
+        */
+       uint32_t port_index[RTE_MAX_ETHPORTS];
+};
+
+/*
+ * For converting TSC cycles to PCAPNG ns format.
+ *
+ * Holds one reference sample: a TSC cycle count and the wall-clock time
+ * in nanoseconds recorded with it by pcapng_init().
+ * Declared static because the object is only referenced from this file,
+ * which keeps the name out of the global namespace.
+ */
+static struct pcapng_time {
+       uint64_t ns;
+       uint64_t cycles;
+} pcapng_time;
+
+/*
+ * Registered through RTE_INIT.
+ * Samples the TSC and the realtime clock back to back so that later
+ * TSC values can be translated into wall-clock nanoseconds.
+ */
+RTE_INIT(pcapng_init)
+{
+       struct timespec now;
+
+       pcapng_time.cycles = rte_get_tsc_cycles();
+
+       clock_gettime(CLOCK_REALTIME, &now);
+       pcapng_time.ns = rte_timespec_to_ns(&now);
+}
+
+/*
+ * Convert a TSC cycle count into a PCAPNG timestamp (nanoseconds).
+ *
+ * The cycles elapsed since the reference sample in pcapng_time are split
+ * into whole seconds and a sub-second remainder before being scaled.
+ * Multiplying the complete delta by NSEC_PER_SEC would overflow 64 bits
+ * once the delta exceeds about 2^64 / 10^9 cycles, i.e. within seconds
+ * on a GHz-class TSC, and the resulting timestamps would wrap.
+ */
+static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
+{
+       const uint64_t hz = rte_get_tsc_hz();
+       const uint64_t delta = cycles - pcapng_time.cycles;
+       const uint64_t secs = delta / hz;
+       const uint64_t rem = delta % hz;
+
+       /* rem < hz, so rem * NSEC_PER_SEC fits for any TSC below ~18 GHz */
+       return pcapng_time.ns + secs * NSEC_PER_SEC
+               + (rem * NSEC_PER_SEC) / hz;
+}
+
+/*
+ * Space taken by an option carrying len bytes of data:
+ * option header plus data, rounded up to a 32 bit boundary.
+ */
+static uint16_t pcapng_optlen(uint16_t len)
+{
+       const size_t unpadded = sizeof(struct pcapng_option) + len;
+
+       return RTE_ALIGN(unpadded, sizeof(uint32_t));
+}
+
+/*
+ * Build a TLV option at popt and return the location of the next one.
+ *
+ * popt: where the option is written; the caller guarantees room for
+ *       pcapng_optlen(len) bytes.
+ * code: option code.
+ * data: option value, may be NULL when len is 0.
+ * len:  number of value bytes (stored unpadded in the option header).
+ *
+ * Padding bytes between the value and the next option are not written
+ * here.
+ */
+static struct pcapng_option *
+pcapng_add_option(struct pcapng_option *popt, uint16_t code,
+                 const void *data, uint16_t len)
+{
+       popt->code = code;
+       popt->length = len;
+
+       /*
+        * PCAPNG_OPT_END is added with data == NULL and len == 0;
+        * passing a null pointer to memcpy() is undefined even then.
+        */
+       if (len > 0)
+               memcpy(popt->data, data, len);
+
+       return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len));
+}
+
+/*
+ * Write required initial section header describing the capture
+ *
+ * os, hw, app and comment are optional strings (NULL to omit); each one
+ * present is stored as an option after the fixed header.
+ *
+ * Returns the result of write() on the output file,
+ * or -1 if the temporary buffer cannot be allocated.
+ */
+static int
+pcapng_section_block(rte_pcapng_t *self,
+                   const char *os, const char *hw,
+                   const char *app, const char *comment)
+{
+       struct pcapng_section_header *hdr;
+       struct pcapng_option *opt;
+       void *buf;
+       uint32_t len;
+       ssize_t cc;
+
+       /* First pass: total size of header plus every option present */
+       len = sizeof(*hdr);
+       if (hw)
+               len += pcapng_optlen(strlen(hw));
+       if (os)
+               len += pcapng_optlen(strlen(os));
+       if (app)
+               len += pcapng_optlen(strlen(app));
+       if (comment)
+               len += pcapng_optlen(strlen(comment));
+
+       /* reserve space for OPT_END */
+       len += pcapng_optlen(0);
+       /* and for the trailing copy of the block length */
+       len += sizeof(uint32_t);
+
+       /* zeroed allocation, so option padding bytes are written as 0 */
+       buf = calloc(1, len);
+       if (!buf)
+               return -1;
+
+       hdr = (struct pcapng_section_header *)buf;
+       *hdr = (struct pcapng_section_header) {
+               .block_type = PCAPNG_SECTION_BLOCK,
+               .block_length = len,
+               .byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
+               .major_version = PCAPNG_MAJOR_VERS,
+               .minor_version = PCAPNG_MINOR_VERS,
+               .section_length = UINT64_MAX,
+       };
+
+       /* After the section header insert variable length options. */
+       opt = (struct pcapng_option *)(hdr + 1);
+       if (comment)
+               opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
+                                       comment, strlen(comment));
+       if (hw)
+               opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE,
+                                       hw, strlen(hw));
+       if (os)
+               opt = pcapng_add_option(opt, PCAPNG_SHB_OS,
+                                       os, strlen(os));
+       if (app)
+               opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL,
+                                       app, strlen(app));
+
+       /* The standard requires last option to be OPT_END */
+       opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
+
+       /* clone block_length after option */
+       memcpy(opt, &hdr->block_length, sizeof(uint32_t));
+
+       /* NOTE(review): a short write is returned as success - confirm */
+       cc = write(self->outfd, buf, len);
+       free(buf);
+
+       return cc;
+}
+
+/*
+ * Write an interface block for a DPDK port.
+ *
+ * Options are emitted in this order: timestamp resolution, interface
+ * name, MAC address (when it can be read), link speed (when the link
+ * state can be read and is up), hardware description (when the port has
+ * a device), end of options.
+ *
+ * Returns the result of write() on the output file,
+ * or -1 if the device information cannot be obtained.
+ */
+static int
+pcapng_add_interface(rte_pcapng_t *self, uint16_t port)
+{
+       struct pcapng_interface_block *hdr;
+       struct rte_eth_dev_info dev_info;
+       struct rte_ether_addr *ea, macaddr;
+       const struct rte_device *dev;
+       struct rte_eth_link link;
+       struct pcapng_option *opt;
+       const uint8_t tsresol = 9;      /* nanosecond resolution */
+       uint32_t len;
+       void *buf;
+       char ifname[IF_NAMESIZE];
+       char ifhw[256];
+       uint64_t speed = 0;
+
+       if (rte_eth_dev_info_get(port, &dev_info) < 0)
+               return -1;
+
+       /* make something like an interface name */
+       if (if_indextoname(dev_info.if_index, ifname) == NULL)
+               snprintf(ifname, IF_NAMESIZE, "dpdk:%u", port);
+
+       /* make a useful device hardware string */
+       dev = dev_info.device;
+       if (dev)
+               snprintf(ifhw, sizeof(ifhw),
+                        "%s-%s", dev->bus->name, dev->name);
+
+       /*
+        * DPDK reports in units of Mbps.
+        * Only look at the link when the query succeeded, otherwise
+        * its contents are indeterminate.
+        */
+       if (rte_eth_link_get(port, &link) == 0 &&
+           link.link_status == ETH_LINK_UP)
+               speed = link.link_speed * PCAPNG_MBPS_SPEED;
+
+       if (rte_eth_macaddr_get(port, &macaddr) < 0)
+               ea = NULL;
+       else
+               ea = &macaddr;
+
+       /* Compute length of interface block options */
+       len = sizeof(*hdr);
+
+       len += pcapng_optlen(sizeof(tsresol));  /* timestamp */
+       len += pcapng_optlen(strlen(ifname));   /* ifname */
+
+       if (ea)
+               len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */
+       if (speed != 0)
+               len += pcapng_optlen(sizeof(uint64_t));
+       if (dev)
+               len += pcapng_optlen(strlen(ifhw));
+
+       len += pcapng_optlen(0);
+       len += sizeof(uint32_t);
+
+       buf = alloca(len);
+
+       /*
+        * Clear the block first: pcapng_add_option() does not write the
+        * padding after an option value, and stack contents must not
+        * end up in the capture file.
+        */
+       memset(buf, 0, len);
+
+       hdr = (struct pcapng_interface_block *)buf;
+       *hdr = (struct pcapng_interface_block) {
+               .block_type = PCAPNG_INTERFACE_BLOCK,
+               .link_type = 1,         /* DLT_EN10MB - Ethernet */
+               .block_length = len,
+       };
+
+       opt = (struct pcapng_option *)(hdr + 1);
+       opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL,
+                               &tsresol, sizeof(tsresol));
+       opt = pcapng_add_option(opt, PCAPNG_IFB_NAME,
+                               ifname, strlen(ifname));
+       if (ea)
+               opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR,
+                                       ea, RTE_ETHER_ADDR_LEN);
+       if (speed != 0)
+               opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED,
+                                        &speed, sizeof(uint64_t));
+       if (dev)
+               opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE,
+                                        ifhw, strlen(ifhw));
+       opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
+
+       /* clone block_length after options */
+       memcpy(opt, &hdr->block_length, sizeof(uint32_t));
+
+       return write(self->outfd, buf, len);
+}
+
+/*
+ * Emit one interface block per DPDK port at the start of the file
+ * and remember which file interface index each port was given.
+ *
+ * Returns 0 on success, -1 as soon as one interface block fails.
+ */
+static int
+pcapng_interfaces(rte_pcapng_t *self)
+{
+       uint16_t next_index = 0;
+       uint16_t port_id;
+
+       RTE_ETH_FOREACH_DEV(port_id) {
+               /* The list of ports in pcapng needs to be contiguous */
+               self->port_index[port_id] = next_index;
+               next_index++;
+
+               if (pcapng_add_interface(self, port_id) < 0)
+                       return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Write an Interface statistics block at the end of capture.
+ *
+ * comment:              optional text (NULL to omit).
+ * start_time, end_time: omitted from the block when 0.
+ * ifrecv, ifdrop:       omitted from the block when UINT64_MAX.
+ *
+ * The block timestamp is taken from the current TSC value.
+ *
+ * Returns the result of write() on the output file; an invalid port_id
+ * is rejected through RTE_ETH_VALID_PORTID_OR_ERR_RET with -EINVAL.
+ */
+ssize_t
+rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
+                      const char *comment,
+                      uint64_t start_time, uint64_t end_time,
+                      uint64_t ifrecv, uint64_t ifdrop)
+{
+       struct pcapng_statistics *hdr;
+       struct pcapng_option *opt;
+       uint32_t optlen, len;
+       uint8_t *buf;
+       uint64_t ns;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+       optlen = 0;
+
+       if (ifrecv != UINT64_MAX)
+               optlen += pcapng_optlen(sizeof(ifrecv));
+       if (ifdrop != UINT64_MAX)
+               optlen += pcapng_optlen(sizeof(ifdrop));
+       if (start_time != 0)
+               optlen += pcapng_optlen(sizeof(start_time));
+       if (end_time != 0)
+               optlen += pcapng_optlen(sizeof(end_time));
+       if (comment)
+               optlen += pcapng_optlen(strlen(comment));
+       /* the end marker is only needed when some option is present */
+       if (optlen != 0)
+               optlen += pcapng_optlen(0);
+
+       len = sizeof(*hdr) + optlen + sizeof(uint32_t);
+       buf = alloca(len);
+
+       /*
+        * Clear the block first: pcapng_add_option() does not write the
+        * padding after an option value (e.g. a comment whose length is
+        * not a multiple of 4), and stack contents must not end up in
+        * the capture file.
+        */
+       memset(buf, 0, len);
+
+       hdr = (struct pcapng_statistics *)buf;
+       opt = (struct pcapng_option *)(hdr + 1);
+
+       if (comment)
+               opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
+                                       comment, strlen(comment));
+       if (start_time != 0)
+               opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
+                                        &start_time, sizeof(start_time));
+       if (end_time != 0)
+               opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME,
+                                        &end_time, sizeof(end_time));
+       if (ifrecv != UINT64_MAX)
+               opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
+                               &ifrecv, sizeof(ifrecv));
+       if (ifdrop != UINT64_MAX)
+               opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP,
+                               &ifdrop, sizeof(ifdrop));
+       if (optlen != 0)
+               opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
+
+       hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK;
+       hdr->block_length = len;
+       hdr->interface_id = self->port_index[port_id];
+
+       ns = pcapng_tsc_to_ns(rte_get_tsc_cycles());
+       hdr->timestamp_hi = ns >> 32;
+       hdr->timestamp_lo = (uint32_t)ns;
+
+       /* clone block_length after option */
+       memcpy(opt, &len, sizeof(uint32_t));
+
+       return write(self->outfd, buf, len);
+}
+
+/*
+ * Size computed for an mbuf that has to hold 'length' bytes of packet
+ * data once it is formatted by this library: the mbuf structure, the
+ * data rounded up to 32 bits, and the trailing options and block length.
+ */
+uint32_t
+rte_pcapng_mbuf_size(uint32_t length)
+{
+       /* The flags and queue information are added at the end. */
+       const uint32_t trailer = pcapng_optlen(sizeof(uint32_t)) /* flag option */
+               + pcapng_optlen(sizeof(uint32_t))               /* queue option */
+               + sizeof(uint32_t);                             /* length */
+       const uint32_t payload = RTE_ALIGN(length, sizeof(uint32_t));
+
+       /* The VLAN and EPB header must fit in the mbuf headroom. */
+       RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) +
+                  sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM);
+
+       return sizeof(struct rte_mbuf) + payload + trailer;
+}
+
+/*
+ * More generalized version of rte_vlan_insert(): the ether type of the
+ * inserted tag is chosen by the caller.
+ *
+ * Returns 0 on success, -EINVAL if the mbuf is not direct, is shared,
+ * or is shorter than an Ethernet header, -ENOSPC if there is no
+ * headroom for the tag.
+ */
+static int
+pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
+{
+       struct rte_ether_hdr *old_hdr, *new_hdr;
+       struct rte_vlan_hdr *tag;
+
+       if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
+               return -EINVAL;
+
+       if (rte_pktmbuf_data_len(m) < sizeof(*old_hdr))
+               return -EINVAL;
+
+       old_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+
+       /* open a gap for the tag in front of the packet */
+       new_hdr = (struct rte_ether_hdr *)
+               rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr));
+       if (new_hdr == NULL)
+               return -ENOSPC;
+
+       /* slide both MAC addresses to the new start of the frame */
+       memmove(new_hdr, old_hdr, 2 * RTE_ETHER_ADDR_LEN);
+       new_hdr->ether_type = rte_cpu_to_be_16(ether_type);
+
+       tag = (struct rte_vlan_hdr *) (new_hdr + 1);
+       tag->vlan_tci = rte_cpu_to_be_16(tci);
+
+       return 0;
+}
+
+/*
+ *   The mbufs created use the Pcapng standard enhanced packet  block.
+ *
+ *                         1                   2                   3
+ *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  0 |                    Block Type = 0x00000006                    |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  4 |                      Block Total Length                       |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  8 |                         Interface ID                          |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 12 |                        Timestamp (High)                       |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 16 |                        Timestamp (Low)                        |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 20 |                    Captured Packet Length                     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 24 |                    Original Packet Length                     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 28 /                                                               /
+ *    /                          Packet Data                          /
+ *    /              variable length, padded to 32 bits               /
+ *    /                                                               /
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |      Option Code = 0x0002     |     Option Length = 0x004     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |              Flags (direction)                                |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |      Option Code = 0x0006     |     Option Length = 0x004     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |              Queue id                                         |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                      Block Total Length                       |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+/*
+ * Make a copy of original mbuf with pcapng header and options
+ *
+ * port_id:   DPDK port stored in the copy's port field; it is translated
+ *            to a file interface index by rte_pcapng_write_packets().
+ * queue:     value stored in the PCAPNG_EPB_QUEUE option.
+ * md:        packet to snapshot.
+ * mp:        mempool handed to rte_pktmbuf_copy() for the copy.
+ * length:    length limit handed to rte_pktmbuf_copy().
+ * cycles:    TSC value converted into the block timestamp.
+ * direction: selects the flags option value and which VLAN offload
+ *            flags of md are examined.
+ *
+ * Returns the formatted copy, or NULL on failure (a partially built
+ * copy is freed).
+ */
+struct rte_mbuf *
+rte_pcapng_copy(uint16_t port_id, uint32_t queue,
+               const struct rte_mbuf *md,
+               struct rte_mempool *mp,
+               uint32_t length, uint64_t cycles,
+               enum rte_pcapng_direction direction)
+{
+       struct pcapng_enhance_packet_block *epb;
+       uint32_t orig_len, data_len, padding, flags;
+       struct pcapng_option *opt;
+       const uint16_t optlen = pcapng_optlen(sizeof(flags)) + pcapng_optlen(sizeof(queue));
+       struct rte_mbuf *mc;
+       uint64_t ns;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
+#endif
+       ns = pcapng_tsc_to_ns(cycles);
+
+       /* length of the packet before truncation by 'length' */
+       orig_len = rte_pktmbuf_pkt_len(md);
+
+       /* Take snapshot of the data */
+       mc = rte_pktmbuf_copy(md, mp, 0, length);
+       if (unlikely(mc == NULL))
+               return NULL;
+
+       /* Expand any offloaded VLAN information */
+       if ((direction == RTE_PCAPNG_DIRECTION_IN &&
+            (md->ol_flags & PKT_RX_VLAN_STRIPPED)) ||
+           (direction == RTE_PCAPNG_DIRECTION_OUT &&
+            (md->ol_flags & PKT_TX_VLAN))) {
+               if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
+                                      md->vlan_tci) != 0)
+                       goto fail;
+       }
+
+       /* the outer tag is inserted second so it ends up in front */
+       if ((direction == RTE_PCAPNG_DIRECTION_IN &&
+            (md->ol_flags & PKT_RX_QINQ_STRIPPED)) ||
+           (direction == RTE_PCAPNG_DIRECTION_OUT &&
+            (md->ol_flags & PKT_TX_QINQ))) {
+               if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
+                                      md->vlan_tci_outer) != 0)
+                       goto fail;
+       }
+
+       /*
+        * pad the packet to 32 bit boundary
+        *
+        * NOTE(review): data_len (and block_length below) come from
+        * rte_pktmbuf_data_len(); presumably the copy is expected to be
+        * a single segment - confirm behavior for chained copies.
+        */
+       data_len = rte_pktmbuf_data_len(mc);
+       padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len;
+       if (padding > 0) {
+               void *tail = rte_pktmbuf_append(mc, padding);
+
+               if (tail == NULL)
+                       goto fail;
+               memset(tail, 0, padding);
+       }
+
+       /* reserve trailing options and block length */
+       opt = (struct pcapng_option *)
+               rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
+       if (unlikely(opt == NULL))
+               goto fail;
+
+       switch (direction) {
+       case RTE_PCAPNG_DIRECTION_IN:
+               flags = PCAPNG_IFB_INBOUND;
+               break;
+       case RTE_PCAPNG_DIRECTION_OUT:
+               flags = PCAPNG_IFB_OUTBOUND;
+               break;
+       default:
+               flags = 0;
+       }
+
+       opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
+                               &flags, sizeof(flags));
+
+       opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
+                               &queue, sizeof(queue));
+
+       /* Note: END_OPT necessary here. Wireshark doesn't do it. */
+
+       /* Add PCAPNG packet header */
+       epb = (struct pcapng_enhance_packet_block *)
+               rte_pktmbuf_prepend(mc, sizeof(*epb));
+       if (unlikely(epb == NULL))
+               goto fail;
+
+       epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
+       epb->block_length = rte_pktmbuf_data_len(mc);
+
+       /* Interface index is filled in later during write */
+       mc->port = port_id;
+
+       epb->timestamp_hi = ns >> 32;
+       epb->timestamp_lo = (uint32_t)ns;
+       /* data_len was sampled before padding and options were appended */
+       epb->capture_length = data_len;
+       epb->original_length = orig_len;
+
+       /* set trailer of block length */
+       *(uint32_t *)opt = epb->block_length;
+
+       return mc;
+
+fail:
+       rte_pktmbuf_free(mc);
+       return NULL;
+}
+
+/* Sum the segment counts of all packets in a burst of mbufs */
+static unsigned int
+mbuf_burst_segs(struct rte_mbuf *pkts[], unsigned int n)
+{
+       unsigned int total = 0;
+       unsigned int idx;
+
+       for (idx = 0; idx < n; idx++) {
+               const struct rte_mbuf *pkt = pkts[idx];
+
+               __rte_mbuf_sanity_check(pkt, 1);
+
+               total += pkt->nb_segs;
+       }
+
+       return total;
+}
+
+/*
+ * Write pre-formatted packets to file.
+ *
+ * pkts must have been produced by rte_pcapng_copy(); each one is checked
+ * to start with an enhanced packet block header before anything is
+ * written.
+ *
+ * Returns the result of writev(), 0 for an empty burst, or -1 with
+ * rte_errno set (EINVAL for an mbuf that fails the checks, errno of
+ * writev() otherwise).
+ *
+ * NOTE(review): the whole burst is passed to a single writev(); bursts
+ * with more segments than the system iovec limit would presumably be
+ * rejected by writev() - confirm whether callers can exceed it.
+ */
+ssize_t
+rte_pcapng_write_packets(rte_pcapng_t *self,
+                        struct rte_mbuf *pkts[], uint16_t nb_pkts)
+{
+       unsigned int i, cnt;
+       ssize_t ret;
+       int iovcnt;
+
+       /* Nothing to write; also keeps the array below from being empty */
+       if (nb_pkts == 0)
+               return 0;
+
+       iovcnt = mbuf_burst_segs(pkts, nb_pkts);
+
+       struct iovec iov[iovcnt];
+
+       for (i = cnt = 0; i < nb_pkts; i++) {
+               struct rte_mbuf *m = pkts[i];
+               struct pcapng_enhance_packet_block *epb;
+
+               /*
+                * sanity check that is really a pcapng mbuf,
+                * and that its port can index the port table
+                */
+               epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
+               if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK ||
+                            epb->block_length != rte_pktmbuf_data_len(m) ||
+                            m->port >= RTE_MAX_ETHPORTS)) {
+                       rte_errno = EINVAL;
+                       return -1;
+               }
+
+               /*
+                * The DPDK port is recorded during pcapng_copy.
+                * Map that to PCAPNG interface in file.
+                */
+               epb->interface_id = self->port_index[m->port];
+               do {
+                       iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *);
+                       iov[cnt].iov_len = rte_pktmbuf_data_len(m);
+                       ++cnt;
+               } while ((m = m->next));
+       }
+
+       ret = writev(self->outfd, iov, iovcnt);
+       if (unlikely(ret < 0))
+               rte_errno = errno;
+       return ret;
+}
+
+/*
+ * Create new pcapng writer handle.
+ *
+ * Writes the section header block and one interface block per DPDK port
+ * to fd before returning.
+ *
+ * osname, hardware, appname and comment are optional (NULL to omit).
+ *
+ * Returns the handle, or NULL with rte_errno set on failure.
+ * fd is not closed on failure.
+ */
+rte_pcapng_t *
+rte_pcapng_fdopen(int fd,
+                 const char *osname, const char *hardware,
+                 const char *appname, const char *comment)
+{
+       rte_pcapng_t *self;
+
+       /*
+        * Zeroed allocation: port_index entries of ports that are not
+        * enumerated at open time must not hold indeterminate values.
+        */
+       self = calloc(1, sizeof(*self));
+       if (!self) {
+               rte_errno = ENOMEM;
+               return NULL;
+       }
+
+       self->outfd = fd;
+
+       /* so that a stale errno is not reported from the fail path */
+       errno = 0;
+
+       if (pcapng_section_block(self, osname, hardware, appname, comment) < 0)
+               goto fail;
+
+       if (pcapng_interfaces(self) < 0)
+               goto fail;
+
+       return self;
+fail:
+       /*
+        * The helpers only report -1. Use the errno left behind by the
+        * failing allocation or write, or EIO when none was set (e.g.
+        * device information could not be read).
+        */
+       rte_errno = (errno != 0) ? errno : EIO;
+       free(self);
+       return NULL;
+}
+
+/* Close the output file of a capture handle and release the handle */
+void
+rte_pcapng_close(rte_pcapng_t *self)
+{
+       const int fd = self->outfd;
+
+       close(fd);
+       free(self);
+}
diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h
new file mode 100644 (file)
index 0000000..8d3fbb1
--- /dev/null
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Microsoft Corporation
+ */
+
+/**
+ * @file
+ * RTE pcapng
+ *
+ * @warning
+ * @b EXPERIMENTAL:
+ * All functions in this file may be changed or removed without prior notice.
+ *
+ * Pcapng is an evolution from the pcap format, created to address some of
+ * its deficiencies. Namely, the lack of extensibility and inability to store
+ * additional information.
+ *
+ * For details about the file format see RFC:
+ *   https://www.ietf.org/id/draft-tuexen-opsawg-pcapng-03.html
+ *  and
+ *    https://github.com/pcapng/pcapng/
+ */
+
+#ifndef _RTE_PCAPNG_H_
+#define _RTE_PCAPNG_H_
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <rte_compat.h>
+#include <rte_common.h>
+#include <rte_mempool.h>
+#include <rte_ring.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Opaque handle used for functions in this library.
+ * Obtained from rte_pcapng_fdopen() and released by rte_pcapng_close().
+ */
+typedef struct rte_pcapng rte_pcapng_t;
+
+/**
+ * Create a capture handle that writes to an existing open file.
+ *
+ * On success the returned handle must be released with rte_pcapng_close(),
+ * which also closes @p fd. On failure @p fd is left open and stays owned
+ * by the caller.
+ *
+ * @param fd
+ *   file descriptor
+ * @param osname
+ *   Optional description of the operating system.
+ *   Examples: "Debian 11", "Windows Server 22"
+ * @param hardware
+ *   Optional description of the hardware used to create this file.
+ *   Examples: "x86 Virtual Machine"
+ * @param appname
+ *   Optional: application name recorded in the pcapng file.
+ *   Example: "dpdk-dumpcap 1.0 (DPDK 20.11)"
+ * @param comment
+ *   Optional comment to add to file header.
+ * @return
+ *   handle to library, or NULL in case of error (and rte_errno is set).
+ *   rte_errno is ENOMEM when the handle cannot be allocated.
+ */
+__rte_experimental
+rte_pcapng_t *
+rte_pcapng_fdopen(int fd,
+                 const char *osname, const char *hardware,
+                 const char *appname, const char *comment);
+
+/**
+ * Close capture file
+ *
+ * Closes the file descriptor that was given to rte_pcapng_fdopen()
+ * and frees the handle. The handle must not be used after this call.
+ *
+ * @param self
+ *  handle to library
+ */
+__rte_experimental
+void
+rte_pcapng_close(rte_pcapng_t *self);
+
+/**
+ * Direction flag
+ * These should match Enhanced Packet Block flag bits
+ */
+enum rte_pcapng_direction {
+       /** Direction of the packet is not known. */
+       RTE_PCAPNG_DIRECTION_UNKNOWN = 0,
+       /** Packet was received. */
+       RTE_PCAPNG_DIRECTION_IN  = 1,
+       /** Packet was (or is going to be) transmitted. */
+       RTE_PCAPNG_DIRECTION_OUT = 2,
+};
+
+/**
+ * Format an mbuf for writing to file.
+ *
+ * @param port_id
+ *   The Ethernet port on which packet was received
+ *   or is going to be transmitted.
+ * @param queue
+ *   The queue on the Ethernet port where packet was received
+ *   or is going to be transmitted.
+ * @param m
+ *   The mbuf to copy
+ * @param mp
+ *   The mempool from which the "clone" mbufs are allocated.
+ * @param length
+ *   The upper limit on bytes to copy.  Passing UINT32_MAX
+ *   means all of the packet data.
+ * @param timestamp
+ *   The timestamp in TSC cycles.
+ * @param direction
+ *   The direction of the packet: receive, transmit or unknown.
+ *
+ * @return
+ *   - The pointer to the new mbuf formatted for rte_pcapng_write_packets()
+ *   - NULL if allocation fails.
+ */
+__rte_experimental
+struct rte_mbuf *
+rte_pcapng_copy(uint16_t port_id, uint32_t queue,
+               const struct rte_mbuf *m, struct rte_mempool *mp,
+               uint32_t length, uint64_t timestamp,
+               enum rte_pcapng_direction direction);
+
+
+/**
+ * Determine optimum mbuf data size.
+ *
+ * @param length
+ *   The largest packet that will be copied.
+ * @return
+ *   The minimum size of mbuf data needed to handle a packet of
+ *   @p length bytes, accounting for the required header and
+ *   trailer fields.
+ */
+__rte_experimental
+uint32_t
+rte_pcapng_mbuf_size(uint32_t length);
+
+/**
+ * Write packets to the capture file.
+ *
+ * Packets to be captured are copied by rte_pcapng_copy()
+ * and then this function is called to write them to the file.
+ *
+ * @warning
+ * Do not pass original mbufs from transmit or receive
+ * or file will be invalid pcapng format.
+ *
+ * @param self
+ *  The handle to the packet capture file
+ * @param pkts
+ *  The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *  which contain the output packets
+ * @param nb_pkts
+ *  The number of packets to write to the file.
+ * @return
+ *  The number of bytes written to file, -1 on failure to write file
+ *  (rte_errno is set). rte_errno is EINVAL when an mbuf does not start
+ *  with an Enhanced Packet Block whose length matches the mbuf data
+ *  length, i.e. it was not produced by rte_pcapng_copy().
+ *  The mbuf's in *pkts* are always freed.
+ *  NOTE(review): the write loop and both return paths of the
+ *  implementation do not appear to free the mbufs - confirm who owns
+ *  *pkts* after the call.
+ */
+__rte_experimental
+ssize_t
+rte_pcapng_write_packets(rte_pcapng_t *self,
+                        struct rte_mbuf *pkts[], uint16_t nb_pkts);
+
+/**
+ * Write an Interface statistics block.
+ * Values that are not known, or not worth reporting, may be left out
+ * by passing the "not known" value described for each parameter below.
+ * Should be called before closing capture to report results.
+ *
+ * @param self
+ *  The handle to the packet capture file
+ * @param port
+ *  The Ethernet port to report stats on.
+ * @param comment
+ *   Optional comment to add to statistics.
+ * @param start_time
+ *  The time when packet capture was started in nanoseconds.
+ *  Optional: can be zero if not known.
+ * @param end_time
+ *  The time when packet capture was stopped in nanoseconds.
+ *  Optional: can be zero if not finished.
+ * @param ifrecv
+ *  The number of packets received by capture.
+ *  Optional: use UINT64_MAX if not known.
+ * @param ifdrop
+ *  The number of packets missed by the capture process.
+ *  Optional: use UINT64_MAX if not known.
+ * @return
+ *  number of bytes written to file, -1 on failure to write file
+ */
+__rte_experimental
+ssize_t
+rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port,
+                      const char *comment,
+                      uint64_t start_time, uint64_t end_time,
+                      uint64_t ifrecv, uint64_t ifdrop);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PCAPNG_H_ */
diff --git a/lib/pcapng/version.map b/lib/pcapng/version.map
new file mode 100644 (file)
index 0000000..05a9c86
--- /dev/null
@@ -0,0 +1,12 @@
+EXPERIMENTAL {
+       global:
+
+       # added in 21.11
+       rte_pcapng_close;
+       rte_pcapng_copy;
+       rte_pcapng_fdopen;
+       rte_pcapng_mbuf_size;
+       rte_pcapng_write_packets;
+       rte_pcapng_write_stats;
+
+       local: *;
+};