net/pcap: switch Rx timestamp to dynamic mbuf field
[dpdk.git] / drivers / net / pcap / rte_eth_pcap.c
index bfc0756..4e6d493 100644 (file)
 #define RTE_PMD_PCAP_MAX_QUEUES 16
 
 static char errbuf[PCAP_ERRBUF_SIZE];
-static unsigned char tx_pcap_data[RTE_ETH_PCAP_SNAPLEN];
 static struct timeval start_time;
 static uint64_t start_cycles;
 static uint64_t hz;
 static uint8_t iface_idx;
 
+static uint64_t timestamp_rx_dynflag; /* OR-ed into ol_flags of timestamped Rx mbufs */
+static int timestamp_dynfield_offset = -1; /* Rx timestamp dynfield offset; -1 initially */
+
 struct queue_stat {
        volatile unsigned long pkts;
        volatile unsigned long bytes;
@@ -136,7 +138,7 @@ static struct rte_eth_link pmd_link = {
                .link_autoneg = ETH_LINK_FIXED,
 };
 
-static int eth_pcap_logtype;
+RTE_LOG_REGISTER(eth_pcap_logtype, pmd.net.pcap, NOTICE);
 
 #define PMD_LOG(level, fmt, args...) \
        rte_log(RTE_LOG_ ## level, eth_pcap_logtype, \
@@ -180,21 +182,6 @@ eth_pcap_rx_jumbo(struct rte_mempool *mb_pool, struct rte_mbuf *mbuf,
        return mbuf->nb_segs;
 }
 
-/* Copy data from mbuf chain to a buffer suitable for writing to a PCAP file. */
-static void
-eth_pcap_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
-{
-       uint16_t data_len = 0;
-
-       while (mbuf) {
-               rte_memcpy(data + data_len, rte_pktmbuf_mtod(mbuf, void *),
-                       mbuf->data_len);
-
-               data_len += mbuf->data_len;
-               mbuf = mbuf->next;
-       }
-}
-
 static uint16_t
 eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -281,6 +268,11 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                }
 
                mbuf->pkt_len = (uint16_t)header.caplen;
+               *RTE_MBUF_DYNFIELD(mbuf, timestamp_dynfield_offset,
+                       rte_mbuf_timestamp_t *) =
+                               (uint64_t)header.ts.tv_sec * 1000000 +
+                               header.ts.tv_usec;
+               mbuf->ol_flags |= timestamp_rx_dynflag;
                mbuf->port = pcap_q->port_id;
                bufs[num_rx] = mbuf;
                num_rx++;
@@ -300,6 +292,8 @@ eth_null_rx(void *queue __rte_unused,
        return 0;
 }
 
+#define NSEC_PER_SEC   1000000000L /* nanoseconds in one second */
+
 static inline void
 calculate_timestamp(struct timeval *ts) {
        uint64_t cycles;
@@ -307,8 +301,14 @@ calculate_timestamp(struct timeval *ts) {
 
        cycles = rte_get_timer_cycles() - start_cycles;
        cur_time.tv_sec = cycles / hz;
-       cur_time.tv_usec = (cycles % hz) * 1e6 / hz;
-       timeradd(&start_time, &cur_time, ts);
+       cur_time.tv_usec = (cycles % hz) * NSEC_PER_SEC / hz;
+
+       ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
+       ts->tv_usec = start_time.tv_usec + cur_time.tv_usec;
+       if (ts->tv_usec >= NSEC_PER_SEC) {
+               ts->tv_usec -= NSEC_PER_SEC;
+               ts->tv_sec += 1;
+       }
 }
 
 /*
@@ -325,6 +325,8 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        uint32_t tx_bytes = 0;
        struct pcap_pkthdr header;
        pcap_dumper_t *dumper;
+       unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
+       size_t len, caplen;
 
        pp = rte_eth_devices[dumper_q->port_id].process_private;
        dumper = pp->tx_dumper[dumper_q->queue_id];
@@ -336,31 +338,24 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
         * dumper */
        for (i = 0; i < nb_pkts; i++) {
                mbuf = bufs[i];
-               calculate_timestamp(&header.ts);
-               header.len = mbuf->pkt_len;
-               header.caplen = header.len;
-
-               if (likely(mbuf->nb_segs == 1)) {
-                       pcap_dump((u_char *)dumper, &header,
-                                 rte_pktmbuf_mtod(mbuf, void*));
-               } else {
-                       if (mbuf->pkt_len <= RTE_ETHER_MAX_JUMBO_FRAME_LEN) {
-                               eth_pcap_gather_data(tx_pcap_data, mbuf);
-                               pcap_dump((u_char *)dumper, &header,
-                                         tx_pcap_data);
-                       } else {
-                               PMD_LOG(ERR,
-                                       "Dropping PCAP packet. Size (%d) > max jumbo size (%d).",
-                                       mbuf->pkt_len,
-                                       RTE_ETHER_MAX_JUMBO_FRAME_LEN);
-
-                               rte_pktmbuf_free(mbuf);
-                               continue;
-                       }
+               len = caplen = rte_pktmbuf_pkt_len(mbuf);
+               if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
+                               len > sizeof(temp_data))) {
+                       caplen = sizeof(temp_data);
                }
 
+               calculate_timestamp(&header.ts);
+               header.len = len;
+               header.caplen = caplen;
+               /* rte_pktmbuf_read() returns a pointer to the data directly
+                * in the mbuf (when the mbuf is contiguous) or, otherwise,
+                * a pointer to temp_data after copying into it.
+                */
+               pcap_dump((u_char *)dumper, &header,
+                       rte_pktmbuf_read(mbuf, 0, caplen, temp_data));
+
                num_tx++;
-               tx_bytes += mbuf->pkt_len;
+               tx_bytes += caplen;
                rte_pktmbuf_free(mbuf);
        }
 
@@ -415,6 +410,8 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        uint16_t num_tx = 0;
        uint32_t tx_bytes = 0;
        pcap_t *pcap;
+       unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
+       size_t len;
 
        pp = rte_eth_devices[tx_queue->port_id].process_private;
        pcap = pp->tx_pcap[tx_queue->queue_id];
@@ -424,31 +421,26 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
        for (i = 0; i < nb_pkts; i++) {
                mbuf = bufs[i];
-
-               if (likely(mbuf->nb_segs == 1)) {
-                       ret = pcap_sendpacket(pcap,
-                                       rte_pktmbuf_mtod(mbuf, u_char *),
-                                       mbuf->pkt_len);
-               } else {
-                       if (mbuf->pkt_len <= RTE_ETHER_MAX_JUMBO_FRAME_LEN) {
-                               eth_pcap_gather_data(tx_pcap_data, mbuf);
-                               ret = pcap_sendpacket(pcap,
-                                               tx_pcap_data, mbuf->pkt_len);
-                       } else {
-                               PMD_LOG(ERR,
-                                       "Dropping PCAP packet. Size (%d) > max jumbo size (%d).",
-                                       mbuf->pkt_len,
-                                       RTE_ETHER_MAX_JUMBO_FRAME_LEN);
-
-                               rte_pktmbuf_free(mbuf);
-                               continue;
-                       }
+               len = rte_pktmbuf_pkt_len(mbuf);
+               if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
+                               len > sizeof(temp_data))) {
+                       PMD_LOG(ERR,
+                               "Dropping multi segment PCAP packet. Size (%zd) > max size (%zd).",
+                               len, sizeof(temp_data));
+                       rte_pktmbuf_free(mbuf);
+                       continue;
                }
 
+               /* rte_pktmbuf_read() returns a pointer to the data directly
+                * in the mbuf (when the mbuf is contiguous) or, otherwise,
+                * a pointer to temp_data after copying into it.
+                */
+               ret = pcap_sendpacket(pcap,
+                       rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
                if (unlikely(ret != 0))
                        break;
                num_tx++;
-               tx_bytes += mbuf->pkt_len;
+               tx_bytes += len;
                rte_pktmbuf_free(mbuf);
        }
 
@@ -496,7 +488,8 @@ open_single_tx_pcap(const char *pcap_filename, pcap_dumper_t **dumper)
         * with pcap_dump_open(). We create big enough an Ethernet
         * pcap holder.
         */
-       tx_pcap = pcap_open_dead(DLT_EN10MB, RTE_ETH_PCAP_SNAPSHOT_LEN);
+       tx_pcap = pcap_open_dead_with_tstamp_precision(DLT_EN10MB,
+                       RTE_ETH_PCAP_SNAPSHOT_LEN, PCAP_TSTAMP_PRECISION_NANO);
        if (tx_pcap == NULL) {
                PMD_LOG(ERR, "Couldn't create dead pcap");
                return -1;
@@ -619,7 +612,7 @@ status_up:
  * Is the only place for us to close all the tx streams dumpers.
  * If not called the dumpers will be flushed within each tx burst.
  */
-static void
+static int
 eth_dev_stop(struct rte_eth_dev *dev)
 {
        unsigned int i;
@@ -661,15 +654,26 @@ status_down:
                dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
 
        dev->data->dev_link.link_status = ETH_LINK_DOWN;
+
+       return 0;
 }
 
 /*
  * Device configure handler (presumably installed in the ethdev ops table,
  * which is outside this view -- confirm).
  *
  * Passes the addresses of the file-scope timestamp_dynfield_offset and
  * timestamp_rx_dynflag to rte_mbuf_dyn_rx_timestamp_register(). Those two
  * values are what eth_pcap_rx() uses to write the capture timestamp into
  * each received mbuf and to mark it in ol_flags.
  *
  * dev is not used.
  *
  * Returns 0 on success, or -rte_errno (after logging an error) when the
  * registration call returns non-zero.
  */
 static int
 eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
 {
+       int ret;
+
+       ret = rte_mbuf_dyn_rx_timestamp_register(&timestamp_dynfield_offset,
+                       &timestamp_rx_dynflag);
+       if (ret != 0) {
+               PMD_LOG(ERR, "Failed to register Rx timestamp field/flag");
+               return -rte_errno;
+       }
+
        return 0;
 }
 
-static void
+static int
 eth_dev_info(struct rte_eth_dev *dev,
                struct rte_eth_dev_info *dev_info)
 {
@@ -681,6 +685,8 @@ eth_dev_info(struct rte_eth_dev *dev,
        dev_info->max_rx_queues = dev->data->nb_rx_queues;
        dev_info->max_tx_queues = dev->data->nb_tx_queues;
        dev_info->min_rx_bufsize = 0;
+
+       return 0;
 }
 
 static int
@@ -718,7 +724,7 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
        return 0;
 }
 
-static void
+static int
 eth_stats_reset(struct rte_eth_dev *dev)
 {
        unsigned int i;
@@ -734,20 +740,37 @@ eth_stats_reset(struct rte_eth_dev *dev)
                internal->tx_queue[i].tx_stat.bytes = 0;
                internal->tx_queue[i].tx_stat.err_pkts = 0;
        }
+
+       return 0;
 }
 
-static void
+static int
 eth_dev_close(struct rte_eth_dev *dev)
 {
        unsigned int i;
        struct pmd_internals *internals = dev->data->dev_private;
 
+       PMD_LOG(INFO, "Closing pcap ethdev on NUMA socket %d",
+                       rte_socket_id());
+
+       rte_free(dev->process_private);
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
        /* Device wide flag, but cleanup must be performed per queue. */
        if (internals->infinite_rx) {
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        struct pcap_rx_queue *pcap_q = &internals->rx_queue[i];
                        struct rte_mbuf *pcap_buf;
 
+                       /*
+                        * 'pcap_q->pkts' can be NULL if 'eth_dev_close()'
+                        * called before 'eth_rx_queue_setup()' has been called
+                        */
+                       if (pcap_q->pkts == NULL)
+                               continue;
+
                        while (!rte_ring_dequeue(pcap_q->pkts,
                                        (void **)&pcap_buf))
                                rte_pktmbuf_free(pcap_buf);
@@ -756,6 +779,11 @@ eth_dev_close(struct rte_eth_dev *dev)
                }
        }
 
+       if (internals->phy_mac == 0)
+               /* not dynamically allocated, must not be freed */
+               dev->data->mac_addrs = NULL;
+
+       return 0;
 }
 
 static void
@@ -1142,6 +1170,9 @@ pmd_init_internals(struct rte_vdev_device *vdev,
        data->nb_tx_queues = (uint16_t)nb_tx_queues;
        data->dev_link = pmd_link;
        data->mac_addrs = &(*internals)->eth_addr;
+       data->promiscuous = 1;
+       data->all_multicast = 1;
+       data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
 
        /*
         * NOTE: we'll replace the data element, of originally allocated
@@ -1251,12 +1282,6 @@ eth_from_pcaps_common(struct rte_vdev_device *vdev,
        const unsigned int nb_tx_queues = tx_queues->num_of_queue;
        unsigned int i;
 
-       /* do some parameter checking */
-       if (rx_queues == NULL && nb_rx_queues > 0)
-               return -1;
-       if (tx_queues == NULL && nb_tx_queues > 0)
-               return -1;
-
        if (pmd_init_internals(vdev, nb_rx_queues, nb_tx_queues, internals,
                        eth_dev) < 0)
                return -1;
@@ -1415,7 +1440,8 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
        devargs_all.is_rx_pcap =
                rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1 : 0;
        devargs_all.is_rx_iface =
-               rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_ARG) ? 1 : 0;
+               (rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_ARG) +
+                rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_IN_ARG)) ? 1 : 0;
        pcaps.num_of_queue = 0;
 
        devargs_all.is_tx_pcap =
@@ -1555,30 +1581,16 @@ free_kvlist:
 static int
 pmd_pcap_remove(struct rte_vdev_device *dev)
 {
-       struct pmd_internals *internals = NULL;
        struct rte_eth_dev *eth_dev = NULL;
 
-       PMD_LOG(INFO, "Closing pcap ethdev on numa socket %d",
-                       rte_socket_id());
-
        if (!dev)
                return -1;
 
-       /* reserve an ethdev entry */
        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
        if (eth_dev == NULL)
-               return -1;
-
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
-               internals = eth_dev->data->dev_private;
-               if (internals != NULL && internals->phy_mac == 0)
-                       /* not dynamically allocated, must not be freed */
-                       eth_dev->data->mac_addrs = NULL;
-       }
+               return 0; /* port already released */
 
        eth_dev_close(eth_dev);
-
-       rte_free(eth_dev->process_private);
        rte_eth_dev_release_port(eth_dev);
 
        return 0;
@@ -1600,10 +1612,3 @@ RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
        ETH_PCAP_IFACE_ARG "=<ifc> "
        ETH_PCAP_PHY_MAC_ARG "=<int>"
        ETH_PCAP_INFINITE_RX_ARG "=<0|1>");
-
-RTE_INIT(eth_pcap_init_log)
-{
-       eth_pcap_logtype = rte_log_register("pmd.net.pcap");
-       if (eth_pcap_logtype >= 0)
-               rte_log_set_level(eth_pcap_logtype, RTE_LOG_NOTICE);
-}