app/testpmd: set packet dump based on verbosity level
[dpdk.git] / app / test-pmd / testpmd.c
index f72560c..b057365 100644 (file)
@@ -27,6 +27,7 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_cycles.h>
+#include <rte_malloc_heap.h>
 #include <rte_memory.h>
 #include <rte_memcpy.h>
 #include <rte_launch.h>
 
 #include "testpmd.h"
 
+#ifndef MAP_HUGETLB
+/* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
+#define HUGE_FLAG (0x40000)
+#else
+#define HUGE_FLAG MAP_HUGETLB
+#endif
+
+#ifndef MAP_HUGE_SHIFT
+/* older kernels (or FreeBSD) will not have this define */
+#define HUGE_SHIFT (26)
+#else
+#define HUGE_SHIFT MAP_HUGE_SHIFT
+#endif
+
+#define EXTMEM_HEAP_NAME "extmem"
+
 uint16_t verbose_level = 0; /**< Silent by default. */
 int testpmd_logtype; /**< Log type for testpmd logs */
 
@@ -88,9 +105,13 @@ uint8_t numa_support = 1; /**< numa enabled by default */
 uint8_t socket_num = UMA_NO_CONFIG;
 
 /*
- * Use ANONYMOUS mapped memory (might be not physically continuous) for mbufs.
+ * Select mempool allocation type:
+ * - native: use regular DPDK memory
+ * - anon: use regular DPDK memory to create mempool, but populate using
+ *         anonymous memory (may not be IOVA-contiguous)
+ * - xmem: use externally allocated hugepage memory
  */
-uint8_t mp_anon = 0;
+uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
 
 /*
  * Store specified sockets on which memory pool to be used by ports
@@ -127,6 +148,8 @@ portid_t nb_ports;             /**< Number of probed ethernet ports. */
 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
 
+portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
+
 /*
  * Test Forwarding Configuration.
  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
@@ -155,9 +178,9 @@ struct fwd_engine * fwd_engines[] = {
        &tx_only_engine,
        &csum_fwd_engine,
        &icmp_echo_engine,
-#if defined RTE_LIBRTE_PMD_SOFTNIC && defined RTE_LIBRTE_SCHED
-       &softnic_tm_engine,
-       &softnic_tm_bypass_engine,
+       &noisy_vnf_engine,
+#if defined RTE_LIBRTE_PMD_SOFTNIC
+       &softnic_fwd_engine,
 #endif
 #ifdef RTE_LIBRTE_IEEE1588
        &ieee1588_fwd_engine,
@@ -251,6 +274,40 @@ int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
  */
 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
 
+/*
+ * Configurable value of buffered packets before sending.
+ */
+uint16_t noisy_tx_sw_bufsz;
+
+/*
+ * Configurable value of packet buffer timeout.
+ */
+uint16_t noisy_tx_sw_buf_flush_time;
+
+/*
+ * Configurable value for size of VNF internal memory area
+ * used for simulating noisy neighbour behaviour
+ */
+uint64_t noisy_lkup_mem_sz;
+
+/*
+ * Configurable value of number of random writes done in
+ * VNF simulation memory area.
+ */
+uint64_t noisy_lkup_num_writes;
+
+/*
+ * Configurable value of number of random reads done in
+ * VNF simulation memory area.
+ */
+uint64_t noisy_lkup_num_reads;
+
+/*
+ * Configurable value of number of random reads/writes done in
+ * VNF simulation memory area.
+ */
+uint64_t noisy_lkup_num_reads_writes;
+
 /*
  * Receive Side Scaling (RSS) configuration.
  */
@@ -333,8 +390,6 @@ lcoreid_t latencystats_lcore_id = -1;
  */
 struct rte_eth_rxmode rx_mode = {
        .max_rx_pkt_len = ETHER_MAX_LEN, /**< Default maximum frame length. */
-       .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
-       .ignore_offload_bitfield = 1,
 };
 
 struct rte_eth_txmode tx_mode = {
@@ -346,7 +401,7 @@ struct rte_fdir_conf fdir_conf = {
        .pballoc = RTE_FDIR_PBALLOC_64K,
        .status = RTE_FDIR_REPORT_STATUS,
        .mask = {
-               .vlan_tci_mask = 0x0,
+               .vlan_tci_mask = 0xFFEF,
                .ipv4_mask     = {
                        .src_ip = 0xFFFFFFFF,
                        .dst_ip = 0xFFFFFFFF,
@@ -393,6 +448,38 @@ uint8_t bitrate_enabled;
 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
 
+struct vxlan_encap_conf vxlan_encap_conf = {
+       .select_ipv4 = 1,
+       .select_vlan = 0,
+       .vni = "\x00\x00\x00",
+       .udp_src = 0,
+       .udp_dst = RTE_BE16(4789),
+       .ipv4_src = IPv4(127, 0, 0, 1),
+       .ipv4_dst = IPv4(255, 255, 255, 255),
+       .ipv6_src = "\x00\x00\x00\x00\x00\x00\x00\x00"
+               "\x00\x00\x00\x00\x00\x00\x00\x01",
+       .ipv6_dst = "\x00\x00\x00\x00\x00\x00\x00\x00"
+               "\x00\x00\x00\x00\x00\x00\x11\x11",
+       .vlan_tci = 0,
+       .eth_src = "\x00\x00\x00\x00\x00\x00",
+       .eth_dst = "\xff\xff\xff\xff\xff\xff",
+};
+
+struct nvgre_encap_conf nvgre_encap_conf = {
+       .select_ipv4 = 1,
+       .select_vlan = 0,
+       .tni = "\x00\x00\x00",
+       .ipv4_src = IPv4(127, 0, 0, 1),
+       .ipv4_dst = IPv4(255, 255, 255, 255),
+       .ipv6_src = "\x00\x00\x00\x00\x00\x00\x00\x00"
+               "\x00\x00\x00\x00\x00\x00\x00\x01",
+       .ipv6_dst = "\x00\x00\x00\x00\x00\x00\x00\x00"
+               "\x00\x00\x00\x00\x00\x00\x11\x11",
+       .vlan_tci = 0,
+       .eth_src = "\x00\x00\x00\x00\x00\x00",
+       .eth_dst = "\xff\xff\xff\xff\xff\xff",
+};
+
 /* Forward function declarations */
 static void map_port_queue_stats_mapping_registers(portid_t pi,
                                                   struct rte_port *port);
@@ -400,12 +487,9 @@ static void check_all_ports_link_status(uint32_t port_mask);
 static int eth_event_callback(portid_t port_id,
                              enum rte_eth_event_type type,
                              void *param, void *ret_param);
-static void eth_dev_event_callback(char *device_name,
+static void eth_dev_event_callback(const char *device_name,
                                enum rte_dev_event_type type,
                                void *param);
-static int eth_dev_event_callback_register(void);
-static int eth_dev_event_callback_unregister(void);
-
 
 /*
  * Check if all the ports are started.
@@ -444,6 +528,8 @@ set_default_fwd_lcores_config(void)
 
        nb_lc = 0;
        for (i = 0; i < RTE_MAX_LCORE; i++) {
+               if (!rte_lcore_is_enabled(i))
+                       continue;
                sock_num = rte_lcore_to_socket_id(i);
                if (new_socket_id(sock_num)) {
                        if (num_sockets >= RTE_MAX_NUMA_NODES) {
@@ -453,8 +539,6 @@ set_default_fwd_lcores_config(void)
                        }
                        socket_ids[num_sockets++] = sock_num;
                }
-               if (!rte_lcore_is_enabled(i))
-                       continue;
                if (i == rte_get_master_lcore())
                        continue;
                fwd_lcores_cpuids[nb_lc++] = i;
@@ -481,9 +565,21 @@ set_default_fwd_ports_config(void)
        portid_t pt_id;
        int i = 0;
 
-       RTE_ETH_FOREACH_DEV(pt_id)
+       RTE_ETH_FOREACH_DEV(pt_id) {
                fwd_ports_ids[i++] = pt_id;
 
+               /* Update sockets info according to the attached device */
+               int socket_id = rte_eth_dev_socket_id(pt_id);
+               if (socket_id >= 0 && new_socket_id(socket_id)) {
+                       if (num_sockets >= RTE_MAX_NUMA_NODES) {
+                               rte_exit(EXIT_FAILURE,
+                                        "Total sockets greater than %u\n",
+                                        RTE_MAX_NUMA_NODES);
+                       }
+                       socket_ids[num_sockets++] = socket_id;
+               }
+       }
+
        nb_cfg_ports = nb_ports;
        nb_fwd_ports = nb_ports;
 }
@@ -496,6 +592,236 @@ set_def_fwd_config(void)
        set_default_fwd_ports_config();
 }
 
+/* extremely pessimistic estimation of memory required to create a mempool */
+static int
+calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
+{
+       unsigned int n_pages, mbuf_per_pg, leftover;
+       uint64_t total_mem, mbuf_mem, obj_sz;
+
+       /* there is no good way to predict how much space the mempool will
+        * occupy because it will allocate chunks on the fly, and some of those
+        * will come from default DPDK memory while some will come from our
+        * external memory, so just assume 128MB will be enough for everyone.
+        */
+       uint64_t hdr_mem = 128 << 20;
+
+       /* account for possible non-contiguousness */
+       obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
+       if (obj_sz > pgsz) {
+               TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
+               return -1;
+       }
+
+       mbuf_per_pg = pgsz / obj_sz;
+       leftover = (nb_mbufs % mbuf_per_pg) > 0;
+       n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
+
+       mbuf_mem = n_pages * pgsz;
+
+       total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
+
+       if (total_mem > SIZE_MAX) {
+               TESTPMD_LOG(ERR, "Memory size too big\n");
+               return -1;
+       }
+       *out = (size_t)total_mem;
+
+       return 0;
+}
+
+static inline uint32_t
+bsf64(uint64_t v)
+{
+       return (uint32_t)__builtin_ctzll(v);
+}
+
+static inline uint32_t
+log2_u64(uint64_t v)
+{
+       if (v == 0)
+               return 0;
+       v = rte_align64pow2(v);
+       return bsf64(v);
+}
+
+static int
+pagesz_flags(uint64_t page_sz)
+{
+       /* as per mmap() manpage, all page sizes are log2 of page size
+        * shifted by MAP_HUGE_SHIFT
+        */
+       int log2 = log2_u64(page_sz);
+
+       return (log2 << HUGE_SHIFT);
+}
+
+static void *
+alloc_mem(size_t memsz, size_t pgsz, bool huge)
+{
+       void *addr;
+       int flags;
+
+       /* allocate anonymous hugepages */
+       flags = MAP_ANONYMOUS | MAP_PRIVATE;
+       if (huge)
+               flags |= HUGE_FLAG | pagesz_flags(pgsz);
+
+       addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
+       if (addr == MAP_FAILED)
+               return NULL;
+
+       return addr;
+}
+
+struct extmem_param {
+       void *addr;
+       size_t len;
+       size_t pgsz;
+       rte_iova_t *iova_table;
+       unsigned int iova_table_len;
+};
+
+static int
+create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
+               bool huge)
+{
+       uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
+                       RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
+       unsigned int cur_page, n_pages, pgsz_idx;
+       size_t mem_sz, cur_pgsz;
+       rte_iova_t *iovas = NULL;
+       void *addr;
+       int ret;
+
+       for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
+               /* skip anything that is too big */
+               if (pgsizes[pgsz_idx] > SIZE_MAX)
+                       continue;
+
+               cur_pgsz = pgsizes[pgsz_idx];
+
+               /* if we were told not to allocate hugepages, override */
+               if (!huge)
+                       cur_pgsz = sysconf(_SC_PAGESIZE);
+
+               ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
+               if (ret < 0) {
+                       TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
+                       return -1;
+               }
+
+               /* allocate our memory */
+               addr = alloc_mem(mem_sz, cur_pgsz, huge);
+
+               /* if we couldn't allocate memory with a specified page size,
+                * that doesn't mean we can't do it with other page sizes, so
+                * try another one.
+                */
+               if (addr == NULL)
+                       continue;
+
+               /* store IOVA addresses for every page in this memory area */
+               n_pages = mem_sz / cur_pgsz;
+
+               iovas = malloc(sizeof(*iovas) * n_pages);
+
+               if (iovas == NULL) {
+                       TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
+                       goto fail;
+               }
+               /* lock memory if it's not huge pages */
+               if (!huge)
+                       mlock(addr, mem_sz);
+
+               /* populate IOVA addresses */
+               for (cur_page = 0; cur_page < n_pages; cur_page++) {
+                       rte_iova_t iova;
+                       size_t offset;
+                       void *cur;
+
+                       offset = cur_pgsz * cur_page;
+                       cur = RTE_PTR_ADD(addr, offset);
+
+                       /* touch the page before getting its IOVA */
+                       *(volatile char *)cur = 0;
+
+                       iova = rte_mem_virt2iova(cur);
+
+                       iovas[cur_page] = iova;
+               }
+
+               break;
+       }
+       /* if we couldn't allocate anything */
+       if (iovas == NULL)
+               return -1;
+
+       param->addr = addr;
+       param->len = mem_sz;
+       param->pgsz = cur_pgsz;
+       param->iova_table = iovas;
+       param->iova_table_len = n_pages;
+
+       return 0;
+fail:
+       if (iovas)
+               free(iovas);
+       if (addr)
+               munmap(addr, mem_sz);
+
+       return -1;
+}
+
+static int
+setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
+{
+       struct extmem_param param;
+       int socket_id, ret;
+
+       memset(&param, 0, sizeof(param));
+
+       /* check if our heap exists */
+       socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
+       if (socket_id < 0) {
+               /* create our heap */
+               ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
+               if (ret < 0) {
+                       TESTPMD_LOG(ERR, "Cannot create heap\n");
+                       return -1;
+               }
+       }
+
+       ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
+       if (ret < 0) {
+               TESTPMD_LOG(ERR, "Cannot create memory area\n");
+               return -1;
+       }
+
+       /* we now have a valid memory area, so add it to heap */
+       ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
+                       param.addr, param.len, param.iova_table,
+                       param.iova_table_len, param.pgsz);
+
+       /* when using VFIO, memory is automatically mapped for DMA by EAL */
+
+       /* not needed any more */
+       free(param.iova_table);
+
+       if (ret < 0) {
+               TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
+               munmap(param.addr, param.len);
+               return -1;
+       }
+
+       /* success */
+
+       TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
+                       param.len >> 20);
+
+       return 0;
+}
+
 /*
  * Configuration initialisation done once at init time.
  */
@@ -514,27 +840,59 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
                "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
                pool_name, nb_mbuf, mbuf_seg_size, socket_id);
 
-       if (mp_anon != 0) {
-               rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
-                       mb_size, (unsigned) mb_mempool_cache,
-                       sizeof(struct rte_pktmbuf_pool_private),
-                       socket_id, 0);
-               if (rte_mp == NULL)
-                       goto err;
-
-               if (rte_mempool_populate_anon(rte_mp) == 0) {
-                       rte_mempool_free(rte_mp);
-                       rte_mp = NULL;
-                       goto err;
-               }
-               rte_pktmbuf_pool_init(rte_mp, NULL);
-               rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
-       } else {
-               /* wrapper to rte_mempool_create() */
-               TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
-                               rte_mbuf_best_mempool_ops());
-               rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
-                       mb_mempool_cache, 0, mbuf_seg_size, socket_id);
+       switch (mp_alloc_type) {
+       case MP_ALLOC_NATIVE:
+               {
+                       /* wrapper to rte_mempool_create() */
+                       TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
+                                       rte_mbuf_best_mempool_ops());
+                       rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
+                               mb_mempool_cache, 0, mbuf_seg_size, socket_id);
+                       break;
+               }
+       case MP_ALLOC_ANON:
+               {
+                       rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
+                               mb_size, (unsigned int) mb_mempool_cache,
+                               sizeof(struct rte_pktmbuf_pool_private),
+                               socket_id, 0);
+                       if (rte_mp == NULL)
+                               goto err;
+
+                       if (rte_mempool_populate_anon(rte_mp) == 0) {
+                               rte_mempool_free(rte_mp);
+                               rte_mp = NULL;
+                               goto err;
+                       }
+                       rte_pktmbuf_pool_init(rte_mp, NULL);
+                       rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
+                       break;
+               }
+       case MP_ALLOC_XMEM:
+       case MP_ALLOC_XMEM_HUGE:
+               {
+                       int heap_socket;
+                       bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
+
+                       if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
+                               rte_exit(EXIT_FAILURE, "Could not create external memory\n");
+
+                       heap_socket =
+                               rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
+                       if (heap_socket < 0)
+                               rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
+
+                       TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
+                                       rte_mbuf_best_mempool_ops());
+                       rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
+                                       mb_mempool_cache, 0, mbuf_seg_size,
+                                       heap_socket);
+                       break;
+               }
+       default:
+               {
+                       rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
+               }
        }
 
 err:
@@ -671,15 +1029,10 @@ init_config(void)
        uint8_t port_per_socket[RTE_MAX_NUMA_NODES];
        struct rte_gro_param gro_param;
        uint32_t gso_types;
+       int k;
 
        memset(port_per_socket,0,RTE_MAX_NUMA_NODES);
 
-       if (numa_support) {
-               memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
-               memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
-               memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
-       }
-
        /* Configuration of logical cores. */
        fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
                                sizeof(struct fwd_lcore *) * nb_lcores,
@@ -705,6 +1058,7 @@ init_config(void)
                port->dev_conf.txmode = tx_mode;
                port->dev_conf.rxmode = rx_mode;
                rte_eth_dev_info_get(pid, &port->dev_info);
+
                if (!(port->dev_info.tx_offload_capa &
                      DEV_TX_OFFLOAD_MBUF_FAST_FREE))
                        port->dev_conf.txmode.offloads &=
@@ -715,13 +1069,25 @@ init_config(void)
                        else {
                                uint32_t socket_id = rte_eth_dev_socket_id(pid);
 
-                               /* if socket_id is invalid, set to 0 */
+                               /*
+                                * if socket_id is invalid,
+                                * set to the first available socket.
+                                */
                                if (check_socket_id(socket_id) < 0)
-                                       socket_id = 0;
+                                       socket_id = socket_ids[0];
                                port_per_socket[socket_id]++;
                        }
                }
 
+               /* Apply Rx offloads configuration */
+               for (k = 0; k < port->dev_info.max_rx_queues; k++)
+                       port->rx_conf[k].offloads =
+                               port->dev_conf.rxmode.offloads;
+               /* Apply Tx offloads configuration */
+               for (k = 0; k < port->dev_info.max_tx_queues; k++)
+                       port->tx_conf[k].offloads =
+                               port->dev_conf.txmode.offloads;
+
                /* set flag to initialize port/queue */
                port->need_reconfig = 1;
                port->need_reconfig_queues = 1;
@@ -762,7 +1128,7 @@ init_config(void)
        init_port_config();
 
        gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
-               DEV_TX_OFFLOAD_GRE_TNL_TSO;
+               DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
        /*
         * Records which Mbuf pool to use by each logical core, if needed.
         */
@@ -801,6 +1167,19 @@ init_config(void)
                                        "rte_gro_ctx_create() failed\n");
                }
        }
+
+#if defined RTE_LIBRTE_PMD_SOFTNIC
+       if (strcmp(cur_fwd_eng->fwd_mode_name, "softnic") == 0) {
+               RTE_ETH_FOREACH_DEV(pid) {
+                       port = &ports[pid];
+                       const char *driver = port->dev_info.driver_name;
+
+                       if (strcmp(driver, "net_softnic") == 0)
+                               port->softport.fwd_lcore_arg = fwd_lcores;
+               }
+       }
+#endif
+
 }
 
 
@@ -851,9 +1230,12 @@ init_fwd_streams(void)
                        else {
                                port->socket_id = rte_eth_dev_socket_id(pid);
 
-                               /* if socket_id is invalid, set to 0 */
+                               /*
+                                * if socket_id is invalid,
+                                * set to the first available socket.
+                                */
                                if (check_socket_id(port->socket_id) < 0)
-                                       port->socket_id = 0;
+                                       port->socket_id = socket_ids[0];
                        }
                }
                else {
@@ -936,6 +1318,9 @@ pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
                        pktnb_stats[1] = pktnb_stats[0];
                        burst_stats[0] = nb_burst;
                        pktnb_stats[0] = nb_pkt;
+               } else if (nb_burst > burst_stats[1]) {
+                       burst_stats[1] = nb_burst;
+                       pktnb_stats[1] = nb_pkt;
                }
        }
        if (total_burst == 0)
@@ -982,8 +1367,9 @@ fwd_port_stats_display(portid_t port_id, struct rte_eth_stats *stats)
                       (uint64_t) (stats->ipackets + stats->imissed));
 
                if (cur_fwd_eng == &csum_fwd_engine)
-                       printf("  Bad-ipcsum: %-14"PRIu64" Bad-l4csum: %-14"PRIu64" \n",
-                              port->rx_bad_ip_csum, port->rx_bad_l4_csum);
+                       printf("  Bad-ipcsum: %-14"PRIu64" Bad-l4csum: %-14"PRIu64"Bad-outer-l4csum: %-14"PRIu64"\n",
+                              port->rx_bad_ip_csum, port->rx_bad_l4_csum,
+                              port->rx_bad_outer_l4_csum);
                if ((stats->ierrors + stats->rx_nombuf) > 0) {
                        printf("  RX-error: %-"PRIu64"\n",  stats->ierrors);
                        printf("  RX-nombufs: %-14"PRIu64"\n", stats->rx_nombuf);
@@ -1001,8 +1387,9 @@ fwd_port_stats_display(portid_t port_id, struct rte_eth_stats *stats)
                       (uint64_t) (stats->ipackets + stats->imissed));
 
                if (cur_fwd_eng == &csum_fwd_engine)
-                       printf("  Bad-ipcsum:%14"PRIu64"    Bad-l4csum:%14"PRIu64"\n",
-                              port->rx_bad_ip_csum, port->rx_bad_l4_csum);
+                       printf("  Bad-ipcsum:%14"PRIu64"    Bad-l4csum:%14"PRIu64"    Bad-outer-l4csum: %-14"PRIu64"\n",
+                              port->rx_bad_ip_csum, port->rx_bad_l4_csum,
+                              port->rx_bad_outer_l4_csum);
                if ((stats->ierrors + stats->rx_nombuf) > 0) {
                        printf("  RX-error:%"PRIu64"\n", stats->ierrors);
                        printf("  RX-nombufs:             %14"PRIu64"\n",
@@ -1066,7 +1453,9 @@ fwd_stream_stats_display(streamid_t stream_id)
        /* if checksum mode */
        if (cur_fwd_eng == &csum_fwd_engine) {
               printf("  RX- bad IP checksum: %-14u  Rx- bad L4 checksum: "
-                       "%-14u\n", fs->rx_bad_ip_csum, fs->rx_bad_l4_csum);
+                       "%-14u Rx- bad outer L4 checksum: %-14u\n",
+                       fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
+                       fs->rx_bad_outer_l4_csum);
        }
 
 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
@@ -1130,8 +1519,9 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
        uint64_t tics_per_1sec;
        uint64_t tics_datum;
        uint64_t tics_current;
-       uint16_t idx_port;
+       uint16_t i, cnt_ports;
 
+       cnt_ports = nb_ports;
        tics_datum = rte_rdtsc();
        tics_per_1sec = rte_get_timer_hz();
 #endif
@@ -1146,9 +1536,9 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
                        tics_current = rte_rdtsc();
                        if (tics_current - tics_datum >= tics_per_1sec) {
                                /* Periodic bitrate calculation */
-                               RTE_ETH_FOREACH_DEV(idx_port)
+                               for (i = 0; i < cnt_ports; i++)
                                        rte_stats_bitrate_calc(bitrate_data,
-                                               idx_port);
+                                               ports_ids[i]);
                                tics_datum = tics_current;
                        }
                }
@@ -1319,6 +1709,7 @@ start_packet_forwarding(int with_tx_first)
                fwd_streams[sm_id]->fwd_dropped = 0;
                fwd_streams[sm_id]->rx_bad_ip_csum = 0;
                fwd_streams[sm_id]->rx_bad_l4_csum = 0;
+               fwd_streams[sm_id]->rx_bad_outer_l4_csum = 0;
 
 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
                memset(&fwd_streams[sm_id]->rx_burst_stats, 0,
@@ -1424,6 +1815,9 @@ stop_packet_forwarding(void)
                ports[fwd_streams[sm_id]->rx_port].rx_bad_l4_csum =
                                                        rx_bad_l4_csum;
 
+               ports[fwd_streams[sm_id]->rx_port].rx_bad_outer_l4_csum +=
+                               fwd_streams[sm_id]->rx_bad_outer_l4_csum;
+
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
                fwd_cycles = (uint64_t) (fwd_cycles +
                                         fwd_streams[sm_id]->core_cycles);
@@ -1606,7 +2000,7 @@ start_port(portid_t pid)
                                        return -1;
                                }
                        }
-
+                       configure_rxtx_dump_callbacks(0);
                        printf("Configuring Port %d (socket %u)\n", pi,
                                        port->socket_id);
                        /* configure port */
@@ -1627,11 +2021,6 @@ start_port(portid_t pid)
                        port->need_reconfig_queues = 0;
                        /* setup tx queues */
                        for (qi = 0; qi < nb_txq; qi++) {
-                               port->tx_conf[qi].txq_flags =
-                                       ETH_TXQ_FLAGS_IGNORE;
-                               /* Apply Tx offloads configuration */
-                               port->tx_conf[qi].offloads =
-                                       port->dev_conf.txmode.offloads;
                                if ((numa_support) &&
                                        (txring_numa[pi] != NUMA_NO_CONFIG))
                                        diag = rte_eth_tx_queue_setup(pi, qi,
@@ -1660,9 +2049,6 @@ start_port(portid_t pid)
                                return -1;
                        }
                        for (qi = 0; qi < nb_rxq; qi++) {
-                               /* Apply Rx offloads configuration */
-                               port->rx_conf[qi].offloads =
-                                       port->dev_conf.rxmode.offloads;
                                /* setup rx queues */
                                if ((numa_support) &&
                                        (rxring_numa[pi] != NUMA_NO_CONFIG)) {
@@ -1677,7 +2063,7 @@ start_port(portid_t pid)
                                        }
 
                                        diag = rte_eth_rx_queue_setup(pi, qi,
-                                            port->nb_rx_desc[pi],
+                                            port->nb_rx_desc[qi],
                                             rxring_numa[pi],
                                             &(port->rx_conf[qi]),
                                             mp);
@@ -1692,7 +2078,7 @@ start_port(portid_t pid)
                                                return -1;
                                        }
                                        diag = rte_eth_rx_queue_setup(pi, qi,
-                                            port->nb_rx_desc[pi],
+                                            port->nb_rx_desc[qi],
                                             port->socket_id,
                                             &(port->rx_conf[qi]),
                                             mp);
@@ -1713,7 +2099,7 @@ start_port(portid_t pid)
                                return -1;
                        }
                }
-
+               configure_rxtx_dump_callbacks(verbose_level);
                /* start port */
                if (rte_eth_dev_start(pi) < 0) {
                        printf("Fail to start port %d\n", pi);
@@ -1903,39 +2289,6 @@ reset_port(portid_t pid)
        printf("Done\n");
 }
 
-static int
-eth_dev_event_callback_register(void)
-{
-       int ret;
-
-       /* register the device event callback */
-       ret = rte_dev_event_callback_register(NULL,
-               eth_dev_event_callback, NULL);
-       if (ret) {
-               printf("Failed to register device event callback\n");
-               return -1;
-       }
-
-       return 0;
-}
-
-
-static int
-eth_dev_event_callback_unregister(void)
-{
-       int ret;
-
-       /* unregister the device event callback */
-       ret = rte_dev_event_callback_unregister(NULL,
-               eth_dev_event_callback, NULL);
-       if (ret < 0) {
-               printf("Failed to unregister device event callback\n");
-               return -1;
-       }
-
-       return 0;
-}
-
 void
 attach_port(char *identifier)
 {
@@ -1953,12 +2306,13 @@ attach_port(char *identifier)
                return;
 
        socket_id = (unsigned)rte_eth_dev_socket_id(pi);
-       /* if socket_id is invalid, set to 0 */
+       /* if socket_id is invalid, set to the first available socket. */
        if (check_socket_id(socket_id) < 0)
-               socket_id = 0;
+               socket_id = socket_ids[0];
        reconfig(pi, socket_id);
        rte_eth_promiscuous_enable(pi);
 
+       ports_ids[nb_ports] = pi;
        nb_ports = rte_eth_dev_count_avail();
 
        ports[pi].port_status = RTE_PORT_STOPPED;
@@ -1973,6 +2327,7 @@ void
 detach_port(portid_t port_id)
 {
        char name[RTE_ETH_NAME_MAX_LEN];
+       uint16_t i;
 
        printf("Detaching a port...\n");
 
@@ -1985,16 +2340,23 @@ detach_port(portid_t port_id)
                port_flow_flush(port_id);
 
        if (rte_eth_dev_detach(port_id, name)) {
-               TESTPMD_LOG(ERR, "Failed to detach port '%s'\n", name);
+               TESTPMD_LOG(ERR, "Failed to detach port %u\n", port_id);
                return;
        }
 
+       for (i = 0; i < nb_ports; i++) {
+               if (ports_ids[i] == port_id) {
+                       ports_ids[i] = ports_ids[nb_ports-1];
+                       ports_ids[nb_ports-1] = 0;
+                       break;
+               }
+       }
        nb_ports = rte_eth_dev_count_avail();
 
        update_fwd_ports(RTE_MAX_ETHPORTS);
 
-       printf("Port '%s' is detached. Now total ports is %d\n",
-                       name, nb_ports);
+       printf("Port %u is detached. Now total ports is %d\n",
+                       port_id, nb_ports);
        printf("Done\n");
        return;
 }
@@ -2002,6 +2364,7 @@ detach_port(portid_t port_id)
 void
 pmd_test_exit(void)
 {
+       struct rte_device *device;
        portid_t pt_id;
        int ret;
 
@@ -2015,19 +2378,43 @@ pmd_test_exit(void)
                        fflush(stdout);
                        stop_port(pt_id);
                        close_port(pt_id);
+
+                       /*
+                        * This is a workaround to fix a virtio-user issue that
+                        * requires to call clean-up routine to remove existing
+                        * socket.
+                        * This workaround valid only for testpmd, needs a fix
+                        * valid for all applications.
+                        * TODO: Implement proper resource cleanup
+                        */
+                       device = rte_eth_devices[pt_id].device;
+                       if (device && !strcmp(device->driver->name, "net_virtio_user"))
+                               detach_port(pt_id);
                }
        }
 
        if (hot_plug) {
                ret = rte_dev_event_monitor_stop();
-               if (ret)
+               if (ret) {
                        RTE_LOG(ERR, EAL,
                                "fail to stop device event monitor.");
+                       return;
+               }
 
-               ret = eth_dev_event_callback_unregister();
-               if (ret)
+               ret = rte_dev_event_callback_unregister(NULL,
+                       eth_dev_event_callback, NULL);
+               if (ret < 0) {
+                       RTE_LOG(ERR, EAL,
+                               "fail to unregister device event callback.\n");
+                       return;
+               }
+
+               ret = rte_dev_hotplug_handle_disable();
+               if (ret) {
                        RTE_LOG(ERR, EAL,
-                               "fail to unregister all event callbacks.");
+                               "fail to disable hotplug handling.\n");
+                       return;
+               }
        }
 
        printf("\nBye...\n");
@@ -2100,18 +2487,23 @@ check_all_ports_link_status(uint32_t port_mask)
 static void
 rmv_event_callback(void *arg)
 {
-       struct rte_eth_dev *dev;
+       int need_to_start = 0;
+       int org_no_link_check = no_link_check;
        portid_t port_id = (intptr_t)arg;
 
        RTE_ETH_VALID_PORTID_OR_RET(port_id);
-       dev = &rte_eth_devices[port_id];
 
+       if (!test_done && port_is_forwarding(port_id)) {
+               need_to_start = 1;
+               stop_packet_forwarding();
+       }
+       no_link_check = 1;
        stop_port(port_id);
+       no_link_check = org_no_link_check;
        close_port(port_id);
-       printf("removing device %s\n", dev->device->name);
-       if (rte_eal_dev_detach(dev->device))
-               TESTPMD_LOG(ERR, "Failed to detach device %s\n",
-                       dev->device->name);
+       detach_port(port_id);
+       if (need_to_start)
+               start_packet_forwarding(0);
 }
 
 /* This function is used by the interrupt thread */
@@ -2137,11 +2529,11 @@ eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
        RTE_SET_USED(ret_param);
 
        if (type >= RTE_ETH_EVENT_MAX) {
-               fprintf(stderr, "\nPort %" PRIu8 ": %s called upon invalid event %d\n",
+               fprintf(stderr, "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
                        port_id, __func__, type);
                fflush(stderr);
        } else if (event_print_mask & (UINT32_C(1) << type)) {
-               printf("\nPort %" PRIu8 ": %s event\n", port_id,
+               printf("\nPort %" PRIu16 ": %s event\n", port_id,
                        event_desc[type]);
                fflush(stdout);
        }
@@ -2163,9 +2555,12 @@ eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
 
 /* This function is used by the interrupt thread */
 static void
-eth_dev_event_callback(char *device_name, enum rte_dev_event_type type,
+eth_dev_event_callback(const char *device_name, enum rte_dev_event_type type,
                             __rte_unused void *arg)
 {
+       uint16_t port_id;
+       int ret;
+
        if (type >= RTE_DEV_EVENT_MAX) {
                fprintf(stderr, "%s called upon invalid event %d\n",
                        __func__, type);
@@ -2176,9 +2571,13 @@ eth_dev_event_callback(char *device_name, enum rte_dev_event_type type,
        case RTE_DEV_EVENT_REMOVE:
                RTE_LOG(ERR, EAL, "The device: %s has been removed!\n",
                        device_name);
-               /* TODO: After finish failure handle, begin to stop
-                * packet forward, stop port, close port, detach port.
-                */
+               ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
+               if (ret) {
+                       RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
+                               device_name);
+                       return;
+               }
+               rmv_event_callback((void *)(intptr_t)port_id);
                break;
        case RTE_DEV_EVENT_ADD:
                RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
@@ -2325,16 +2724,15 @@ init_port_config(void)
 {
        portid_t pid;
        struct rte_port *port;
-       struct rte_eth_dev_info dev_info;
 
        RTE_ETH_FOREACH_DEV(pid) {
                port = &ports[pid];
                port->dev_conf.fdir_conf = fdir_conf;
+               rte_eth_dev_info_get(pid, &port->dev_info);
                if (nb_rxq > 1) {
-                       rte_eth_dev_info_get(pid, &dev_info);
                        port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
                        port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
-                               rss_hf & dev_info.flow_type_rss_offloads;
+                               rss_hf & port->dev_info.flow_type_rss_offloads;
                } else {
                        port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
                        port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
@@ -2364,17 +2762,6 @@ init_port_config(void)
                    (rte_eth_devices[pid].data->dev_flags &
                     RTE_ETH_DEV_INTR_RMV))
                        port->dev_conf.intr_conf.rmv = 1;
-
-#if defined RTE_LIBRTE_PMD_SOFTNIC && defined RTE_LIBRTE_SCHED
-               /* Detect softnic port */
-               if (!strcmp(port->dev_info.driver_name, "net_softnic")) {
-                       port->softnic_enable = 1;
-                       memset(&port->softport, 0, sizeof(struct softnic_port));
-
-                       if (!strcmp(cur_fwd_eng->fwd_mode_name, "tm"))
-                               port->softport.tm_flag = 1;
-               }
-#endif
        }
 }
 
@@ -2413,12 +2800,14 @@ const uint16_t vlan_tags[] = {
 };
 
 static  int
-get_eth_dcb_conf(struct rte_eth_conf *eth_conf,
+get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
                 enum dcb_mode_enable dcb_mode,
                 enum rte_eth_nb_tcs num_tcs,
                 uint8_t pfc_en)
 {
        uint8_t i;
+       int32_t rc;
+       struct rte_eth_rss_conf rss_conf;
 
        /*
         * Builds up the correct configuration for dcb+vt based on the vlan tags array
@@ -2458,6 +2847,10 @@ get_eth_dcb_conf(struct rte_eth_conf *eth_conf,
                struct rte_eth_dcb_tx_conf *tx_conf =
                                &eth_conf->tx_adv_conf.dcb_tx_conf;
 
+               rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
+               if (rc != 0)
+                       return rc;
+
                rx_conf->nb_tcs = num_tcs;
                tx_conf->nb_tcs = num_tcs;
 
@@ -2465,8 +2858,9 @@ get_eth_dcb_conf(struct rte_eth_conf *eth_conf,
                        rx_conf->dcb_tc[i] = i % num_tcs;
                        tx_conf->dcb_tc[i] = i % num_tcs;
                }
+
                eth_conf->rxmode.mq_mode = ETH_MQ_RX_DCB_RSS;
-               eth_conf->rx_adv_conf.rss_conf.rss_hf = rss_hf;
+               eth_conf->rx_adv_conf.rss_conf = rss_conf;
                eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
        }
 
@@ -2500,17 +2894,13 @@ init_port_dcb_config(portid_t pid,
        port_conf.txmode = rte_port->dev_conf.txmode;
 
        /*set configuration of DCB in vt mode and DCB in non-vt mode*/
-       retval = get_eth_dcb_conf(&port_conf, dcb_mode, num_tcs, pfc_en);
+       retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
        if (retval < 0)
                return retval;
        port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
 
-       /**
-        * Write the configuration into the device.
-        * Set the numbers of RX & TX queues to 0, so
-        * the RX & TX queues will not be setup.
-        */
-       rte_eth_dev_configure(pid, 0, 0, &port_conf);
+       /* re-configure the device . */
+       rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
 
        rte_eth_dev_info_get(pid, &rte_port->dev_info);
 
@@ -2576,6 +2966,11 @@ init_port(void)
                                "rte_zmalloc(%d struct rte_port) failed\n",
                                RTE_MAX_ETHPORTS);
        }
+
+       /* Initialize ports NUMA structures */
+       memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
+       memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
+       memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
 }
 
 static void
@@ -2627,6 +3022,7 @@ main(int argc, char** argv)
 {
        int diag;
        portid_t port_id;
+       uint16_t count;
        int ret;
 
        signal(SIGINT, signal_handler);
@@ -2641,6 +3037,28 @@ main(int argc, char** argv)
                rte_panic("Cannot register log type");
        rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
 
+#ifdef RTE_LIBRTE_PDUMP
+       /* initialize packet capture framework */
+       rte_pdump_init(NULL);
+#endif
+
+       count = 0;
+       RTE_ETH_FOREACH_DEV(port_id) {
+               ports_ids[count] = port_id;
+               count++;
+       }
+       nb_ports = (portid_t) count;
+       if (nb_ports == 0)
+               TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
+
+       /* allocate port structures, and init them */
+       init_port();
+
+       set_def_fwd_config();
+       if (nb_lcores == 0)
+               rte_panic("Empty set of forwarding logical cores - check the "
+                         "core mask supplied in the command parameters\n");
+
        /* Bitrate/latency stats disabled by default */
 #ifdef RTE_LIBRTE_BITRATE
        bitrate_enabled = 0;
@@ -2666,23 +3084,6 @@ main(int argc, char** argv)
                        strerror(errno));
        }
 
-#ifdef RTE_LIBRTE_PDUMP
-       /* initialize packet capture framework */
-       rte_pdump_init(NULL);
-#endif
-
-       nb_ports = (portid_t) rte_eth_dev_count_avail();
-       if (nb_ports == 0)
-               TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
-
-       /* allocate port structures, and init them */
-       init_port();
-
-       set_def_fwd_config();
-       if (nb_lcores == 0)
-               rte_panic("Empty set of forwarding logical cores - check the "
-                         "core mask supplied in the command parameters\n");
-
        if (tx_first && interactive)
                rte_exit(EXIT_FAILURE, "--tx-first cannot be used on "
                                "interactive mode.\n");
@@ -2704,14 +3105,27 @@ main(int argc, char** argv)
        init_config();
 
        if (hot_plug) {
-               /* enable hot plug monitoring */
+               ret = rte_dev_hotplug_handle_enable();
+               if (ret) {
+                       RTE_LOG(ERR, EAL,
+                               "fail to enable hotplug handling.");
+                       return -1;
+               }
+
                ret = rte_dev_event_monitor_start();
                if (ret) {
-                       rte_errno = EINVAL;
+                       RTE_LOG(ERR, EAL,
+                               "fail to start device event monitoring.");
                        return -1;
                }
-               eth_dev_event_callback_register();
 
+               ret = rte_dev_event_callback_register(NULL,
+                       eth_dev_event_callback, NULL);
+               if (ret) {
+                       RTE_LOG(ERR, EAL,
+                               "fail  to register device event callback\n");
+                       return -1;
+               }
        }
 
        if (start_port(RTE_PORT_ALL) != 0)