net: add rte prefix to IP defines
[dpdk.git] / examples / performance-thread / l3fwd-thread / main.c
index db6cb64..3640579 100644 (file)
@@ -1,38 +1,7 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2016 Intel Corporation
  */
 
-#define _GNU_SOURCE
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -50,9 +19,7 @@
 #include <rte_log.h>
 #include <rte_memory.h>
 #include <rte_memcpy.h>
-#include <rte_memzone.h>
 #include <rte_eal.h>
-#include <rte_per_lcore.h>
 #include <rte_launch.h>
 #include <rte_atomic.h>
 #include <rte_cycles.h>
@@ -61,7 +28,6 @@
 #include <rte_per_lcore.h>
 #include <rte_branch_prediction.h>
 #include <rte_interrupts.h>
-#include <rte_pci.h>
 #include <rte_random.h>
 #include <rte_debug.h>
 #include <rte_ether.h>
@@ -73,6 +39,7 @@
 #include <rte_tcp.h>
 #include <rte_udp.h>
 #include <rte_string_fns.h>
+#include <rte_pause.h>
 
 #include <cmdline_parse.h>
 #include <cmdline_parse_etheraddr.h>
 #define APP_LOOKUP_METHOD             APP_LOOKUP_LPM
 #endif
 
+#ifndef __GLIBC__ /* sched_getcpu() is glibc specific */
+#define sched_getcpu() rte_lcore_id()
+#endif
+
+static int
+check_ptype(int portid)
+{
+       int i, ret;
+       int ipv4 = 0, ipv6 = 0;
+
+       ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK, NULL,
+                       0);
+       if (ret <= 0)
+               return 0;
+
+       uint32_t ptypes[ret];
+
+       ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK,
+                       ptypes, ret);
+       for (i = 0; i < ret; ++i) {
+               if (ptypes[i] & RTE_PTYPE_L3_IPV4)
+                       ipv4 = 1;
+               if (ptypes[i] & RTE_PTYPE_L3_IPV6)
+                       ipv6 = 1;
+       }
+
+       if (ipv4 && ipv6)
+               return 1;
+
+       return 0;
+}
+
+static inline void
+parse_ptype(struct rte_mbuf *m)
+{
+       struct rte_ether_hdr *eth_hdr;
+       uint32_t packet_type = RTE_PTYPE_UNKNOWN;
+       uint16_t ether_type;
+
+       eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+       ether_type = eth_hdr->ether_type;
+       if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv4))
+               packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+       else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv6))
+               packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+
+       m->packet_type = packet_type;
+}
+
+static uint16_t
+cb_parse_ptype(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+               struct rte_mbuf *pkts[], uint16_t nb_pkts,
+               __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
+{
+       unsigned int i;
+
+       for (i = 0; i < nb_pkts; i++)
+               parse_ptype(pkts[i]);
+
+       return nb_pkts;
+}
+
 /*
  *  When set to zero, simple forwaring path is eanbled.
  *  When set to one, optimized forwarding path is enabled.
  *  Note that LPM optimisation path uses SSE4.1 instructions.
  */
-#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__))
-#define ENABLE_MULTI_BUFFER_OPTIMIZE   0
-#else
 #define ENABLE_MULTI_BUFFER_OPTIMIZE   1
-#endif
 
 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
 #include <rte_hash.h>
  */
 
 #define NB_MBUF RTE_MAX(\
-               (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT +       \
-               nb_ports*nb_lcores*MAX_PKT_BURST +                     \
-               nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT +         \
-               nb_lcores*MEMPOOL_CACHE_SIZE),                         \
+               (nb_ports*nb_rx_queue*nb_rxd +      \
+               nb_ports*nb_lcores*MAX_PKT_BURST +  \
+               nb_ports*n_tx_queue*nb_txd +        \
+               nb_lcores*MEMPOOL_CACHE_SIZE),      \
                (unsigned)8192)
 
 #define MAX_PKT_BURST     32
 /*
  * Configurable number of RX/TX ring descriptors
  */
-#define RTE_TEST_RX_DESC_DEFAULT 128
-#define RTE_TEST_TX_DESC_DEFAULT 128
+#define RTE_TEST_RX_DESC_DEFAULT 1024
+#define RTE_TEST_TX_DESC_DEFAULT 1024
 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
 
 /* ethernet addresses of ports */
 static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
-static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
 
-static __m128i val_eth[RTE_MAX_ETHPORTS];
+static xmm_t val_eth[RTE_MAX_ETHPORTS];
 
 /* replace first 12B of the ethernet header. */
 #define        MASK_ETH 0x3f
 
 /* mask of enabled ports */
 static uint32_t enabled_port_mask;
-static int promiscuous_on; /**< $et in promiscuous mode off by default. */
+static int promiscuous_on; /**< Set in promiscuous mode off by default. */
 static int numa_on = 1;    /**< NUMA is enabled by default. */
+static int parse_ptype_on;
 
 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
 static int ipv6;           /**< ipv6 is false by default. */
@@ -218,7 +244,7 @@ struct mbuf_table {
 };
 
 struct lcore_rx_queue {
-       uint8_t port_id;
+       uint16_t port_id;
        uint8_t queue_id;
 } __rte_cache_aligned;
 
@@ -228,7 +254,7 @@ struct lcore_rx_queue {
 
 #define MAX_LCORE_PARAMS       1024
 struct rx_thread_params {
-       uint8_t port_id;
+       uint16_t port_id;
        uint8_t queue_id;
        uint8_t lcore_id;
        uint8_t thread_id;
@@ -276,13 +302,9 @@ static uint16_t nb_tx_thread_params = RTE_DIM(tx_thread_params_array_default);
 static struct rte_eth_conf port_conf = {
        .rxmode = {
                .mq_mode = ETH_MQ_RX_RSS,
-               .max_rx_pkt_len = ETHER_MAX_LEN,
+               .max_rx_pkt_len = RTE_ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .header_split   = 0, /**< Header Split disabled */
-               .hw_ip_checksum = 1, /**< IP checksum offload enabled */
-               .hw_vlan_filter = 0, /**< VLAN filtering disabled */
-               .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
-               .hw_strip_crc   = 0, /**< CRC stripped by hardware */
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
@@ -299,13 +321,8 @@ static struct rte_mempool *pktmbuf_pool[NB_SOCKETS];
 
 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
 
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
 #include <rte_hash_crc.h>
 #define DEFAULT_HASH_FUNC       rte_hash_crc
-#else
-#include <rte_jhash.h>
-#define DEFAULT_HASH_FUNC       rte_jhash
-#endif
 
 struct ipv4_5tuple {
        uint32_t ip_dst;
@@ -363,10 +380,10 @@ struct ipv6_l3fwd_route {
 };
 
 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
-       {{IPv4(101, 0, 0, 0), IPv4(100, 10, 0, 1),  101, 11, IPPROTO_TCP}, 0},
-       {{IPv4(201, 0, 0, 0), IPv4(200, 20, 0, 1),  102, 12, IPPROTO_TCP}, 1},
-       {{IPv4(111, 0, 0, 0), IPv4(100, 30, 0, 1),  101, 11, IPPROTO_TCP}, 2},
-       {{IPv4(211, 0, 0, 0), IPv4(200, 40, 0, 1),  102, 12, IPPROTO_TCP}, 3},
+       {{RTE_IPv4(101, 0, 0, 0), RTE_IPv4(100, 10, 0, 1),  101, 11, IPPROTO_TCP}, 0},
+       {{RTE_IPv4(201, 0, 0, 0), RTE_IPv4(200, 20, 0, 1),  102, 12, IPPROTO_TCP}, 1},
+       {{RTE_IPv4(111, 0, 0, 0), RTE_IPv4(100, 30, 0, 1),  101, 11, IPPROTO_TCP}, 2},
+       {{RTE_IPv4(211, 0, 0, 0), RTE_IPv4(200, 40, 0, 1),  102, 12, IPPROTO_TCP}, 3},
 };
 
 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
@@ -422,17 +439,10 @@ ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len,
        t = k->proto;
        p = (const uint32_t *)&k->port_src;
 
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
        init_val = rte_hash_crc_4byte(t, init_val);
        init_val = rte_hash_crc_4byte(k->ip_src, init_val);
        init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
        init_val = rte_hash_crc_4byte(*p, init_val);
-#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
-       init_val = rte_jhash_1word(t, init_val);
-       init_val = rte_jhash_1word(k->ip_src, init_val);
-       init_val = rte_jhash_1word(k->ip_dst, init_val);
-       init_val = rte_jhash_1word(*p, init_val);
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
        return init_val;
 }
 
@@ -443,16 +453,13 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
        const union ipv6_5tuple_host *k;
        uint32_t t;
        const uint32_t *p;
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
        const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3;
        const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
 
        k = data;
        t = k->proto;
        p = (const uint32_t *)&k->port_src;
 
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
        ip_src0 = (const uint32_t *) k->ip_src;
        ip_src1 = (const uint32_t *)(k->ip_src + 4);
        ip_src2 = (const uint32_t *)(k->ip_src + 8);
@@ -471,12 +478,6 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
        init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
        init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
        init_val = rte_hash_crc_4byte(*p, init_val);
-#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
-       init_val = rte_jhash_1word(t, init_val);
-       init_val = rte_jhash(k->ip_src, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
-       init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
-       init_val = rte_jhash_1word(*p, init_val);
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
        return init_val;
 }
 
@@ -502,14 +503,14 @@ struct ipv6_l3fwd_route {
 };
 
 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
-       {IPv4(1, 1, 1, 0), 24, 0},
-       {IPv4(2, 1, 1, 0), 24, 1},
-       {IPv4(3, 1, 1, 0), 24, 2},
-       {IPv4(4, 1, 1, 0), 24, 3},
-       {IPv4(5, 1, 1, 0), 24, 4},
-       {IPv4(6, 1, 1, 0), 24, 5},
-       {IPv4(7, 1, 1, 0), 24, 6},
-       {IPv4(8, 1, 1, 0), 24, 7},
+       {RTE_IPv4(1, 1, 1, 0), 24, 0},
+       {RTE_IPv4(2, 1, 1, 0), 24, 1},
+       {RTE_IPv4(3, 1, 1, 0), 24, 2},
+       {RTE_IPv4(4, 1, 1, 0), 24, 3},
+       {RTE_IPv4(5, 1, 1, 0), 24, 4},
+       {RTE_IPv4(6, 1, 1, 0), 24, 5},
+       {RTE_IPv4(7, 1, 1, 0), 24, 6},
+       {RTE_IPv4(8, 1, 1, 0), 24, 7},
 };
 
 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
@@ -610,7 +611,7 @@ struct thread_tx_conf tx_thread[MAX_TX_THREAD];
 
 /* Send burst of packets on an output interface */
 static inline int
-send_burst(struct thread_tx_conf *qconf, uint16_t n, uint8_t port)
+send_burst(struct thread_tx_conf *qconf, uint16_t n, uint16_t port)
 {
        struct rte_mbuf **m_table;
        int ret;
@@ -631,7 +632,7 @@ send_burst(struct thread_tx_conf *qconf, uint16_t n, uint8_t port)
 
 /* Enqueue a single packet, and send burst if queue is filled */
 static inline int
-send_single_packet(struct rte_mbuf *m, uint8_t port)
+send_single_packet(struct rte_mbuf *m, uint16_t port)
 {
        uint16_t len;
        struct thread_tx_conf *qconf;
@@ -655,9 +656,10 @@ send_single_packet(struct rte_mbuf *m, uint8_t port)
        return 0;
 }
 
-#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
-static inline __attribute__((always_inline)) void
-send_packetsx4(uint8_t port,
+#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
+       (ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
+static __rte_always_inline void
+send_packetsx4(uint16_t port,
        struct rte_mbuf *m[], uint32_t num)
 {
        uint32_t len, j, n;
@@ -697,12 +699,15 @@ send_packetsx4(uint8_t port,
        case 0:
                qconf->tx_mbufs[port].m_table[len + j] = m[j];
                j++;
+               /* fall-through */
        case 3:
                qconf->tx_mbufs[port].m_table[len + j] = m[j];
                j++;
+               /* fall-through */
        case 2:
                qconf->tx_mbufs[port].m_table[len + j] = m[j];
                j++;
+               /* fall-through */
        case 1:
                qconf->tx_mbufs[port].m_table[len + j] = m[j];
                j++;
@@ -724,12 +729,15 @@ send_packetsx4(uint8_t port,
                case 0:
                        qconf->tx_mbufs[port].m_table[j] = m[n + j];
                        j++;
+                       /* fall-through */
                case 3:
                        qconf->tx_mbufs[port].m_table[j] = m[n + j];
                        j++;
+                       /* fall-through */
                case 2:
                        qconf->tx_mbufs[port].m_table[j] = m[n + j];
                        j++;
+                       /* fall-through */
                case 1:
                        qconf->tx_mbufs[port].m_table[j] = m[n + j];
                        j++;
@@ -743,14 +751,14 @@ send_packetsx4(uint8_t port,
 
 #ifdef DO_RFC_1812_CHECKS
 static inline int
-is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
+is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len)
 {
        /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
        /*
         * 1. The packet length reported by the Link Layer must be large
         * enough to hold the minimum length legal IP datagram (20 bytes).
         */
-       if (link_len < sizeof(struct ipv4_hdr))
+       if (link_len < sizeof(struct rte_ipv4_hdr))
                return -1;
 
        /* 2. The IP checksum must be correct. */
@@ -775,7 +783,7 @@ is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
         * datagram header, whose length is specified in the IP header length
         * field.
         */
-       if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
+       if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr))
                return -5;
 
        return 0;
@@ -787,31 +795,33 @@ is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
 static __m128i mask0;
 static __m128i mask1;
 static __m128i mask2;
-static inline uint8_t
-get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid,
+static inline uint16_t
+get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid,
                lookup_struct_t *ipv4_l3fwd_lookup_struct)
 {
        int ret = 0;
        union ipv4_5tuple_host key;
 
-       ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live);
+       ipv4_hdr = (uint8_t *)ipv4_hdr +
+               offsetof(struct rte_ipv4_hdr, time_to_live);
        __m128i data = _mm_loadu_si128((__m128i *)(ipv4_hdr));
        /* Get 5 tuple: dst port, src port, dst IP address, src IP address and
           protocol */
        key.xmm = _mm_and_si128(data, mask0);
        /* Find destination port */
        ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
-       return (uint8_t)((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
+       return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
 }
 
-static inline uint8_t
-get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid,
+static inline uint16_t
+get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid,
                lookup_struct_t *ipv6_l3fwd_lookup_struct)
 {
        int ret = 0;
        union ipv6_5tuple_host key;
 
-       ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len);
+       ipv6_hdr = (uint8_t *)ipv6_hdr +
+               offsetof(struct rte_ipv6_hdr, payload_len);
        __m128i data0 = _mm_loadu_si128((__m128i *)(ipv6_hdr));
        __m128i data1 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) +
                        sizeof(__m128i)));
@@ -828,36 +838,36 @@ get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid,
 
        /* Find destination port */
        ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
-       return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
+       return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
 }
 #endif
 
 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
 
-static inline uint8_t
-get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid,
+static inline uint16_t
+get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid,
                lookup_struct_t *ipv4_l3fwd_lookup_struct)
 {
-       uint8_t next_hop;
+       uint32_t next_hop;
 
-       return (uint8_t)((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
-               rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr),
+       return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
+               rte_be_to_cpu_32(((struct rte_ipv4_hdr *)ipv4_hdr)->dst_addr),
                &next_hop) == 0) ? next_hop : portid);
 }
 
-static inline uint8_t
-get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid,
+static inline uint16_t
+get_ipv6_dst_port(void *ipv6_hdr,  uint16_t portid,
                lookup6_struct_t *ipv6_l3fwd_lookup_struct)
 {
-       uint8_t next_hop;
+       uint32_t next_hop;
 
-       return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
-                       ((struct ipv6_hdr *)ipv6_hdr)->dst_addr, &next_hop) == 0) ?
-                       next_hop : portid);
+       return ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
+               ((struct rte_ipv6_hdr *)ipv6_hdr)->dst_addr, &next_hop) == 0) ?
+               next_hop : portid);
 }
 #endif
 
-static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
+static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid)
                __attribute__((unused));
 
 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \
@@ -874,41 +884,41 @@ static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
 #define EXCLUDE_8TH_PKT 0x7f
 
 static inline void
-simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
+simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid)
 {
-       struct ether_hdr *eth_hdr[8];
-       struct ipv4_hdr *ipv4_hdr[8];
-       uint8_t dst_port[8];
+       struct rte_ether_hdr *eth_hdr[8];
+       struct rte_ipv4_hdr *ipv4_hdr[8];
+       uint16_t dst_port[8];
        int32_t ret[8];
        union ipv4_5tuple_host key[8];
        __m128i data[8];
 
-       eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
-       eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
-       eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
-       eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
-       eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
-       eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
-       eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
-       eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
+       eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct rte_ether_hdr *);
+       eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct rte_ether_hdr *);
+       eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct rte_ether_hdr *);
+       eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct rte_ether_hdr *);
+       eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct rte_ether_hdr *);
+       eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct rte_ether_hdr *);
+       eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct rte_ether_hdr *);
+       eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct rte_ether_hdr *);
 
        /* Handle IPv4 headers.*/
-       ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv4_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv4_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv4_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv4_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv4_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv4_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv4_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv4_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv4_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv4_hdr *,
-                       sizeof(struct ether_hdr));
+       ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct rte_ipv4_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv4_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct rte_ipv4_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv4_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct rte_ipv4_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct rte_ipv4_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct rte_ipv4_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct rte_ipv4_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct rte_ipv4_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct rte_ipv4_hdr *,
+                       sizeof(struct rte_ether_hdr));
 
 #ifdef DO_RFC_1812_CHECKS
        /* Check to make sure the packet is valid (RFC1812) */
@@ -959,29 +969,29 @@ simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
 #endif /* End of #ifdef DO_RFC_1812_CHECKS */
 
        data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *,
-                       sizeof(struct ether_hdr) +
-                       offsetof(struct ipv4_hdr, time_to_live)));
+                       sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv4_hdr, time_to_live)));
        data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *,
-                       sizeof(struct ether_hdr) +
-                       offsetof(struct ipv4_hdr, time_to_live)));
+                       sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv4_hdr, time_to_live)));
        data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *,
-                       sizeof(struct ether_hdr) +
-                       offsetof(struct ipv4_hdr, time_to_live)));
+                       sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv4_hdr, time_to_live)));
        data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *,
-                       sizeof(struct ether_hdr) +
-                       offsetof(struct ipv4_hdr, time_to_live)));
+                       sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv4_hdr, time_to_live)));
        data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *,
-                       sizeof(struct ether_hdr) +
-                       offsetof(struct ipv4_hdr, time_to_live)));
+                       sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv4_hdr, time_to_live)));
        data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *,
-                       sizeof(struct ether_hdr) +
-                       offsetof(struct ipv4_hdr, time_to_live)));
+                       sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv4_hdr, time_to_live)));
        data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *,
-                       sizeof(struct ether_hdr) +
-                       offsetof(struct ipv4_hdr, time_to_live)));
+                       sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv4_hdr, time_to_live)));
        data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *,
-                       sizeof(struct ether_hdr) +
-                       offsetof(struct ipv4_hdr, time_to_live)));
+                       sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv4_hdr, time_to_live)));
 
        key[0].xmm = _mm_and_si128(data[0], mask0);
        key[1].xmm = _mm_and_si128(data[1], mask0);
@@ -995,16 +1005,16 @@ simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
        const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
                        &key[4], &key[5], &key[6], &key[7]};
 
-       rte_hash_lookup_multi(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct,
+       rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct,
                        &key_array[0], 8, ret);
-       dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]);
-       dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]);
-       dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]);
-       dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]);
-       dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]);
-       dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]);
-       dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]);
-       dst_port[7] = (uint8_t) ((ret[7] < 0) ? portid : ipv4_l3fwd_out_if[ret[7]]);
+       dst_port[0] = ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]);
+       dst_port[1] = ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]);
+       dst_port[2] = ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]);
+       dst_port[3] = ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]);
+       dst_port[4] = ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]);
+       dst_port[5] = ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]);
+       dst_port[6] = ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]);
+       dst_port[7] = ((ret[7] < 0) ? portid : ipv4_l3fwd_out_if[ret[7]]);
 
        if (dst_port[0] >= RTE_MAX_ETHPORTS ||
                        (enabled_port_mask & 1 << dst_port[0]) == 0)
@@ -1062,14 +1072,14 @@ simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
        *(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
 
        /* src addr */
-       ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
 
        send_single_packet(m[0], (uint8_t)dst_port[0]);
        send_single_packet(m[1], (uint8_t)dst_port[1]);
@@ -1086,56 +1096,57 @@ static inline void get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
                __m128i mask1, union ipv6_5tuple_host *key)
 {
        __m128i tmpdata0 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
-                       __m128i *, sizeof(struct ether_hdr) +
-                       offsetof(struct ipv6_hdr, payload_len)));
+                       __m128i *, sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv6_hdr, payload_len)));
        __m128i tmpdata1 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
-                       __m128i *, sizeof(struct ether_hdr) +
-                       offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i)));
-       __m128i tmpdata2 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
-                       __m128i *, sizeof(struct ether_hdr) +
-                       offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i) +
+                       __m128i *, sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv6_hdr, payload_len) +
                        sizeof(__m128i)));
+       __m128i tmpdata2 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
+                       __m128i *, sizeof(struct rte_ether_hdr) +
+                       offsetof(struct rte_ipv6_hdr, payload_len) +
+                       sizeof(__m128i) + sizeof(__m128i)));
        key->xmm[0] = _mm_and_si128(tmpdata0, mask0);
        key->xmm[1] = tmpdata1;
        key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
 }
 
 static inline void
-simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
+simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid)
 {
        int32_t ret[8];
-       uint8_t dst_port[8];
-       struct ether_hdr *eth_hdr[8];
+       uint16_t dst_port[8];
+       struct rte_ether_hdr *eth_hdr[8];
        union ipv6_5tuple_host key[8];
 
-       __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[8];
+       __attribute__((unused)) struct rte_ipv6_hdr *ipv6_hdr[8];
 
-       eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
-       eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
-       eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
-       eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
-       eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
-       eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
-       eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
-       eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
+       eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct rte_ether_hdr *);
+       eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct rte_ether_hdr *);
+       eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct rte_ether_hdr *);
+       eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct rte_ether_hdr *);
+       eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct rte_ether_hdr *);
+       eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct rte_ether_hdr *);
+       eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct rte_ether_hdr *);
+       eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct rte_ether_hdr *);
 
        /* Handle IPv6 headers.*/
-       ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv6_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv6_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv6_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv6_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv6_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv6_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv6_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv6_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv6_hdr *,
-                       sizeof(struct ether_hdr));
-       ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv6_hdr *,
-                       sizeof(struct ether_hdr));
+       ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct rte_ipv6_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv6_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct rte_ipv6_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv6_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct rte_ipv6_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct rte_ipv6_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct rte_ipv6_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct rte_ipv6_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct rte_ipv6_hdr *,
+                       sizeof(struct rte_ether_hdr));
+       ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct rte_ipv6_hdr *,
+                       sizeof(struct rte_ether_hdr));
 
        get_ipv6_5tuple(m[0], mask1, mask2, &key[0]);
        get_ipv6_5tuple(m[1], mask1, mask2, &key[1]);
@@ -1149,16 +1160,16 @@ simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
        const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
                        &key[4], &key[5], &key[6], &key[7]};
 
-       rte_hash_lookup_multi(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
+       rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
                        &key_array[0], 4, ret);
-       dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid : ipv6_l3fwd_out_if[ret[0]]);
-       dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv6_l3fwd_out_if[ret[1]]);
-       dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv6_l3fwd_out_if[ret[2]]);
-       dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv6_l3fwd_out_if[ret[3]]);
-       dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid : ipv6_l3fwd_out_if[ret[4]]);
-       dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid : ipv6_l3fwd_out_if[ret[5]]);
-       dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid : ipv6_l3fwd_out_if[ret[6]]);
-       dst_port[7] = (uint8_t) ((ret[7] < 0) ? portid : ipv6_l3fwd_out_if[ret[7]]);
+       dst_port[0] = ((ret[0] < 0) ? portid : ipv6_l3fwd_out_if[ret[0]]);
+       dst_port[1] = ((ret[1] < 0) ? portid : ipv6_l3fwd_out_if[ret[1]]);
+       dst_port[2] = ((ret[2] < 0) ? portid : ipv6_l3fwd_out_if[ret[2]]);
+       dst_port[3] = ((ret[3] < 0) ? portid : ipv6_l3fwd_out_if[ret[3]]);
+       dst_port[4] = ((ret[4] < 0) ? portid : ipv6_l3fwd_out_if[ret[4]]);
+       dst_port[5] = ((ret[5] < 0) ? portid : ipv6_l3fwd_out_if[ret[5]]);
+       dst_port[6] = ((ret[6] < 0) ? portid : ipv6_l3fwd_out_if[ret[6]]);
+       dst_port[7] = ((ret[7] < 0) ? portid : ipv6_l3fwd_out_if[ret[7]]);
 
        if (dst_port[0] >= RTE_MAX_ETHPORTS ||
                        (enabled_port_mask & 1 << dst_port[0]) == 0)
@@ -1196,40 +1207,40 @@ simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
        *(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
 
        /* src addr */
-       ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
-       ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
-
-       send_single_packet(m[0], (uint8_t)dst_port[0]);
-       send_single_packet(m[1], (uint8_t)dst_port[1]);
-       send_single_packet(m[2], (uint8_t)dst_port[2]);
-       send_single_packet(m[3], (uint8_t)dst_port[3]);
-       send_single_packet(m[4], (uint8_t)dst_port[4]);
-       send_single_packet(m[5], (uint8_t)dst_port[5]);
-       send_single_packet(m[6], (uint8_t)dst_port[6]);
-       send_single_packet(m[7], (uint8_t)dst_port[7]);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
+       rte_ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
+
+       send_single_packet(m[0], dst_port[0]);
+       send_single_packet(m[1], dst_port[1]);
+       send_single_packet(m[2], dst_port[2]);
+       send_single_packet(m[3], dst_port[3]);
+       send_single_packet(m[4], dst_port[4]);
+       send_single_packet(m[5], dst_port[5]);
+       send_single_packet(m[6], dst_port[6]);
+       send_single_packet(m[7], dst_port[7]);
 
 }
 #endif /* APP_LOOKUP_METHOD */
 
-static inline __attribute__((always_inline)) void
-l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
+static __rte_always_inline void
+l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid)
 {
-       struct ether_hdr *eth_hdr;
-       struct ipv4_hdr *ipv4_hdr;
-       uint8_t dst_port;
+       struct rte_ether_hdr *eth_hdr;
+       struct rte_ipv4_hdr *ipv4_hdr;
+       uint16_t dst_port;
 
-       eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+       eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 
        if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
                /* Handle IPv4 headers.*/
-               ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
-                               sizeof(struct ether_hdr));
+               ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
+                               sizeof(struct rte_ether_hdr));
 
 #ifdef DO_RFC_1812_CHECKS
                /* Check to make sure the packet is valid (RFC1812) */
@@ -1254,15 +1265,16 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
                *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
 
                /* src addr */
-               ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+               rte_ether_addr_copy(&ports_eth_addr[dst_port],
+                               &eth_hdr->s_addr);
 
                send_single_packet(m, dst_port);
        } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
                /* Handle IPv6 headers.*/
-               struct ipv6_hdr *ipv6_hdr;
+               struct rte_ipv6_hdr *ipv6_hdr;
 
-               ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
-                               sizeof(struct ether_hdr));
+               ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
+                               sizeof(struct rte_ether_hdr));
 
                dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
                                RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct);
@@ -1275,7 +1287,8 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
                *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
 
                /* src addr */
-               ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+               rte_ether_addr_copy(&ports_eth_addr[dst_port],
+                               &eth_hdr->s_addr);
 
                send_single_packet(m, dst_port);
        } else
@@ -1292,7 +1305,7 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
 #define        IPV4_MAX_VER_IHL_DIFF   (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL)
 
 /* Minimum value of IPV4 total length (20B) in network byte order. */
-#define        IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8)
+#define        IPV4_MIN_LEN_BE (sizeof(struct rte_ipv4_hdr) << 8)
 
 /*
  * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2:
@@ -1305,8 +1318,8 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
  * If we encounter invalid IPV4 packet, then set destination port for it
  * to BAD_PORT value.
  */
-static inline __attribute__((always_inline)) void
-rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
+static __rte_always_inline void
+rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
 {
        uint8_t ihl;
 
@@ -1325,7 +1338,7 @@ rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
 }
 
 #else
-#define        rfc1812_process(mb, dp) do { } while (0)
+#define        rfc1812_process(mb, dp, ptype)  do { } while (0)
 #endif /* DO_RFC_1812_CHECKS */
 #endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */
 
@@ -1333,41 +1346,44 @@ rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
        (ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
 
-static inline __attribute__((always_inline)) uint16_t
-get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint8_t portid)
+static __rte_always_inline uint16_t
+get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint16_t portid)
 {
-       uint8_t next_hop;
-       struct ipv6_hdr *ipv6_hdr;
-       struct ether_hdr *eth_hdr;
+       uint32_t next_hop;
+       struct rte_ipv6_hdr *ipv6_hdr;
+       struct rte_ether_hdr *eth_hdr;
 
        if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
-               if (rte_lpm_lookup(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct,
-                               dst_ipv4, &next_hop) != 0)
-                       next_hop = portid;
+               return (uint16_t) ((rte_lpm_lookup(
+                               RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dst_ipv4,
+                               &next_hop) == 0) ? next_hop : portid);
+
        } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
-               eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
-               ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
-               if (rte_lpm6_lookup(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
-                               ipv6_hdr->dst_addr, &next_hop) != 0)
-                       next_hop = portid;
-       } else {
-               next_hop = portid;
+
+               eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+               ipv6_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1);
+
+               return (uint16_t) ((rte_lpm6_lookup(
+                               RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
+                               ipv6_hdr->dst_addr, &next_hop) == 0) ?
+                               next_hop : portid);
+
        }
 
-       return next_hop;
+       return portid;
 }
 
 static inline void
-process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint8_t portid)
+process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint16_t portid)
 {
-       struct ether_hdr *eth_hdr;
-       struct ipv4_hdr *ipv4_hdr;
+       struct rte_ether_hdr *eth_hdr;
+       struct rte_ipv4_hdr *ipv4_hdr;
        uint32_t dst_ipv4;
        uint16_t dp;
        __m128i te, ve;
 
-       eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
-       ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+       eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+       ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
 
        dst_ipv4 = ipv4_hdr->dst_addr;
        dst_ipv4 = rte_be_to_cpu_32(dst_ipv4);
@@ -1391,27 +1407,27 @@ processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
                __m128i *dip,
                uint32_t *ipv4_flag)
 {
-       struct ipv4_hdr *ipv4_hdr;
-       struct ether_hdr *eth_hdr;
+       struct rte_ipv4_hdr *ipv4_hdr;
+       struct rte_ether_hdr *eth_hdr;
        uint32_t x0, x1, x2, x3;
 
-       eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *);
-       ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+       eth_hdr = rte_pktmbuf_mtod(pkt[0], struct rte_ether_hdr *);
+       ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
        x0 = ipv4_hdr->dst_addr;
        ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
 
-       eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *);
-       ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+       eth_hdr = rte_pktmbuf_mtod(pkt[1], struct rte_ether_hdr *);
+       ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
        x1 = ipv4_hdr->dst_addr;
        ipv4_flag[0] &= pkt[1]->packet_type;
 
-       eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *);
-       ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+       eth_hdr = rte_pktmbuf_mtod(pkt[2], struct rte_ether_hdr *);
+       ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
        x2 = ipv4_hdr->dst_addr;
        ipv4_flag[0] &= pkt[2]->packet_type;
 
-       eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *);
-       ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+       eth_hdr = rte_pktmbuf_mtod(pkt[3], struct rte_ether_hdr *);
+       ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
        x3 = ipv4_hdr->dst_addr;
        ipv4_flag[0] &= pkt[3]->packet_type;
 
@@ -1425,7 +1441,7 @@ processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
 static inline void
 processx4_step2(__m128i dip,
                uint32_t ipv4_flag,
-               uint8_t portid,
+               uint16_t portid,
                struct rte_mbuf *pkt[FWDSTEP],
                uint16_t dprt[FWDSTEP])
 {
@@ -1439,7 +1455,11 @@ processx4_step2(__m128i dip,
        /* if all 4 packets are IPV4. */
        if (likely(ipv4_flag)) {
                rte_lpm_lookupx4(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dip,
-                               dprt, portid);
+                               dst.u32, portid);
+
+               /* get rid of unused upper 16 bit for each dport. */
+               dst.x = _mm_packs_epi32(dst.x, dst.x);
+               *(uint64_t *)dprt = dst.u64[0];
        } else {
                dst.x = dip;
                dprt[0] = get_dst_port(pkt[0], dst.u32[0], portid);
@@ -1488,13 +1508,17 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
        _mm_store_si128(p[2], te[2]);
        _mm_store_si128(p[3], te[3]);
 
-       rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
+       rfc1812_process((struct rte_ipv4_hdr *)
+                       ((struct rte_ether_hdr *)p[0] + 1),
                        &dst_port[0], pkt[0]->packet_type);
-       rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
+       rfc1812_process((struct rte_ipv4_hdr *)
+                       ((struct rte_ether_hdr *)p[1] + 1),
                        &dst_port[1], pkt[1]->packet_type);
-       rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
+       rfc1812_process((struct rte_ipv4_hdr *)
+                       ((struct rte_ether_hdr *)p[2] + 1),
                        &dst_port[2], pkt[2]->packet_type);
-       rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
+       rfc1812_process((struct rte_ipv4_hdr *)
+                       ((struct rte_ether_hdr *)p[3] + 1),
                        &dst_port[3], pkt[3]->packet_type);
 }
 
@@ -1527,7 +1551,7 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
  * Suppose we have array of destionation ports:
  * dst_port[] = {a, b, c, d,, e, ... }
  * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
- * We doing 4 comparisions at once and the result is 4 bit mask.
+ * We doing 4 comparisons at once and the result is 4 bit mask.
  * This mask is used as an index into prebuild array of pnum values.
  */
 static inline uint16_t *
@@ -1652,9 +1676,9 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2)
 
        /* if dest port value has changed. */
        if (v != GRPMSK) {
-               lp = pnum->u16 + gptbl[v].idx;
-               lp[0] = 1;
                pnum->u64 = gptbl[v].pnum;
+               pnum->u16[FWDSTEP] = 1;
+               lp = pnum->u16 + gptbl[v].idx;
        }
 
        return lp;
@@ -1664,7 +1688,8 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2)
 
 static void
 process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx,
-               uint8_t portid) {
+               uint16_t portid)
+{
 
        int j;
 
@@ -1789,10 +1814,12 @@ process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx,
                process_packet(pkts_burst[j], dst_port + j, portid);
                GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
                j++;
+               /* fall-through */
        case 2:
                process_packet(pkts_burst[j], dst_port + j, portid);
                GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
                j++;
+               /* fall-through */
        case 1:
                process_packet(pkts_burst[j], dst_port + j, portid);
                GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
@@ -1833,12 +1860,12 @@ process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx,
        for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
                rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
                                j + PREFETCH_OFFSET], void *));
-               l3fwd_simple_forward(pkts_burst[j], portid, qconf);
+               l3fwd_simple_forward(pkts_burst[j], portid);
        }
 
        /* Forward remaining prefetched packets */
        for (; j < nb_rx; j++)
-               l3fwd_simple_forward(pkts_burst[j], portid, qconf);
+               l3fwd_simple_forward(pkts_burst[j], portid);
 
 #endif /* ENABLE_MULTI_BUFFER_OPTIMIZE */
 
@@ -1968,21 +1995,22 @@ cpu_load_collector(__rte_unused void *arg) {
  *
  * This loop is used to start empty scheduler on lcore.
  */
-static void
+static void *
 lthread_null(__rte_unused void *args)
 {
        int lcore_id = rte_lcore_id();
 
        RTE_LOG(INFO, L3FWD, "Starting scheduler on lcore %d.\n", lcore_id);
        lthread_exit(NULL);
+       return NULL;
 }
 
 /* main processing loop */
-static void
+static void *
 lthread_tx_per_ring(void *dummy)
 {
        int nb_rx;
-       uint8_t portid;
+       uint16_t portid;
        struct rte_ring *ring;
        struct thread_tx_conf *tx_conf;
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
@@ -2010,7 +2038,7 @@ lthread_tx_per_ring(void *dummy)
                 */
                SET_CPU_BUSY(tx_conf, CPU_POLL);
                nb_rx = rte_ring_sc_dequeue_burst(ring, (void **)pkts_burst,
-                               MAX_PKT_BURST);
+                               MAX_PKT_BURST, NULL);
                SET_CPU_IDLE(tx_conf, CPU_POLL);
 
                if (nb_rx > 0) {
@@ -2023,6 +2051,7 @@ lthread_tx_per_ring(void *dummy)
                        lthread_cond_wait(ready, 0);
 
        }
+       return NULL;
 }
 
 /*
@@ -2031,13 +2060,13 @@ lthread_tx_per_ring(void *dummy)
  * This lthread is used to spawn one new lthread per ring from producers.
  *
  */
-static void
+static void *
 lthread_tx(void *args)
 {
        struct lthread *lt;
 
        unsigned lcore_id;
-       uint8_t portid;
+       uint16_t portid;
        struct thread_tx_conf *tx_conf;
 
        tx_conf = (struct thread_tx_conf *)args;
@@ -2076,15 +2105,17 @@ lthread_tx(void *args)
                }
 
        }
+       return NULL;
 }
 
-static void
+static void *
 lthread_rx(void *dummy)
 {
        int ret;
        uint16_t nb_rx;
        int i;
-       uint8_t portid, queueid;
+       uint16_t portid;
+       uint8_t queueid;
        int worker_id;
        int len[RTE_MAX_LCORE] = { 0 };
        int old_len, new_len;
@@ -2101,7 +2132,7 @@ lthread_rx(void *dummy)
 
        if (rx_conf->n_rx_queue == 0) {
                RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", rte_lcore_id());
-               return;
+               return NULL;
        }
 
        RTE_LOG(INFO, L3FWD, "Entering main Rx loop on lcore %u\n", rte_lcore_id());
@@ -2110,7 +2141,8 @@ lthread_rx(void *dummy)
 
                portid = rx_conf->rx_queue_list[i].port_id;
                queueid = rx_conf->rx_queue_list[i].queue_id;
-               RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
+               RTE_LOG(INFO, L3FWD,
+                       " -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
                                rte_lcore_id(), portid, queueid);
        }
 
@@ -2146,7 +2178,7 @@ lthread_rx(void *dummy)
                                ret = rte_ring_sp_enqueue_burst(
                                                rx_conf->ring[worker_id],
                                                (void **) pkts_burst,
-                                               nb_rx);
+                                               nb_rx, NULL);
 
                                new_len = old_len + ret;
 
@@ -2172,6 +2204,7 @@ lthread_rx(void *dummy)
                        lthread_yield();
                }
        }
+       return NULL;
 }
 
 /*
@@ -2180,8 +2213,9 @@ lthread_rx(void *dummy)
  * This lthread loop spawns all rx and tx lthreads on master lcore
  */
 
-static void
-lthread_spawner(__rte_unused void *arg) {
+static void *
+lthread_spawner(__rte_unused void *arg)
+{
        struct lthread *lt[MAX_THREAD];
        int i;
        int n_thread = 0;
@@ -2222,6 +2256,7 @@ lthread_spawner(__rte_unused void *arg) {
        for (i = 0; i < n_thread; i++)
                lthread_join(lt[i], NULL);
 
+       return NULL;
 }
 
 /*
@@ -2269,7 +2304,7 @@ pthread_tx(void *dummy)
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
        uint64_t prev_tsc, diff_tsc, cur_tsc;
        int nb_rx;
-       uint8_t portid;
+       uint16_t portid;
        struct thread_tx_conf *tx_conf;
 
        const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
@@ -2314,7 +2349,7 @@ pthread_tx(void *dummy)
                 */
                SET_CPU_BUSY(tx_conf, CPU_POLL);
                nb_rx = rte_ring_sc_dequeue_burst(tx_conf->ring,
-                               (void **)pkts_burst, MAX_PKT_BURST);
+                               (void **)pkts_burst, MAX_PKT_BURST, NULL);
                SET_CPU_IDLE(tx_conf, CPU_POLL);
 
                if (unlikely(nb_rx == 0)) {
@@ -2338,7 +2373,8 @@ pthread_rx(void *dummy)
        uint32_t n;
        uint32_t nb_rx;
        unsigned lcore_id;
-       uint8_t portid, queueid;
+       uint8_t queueid;
+       uint16_t portid;
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
 
        struct thread_rx_conf *rx_conf;
@@ -2357,7 +2393,8 @@ pthread_rx(void *dummy)
 
                portid = rx_conf->rx_queue_list[i].port_id;
                queueid = rx_conf->rx_queue_list[i].queue_id;
-               RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
+               RTE_LOG(INFO, L3FWD,
+                       " -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
                                lcore_id, portid, queueid);
        }
 
@@ -2386,7 +2423,7 @@ pthread_rx(void *dummy)
                        SET_CPU_BUSY(rx_conf, CPU_PROCESS);
                        worker_id = (worker_id + 1) % rx_conf->n_ring;
                        n = rte_ring_sp_enqueue_burst(rx_conf->ring[worker_id],
-                                       (void **)pkts_burst, nb_rx);
+                                       (void **)pkts_burst, nb_rx, NULL);
 
                        if (unlikely(n != nb_rx)) {
                                uint32_t k;
@@ -2465,7 +2502,7 @@ check_lcore_params(void)
 }
 
 static int
-check_port_config(const unsigned nb_ports)
+check_port_config(void)
 {
        unsigned portid;
        uint16_t i;
@@ -2476,7 +2513,7 @@ check_port_config(const unsigned nb_ports)
                        printf("port %u is not enabled in port mask\n", portid);
                        return -1;
                }
-               if (portid >= nb_ports) {
+               if (!rte_eth_dev_is_valid_port(portid)) {
                        printf("port %u is not present on the board\n", portid);
                        return -1;
                }
@@ -2485,7 +2522,7 @@ check_port_config(const unsigned nb_ports)
 }
 
 static uint8_t
-get_port_n_rx_queues(const uint8_t port)
+get_port_n_rx_queues(const uint16_t port)
 {
        int queue = -1;
        uint16_t i;
@@ -2601,6 +2638,7 @@ print_usage(const char *prgname)
                "  [--rx (port,queue,lcore,thread)[,(port,queue,lcore,thread]]"
                "  [--tx (lcore,thread)[,(lcore,thread]]"
                "  [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
+               "  [--parse-ptype]\n\n"
                "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
                "  -P : enable promiscuous mode\n"
                "  --rx (port,queue,lcore,thread): rx queues configuration\n"
@@ -2612,7 +2650,8 @@ print_usage(const char *prgname)
                "  --enable-jumbo: enable jumbo frame"
                " which max packet len is PKTLEN in decimal (64-9600)\n"
                "  --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n"
-               "  --no-lthreads: turn off lthread model\n",
+               "  --no-lthreads: turn off lthread model\n"
+               "  --parse-ptype: set to use software to analyze packet type\n\n",
                prgname);
 }
 
@@ -2713,7 +2752,7 @@ parse_rx_config(const char *q_arg)
                        return -1;
                }
                rx_thread_params_array[nb_rx_thread_params].port_id =
-                               (uint8_t)int_fld[FLD_PORT];
+                               int_fld[FLD_PORT];
                rx_thread_params_array[nb_rx_thread_params].queue_id =
                                (uint8_t)int_fld[FLD_QUEUE];
                rx_thread_params_array[nb_rx_thread_params].lcore_id =
@@ -2797,7 +2836,7 @@ parse_stat_lcore(const char *stat_lcore)
 static void
 parse_eth_dest(const char *optarg)
 {
-       uint8_t portid;
+       uint16_t portid;
        char *port_end;
        uint8_t c, *dest, peer_addr[6];
 
@@ -2831,6 +2870,7 @@ parse_eth_dest(const char *optarg)
 #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
 #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
 #define CMD_LINE_OPT_NO_LTHREADS "no-lthreads"
+#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
 
 /* Parse the argument given in the command line of the application */
 static int
@@ -2850,6 +2890,7 @@ parse_args(int argc, char **argv)
                {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
                {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
                {CMD_LINE_OPT_NO_LTHREADS, 0, 0, 0},
+               {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
                {NULL, 0, 0, 0}
        };
 
@@ -2926,15 +2967,26 @@ parse_args(int argc, char **argv)
                                lthreads_on = 0;
                        }
 
+                       if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_PARSE_PTYPE,
+                                       sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
+                               printf("software packet type parsing enabled\n");
+                               parse_ptype_on = 1;
+                       }
+
                        if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO,
                                sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) {
                                struct option lenopts = {"max-pkt-len", required_argument, 0,
                                                0};
 
                                printf("jumbo frame is enabled - disabling simple TX path\n");
-                               port_conf.rxmode.jumbo_frame = 1;
-
-                               /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */
+                               port_conf.rxmode.offloads |=
+                                               DEV_RX_OFFLOAD_JUMBO_FRAME;
+                               port_conf.txmode.offloads |=
+                                               DEV_TX_OFFLOAD_MULTI_SEGS;
+
+                               /* if no max-pkt-len set, use the default value
+                                * RTE_ETHER_MAX_LEN
+                                */
                                if (0 == getopt_long(argc, argvopt, "", &lenopts,
                                                &option_index)) {
 
@@ -2974,16 +3026,16 @@ parse_args(int argc, char **argv)
                argv[optind-1] = prgname;
 
        ret = optind-1;
-       optind = 0; /* reset getopt lib */
+       optind = 1; /* reset getopt lib */
        return ret;
 }
 
 static void
-print_ethaddr(const char *name, const struct ether_addr *eth_addr)
+print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr)
 {
-       char buf[ETHER_ADDR_FMT_SIZE];
+       char buf[RTE_ETHER_ADDR_FMT_SIZE];
 
-       ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+       rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr);
        printf("%s%s", name, buf);
 }
 
@@ -3093,19 +3145,19 @@ populate_ipv4_many_flow_into_table(const struct rte_hash *h,
                switch (i & (NUMBER_PORT_USED - 1)) {
                case 0:
                        entry = ipv4_l3fwd_route_array[0];
-                       entry.key.ip_dst = IPv4(101, c, b, a);
+                       entry.key.ip_dst = RTE_IPv4(101, c, b, a);
                        break;
                case 1:
                        entry = ipv4_l3fwd_route_array[1];
-                       entry.key.ip_dst = IPv4(201, c, b, a);
+                       entry.key.ip_dst = RTE_IPv4(201, c, b, a);
                        break;
                case 2:
                        entry = ipv4_l3fwd_route_array[2];
-                       entry.key.ip_dst = IPv4(111, c, b, a);
+                       entry.key.ip_dst = RTE_IPv4(111, c, b, a);
                        break;
                case 3:
                        entry = ipv4_l3fwd_route_array[3];
-                       entry.key.ip_dst = IPv4(211, c, b, a);
+                       entry.key.ip_dst = RTE_IPv4(211, c, b, a);
                        break;
                };
                convert_ipv4_5tuple(&entry.key, &newkey);
@@ -3244,14 +3296,18 @@ static void
 setup_lpm(int socketid)
 {
        struct rte_lpm6_config config;
+       struct rte_lpm_config lpm_ipv4_config;
        unsigned i;
        int ret;
        char s[64];
 
        /* create the LPM table */
        snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
-       ipv4_l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid,
-                               IPV4_L3FWD_LPM_MAX_RULES, 0);
+       lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
+       lpm_ipv4_config.number_tbl8s = 256;
+       lpm_ipv4_config.flags = 0;
+       ipv4_l3fwd_lookup_struct[socketid] =
+                       rte_lpm_create(s, socketid, &lpm_ipv4_config);
        if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
                rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
                                " on socket %d\n", socketid);
@@ -3368,18 +3424,19 @@ init_mem(unsigned nb_mbuf)
 
 /* Check the link status of all ports in up to 9s, and print them finally */
 static void
-check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+check_all_ports_link_status(uint32_t port_mask)
 {
 #define CHECK_INTERVAL 100 /* 100ms */
 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
-       uint8_t portid, count, all_ports_up, print_flag = 0;
+       uint16_t portid;
+       uint8_t count, all_ports_up, print_flag = 0;
        struct rte_eth_link link;
 
        printf("\nChecking link status");
        fflush(stdout);
        for (count = 0; count <= MAX_CHECK_TIME; count++) {
                all_ports_up = 1;
-               for (portid = 0; portid < port_num; portid++) {
+               RTE_ETH_FOREACH_DEV(portid) {
                        if ((port_mask & (1 << portid)) == 0)
                                continue;
                        memset(&link, 0, sizeof(link));
@@ -3387,18 +3444,17 @@ check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
                        /* print link status if flag set */
                        if (print_flag == 1) {
                                if (link.link_status)
-                                       printf("Port %d Link Up - speed %u "
-                                               "Mbps - %s\n", (uint8_t)portid,
-                                               (unsigned)link.link_speed,
+                                       printf(
+                                       "Port%d Link Up. Speed %u Mbps - %s\n",
+                                               portid, link.link_speed,
                                (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
                                        ("full-duplex") : ("half-duplex\n"));
                                else
-                                       printf("Port %d Link Down\n",
-                                               (uint8_t)portid);
+                                       printf("Port %d Link Down\n", portid);
                                continue;
                        }
                        /* clear all_ports_up flag if any link down */
-                       if (link.link_status == 0) {
+                       if (link.link_status == ETH_LINK_DOWN) {
                                all_ports_up = 0;
                                break;
                        }
@@ -3429,10 +3485,10 @@ main(int argc, char **argv)
        int ret;
        int i;
        unsigned nb_ports;
-       uint16_t queueid;
+       uint16_t queueid, portid;
        unsigned lcore_id;
        uint32_t n_tx_queue, nb_lcores;
-       uint8_t portid, nb_rx_queue, queue, socketid;
+       uint8_t nb_rx_queue, queue, socketid;
 
        /* init EAL */
        ret = rte_eal_init(argc, argv);
@@ -3443,7 +3499,7 @@ main(int argc, char **argv)
 
        /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */
        for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
-               dest_eth_addr[portid] = ETHER_LOCAL_ADMIN_ADDR +
+               dest_eth_addr[portid] = RTE_ETHER_LOCAL_ADMIN_ADDR +
                                ((uint64_t)portid << 40);
                *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
        }
@@ -3471,17 +3527,17 @@ main(int argc, char **argv)
        if (ret < 0)
                rte_exit(EXIT_FAILURE, "init_rx_rings failed\n");
 
-       nb_ports = rte_eth_dev_count();
-       if (nb_ports > RTE_MAX_ETHPORTS)
-               nb_ports = RTE_MAX_ETHPORTS;
+       nb_ports = rte_eth_dev_count_avail();
 
-       if (check_port_config(nb_ports) < 0)
+       if (check_port_config() < 0)
                rte_exit(EXIT_FAILURE, "check_port_config failed\n");
 
        nb_lcores = rte_lcore_count();
 
        /* initialize all ports */
-       for (portid = 0; portid < nb_ports; portid++) {
+       RTE_ETH_FOREACH_DEV(portid) {
+               struct rte_eth_conf local_port_conf = port_conf;
+
                /* skip ports that are not enabled */
                if ((enabled_port_mask & (1 << portid)) == 0) {
                        printf("\nSkipping disabled port %d\n", portid);
@@ -3498,24 +3554,47 @@ main(int argc, char **argv)
                        n_tx_queue = MAX_TX_QUEUE_PER_PORT;
                printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
                        nb_rx_queue, (unsigned)n_tx_queue);
+               rte_eth_dev_info_get(portid, &dev_info);
+               if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
+                       local_port_conf.txmode.offloads |=
+                               DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+
+               local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
+                       dev_info.flow_type_rss_offloads;
+               if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
+                               port_conf.rx_adv_conf.rss_conf.rss_hf) {
+                       printf("Port %u modified RSS hash function based on hardware support,"
+                               "requested:%#"PRIx64" configured:%#"PRIx64"\n",
+                               portid,
+                               port_conf.rx_adv_conf.rss_conf.rss_hf,
+                               local_port_conf.rx_adv_conf.rss_conf.rss_hf);
+               }
+
                ret = rte_eth_dev_configure(portid, nb_rx_queue,
-                                       (uint16_t)n_tx_queue, &port_conf);
+                                       (uint16_t)n_tx_queue, &local_port_conf);
                if (ret < 0)
                        rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
                                ret, portid);
 
+               ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
+                                                      &nb_txd);
+               if (ret < 0)
+                       rte_exit(EXIT_FAILURE,
+                                "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n",
+                                ret, portid);
+
                rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
                print_ethaddr(" Address:", &ports_eth_addr[portid]);
                printf(", ");
                print_ethaddr("Destination:",
-                       (const struct ether_addr *)&dest_eth_addr[portid]);
+                       (const struct rte_ether_addr *)&dest_eth_addr[portid]);
                printf(", ");
 
                /*
                 * prepare src MACs for each port.
                 */
-               ether_addr_copy(&ports_eth_addr[portid],
-                       (struct ether_addr *)(val_eth + portid) + 1);
+               rte_ether_addr_copy(&ports_eth_addr[portid],
+                       (struct rte_ether_addr *)(val_eth + portid) + 1);
 
                /* init memory */
                ret = init_mem(NB_MBUF);
@@ -3536,10 +3615,8 @@ main(int argc, char **argv)
                        printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
                        fflush(stdout);
 
-                       rte_eth_dev_info_get(portid, &dev_info);
                        txconf = &dev_info.default_txconf;
-                       if (port_conf.rxmode.jumbo_frame)
-                               txconf->txq_flags = 0;
+                       txconf->offloads = local_port_conf.txmode.offloads;
                        ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
                                                     socketid, txconf);
                        if (ret < 0)
@@ -3568,8 +3645,14 @@ main(int argc, char **argv)
 
                /* init RX queues */
                for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) {
+                       struct rte_eth_dev *dev;
+                       struct rte_eth_conf *conf;
+                       struct rte_eth_rxconf rxq_conf;
+
                        portid = rx_thread[i].rx_queue_list[queue].port_id;
                        queueid = rx_thread[i].rx_queue_list[queue].queue_id;
+                       dev = &rte_eth_devices[portid];
+                       conf = &dev->data->dev_conf;
 
                        if (numa_on)
                                socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
@@ -3579,9 +3662,12 @@ main(int argc, char **argv)
                        printf("rxq=%d,%d,%d ", portid, queueid, socketid);
                        fflush(stdout);
 
+                       rte_eth_dev_info_get(portid, &dev_info);
+                       rxq_conf = dev_info.default_rxconf;
+                       rxq_conf.offloads = conf->rxmode.offloads;
                        ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
                                        socketid,
-                                       NULL,
+                                       &rxq_conf,
                                        pktmbuf_pool[socketid]);
                        if (ret < 0)
                                rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, "
@@ -3592,7 +3678,7 @@ main(int argc, char **argv)
        printf("\n");
 
        /* start ports */
-       for (portid = 0; portid < nb_ports; portid++) {
+       RTE_ETH_FOREACH_DEV(portid) {
                if ((enabled_port_mask & (1 << portid)) == 0)
                        continue;
 
@@ -3612,7 +3698,32 @@ main(int argc, char **argv)
                        rte_eth_promiscuous_enable(portid);
        }
 
-       check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
+       for (i = 0; i < n_rx_thread; i++) {
+               lcore_id = rx_thread[i].conf.lcore_id;
+               if (rte_lcore_is_enabled(lcore_id) == 0)
+                       continue;
+
+               /* check if hw packet type is supported */
+               for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) {
+                       portid = rx_thread[i].rx_queue_list[queue].port_id;
+                       queueid = rx_thread[i].rx_queue_list[queue].queue_id;
+
+                       if (parse_ptype_on) {
+                               if (!rte_eth_add_rx_callback(portid, queueid,
+                                               cb_parse_ptype, NULL))
+                                       rte_exit(EXIT_FAILURE,
+                                               "Failed to add rx callback: "
+                                               "port=%d\n", portid);
+                       } else if (!check_ptype(portid))
+                               rte_exit(EXIT_FAILURE,
+                                       "Port %d cannot parse packet type.\n\n"
+                                       "Please add --parse-ptype to use sw "
+                                       "packet type analyzer.\n\n",
+                                       portid);
+               }
+       }
+
+       check_all_ports_link_status(enabled_port_mask);
 
        if (lthreads_on) {
                printf("Starting L-Threading Model\n");