examples: take promiscuous mode switch result into account
[dpdk.git] / examples / ipsec-secgw / ipsec-secgw.c
index fd81b6c..dac0d18 100644 (file)
@@ -41,6 +41,8 @@
 #include <rte_jhash.h>
 #include <rte_cryptodev.h>
 #include <rte_security.h>
+#include <rte_ip.h>
+#include <rte_ip_frag.h>
 
 #include "ipsec.h"
 #include "parser.h"
@@ -109,6 +111,11 @@ static uint16_t nb_txd = IPSEC_SECGW_TX_DESC_DEFAULT;
                (addr)->addr_bytes[4], (addr)->addr_bytes[5], \
                0, 0)
 
+#define        FRAG_TBL_BUCKET_ENTRIES 4
+#define        FRAG_TTL_MS             (10 * MS_PER_S)
+
+#define MTU_TO_FRAMELEN(x)     ((x) + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
+
 /* port/source ethernet addr and destination ethernet addr */
 struct ethaddr_info {
        uint64_t src, dst;
@@ -126,6 +133,8 @@ struct ethaddr_info ethaddr_tbl[RTE_MAX_ETHPORTS] = {
 #define CMD_LINE_OPT_CRYPTODEV_MASK    "cryptodev_mask"
 #define CMD_LINE_OPT_RX_OFFLOAD                "rxoffload"
 #define CMD_LINE_OPT_TX_OFFLOAD                "txoffload"
+#define CMD_LINE_OPT_REASSEMBLE                "reassemble"
+#define CMD_LINE_OPT_MTU               "mtu"
 
 enum {
        /* long options mapped to a short option */
@@ -139,6 +148,8 @@ enum {
        CMD_LINE_OPT_CRYPTODEV_MASK_NUM,
        CMD_LINE_OPT_RX_OFFLOAD_NUM,
        CMD_LINE_OPT_TX_OFFLOAD_NUM,
+       CMD_LINE_OPT_REASSEMBLE_NUM,
+       CMD_LINE_OPT_MTU_NUM,
 };
 
 static const struct option lgopts[] = {
@@ -147,6 +158,8 @@ static const struct option lgopts[] = {
        {CMD_LINE_OPT_CRYPTODEV_MASK, 1, 0, CMD_LINE_OPT_CRYPTODEV_MASK_NUM},
        {CMD_LINE_OPT_RX_OFFLOAD, 1, 0, CMD_LINE_OPT_RX_OFFLOAD_NUM},
        {CMD_LINE_OPT_TX_OFFLOAD, 1, 0, CMD_LINE_OPT_TX_OFFLOAD_NUM},
+       {CMD_LINE_OPT_REASSEMBLE, 1, 0, CMD_LINE_OPT_REASSEMBLE_NUM},
+       {CMD_LINE_OPT_MTU, 1, 0, CMD_LINE_OPT_MTU_NUM},
        {NULL, 0, 0, 0}
 };
 
@@ -159,7 +172,6 @@ static int32_t numa_on = 1; /**< NUMA is enabled by default. */
 static uint32_t nb_lcores;
 static uint32_t single_sa;
 static uint32_t single_sa_idx;
-static uint32_t frame_size;
 
 /*
  * RX/TX HW offload capabilities to enable/use on ethernet ports.
@@ -168,6 +180,13 @@ static uint32_t frame_size;
 static uint64_t dev_rx_offload = UINT64_MAX;
 static uint64_t dev_tx_offload = UINT64_MAX;
 
+/*
+ * Global values that determine multi-seg policy (see multi_seg_required()).
+ */
+static uint32_t frag_tbl_sz; /* reassemble table entries, 0 - disabled */
+static uint32_t frame_buf_size = RTE_MBUF_DEFAULT_BUF_SIZE;
+static uint32_t mtu_size = RTE_ETHER_MTU; /* MTU used on all ports */
+
 /* application wide librte_ipsec/SA parameters */
 struct app_sa_prm app_sa_prm = {.enable = 0};
 
@@ -204,6 +223,12 @@ struct lcore_conf {
        struct ipsec_ctx outbound;
        struct rt_ctx *rt4_ctx;
        struct rt_ctx *rt6_ctx;
+       struct {
+               struct rte_ip_frag_tbl *tbl;
+               struct rte_mempool *pool_dir;
+               struct rte_mempool *pool_indir;
+               struct rte_ip_frag_death_row dr;
+       } frag;
 } __rte_cache_aligned;
 
 static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
@@ -229,40 +254,106 @@ static struct rte_eth_conf port_conf = {
 
 static struct socket_ctx socket_ctx[NB_SOCKETS];
 
+/*
+ * Determine whether multi-segment support is required:
+ *  - either frame buffer can't hold an MTU-sized frame plus mbuf headroom
+ *  - or reassemble support is requested (non-zero fragment table size)
+ */
+static int
+multi_seg_required(void)
+{
+       return (MTU_TO_FRAMELEN(mtu_size) + RTE_PKTMBUF_HEADROOM >
+               frame_buf_size || frag_tbl_sz != 0);
+}
+
+static inline void /* trim mbuf to the length given by the IPv4 header */
+adjust_ipv4_pktlen(struct rte_mbuf *m, const struct rte_ipv4_hdr *iph,
+       uint32_t l2_len)
+{
+       uint32_t plen, trim; /* plen: IPv4 total_length plus l2_len */
+
+       plen = rte_be_to_cpu_16(iph->total_length) + l2_len;
+       if (plen < m->pkt_len) { /* mbuf carries extra trailing bytes */
+               trim = m->pkt_len - plen;
+               rte_pktmbuf_trim(m, trim);
+       }
+}
+
+static inline void /* trim mbuf to the length given by the IPv6 header */
+adjust_ipv6_pktlen(struct rte_mbuf *m, const struct rte_ipv6_hdr *iph,
+       uint32_t l2_len)
+{
+       uint32_t plen, trim; /* plen: payload_len + IPv6 header + l2_len */
+
+       plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len;
+       if (plen < m->pkt_len) { /* mbuf carries extra trailing bytes */
+               trim = m->pkt_len - plen;
+               rte_pktmbuf_trim(m, trim);
+       }
+}
+
 static inline void
 prepare_one_packet(struct rte_mbuf *pkt, struct ipsec_traffic *t)
 {
-       uint8_t *nlp;
-       struct rte_ether_hdr *eth;
+       const struct rte_ether_hdr *eth;
+       const struct rte_ipv4_hdr *iph4;
+       const struct rte_ipv6_hdr *iph6;
+
+       eth = rte_pktmbuf_mtod(pkt, const struct rte_ether_hdr *);
+       if (eth->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
 
-       eth = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
-       if (eth->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv4)) {
-               nlp = (uint8_t *)rte_pktmbuf_adj(pkt, RTE_ETHER_HDR_LEN);
-               nlp = RTE_PTR_ADD(nlp, offsetof(struct ip, ip_p));
-               if (*nlp == IPPROTO_ESP)
+               iph4 = (const struct rte_ipv4_hdr *)rte_pktmbuf_adj(pkt,
+                       RTE_ETHER_HDR_LEN);
+               adjust_ipv4_pktlen(pkt, iph4, 0);
+
+               if (iph4->next_proto_id == IPPROTO_ESP)
                        t->ipsec.pkts[(t->ipsec.num)++] = pkt;
                else {
-                       t->ip4.data[t->ip4.num] = nlp;
+                       t->ip4.data[t->ip4.num] = &iph4->next_proto_id;
                        t->ip4.pkts[(t->ip4.num)++] = pkt;
                }
                pkt->l2_len = 0;
-               pkt->l3_len = sizeof(struct ip);
-       } else if (eth->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv6)) {
-               nlp = (uint8_t *)rte_pktmbuf_adj(pkt, RTE_ETHER_HDR_LEN);
-               nlp = RTE_PTR_ADD(nlp, offsetof(struct ip6_hdr, ip6_nxt));
-               if (*nlp == IPPROTO_ESP)
+               pkt->l3_len = sizeof(*iph4);
+       } else if (eth->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+               int next_proto;
+               size_t l3len, ext_len;
+               uint8_t *p;
+
+               /* get protocol type */
+               iph6 = (const struct rte_ipv6_hdr *)rte_pktmbuf_adj(pkt,
+                       RTE_ETHER_HDR_LEN);
+               adjust_ipv6_pktlen(pkt, iph6, 0);
+
+               next_proto = iph6->proto;
+
+               /* determine l3 header size up to ESP extension */
+               l3len = sizeof(struct ip6_hdr);
+               p = rte_pktmbuf_mtod(pkt, uint8_t *);
+               while (next_proto != IPPROTO_ESP && l3len < pkt->data_len &&
+                       (next_proto = rte_ipv6_get_next_ext(p + l3len,
+                                               next_proto, &ext_len)) >= 0)
+                       l3len += ext_len;
+
+               /* drop packet when IPv6 header exceeds first segment length */
+               if (unlikely(l3len > pkt->data_len)) {
+                       rte_pktmbuf_free(pkt);
+                       return;
+               }
+
+               if (next_proto == IPPROTO_ESP)
                        t->ipsec.pkts[(t->ipsec.num)++] = pkt;
                else {
-                       t->ip6.data[t->ip6.num] = nlp;
+                       t->ip6.data[t->ip6.num] = &iph6->proto;
                        t->ip6.pkts[(t->ip6.num)++] = pkt;
                }
                pkt->l2_len = 0;
-               pkt->l3_len = sizeof(struct ip6_hdr);
+               pkt->l3_len = l3len;
        } else {
                /* Unknown/Unsupported type, drop the packet */
                RTE_LOG(ERR, IPSEC, "Unsupported packet type 0x%x\n",
                        rte_be_to_cpu_16(eth->ether_type));
                rte_pktmbuf_free(pkt);
+               return;
        }
 
        /* Check if the packet has been processed inline. For inline protocol
@@ -343,13 +434,13 @@ prepare_tx_pkt(struct rte_mbuf *pkt, uint16_t port,
                if ((pkt->ol_flags & PKT_TX_IP_CKSUM) == 0)
                        ip->ip_sum = rte_ipv4_cksum((struct rte_ipv4_hdr *)ip);
 
-               ethhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv4);
+               ethhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
        } else {
                pkt->ol_flags |= qconf->outbound.ipv6_offloads;
                pkt->l3_len = sizeof(struct ip6_hdr);
                pkt->l2_len = RTE_ETHER_HDR_LEN;
 
-               ethhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPv6);
+               ethhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }
 
        memcpy(&ethhdr->s_addr, &ethaddr_tbl[port].src,
@@ -397,9 +488,52 @@ send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port)
        return 0;
 }
 
+/*
+ * Fragment m to mtu_size into port's TX table, free m, return new table len.
+ */
+static inline uint32_t
+send_fragment_packet(struct lcore_conf *qconf, struct rte_mbuf *m,
+       uint16_t port, uint8_t proto)
+{
+       struct buffer *tbl;
+       uint32_t len, n;
+       int32_t rc;
+
+       tbl = qconf->tx_mbufs + port;
+       len = tbl->len;
+
+       /* flush queued packets if a maximum number of fragments may not fit */
+       if (len + RTE_LIBRTE_IP_FRAG_MAX_FRAG >= RTE_DIM(tbl->m_table)) {
+               send_burst(qconf, len, port);
+               len = 0;
+       }
+
+       n = RTE_DIM(tbl->m_table) - len; /* free slots left in the table */
+
+       if (proto == IPPROTO_IP)
+               rc = rte_ipv4_fragment_packet(m, tbl->m_table + len,
+                       n, mtu_size, qconf->frag.pool_dir,
+                       qconf->frag.pool_indir);
+       else
+               rc = rte_ipv6_fragment_packet(m, tbl->m_table + len,
+                       n, mtu_size, qconf->frag.pool_dir,
+                       qconf->frag.pool_indir);
+
+       if (rc >= 0) /* rc is the number of fragments stored */
+               len += rc;
+       else /* rc is a negative errno value, rte_errno is not set */
+               RTE_LOG(ERR, IPSEC,
+                       "%s: failed to fragment packet with size %u, "
+                       "error code: %d\n",
+                       __func__, m->pkt_len, rc);
+
+       rte_pktmbuf_free(m); /* original is released on success and failure */
+       return len;
+}
+
 /* Enqueue a single packet, and send burst if queue is filled */
 static inline int32_t
-send_single_packet(struct rte_mbuf *m, uint16_t port)
+send_single_packet(struct rte_mbuf *m, uint16_t port, uint8_t proto)
 {
        uint32_t lcore_id;
        uint16_t len;
@@ -409,8 +543,16 @@ send_single_packet(struct rte_mbuf *m, uint16_t port)
 
        qconf = &lcore_conf[lcore_id];
        len = qconf->tx_mbufs[port].len;
-       qconf->tx_mbufs[port].m_table[len] = m;
-       len++;
+
+       if (m->pkt_len <= mtu_size) {
+               qconf->tx_mbufs[port].m_table[len] = m;
+               len++;
+
+       /* need to fragment the packet */
+       } else if (frag_tbl_sz > 0)
+               len = send_fragment_packet(qconf, m, port, proto);
+       else
+               rte_pktmbuf_free(m);
 
        /* enough pkts to be sent */
        if (unlikely(len == MAX_PKT_BURST)) {
@@ -764,7 +906,7 @@ route4_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[], uint8_t nb_pkts)
                        rte_pktmbuf_free(pkts[i]);
                        continue;
                }
-               send_single_packet(pkts[i], pkt_hop & 0xff);
+               send_single_packet(pkts[i], pkt_hop & 0xff, IPPROTO_IP);
        }
 }
 
@@ -816,7 +958,7 @@ route6_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[], uint8_t nb_pkts)
                        rte_pktmbuf_free(pkts[i]);
                        continue;
                }
-               send_single_packet(pkts[i], pkt_hop & 0xff);
+               send_single_packet(pkts[i], pkt_hop & 0xff, IPPROTO_IPV6);
        }
 }
 
@@ -982,6 +1124,8 @@ main_loop(__attribute__((unused)) void *dummy)
        qconf->outbound.session_pool = socket_ctx[socket_id].session_pool;
        qconf->outbound.session_priv_pool =
                        socket_ctx[socket_id].session_priv_pool;
+       qconf->frag.pool_dir = socket_ctx[socket_id].mbuf_pool;
+       qconf->frag.pool_indir = socket_ctx[socket_id].mbuf_pool_indir;
 
        if (qconf->nb_rx_queue == 0) {
                RTE_LOG(DEBUG, IPSEC, "lcore %u has nothing to do\n",
@@ -1128,12 +1272,14 @@ print_usage(const char *prgname)
                " [--cryptodev_mask MASK]"
                " [--" CMD_LINE_OPT_RX_OFFLOAD " RX_OFFLOAD_MASK]"
                " [--" CMD_LINE_OPT_TX_OFFLOAD " TX_OFFLOAD_MASK]"
+               " [--" CMD_LINE_OPT_REASSEMBLE " REASSEMBLE_TABLE_SIZE]"
+               " [--" CMD_LINE_OPT_MTU " MTU]"
                "\n\n"
                "  -p PORTMASK: Hexadecimal bitmask of ports to configure\n"
                "  -P : Enable promiscuous mode\n"
                "  -u PORTMASK: Hexadecimal bitmask of unprotected ports\n"
-               "  -j FRAMESIZE: Enable jumbo frame with 'FRAMESIZE' as maximum\n"
-               "                packet size\n"
+               "  -j FRAMESIZE: Data buffer size, minimum (and default)\n"
+               "     value: RTE_MBUF_DEFAULT_BUF_SIZE\n"
                "  -l enables code-path that uses librte_ipsec\n"
                "  -w REPLAY_WINDOW_SIZE specifies IPsec SQN replay window\n"
                "     size for each SA\n"
@@ -1151,6 +1297,13 @@ print_usage(const char *prgname)
                "  --" CMD_LINE_OPT_TX_OFFLOAD
                ": bitmask of the TX HW offload capabilities to enable/use\n"
                "                         (DEV_TX_OFFLOAD_*)\n"
+               "  --" CMD_LINE_OPT_REASSEMBLE " NUM"
+               ": max number of entries in reassemble(fragment) table\n"
+               "    (zero (default value) disables reassembly)\n"
+               "  --" CMD_LINE_OPT_MTU " MTU"
+               ": MTU value on all ports (default value: 1500)\n"
+               "    outgoing packets with bigger size will be fragmented\n"
+               "    incoming packets with bigger size will be discarded\n"
                "\n",
                prgname);
 }
@@ -1321,21 +1474,16 @@ parse_args(int32_t argc, char **argv)
                        f_present = 1;
                        break;
                case 'j':
-                       {
-                               int32_t size = parse_decimal(optarg);
-                               if (size <= 1518) {
-                                       printf("Invalid jumbo frame size\n");
-                                       if (size < 0) {
-                                               print_usage(prgname);
-                                               return -1;
-                                       }
-                                       printf("Using default value 9000\n");
-                                       frame_size = 9000;
-                               } else {
-                                       frame_size = size;
-                               }
+                       ret = parse_decimal(optarg);
+                       if (ret < RTE_MBUF_DEFAULT_BUF_SIZE ||
+                                       ret > UINT16_MAX) {
+                               printf("Invalid frame buffer size value: %s\n",
+                                       optarg);
+                               print_usage(prgname);
+                               return -1;
                        }
-                       printf("Enabled jumbo frames size %u\n", frame_size);
+                       frame_buf_size = ret;
+                       printf("Custom frame buffer size %u\n", frame_buf_size);
                        break;
                case 'l':
                        app_sa_prm.enable = 1;
@@ -1403,6 +1551,26 @@ parse_args(int32_t argc, char **argv)
                                return -1;
                        }
                        break;
+               case CMD_LINE_OPT_REASSEMBLE_NUM:
+                       ret = parse_decimal(optarg);
+                       if (ret < 0) {
+                               printf("Invalid argument for \'%s\': %s\n",
+                                       CMD_LINE_OPT_REASSEMBLE, optarg);
+                               print_usage(prgname);
+                               return -1;
+                       }
+                       frag_tbl_sz = ret;
+                       break;
+               case CMD_LINE_OPT_MTU_NUM:
+                       ret = parse_decimal(optarg);
+                       if (ret < 0 || ret > RTE_IPV4_MAX_PKT_LEN) {
+                               printf("Invalid argument for \'%s\': %s\n",
+                                       CMD_LINE_OPT_MTU, optarg);
+                               print_usage(prgname);
+                               return -1;
+                       }
+                       mtu_size = ret;
+                       break;
                default:
                        print_usage(prgname);
                        return -1;
@@ -1414,6 +1582,16 @@ parse_args(int32_t argc, char **argv)
                return -1;
        }
 
+       /* check do we need to enable multi-seg support */
+       if (multi_seg_required()) {
+               /* legacy mode doesn't support multi-seg */
+               app_sa_prm.enable = 1;
+               printf("frame buf size: %u, mtu: %u, "
+                       "number of reassemble entries: %u\n"
+                       "multi-segment support is required\n",
+                       frame_buf_size, mtu_size, frag_tbl_sz);
+       }
+
        print_app_sa_prm(&app_sa_prm);
 
        if (optind >= 0)
@@ -1628,9 +1806,12 @@ cryptodevs_init(void)
        struct rte_cryptodev_config dev_conf;
        struct rte_cryptodev_qp_conf qp_conf;
        uint16_t idx, max_nb_qps, qp, i;
-       int16_t cdev_id, port_id;
+       int16_t cdev_id;
        struct rte_hash_parameters params = { 0 };
 
+       const uint64_t mseg_flag = multi_seg_required() ?
+                               RTE_CRYPTODEV_FF_IN_PLACE_SGL : 0;
+
        params.entries = CDEV_MAP_ENTRIES;
        params.key_len = sizeof(struct cdev_key);
        params.hash_func = rte_jhash;
@@ -1651,45 +1832,6 @@ cryptodevs_init(void)
 
        printf("lcore/cryptodev/qp mappings:\n");
 
-       uint32_t max_sess_sz = 0, sess_sz;
-       for (cdev_id = 0; cdev_id < rte_cryptodev_count(); cdev_id++) {
-               void *sec_ctx;
-
-               /* Get crypto priv session size */
-               sess_sz = rte_cryptodev_sym_get_private_session_size(cdev_id);
-               if (sess_sz > max_sess_sz)
-                       max_sess_sz = sess_sz;
-
-               /*
-                * If crypto device is security capable, need to check the
-                * size of security session as well.
-                */
-
-               /* Get security context of the crypto device */
-               sec_ctx = rte_cryptodev_get_sec_ctx(cdev_id);
-               if (sec_ctx == NULL)
-                       continue;
-
-               /* Get size of security session */
-               sess_sz = rte_security_session_get_size(sec_ctx);
-               if (sess_sz > max_sess_sz)
-                       max_sess_sz = sess_sz;
-       }
-       RTE_ETH_FOREACH_DEV(port_id) {
-               void *sec_ctx;
-
-               if ((enabled_port_mask & (1 << port_id)) == 0)
-                       continue;
-
-               sec_ctx = rte_eth_dev_get_sec_ctx(port_id);
-               if (sec_ctx == NULL)
-                       continue;
-
-               sess_sz = rte_security_session_get_size(sec_ctx);
-               if (sess_sz > max_sess_sz)
-                       max_sess_sz = sess_sz;
-       }
-
        idx = 0;
        for (cdev_id = 0; cdev_id < rte_cryptodev_count(); cdev_id++) {
                struct rte_cryptodev_info cdev_info;
@@ -1699,6 +1841,12 @@ cryptodevs_init(void)
 
                rte_cryptodev_info_get(cdev_id, &cdev_info);
 
+               if ((mseg_flag & cdev_info.feature_flags) != mseg_flag)
+                       rte_exit(EXIT_FAILURE,
+                               "Device %hd does not support \'%s\' feature\n",
+                               cdev_id,
+                               rte_cryptodev_get_feature_name(mseg_flag));
+
                if (nb_lcore_params > cdev_info.max_nb_queue_pairs)
                        max_nb_qps = cdev_info.max_nb_queue_pairs;
                else
@@ -1720,6 +1868,7 @@ cryptodevs_init(void)
 
                dev_conf.socket_id = rte_cryptodev_socket_id(cdev_id);
                dev_conf.nb_queue_pairs = qp;
+               dev_conf.ff_disable = RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO;
 
                uint32_t dev_max_sess = cdev_info.sym.max_nb_sessions;
                if (dev_max_sess != 0 && dev_max_sess < CDEV_MP_NB_OBJS)
@@ -1727,45 +1876,6 @@ cryptodevs_init(void)
                                "Device does not support at least %u "
                                "sessions", CDEV_MP_NB_OBJS);
 
-               if (!socket_ctx[dev_conf.socket_id].session_pool) {
-                       char mp_name[RTE_MEMPOOL_NAMESIZE];
-                       struct rte_mempool *sess_mp;
-
-                       snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
-                                       "sess_mp_%u", dev_conf.socket_id);
-                       sess_mp = rte_cryptodev_sym_session_pool_create(
-                                       mp_name, CDEV_MP_NB_OBJS,
-                                       0, CDEV_MP_CACHE_SZ, 0,
-                                       dev_conf.socket_id);
-                       socket_ctx[dev_conf.socket_id].session_pool = sess_mp;
-               }
-
-               if (!socket_ctx[dev_conf.socket_id].session_priv_pool) {
-                       char mp_name[RTE_MEMPOOL_NAMESIZE];
-                       struct rte_mempool *sess_mp;
-
-                       snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
-                                       "sess_mp_priv_%u", dev_conf.socket_id);
-                       sess_mp = rte_mempool_create(mp_name,
-                                       CDEV_MP_NB_OBJS,
-                                       max_sess_sz,
-                                       CDEV_MP_CACHE_SZ,
-                                       0, NULL, NULL, NULL,
-                                       NULL, dev_conf.socket_id,
-                                       0);
-                       socket_ctx[dev_conf.socket_id].session_priv_pool =
-                                       sess_mp;
-               }
-
-               if (!socket_ctx[dev_conf.socket_id].session_priv_pool ||
-                               !socket_ctx[dev_conf.socket_id].session_pool)
-                       rte_exit(EXIT_FAILURE,
-                               "Cannot create session pool on socket %d\n",
-                               dev_conf.socket_id);
-               else
-                       printf("Allocated session pool on socket %d\n",
-                                       dev_conf.socket_id);
-
                if (rte_cryptodev_configure(cdev_id, &dev_conf))
                        rte_panic("Failed to initialize cryptodev %u\n",
                                        cdev_id);
@@ -1786,39 +1896,6 @@ cryptodevs_init(void)
                                        cdev_id);
        }
 
-       /* create session pools for eth devices that implement security */
-       RTE_ETH_FOREACH_DEV(port_id) {
-               if ((enabled_port_mask & (1 << port_id)) &&
-                               rte_eth_dev_get_sec_ctx(port_id)) {
-                       int socket_id = rte_eth_dev_socket_id(port_id);
-
-                       if (!socket_ctx[socket_id].session_priv_pool) {
-                               char mp_name[RTE_MEMPOOL_NAMESIZE];
-                               struct rte_mempool *sess_mp;
-
-                               snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
-                                               "sess_mp_%u", socket_id);
-                               sess_mp = rte_mempool_create(mp_name,
-                                               (CDEV_MP_NB_OBJS * 2),
-                                               max_sess_sz,
-                                               CDEV_MP_CACHE_SZ,
-                                               0, NULL, NULL, NULL,
-                                               NULL, socket_id,
-                                               0);
-                               if (sess_mp == NULL)
-                                       rte_exit(EXIT_FAILURE,
-                                               "Cannot create session pool "
-                                               "on socket %d\n", socket_id);
-                               else
-                                       printf("Allocated session pool "
-                                               "on socket %d\n", socket_id);
-                               socket_ctx[socket_id].session_priv_pool =
-                                               sess_mp;
-                       }
-               }
-       }
-
-
        printf("\n");
 
        return 0;
@@ -1827,6 +1904,7 @@ cryptodevs_init(void)
 static void
 port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads)
 {
+       uint32_t frame_size;
        struct rte_eth_dev_info dev_info;
        struct rte_eth_txconf *txconf;
        uint16_t nb_tx_queue, nb_rx_queue;
@@ -1836,7 +1914,11 @@ port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads)
        struct rte_ether_addr ethaddr;
        struct rte_eth_conf local_port_conf = port_conf;
 
-       rte_eth_dev_info_get(portid, &dev_info);
+       ret = rte_eth_dev_info_get(portid, &dev_info);
+       if (ret != 0)
+               rte_exit(EXIT_FAILURE,
+                       "Error during getting device (port %u) info: %s\n",
+                       portid, strerror(-ret));
 
        /* limit allowed HW offloafs, as user requested */
        dev_info.rx_offload_capa &= dev_rx_offload;
@@ -1865,9 +1947,14 @@ port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads)
        printf("Creating queues: nb_rx_queue=%d nb_tx_queue=%u...\n",
                        nb_rx_queue, nb_tx_queue);
 
-       if (frame_size) {
-               local_port_conf.rxmode.max_rx_pkt_len = frame_size;
+       frame_size = MTU_TO_FRAMELEN(mtu_size);
+       if (frame_size > local_port_conf.rxmode.max_rx_pkt_len)
                local_port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+       local_port_conf.rxmode.max_rx_pkt_len = frame_size;
+
+       if (multi_seg_required()) {
+               local_port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
+               local_port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
        }
 
        local_port_conf.rxmode.offloads |= req_rx_offloads;
@@ -1984,20 +2071,122 @@ port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads)
        printf("\n");
 }
 
+static size_t /* max session size over cryptodevs and enabled ports */
+max_session_size(void)
+{
+       size_t max_sz, sz;
+       void *sec_ctx;
+       int16_t cdev_id, port_id, n;
+
+       max_sz = 0;
+       n =  rte_cryptodev_count();
+       for (cdev_id = 0; cdev_id != n; cdev_id++) {
+               sz = rte_cryptodev_sym_get_private_session_size(cdev_id);
+               if (sz > max_sz)
+                       max_sz = sz;
+               /*
+                * If crypto device is security capable, need to check the
+                * size of security session as well.
+                */
+
+               /* Get security context of the crypto device */
+               sec_ctx = rte_cryptodev_get_sec_ctx(cdev_id);
+               if (sec_ctx == NULL)
+                       continue;
+
+               /* Get size of security session */
+               sz = rte_security_session_get_size(sec_ctx);
+               if (sz > max_sz)
+                       max_sz = sz;
+       }
+
+       RTE_ETH_FOREACH_DEV(port_id) {
+               if ((enabled_port_mask & (1 << port_id)) == 0)
+                       continue; /* port is not enabled in the port mask */
+
+               sec_ctx = rte_eth_dev_get_sec_ctx(port_id);
+               if (sec_ctx == NULL)
+                       continue; /* port has no security context */
+
+               sz = rte_security_session_get_size(sec_ctx);
+               if (sz > max_sz)
+                       max_sz = sz;
+       }
+
+       return max_sz; /* stays 0 when no device reported a size */
+}
+
+static void /* create ctx->session_pool on socket_id, exit on failure */
+session_pool_init(struct socket_ctx *ctx, int32_t socket_id, size_t sess_sz)
+{
+       char mp_name[RTE_MEMPOOL_NAMESIZE];
+       struct rte_mempool *sess_mp;
+
+       snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
+                       "sess_mp_%u", socket_id); /* pool name per socket */
+       sess_mp = rte_cryptodev_sym_session_pool_create(
+                       mp_name, CDEV_MP_NB_OBJS,
+                       sess_sz, CDEV_MP_CACHE_SZ, 0,
+                       socket_id);
+       ctx->session_pool = sess_mp;
+
+       if (ctx->session_pool == NULL) /* creation failed, cannot go on */
+               rte_exit(EXIT_FAILURE,
+                       "Cannot init session pool on socket %d\n", socket_id);
+       else
+               printf("Allocated session pool on socket %d\n", socket_id);
+}
+
+static void /* create ctx->session_priv_pool on socket_id, exit on failure */
+session_priv_pool_init(struct socket_ctx *ctx, int32_t socket_id,
+       size_t sess_sz)
+{
+       char mp_name[RTE_MEMPOOL_NAMESIZE];
+       struct rte_mempool *sess_mp;
+
+       snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
+                       "sess_mp_priv_%u", socket_id); /* name per socket */
+       sess_mp = rte_mempool_create(mp_name,
+                       CDEV_MP_NB_OBJS,
+                       sess_sz, /* element size of the pool */
+                       CDEV_MP_CACHE_SZ,
+                       0, NULL, NULL, NULL,
+                       NULL, socket_id,
+                       0);
+       ctx->session_priv_pool = sess_mp;
+
+       if (ctx->session_priv_pool == NULL) /* creation failed */
+               rte_exit(EXIT_FAILURE,
+                       "Cannot init session priv pool on socket %d\n",
+                       socket_id);
+       else
+               printf("Allocated session priv pool on socket %d\n",
+                       socket_id);
+}
+
 static void
 pool_init(struct socket_ctx *ctx, int32_t socket_id, uint32_t nb_mbuf)
 {
        char s[64];
-       uint32_t buff_size = frame_size ? (frame_size + RTE_PKTMBUF_HEADROOM) :
-                       RTE_MBUF_DEFAULT_BUF_SIZE;
-
+       int32_t ms;
 
        snprintf(s, sizeof(s), "mbuf_pool_%d", socket_id);
        ctx->mbuf_pool = rte_pktmbuf_pool_create(s, nb_mbuf,
                        MEMPOOL_CACHE_SIZE, ipsec_metadata_size(),
-                       buff_size,
-                       socket_id);
-       if (ctx->mbuf_pool == NULL)
+                       frame_buf_size, socket_id);
+
+       /*
+        * if multi-segment support is enabled, then create a pool
+        * for indirect mbufs.
+        */
+       ms = multi_seg_required();
+       if (ms != 0) {
+               snprintf(s, sizeof(s), "mbuf_pool_indir_%d", socket_id);
+               ctx->mbuf_pool_indir = rte_pktmbuf_pool_create(s, nb_mbuf,
+                       MEMPOOL_CACHE_SIZE, 0, 0, socket_id);
+       }
+
+       if (ctx->mbuf_pool == NULL || (ms != 0 && ctx->mbuf_pool_indir == NULL))
                rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n",
                                socket_id);
        else
@@ -2059,14 +2248,150 @@ inline_ipsec_event_callback(uint16_t port_id, enum rte_eth_event_type type,
        return -1;
 }
 
+static uint16_t
+rx_callback(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+       struct rte_mbuf *pkt[], uint16_t nb_pkts,
+       __rte_unused uint16_t max_pkts, void *user_param)
+{
+       uint64_t tm;
+       uint32_t i, k;
+       struct lcore_conf *lc;
+       struct rte_mbuf *mb;
+       struct rte_ether_hdr *eth;
+       /* reassemble IP fragments in pkt[] in place; returns the new count */
+       lc = user_param; /* lcore_conf given when the callback was added */
+       k = 0; /* write index == number of packets kept so far */
+       tm = 0; /* timestamp, read lazily; non-zero means a fragment was seen */
+
+       for (i = 0; i != nb_pkts; i++) {
+               /* packets that are not IPv4/IPv6 fragments are kept unchanged */
+               mb = pkt[i];
+               eth = rte_pktmbuf_mtod(mb, struct rte_ether_hdr *);
+               if (eth->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
+
+                       struct rte_ipv4_hdr *iph;
+
+                       iph = (struct rte_ipv4_hdr *)(eth + 1);
+                       if (rte_ipv4_frag_pkt_is_fragmented(iph)) {
+                               /* NOTE(review): l3_len is fixed, IPv4 options not counted - confirm */
+                               mb->l2_len = sizeof(*eth);
+                               mb->l3_len = sizeof(*iph);
+                               tm = (tm != 0) ? tm : rte_rdtsc();
+                               mb = rte_ipv4_frag_reassemble_packet(
+                                       lc->frag.tbl, &lc->frag.dr,
+                                       mb, tm, iph);
+
+                               if (mb != NULL) {
+                                       /* fix ip cksum after reassemble. */
+                                       iph = rte_pktmbuf_mtod_offset(mb,
+                                               struct rte_ipv4_hdr *,
+                                               mb->l2_len);
+                                       iph->hdr_checksum = 0;
+                                       iph->hdr_checksum = rte_ipv4_cksum(iph);
+                               }
+                       }
+               } else if (eth->ether_type ==
+                               rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
+
+                       struct rte_ipv6_hdr *iph;
+                       struct ipv6_extension_fragment *fh;
+
+                       iph = (struct rte_ipv6_hdr *)(eth + 1);
+                       fh = rte_ipv6_frag_get_ipv6_fragment_header(iph);
+                       if (fh != NULL) { /* only packets with a fragment header */
+                               mb->l2_len = sizeof(*eth);
+                               mb->l3_len = (uintptr_t)fh - (uintptr_t)iph +
+                                       sizeof(*fh); /* up to and including fh */
+                               tm = (tm != 0) ? tm : rte_rdtsc();
+                               mb = rte_ipv6_frag_reassemble_packet(
+                                       lc->frag.tbl, &lc->frag.dr,
+                                       mb, tm, iph, fh);
+                               if (mb != NULL)
+                                       /* fix l3_len after reassemble. */
+                                       mb->l3_len = mb->l3_len - sizeof(*fh);
+                       }
+               }
+               /* a NULL mb (reassembly returned no packet) is not counted */
+               pkt[k] = mb;
+               k += (mb != NULL); /* slot is reused when mb is NULL */
+       }
+
+       /* some fragments were encountered, drain death row */
+       if (tm != 0)
+               rte_ip_frag_free_death_row(&lc->frag.dr, 0);
+
+       return k;
+}
+
+/* per-lcore setup: create fragment table, add rx_callback to each RX queue */
+static int
+reassemble_lcore_init(struct lcore_conf *lc, uint32_t cid)
+{
+       int32_t sid;
+       uint32_t i;
+       uint64_t frag_cycles;
+       const struct lcore_rx_queue *rxq;
+       const struct rte_eth_rxtx_callback *cb;
+       /* returns 0 on success, -ENOMEM on table or callback failure */
+       /* create fragment table */
+       sid = rte_lcore_to_socket_id(cid);
+       frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) /
+               MS_PER_S * FRAG_TTL_MS; /* cycles per ms (rounded up) * TTL ms */
+       /* frag_tbl_sz is passed as both the 1st and the 3rd argument */
+       lc->frag.tbl = rte_ip_frag_table_create(frag_tbl_sz,
+               FRAG_TBL_BUCKET_ENTRIES, frag_tbl_sz, frag_cycles, sid);
+       if (lc->frag.tbl == NULL) {
+               printf("%s(%u): failed to create fragment table of size: %u, "
+                       "error code: %d\n",
+                       __func__, cid, frag_tbl_sz, rte_errno);
+               return -ENOMEM;
+       }
+       /* NOTE(review): lc->frag.tbl is not released if a callback fails below */
+       /* setup reassemble RX callbacks for all queues */
+       for (i = 0; i != lc->nb_rx_queue; i++) {
+
+               rxq = lc->rx_queue_list + i;
+               cb = rte_eth_add_rx_callback(rxq->port_id, rxq->queue_id,
+                       rx_callback, lc); /* lc becomes user_param */
+               if (cb == NULL) {
+                       printf("%s(%u): failed to install RX callback for "
+                               "portid=%u, queueid=%u, error code: %d\n",
+                               __func__, cid,
+                               rxq->port_id, rxq->queue_id, rte_errno);
+                       return -ENOMEM;
+               }
+       }
+
+       return 0;
+}
+/* call reassemble_lcore_init() per lcore_params entry; stop at first error */
+static int
+reassemble_init(void)
+{
+       int32_t rc;
+       uint32_t i, lc;
+       /* NOTE(review): entries sharing an lcore_id init that lcore again - confirm */
+       rc = 0;
+       for (i = 0; i != nb_lcore_params; i++) {
+               lc = lcore_params[i].lcore_id; /* also indexes lcore_conf[] */
+               rc = reassemble_lcore_init(lcore_conf + lc, lc);
+               if (rc != 0)
+                       break;
+       }
+
+       return rc; /* 0 if every call succeeded or there were no entries */
+}
+
 int32_t
 main(int32_t argc, char **argv)
 {
        int32_t ret;
        uint32_t lcore_id;
+       uint32_t i;
        uint8_t socket_id;
        uint16_t portid;
        uint64_t req_rx_offloads, req_tx_offloads;
+       size_t sess_sz;
 
        /* init EAL */
        ret = rte_eal_init(argc, argv);
@@ -2094,7 +2419,8 @@ main(int32_t argc, char **argv)
 
        nb_lcores = rte_lcore_count();
 
-       /* Replicate each context per socket */
+       sess_sz = max_session_size();
+
        for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
                if (rte_lcore_is_enabled(lcore_id) == 0)
                        continue;
@@ -2104,20 +2430,14 @@ main(int32_t argc, char **argv)
                else
                        socket_id = 0;
 
+               /* mbuf_pool is initialised by the pool_init() function */
                if (socket_ctx[socket_id].mbuf_pool)
                        continue;
 
-               /* initilaze SPD */
-               sp4_init(&socket_ctx[socket_id], socket_id);
-
-               sp6_init(&socket_ctx[socket_id], socket_id);
-
-               /* initilaze SAD */
-               sa_init(&socket_ctx[socket_id], socket_id);
-
-               rt_init(&socket_ctx[socket_id], socket_id);
-
                pool_init(&socket_ctx[socket_id], socket_id, NB_MBUF);
+               session_pool_init(&socket_ctx[socket_id], socket_id, sess_sz);
+               session_priv_pool_init(&socket_ctx[socket_id], socket_id,
+                       sess_sz);
        }
 
        RTE_ETH_FOREACH_DEV(portid) {
@@ -2135,7 +2455,11 @@ main(int32_t argc, char **argv)
                if ((enabled_port_mask & (1 << portid)) == 0)
                        continue;
 
-               /* Start device */
+               /*
+                * Start device
+                * note: device must be started before a flow rule
+                * can be installed.
+                */
                ret = rte_eth_dev_start(portid);
                if (ret < 0)
                        rte_exit(EXIT_FAILURE, "rte_eth_dev_start: "
@@ -2146,13 +2470,38 @@ main(int32_t argc, char **argv)
                 * to itself through 2 cross-connected  ports of the
                 * target machine.
                 */
-               if (promiscuous_on)
-                       rte_eth_promiscuous_enable(portid);
+               if (promiscuous_on) {
+                       ret = rte_eth_promiscuous_enable(portid);
+                       if (ret != 0)
+                               rte_exit(EXIT_FAILURE,
+                                       "rte_eth_promiscuous_enable: err=%s, port=%d\n",
+                                       rte_strerror(-ret), portid);
+               }
 
                rte_eth_dev_callback_register(portid,
                        RTE_ETH_EVENT_IPSEC, inline_ipsec_event_callback, NULL);
        }
 
+       /* fragment reassemble is enabled */
+       if (frag_tbl_sz != 0) {
+               ret = reassemble_init();
+               if (ret != 0)
+                       rte_exit(EXIT_FAILURE, "failed at reassemble init");
+       }
+
+       /* Replicate each context per socket */
+       for (i = 0; i < NB_SOCKETS && i < rte_socket_count(); i++) {
+               socket_id = rte_socket_id_by_idx(i);
+               if ((socket_ctx[socket_id].mbuf_pool != NULL) &&
+                       (socket_ctx[socket_id].sa_in == NULL) &&
+                       (socket_ctx[socket_id].sa_out == NULL)) {
+                       sa_init(&socket_ctx[socket_id], socket_id);
+                       sp4_init(&socket_ctx[socket_id], socket_id);
+                       sp6_init(&socket_ctx[socket_id], socket_id);
+                       rt_init(&socket_ctx[socket_id], socket_id);
+               }
+       }
+
        check_all_ports_link_status(enabled_port_mask);
 
        /* launch per-lcore init on every lcore */