X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fl3fwd%2Fmain.c;h=c35926d923c7dcaf1d1282155e498083c6b79392;hb=693f715da45c48ec1ec0fe4ba2f3b5ffd11ba53e;hp=04713c7b785e663bb903a5c01daa953def855699;hpb=96ff445371e034b558d6a34b577ee00892d7042f;p=dpdk.git diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c old mode 100755 new mode 100644 index 04713c7b78..c35926d923 --- a/examples/l3fwd/main.c +++ b/examples/l3fwd/main.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,15 +41,16 @@ #include #include #include +#include +#include #include -#include +#include #include #include #include #include #include -#include #include #include #include @@ -73,7 +74,10 @@ #include #include -#include "main.h" +#include +#include + +static volatile bool force_quit; #define APP_LOOKUP_EXACT_MATCH 0 #define APP_LOOKUP_LPM 1 @@ -122,8 +126,6 @@ #define MEMPOOL_CACHE_SIZE 256 -#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) - /* * This expression is used to calculate the number of mbufs needed depending on user input, taking * into account memory for rx and tx hardware rings, cache per lcore and mtable per port per lcore. @@ -137,25 +139,6 @@ nb_lcores*MEMPOOL_CACHE_SIZE), \ (unsigned)8192) -/* - * RX and TX Prefetch, Host, and Write-back threshold values should be - * carefully set for optimal performance. Consult the network - * controller's datasheet and supporting DPDK documentation for guidance - * on how these parameters should be set. - */ -#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ -#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ -#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. 
*/ - -/* - * These default values are optimized for use with the Intel(R) 82599 10 GbE - * Controller and the DPDK ixgbe PMD. Consider using other values for other - * network controllers and/or network drivers. - */ -#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ -#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ -#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ - #define MAX_PKT_BURST 32 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ @@ -183,6 +166,7 @@ static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /* ethernet addresses of ports */ +static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS]; static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; static __m128i val_eth[RTE_MAX_ETHPORTS]; @@ -251,7 +235,7 @@ static struct rte_eth_conf port_conf = { .rx_adv_conf = { .rss_conf = { .rss_key = NULL, - .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV6, + .rss_hf = ETH_RSS_IP, }, }, .txmode = { @@ -259,31 +243,6 @@ static struct rte_eth_conf port_conf = { }, }; -static const struct rte_eth_rxconf rx_conf = { - .rx_thresh = { - .pthresh = RX_PTHRESH, - .hthresh = RX_HTHRESH, - .wthresh = RX_WTHRESH, - }, - .rx_free_thresh = 32, -}; - -static struct rte_eth_txconf tx_conf = { - .tx_thresh = { - .pthresh = TX_PTHRESH, - .hthresh = TX_HTHRESH, - .wthresh = TX_WTHRESH, - }, - .tx_free_thresh = 0, /* Use PMD default values */ - .tx_rs_thresh = 0, /* Use PMD default values */ - .txq_flags = (ETH_TXQ_FLAGS_NOMULTSEGS | - ETH_TXQ_FLAGS_NOVLANOFFL | - ETH_TXQ_FLAGS_NOXSUMSCTP | - ETH_TXQ_FLAGS_NOXSUMUDP | - ETH_TXQ_FLAGS_NOXSUMTCP) - -}; - static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) @@ -418,7 +377,7 @@ ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len, init_val = rte_jhash_1word(k->ip_dst, init_val); init_val = rte_jhash_1word(*p, init_val); #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - 
return (init_val); + return init_val; } static inline uint32_t @@ -461,7 +420,7 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, uint32_t init_va init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val); init_val = rte_jhash_1word(*p, init_val); #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - return (init_val); + return init_val; } #define IPV4_L3FWD_NUM_ROUTES \ @@ -586,6 +545,7 @@ send_single_packet(struct rte_mbuf *m, uint8_t port) return 0; } +#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM) static inline __attribute__((always_inline)) void send_packetsx4(struct lcore_conf *qconf, uint8_t port, struct rte_mbuf *m[], uint32_t num) @@ -663,6 +623,7 @@ send_packetsx4(struct lcore_conf *qconf, uint8_t port, qconf->tx_mbufs[port].len = len; } +#endif /* APP_LOOKUP_LPM */ #ifdef DO_RFC_1812_CHECKS static inline int @@ -770,67 +731,100 @@ get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, lookup6_struct_t * ipv6_l3fwd } #endif +static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, + struct lcore_conf *qconf) __attribute__((unused)); + #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) -static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qconf); -#define MASK_ALL_PKTS 0xf -#define EXECLUDE_1ST_PKT 0xe -#define EXECLUDE_2ND_PKT 0xd -#define EXECLUDE_3RD_PKT 0xb -#define EXECLUDE_4TH_PKT 0x7 +#define MASK_ALL_PKTS 0xff +#define EXCLUDE_1ST_PKT 0xfe +#define EXCLUDE_2ND_PKT 0xfd +#define EXCLUDE_3RD_PKT 0xfb +#define EXCLUDE_4TH_PKT 0xf7 +#define EXCLUDE_5TH_PKT 0xef +#define EXCLUDE_6TH_PKT 0xdf +#define EXCLUDE_7TH_PKT 0xbf +#define EXCLUDE_8TH_PKT 0x7f static inline void -simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *qconf) +simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid, struct lcore_conf *qconf) { - struct ether_hdr *eth_hdr[4]; - struct ipv4_hdr *ipv4_hdr[4]; - void *d_addr_bytes[4]; - uint8_t 
dst_port[4]; - int32_t ret[4]; - union ipv4_5tuple_host key[4]; - __m128i data[4]; + struct ether_hdr *eth_hdr[8]; + struct ipv4_hdr *ipv4_hdr[8]; + uint8_t dst_port[8]; + int32_t ret[8]; + union ipv4_5tuple_host key[8]; + __m128i data[8]; eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *); eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *); eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *); eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *); + eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *); + eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *); + eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *); + eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *); /* Handle IPv4 headers.*/ - ipv4_hdr[0] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[0], unsigned char *) + - sizeof(struct ether_hdr)); - ipv4_hdr[1] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[1], unsigned char *) + - sizeof(struct ether_hdr)); - ipv4_hdr[2] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[2], unsigned char *) + - sizeof(struct ether_hdr)); - ipv4_hdr[3] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[3], unsigned char *) + - sizeof(struct ether_hdr)); + ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv4_hdr *, + sizeof(struct ether_hdr)); + ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv4_hdr *, + sizeof(struct ether_hdr)); #ifdef DO_RFC_1812_CHECKS /* Check to make sure the packet is valid (RFC1812) */ uint8_t 
valid_mask = MASK_ALL_PKTS; - if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) { rte_pktmbuf_free(m[0]); - valid_mask &= EXECLUDE_1ST_PKT; + valid_mask &= EXCLUDE_1ST_PKT; } - if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) { rte_pktmbuf_free(m[1]); - valid_mask &= EXECLUDE_2ND_PKT; + valid_mask &= EXCLUDE_2ND_PKT; } - if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) { rte_pktmbuf_free(m[2]); - valid_mask &= EXECLUDE_3RD_PKT; + valid_mask &= EXCLUDE_3RD_PKT; } - if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) { rte_pktmbuf_free(m[3]); - valid_mask &= EXECLUDE_4TH_PKT; + valid_mask &= EXCLUDE_4TH_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[4], m[4]->pkt_len) < 0) { + rte_pktmbuf_free(m[4]); + valid_mask &= EXCLUDE_5TH_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[5], m[5]->pkt_len) < 0) { + rte_pktmbuf_free(m[5]); + valid_mask &= EXCLUDE_6TH_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[6], m[6]->pkt_len) < 0) { + rte_pktmbuf_free(m[6]); + valid_mask &= EXCLUDE_7TH_PKT; + } + if (is_valid_ipv4_pkt(ipv4_hdr[7], m[7]->pkt_len) < 0) { + rte_pktmbuf_free(m[7]); + valid_mask &= EXCLUDE_8TH_PKT; } if (unlikely(valid_mask != MASK_ALL_PKTS)) { if (valid_mask == 0){ return; } else { uint8_t i = 0; - for (i = 0; i < 4; i++) { + for (i = 0; i < 8; i++) { if ((0x1 << i) & valid_mask) { l3fwd_simple_forward(m[i], portid, qconf); } @@ -840,26 +834,52 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf * } #endif // End of #ifdef DO_RFC_1812_CHECKS - data[0] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[0], unsigned char *) + - sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live))); - data[1] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[1], unsigned char *) + - 
sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live))); - data[2] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[2], unsigned char *) + - sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live))); - data[3] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[3], unsigned char *) + - sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live))); + data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); + data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *, + sizeof(struct ether_hdr) + + offsetof(struct ipv4_hdr, time_to_live))); key[0].xmm = _mm_and_si128(data[0], mask0); key[1].xmm = _mm_and_si128(data[1], mask0); key[2].xmm = _mm_and_si128(data[2], mask0); key[3].xmm = _mm_and_si128(data[3], mask0); + key[4].xmm = _mm_and_si128(data[4], mask0); + key[5].xmm = _mm_and_si128(data[5], mask0); + key[6].xmm = _mm_and_si128(data[6], mask0); + key[7].xmm = _mm_and_si128(data[7], mask0); - const void *key_array[4] = {&key[0], &key[1], &key[2],&key[3]}; - rte_hash_lookup_multi(qconf->ipv4_lookup_struct, 
&key_array[0], 4, ret); + const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], + &key[4], &key[5], &key[6], &key[7]}; + + rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[0], 8, ret); dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]); dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]); dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]); dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]); + dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]); + dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]); + dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]); + dst_port[7] = (uint8_t) ((ret[7] < 0) ? portid : ipv4_l3fwd_out_if[ret[7]]); if (dst_port[0] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[0]) == 0) dst_port[0] = portid; @@ -869,16 +889,14 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf * dst_port[2] = portid; if (dst_port[3] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[3]) == 0) dst_port[3] = portid; - - /* 02:00:00:00:00:xx */ - d_addr_bytes[0] = ð_hdr[0]->d_addr.addr_bytes[0]; - d_addr_bytes[1] = ð_hdr[1]->d_addr.addr_bytes[0]; - d_addr_bytes[2] = ð_hdr[2]->d_addr.addr_bytes[0]; - d_addr_bytes[3] = ð_hdr[3]->d_addr.addr_bytes[0]; - *((uint64_t *)d_addr_bytes[0]) = 0x000000000002 + ((uint64_t)dst_port[0] << 40); - *((uint64_t *)d_addr_bytes[1]) = 0x000000000002 + ((uint64_t)dst_port[1] << 40); - *((uint64_t *)d_addr_bytes[2]) = 0x000000000002 + ((uint64_t)dst_port[2] << 40); - *((uint64_t *)d_addr_bytes[3]) = 0x000000000002 + ((uint64_t)dst_port[3] << 40); + if (dst_port[4] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[4]) == 0) + dst_port[4] = portid; + if (dst_port[5] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[5]) == 0) + dst_port[5] = portid; + if (dst_port[6] >= RTE_MAX_ETHPORTS || 
(enabled_port_mask & 1 << dst_port[6]) == 0) + dst_port[6] = portid; + if (dst_port[7] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[7]) == 0) + dst_port[7] = portid; #ifdef DO_RFC_1812_CHECKS /* Update time to live and header checksum */ @@ -890,32 +908,53 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf * ++(ipv4_hdr[1]->hdr_checksum); ++(ipv4_hdr[2]->hdr_checksum); ++(ipv4_hdr[3]->hdr_checksum); + --(ipv4_hdr[4]->time_to_live); + --(ipv4_hdr[5]->time_to_live); + --(ipv4_hdr[6]->time_to_live); + --(ipv4_hdr[7]->time_to_live); + ++(ipv4_hdr[4]->hdr_checksum); + ++(ipv4_hdr[5]->hdr_checksum); + ++(ipv4_hdr[6]->hdr_checksum); + ++(ipv4_hdr[7]->hdr_checksum); #endif + /* dst addr */ + *(uint64_t *)ð_hdr[0]->d_addr = dest_eth_addr[dst_port[0]]; + *(uint64_t *)ð_hdr[1]->d_addr = dest_eth_addr[dst_port[1]]; + *(uint64_t *)ð_hdr[2]->d_addr = dest_eth_addr[dst_port[2]]; + *(uint64_t *)ð_hdr[3]->d_addr = dest_eth_addr[dst_port[3]]; + *(uint64_t *)ð_hdr[4]->d_addr = dest_eth_addr[dst_port[4]]; + *(uint64_t *)ð_hdr[5]->d_addr = dest_eth_addr[dst_port[5]]; + *(uint64_t *)ð_hdr[6]->d_addr = dest_eth_addr[dst_port[6]]; + *(uint64_t *)ð_hdr[7]->d_addr = dest_eth_addr[dst_port[7]]; + /* src addr */ ether_addr_copy(&ports_eth_addr[dst_port[0]], ð_hdr[0]->s_addr); ether_addr_copy(&ports_eth_addr[dst_port[1]], ð_hdr[1]->s_addr); ether_addr_copy(&ports_eth_addr[dst_port[2]], ð_hdr[2]->s_addr); ether_addr_copy(&ports_eth_addr[dst_port[3]], ð_hdr[3]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[4]], ð_hdr[4]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[5]], ð_hdr[5]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[6]], ð_hdr[6]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[7]], ð_hdr[7]->s_addr); send_single_packet(m[0], (uint8_t)dst_port[0]); send_single_packet(m[1], (uint8_t)dst_port[1]); send_single_packet(m[2], (uint8_t)dst_port[2]); send_single_packet(m[3], (uint8_t)dst_port[3]); + send_single_packet(m[4], 
(uint8_t)dst_port[4]); + send_single_packet(m[5], (uint8_t)dst_port[5]); + send_single_packet(m[6], (uint8_t)dst_port[6]); + send_single_packet(m[7], (uint8_t)dst_port[7]); } static inline void get_ipv6_5tuple(struct rte_mbuf* m0, __m128i mask0, __m128i mask1, union ipv6_5tuple_host * key) { - __m128i tmpdata0 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *) - + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len))); - __m128i tmpdata1 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *) - + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len) - + sizeof(__m128i))); - __m128i tmpdata2 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *) - + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len) - + sizeof(__m128i) + sizeof(__m128i))); + __m128i tmpdata0 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len))); + __m128i tmpdata1 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i))); + __m128i tmpdata2 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0, __m128i *, sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i) + sizeof(__m128i))); key->xmm[0] = _mm_and_si128(tmpdata0, mask0); key->xmm[1] = tmpdata1; key->xmm[2] = _mm_and_si128(tmpdata2, mask1); @@ -923,41 +962,62 @@ static inline void get_ipv6_5tuple(struct rte_mbuf* m0, __m128i mask0, __m128i m } static inline void -simple_ipv6_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *qconf) +simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid, struct lcore_conf *qconf) { - struct ether_hdr *eth_hdr[4]; - __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[4]; - void *d_addr_bytes[4]; - uint8_t dst_port[4]; - int32_t ret[4]; - union ipv6_5tuple_host key[4]; + struct ether_hdr *eth_hdr[8]; + __attribute__((unused)) struct ipv6_hdr 
*ipv6_hdr[8]; + uint8_t dst_port[8]; + int32_t ret[8]; + union ipv6_5tuple_host key[8]; eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *); eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *); eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *); eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *); + eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *); + eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *); + eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *); + eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *); /* Handle IPv6 headers.*/ - ipv6_hdr[0] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[0], unsigned char *) + - sizeof(struct ether_hdr)); - ipv6_hdr[1] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[1], unsigned char *) + - sizeof(struct ether_hdr)); - ipv6_hdr[2] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[2], unsigned char *) + - sizeof(struct ether_hdr)); - ipv6_hdr[3] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[3], unsigned char *) + - sizeof(struct ether_hdr)); + ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv6_hdr *, + sizeof(struct ether_hdr)); + ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv6_hdr *, + sizeof(struct ether_hdr)); get_ipv6_5tuple(m[0], mask1, mask2, &key[0]); get_ipv6_5tuple(m[1], mask1, mask2, &key[1]); get_ipv6_5tuple(m[2], mask1, mask2, &key[2]); get_ipv6_5tuple(m[3], mask1, mask2, &key[3]); - - const void *key_array[4] = {&key[0], &key[1], 
&key[2],&key[3]}; - rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[0], 4, ret); - dst_port[0] = (uint8_t) ((ret[0] < 0)? portid:ipv6_l3fwd_out_if[ret[0]]); - dst_port[1] = (uint8_t) ((ret[1] < 0)? portid:ipv6_l3fwd_out_if[ret[1]]); - dst_port[2] = (uint8_t) ((ret[2] < 0)? portid:ipv6_l3fwd_out_if[ret[2]]); - dst_port[3] = (uint8_t) ((ret[3] < 0)? portid:ipv6_l3fwd_out_if[ret[3]]); + get_ipv6_5tuple(m[4], mask1, mask2, &key[4]); + get_ipv6_5tuple(m[5], mask1, mask2, &key[5]); + get_ipv6_5tuple(m[6], mask1, mask2, &key[6]); + get_ipv6_5tuple(m[7], mask1, mask2, &key[7]); + + const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3], + &key[4], &key[5], &key[6], &key[7]}; + + rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[0], 8, ret); + dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid:ipv6_l3fwd_out_if[ret[0]]); + dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid:ipv6_l3fwd_out_if[ret[1]]); + dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid:ipv6_l3fwd_out_if[ret[2]]); + dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid:ipv6_l3fwd_out_if[ret[3]]); + dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid:ipv6_l3fwd_out_if[ret[4]]); + dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid:ipv6_l3fwd_out_if[ret[5]]); + dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid:ipv6_l3fwd_out_if[ret[6]]); + dst_port[7] = (uint8_t) ((ret[7] < 0) ? 
portid:ipv6_l3fwd_out_if[ret[7]]); if (dst_port[0] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[0]) == 0) dst_port[0] = portid; @@ -967,27 +1027,43 @@ simple_ipv6_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf * dst_port[2] = portid; if (dst_port[3] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[3]) == 0) dst_port[3] = portid; - - /* 02:00:00:00:00:xx */ - d_addr_bytes[0] = ð_hdr[0]->d_addr.addr_bytes[0]; - d_addr_bytes[1] = ð_hdr[1]->d_addr.addr_bytes[0]; - d_addr_bytes[2] = ð_hdr[2]->d_addr.addr_bytes[0]; - d_addr_bytes[3] = ð_hdr[3]->d_addr.addr_bytes[0]; - *((uint64_t *)d_addr_bytes[0]) = 0x000000000002 + ((uint64_t)dst_port[0] << 40); - *((uint64_t *)d_addr_bytes[1]) = 0x000000000002 + ((uint64_t)dst_port[1] << 40); - *((uint64_t *)d_addr_bytes[2]) = 0x000000000002 + ((uint64_t)dst_port[2] << 40); - *((uint64_t *)d_addr_bytes[3]) = 0x000000000002 + ((uint64_t)dst_port[3] << 40); + if (dst_port[4] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[4]) == 0) + dst_port[4] = portid; + if (dst_port[5] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[5]) == 0) + dst_port[5] = portid; + if (dst_port[6] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[6]) == 0) + dst_port[6] = portid; + if (dst_port[7] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[7]) == 0) + dst_port[7] = portid; + + /* dst addr */ + *(uint64_t *)ð_hdr[0]->d_addr = dest_eth_addr[dst_port[0]]; + *(uint64_t *)ð_hdr[1]->d_addr = dest_eth_addr[dst_port[1]]; + *(uint64_t *)ð_hdr[2]->d_addr = dest_eth_addr[dst_port[2]]; + *(uint64_t *)ð_hdr[3]->d_addr = dest_eth_addr[dst_port[3]]; + *(uint64_t *)ð_hdr[4]->d_addr = dest_eth_addr[dst_port[4]]; + *(uint64_t *)ð_hdr[5]->d_addr = dest_eth_addr[dst_port[5]]; + *(uint64_t *)ð_hdr[6]->d_addr = dest_eth_addr[dst_port[6]]; + *(uint64_t *)ð_hdr[7]->d_addr = dest_eth_addr[dst_port[7]]; /* src addr */ ether_addr_copy(&ports_eth_addr[dst_port[0]], ð_hdr[0]->s_addr); 
ether_addr_copy(&ports_eth_addr[dst_port[1]], ð_hdr[1]->s_addr); ether_addr_copy(&ports_eth_addr[dst_port[2]], ð_hdr[2]->s_addr); ether_addr_copy(&ports_eth_addr[dst_port[3]], ð_hdr[3]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[4]], ð_hdr[4]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[5]], ð_hdr[5]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[6]], ð_hdr[6]->s_addr); + ether_addr_copy(&ports_eth_addr[dst_port[7]], ð_hdr[7]->s_addr); send_single_packet(m[0], (uint8_t)dst_port[0]); send_single_packet(m[1], (uint8_t)dst_port[1]); send_single_packet(m[2], (uint8_t)dst_port[2]); send_single_packet(m[3], (uint8_t)dst_port[3]); + send_single_packet(m[4], (uint8_t)dst_port[4]); + send_single_packet(m[5], (uint8_t)dst_port[5]); + send_single_packet(m[6], (uint8_t)dst_port[6]); + send_single_packet(m[7], (uint8_t)dst_port[7]); } #endif /* APP_LOOKUP_METHOD */ @@ -997,19 +1073,18 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qcon { struct ether_hdr *eth_hdr; struct ipv4_hdr *ipv4_hdr; - void *d_addr_bytes; uint8_t dst_port; eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); - if (m->ol_flags & PKT_RX_IPV4_HDR) { + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { /* Handle IPv4 headers.*/ - ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) + - sizeof(struct ether_hdr)); + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + sizeof(struct ether_hdr)); #ifdef DO_RFC_1812_CHECKS /* Check to make sure the packet is valid (RFC1812) */ - if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { rte_pktmbuf_free(m); return; } @@ -1021,47 +1096,44 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qcon (enabled_port_mask & 1 << dst_port) == 0) dst_port = portid; - /* 02:00:00:00:00:xx */ - d_addr_bytes = ð_hdr->d_addr.addr_bytes[0]; - *((uint64_t *)d_addr_bytes) = ETHER_LOCAL_ADMIN_ADDR + - ((uint64_t)dst_port << 40); - #ifdef 
DO_RFC_1812_CHECKS /* Update time to live and header checksum */ --(ipv4_hdr->time_to_live); ++(ipv4_hdr->hdr_checksum); #endif + /* dst addr */ + *(uint64_t *)ð_hdr->d_addr = dest_eth_addr[dst_port]; /* src addr */ ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr); send_single_packet(m, dst_port); - - } else { + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { /* Handle IPv6 headers.*/ struct ipv6_hdr *ipv6_hdr; - ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) + - sizeof(struct ether_hdr)); + ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + sizeof(struct ether_hdr)); dst_port = get_ipv6_dst_port(ipv6_hdr, portid, qconf->ipv6_lookup_struct); if (dst_port >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port) == 0) dst_port = portid; - /* 02:00:00:00:00:xx */ - d_addr_bytes = ð_hdr->d_addr.addr_bytes[0]; - *((uint64_t *)d_addr_bytes) = ETHER_LOCAL_ADMIN_ADDR + - ((uint64_t)dst_port << 40); + /* dst addr */ + *(uint64_t *)ð_hdr->d_addr = dest_eth_addr[dst_port]; /* src addr */ ether_addr_copy(&ports_eth_addr[dst_port], ð_hdr->s_addr); send_single_packet(m, dst_port); - } - + } else + /* Free the mbuf that contains non-IPV4/IPV6 packet */ + rte_pktmbuf_free(m); } +#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ + (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) #ifdef DO_RFC_1812_CHECKS #define IPV4_MIN_VER_IHL 0x45 @@ -1083,12 +1155,11 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qcon * to BAD_PORT value. 
*/ static inline __attribute__((always_inline)) void -rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t flags) +rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) { uint8_t ihl; - if ((flags & PKT_RX_IPV4_HDR) != 0) { - + if (RTE_ETH_IS_IPV4_HDR(ptype)) { ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL; ipv4_hdr->time_to_live--; @@ -1105,6 +1176,7 @@ rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t flags) #else #define rfc1812_process(mb, dp) do { } while (0) #endif /* DO_RFC_1812_CHECKS */ +#endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */ #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ @@ -1118,11 +1190,11 @@ get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt, struct ipv6_hdr *ipv6_hdr; struct ether_hdr *eth_hdr; - if (pkt->ol_flags & PKT_RX_IPV4_HDR) { + if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) { if (rte_lpm_lookup(qconf->ipv4_lookup_struct, dst_ipv4, &next_hop) != 0) next_hop = portid; - } else if (pkt->ol_flags & PKT_RX_IPV6_HDR) { + } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) { eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1); if (rte_lpm6_lookup(qconf->ipv6_lookup_struct, @@ -1152,21 +1224,23 @@ process_packet(struct lcore_conf *qconf, struct rte_mbuf *pkt, dst_ipv4 = rte_be_to_cpu_32(dst_ipv4); dp = get_dst_port(qconf, pkt, dst_ipv4, portid); - te = _mm_load_si128((__m128i *)eth_hdr); + te = _mm_loadu_si128((__m128i *)eth_hdr); ve = val_eth[dp]; dst_port[0] = dp; - rfc1812_process(ipv4_hdr, dst_port, pkt->ol_flags); + rfc1812_process(ipv4_hdr, dst_port, pkt->packet_type); te = _mm_blend_epi16(te, ve, MASK_ETH); - _mm_store_si128((__m128i *)eth_hdr, te); + _mm_storeu_si128((__m128i *)eth_hdr, te); } /* - * Read ol_flags and destination IPV4 addresses from 4 mbufs. + * Read packet_type and destination IPV4 addresses from 4 mbufs. 
*/ static inline void -processx4_step1(struct rte_mbuf *pkt[FWDSTEP], __m128i *dip, uint32_t *flag) +processx4_step1(struct rte_mbuf *pkt[FWDSTEP], + __m128i *dip, + uint32_t *ipv4_flag) { struct ipv4_hdr *ipv4_hdr; struct ether_hdr *eth_hdr; @@ -1175,22 +1249,22 @@ processx4_step1(struct rte_mbuf *pkt[FWDSTEP], __m128i *dip, uint32_t *flag) eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *); ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); x0 = ipv4_hdr->dst_addr; - flag[0] = pkt[0]->ol_flags & PKT_RX_IPV4_HDR; + ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4; eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *); ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); x1 = ipv4_hdr->dst_addr; - flag[0] &= pkt[1]->ol_flags; + ipv4_flag[0] &= pkt[1]->packet_type; eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *); ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); x2 = ipv4_hdr->dst_addr; - flag[0] &= pkt[2]->ol_flags; + ipv4_flag[0] &= pkt[2]->packet_type; eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *); ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1); x3 = ipv4_hdr->dst_addr; - flag[0] &= pkt[3]->ol_flags; + ipv4_flag[0] &= pkt[3]->packet_type; dip[0] = _mm_set_epi32(x3, x2, x1, x0); } @@ -1200,8 +1274,12 @@ processx4_step1(struct rte_mbuf *pkt[FWDSTEP], __m128i *dip, uint32_t *flag) * If lookup fails, use incoming port (portid) as destination port. */ static inline void -processx4_step2(const struct lcore_conf *qconf, __m128i dip, uint32_t flag, - uint8_t portid, struct rte_mbuf *pkt[FWDSTEP], uint16_t dprt[FWDSTEP]) +processx4_step2(const struct lcore_conf *qconf, + __m128i dip, + uint32_t ipv4_flag, + uint8_t portid, + struct rte_mbuf *pkt[FWDSTEP], + uint16_t dprt[FWDSTEP]) { rte_xmm_t dst; const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, @@ -1211,10 +1289,10 @@ processx4_step2(const struct lcore_conf *qconf, __m128i dip, uint32_t flag, dip = _mm_shuffle_epi8(dip, bswap_mask); /* if all 4 packets are IPV4. 
*/ - if (likely(flag != 0)) { + if (likely(ipv4_flag)) { rte_lpm_lookupx4(qconf->ipv4_lookup_struct, dip, dprt, portid); } else { - dst.m = dip; + dst.x = dip; dprt[0] = get_dst_port(qconf, pkt[0], dst.u32[0], portid); dprt[1] = get_dst_port(qconf, pkt[1], dst.u32[1], portid); dprt[2] = get_dst_port(qconf, pkt[2], dst.u32[2], portid); @@ -1233,22 +1311,22 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) __m128i ve[FWDSTEP]; __m128i *p[FWDSTEP]; - p[0] = (rte_pktmbuf_mtod(pkt[0], __m128i *)); - p[1] = (rte_pktmbuf_mtod(pkt[1], __m128i *)); - p[2] = (rte_pktmbuf_mtod(pkt[2], __m128i *)); - p[3] = (rte_pktmbuf_mtod(pkt[3], __m128i *)); + p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *); + p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *); + p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *); + p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *); ve[0] = val_eth[dst_port[0]]; - te[0] = _mm_load_si128(p[0]); + te[0] = _mm_loadu_si128(p[0]); ve[1] = val_eth[dst_port[1]]; - te[1] = _mm_load_si128(p[1]); + te[1] = _mm_loadu_si128(p[1]); ve[2] = val_eth[dst_port[2]]; - te[2] = _mm_load_si128(p[2]); + te[2] = _mm_loadu_si128(p[2]); ve[3] = val_eth[dst_port[3]]; - te[3] = _mm_load_si128(p[3]); + te[3] = _mm_loadu_si128(p[3]); /* Update first 12 bytes, keep rest bytes intact. 
*/ te[0] = _mm_blend_epi16(te[0], ve[0], MASK_ETH); @@ -1256,19 +1334,181 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) te[2] = _mm_blend_epi16(te[2], ve[2], MASK_ETH); te[3] = _mm_blend_epi16(te[3], ve[3], MASK_ETH); - _mm_store_si128(p[0], te[0]); - _mm_store_si128(p[1], te[1]); - _mm_store_si128(p[2], te[2]); - _mm_store_si128(p[3], te[3]); + _mm_storeu_si128(p[0], te[0]); + _mm_storeu_si128(p[1], te[1]); + _mm_storeu_si128(p[2], te[2]); + _mm_storeu_si128(p[3], te[3]); rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1), - &dst_port[0], pkt[0]->ol_flags); + &dst_port[0], pkt[0]->packet_type); rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1), - &dst_port[1], pkt[1]->ol_flags); + &dst_port[1], pkt[1]->packet_type); rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1), - &dst_port[2], pkt[2]->ol_flags); + &dst_port[2], pkt[2]->packet_type); rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1), - &dst_port[3], pkt[3]->ol_flags); + &dst_port[3], pkt[3]->packet_type); +} + +/* + * We group consecutive packets with the same destination port into one burst. + * To avoid extra latency this is done together with some other packet + * processing, but after we made a final decision about packet's destination. + * To do this we maintain: + * pnum - array of number of consecutive packets with the same dest port for + * each packet in the input burst. + * lp - pointer to the last updated element in the pnum. + * dlp - dest port value lp corresponds to. + */ + +#define GRPSZ (1 << FWDSTEP) +#define GRPMSK (GRPSZ - 1) + +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ + if (likely((dlp) == (dcp)[(idx)])) { \ + (lp)[0]++; \ + } else { \ + (dlp) = (dcp)[idx]; \ + (lp) = (pn) + (idx); \ + (lp)[0] = 1; \ + } \ +} while (0) + +/* + * Group consecutive packets with the same destination port in bursts of 4.
+ * Suppose we have array of destionation ports: + * dst_port[] = {a, b, c, d,, e, ... } + * dp1 should contain: , dp2: . + * We doing 4 comparisions at once and the result is 4 bit mask. + * This mask is used as an index into prebuild array of pnum values. + */ +static inline uint16_t * +port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) +{ + static const struct { + uint64_t pnum; /* prebuild 4 values for pnum[]. */ + int32_t idx; /* index for new last updated elemnet. */ + uint16_t lpv; /* add value to the last updated element. */ + } gptbl[GRPSZ] = { + { + /* 0: a != b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010001), + .idx = 4, + .lpv = 0, + }, + { + /* 1: a == b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010002), + .idx = 4, + .lpv = 1, + }, + { + /* 2: a != b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020001), + .idx = 4, + .lpv = 0, + }, + { + /* 3: a == b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020003), + .idx = 4, + .lpv = 2, + }, + { + /* 4: a != b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010001), + .idx = 4, + .lpv = 0, + }, + { + /* 5: a == b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010002), + .idx = 4, + .lpv = 1, + }, + { + /* 6: a != b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030001), + .idx = 4, + .lpv = 0, + }, + { + /* 7: a == b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030004), + .idx = 4, + .lpv = 3, + }, + { + /* 8: a != b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010001), + .idx = 3, + .lpv = 0, + }, + { + /* 9: a == b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010002), + .idx = 3, + .lpv = 1, + }, + { + /* 0xa: a != b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020001), + .idx = 3, + .lpv = 0, + }, + { + /* 0xb: a == b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020003), + .idx = 3, + .lpv = 2, + }, + { + /* 0xc: a != b, b != 
c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010001), + .idx = 2, + .lpv = 0, + }, + { + /* 0xd: a == b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010002), + .idx = 2, + .lpv = 1, + }, + { + /* 0xe: a != b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040001), + .idx = 1, + .lpv = 0, + }, + { + /* 0xf: a == b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040005), + .idx = 0, + .lpv = 4, + }, + }; + + union { + uint16_t u16[FWDSTEP + 1]; + uint64_t u64; + } *pnum = (void *)pn; + + int32_t v; + + dp1 = _mm_cmpeq_epi16(dp1, dp2); + dp1 = _mm_unpacklo_epi16(dp1, dp1); + v = _mm_movemask_ps((__m128)dp1); + + /* update last port counter. */ + lp[0] += gptbl[v].lpv; + + /* if dest port value has changed. */ + if (v != GRPMSK) { + lp = pnum->u16 + gptbl[v].idx; + lp[0] = 1; + pnum->u64 = gptbl[v].pnum; + } + + return lp; } #endif /* APP_LOOKUP_METHOD */ @@ -1289,9 +1529,12 @@ main_loop(__attribute__((unused)) void *dummy) #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) int32_t k; + uint16_t dlp; + uint16_t *lp; uint16_t dst_port[MAX_PKT_BURST]; __m128i dip[MAX_PKT_BURST / FWDSTEP]; - uint32_t flag[MAX_PKT_BURST / FWDSTEP]; + uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; + uint16_t pnum[MAX_PKT_BURST + 1]; #endif prev_tsc = 0; @@ -1314,7 +1557,7 @@ main_loop(__attribute__((unused)) void *dummy) portid, queueid); } - while (1) { + while (!force_quit) { cur_tsc = rte_rdtsc(); @@ -1355,20 +1598,26 @@ main_loop(__attribute__((unused)) void *dummy) #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) { /* - * Send nb_rx - nb_rx%4 packets - * in groups of 4. + * Send nb_rx - nb_rx%8 packets + * in groups of 8. 
*/ - int32_t n = RTE_ALIGN_FLOOR(nb_rx, 4); - for (j = 0; j < n ; j+=4) { - uint32_t ol_flag = pkts_burst[j]->ol_flags - & pkts_burst[j+1]->ol_flags - & pkts_burst[j+2]->ol_flags - & pkts_burst[j+3]->ol_flags; - if (ol_flag & PKT_RX_IPV4_HDR ) { - simple_ipv4_fwd_4pkts(&pkts_burst[j], - portid, qconf); - } else if (ol_flag & PKT_RX_IPV6_HDR) { - simple_ipv6_fwd_4pkts(&pkts_burst[j], + int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8); + for (j = 0; j < n; j += 8) { + uint32_t pkt_type = + pkts_burst[j]->packet_type & + pkts_burst[j+1]->packet_type & + pkts_burst[j+2]->packet_type & + pkts_burst[j+3]->packet_type & + pkts_burst[j+4]->packet_type & + pkts_burst[j+5]->packet_type & + pkts_burst[j+6]->packet_type & + pkts_burst[j+7]->packet_type; + if (pkt_type & RTE_PTYPE_L3_IPV4) { + simple_ipv4_fwd_8pkts( + &pkts_burst[j], portid, qconf); + } else if (pkt_type & + RTE_PTYPE_L3_IPV6) { + simple_ipv6_fwd_8pkts(&pkts_burst[j], portid, qconf); } else { l3fwd_simple_forward(pkts_burst[j], @@ -1379,6 +1628,14 @@ main_loop(__attribute__((unused)) void *dummy) portid, qconf); l3fwd_simple_forward(pkts_burst[j+3], portid, qconf); + l3fwd_simple_forward(pkts_burst[j+4], + portid, qconf); + l3fwd_simple_forward(pkts_burst[j+5], + portid, qconf); + l3fwd_simple_forward(pkts_burst[j+6], + portid, qconf); + l3fwd_simple_forward(pkts_burst[j+7], + portid, qconf); } } for (; j < nb_rx ; j++) { @@ -1392,19 +1649,71 @@ main_loop(__attribute__((unused)) void *dummy) for (j = 0; j != k; j += FWDSTEP) { processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP], - &flag[j / FWDSTEP]); + &ipv4_flag[j / FWDSTEP]); } k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); for (j = 0; j != k; j += FWDSTEP) { processx4_step2(qconf, dip[j / FWDSTEP], - flag[j / FWDSTEP], portid, + ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]); } + /* + * Finish packet processing and group consecutive + * packets with the same destination port. 
+ */ k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); - for (j = 0; j != k; j += FWDSTEP) { - processx4_step3(&pkts_burst[j], &dst_port[j]); + if (k != 0) { + __m128i dp1, dp2; + + lp = pnum; + lp[0] = 1; + + processx4_step3(pkts_burst, dst_port); + + /* dp1: */ + dp1 = _mm_loadu_si128((__m128i *)dst_port); + + for (j = FWDSTEP; j != k; j += FWDSTEP) { + processx4_step3(&pkts_burst[j], + &dst_port[j]); + + /* + * dp2: + * + */ + dp2 = _mm_loadu_si128((__m128i *) + &dst_port[j - FWDSTEP + 1]); + lp = port_groupx4(&pnum[j - FWDSTEP], + lp, dp1, dp2); + + /* + * dp1: + * + */ + dp1 = _mm_srli_si128(dp2, + (FWDSTEP - 1) * + sizeof(dst_port[0])); + } + + /* + * dp2: + */ + dp2 = _mm_shufflelo_epi16(dp1, 0xf9); + lp = port_groupx4(&pnum[j - FWDSTEP], lp, + dp1, dp2); + + /* + * remove values added by the last repeated + * dst port. + */ + lp[0]--; + dlp = dst_port[j - 1]; + } else { + /* set dlp and lp to the never used values. */ + dlp = BAD_PORT - 1; + lp = pnum + MAX_PKT_BURST; } /* Process up to last 3 packets one by one. */ @@ -1412,39 +1721,41 @@ main_loop(__attribute__((unused)) void *dummy) case 3: process_packet(qconf, pkts_burst[j], dst_port + j, portid); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; case 2: process_packet(qconf, pkts_burst[j], dst_port + j, portid); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; case 1: process_packet(qconf, pkts_burst[j], dst_port + j, portid); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; } /* * Send packets out, through destination port. - * Try to group packets with the same destination port. + * Consecuteve pacekts with the same destination port + * are already grouped together. * If destination port for the packet equals BAD_PORT, * then free the packet without sending it out. 
*/ - for (j = 0; j < nb_rx; j = k) { + for (j = 0; j < nb_rx; j += k) { - uint16_t cn, pn = dst_port[j]; + int32_t m; + uint16_t pn; - k = j; - do { - cn = dst_port[k]; - } while (cn != BAD_PORT && pn == cn && - ++k < nb_rx); + pn = dst_port[j]; + k = pnum[j]; - send_packetsx4(qconf, pn, pkts_burst + j, - k - j); - - if (cn == BAD_PORT) { - rte_pktmbuf_free(pkts_burst[k]); - k += 1; + if (likely(pn != BAD_PORT)) { + send_packetsx4(qconf, pn, + pkts_burst + j, k); + } else { + for (m = j; m != j + k; m++) + rte_pktmbuf_free(pkts_burst[m]); } } @@ -1474,6 +1785,8 @@ main_loop(__attribute__((unused)) void *dummy) } } + + return 0; } static int @@ -1570,6 +1883,7 @@ print_usage(const char *prgname) " -p PORTMASK: hexadecimal bitmask of ports to configure\n" " -P : enable promiscuous mode\n" " --config (port,queue,lcore): rx queues configuration\n" + " --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n" " --no-numa: optional, disable numa awareness\n" " --ipv6: optional, specify it if running ipv6 packets\n" " --enable-jumbo: enable jumbo frame" @@ -1657,7 +1971,7 @@ parse_config(const char *q_arg) if(size >= sizeof(s)) return -1; - rte_snprintf(s, sizeof(s), "%.*s", size, p); + snprintf(s, sizeof(s), "%.*s", size, p); if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) return -1; for (i = 0; i < _NUM_FLD; i++){ @@ -1680,7 +1994,36 @@ parse_config(const char *q_arg) return 0; } +static void +parse_eth_dest(const char *optarg) +{ + uint8_t portid; + char *port_end; + uint8_t c, *dest, peer_addr[6]; + + errno = 0; + portid = strtoul(optarg, &port_end, 10); + if (errno != 0 || port_end == optarg || *port_end++ != ',') + rte_exit(EXIT_FAILURE, + "Invalid eth-dest: %s", optarg); + if (portid >= RTE_MAX_ETHPORTS) + rte_exit(EXIT_FAILURE, + "eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n", + portid, RTE_MAX_ETHPORTS); + + if (cmdline_parse_etheraddr(NULL, port_end, + &peer_addr, sizeof(peer_addr)) < 0) + rte_exit(EXIT_FAILURE, + "Invalid 
ethernet address: %s\n", + port_end); + dest = (uint8_t *)&dest_eth_addr[portid]; + for (c = 0; c < 6; c++) + dest[c] = peer_addr[c]; + *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; +} + #define CMD_LINE_OPT_CONFIG "config" +#define CMD_LINE_OPT_ETH_DEST "eth-dest" #define CMD_LINE_OPT_NO_NUMA "no-numa" #define CMD_LINE_OPT_IPV6 "ipv6" #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo" @@ -1696,6 +2039,7 @@ parse_args(int argc, char **argv) char *prgname = argv[0]; static struct option lgopts[] = { {CMD_LINE_OPT_CONFIG, 1, 0, 0}, + {CMD_LINE_OPT_ETH_DEST, 1, 0, 0}, {CMD_LINE_OPT_NO_NUMA, 0, 0, 0}, {CMD_LINE_OPT_IPV6, 0, 0, 0}, {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0}, @@ -1735,6 +2079,11 @@ parse_args(int argc, char **argv) } } + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ETH_DEST, + sizeof(CMD_LINE_OPT_ETH_DEST))) { + parse_eth_dest(optarg); + } + if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA, sizeof(CMD_LINE_OPT_NO_NUMA))) { printf("numa is disabled \n"); @@ -1755,7 +2104,6 @@ parse_args(int argc, char **argv) printf("jumbo frame is enabled - disabling simple TX path\n"); port_conf.rxmode.jumbo_frame = 1; - tx_conf.txq_flags = 0; /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */ if (0 == getopt_long(argc, argvopt, "", &lenopts, &option_index)) { @@ -1802,13 +2150,9 @@ parse_args(int argc, char **argv) static void print_ethaddr(const char *name, const struct ether_addr *eth_addr) { - printf ("%s%02X:%02X:%02X:%02X:%02X:%02X", name, - eth_addr->addr_bytes[0], - eth_addr->addr_bytes[1], - eth_addr->addr_bytes[2], - eth_addr->addr_bytes[3], - eth_addr->addr_bytes[4], - eth_addr->addr_bytes[5]); + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); } #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) @@ -1980,7 +2324,6 @@ setup_hash(int socketid) struct rte_hash_parameters ipv4_l3fwd_hash_params = { .name = NULL, .entries = L3FWD_HASH_ENTRIES, - .bucket_entries 
= 4, .key_len = sizeof(union ipv4_5tuple_host), .hash_func = ipv4_hash_crc, .hash_func_init_val = 0, @@ -1989,7 +2332,6 @@ setup_hash(int socketid) struct rte_hash_parameters ipv6_l3fwd_hash_params = { .name = NULL, .entries = L3FWD_HASH_ENTRIES, - .bucket_entries = 4, .key_len = sizeof(union ipv6_5tuple_host), .hash_func = ipv6_hash_crc, .hash_func_init_val = 0, @@ -1998,7 +2340,7 @@ setup_hash(int socketid) char s[64]; /* create ipv4 hash */ - rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); + snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); ipv4_l3fwd_hash_params.name = s; ipv4_l3fwd_hash_params.socket_id = socketid; ipv4_l3fwd_lookup_struct[socketid] = rte_hash_create(&ipv4_l3fwd_hash_params); @@ -2007,7 +2349,7 @@ setup_hash(int socketid) "socket %d\n", socketid); /* create ipv6 hash */ - rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); + snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); ipv6_l3fwd_hash_params.name = s; ipv6_l3fwd_hash_params.socket_id = socketid; ipv6_l3fwd_lookup_struct[socketid] = rte_hash_create(&ipv6_l3fwd_hash_params); @@ -2051,7 +2393,7 @@ setup_lpm(int socketid) char s[64]; /* create the LPM table */ - rte_snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); + snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); ipv4_l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid, IPV4_L3FWD_LPM_MAX_RULES, 0); if (ipv4_l3fwd_lookup_struct[socketid] == NULL) @@ -2084,7 +2426,7 @@ setup_lpm(int socketid) } /* create the LPM6 table */ - rte_snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid); + snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid); config.max_rules = IPV6_L3FWD_LPM_MAX_RULES; config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S; @@ -2144,13 +2486,11 @@ init_mem(unsigned nb_mbuf) socketid, lcore_id, NB_SOCKETS); } if (pktmbuf_pool[socketid] == NULL) { - rte_snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); + snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); pktmbuf_pool[socketid] = - 
rte_mempool_create(s, nb_mbuf, MBUF_SIZE, MEMPOOL_CACHE_SIZE, - sizeof(struct rte_pktmbuf_pool_private), - rte_pktmbuf_pool_init, NULL, - rte_pktmbuf_init, NULL, - socketid, 0); + rte_pktmbuf_pool_create(s, nb_mbuf, + MEMPOOL_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, socketid); if (pktmbuf_pool[socketid] == NULL) rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n", socketid); @@ -2182,8 +2522,12 @@ check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) printf("\nChecking link status"); fflush(stdout); for (count = 0; count <= MAX_CHECK_TIME; count++) { + if (force_quit) + return; all_ports_up = 1; for (portid = 0; portid < port_num; portid++) { + if (force_quit) + return; if ((port_mask & (1 << portid)) == 0) continue; memset(&link, 0, sizeof(link)); @@ -2225,10 +2569,22 @@ check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) } } +static void +signal_handler(int signum) +{ + if (signum == SIGINT || signum == SIGTERM) { + printf("\n\nSignal %d received, preparing to exit...\n", + signum); + force_quit = true; + } +} + int -MAIN(int argc, char **argv) +main(int argc, char **argv) { struct lcore_conf *qconf; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; int ret; unsigned nb_ports; uint16_t queueid; @@ -2243,6 +2599,16 @@ MAIN(int argc, char **argv) argc -= ret; argv += ret; + force_quit = false; + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + dest_eth_addr[portid] = ETHER_LOCAL_ADMIN_ADDR + ((uint64_t)portid << 40); + *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid]; + } + /* parse application arguments (after the EAL ones) */ ret = parse_args(argc, argv); if (ret < 0) @@ -2255,10 +2621,6 @@ MAIN(int argc, char **argv) if (ret < 0) rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); - - if (rte_eal_pci_probe() < 0) - rte_exit(EXIT_FAILURE, "Cannot probe 
PCI\n"); - nb_ports = rte_eth_dev_count(); if (nb_ports > RTE_MAX_ETHPORTS) nb_ports = RTE_MAX_ETHPORTS; @@ -2295,12 +2657,13 @@ MAIN(int argc, char **argv) rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); print_ethaddr(" Address:", &ports_eth_addr[portid]); printf(", "); + print_ethaddr("Destination:", + (const struct ether_addr *)&dest_eth_addr[portid]); + printf(", "); /* - * prepare dst and src MACs for each port. + * prepare src MACs for each port. */ - *(uint64_t *)(val_eth + portid) = - ETHER_LOCAL_ADMIN_ADDR + ((uint64_t)portid << 40); ether_addr_copy(&ports_eth_addr[portid], (struct ether_addr *)(val_eth + portid) + 1); @@ -2322,8 +2685,13 @@ MAIN(int argc, char **argv) printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + if (port_conf.rxmode.jumbo_frame) + txconf->txq_flags = 0; ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, - socketid, &tx_conf); + socketid, txconf); if (ret < 0) rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " "port=%d\n", ret, portid); @@ -2355,7 +2723,9 @@ MAIN(int argc, char **argv) fflush(stdout); ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, - socketid, &rx_conf, pktmbuf_pool[socketid]); + socketid, + NULL, + pktmbuf_pool[socketid]); if (ret < 0) rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d," "port=%d\n", ret, portid); @@ -2387,12 +2757,26 @@ MAIN(int argc, char **argv) check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask); + ret = 0; /* launch per-lcore init on every lcore */ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); RTE_LCORE_FOREACH_SLAVE(lcore_id) { - if (rte_eal_wait_lcore(lcore_id) < 0) - return -1; + if (rte_eal_wait_lcore(lcore_id) < 0) { + ret = -1; + break; + } } - return 0; + /* stop ports */ + for (portid = 0; portid < nb_ports; portid++) { + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + printf("Closing port %d...", portid); + 
rte_eth_dev_stop(portid); + rte_eth_dev_close(portid); + printf(" Done\n"); + } + printf("Bye...\n"); + + return ret; }