X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fl3fwd%2Fmain.c;h=78710386ab0a01c1aca2d9ab2220d3e8d4a2a830;hb=ea0c20ea95fd5d71a10757e6598ac66233ea1495;hp=e1754a94c8fce2f9db71ef7f7b4bcfe0be6efed2;hpb=8a387fa85f02ed64f755e0baa72e567d1917af48;p=dpdk.git diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c old mode 100755 new mode 100644 index e1754a94c8..78710386ab --- a/examples/l3fwd/main.c +++ b/examples/l3fwd/main.c @@ -43,13 +43,12 @@ #include #include -#include +#include #include #include #include #include #include -#include #include #include #include @@ -73,8 +72,6 @@ #include #include -#include "main.h" - #define APP_LOOKUP_EXACT_MATCH 0 #define APP_LOOKUP_LPM 1 #define DO_RFC_1812_CHECKS @@ -122,7 +119,7 @@ #define MEMPOOL_CACHE_SIZE 256 -#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) +#define MBUF_DATA_SIZE (2048 + RTE_PKTMBUF_HEADROOM) /* * This expression is used to calculate the number of mbufs needed depending on user input, taking @@ -137,25 +134,6 @@ nb_lcores*MEMPOOL_CACHE_SIZE), \ (unsigned)8192) -/* - * RX and TX Prefetch, Host, and Write-back threshold values should be - * carefully set for optimal performance. Consult the network - * controller's datasheet and supporting DPDK documentation for guidance - * on how these parameters should be set. - */ -#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ -#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ -#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ - -/* - * These default values are optimized for use with the Intel(R) 82599 10 GbE - * Controller and the DPDK ixgbe PMD. Consider using other values for other - * network controllers and/or network drivers. - */ -#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ -#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ -#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ - #define MAX_PKT_BURST 32 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ @@ -259,31 +237,6 @@ static struct rte_eth_conf port_conf = { }, }; -static const struct rte_eth_rxconf rx_conf = { - .rx_thresh = { - .pthresh = RX_PTHRESH, - .hthresh = RX_HTHRESH, - .wthresh = RX_WTHRESH, - }, - .rx_free_thresh = 32, -}; - -static struct rte_eth_txconf tx_conf = { - .tx_thresh = { - .pthresh = TX_PTHRESH, - .hthresh = TX_HTHRESH, - .wthresh = TX_WTHRESH, - }, - .tx_free_thresh = 0, /* Use PMD default values */ - .tx_rs_thresh = 0, /* Use PMD default values */ - .txq_flags = (ETH_TXQ_FLAGS_NOMULTSEGS | - ETH_TXQ_FLAGS_NOVLANOFFL | - ETH_TXQ_FLAGS_NOXSUMSCTP | - ETH_TXQ_FLAGS_NOXSUMUDP | - ETH_TXQ_FLAGS_NOXSUMTCP) - -}; - static struct rte_mempool * pktmbuf_pool[NB_SOCKETS]; #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) @@ -770,9 +723,11 @@ get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, lookup6_struct_t * ipv6_l3fwd } #endif +static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, + struct lcore_conf *qconf) __attribute__((unused)); + #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) -static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qconf); #define MASK_ALL_PKTS 0xf #define EXECLUDE_1ST_PKT 0xe @@ -809,19 +764,19 @@ simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf * #ifdef DO_RFC_1812_CHECKS /* Check to make sure the packet is valid (RFC1812) */ uint8_t valid_mask = MASK_ALL_PKTS; - if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) { rte_pktmbuf_free(m[0]); valid_mask &= EXECLUDE_1ST_PKT; } - if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) { rte_pktmbuf_free(m[1]); valid_mask &= EXECLUDE_2ND_PKT; } - if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) { rte_pktmbuf_free(m[2]); valid_mask &= EXECLUDE_3RD_PKT; } - if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) { rte_pktmbuf_free(m[3]); valid_mask &= EXECLUDE_4TH_PKT; } @@ -1009,7 +964,7 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qcon #ifdef DO_RFC_1812_CHECKS /* Check to make sure the packet is valid (RFC1812) */ - if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt.pkt_len) < 0) { + if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { rte_pktmbuf_free(m); return; } @@ -1214,7 +1169,7 @@ processx4_step2(const struct lcore_conf *qconf, __m128i dip, uint32_t flag, if (likely(flag != 0)) { rte_lpm_lookupx4(qconf->ipv4_lookup_struct, dip, dprt, portid); } else { - dst.m = dip; + dst.x = dip; dprt[0] = get_dst_port(qconf, pkt[0], dst.u32[0], portid); dprt[1] = get_dst_port(qconf, pkt[1], dst.u32[1], portid); dprt[2] = get_dst_port(qconf, pkt[2], dst.u32[2], portid); @@ -1271,6 +1226,168 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) &dst_port[3], pkt[3]->ol_flags); } +/* + * We group consecutive packets with the same destionation port into one burst. + * To avoid extra latency this is done together with some other packet + * processing, but after we made a final decision about packet's destination. + * To do this we maintain: + * pnum - array of number of consecutive packets with the same dest port for + * each packet in the input burst. + * lp - pointer to the last updated element in the pnum. + * dlp - dest port value lp corresponds to. + */ + +#define GRPSZ (1 << FWDSTEP) +#define GRPMSK (GRPSZ - 1) + +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ + if (likely((dlp) == (dcp)[(idx)])) { \ + (lp)[0]++; \ + } else { \ + (dlp) = (dcp)[idx]; \ + (lp) = (pn) + (idx); \ + (lp)[0] = 1; \ + } \ +} while (0) + +/* + * Group consecutive packets with the same destination port in bursts of 4. + * Suppose we have array of destionation ports: + * dst_port[] = {a, b, c, d,, e, ... } + * dp1 should contain: , dp2: . + * We doing 4 comparisions at once and the result is 4 bit mask. + * This mask is used as an index into prebuild array of pnum values. + */ +static inline uint16_t * +port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) +{ + static const struct { + uint64_t pnum; /* prebuild 4 values for pnum[]. */ + int32_t idx; /* index for new last updated elemnet. */ + uint16_t lpv; /* add value to the last updated element. */ + } gptbl[GRPSZ] = { + { + /* 0: a != b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010001), + .idx = 4, + .lpv = 0, + }, + { + /* 1: a == b, b != c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100010002), + .idx = 4, + .lpv = 1, + }, + { + /* 2: a != b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020001), + .idx = 4, + .lpv = 0, + }, + { + /* 3: a == b, b == c, c != d, d != e */ + .pnum = UINT64_C(0x0001000100020003), + .idx = 4, + .lpv = 2, + }, + { + /* 4: a != b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010001), + .idx = 4, + .lpv = 0, + }, + { + /* 5: a == b, b != c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200010002), + .idx = 4, + .lpv = 1, + }, + { + /* 6: a != b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030001), + .idx = 4, + .lpv = 0, + }, + { + /* 7: a == b, b == c, c == d, d != e */ + .pnum = UINT64_C(0x0001000200030004), + .idx = 4, + .lpv = 3, + }, + { + /* 8: a != b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010001), + .idx = 3, + .lpv = 0, + }, + { + /* 9: a == b, b != c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100010002), + .idx = 3, + .lpv = 1, + }, + { + /* 0xa: a != b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020001), + .idx = 3, + .lpv = 0, + }, + { + /* 0xb: a == b, b == c, c != d, d == e */ + .pnum = UINT64_C(0x0002000100020003), + .idx = 3, + .lpv = 2, + }, + { + /* 0xc: a != b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010001), + .idx = 2, + .lpv = 0, + }, + { + /* 0xd: a == b, b != c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300010002), + .idx = 2, + .lpv = 1, + }, + { + /* 0xe: a != b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040001), + .idx = 1, + .lpv = 0, + }, + { + /* 0xf: a == b, b == c, c == d, d == e */ + .pnum = UINT64_C(0x0002000300040005), + .idx = 0, + .lpv = 4, + }, + }; + + union { + uint16_t u16[FWDSTEP + 1]; + uint64_t u64; + } *pnum = (void *)pn; + + int32_t v; + + dp1 = _mm_cmpeq_epi16(dp1, dp2); + dp1 = _mm_unpacklo_epi16(dp1, dp1); + v = _mm_movemask_ps((__m128)dp1); + + /* update last port counter. */ + lp[0] += gptbl[v].lpv; + + /* if dest port value has changed. */ + if (v != GRPMSK) { + lp = pnum->u16 + gptbl[v].idx; + lp[0] = 1; + pnum->u64 = gptbl[v].pnum; + } + + return lp; +} + #endif /* APP_LOOKUP_METHOD */ /* main processing loop */ @@ -1289,9 +1406,12 @@ main_loop(__attribute__((unused)) void *dummy) #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) int32_t k; + uint16_t dlp; + uint16_t *lp; uint16_t dst_port[MAX_PKT_BURST]; __m128i dip[MAX_PKT_BURST / FWDSTEP]; uint32_t flag[MAX_PKT_BURST / FWDSTEP]; + uint16_t pnum[MAX_PKT_BURST + 1]; #endif prev_tsc = 0; @@ -1402,9 +1522,61 @@ main_loop(__attribute__((unused)) void *dummy) &pkts_burst[j], &dst_port[j]); } + /* + * Finish packet processing and group consecutive + * packets with the same destination port. + */ k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); - for (j = 0; j != k; j += FWDSTEP) { - processx4_step3(&pkts_burst[j], &dst_port[j]); + if (k != 0) { + __m128i dp1, dp2; + + lp = pnum; + lp[0] = 1; + + processx4_step3(pkts_burst, dst_port); + + /* dp1: */ + dp1 = _mm_loadu_si128((__m128i *)dst_port); + + for (j = FWDSTEP; j != k; j += FWDSTEP) { + processx4_step3(&pkts_burst[j], + &dst_port[j]); + + /* + * dp2: + * + */ + dp2 = _mm_loadu_si128((__m128i *) + &dst_port[j - FWDSTEP + 1]); + lp = port_groupx4(&pnum[j - FWDSTEP], + lp, dp1, dp2); + + /* + * dp1: + * + */ + dp1 = _mm_srli_si128(dp2, + (FWDSTEP - 1) * + sizeof(dst_port[0])); + } + + /* + * dp2: + */ + dp2 = _mm_shufflelo_epi16(dp1, 0xf9); + lp = port_groupx4(&pnum[j - FWDSTEP], lp, + dp1, dp2); + + /* + * remove values added by the last repeated + * dst port. + */ + lp[0]--; + dlp = dst_port[j - 1]; + } else { + /* set dlp and lp to the never used values. */ + dlp = BAD_PORT - 1; + lp = pnum + MAX_PKT_BURST; } /* Process up to last 3 packets one by one. */ @@ -1412,39 +1584,41 @@ main_loop(__attribute__((unused)) void *dummy) case 3: process_packet(qconf, pkts_burst[j], dst_port + j, portid); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; case 2: process_packet(qconf, pkts_burst[j], dst_port + j, portid); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; case 1: process_packet(qconf, pkts_burst[j], dst_port + j, portid); + GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; } /* * Send packets out, through destination port. - * Try to group packets with the same destination port. + * Consecuteve pacekts with the same destination port + * are already grouped together. * If destination port for the packet equals BAD_PORT, * then free the packet without sending it out. */ - for (j = 0; j < nb_rx; j = k) { + for (j = 0; j < nb_rx; j += k) { - uint16_t cn, pn = dst_port[j]; + int32_t m; + uint16_t pn; - k = j; - do { - cn = dst_port[k]; - } while (cn != BAD_PORT && pn == cn && - ++k < nb_rx); + pn = dst_port[j]; + k = pnum[j]; - send_packetsx4(qconf, pn, pkts_burst + j, - k - j); - - if (cn == BAD_PORT) { - rte_pktmbuf_free(pkts_burst[k]); - k += 1; + if (likely(pn != BAD_PORT)) { + send_packetsx4(qconf, pn, + pkts_burst + j, k); + } else { + for (m = j; m != j + k; m++) + rte_pktmbuf_free(pkts_burst[m]); } } @@ -1657,7 +1831,7 @@ parse_config(const char *q_arg) if(size >= sizeof(s)) return -1; - rte_snprintf(s, sizeof(s), "%.*s", size, p); + snprintf(s, sizeof(s), "%.*s", size, p); if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) return -1; for (i = 0; i < _NUM_FLD; i++){ @@ -1755,7 +1929,6 @@ parse_args(int argc, char **argv) printf("jumbo frame is enabled - disabling simple TX path\n"); port_conf.rxmode.jumbo_frame = 1; - tx_conf.txq_flags = 0; /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */ if (0 == getopt_long(argc, argvopt, "", &lenopts, &option_index)) { @@ -1802,13 +1975,9 @@ parse_args(int argc, char **argv) static void print_ethaddr(const char *name, const struct ether_addr *eth_addr) { - printf ("%s%02X:%02X:%02X:%02X:%02X:%02X", name, - eth_addr->addr_bytes[0], - eth_addr->addr_bytes[1], - eth_addr->addr_bytes[2], - eth_addr->addr_bytes[3], - eth_addr->addr_bytes[4], - eth_addr->addr_bytes[5]); + char buf[ETHER_ADDR_FMT_SIZE]; + ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr); + printf("%s%s", name, buf); } #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) @@ -1998,7 +2167,7 @@ setup_hash(int socketid) char s[64]; /* create ipv4 hash */ - rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); + snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); ipv4_l3fwd_hash_params.name = s; ipv4_l3fwd_hash_params.socket_id = socketid; ipv4_l3fwd_lookup_struct[socketid] = rte_hash_create(&ipv4_l3fwd_hash_params); @@ -2007,7 +2176,7 @@ setup_hash(int socketid) "socket %d\n", socketid); /* create ipv6 hash */ - rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); + snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); ipv6_l3fwd_hash_params.name = s; ipv6_l3fwd_hash_params.socket_id = socketid; ipv6_l3fwd_lookup_struct[socketid] = rte_hash_create(&ipv6_l3fwd_hash_params); @@ -2051,7 +2220,7 @@ setup_lpm(int socketid) char s[64]; /* create the LPM table */ - rte_snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); + snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid); ipv4_l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid, IPV4_L3FWD_LPM_MAX_RULES, 0); if (ipv4_l3fwd_lookup_struct[socketid] == NULL) @@ -2084,7 +2253,7 @@ setup_lpm(int socketid) } /* create the LPM6 table */ - rte_snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid); + snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid); config.max_rules = IPV6_L3FWD_LPM_MAX_RULES; config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S; @@ -2144,13 +2313,11 @@ init_mem(unsigned nb_mbuf) socketid, lcore_id, NB_SOCKETS); } if (pktmbuf_pool[socketid] == NULL) { - rte_snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); + snprintf(s, sizeof(s), "mbuf_pool_%d", socketid); pktmbuf_pool[socketid] = - rte_mempool_create(s, nb_mbuf, MBUF_SIZE, MEMPOOL_CACHE_SIZE, - sizeof(struct rte_pktmbuf_pool_private), - rte_pktmbuf_pool_init, NULL, - rte_pktmbuf_init, NULL, - socketid, 0); + rte_pktmbuf_pool_create(s, nb_mbuf, + MEMPOOL_CACHE_SIZE, 0, MBUF_DATA_SIZE, + socketid); if (pktmbuf_pool[socketid] == NULL) rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n", socketid); @@ -2226,9 +2393,11 @@ check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) } int -MAIN(int argc, char **argv) +main(int argc, char **argv) { struct lcore_conf *qconf; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf *txconf; int ret; unsigned nb_ports; uint16_t queueid; @@ -2255,10 +2424,6 @@ MAIN(int argc, char **argv) if (ret < 0) rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n"); - - if (rte_eal_pci_probe() < 0) - rte_exit(EXIT_FAILURE, "Cannot probe PCI\n"); - nb_ports = rte_eth_dev_count(); if (nb_ports > RTE_MAX_ETHPORTS) nb_ports = RTE_MAX_ETHPORTS; @@ -2322,8 +2487,13 @@ MAIN(int argc, char **argv) printf("txq=%u,%d,%d ", lcore_id, queueid, socketid); fflush(stdout); + + rte_eth_dev_info_get(portid, &dev_info); + txconf = &dev_info.default_txconf; + if (port_conf.rxmode.jumbo_frame) + txconf->txq_flags = 0; ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd, - socketid, &tx_conf); + socketid, txconf); if (ret < 0) rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, " "port=%d\n", ret, portid); @@ -2355,7 +2525,9 @@ MAIN(int argc, char **argv) fflush(stdout); ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd, - socketid, &rx_conf, pktmbuf_pool[socketid]); + socketid, + NULL, + pktmbuf_pool[socketid]); if (ret < 0) rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d," "port=%d\n", ret, portid);