X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fl3fwd%2Fl3fwd_em.c;h=def5a024284d824587aa64508edc06551435818a;hb=6cd6d65b7baa9fb87bb18dccb09d6bb1e552c38f;hp=ace06cf3e0c7f5a5e648f1d51235556f575bae8a;hpb=268888b5b0206aaa5d5212e2b35ea91b717e60b9;p=dpdk.git diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index ace06cf3e0..def5a02428 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -85,7 +86,7 @@ union ipv4_5tuple_host { uint16_t port_src; uint16_t port_dst; }; - __m128i xmm; + xmm_t xmm; }; #define XMM_NUM_IN_IPV6_5TUPLE 3 @@ -109,9 +110,11 @@ union ipv6_5tuple_host { uint16_t port_dst; uint64_t reserve; }; - __m128i xmm[XMM_NUM_IN_IPV6_5TUPLE]; + xmm_t xmm[XMM_NUM_IN_IPV6_5TUPLE]; }; + + struct ipv4_l3fwd_em_route { struct ipv4_5tuple key; uint8_t if_out; @@ -236,9 +239,29 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned; -static __m128i mask0; -static __m128i mask1; -static __m128i mask2; +static rte_xmm_t mask0; +static rte_xmm_t mask1; +static rte_xmm_t mask2; +/* em_mask_key(): load one 128-bit vector from key and AND it with mask, returning the masked vector. One variant is built per vector ISA (SSE2: _mm_loadu_si128/_mm_and_si128; NEON: vld1q_s32/vandq_s32); any other target stops the build at the error directive below. */ +#if defined(__SSE2__) +static inline xmm_t +em_mask_key(void *key, xmm_t mask) +{ + __m128i data = _mm_loadu_si128((__m128i *)(key)); + + return _mm_and_si128(data, mask); +} +#elif defined(RTE_MACHINE_CPUFLAG_NEON) +static inline xmm_t +em_mask_key(void *key, xmm_t mask) +{ + int32x4_t data = vld1q_s32((int32_t *)key); + + return vandq_s32(data, mask); +} +#else +#error No vector engine (SSE, NEON) available, check your toolchain +#endif static inline uint8_t em_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct) @@ -249,13 +272,12 @@ em_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct) (struct rte_hash *)lookup_struct; ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct 
ipv4_hdr, time_to_live); - __m128i data = _mm_loadu_si128((__m128i *)(ipv4_hdr)); /* * Get 5 tuple: dst port, src port, dst IP address, * src IP address and protocol. */ - key.xmm = _mm_and_si128(data, mask0); + key.xmm = em_mask_key(ipv4_hdr, mask0.x); /* Find destination port */ ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key); @@ -271,110 +293,37 @@ em_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct) (struct rte_hash *)lookup_struct; ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len); - __m128i data0 = - _mm_loadu_si128((__m128i *)(ipv6_hdr)); - __m128i data1 = - _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr)+ - sizeof(__m128i))); - __m128i data2 = - _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr)+ - sizeof(__m128i)+sizeof(__m128i))); + void *data0 = ipv6_hdr; + void *data1 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t); + void *data2 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t) + sizeof(xmm_t); /* Get part of 5 tuple: src IP address lower 96 bits and protocol */ - key.xmm[0] = _mm_and_si128(data0, mask1); + key.xmm[0] = em_mask_key(data0, mask1.x); /* * Get part of 5 tuple: dst IP address lower 96 bits * and src IP address higher 32 bits. */ - key.xmm[1] = data1; + key.xmm[1] = *(xmm_t *)data1; /* * Get part of 5 tuple: dst port and src port * and dst IP address higher 32 bits. */ - key.xmm[2] = _mm_and_si128(data2, mask2); + key.xmm[2] = em_mask_key(data2, mask2.x); /* Find destination port */ ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key); return (uint8_t)((ret < 0) ? 
portid : ipv6_l3fwd_out_if[ret]); } -static inline __attribute__((always_inline)) void -l3fwd_em_simple_forward(struct rte_mbuf *m, uint8_t portid, - struct lcore_conf *qconf) -{ - struct ether_hdr *eth_hdr; - struct ipv4_hdr *ipv4_hdr; - uint8_t dst_port; - - eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); - - if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { - /* Handle IPv4 headers.*/ - ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, - sizeof(struct ether_hdr)); - -#ifdef DO_RFC_1812_CHECKS - /* Check to make sure the packet is valid (RFC1812) */ - if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) { - rte_pktmbuf_free(m); - return; - } -#endif - dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid, - qconf->ipv4_lookup_struct); - - if (dst_port >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port) == 0) - dst_port = portid; - -#ifdef DO_RFC_1812_CHECKS - /* Update time to live and header checksum */ - --(ipv4_hdr->time_to_live); - ++(ipv4_hdr->hdr_checksum); -#endif - /* dst addr */ - *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port]; - - /* src addr */ - ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); - - send_single_packet(qconf, m, dst_port); - } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { - /* Handle IPv6 headers.*/ - struct ipv6_hdr *ipv6_hdr; - - ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, - sizeof(struct ether_hdr)); - - dst_port = em_get_ipv6_dst_port(ipv6_hdr, portid, - qconf->ipv6_lookup_struct); - - if (dst_port >= RTE_MAX_ETHPORTS || - (enabled_port_mask & 1 << dst_port) == 0) - dst_port = portid; - - /* dst addr */ - *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port]; - - /* src addr */ - ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr); - - send_single_packet(qconf, m, dst_port); - } else { - /* Free the mbuf that contains non-IPV4/IPV6 packet */ - rte_pktmbuf_free(m); - } -} - -/* - * Include header file if SSE4_1 is enabled for - * buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1. 
- */ #if defined(__SSE4_1__) +#if defined(NO_HASH_MULTI_LOOKUP) #include "l3fwd_em_sse.h" #else +#include "l3fwd_em_hlm_sse.h" +#endif +#else #include "l3fwd_em.h" #endif @@ -412,14 +361,15 @@ convert_ipv6_5tuple(struct ipv6_5tuple *key1, #define BYTE_VALUE_MAX 256 #define ALL_32_BITS 0xffffffff #define BIT_8_TO_15 0x0000ff00 + static inline void populate_ipv4_few_flow_into_table(const struct rte_hash *h) { uint32_t i; int32_t ret; - mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, - ALL_32_BITS, BIT_8_TO_15); + mask0 = (rte_xmm_t){.u32 = {BIT_8_TO_15, ALL_32_BITS, + ALL_32_BITS, ALL_32_BITS} }; for (i = 0; i < IPV4_L3FWD_EM_NUM_ROUTES; i++) { struct ipv4_l3fwd_em_route entry; @@ -445,10 +395,10 @@ populate_ipv6_few_flow_into_table(const struct rte_hash *h) uint32_t i; int32_t ret; - mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, - ALL_32_BITS, BIT_16_TO_23); + mask1 = (rte_xmm_t){.u32 = {BIT_16_TO_23, ALL_32_BITS, + ALL_32_BITS, ALL_32_BITS} }; - mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS); + mask2 = (rte_xmm_t){.u32 = {ALL_32_BITS, ALL_32_BITS, 0, 0} }; for (i = 0; i < IPV6_L3FWD_EM_NUM_ROUTES; i++) { struct ipv6_l3fwd_em_route entry; @@ -474,8 +424,8 @@ populate_ipv4_many_flow_into_table(const struct rte_hash *h, { unsigned i; - mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, - ALL_32_BITS, BIT_8_TO_15); + mask0 = (rte_xmm_t){.u32 = {BIT_8_TO_15, ALL_32_BITS, + ALL_32_BITS, ALL_32_BITS} }; for (i = 0; i < nr_flow; i++) { struct ipv4_l3fwd_em_route entry; @@ -526,9 +476,9 @@ populate_ipv6_many_flow_into_table(const struct rte_hash *h, { unsigned i; - mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, - ALL_32_BITS, BIT_16_TO_23); - mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS); + mask1 = (rte_xmm_t){.u32 = {BIT_16_TO_23, ALL_32_BITS, + ALL_32_BITS, ALL_32_BITS} }; + mask2 = (rte_xmm_t){.u32 = {ALL_32_BITS, ALL_32_BITS, 0, 0} }; for (i = 0; i < nr_flow; i++) { struct ipv6_l3fwd_em_route entry; @@ -572,6 +522,115 @@ 
populate_ipv6_many_flow_into_table(const struct rte_hash *h, printf("Hash: Adding 0x%x keys\n", nr_flow); } +/* Requirements: + * 1. IP packets without extension; + * 2. L4 payload should be either TCP or UDP. + */ +int +em_check_ptype(int portid) +{ + int i, ret; + int ptype_l3_ipv4_ext = 0; + int ptype_l3_ipv6_ext = 0; + int ptype_l4_tcp = 0; + int ptype_l4_udp = 0; + uint32_t ptype_mask = RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK; + + ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0); + if (ret <= 0) + return 0; + + uint32_t ptypes[ret]; + + ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret); + for (i = 0; i < ret; ++i) { + switch (ptypes[i]) { + case RTE_PTYPE_L3_IPV4_EXT: + ptype_l3_ipv4_ext = 1; + break; + case RTE_PTYPE_L3_IPV6_EXT: + ptype_l3_ipv6_ext = 1; + break; + case RTE_PTYPE_L4_TCP: + ptype_l4_tcp = 1; + break; + case RTE_PTYPE_L4_UDP: + ptype_l4_udp = 1; + break; + } + } + + if (ptype_l3_ipv4_ext == 0) + printf("port %d cannot parse RTE_PTYPE_L3_IPV4_EXT\n", portid); + if (ptype_l3_ipv6_ext == 0) + printf("port %d cannot parse RTE_PTYPE_L3_IPV6_EXT\n", portid); + if (!ptype_l3_ipv4_ext || !ptype_l3_ipv6_ext) + return 0; + + if (ptype_l4_tcp == 0) + printf("port %d cannot parse RTE_PTYPE_L4_TCP\n", portid); + if (ptype_l4_udp == 0) + printf("port %d cannot parse RTE_PTYPE_L4_UDP\n", portid); + if (ptype_l4_tcp && ptype_l4_udp) + return 1; + + return 0; +} + +static inline void +em_parse_ptype(struct rte_mbuf *m) +{ + struct ether_hdr *eth_hdr; + uint32_t packet_type = RTE_PTYPE_UNKNOWN; + uint16_t ether_type; + void *l3; + int hdr_len; + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + ether_type = eth_hdr->ether_type; + l3 = (uint8_t *)eth_hdr + sizeof(struct ether_hdr); + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + ipv4_hdr = (struct ipv4_hdr *)l3; + hdr_len = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) * + IPV4_IHL_MULTIPLIER; + if 
(hdr_len == sizeof(struct ipv4_hdr)) { + packet_type |= RTE_PTYPE_L3_IPV4; + if (ipv4_hdr->next_proto_id == IPPROTO_TCP) + packet_type |= RTE_PTYPE_L4_TCP; + else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) + packet_type |= RTE_PTYPE_L4_UDP; + } else + packet_type |= RTE_PTYPE_L3_IPV4_EXT; + } else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + ipv6_hdr = (struct ipv6_hdr *)l3; + if (ipv6_hdr->proto == IPPROTO_TCP) + packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP; + else if (ipv6_hdr->proto == IPPROTO_UDP) + packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP; + else + packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; + } + + m->packet_type = packet_type; +} + +uint16_t +em_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused, + struct rte_mbuf *pkts[], uint16_t nb_pkts, + uint16_t max_pkts __rte_unused, + void *user_param __rte_unused) +{ + unsigned i; + + for (i = 0; i < nb_pkts; ++i) + em_parse_ptype(pkts[i]); + + return nb_pkts; +} + +/* main processing loop */ int em_main_loop(__attribute__((unused)) void *dummy) { @@ -615,11 +674,8 @@ em_main_loop(__attribute__((unused)) void *dummy) diff_tsc = cur_tsc - prev_tsc; if (unlikely(diff_tsc > drain_tsc)) { - /* - * This could be optimized (use queueid instead of - * portid), but it is not called so often - */ - for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { + for (i = 0; i < qconf->n_tx_port; ++i) { + portid = qconf->tx_port_id[i]; if (qconf->tx_mbufs[portid].len == 0) continue; send_burst(qconf, @@ -642,10 +698,6 @@ em_main_loop(__attribute__((unused)) void *dummy) if (nb_rx == 0) continue; - /* - * For SSE4_1 use ENABLE_MULTI_BUFFER_OPTIMIZE=1 - * code. - */ #if defined(__SSE4_1__) l3fwd_em_send_packets(nb_rx, pkts_burst, portid, qconf);