X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=examples%2Fl3fwd%2Fl3fwd_sse.h;h=3349f274780ad9d3d2ceea0069009d463cb8508f;hb=6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1;hp=f9cf50a0ff76c731a132716c2585368395cdf961;hpb=8353a36a9b4b0766d4f3433fa6ad8fd41cb60bbc;p=dpdk.git diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h index f9cf50a0ff..3349f27478 100644 --- a/examples/l3fwd/l3fwd_sse.h +++ b/examples/l3fwd/l3fwd_sse.h @@ -1,84 +1,13 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016 Intel Corporation */ -#ifndef _L3FWD_COMMON_H_ -#define _L3FWD_COMMON_H_ +#ifndef _L3FWD_SSE_H_ +#define _L3FWD_SSE_H_ #include "l3fwd.h" - -#ifdef DO_RFC_1812_CHECKS - -#define IPV4_MIN_VER_IHL 0x45 -#define IPV4_MAX_VER_IHL 0x4f -#define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL) - -/* Minimum value of IPV4 total length (20B) in network byte order. */ -#define IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8) - -/* - * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2: - * - The IP version number must be 4. - * - The IP header length field must be large enough to hold the - * minimum length legal IP datagram (20 bytes = 5 words). - * - The IP total length field must be large enough to hold the IP - * datagram header, whose length is specified in the IP header length - * field. - * If we encounter invalid IPV4 packet, then set destination port for it - * to BAD_PORT value. - */ -static inline __attribute__((always_inline)) void -rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) -{ - uint8_t ihl; - - if (RTE_ETH_IS_IPV4_HDR(ptype)) { - ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL; - - ipv4_hdr->time_to_live--; - ipv4_hdr->hdr_checksum++; - - if (ihl > IPV4_MAX_VER_IHL_DIFF || - ((uint8_t)ipv4_hdr->total_length == 0 && - ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) - dp[0] = BAD_PORT; - - } -} - -#else -#define rfc1812_process(mb, dp, ptype) do { } while (0) -#endif /* DO_RFC_1812_CHECKS */ +#include "l3fwd_common.h" /* * Update source and destination MAC addresses in the ethernet header. @@ -119,154 +48,27 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) _mm_storeu_si128(p[2], te[2]); _mm_storeu_si128(p[3], te[3]); - rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1), + rfc1812_process((struct ipv4_hdr *)((struct rte_ether_hdr *)p[0] + 1), &dst_port[0], pkt[0]->packet_type); - rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1), + rfc1812_process((struct ipv4_hdr *)((struct rte_ether_hdr *)p[1] + 1), &dst_port[1], pkt[1]->packet_type); - rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1), + rfc1812_process((struct ipv4_hdr *)((struct rte_ether_hdr *)p[2] + 1), &dst_port[2], pkt[2]->packet_type); - rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1), + rfc1812_process((struct ipv4_hdr *)((struct rte_ether_hdr *)p[3] + 1), &dst_port[3], pkt[3]->packet_type); } -/* - * We group consecutive packets with the same destionation port into one burst. - * To avoid extra latency this is done together with some other packet - * processing, but after we made a final decision about packet's destination. - * To do this we maintain: - * pnum - array of number of consecutive packets with the same dest port for - * each packet in the input burst. - * lp - pointer to the last updated element in the pnum. - * dlp - dest port value lp corresponds to. - */ - -#define GRPSZ (1 << FWDSTEP) -#define GRPMSK (GRPSZ - 1) - -#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ - if (likely((dlp) == (dcp)[(idx)])) { \ - (lp)[0]++; \ - } else { \ - (dlp) = (dcp)[idx]; \ - (lp) = (pn) + (idx); \ - (lp)[0] = 1; \ - } \ -} while (0) - /* * Group consecutive packets with the same destination port in bursts of 4. * Suppose we have array of destionation ports: * dst_port[] = {a, b, c, d,, e, ... } * dp1 should contain: , dp2: . - * We doing 4 comparisions at once and the result is 4 bit mask. + * We doing 4 comparisons at once and the result is 4 bit mask. * This mask is used as an index into prebuild array of pnum values. */ static inline uint16_t * port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) { - static const struct { - uint64_t pnum; /* prebuild 4 values for pnum[]. */ - int32_t idx; /* index for new last updated elemnet. */ - uint16_t lpv; /* add value to the last updated element. */ - } gptbl[GRPSZ] = { - { - /* 0: a != b, b != c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100010001), - .idx = 4, - .lpv = 0, - }, - { - /* 1: a == b, b != c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100010002), - .idx = 4, - .lpv = 1, - }, - { - /* 2: a != b, b == c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100020001), - .idx = 4, - .lpv = 0, - }, - { - /* 3: a == b, b == c, c != d, d != e */ - .pnum = UINT64_C(0x0001000100020003), - .idx = 4, - .lpv = 2, - }, - { - /* 4: a != b, b != c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200010001), - .idx = 4, - .lpv = 0, - }, - { - /* 5: a == b, b != c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200010002), - .idx = 4, - .lpv = 1, - }, - { - /* 6: a != b, b == c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200030001), - .idx = 4, - .lpv = 0, - }, - { - /* 7: a == b, b == c, c == d, d != e */ - .pnum = UINT64_C(0x0001000200030004), - .idx = 4, - .lpv = 3, - }, - { - /* 8: a != b, b != c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100010001), - .idx = 3, - .lpv = 0, - }, - { - /* 9: a == b, b != c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100010002), - .idx = 3, - .lpv = 1, - }, - { - /* 0xa: a != b, b == c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100020001), - .idx = 3, - .lpv = 0, - }, - { - /* 0xb: a == b, b == c, c != d, d == e */ - .pnum = UINT64_C(0x0002000100020003), - .idx = 3, - .lpv = 2, - }, - { - /* 0xc: a != b, b != c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300010001), - .idx = 2, - .lpv = 0, - }, - { - /* 0xd: a == b, b != c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300010002), - .idx = 2, - .lpv = 1, - }, - { - /* 0xe: a != b, b == c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300040001), - .idx = 1, - .lpv = 0, - }, - { - /* 0xf: a == b, b == c, c == d, d == e */ - .pnum = UINT64_C(0x0002000300040005), - .idx = 0, - .lpv = 4, - }, - }; - union { uint16_t u16[FWDSTEP + 1]; uint64_t u64; @@ -283,9 +85,9 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) /* if dest port value has changed. */ if (v != GRPMSK) { - lp = pnum->u16 + gptbl[v].idx; - lp[0] = 1; pnum->u64 = gptbl[v].pnum; + pnum->u16[FWDSTEP] = 1; + lp = pnum->u16 + gptbl[v].idx; } return lp; @@ -299,10 +101,10 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) static inline void process_packet(struct rte_mbuf *pkt, uint16_t *dst_port) { - struct ether_hdr *eth_hdr; + struct rte_ether_hdr *eth_hdr; __m128i te, ve; - eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); te = _mm_loadu_si128((__m128i *)eth_hdr); ve = val_eth[dst_port[0]]; @@ -314,88 +116,10 @@ process_packet(struct rte_mbuf *pkt, uint16_t *dst_port) _mm_storeu_si128((__m128i *)eth_hdr, te); } -static inline __attribute__((always_inline)) void -send_packetsx4(struct lcore_conf *qconf, uint8_t port, struct rte_mbuf *m[], - uint32_t num) -{ - uint32_t len, j, n; - - len = qconf->tx_mbufs[port].len; - - /* - * If TX buffer for that queue is empty, and we have enough packets, - * then send them straightway. - */ - if (num >= MAX_TX_BURST && len == 0) { - n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num); - if (unlikely(n < num)) { - do { - rte_pktmbuf_free(m[n]); - } while (++n < num); - } - return; - } - - /* - * Put packets into TX buffer for that queue. - */ - - n = len + num; - n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num; - - j = 0; - switch (n % FWDSTEP) { - while (j < n) { - case 0: - qconf->tx_mbufs[port].m_table[len + j] = m[j]; - j++; - case 3: - qconf->tx_mbufs[port].m_table[len + j] = m[j]; - j++; - case 2: - qconf->tx_mbufs[port].m_table[len + j] = m[j]; - j++; - case 1: - qconf->tx_mbufs[port].m_table[len + j] = m[j]; - j++; - } - } - - len += n; - - /* enough pkts to be sent */ - if (unlikely(len == MAX_PKT_BURST)) { - - send_burst(qconf, MAX_PKT_BURST, port); - - /* copy rest of the packets into the TX buffer. */ - len = num - n; - j = 0; - switch (len % FWDSTEP) { - while (j < len) { - case 0: - qconf->tx_mbufs[port].m_table[j] = m[n + j]; - j++; - case 3: - qconf->tx_mbufs[port].m_table[j] = m[n + j]; - j++; - case 2: - qconf->tx_mbufs[port].m_table[j] = m[n + j]; - j++; - case 1: - qconf->tx_mbufs[port].m_table[j] = m[n + j]; - j++; - } - } - } - - qconf->tx_mbufs[port].len = len; -} - /** * Send packets burst from pkts_burst to the ports in dst_port array */ -static inline __attribute__((always_inline)) void +static __rte_always_inline void send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, uint16_t dst_port[MAX_PKT_BURST], int nb_rx) { @@ -464,10 +188,12 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, process_packet(pkts_burst[j], dst_port + j); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; + /* fall-through */ case 2: process_packet(pkts_burst[j], dst_port + j); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; + /* fall-through */ case 1: process_packet(pkts_burst[j], dst_port + j); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); @@ -498,4 +224,4 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } -#endif /* _L3FWD_COMMON_H_ */ +#endif /* _L3FWD_SSE_H_ */