- dprt[0] = get_dst_port(qconf, pkt[0], dst.u32[0], portid);
- dprt[1] = get_dst_port(qconf, pkt[1], dst.u32[1], portid);
- dprt[2] = get_dst_port(qconf, pkt[2], dst.u32[2], portid);
- dprt[3] = get_dst_port(qconf, pkt[3], dst.u32[3], portid);
- }
-}
-
-/*
- * Update source and destination MAC addresses in the ethernet header.
- * Perform RFC1812 checks and updates for IPV4 packets.
- *
- * pkt      - packets to process; exactly four entries (indexes 0..3) are
- *            used. 16 bytes are loaded from, and stored back to, the
- *            address rte_pktmbuf_mtod() returns for each packet, so at
- *            least that many bytes must be accessible there.
- * dst_port - destination port of each packet. Each entry is used, with no
- *            range check in this function, as an index into val_eth[];
- *            its address is then passed to rfc1812_process().
- *
- * After the ethernet header has been rewritten, the bytes immediately
- * following struct ether_hdr are handed to rfc1812_process() as a
- * struct ipv4_hdr, together with the packet's packet_type.
- *
- * NOTE(review): val_eth[], MASK_ETH and rfc1812_process() are defined
- * outside this function. val_eth[] presumably holds the per-port ethernet
- * addresses, and rfc1812_process() presumably may rewrite dst_port[] since
- * it receives a pointer to it - confirm against their definitions.
- */
-static inline void
-processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
-{
- __m128i te[FWDSTEP]; /* 16 bytes loaded from the head of each packet */
- __m128i ve[FWDSTEP]; /* val_eth[] entry selected by each dst_port[] */
- __m128i *p[FWDSTEP]; /* rte_pktmbuf_mtod() pointer of each packet */
-
- p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *);
- p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *);
- p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *);
- p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *);
-
- ve[0] = val_eth[dst_port[0]];
- te[0] = _mm_loadu_si128(p[0]); /* unaligned load: p[] has no alignment guarantee here */
-
- ve[1] = val_eth[dst_port[1]];
- te[1] = _mm_loadu_si128(p[1]);
-
- ve[2] = val_eth[dst_port[2]];
- te[2] = _mm_loadu_si128(p[2]);
-
- ve[3] = val_eth[dst_port[3]];
- te[3] = _mm_loadu_si128(p[3]);
-
- /*
-  * Update first 12 bytes, keep the remaining bytes intact: the blend takes
-  * the 16-bit lanes selected by MASK_ETH from ve[] and all other lanes
-  * from te[].
-  */
- te[0] = _mm_blend_epi16(te[0], ve[0], MASK_ETH);
- te[1] = _mm_blend_epi16(te[1], ve[1], MASK_ETH);
- te[2] = _mm_blend_epi16(te[2], ve[2], MASK_ETH);
- te[3] = _mm_blend_epi16(te[3], ve[3], MASK_ETH);
-
- _mm_storeu_si128(p[0], te[0]); /* write the merged 16 bytes back in place */
- _mm_storeu_si128(p[1], te[1]);
- _mm_storeu_si128(p[2], te[2]);
- _mm_storeu_si128(p[3], te[3]);
-
- rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
- &dst_port[0], pkt[0]->packet_type);
- rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
- &dst_port[1], pkt[1]->packet_type);
- rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
- &dst_port[2], pkt[2]->packet_type);
- rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
- &dst_port[3], pkt[3]->packet_type);
-}
-
-/*
- * We group consecutive packets with the same destination port into one burst.
- * To avoid extra latency this is done together with some other packet
- * processing, but after we made a final decision about packet's destination.
- * To do this we maintain:
- * pnum - array of number of consecutive packets with the same dest port for
- * each packet in the input burst.
- * lp - pointer to the last updated element in the pnum.
- * dlp - dest port value lp corresponds to.
- *
- * GRPSZ (1 << FWDSTEP) is the number of entries in the lookup table used by
- * port_groupx4(), one per possible comparison mask. GRPMSK (GRPSZ - 1) is
- * the value port_groupx4() compares its mask against to detect that the
- * destination port has changed.
- *
- * GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) handles a single packet:
- *  - if dcp[idx] equals dlp, the counter lp points to is incremented;
- *  - otherwise dlp becomes dcp[idx], lp is moved to pn + idx and the
- *    counter it now points to is set to 1.
- * dlp and lp are assigned to, so they must be modifiable lvalues; dcp and
- * idx appear more than once in the expansion, so they should be free of
- * side effects.
- */
-
-#define GRPSZ (1 << FWDSTEP)
-#define GRPMSK (GRPSZ - 1)
-
-#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \
- if (likely((dlp) == (dcp)[(idx)])) { \
- (lp)[0]++; /* same port: extend the current group */ \
- } else { \
- (dlp) = (dcp)[idx]; /* new port: remember it ... */ \
- (lp) = (pn) + (idx); /* ... and start a new group at idx */ \
- (lp)[0] = 1; \
- } \
-} while (0)
-
-/*
- * Group consecutive packets with the same destination port in bursts of 4.
- * Suppose we have an array of destination ports:
- * dst_port[] = {a, b, c, d, e, ... }
- * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
- * We do 4 comparisons at once and the result is a 4-bit mask.
- * This mask is used as an index into a prebuilt array of pnum values.
- */
-static inline uint16_t *
-port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2)
-{
- static const struct {
- uint64_t pnum; /* prebuild 4 values for pnum[]. */
- int32_t idx; /* index for new last updated elemnet. */
- uint16_t lpv; /* add value to the last updated element. */
- } gptbl[GRPSZ] = {
- {
- /* 0: a != b, b != c, c != d, d != e */
- .pnum = UINT64_C(0x0001000100010001),
- .idx = 4,
- .lpv = 0,
- },
- {
- /* 1: a == b, b != c, c != d, d != e */
- .pnum = UINT64_C(0x0001000100010002),
- .idx = 4,
- .lpv = 1,
- },
- {
- /* 2: a != b, b == c, c != d, d != e */
- .pnum = UINT64_C(0x0001000100020001),
- .idx = 4,
- .lpv = 0,
- },
- {
- /* 3: a == b, b == c, c != d, d != e */
- .pnum = UINT64_C(0x0001000100020003),
- .idx = 4,
- .lpv = 2,
- },
- {
- /* 4: a != b, b != c, c == d, d != e */
- .pnum = UINT64_C(0x0001000200010001),
- .idx = 4,
- .lpv = 0,
- },
- {
- /* 5: a == b, b != c, c == d, d != e */
- .pnum = UINT64_C(0x0001000200010002),
- .idx = 4,
- .lpv = 1,
- },
- {
- /* 6: a != b, b == c, c == d, d != e */
- .pnum = UINT64_C(0x0001000200030001),
- .idx = 4,
- .lpv = 0,
- },
- {
- /* 7: a == b, b == c, c == d, d != e */
- .pnum = UINT64_C(0x0001000200030004),
- .idx = 4,
- .lpv = 3,
- },
- {
- /* 8: a != b, b != c, c != d, d == e */
- .pnum = UINT64_C(0x0002000100010001),
- .idx = 3,
- .lpv = 0,
- },
- {
- /* 9: a == b, b != c, c != d, d == e */
- .pnum = UINT64_C(0x0002000100010002),
- .idx = 3,
- .lpv = 1,
- },
- {
- /* 0xa: a != b, b == c, c != d, d == e */
- .pnum = UINT64_C(0x0002000100020001),
- .idx = 3,
- .lpv = 0,
- },
- {
- /* 0xb: a == b, b == c, c != d, d == e */
- .pnum = UINT64_C(0x0002000100020003),
- .idx = 3,
- .lpv = 2,
- },
- {
- /* 0xc: a != b, b != c, c == d, d == e */
- .pnum = UINT64_C(0x0002000300010001),
- .idx = 2,
- .lpv = 0,
- },
- {
- /* 0xd: a == b, b != c, c == d, d == e */
- .pnum = UINT64_C(0x0002000300010002),
- .idx = 2,
- .lpv = 1,
- },
- {
- /* 0xe: a != b, b == c, c == d, d == e */
- .pnum = UINT64_C(0x0002000300040001),
- .idx = 1,
- .lpv = 0,
- },
- {
- /* 0xf: a == b, b == c, c == d, d == e */
- .pnum = UINT64_C(0x0002000300040005),
- .idx = 0,
- .lpv = 4,
- },
- };
-
- union {
- uint16_t u16[FWDSTEP + 1];
- uint64_t u64;
- } *pnum = (void *)pn;
-
- int32_t v;
-
- dp1 = _mm_cmpeq_epi16(dp1, dp2);
- dp1 = _mm_unpacklo_epi16(dp1, dp1);
- v = _mm_movemask_ps((__m128)dp1);
-
- /* update last port counter. */
- lp[0] += gptbl[v].lpv;
-
- /* if dest port value has changed. */
- if (v != GRPMSK) {
- lp = pnum->u16 + gptbl[v].idx;
- lp[0] = 1;
- pnum->u64 = gptbl[v].pnum;