bpf: allow self-xor operation
[dpdk.git] / app / test-pmd / macswap_neon.h
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019 Arm Limited
3  *
4  * Copyright(c) 2019 Intel Corporation
5  *
6  * Derived do_macswap implementation from app/test-pmd/macswap_sse.h
7  */
8
9 #ifndef _MACSWAP_NEON_H_
10 #define _MACSWAP_NEON_H_
11
12 #include "macswap_common.h"
13 #include "rte_vect.h"
14
15 static inline void
16 do_macswap(struct rte_mbuf *pkts[], uint16_t nb,
17                 struct rte_port *txp)
18 {
19         struct rte_ether_hdr *eth_hdr[4];
20         struct rte_mbuf *mb[4];
21         uint64_t ol_flags;
22         int i;
23         int r;
24         uint8x16_t v0, v1, v2, v3;
25         /**
26          * Index map be used to shuffle the 16 bytes.
27          * byte 0-5 will be swapped with byte 6-11.
28          * byte 12-15 will keep unchanged.
29          */
30         const uint8x16_t idx_map = {6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
31                                 12, 13, 14, 15};
32
33         ol_flags = ol_flags_init(txp->dev_conf.txmode.offloads);
34         vlan_qinq_set(pkts, nb, ol_flags,
35                         txp->tx_vlan_id, txp->tx_vlan_id_outer);
36
37         i = 0;
38         r = nb;
39
40         while (r >= 4) {
41                 if (r >= 8) {
42                         rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 4], void *));
43                         rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 5], void *));
44                         rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 6], void *));
45                         rte_prefetch0(rte_pktmbuf_mtod(pkts[i + 7], void *));
46                 }
47
48                 mb[0] = pkts[i++];
49                 eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *);
50
51                 mb[1] = pkts[i++];
52                 eth_hdr[1] = rte_pktmbuf_mtod(mb[1], struct rte_ether_hdr *);
53
54                 mb[2] = pkts[i++];
55                 eth_hdr[2] = rte_pktmbuf_mtod(mb[2], struct rte_ether_hdr *);
56
57                 mb[3] = pkts[i++];
58                 eth_hdr[3] = rte_pktmbuf_mtod(mb[3], struct rte_ether_hdr *);
59
60                 v0 = vld1q_u8((uint8_t const *)eth_hdr[0]);
61                 v1 = vld1q_u8((uint8_t const *)eth_hdr[1]);
62                 v2 = vld1q_u8((uint8_t const *)eth_hdr[2]);
63                 v3 = vld1q_u8((uint8_t const *)eth_hdr[3]);
64
65                 v0 = vqtbl1q_u8(v0, idx_map);
66                 v1 = vqtbl1q_u8(v1, idx_map);
67                 v2 = vqtbl1q_u8(v2, idx_map);
68                 v3 = vqtbl1q_u8(v3, idx_map);
69
70                 vst1q_u8((uint8_t *)eth_hdr[0], v0);
71                 vst1q_u8((uint8_t *)eth_hdr[1], v1);
72                 vst1q_u8((uint8_t *)eth_hdr[2], v2);
73                 vst1q_u8((uint8_t *)eth_hdr[3], v3);
74
75                 mbuf_field_set(mb[0], ol_flags);
76                 mbuf_field_set(mb[1], ol_flags);
77                 mbuf_field_set(mb[2], ol_flags);
78                 mbuf_field_set(mb[3], ol_flags);
79                 r -= 4;
80         }
81
82         for ( ; i < nb; i++) {
83                 if (i < nb - 1)
84                         rte_prefetch0(rte_pktmbuf_mtod(pkts[i+1], void *));
85                 mb[0] = pkts[i];
86                 eth_hdr[0] = rte_pktmbuf_mtod(mb[0], struct rte_ether_hdr *);
87
88                 /* Swap dest and src mac addresses. */
89                 v0 = vld1q_u8((uint8_t const *)eth_hdr[0]);
90                 v0 = vqtbl1q_u8(v0, idx_map);
91                 vst1q_u8((uint8_t *)eth_hdr[0], v0);
92
93                 mbuf_field_set(mb[0], ol_flags);
94         }
95 }
96
97 #endif /* _MACSWAP_NEON_H_ */