net: support IP tunnels in software packet type parser
[dpdk.git] / lib / librte_net / rte_net.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2016 6WIND S.A.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdint.h>
35
36 #include <rte_mbuf.h>
37 #include <rte_mbuf_ptype.h>
38 #include <rte_byteorder.h>
39 #include <rte_ether.h>
40 #include <rte_ip.h>
41 #include <rte_tcp.h>
42 #include <rte_udp.h>
43 #include <rte_sctp.h>
44 #include <rte_net.h>
45
46 /* get l3 packet type from ip6 next protocol */
47 static uint32_t
48 ptype_l3_ip6(uint8_t ip6_proto)
49 {
50         static const uint32_t ip6_ext_proto_map[256] = {
51                 [IPPROTO_HOPOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
52                 [IPPROTO_ROUTING] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
53                 [IPPROTO_FRAGMENT] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
54                 [IPPROTO_ESP] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
55                 [IPPROTO_AH] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
56                 [IPPROTO_DSTOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
57         };
58
59         return RTE_PTYPE_L3_IPV6 + ip6_ext_proto_map[ip6_proto];
60 }
61
62 /* get l3 packet type from ip version and header length */
63 static uint32_t
64 ptype_l3_ip(uint8_t ipv_ihl)
65 {
66         static const uint32_t ptype_l3_ip_proto_map[256] = {
67                 [0x45] = RTE_PTYPE_L3_IPV4,
68                 [0x46] = RTE_PTYPE_L3_IPV4_EXT,
69                 [0x47] = RTE_PTYPE_L3_IPV4_EXT,
70                 [0x48] = RTE_PTYPE_L3_IPV4_EXT,
71                 [0x49] = RTE_PTYPE_L3_IPV4_EXT,
72                 [0x4A] = RTE_PTYPE_L3_IPV4_EXT,
73                 [0x4B] = RTE_PTYPE_L3_IPV4_EXT,
74                 [0x4C] = RTE_PTYPE_L3_IPV4_EXT,
75                 [0x4D] = RTE_PTYPE_L3_IPV4_EXT,
76                 [0x4E] = RTE_PTYPE_L3_IPV4_EXT,
77                 [0x4F] = RTE_PTYPE_L3_IPV4_EXT,
78         };
79
80         return ptype_l3_ip_proto_map[ipv_ihl];
81 }
82
83 /* get l4 packet type from proto */
84 static uint32_t
85 ptype_l4(uint8_t proto)
86 {
87         static const uint32_t ptype_l4_proto[256] = {
88                 [IPPROTO_UDP] = RTE_PTYPE_L4_UDP,
89                 [IPPROTO_TCP] = RTE_PTYPE_L4_TCP,
90                 [IPPROTO_SCTP] = RTE_PTYPE_L4_SCTP,
91         };
92
93         return ptype_l4_proto[proto];
94 }
95
96 /* get inner l3 packet type from ip6 next protocol */
97 static uint32_t
98 ptype_inner_l3_ip6(uint8_t ip6_proto)
99 {
100         static const uint32_t ptype_inner_ip6_ext_proto_map[256] = {
101                 [IPPROTO_HOPOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
102                         RTE_PTYPE_INNER_L3_IPV6,
103                 [IPPROTO_ROUTING] = RTE_PTYPE_INNER_L3_IPV6_EXT -
104                         RTE_PTYPE_INNER_L3_IPV6,
105                 [IPPROTO_FRAGMENT] = RTE_PTYPE_INNER_L3_IPV6_EXT -
106                         RTE_PTYPE_INNER_L3_IPV6,
107                 [IPPROTO_ESP] = RTE_PTYPE_INNER_L3_IPV6_EXT -
108                         RTE_PTYPE_INNER_L3_IPV6,
109                 [IPPROTO_AH] = RTE_PTYPE_INNER_L3_IPV6_EXT -
110                         RTE_PTYPE_INNER_L3_IPV6,
111                 [IPPROTO_DSTOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
112                         RTE_PTYPE_INNER_L3_IPV6,
113         };
114
115         return RTE_PTYPE_INNER_L3_IPV6 +
116                 ptype_inner_ip6_ext_proto_map[ip6_proto];
117 }
118
119 /* get inner l3 packet type from ip version and header length */
120 static uint32_t
121 ptype_inner_l3_ip(uint8_t ipv_ihl)
122 {
123         static const uint32_t ptype_inner_l3_ip_proto_map[256] = {
124                 [0x45] = RTE_PTYPE_INNER_L3_IPV4,
125                 [0x46] = RTE_PTYPE_INNER_L3_IPV4_EXT,
126                 [0x47] = RTE_PTYPE_INNER_L3_IPV4_EXT,
127                 [0x48] = RTE_PTYPE_INNER_L3_IPV4_EXT,
128                 [0x49] = RTE_PTYPE_INNER_L3_IPV4_EXT,
129                 [0x4A] = RTE_PTYPE_INNER_L3_IPV4_EXT,
130                 [0x4B] = RTE_PTYPE_INNER_L3_IPV4_EXT,
131                 [0x4C] = RTE_PTYPE_INNER_L3_IPV4_EXT,
132                 [0x4D] = RTE_PTYPE_INNER_L3_IPV4_EXT,
133                 [0x4E] = RTE_PTYPE_INNER_L3_IPV4_EXT,
134                 [0x4F] = RTE_PTYPE_INNER_L3_IPV4_EXT,
135         };
136
137         return ptype_inner_l3_ip_proto_map[ipv_ihl];
138 }
139
140 /* get inner l4 packet type from proto */
141 static uint32_t
142 ptype_inner_l4(uint8_t proto)
143 {
144         static const uint32_t ptype_inner_l4_proto[256] = {
145                 [IPPROTO_UDP] = RTE_PTYPE_INNER_L4_UDP,
146                 [IPPROTO_TCP] = RTE_PTYPE_INNER_L4_TCP,
147                 [IPPROTO_SCTP] = RTE_PTYPE_INNER_L4_SCTP,
148         };
149
150         return ptype_inner_l4_proto[proto];
151 }
152
153 /* get the tunnel packet type if any, update proto. */
154 static uint32_t
155 ptype_tunnel(uint16_t *proto)
156 {
157         switch (*proto) {
158         case IPPROTO_IPIP:
159                 *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
160                 return RTE_PTYPE_TUNNEL_IP;
161         case IPPROTO_IPV6:
162                 *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
163                 return RTE_PTYPE_TUNNEL_IP; /* IP is also valid for IPv6 */
164         default:
165                 return 0;
166         }
167 }
168
169 /* get the ipv4 header length */
170 static uint8_t
171 ip4_hlen(const struct ipv4_hdr *hdr)
172 {
173         return (hdr->version_ihl & 0xf) * 4;
174 }
175
/*
 * Walk the chain of IPv6 extension headers starting at offset *off in m.
 *
 * proto is the "next header" value that designates the header located at
 * *off. For every hop-by-hop, routing or destination-options header, *off
 * is advanced by (len + 1) * 8 bytes; a fragment header advances it by 8
 * bytes, sets *frag to 1 and ends the walk. *frag is cleared on entry.
 *
 * Returns the protocol number following the last header skipped, or 0 when
 * IPPROTO_NONE is met, when a header cannot be read from the mbuf, or when
 * more than MAX_EXT_HDRS headers are chained.
 */
static uint16_t
skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
	int *frag)
{
	struct ext_hdr {
		uint8_t next_hdr;
		uint8_t len;
	};
	struct ext_hdr scratch;
	const struct ext_hdr *hdr;
	unsigned int budget;

	*frag = 0;

#define MAX_EXT_HDRS 5
	for (budget = MAX_EXT_HDRS; budget != 0; budget--) {
		if (proto == IPPROTO_NONE)
			return 0;

		if (proto == IPPROTO_FRAGMENT) {
			hdr = rte_pktmbuf_read(m, *off, sizeof(*hdr),
				&scratch);
			if (hdr == NULL)
				return 0;
			*off += 8;
			proto = hdr->next_hdr;
			*frag = 1;
			/* the fragment header is always the last one */
			return proto;
		}

		/* anything that is not a skippable header ends the walk */
		if (proto != IPPROTO_HOPOPTS && proto != IPPROTO_ROUTING &&
				proto != IPPROTO_DSTOPTS)
			return proto;

		hdr = rte_pktmbuf_read(m, *off, sizeof(*hdr), &scratch);
		if (hdr == NULL)
			return 0;
		*off += (hdr->len + 1) * 8;
		proto = hdr->next_hdr;
	}

	/* too many chained extension headers */
	return 0;
}
221
222 /* parse mbuf data to get packet type */
223 uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
224         struct rte_net_hdr_lens *hdr_lens)
225 {
226         struct rte_net_hdr_lens local_hdr_lens;
227         const struct ether_hdr *eh;
228         struct ether_hdr eh_copy;
229         uint32_t pkt_type = RTE_PTYPE_L2_ETHER;
230         uint32_t off = 0;
231         uint16_t proto;
232
233         if (hdr_lens == NULL)
234                 hdr_lens = &local_hdr_lens;
235
236         eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy);
237         if (unlikely(eh == NULL))
238                 return 0;
239         proto = eh->ether_type;
240         off = sizeof(*eh);
241         hdr_lens->l2_len = off;
242
243         if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
244                 goto l3; /* fast path if packet is IPv4 */
245
246         if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
247                 const struct vlan_hdr *vh;
248                 struct vlan_hdr vh_copy;
249
250                 pkt_type = RTE_PTYPE_L2_ETHER_VLAN;
251                 vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy);
252                 if (unlikely(vh == NULL))
253                         return pkt_type;
254                 off += sizeof(*vh);
255                 hdr_lens->l2_len += sizeof(*vh);
256                 proto = vh->eth_proto;
257         } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) {
258                 const struct vlan_hdr *vh;
259                 struct vlan_hdr vh_copy;
260
261                 pkt_type = RTE_PTYPE_L2_ETHER_QINQ;
262                 vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh),
263                         &vh_copy);
264                 if (unlikely(vh == NULL))
265                         return pkt_type;
266                 off += 2 * sizeof(*vh);
267                 hdr_lens->l2_len += 2 * sizeof(*vh);
268                 proto = vh->eth_proto;
269         }
270
271  l3:
272         if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
273                 const struct ipv4_hdr *ip4h;
274                 struct ipv4_hdr ip4h_copy;
275
276                 ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy);
277                 if (unlikely(ip4h == NULL))
278                         return pkt_type;
279
280                 pkt_type |= ptype_l3_ip(ip4h->version_ihl);
281                 hdr_lens->l3_len = ip4_hlen(ip4h);
282                 off += hdr_lens->l3_len;
283                 if (ip4h->fragment_offset & rte_cpu_to_be_16(
284                                 IPV4_HDR_OFFSET_MASK | IPV4_HDR_MF_FLAG)) {
285                         pkt_type |= RTE_PTYPE_L4_FRAG;
286                         hdr_lens->l4_len = 0;
287                         return pkt_type;
288                 }
289                 proto = ip4h->next_proto_id;
290                 pkt_type |= ptype_l4(proto);
291         } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
292                 const struct ipv6_hdr *ip6h;
293                 struct ipv6_hdr ip6h_copy;
294                 int frag = 0;
295
296                 ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy);
297                 if (unlikely(ip6h == NULL))
298                         return pkt_type;
299
300                 proto = ip6h->proto;
301                 hdr_lens->l3_len = sizeof(*ip6h);
302                 off += hdr_lens->l3_len;
303                 pkt_type |= ptype_l3_ip6(proto);
304                 if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) {
305                         proto = skip_ip6_ext(proto, m, &off, &frag);
306                         hdr_lens->l3_len = off - hdr_lens->l2_len;
307                 }
308                 if (proto == 0)
309                         return pkt_type;
310                 if (frag) {
311                         pkt_type |= RTE_PTYPE_L4_FRAG;
312                         hdr_lens->l4_len = 0;
313                         return pkt_type;
314                 }
315                 pkt_type |= ptype_l4(proto);
316         }
317
318         if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) {
319                 hdr_lens->l4_len = sizeof(struct udp_hdr);
320                 return pkt_type;
321         } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) {
322                 const struct tcp_hdr *th;
323                 struct tcp_hdr th_copy;
324
325                 th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy);
326                 if (unlikely(th == NULL))
327                         return pkt_type & (RTE_PTYPE_L2_MASK |
328                                 RTE_PTYPE_L3_MASK);
329                 hdr_lens->l4_len = (th->data_off & 0xf0) >> 2;
330                 return pkt_type;
331         } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) {
332                 hdr_lens->l4_len = sizeof(struct sctp_hdr);
333                 return pkt_type;
334         } else {
335                 hdr_lens->l4_len = 0;
336                 pkt_type |= ptype_tunnel(&proto);
337                 hdr_lens->tunnel_len = 0;
338         }
339
340         /* same job for inner header: we need to duplicate the code
341          * because the packet types do not have the same value.
342          */
343         hdr_lens->inner_l2_len = 0;
344
345         if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
346                 const struct ipv4_hdr *ip4h;
347                 struct ipv4_hdr ip4h_copy;
348
349                 ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy);
350                 if (unlikely(ip4h == NULL))
351                         return pkt_type;
352
353                 pkt_type |= ptype_inner_l3_ip(ip4h->version_ihl);
354                 hdr_lens->inner_l3_len = ip4_hlen(ip4h);
355                 off += hdr_lens->inner_l3_len;
356                 if (ip4h->fragment_offset &
357                                 rte_cpu_to_be_16(IPV4_HDR_OFFSET_MASK |
358                                         IPV4_HDR_MF_FLAG)) {
359                         pkt_type |= RTE_PTYPE_INNER_L4_FRAG;
360                         hdr_lens->inner_l4_len = 0;
361                         return pkt_type;
362                 }
363                 proto = ip4h->next_proto_id;
364                 pkt_type |= ptype_inner_l4(proto);
365         } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
366                 const struct ipv6_hdr *ip6h;
367                 struct ipv6_hdr ip6h_copy;
368                 int frag = 0;
369
370                 ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy);
371                 if (unlikely(ip6h == NULL))
372                         return pkt_type;
373
374                 proto = ip6h->proto;
375                 hdr_lens->inner_l3_len = sizeof(*ip6h);
376                 off += hdr_lens->inner_l3_len;
377                 pkt_type |= ptype_inner_l3_ip6(proto);
378                 if ((pkt_type & RTE_PTYPE_INNER_L3_MASK) ==
379                                 RTE_PTYPE_INNER_L3_IPV6_EXT) {
380                         uint32_t prev_off;
381
382                         prev_off = off;
383                         proto = skip_ip6_ext(proto, m, &off, &frag);
384                         hdr_lens->inner_l3_len += off - prev_off;
385                 }
386                 if (proto == 0)
387                         return pkt_type;
388                 if (frag) {
389                         pkt_type |= RTE_PTYPE_INNER_L4_FRAG;
390                         hdr_lens->inner_l4_len = 0;
391                         return pkt_type;
392                 }
393                 pkt_type |= ptype_inner_l4(proto);
394         }
395
396         if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_UDP) {
397                 hdr_lens->inner_l4_len = sizeof(struct udp_hdr);
398         } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) ==
399                         RTE_PTYPE_INNER_L4_TCP) {
400                 const struct tcp_hdr *th;
401                 struct tcp_hdr th_copy;
402
403                 th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy);
404                 if (unlikely(th == NULL))
405                         return pkt_type & (RTE_PTYPE_INNER_L2_MASK |
406                                 RTE_PTYPE_INNER_L3_MASK);
407                 hdr_lens->inner_l4_len = (th->data_off & 0xf0) >> 2;
408         } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) ==
409                         RTE_PTYPE_INNER_L4_SCTP) {
410                 hdr_lens->inner_l4_len = sizeof(struct sctp_hdr);
411         } else {
412                 hdr_lens->inner_l4_len = 0;
413         }
414
415         return pkt_type;
416 }