app/testpmd: add parsing for QinQ VLAN headers
[dpdk.git] / app / test-pmd / csumonly.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <stdarg.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <stdint.h>
10 #include <unistd.h>
11 #include <inttypes.h>
12
13 #include <sys/queue.h>
14 #include <sys/stat.h>
15
16 #include <rte_common.h>
17 #include <rte_byteorder.h>
18 #include <rte_log.h>
19 #include <rte_debug.h>
20 #include <rte_cycles.h>
21 #include <rte_memory.h>
22 #include <rte_memcpy.h>
23 #include <rte_launch.h>
24 #include <rte_eal.h>
25 #include <rte_per_lcore.h>
26 #include <rte_lcore.h>
27 #include <rte_atomic.h>
28 #include <rte_branch_prediction.h>
29 #include <rte_mempool.h>
30 #include <rte_mbuf.h>
31 #include <rte_interrupts.h>
32 #include <rte_pci.h>
33 #include <rte_ether.h>
34 #include <rte_ethdev.h>
35 #include <rte_ip.h>
36 #include <rte_tcp.h>
37 #include <rte_udp.h>
38 #include <rte_vxlan.h>
39 #include <rte_sctp.h>
40 #include <rte_gtp.h>
41 #include <rte_prefetch.h>
42 #include <rte_string_fns.h>
43 #include <rte_flow.h>
44 #include <rte_gro.h>
45 #include <rte_gso.h>
46
47 #include "testpmd.h"
48
49 #define IP_DEFTTL  64   /* from RFC 1340. */
50
51 #define GRE_CHECKSUM_PRESENT    0x8000
52 #define GRE_KEY_PRESENT         0x2000
53 #define GRE_SEQUENCE_PRESENT    0x1000
54 #define GRE_EXT_LEN             4
55 #define GRE_SUPPORTED_FIELDS    (GRE_CHECKSUM_PRESENT | GRE_KEY_PRESENT |\
56                                  GRE_SEQUENCE_PRESENT)
57
58 /* We cannot use rte_cpu_to_be_16() on a constant in a switch/case */
59 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
60 #define _htons(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8)))
61 #else
62 #define _htons(x) (x)
63 #endif
64
65 uint16_t vxlan_gpe_udp_port = 4790;
66
67 /* structure that caches offload info for the current packet */
68 struct testpmd_offload_info { /* per-packet parse results; reused for every packet of a burst */
69         uint16_t ethertype; /* innermost ethertype found, kept in network byte order (compared against _htons() values) */
70         uint8_t gso_enable; /* set to 1 when GSO is enabled on the tx port */
71         uint16_t l2_len; /* L2 header length; for tunnels the parsers add the tunnel header length to it */
72         uint16_t l3_len; /* IPv4/IPv6 header length, 0 when the ethertype is not recognized */
73         uint16_t l4_len; /* TCP header length, sizeof(struct rte_udp_hdr) for UDP, 0 otherwise */
74         uint8_t l4_proto; /* IP protocol number read from the L3 header */
75         uint8_t is_tunnel; /* set to 1 by the tunnel parsers once a tunnel is recognized */
76         uint16_t outer_ethertype; /* copy of ethertype taken before the inner headers are parsed */
77         uint16_t outer_l2_len; /* copy of l2_len taken before the inner headers are parsed */
78         uint16_t outer_l3_len; /* copy of l3_len taken before the inner headers are parsed */
79         uint8_t outer_l4_proto; /* copy of l4_proto taken before the inner headers are parsed */
80         uint16_t tso_segsz; /* copied from the tx port's tso_segsz; 0 is treated as "no TSO" */
81         uint16_t tunnel_tso_segsz; /* copied from the tx port's tunnel_tso_segsz; used instead of tso_segsz for tunnel packets */
82         uint32_t pkt_len; /* rte_pktmbuf_pkt_len() of the packet being processed */
83 };
84
85 /* simplified GRE header */
86 struct simple_gre_hdr { /* fixed leading part of a GRE header; optional words are skipped by parse_gre() */
87         uint16_t flags; /* tested against _htons(GRE_*_PRESENT), i.e. kept in network byte order */
88         uint16_t proto; /* compared against _htons(RTE_ETHER_TYPE_*) to select the inner parser */
89 } __rte_packed;
90
91 static uint16_t
92 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
93 {
94         if (ethertype == _htons(RTE_ETHER_TYPE_IPV4))
95                 return rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr);
96         else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
97                 return rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr);
98 }
99
100 /* Parse an IPv4 header to fill l3_len, l4_len, and l4_proto */
101 static void
102 parse_ipv4(struct rte_ipv4_hdr *ipv4_hdr, struct testpmd_offload_info *info)
103 {
104         struct rte_tcp_hdr *tcp_hdr;
105
106         info->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
107         info->l4_proto = ipv4_hdr->next_proto_id;
108
109         /* only fill l4_len for TCP, it's useful for TSO */
110         if (info->l4_proto == IPPROTO_TCP) {
111                 tcp_hdr = (struct rte_tcp_hdr *)
112                         ((char *)ipv4_hdr + info->l3_len);
113                 info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
114         } else if (info->l4_proto == IPPROTO_UDP)
115                 info->l4_len = sizeof(struct rte_udp_hdr);
116         else
117                 info->l4_len = 0;
118 }
119
120 /* Parse an IPv6 header to fill l3_len, l4_len, and l4_proto */
121 static void
122 parse_ipv6(struct rte_ipv6_hdr *ipv6_hdr, struct testpmd_offload_info *info)
123 {
124         struct rte_tcp_hdr *tcp_hdr;
125
126         info->l3_len = sizeof(struct rte_ipv6_hdr);
127         info->l4_proto = ipv6_hdr->proto;
128
129         /* only fill l4_len for TCP, it's useful for TSO */
130         if (info->l4_proto == IPPROTO_TCP) {
131                 tcp_hdr = (struct rte_tcp_hdr *)
132                         ((char *)ipv6_hdr + info->l3_len);
133                 info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
134         } else if (info->l4_proto == IPPROTO_UDP)
135                 info->l4_len = sizeof(struct rte_udp_hdr);
136         else
137                 info->l4_len = 0;
138 }
139
140 /*
141  * Parse an ethernet header to fill the ethertype, l2_len, l3_len and
142  * ipproto. This function is able to recognize IPv4/IPv6 with optional VLAN
143  * headers. The l4_len argument is only set in case of TCP (useful for TSO).
144  */
145 static void
146 parse_ethernet(struct rte_ether_hdr *eth_hdr, struct testpmd_offload_info *info)
147 {
148         struct rte_ipv4_hdr *ipv4_hdr;
149         struct rte_ipv6_hdr *ipv6_hdr;
150         struct rte_vlan_hdr *vlan_hdr;
151
152         info->l2_len = sizeof(struct rte_ether_hdr);
153         info->ethertype = eth_hdr->ether_type;
154
155         while (info->ethertype == _htons(RTE_ETHER_TYPE_VLAN) ||
156                info->ethertype == _htons(RTE_ETHER_TYPE_QINQ)) {
157                 vlan_hdr = (struct rte_vlan_hdr *)
158                         ((char *)eth_hdr + info->l2_len);
159                 info->l2_len  += sizeof(struct rte_vlan_hdr);
160                 info->ethertype = vlan_hdr->eth_proto;
161         }
162
163         switch (info->ethertype) {
164         case _htons(RTE_ETHER_TYPE_IPV4):
165                 ipv4_hdr = (struct rte_ipv4_hdr *)
166                         ((char *)eth_hdr + info->l2_len);
167                 parse_ipv4(ipv4_hdr, info);
168                 break;
169         case _htons(RTE_ETHER_TYPE_IPV6):
170                 ipv6_hdr = (struct rte_ipv6_hdr *)
171                         ((char *)eth_hdr + info->l2_len);
172                 parse_ipv6(ipv6_hdr, info);
173                 break;
174         default:
175                 info->l4_len = 0;
176                 info->l3_len = 0;
177                 info->l4_proto = 0;
178                 break;
179         }
180 }
181
182 /*
183  * Parse a GTP protocol header.
184  * No optional fields and next extension header type.
185  */
186 static void
187 parse_gtp(struct rte_udp_hdr *udp_hdr,
188           struct testpmd_offload_info *info)
189 {
190         struct rte_ipv4_hdr *ipv4_hdr;
191         struct rte_ipv6_hdr *ipv6_hdr;
192         struct rte_gtp_hdr *gtp_hdr;
193         uint8_t gtp_len = sizeof(*gtp_hdr);
194         uint8_t ip_ver;
195
196         /* Check udp destination port. */
197         if (udp_hdr->dst_port != _htons(RTE_GTPC_UDP_PORT) &&
198             udp_hdr->src_port != _htons(RTE_GTPC_UDP_PORT) &&
199             udp_hdr->dst_port != _htons(RTE_GTPU_UDP_PORT))
200                 return;
201
202         info->is_tunnel = 1;
203         info->outer_ethertype = info->ethertype;
204         info->outer_l2_len = info->l2_len;
205         info->outer_l3_len = info->l3_len;
206         info->outer_l4_proto = info->l4_proto;
207         info->l2_len = 0;
208
209         gtp_hdr = (struct rte_gtp_hdr *)((char *)udp_hdr +
210                   sizeof(struct rte_udp_hdr));
211
212         /*
213          * Check message type. If message type is 0xff, it is
214          * a GTP data packet. If not, it is a GTP control packet
215          */
216         if (gtp_hdr->msg_type == 0xff) {
217                 ip_ver = *(uint8_t *)((char *)udp_hdr +
218                          sizeof(struct rte_udp_hdr) +
219                          sizeof(struct rte_gtp_hdr));
220                 ip_ver = (ip_ver) & 0xf0;
221
222                 if (ip_ver == RTE_GTP_TYPE_IPV4) {
223                         ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gtp_hdr +
224                                    gtp_len);
225                         info->ethertype = _htons(RTE_ETHER_TYPE_IPV4);
226                         parse_ipv4(ipv4_hdr, info);
227                 } else if (ip_ver == RTE_GTP_TYPE_IPV6) {
228                         ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gtp_hdr +
229                                    gtp_len);
230                         info->ethertype = _htons(RTE_ETHER_TYPE_IPV6);
231                         parse_ipv6(ipv6_hdr, info);
232                 }
233         } else {
234                 info->ethertype = 0;
235                 info->l4_len = 0;
236                 info->l3_len = 0;
237                 info->l4_proto = 0;
238         }
239
240         info->l2_len += RTE_ETHER_GTP_HLEN;
241 }
242
243 /* Parse a vxlan header */
244 static void
245 parse_vxlan(struct rte_udp_hdr *udp_hdr,
246             struct testpmd_offload_info *info,
247             uint32_t pkt_type)
248 {
249         struct rte_ether_hdr *eth_hdr;
250
251         /* check udp destination port, 4789 is the default vxlan port
252          * (rfc7348) or that the rx offload flag is set (i40e only
253          * currently) */
254         if (udp_hdr->dst_port != _htons(4789) &&
255                 RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
256                 return;
257
258         info->is_tunnel = 1;
259         info->outer_ethertype = info->ethertype;
260         info->outer_l2_len = info->l2_len;
261         info->outer_l3_len = info->l3_len;
262         info->outer_l4_proto = info->l4_proto;
263
264         eth_hdr = (struct rte_ether_hdr *)((char *)udp_hdr +
265                 sizeof(struct rte_udp_hdr) +
266                 sizeof(struct rte_vxlan_hdr));
267
268         parse_ethernet(eth_hdr, info);
269         info->l2_len += RTE_ETHER_VXLAN_HLEN; /* add udp + vxlan */
270 }
271
272 /* Parse a vxlan-gpe header */
273 static void
274 parse_vxlan_gpe(struct rte_udp_hdr *udp_hdr,
275             struct testpmd_offload_info *info)
276 {
277         struct rte_ether_hdr *eth_hdr;
278         struct rte_ipv4_hdr *ipv4_hdr;
279         struct rte_ipv6_hdr *ipv6_hdr;
280         struct rte_vxlan_gpe_hdr *vxlan_gpe_hdr;
281         uint8_t vxlan_gpe_len = sizeof(*vxlan_gpe_hdr);
282
283         /* Check udp destination port. */
284         if (udp_hdr->dst_port != _htons(vxlan_gpe_udp_port))
285                 return;
286
287         vxlan_gpe_hdr = (struct rte_vxlan_gpe_hdr *)((char *)udp_hdr +
288                                 sizeof(struct rte_udp_hdr));
289
290         if (!vxlan_gpe_hdr->proto || vxlan_gpe_hdr->proto ==
291             RTE_VXLAN_GPE_TYPE_IPV4) {
292                 info->is_tunnel = 1;
293                 info->outer_ethertype = info->ethertype;
294                 info->outer_l2_len = info->l2_len;
295                 info->outer_l3_len = info->l3_len;
296                 info->outer_l4_proto = info->l4_proto;
297
298                 ipv4_hdr = (struct rte_ipv4_hdr *)((char *)vxlan_gpe_hdr +
299                            vxlan_gpe_len);
300
301                 parse_ipv4(ipv4_hdr, info);
302                 info->ethertype = _htons(RTE_ETHER_TYPE_IPV4);
303                 info->l2_len = 0;
304
305         } else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_IPV6) {
306                 info->is_tunnel = 1;
307                 info->outer_ethertype = info->ethertype;
308                 info->outer_l2_len = info->l2_len;
309                 info->outer_l3_len = info->l3_len;
310                 info->outer_l4_proto = info->l4_proto;
311
312                 ipv6_hdr = (struct rte_ipv6_hdr *)((char *)vxlan_gpe_hdr +
313                            vxlan_gpe_len);
314
315                 info->ethertype = _htons(RTE_ETHER_TYPE_IPV6);
316                 parse_ipv6(ipv6_hdr, info);
317                 info->l2_len = 0;
318
319         } else if (vxlan_gpe_hdr->proto == RTE_VXLAN_GPE_TYPE_ETH) {
320                 info->is_tunnel = 1;
321                 info->outer_ethertype = info->ethertype;
322                 info->outer_l2_len = info->l2_len;
323                 info->outer_l3_len = info->l3_len;
324                 info->outer_l4_proto = info->l4_proto;
325
326                 eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_gpe_hdr +
327                           vxlan_gpe_len);
328
329                 parse_ethernet(eth_hdr, info);
330         } else
331                 return;
332
333         info->l2_len += RTE_ETHER_VXLAN_GPE_HLEN;
334 }
335
336 /* Parse a gre header */
337 static void
338 parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
339 {
340         struct rte_ether_hdr *eth_hdr;
341         struct rte_ipv4_hdr *ipv4_hdr;
342         struct rte_ipv6_hdr *ipv6_hdr;
343         uint8_t gre_len = 0;
344
345         gre_len += sizeof(struct simple_gre_hdr);
346
347         if (gre_hdr->flags & _htons(GRE_KEY_PRESENT))
348                 gre_len += GRE_EXT_LEN;
349         if (gre_hdr->flags & _htons(GRE_SEQUENCE_PRESENT))
350                 gre_len += GRE_EXT_LEN;
351         if (gre_hdr->flags & _htons(GRE_CHECKSUM_PRESENT))
352                 gre_len += GRE_EXT_LEN;
353
354         if (gre_hdr->proto == _htons(RTE_ETHER_TYPE_IPV4)) {
355                 info->is_tunnel = 1;
356                 info->outer_ethertype = info->ethertype;
357                 info->outer_l2_len = info->l2_len;
358                 info->outer_l3_len = info->l3_len;
359                 info->outer_l4_proto = info->l4_proto;
360
361                 ipv4_hdr = (struct rte_ipv4_hdr *)((char *)gre_hdr + gre_len);
362
363                 parse_ipv4(ipv4_hdr, info);
364                 info->ethertype = _htons(RTE_ETHER_TYPE_IPV4);
365                 info->l2_len = 0;
366
367         } else if (gre_hdr->proto == _htons(RTE_ETHER_TYPE_IPV6)) {
368                 info->is_tunnel = 1;
369                 info->outer_ethertype = info->ethertype;
370                 info->outer_l2_len = info->l2_len;
371                 info->outer_l3_len = info->l3_len;
372                 info->outer_l4_proto = info->l4_proto;
373
374                 ipv6_hdr = (struct rte_ipv6_hdr *)((char *)gre_hdr + gre_len);
375
376                 info->ethertype = _htons(RTE_ETHER_TYPE_IPV6);
377                 parse_ipv6(ipv6_hdr, info);
378                 info->l2_len = 0;
379
380         } else if (gre_hdr->proto == _htons(RTE_ETHER_TYPE_TEB)) {
381                 info->is_tunnel = 1;
382                 info->outer_ethertype = info->ethertype;
383                 info->outer_l2_len = info->l2_len;
384                 info->outer_l3_len = info->l3_len;
385                 info->outer_l4_proto = info->l4_proto;
386
387                 eth_hdr = (struct rte_ether_hdr *)((char *)gre_hdr + gre_len);
388
389                 parse_ethernet(eth_hdr, info);
390         } else
391                 return;
392
393         info->l2_len += gre_len;
394 }
395
396
397 /* Parse an encapsulated ip or ipv6 header */
398 static void
399 parse_encap_ip(void *encap_ip, struct testpmd_offload_info *info)
400 {
401         struct rte_ipv4_hdr *ipv4_hdr = encap_ip;
402         struct rte_ipv6_hdr *ipv6_hdr = encap_ip;
403         uint8_t ip_version;
404
405         ip_version = (ipv4_hdr->version_ihl & 0xf0) >> 4;
406
407         if (ip_version != 4 && ip_version != 6)
408                 return;
409
410         info->is_tunnel = 1;
411         info->outer_ethertype = info->ethertype;
412         info->outer_l2_len = info->l2_len;
413         info->outer_l3_len = info->l3_len;
414
415         if (ip_version == 4) {
416                 parse_ipv4(ipv4_hdr, info);
417                 info->ethertype = _htons(RTE_ETHER_TYPE_IPV4);
418         } else {
419                 parse_ipv6(ipv6_hdr, info);
420                 info->ethertype = _htons(RTE_ETHER_TYPE_IPV6);
421         }
422         info->l2_len = 0;
423 }
424
425 /* if possible, calculate the checksum of a packet in hw or sw,
426  * depending on the testpmd command line configuration */
427 static uint64_t
428 process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info,
429         uint64_t tx_offloads)
430 {
431         struct rte_ipv4_hdr *ipv4_hdr = l3_hdr;
432         struct rte_udp_hdr *udp_hdr;
433         struct rte_tcp_hdr *tcp_hdr;
434         struct rte_sctp_hdr *sctp_hdr;
435         uint64_t ol_flags = 0;
436         uint32_t max_pkt_len, tso_segsz = 0;
437
438         /* ensure packet is large enough to require tso */
439         if (!info->is_tunnel) {
440                 max_pkt_len = info->l2_len + info->l3_len + info->l4_len +
441                         info->tso_segsz;
442                 if (info->tso_segsz != 0 && info->pkt_len > max_pkt_len)
443                         tso_segsz = info->tso_segsz;
444         } else {
445                 max_pkt_len = info->outer_l2_len + info->outer_l3_len +
446                         info->l2_len + info->l3_len + info->l4_len +
447                         info->tunnel_tso_segsz;
448                 if (info->tunnel_tso_segsz != 0 && info->pkt_len > max_pkt_len)
449                         tso_segsz = info->tunnel_tso_segsz;
450         }
451
452         if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
453                 ipv4_hdr = l3_hdr;
454                 ipv4_hdr->hdr_checksum = 0;
455
456                 ol_flags |= PKT_TX_IPV4;
457                 if (info->l4_proto == IPPROTO_TCP && tso_segsz) {
458                         ol_flags |= PKT_TX_IP_CKSUM;
459                 } else {
460                         if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
461                                 ol_flags |= PKT_TX_IP_CKSUM;
462                         else
463                                 ipv4_hdr->hdr_checksum =
464                                         rte_ipv4_cksum(ipv4_hdr);
465                 }
466         } else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6))
467                 ol_flags |= PKT_TX_IPV6;
468         else
469                 return 0; /* packet type not supported, nothing to do */
470
471         if (info->l4_proto == IPPROTO_UDP) {
472                 udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len);
473                 /* do not recalculate udp cksum if it was 0 */
474                 if (udp_hdr->dgram_cksum != 0) {
475                         udp_hdr->dgram_cksum = 0;
476                         if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)
477                                 ol_flags |= PKT_TX_UDP_CKSUM;
478                         else {
479                                 udp_hdr->dgram_cksum =
480                                         get_udptcp_checksum(l3_hdr, udp_hdr,
481                                                 info->ethertype);
482                         }
483                 }
484                 if (info->gso_enable)
485                         ol_flags |= PKT_TX_UDP_SEG;
486         } else if (info->l4_proto == IPPROTO_TCP) {
487                 tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len);
488                 tcp_hdr->cksum = 0;
489                 if (tso_segsz)
490                         ol_flags |= PKT_TX_TCP_SEG;
491                 else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
492                         ol_flags |= PKT_TX_TCP_CKSUM;
493                 else {
494                         tcp_hdr->cksum =
495                                 get_udptcp_checksum(l3_hdr, tcp_hdr,
496                                         info->ethertype);
497                 }
498                 if (info->gso_enable)
499                         ol_flags |= PKT_TX_TCP_SEG;
500         } else if (info->l4_proto == IPPROTO_SCTP) {
501                 sctp_hdr = (struct rte_sctp_hdr *)
502                         ((char *)l3_hdr + info->l3_len);
503                 sctp_hdr->cksum = 0;
504                 /* sctp payload must be a multiple of 4 to be
505                  * offloaded */
506                 if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) &&
507                         ((ipv4_hdr->total_length & 0x3) == 0)) {
508                         ol_flags |= PKT_TX_SCTP_CKSUM;
509                 } else {
510                         /* XXX implement CRC32c, example available in
511                          * RFC3309 */
512                 }
513         }
514
515         return ol_flags;
516 }
517
518 /* Calculate the checksum of outer header */
519 static uint64_t
520 process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
521         uint64_t tx_offloads, int tso_enabled)
522 {
523         struct rte_ipv4_hdr *ipv4_hdr = outer_l3_hdr;
524         struct rte_ipv6_hdr *ipv6_hdr = outer_l3_hdr;
525         struct rte_udp_hdr *udp_hdr;
526         uint64_t ol_flags = 0;
527
528         if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
529                 ipv4_hdr->hdr_checksum = 0;
530                 ol_flags |= PKT_TX_OUTER_IPV4;
531
532                 if (tx_offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
533                         ol_flags |= PKT_TX_OUTER_IP_CKSUM;
534                 else
535                         ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
536         } else
537                 ol_flags |= PKT_TX_OUTER_IPV6;
538
539         if (info->outer_l4_proto != IPPROTO_UDP)
540                 return ol_flags;
541
542         udp_hdr = (struct rte_udp_hdr *)
543                 ((char *)outer_l3_hdr + info->outer_l3_len);
544
545         if (tso_enabled)
546                 ol_flags |= PKT_TX_TCP_SEG;
547
548         /* Skip SW outer UDP checksum generation if HW supports it */
549         if (tx_offloads & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) {
550                 if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4))
551                         udp_hdr->dgram_cksum
552                                 = rte_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
553                 else
554                         udp_hdr->dgram_cksum
555                                 = rte_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
556
557                 ol_flags |= PKT_TX_OUTER_UDP_CKSUM;
558                 return ol_flags;
559         }
560
561         /* outer UDP checksum is done in software. In the other side, for
562          * UDP tunneling, like VXLAN or Geneve, outer UDP checksum can be
563          * set to zero.
564          *
565          * If a packet will be TSOed into small packets by NIC, we cannot
566          * set/calculate a non-zero checksum, because it will be a wrong
567          * value after the packet be split into several small packets.
568          */
569         if (tso_enabled)
570                 udp_hdr->dgram_cksum = 0;
571
572         /* do not recalculate udp cksum if it was 0 */
573         if (udp_hdr->dgram_cksum != 0) {
574                 udp_hdr->dgram_cksum = 0;
575                 if (info->outer_ethertype == _htons(RTE_ETHER_TYPE_IPV4))
576                         udp_hdr->dgram_cksum =
577                                 rte_ipv4_udptcp_cksum(ipv4_hdr, udp_hdr);
578                 else
579                         udp_hdr->dgram_cksum =
580                                 rte_ipv6_udptcp_cksum(ipv6_hdr, udp_hdr);
581         }
582
583         return ol_flags;
584 }
585
586 /*
587  * Helper function.
588  * Performs actual copying.
589  * Returns number of segments in the destination mbuf on success,
590  * or negative error code on failure.
591  */
592 static int
593 mbuf_copy_split(const struct rte_mbuf *ms, struct rte_mbuf *md[],
594         uint16_t seglen[], uint8_t nb_seg)
595 {
596         uint32_t dlen, slen, tlen;
597         uint32_t i, len;
598         const struct rte_mbuf *m;
599         const uint8_t *src;
600         uint8_t *dst;
601
602         dlen = 0;
603         slen = 0;
604         tlen = 0;
605
606         dst = NULL;
607         src = NULL;
608
609         m = ms;
610         i = 0;
611         while (ms != NULL && i != nb_seg) {
612
613                 if (slen == 0) {
614                         slen = rte_pktmbuf_data_len(ms);
615                         src = rte_pktmbuf_mtod(ms, const uint8_t *);
616                 }
617
618                 if (dlen == 0) {
619                         dlen = RTE_MIN(seglen[i], slen);
620                         md[i]->data_len = dlen;
621                         md[i]->next = (i + 1 == nb_seg) ? NULL : md[i + 1];
622                         dst = rte_pktmbuf_mtod(md[i], uint8_t *);
623                 }
624
625                 len = RTE_MIN(slen, dlen);
626                 memcpy(dst, src, len);
627                 tlen += len;
628                 slen -= len;
629                 dlen -= len;
630                 src += len;
631                 dst += len;
632
633                 if (slen == 0)
634                         ms = ms->next;
635                 if (dlen == 0)
636                         i++;
637         }
638
639         if (ms != NULL)
640                 return -ENOBUFS;
641         else if (tlen != m->pkt_len)
642                 return -EINVAL;
643
644         md[0]->nb_segs = nb_seg;
645         md[0]->pkt_len = tlen;
646         md[0]->vlan_tci = m->vlan_tci;
647         md[0]->vlan_tci_outer = m->vlan_tci_outer;
648         md[0]->ol_flags = m->ol_flags;
649         md[0]->tx_offload = m->tx_offload;
650
651         return nb_seg;
652 }
653
654 /*
655  * Allocate a new mbuf with up to tx_pkt_nb_segs segments.
656  * Copy packet contents and offload information into the new segmented mbuf.
657  */
658 static struct rte_mbuf *
659 pkt_copy_split(const struct rte_mbuf *pkt)
660 {
661         int32_t n, rc;
662         uint32_t i, len, nb_seg;
663         struct rte_mempool *mp;
664         uint16_t seglen[RTE_MAX_SEGS_PER_PKT];
665         struct rte_mbuf *p, *md[RTE_MAX_SEGS_PER_PKT];
666
667         mp = current_fwd_lcore()->mbp;
668
669         if (tx_pkt_split == TX_PKT_SPLIT_RND)
670                 nb_seg = random() % tx_pkt_nb_segs + 1;
671         else
672                 nb_seg = tx_pkt_nb_segs;
673
674         memcpy(seglen, tx_pkt_seg_lengths, nb_seg * sizeof(seglen[0]));
675
676         /* calculate number of segments to use and their length. */
677         len = 0;
678         for (i = 0; i != nb_seg && len < pkt->pkt_len; i++) {
679                 len += seglen[i];
680                 md[i] = NULL;
681         }
682
683         n = pkt->pkt_len - len;
684
685         /* update size of the last segment to fit rest of the packet */
686         if (n >= 0) {
687                 seglen[i - 1] += n;
688                 len += n;
689         }
690
691         nb_seg = i;
692         while (i != 0) {
693                 p = rte_pktmbuf_alloc(mp);
694                 if (p == NULL) {
695                         TESTPMD_LOG(ERR,
696                                 "failed to allocate %u-th of %u mbuf "
697                                 "from mempool: %s\n",
698                                 nb_seg - i, nb_seg, mp->name);
699                         break;
700                 }
701
702                 md[--i] = p;
703                 if (rte_pktmbuf_tailroom(md[i]) < seglen[i]) {
704                         TESTPMD_LOG(ERR, "mempool %s, %u-th segment: "
705                                 "expected seglen: %u, "
706                                 "actual mbuf tailroom: %u\n",
707                                 mp->name, i, seglen[i],
708                                 rte_pktmbuf_tailroom(md[i]));
709                         break;
710                 }
711         }
712
713         /* all mbufs successfully allocated, do copy */
714         if (i == 0) {
715                 rc = mbuf_copy_split(pkt, md, seglen, nb_seg);
716                 if (rc < 0)
717                         TESTPMD_LOG(ERR,
718                                 "mbuf_copy_split for %p(len=%u, nb_seg=%u) "
719                                 "into %u segments failed with error code: %d\n",
720                                 pkt, pkt->pkt_len, pkt->nb_segs, nb_seg, rc);
721
722                 /* figure out how many mbufs to free. */
723                 i = RTE_MAX(rc, 0);
724         }
725
726         /* free unused mbufs */
727         for (; i != nb_seg; i++) {
728                 rte_pktmbuf_free_seg(md[i]);
729                 md[i] = NULL;
730         }
731
732         return md[0];
733 }
734
735 /*
736  * Receive a burst of packets, and for each packet:
737  *  - parse packet, and try to recognize a supported packet type (1)
738  *  - if it's not a supported packet type, don't touch the packet, else:
739  *  - reprocess the checksum of all supported layers. This is done in SW
740  *    or HW, depending on testpmd command line configuration
741  *  - if TSO is enabled in testpmd command line, also flag the mbuf for TCP
742  *    segmentation offload (this implies HW TCP checksum)
743  * Then transmit packets on the output port.
744  *
745  * (1) Supported packets are:
746  *   Ether / (vlan) / IP|IP6 / UDP|TCP|SCTP .
747  *   Ether / (vlan) / outer IP|IP6 / outer UDP / VxLAN / Ether / IP|IP6 /
748  *           UDP|TCP|SCTP
749  *   Ether / (vlan) / outer IP|IP6 / outer UDP / VXLAN-GPE / Ether / IP|IP6 /
750  *           UDP|TCP|SCTP
751  *   Ether / (vlan) / outer IP|IP6 / outer UDP / VXLAN-GPE / IP|IP6 /
752  *           UDP|TCP|SCTP
753  *   Ether / (vlan) / outer IP / outer UDP / GTP / IP|IP6 / UDP|TCP|SCTP
754  *   Ether / (vlan) / outer IP|IP6 / GRE / Ether / IP|IP6 / UDP|TCP|SCTP
755  *   Ether / (vlan) / outer IP|IP6 / GRE / IP|IP6 / UDP|TCP|SCTP
756  *   Ether / (vlan) / outer IP|IP6 / IP|IP6 / UDP|TCP|SCTP
757  *
758  * The testpmd command line for this forward engine sets the flags
759  * TESTPMD_TX_OFFLOAD_* in ports[tx_port].tx_ol_flags. They control
760  * whether a checksum must be calculated in software or in hardware. The
761  * IP, UDP, TCP and SCTP flags always concern the inner layer. The
762  * OUTER_IP is only useful for tunnel packets.
763  */
764 static void
765 pkt_burst_checksum_forward(struct fwd_stream *fs)
766 {
767         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
768         struct rte_mbuf *gso_segments[GSO_MAX_PKT_BURST];
769         struct rte_gso_ctx *gso_ctx;
770         struct rte_mbuf **tx_pkts_burst;
771         struct rte_port *txp;
772         struct rte_mbuf *m, *p;
773         struct rte_ether_hdr *eth_hdr;
774         void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
775         void **gro_ctx;
776         uint16_t gro_pkts_num;
777         uint8_t gro_enable;
778         uint16_t nb_rx;
779         uint16_t nb_tx;
780         uint16_t nb_prep;
781         uint16_t i;
782         uint64_t rx_ol_flags, tx_ol_flags;
783         uint64_t tx_offloads;
784         uint32_t retry;
785         uint32_t rx_bad_ip_csum;
786         uint32_t rx_bad_l4_csum;
787         uint32_t rx_bad_outer_l4_csum;
788         struct testpmd_offload_info info;
789         uint16_t nb_segments = 0;
790         int ret;
791
792 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
793         uint64_t start_tsc;
794         uint64_t end_tsc;
795         uint64_t core_cycles;
796 #endif
797
798 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
799         start_tsc = rte_rdtsc();
800 #endif
801
802         /* receive a burst of packet */
803         nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
804                                  nb_pkt_per_burst);
805         if (unlikely(nb_rx == 0))
806                 return;
807 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
808         fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
809 #endif
810         fs->rx_packets += nb_rx;
811         rx_bad_ip_csum = 0;
812         rx_bad_l4_csum = 0;
813         rx_bad_outer_l4_csum = 0;
814         gro_enable = gro_ports[fs->rx_port].enable;
815
816         txp = &ports[fs->tx_port];
817         tx_offloads = txp->dev_conf.txmode.offloads;
818         memset(&info, 0, sizeof(info));
819         info.tso_segsz = txp->tso_segsz;
820         info.tunnel_tso_segsz = txp->tunnel_tso_segsz;
821         if (gso_ports[fs->tx_port].enable)
822                 info.gso_enable = 1;
823
824         for (i = 0; i < nb_rx; i++) {
825                 if (likely(i < nb_rx - 1))
826                         rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1],
827                                                        void *));
828
829                 m = pkts_burst[i];
830                 info.is_tunnel = 0;
831                 info.pkt_len = rte_pktmbuf_pkt_len(m);
832                 tx_ol_flags = m->ol_flags &
833                               (IND_ATTACHED_MBUF | EXT_ATTACHED_MBUF);
834                 rx_ol_flags = m->ol_flags;
835
836                 /* Update the L3/L4 checksum error packet statistics */
837                 if ((rx_ol_flags & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_BAD)
838                         rx_bad_ip_csum += 1;
839                 if ((rx_ol_flags & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD)
840                         rx_bad_l4_csum += 1;
841                 if (rx_ol_flags & PKT_RX_OUTER_L4_CKSUM_BAD)
842                         rx_bad_outer_l4_csum += 1;
843
844                 /* step 1: dissect packet, parsing optional vlan, ip4/ip6, vxlan
845                  * and inner headers */
846
847                 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
848                 rte_ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
849                                 &eth_hdr->d_addr);
850                 rte_ether_addr_copy(&ports[fs->tx_port].eth_addr,
851                                 &eth_hdr->s_addr);
852                 parse_ethernet(eth_hdr, &info);
853                 l3_hdr = (char *)eth_hdr + info.l2_len;
854
855                 /* check if it's a supported tunnel */
856                 if (txp->parse_tunnel) {
857                         if (info.l4_proto == IPPROTO_UDP) {
858                                 struct rte_udp_hdr *udp_hdr;
859
860                                 udp_hdr = (struct rte_udp_hdr *)
861                                         ((char *)l3_hdr + info.l3_len);
862                                 parse_gtp(udp_hdr, &info);
863                                 if (info.is_tunnel) {
864                                         tx_ol_flags |= PKT_TX_TUNNEL_GTP;
865                                         goto tunnel_update;
866                                 }
867                                 parse_vxlan_gpe(udp_hdr, &info);
868                                 if (info.is_tunnel) {
869                                         tx_ol_flags |=
870                                                 PKT_TX_TUNNEL_VXLAN_GPE;
871                                         goto tunnel_update;
872                                 }
873                                 parse_vxlan(udp_hdr, &info,
874                                             m->packet_type);
875                                 if (info.is_tunnel)
876                                         tx_ol_flags |=
877                                                 PKT_TX_TUNNEL_VXLAN;
878                         } else if (info.l4_proto == IPPROTO_GRE) {
879                                 struct simple_gre_hdr *gre_hdr;
880
881                                 gre_hdr = (struct simple_gre_hdr *)
882                                         ((char *)l3_hdr + info.l3_len);
883                                 parse_gre(gre_hdr, &info);
884                                 if (info.is_tunnel)
885                                         tx_ol_flags |= PKT_TX_TUNNEL_GRE;
886                         } else if (info.l4_proto == IPPROTO_IPIP) {
887                                 void *encap_ip_hdr;
888
889                                 encap_ip_hdr = (char *)l3_hdr + info.l3_len;
890                                 parse_encap_ip(encap_ip_hdr, &info);
891                                 if (info.is_tunnel)
892                                         tx_ol_flags |= PKT_TX_TUNNEL_IPIP;
893                         }
894                 }
895
896 tunnel_update:
897                 /* update l3_hdr and outer_l3_hdr if a tunnel was parsed */
898                 if (info.is_tunnel) {
899                         outer_l3_hdr = l3_hdr;
900                         l3_hdr = (char *)l3_hdr + info.outer_l3_len + info.l2_len;
901                 }
902
903                 /* step 2: depending on user command line configuration,
904                  * recompute checksum either in software or flag the
905                  * mbuf to offload the calculation to the NIC. If TSO
906                  * is configured, prepare the mbuf for TCP segmentation. */
907
908                 /* process checksums of inner headers first */
909                 tx_ol_flags |= process_inner_cksums(l3_hdr, &info,
910                         tx_offloads);
911
912                 /* Then process outer headers if any. Note that the software
913                  * checksum will be wrong if one of the inner checksums is
914                  * processed in hardware. */
915                 if (info.is_tunnel == 1) {
916                         tx_ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
917                                         tx_offloads,
918                                         !!(tx_ol_flags & PKT_TX_TCP_SEG));
919                 }
920
921                 /* step 3: fill the mbuf meta data (flags and header lengths) */
922
923                 m->tx_offload = 0;
924                 if (info.is_tunnel == 1) {
925                         if (info.tunnel_tso_segsz ||
926                             (tx_offloads &
927                              DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) ||
928                             (tx_offloads &
929                              DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) ||
930                             (tx_ol_flags & PKT_TX_OUTER_IPV6)) {
931                                 m->outer_l2_len = info.outer_l2_len;
932                                 m->outer_l3_len = info.outer_l3_len;
933                                 m->l2_len = info.l2_len;
934                                 m->l3_len = info.l3_len;
935                                 m->l4_len = info.l4_len;
936                                 m->tso_segsz = info.tunnel_tso_segsz;
937                         }
938                         else {
939                                 /* if there is a outer UDP cksum
940                                    processed in sw and the inner in hw,
941                                    the outer checksum will be wrong as
942                                    the payload will be modified by the
943                                    hardware */
944                                 m->l2_len = info.outer_l2_len +
945                                         info.outer_l3_len + info.l2_len;
946                                 m->l3_len = info.l3_len;
947                                 m->l4_len = info.l4_len;
948                         }
949                 } else {
950                         /* this is only useful if an offload flag is
951                          * set, but it does not hurt to fill it in any
952                          * case */
953                         m->l2_len = info.l2_len;
954                         m->l3_len = info.l3_len;
955                         m->l4_len = info.l4_len;
956                         m->tso_segsz = info.tso_segsz;
957                 }
958                 m->ol_flags = tx_ol_flags;
959
960                 /* Do split & copy for the packet. */
961                 if (tx_pkt_split != TX_PKT_SPLIT_OFF) {
962                         p = pkt_copy_split(m);
963                         if (p != NULL) {
964                                 rte_pktmbuf_free(m);
965                                 m = p;
966                                 pkts_burst[i] = m;
967                         }
968                 }
969
970                 /* if verbose mode is enabled, dump debug info */
971                 if (verbose_level > 0) {
972                         char buf[256];
973
974                         printf("-----------------\n");
975                         printf("port=%u, mbuf=%p, pkt_len=%u, nb_segs=%u:\n",
976                                 fs->rx_port, m, m->pkt_len, m->nb_segs);
977                         /* dump rx parsed packet info */
978                         rte_get_rx_ol_flag_list(rx_ol_flags, buf, sizeof(buf));
979                         printf("rx: l2_len=%d ethertype=%x l3_len=%d "
980                                 "l4_proto=%d l4_len=%d flags=%s\n",
981                                 info.l2_len, rte_be_to_cpu_16(info.ethertype),
982                                 info.l3_len, info.l4_proto, info.l4_len, buf);
983                         if (rx_ol_flags & PKT_RX_LRO)
984                                 printf("rx: m->lro_segsz=%u\n", m->tso_segsz);
985                         if (info.is_tunnel == 1)
986                                 printf("rx: outer_l2_len=%d outer_ethertype=%x "
987                                         "outer_l3_len=%d\n", info.outer_l2_len,
988                                         rte_be_to_cpu_16(info.outer_ethertype),
989                                         info.outer_l3_len);
990                         /* dump tx packet info */
991                         if ((tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
992                                             DEV_TX_OFFLOAD_UDP_CKSUM |
993                                             DEV_TX_OFFLOAD_TCP_CKSUM |
994                                             DEV_TX_OFFLOAD_SCTP_CKSUM)) ||
995                                 info.tso_segsz != 0)
996                                 printf("tx: m->l2_len=%d m->l3_len=%d "
997                                         "m->l4_len=%d\n",
998                                         m->l2_len, m->l3_len, m->l4_len);
999                         if (info.is_tunnel == 1) {
1000                                 if ((tx_offloads &
1001                                     DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) ||
1002                                     (tx_offloads &
1003                                     DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) ||
1004                                     (tx_ol_flags & PKT_TX_OUTER_IPV6))
1005                                         printf("tx: m->outer_l2_len=%d "
1006                                                 "m->outer_l3_len=%d\n",
1007                                                 m->outer_l2_len,
1008                                                 m->outer_l3_len);
1009                                 if (info.tunnel_tso_segsz != 0 &&
1010                                                 (m->ol_flags & PKT_TX_TCP_SEG))
1011                                         printf("tx: m->tso_segsz=%d\n",
1012                                                 m->tso_segsz);
1013                         } else if (info.tso_segsz != 0 &&
1014                                         (m->ol_flags & PKT_TX_TCP_SEG))
1015                                 printf("tx: m->tso_segsz=%d\n", m->tso_segsz);
1016                         rte_get_tx_ol_flag_list(m->ol_flags, buf, sizeof(buf));
1017                         printf("tx: flags=%s", buf);
1018                         printf("\n");
1019                 }
1020         }
1021
1022         if (unlikely(gro_enable)) {
1023                 if (gro_flush_cycles == GRO_DEFAULT_FLUSH_CYCLES) {
1024                         nb_rx = rte_gro_reassemble_burst(pkts_burst, nb_rx,
1025                                         &(gro_ports[fs->rx_port].param));
1026                 } else {
1027                         gro_ctx = current_fwd_lcore()->gro_ctx;
1028                         nb_rx = rte_gro_reassemble(pkts_burst, nb_rx, gro_ctx);
1029
1030                         if (++fs->gro_times >= gro_flush_cycles) {
1031                                 gro_pkts_num = rte_gro_get_pkt_count(gro_ctx);
1032                                 if (gro_pkts_num > MAX_PKT_BURST - nb_rx)
1033                                         gro_pkts_num = MAX_PKT_BURST - nb_rx;
1034
1035                                 nb_rx += rte_gro_timeout_flush(gro_ctx, 0,
1036                                                 RTE_GRO_TCP_IPV4,
1037                                                 &pkts_burst[nb_rx],
1038                                                 gro_pkts_num);
1039                                 fs->gro_times = 0;
1040                         }
1041                 }
1042         }
1043
1044         if (gso_ports[fs->tx_port].enable == 0)
1045                 tx_pkts_burst = pkts_burst;
1046         else {
1047                 gso_ctx = &(current_fwd_lcore()->gso_ctx);
1048                 gso_ctx->gso_size = gso_max_segment_size;
1049                 for (i = 0; i < nb_rx; i++) {
1050                         ret = rte_gso_segment(pkts_burst[i], gso_ctx,
1051                                         &gso_segments[nb_segments],
1052                                         GSO_MAX_PKT_BURST - nb_segments);
1053                         if (ret >= 0)
1054                                 nb_segments += ret;
1055                         else {
1056                                 TESTPMD_LOG(DEBUG, "Unable to segment packet");
1057                                 rte_pktmbuf_free(pkts_burst[i]);
1058                         }
1059                 }
1060
1061                 tx_pkts_burst = gso_segments;
1062                 nb_rx = nb_segments;
1063         }
1064
1065         nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue,
1066                         tx_pkts_burst, nb_rx);
1067         if (nb_prep != nb_rx)
1068                 printf("Preparing packet burst to transmit failed: %s\n",
1069                                 rte_strerror(rte_errno));
1070
1071         nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, tx_pkts_burst,
1072                         nb_prep);
1073
1074         /*
1075          * Retry if necessary
1076          */
1077         if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) {
1078                 retry = 0;
1079                 while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
1080                         rte_delay_us(burst_tx_delay_time);
1081                         nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
1082                                         &tx_pkts_burst[nb_tx], nb_rx - nb_tx);
1083                 }
1084         }
1085         fs->tx_packets += nb_tx;
1086         fs->rx_bad_ip_csum += rx_bad_ip_csum;
1087         fs->rx_bad_l4_csum += rx_bad_l4_csum;
1088         fs->rx_bad_outer_l4_csum += rx_bad_outer_l4_csum;
1089
1090 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1091         fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
1092 #endif
1093         if (unlikely(nb_tx < nb_rx)) {
1094                 fs->fwd_dropped += (nb_rx - nb_tx);
1095                 do {
1096                         rte_pktmbuf_free(tx_pkts_burst[nb_tx]);
1097                 } while (++nb_tx < nb_rx);
1098         }
1099
1100 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1101         end_tsc = rte_rdtsc();
1102         core_cycles = (end_tsc - start_tsc);
1103         fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
1104 #endif
1105 }
1106
/*
 * Forwarding-engine descriptor for the checksum engine.
 *
 * The descriptor is published under the mode name "csum" and places
 * pkt_burst_checksum_forward in the packet_fwd slot.  The port_fwd_begin
 * and port_fwd_end slots are explicitly left NULL, so this engine
 * supplies no callback for either of them.
 *
 * NOTE(review): the exact contract of each slot (when it is invoked and
 * with which arguments) is defined by struct fwd_engine in testpmd.h,
 * which is not visible from this file -- confirm there.
 */
1107 struct fwd_engine csum_fwd_engine = {
1108         .fwd_mode_name  = "csum",
1109         .port_fwd_begin = NULL,
1110         .port_fwd_end   = NULL,
1111         .packet_fwd     = pkt_burst_checksum_forward,
1112 };