app/testpmd: support checking descriptor status
[dpdk.git] / app / test-pmd / icmpecho.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2013 6WIND S.A.
3  */
4
5 #include <stdarg.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <stdint.h>
10 #include <unistd.h>
11 #include <inttypes.h>
12
13 #include <sys/queue.h>
14 #include <sys/stat.h>
15
16 #include <rte_common.h>
17 #include <rte_byteorder.h>
18 #include <rte_log.h>
19 #include <rte_debug.h>
20 #include <rte_cycles.h>
21 #include <rte_per_lcore.h>
22 #include <rte_lcore.h>
23 #include <rte_atomic.h>
24 #include <rte_branch_prediction.h>
25 #include <rte_memory.h>
26 #include <rte_mempool.h>
27 #include <rte_mbuf.h>
28 #include <rte_ether.h>
29 #include <rte_ethdev.h>
30 #include <rte_arp.h>
31 #include <rte_ip.h>
32 #include <rte_icmp.h>
33 #include <rte_string_fns.h>
34 #include <rte_flow.h>
35
36 #include "testpmd.h"
37
38 static const char *
39 arp_op_name(uint16_t arp_op)
40 {
41         switch (arp_op) {
42         case RTE_ARP_OP_REQUEST:
43                 return "ARP Request";
44         case RTE_ARP_OP_REPLY:
45                 return "ARP Reply";
46         case RTE_ARP_OP_REVREQUEST:
47                 return "Reverse ARP Request";
48         case RTE_ARP_OP_REVREPLY:
49                 return "Reverse ARP Reply";
50         case RTE_ARP_OP_INVREQUEST:
51                 return "Peer Identify Request";
52         case RTE_ARP_OP_INVREPLY:
53                 return "Peer Identify Reply";
54         default:
55                 break;
56         }
57         return "Unkwown ARP op";
58 }
59
/*
 * Map an IP protocol number to a printable name.
 *
 * Numbers 0..108 are resolved through a table indexed directly by the
 * protocol number (entries are laid out in groups of five to make the
 * index easy to check). A few larger numbers are handled by the switch
 * that follows. Anything else yields "UNASSIGNED".
 *
 * The returned pointer refers to a string literal and must not be freed.
 */
static const char *
ip_proto_name(uint16_t ip_proto)
{
	static const char * ip_proto_names[] = {
		/* 0 - 4 */
		"IP6HOPOPTS", /**< IP6 hop-by-hop options */
		"ICMP",       /**< control message protocol */
		"IGMP",       /**< group mgmt protocol */
		"GGP",        /**< gateway^2 (deprecated) */
		"IPv4",       /**< IPv4 encapsulation */

		/* 5 - 9 */
		"UNASSIGNED",
		"TCP",        /**< transport control protocol */
		"ST",         /**< Stream protocol II */
		"EGP",        /**< exterior gateway protocol */
		"PIGP",       /**< private interior gateway */

		/* 10 - 14 */
		"RCC_MON",    /**< BBN RCC Monitoring */
		"NVPII",      /**< network voice protocol*/
		"PUP",        /**< pup */
		"ARGUS",      /**< Argus */
		"EMCON",      /**< EMCON */

		/* 15 - 19 */
		"XNET",       /**< Cross Net Debugger */
		"CHAOS",      /**< Chaos*/
		"UDP",        /**< user datagram protocol */
		"MUX",        /**< Multiplexing */
		"DCN_MEAS",   /**< DCN Measurement Subsystems */

		/* 20 - 24 */
		"HMP",        /**< Host Monitoring */
		"PRM",        /**< Packet Radio Measurement */
		"XNS_IDP",    /**< xns idp */
		"TRUNK1",     /**< Trunk-1 */
		"TRUNK2",     /**< Trunk-2 */

		/* 25 - 29 */
		"LEAF1",      /**< Leaf-1 */
		"LEAF2",      /**< Leaf-2 */
		"RDP",        /**< Reliable Data */
		"IRTP",       /**< Reliable Transaction */
		"TP4",        /**< tp-4 w/ class negotiation */

		/* 30 - 34 */
		"BLT",        /**< Bulk Data Transfer */
		"NSP",        /**< Network Services */
		"INP",        /**< Merit Internodal */
		"SEP",        /**< Sequential Exchange */
		"3PC",        /**< Third Party Connect */

		/* 35 - 39 */
		"IDPR",       /**< InterDomain Policy Routing */
		"XTP",        /**< XTP */
		"DDP",        /**< Datagram Delivery */
		"CMTP",       /**< Control Message Transport */
		"TPXX",       /**< TP++ Transport */

		/* 40 - 44 */
		"ILTP",       /**< IL transport protocol */
		"IPv6_HDR",   /**< IP6 header */
		"SDRP",       /**< Source Demand Routing */
		"IPv6_RTG",   /**< IP6 routing header */
		"IPv6_FRAG",  /**< IP6 fragmentation header */

		/* 45 - 49 */
		"IDRP",       /**< InterDomain Routing*/
		"RSVP",       /**< resource reservation */
		"GRE",        /**< General Routing Encap. */
		"MHRP",       /**< Mobile Host Routing */
		"BHA",        /**< BHA */

		/* 50 - 54 */
		"ESP",        /**< IP6 Encap Sec. Payload */
		"AH",         /**< IP6 Auth Header */
		"INLSP",      /**< Integ. Net Layer Security */
		"SWIPE",      /**< IP with encryption */
		"NHRP",       /**< Next Hop Resolution */

		/* 55 - 59 */
		"UNASSIGNED",
		"UNASSIGNED",
		"UNASSIGNED",
		"ICMPv6",     /**< ICMP6 */
		"IPv6NONEXT", /**< IP6 no next header */

		/* 60 - 64 */
		"Ipv6DSTOPTS",/**< IP6 destination option */
		"AHIP",       /**< any host internal protocol */
		"CFTP",       /**< CFTP */
		"HELLO",      /**< "hello" routing protocol */
		"SATEXPAK",   /**< SATNET/Backroom EXPAK */

		/* 65 - 69 */
		"KRYPTOLAN",  /**< Kryptolan */
		"RVD",        /**< Remote Virtual Disk */
		"IPPC",       /**< Pluribus Packet Core */
		"ADFS",       /**< Any distributed FS */
		"SATMON",     /**< Satnet Monitoring */

		/* 70 - 74 */
		"VISA",       /**< VISA Protocol */
		"IPCV",       /**< Packet Core Utility */
		"CPNX",       /**< Comp. Prot. Net. Executive */
		"CPHB",       /**< Comp. Prot. HeartBeat */
		"WSN",        /**< Wang Span Network */

		/* 75 - 79 */
		"PVP",        /**< Packet Video Protocol */
		"BRSATMON",   /**< BackRoom SATNET Monitoring */
		"ND",         /**< Sun net disk proto (temp.) */
		"WBMON",      /**< WIDEBAND Monitoring */
		"WBEXPAK",    /**< WIDEBAND EXPAK */

		/* 80 - 84 */
		"EON",        /**< ISO cnlp */
		"VMTP",       /**< VMTP */
		"SVMTP",      /**< Secure VMTP */
		"VINES",      /**< Banyon VINES */
		"TTP",        /**< TTP */

		/* 85 - 89 */
		"IGP",        /**< NSFNET-IGP */
		"DGP",        /**< dissimilar gateway prot. */
		"TCF",        /**< TCF */
		"IGRP",       /**< Cisco/GXS IGRP */
		"OSPFIGP",    /**< OSPFIGP */

		/* 90 - 94 */
		"SRPC",       /**< Strite RPC protocol */
		"LARP",       /**< Locus Address Resolution */
		"MTP",        /**< Multicast Transport */
		"AX25",       /**< AX.25 Frames */
		"4IN4",       /**< IP encapsulated in IP */

		/* 95 - 99 */
		"MICP",       /**< Mobile Int.ing control */
		"SCCSP",      /**< Semaphore Comm. security */
		"ETHERIP",    /**< Ethernet IP encapsulation */
		"ENCAP",      /**< encapsulation header */
		"AES",        /**< any private encr. scheme */

		/* 100 - 104 */
		"GMTP",       /**< GMTP */
		"UNASSIGNED",
		"UNASSIGNED",
		"PIM",        /**< Protocol Independent Mcast (103) */
		"UNASSIGNED",

		/* 105 - 108 */
		"UNASSIGNED",
		"UNASSIGNED",
		"UNASSIGNED",
		"IPCOMP",     /**< payload compression (IPComp, 108) */
	};

	/* Direct lookup for every number covered by the table. */
	if (ip_proto < sizeof(ip_proto_names) / sizeof(ip_proto_names[0]))
		return ip_proto_names[ip_proto];

	/* Sparse, higher-numbered protocols; some exist only on some OSes. */
	switch (ip_proto) {
#ifdef IPPROTO_PGM
	case IPPROTO_PGM:  /**< PGM */
		return "PGM";
#endif
	case IPPROTO_SCTP:  /**< Stream Control Transport Protocol */
		return "SCTP";
#ifdef IPPROTO_DIVERT
	case IPPROTO_DIVERT: /**< divert pseudo-protocol */
		return "DIVERT";
#endif
	case IPPROTO_RAW: /**< raw IP packet */
		return "RAW";
	default:
		break;
	}
	return "UNASSIGNED";
}
211
/*
 * Write the dotted-decimal text form of an IPv4 address into buf.
 *
 * be_ipv4_addr is converted with rte_be_to_cpu_32() before formatting, so
 * it is expected in big-endian (network) byte order.
 *
 * buf must be able to hold the longest possible result,
 * "255.255.255.255" plus the terminating NUL, i.e. at least 16 bytes.
 * The size cannot be checked here because only the pointer is passed.
 */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	/*
	 * Each octet is an unsigned value in 0..255; print it with %u and an
	 * explicit cast so the conversion specifier matches the argument type.
	 */
	sprintf(buf, "%u.%u.%u.%u",
		(unsigned int)((ipv4_addr >> 24) & 0xFF),
		(unsigned int)((ipv4_addr >> 16) & 0xFF),
		(unsigned int)((ipv4_addr >> 8) & 0xFF),
		(unsigned int)(ipv4_addr & 0xFF));
}
222
223 static void
224 ether_addr_dump(const char *what, const struct rte_ether_addr *ea)
225 {
226         char buf[RTE_ETHER_ADDR_FMT_SIZE];
227
228         rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, ea);
229         if (what)
230                 printf("%s", what);
231         printf("%s", buf);
232 }
233
/*
 * Print an optional label followed by an IPv4 address in dotted-decimal
 * notation on stdout.
 *
 * what:         text printed before the address; skipped when NULL.
 * be_ipv4_addr: address handed unchanged to ipv4_addr_to_dot().
 *
 * No trailing newline is emitted.
 */
static void
ipv4_addr_dump(const char *what, uint32_t be_ipv4_addr)
{
	/* Room for "255.255.255.255" and its terminating NUL. */
	char dotted[16];

	ipv4_addr_to_dot(be_ipv4_addr, dotted);
	printf("%s%s", (what != NULL) ? what : "", dotted);
}
244
245 static uint16_t
246 ipv4_hdr_cksum(struct rte_ipv4_hdr *ip_h)
247 {
248         uint16_t *v16_h;
249         uint32_t ip_cksum;
250
251         /*
252          * Compute the sum of successive 16-bit words of the IPv4 header,
253          * skipping the checksum field of the header.
254          */
255         v16_h = (unaligned_uint16_t *) ip_h;
256         ip_cksum = v16_h[0] + v16_h[1] + v16_h[2] + v16_h[3] +
257                 v16_h[4] + v16_h[6] + v16_h[7] + v16_h[8] + v16_h[9];
258
259         /* reduce 32 bit checksum to 16 bits and complement it */
260         ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
261         ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
262         ip_cksum = (~ip_cksum) & 0x0000FFFF;
263         return (ip_cksum == 0) ? 0xFFFF : (uint16_t) ip_cksum;
264 }
265
/*
 * True when ipv4_addr (big-endian, as stored in the IPv4 header) is a
 * multicast address, i.e. lies in 224.0.0.0/4: the four most significant
 * bits of the address are 1110.
 */
#define is_multicast_ipv4_addr(ipv4_addr) \
	(((rte_be_to_cpu_32((ipv4_addr)) >> 28) & 0x0000000F) == 0xE)
268
269 /*
270  * Receive a burst of packets, lookup for ICMP echo requests, and, if any,
271  * send back ICMP echo replies.
272  */
273 static void
274 reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
275 {
276         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
277         struct rte_mbuf *pkt;
278         struct rte_ether_hdr *eth_h;
279         struct rte_vlan_hdr *vlan_h;
280         struct rte_arp_hdr  *arp_h;
281         struct rte_ipv4_hdr *ip_h;
282         struct rte_icmp_hdr *icmp_h;
283         struct rte_ether_addr eth_addr;
284         uint32_t retry;
285         uint32_t ip_addr;
286         uint16_t nb_rx;
287         uint16_t nb_tx;
288         uint16_t nb_replies;
289         uint16_t eth_type;
290         uint16_t vlan_id;
291         uint16_t arp_op;
292         uint16_t arp_pro;
293         uint32_t cksum;
294         uint8_t  i;
295         int l2_len;
296 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
297         uint64_t start_tsc;
298         uint64_t end_tsc;
299         uint64_t core_cycles;
300 #endif
301
302 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
303         start_tsc = rte_rdtsc();
304 #endif
305
306         /*
307          * First, receive a burst of packets.
308          */
309         nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
310                                  nb_pkt_per_burst);
311         if (unlikely(nb_rx == 0))
312                 return;
313
314 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
315         fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
316 #endif
317         fs->rx_packets += nb_rx;
318         nb_replies = 0;
319         for (i = 0; i < nb_rx; i++) {
320                 if (likely(i < nb_rx - 1))
321                         rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1],
322                                                        void *));
323                 pkt = pkts_burst[i];
324                 eth_h = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
325                 eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type);
326                 l2_len = sizeof(struct rte_ether_hdr);
327                 if (verbose_level > 0) {
328                         printf("\nPort %d pkt-len=%u nb-segs=%u\n",
329                                fs->rx_port, pkt->pkt_len, pkt->nb_segs);
330                         ether_addr_dump("  ETH:  src=", &eth_h->s_addr);
331                         ether_addr_dump(" dst=", &eth_h->d_addr);
332                 }
333                 if (eth_type == RTE_ETHER_TYPE_VLAN) {
334                         vlan_h = (struct rte_vlan_hdr *)
335                                 ((char *)eth_h + sizeof(struct rte_ether_hdr));
336                         l2_len  += sizeof(struct rte_vlan_hdr);
337                         eth_type = rte_be_to_cpu_16(vlan_h->eth_proto);
338                         if (verbose_level > 0) {
339                                 vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci)
340                                         & 0xFFF;
341                                 printf(" [vlan id=%u]", vlan_id);
342                         }
343                 }
344                 if (verbose_level > 0) {
345                         printf(" type=0x%04x\n", eth_type);
346                 }
347
348                 /* Reply to ARP requests */
349                 if (eth_type == RTE_ETHER_TYPE_ARP) {
350                         arp_h = (struct rte_arp_hdr *) ((char *)eth_h + l2_len);
351                         arp_op = RTE_BE_TO_CPU_16(arp_h->arp_opcode);
352                         arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_protocol);
353                         if (verbose_level > 0) {
354                                 printf("  ARP:  hrd=%d proto=0x%04x hln=%d "
355                                        "pln=%d op=%u (%s)\n",
356                                        RTE_BE_TO_CPU_16(arp_h->arp_hardware),
357                                        arp_pro, arp_h->arp_hlen,
358                                        arp_h->arp_plen, arp_op,
359                                        arp_op_name(arp_op));
360                         }
361                         if ((RTE_BE_TO_CPU_16(arp_h->arp_hardware) !=
362                              RTE_ARP_HRD_ETHER) ||
363                             (arp_pro != RTE_ETHER_TYPE_IPV4) ||
364                             (arp_h->arp_hlen != 6) ||
365                             (arp_h->arp_plen != 4)
366                             ) {
367                                 rte_pktmbuf_free(pkt);
368                                 if (verbose_level > 0)
369                                         printf("\n");
370                                 continue;
371                         }
372                         if (verbose_level > 0) {
373                                 rte_ether_addr_copy(&arp_h->arp_data.arp_sha,
374                                                 &eth_addr);
375                                 ether_addr_dump("        sha=", &eth_addr);
376                                 ip_addr = arp_h->arp_data.arp_sip;
377                                 ipv4_addr_dump(" sip=", ip_addr);
378                                 printf("\n");
379                                 rte_ether_addr_copy(&arp_h->arp_data.arp_tha,
380                                                 &eth_addr);
381                                 ether_addr_dump("        tha=", &eth_addr);
382                                 ip_addr = arp_h->arp_data.arp_tip;
383                                 ipv4_addr_dump(" tip=", ip_addr);
384                                 printf("\n");
385                         }
386                         if (arp_op != RTE_ARP_OP_REQUEST) {
387                                 rte_pktmbuf_free(pkt);
388                                 continue;
389                         }
390
391                         /*
392                          * Build ARP reply.
393                          */
394
395                         /* Use source MAC address as destination MAC address. */
396                         rte_ether_addr_copy(&eth_h->s_addr, &eth_h->d_addr);
397                         /* Set source MAC address with MAC address of TX port */
398                         rte_ether_addr_copy(&ports[fs->tx_port].eth_addr,
399                                         &eth_h->s_addr);
400
401                         arp_h->arp_opcode = rte_cpu_to_be_16(RTE_ARP_OP_REPLY);
402                         rte_ether_addr_copy(&arp_h->arp_data.arp_tha,
403                                         &eth_addr);
404                         rte_ether_addr_copy(&arp_h->arp_data.arp_sha,
405                                         &arp_h->arp_data.arp_tha);
406                         rte_ether_addr_copy(&eth_h->s_addr,
407                                         &arp_h->arp_data.arp_sha);
408
409                         /* Swap IP addresses in ARP payload */
410                         ip_addr = arp_h->arp_data.arp_sip;
411                         arp_h->arp_data.arp_sip = arp_h->arp_data.arp_tip;
412                         arp_h->arp_data.arp_tip = ip_addr;
413                         pkts_burst[nb_replies++] = pkt;
414                         continue;
415                 }
416
417                 if (eth_type != RTE_ETHER_TYPE_IPV4) {
418                         rte_pktmbuf_free(pkt);
419                         continue;
420                 }
421                 ip_h = (struct rte_ipv4_hdr *) ((char *)eth_h + l2_len);
422                 if (verbose_level > 0) {
423                         ipv4_addr_dump("  IPV4: src=", ip_h->src_addr);
424                         ipv4_addr_dump(" dst=", ip_h->dst_addr);
425                         printf(" proto=%d (%s)\n",
426                                ip_h->next_proto_id,
427                                ip_proto_name(ip_h->next_proto_id));
428                 }
429
430                 /*
431                  * Check if packet is a ICMP echo request.
432                  */
433                 icmp_h = (struct rte_icmp_hdr *) ((char *)ip_h +
434                                               sizeof(struct rte_ipv4_hdr));
435                 if (! ((ip_h->next_proto_id == IPPROTO_ICMP) &&
436                        (icmp_h->icmp_type == RTE_IP_ICMP_ECHO_REQUEST) &&
437                        (icmp_h->icmp_code == 0))) {
438                         rte_pktmbuf_free(pkt);
439                         continue;
440                 }
441
442                 if (verbose_level > 0)
443                         printf("  ICMP: echo request seq id=%d\n",
444                                rte_be_to_cpu_16(icmp_h->icmp_seq_nb));
445
446                 /*
447                  * Prepare ICMP echo reply to be sent back.
448                  * - switch ethernet source and destinations addresses,
449                  * - use the request IP source address as the reply IP
450                  *    destination address,
451                  * - if the request IP destination address is a multicast
452                  *   address:
453                  *     - choose a reply IP source address different from the
454                  *       request IP source address,
455                  *     - re-compute the IP header checksum.
456                  *   Otherwise:
457                  *     - switch the request IP source and destination
458                  *       addresses in the reply IP header,
459                  *     - keep the IP header checksum unchanged.
460                  * - set RTE_IP_ICMP_ECHO_REPLY in ICMP header.
461                  * ICMP checksum is computed by assuming it is valid in the
462                  * echo request and not verified.
463                  */
464                 rte_ether_addr_copy(&eth_h->s_addr, &eth_addr);
465                 rte_ether_addr_copy(&eth_h->d_addr, &eth_h->s_addr);
466                 rte_ether_addr_copy(&eth_addr, &eth_h->d_addr);
467                 ip_addr = ip_h->src_addr;
468                 if (is_multicast_ipv4_addr(ip_h->dst_addr)) {
469                         uint32_t ip_src;
470
471                         ip_src = rte_be_to_cpu_32(ip_addr);
472                         if ((ip_src & 0x00000003) == 1)
473                                 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000002;
474                         else
475                                 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000001;
476                         ip_h->src_addr = rte_cpu_to_be_32(ip_src);
477                         ip_h->dst_addr = ip_addr;
478                         ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h);
479                 } else {
480                         ip_h->src_addr = ip_h->dst_addr;
481                         ip_h->dst_addr = ip_addr;
482                 }
483                 icmp_h->icmp_type = RTE_IP_ICMP_ECHO_REPLY;
484                 cksum = ~icmp_h->icmp_cksum & 0xffff;
485                 cksum += ~htons(RTE_IP_ICMP_ECHO_REQUEST << 8) & 0xffff;
486                 cksum += htons(RTE_IP_ICMP_ECHO_REPLY << 8);
487                 cksum = (cksum & 0xffff) + (cksum >> 16);
488                 cksum = (cksum & 0xffff) + (cksum >> 16);
489                 icmp_h->icmp_cksum = ~cksum;
490                 pkts_burst[nb_replies++] = pkt;
491         }
492
493         /* Send back ICMP echo replies, if any. */
494         if (nb_replies > 0) {
495                 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
496                                          nb_replies);
497                 /*
498                  * Retry if necessary
499                  */
500                 if (unlikely(nb_tx < nb_replies) && fs->retry_enabled) {
501                         retry = 0;
502                         while (nb_tx < nb_replies &&
503                                         retry++ < burst_tx_retry_num) {
504                                 rte_delay_us(burst_tx_delay_time);
505                                 nb_tx += rte_eth_tx_burst(fs->tx_port,
506                                                 fs->tx_queue,
507                                                 &pkts_burst[nb_tx],
508                                                 nb_replies - nb_tx);
509                         }
510                 }
511                 fs->tx_packets += nb_tx;
512 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
513                 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
514 #endif
515                 if (unlikely(nb_tx < nb_replies)) {
516                         fs->fwd_dropped += (nb_replies - nb_tx);
517                         do {
518                                 rte_pktmbuf_free(pkts_burst[nb_tx]);
519                         } while (++nb_tx < nb_replies);
520                 }
521         }
522
523 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
524         end_tsc = rte_rdtsc();
525         core_cycles = (end_tsc - start_tsc);
526         fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
527 #endif
528 }
529
530 struct fwd_engine icmp_echo_engine = {
531         .fwd_mode_name  = "icmpecho",
532         .port_fwd_begin = NULL,
533         .port_fwd_end   = NULL,
534         .packet_fwd     = reply_to_icmp_echo_rqsts,
535 };