eal: add and use unaligned integer types
[dpdk.git] / app / test-pmd / icmpecho.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2013 6WIND
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  */
34
35 #include <stdarg.h>
36 #include <string.h>
37 #include <stdio.h>
38 #include <errno.h>
39 #include <stdint.h>
40 #include <unistd.h>
41 #include <inttypes.h>
42
43 #include <sys/queue.h>
44 #include <sys/stat.h>
45
46 #include <rte_common.h>
47 #include <rte_byteorder.h>
48 #include <rte_log.h>
49 #include <rte_debug.h>
50 #include <rte_cycles.h>
51 #include <rte_per_lcore.h>
52 #include <rte_lcore.h>
53 #include <rte_atomic.h>
54 #include <rte_branch_prediction.h>
55 #include <rte_ring.h>
56 #include <rte_memory.h>
57 #include <rte_mempool.h>
58 #include <rte_mbuf.h>
59 #include <rte_ether.h>
60 #include <rte_ethdev.h>
61 #include <rte_arp.h>
62 #include <rte_ip.h>
63 #include <rte_icmp.h>
64 #include <rte_string_fns.h>
65
66 #include "testpmd.h"
67
68 static const char *
69 arp_op_name(uint16_t arp_op)
70 {
71         switch (arp_op ) {
72         case ARP_OP_REQUEST:
73                 return "ARP Request";
74         case ARP_OP_REPLY:
75                 return "ARP Reply";
76         case ARP_OP_REVREQUEST:
77                 return "Reverse ARP Request";
78         case ARP_OP_REVREPLY:
79                 return "Reverse ARP Reply";
80         case ARP_OP_INVREQUEST:
81                 return "Peer Identify Request";
82         case ARP_OP_INVREPLY:
83                 return "Peer Identify Reply";
84         default:
85                 break;
86         }
87         return "Unkwown ARP op";
88 }
89
/*
 * Map an IP protocol number to a printable name.
 *
 * Protocol numbers 0..108 are looked up in a table indexed directly by the
 * protocol number, so the position of every entry matters. A few higher
 * numbers are handled by the switch below; everything else is reported as
 * "UNASSIGNED" (which here only means "not named by this function").
 *
 * Returns a pointer to a static string literal.
 */
static const char *
ip_proto_name(uint16_t ip_proto)
{
	static const char * const ip_proto_names[] = {
		/* 0 - 4 */
		"IP6HOPOPTS", /**< IP6 hop-by-hop options */
		"ICMP",       /**< control message protocol */
		"IGMP",       /**< group mgmt protocol */
		"GGP",        /**< gateway^2 (deprecated) */
		"IPv4",       /**< IPv4 encapsulation */

		/* 5 - 9 */
		"UNASSIGNED",
		"TCP",        /**< transport control protocol */
		"ST",         /**< Stream protocol II */
		"EGP",        /**< exterior gateway protocol */
		"PIGP",       /**< private interior gateway */

		/* 10 - 14 */
		"RCC_MON",    /**< BBN RCC Monitoring */
		"NVPII",      /**< network voice protocol*/
		"PUP",        /**< pup */
		"ARGUS",      /**< Argus */
		"EMCON",      /**< EMCON */

		/* 15 - 19 */
		"XNET",       /**< Cross Net Debugger */
		"CHAOS",      /**< Chaos*/
		"UDP",        /**< user datagram protocol */
		"MUX",        /**< Multiplexing */
		"DCN_MEAS",   /**< DCN Measurement Subsystems */

		/* 20 - 24 */
		"HMP",        /**< Host Monitoring */
		"PRM",        /**< Packet Radio Measurement */
		"XNS_IDP",    /**< xns idp */
		"TRUNK1",     /**< Trunk-1 */
		"TRUNK2",     /**< Trunk-2 */

		/* 25 - 29 */
		"LEAF1",      /**< Leaf-1 */
		"LEAF2",      /**< Leaf-2 */
		"RDP",        /**< Reliable Data */
		"IRTP",       /**< Reliable Transaction */
		"TP4",        /**< tp-4 w/ class negotiation */

		/* 30 - 34 */
		"BLT",        /**< Bulk Data Transfer */
		"NSP",        /**< Network Services */
		"INP",        /**< Merit Internodal */
		"SEP",        /**< Sequential Exchange */
		"3PC",        /**< Third Party Connect */

		/* 35 - 39 */
		"IDPR",       /**< InterDomain Policy Routing */
		"XTP",        /**< XTP */
		"DDP",        /**< Datagram Delivery */
		"CMTP",       /**< Control Message Transport */
		"TPXX",       /**< TP++ Transport */

		/* 40 - 44 */
		"ILTP",       /**< IL transport protocol */
		"IPv6_HDR",   /**< IP6 header */
		"SDRP",       /**< Source Demand Routing */
		"IPv6_RTG",   /**< IP6 routing header */
		"IPv6_FRAG",  /**< IP6 fragmentation header */

		/* 45 - 49 */
		"IDRP",       /**< InterDomain Routing*/
		"RSVP",       /**< resource reservation */
		"GRE",        /**< General Routing Encap. */
		"MHRP",       /**< Mobile Host Routing */
		"BHA",        /**< BHA */

		/* 50 - 54 */
		"ESP",        /**< IP6 Encap Sec. Payload */
		"AH",         /**< IP6 Auth Header */
		"INLSP",      /**< Integ. Net Layer Security */
		"SWIPE",      /**< IP with encryption */
		"NHRP",       /**< Next Hop Resolution */

		/* 55 - 59 */
		"UNASSIGNED",
		"UNASSIGNED",
		"UNASSIGNED",
		"ICMPv6",     /**< ICMP6 */
		"IPv6NONEXT", /**< IP6 no next header */

		/* 60 - 64 */
		"Ipv6DSTOPTS",/**< IP6 destination option */
		"AHIP",       /**< any host internal protocol */
		"CFTP",       /**< CFTP */
		"HELLO",      /**< "hello" routing protocol */
		"SATEXPAK",   /**< SATNET/Backroom EXPAK */

		/* 65 - 69 */
		"KRYPTOLAN",  /**< Kryptolan */
		"RVD",        /**< Remote Virtual Disk */
		"IPPC",       /**< Pluribus Packet Core */
		"ADFS",       /**< Any distributed FS */
		"SATMON",     /**< Satnet Monitoring */

		/* 70 - 74 */
		"VISA",       /**< VISA Protocol */
		"IPCV",       /**< Packet Core Utility */
		"CPNX",       /**< Comp. Prot. Net. Executive */
		"CPHB",       /**< Comp. Prot. HeartBeat */
		"WSN",        /**< Wang Span Network */

		/* 75 - 79 */
		"PVP",        /**< Packet Video Protocol */
		"BRSATMON",   /**< BackRoom SATNET Monitoring */
		"ND",         /**< Sun net disk proto (temp.) */
		"WBMON",      /**< WIDEBAND Monitoring */
		"WBEXPAK",    /**< WIDEBAND EXPAK */

		/* 80 - 84 */
		"EON",        /**< ISO cnlp */
		"VMTP",       /**< VMTP */
		"SVMTP",      /**< Secure VMTP */
		"VINES",      /**< Banyon VINES */
		"TTP",        /**< TTP */

		/* 85 - 89 */
		"IGP",        /**< NSFNET-IGP */
		"DGP",        /**< dissimilar gateway prot. */
		"TCF",        /**< TCF */
		"IGRP",       /**< Cisco/GXS IGRP */
		"OSPFIGP",    /**< OSPFIGP */

		/* 90 - 94 */
		"SRPC",       /**< Strite RPC protocol */
		"LARP",       /**< Locus Address Resoloution */
		"MTP",        /**< Multicast Transport */
		"AX25",       /**< AX.25 Frames */
		"4IN4",       /**< IP encapsulated in IP */

		/* 95 - 99 */
		"MICP",       /**< Mobile Int.ing control */
		"SCCSP",      /**< Semaphore Comm. security */
		"ETHERIP",    /**< Ethernet IP encapsulation */
		"ENCAP",      /**< encapsulation header */
		"AES",        /**< any private encr. scheme */

		/* 100 - 104: PIM is protocol 103, so it must sit at index 103 */
		"GMTP",       /**< GMTP */
		"UNASSIGNED",
		"UNASSIGNED",
		"PIM",        /**< Protocol Independent Mcast */
		"UNASSIGNED",

		/* 105 - 108: IPComp is protocol 108 */
		"UNASSIGNED",
		"UNASSIGNED",
		"UNASSIGNED",
		"IPCOMP",     /**< payload compression (IPComp) */
	};

	if (ip_proto < sizeof(ip_proto_names) / sizeof(ip_proto_names[0]))
		return ip_proto_names[ip_proto];

	/* Sparse protocol numbers above the table range. */
	switch (ip_proto) {
#ifdef IPPROTO_PGM
	case IPPROTO_PGM:  /**< PGM */
		return "PGM";
#endif
	case IPPROTO_SCTP:  /**< Stream Control Transport Protocol */
		return "SCTP";
#ifdef IPPROTO_DIVERT
	case IPPROTO_DIVERT: /**< divert pseudo-protocol */
		return "DIVERT";
#endif
	case IPPROTO_RAW: /**< raw IP packet */
		return "RAW";
	default:
		break;
	}
	return "UNASSIGNED";
}
241
/*
 * Write the dotted-decimal text form of an IPv4 address into buf.
 *
 * be_ipv4_addr is passed through rte_be_to_cpu_32() before formatting
 * (presumably network byte order in, per the parameter name).
 * buf is written with sprintf() and is not length-checked here: each
 * octet is masked to 0..255, so at most 15 characters plus the
 * terminating NUL are produced and buf must hold at least 16 bytes.
 */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	/* %u with explicit casts: the octets are unsigned values. */
	sprintf(buf, "%u.%u.%u.%u",
		(unsigned int)((ipv4_addr >> 24) & 0xFF),
		(unsigned int)((ipv4_addr >> 16) & 0xFF),
		(unsigned int)((ipv4_addr >> 8) & 0xFF),
		(unsigned int)(ipv4_addr & 0xFF));
}
252
253 static void
254 ether_addr_dump(const char *what, const struct ether_addr *ea)
255 {
256         char buf[ETHER_ADDR_FMT_SIZE];
257
258         ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, ea);
259         if (what)
260                 printf("%s", what);
261         printf("%s", buf);
262 }
263
/*
 * Print an IPv4 address in dotted-decimal form to stdout, preceded by
 * the optional label "what" (skipped when NULL). No newline is appended.
 */
static void
ipv4_addr_dump(const char *what, uint32_t be_ipv4_addr)
{
	/* "255.255.255.255" plus the terminating NUL. */
	char dotted[16];

	ipv4_addr_to_dot(be_ipv4_addr, dotted);
	printf("%s%s", (what != NULL) ? what : "", dotted);
}
274
275 static uint16_t
276 ipv4_hdr_cksum(struct ipv4_hdr *ip_h)
277 {
278         uint16_t *v16_h;
279         uint32_t ip_cksum;
280
281         /*
282          * Compute the sum of successive 16-bit words of the IPv4 header,
283          * skipping the checksum field of the header.
284          */
285         v16_h = (unaligned_uint16_t *) ip_h;
286         ip_cksum = v16_h[0] + v16_h[1] + v16_h[2] + v16_h[3] +
287                 v16_h[4] + v16_h[6] + v16_h[7] + v16_h[8] + v16_h[9];
288
289         /* reduce 32 bit checksum to 16 bits and complement it */
290         ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
291         ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
292         ip_cksum = (~ip_cksum) & 0x0000FFFF;
293         return (ip_cksum == 0) ? 0xFFFF : (uint16_t) ip_cksum;
294 }
295
/*
 * Non-zero when ipv4_addr (as stored in the packet, converted with
 * rte_be_to_cpu_32()) is an IPv4 multicast address, i.e. lies in
 * 224.0.0.0/4: the four most significant bits are 1110, which covers
 * first octets 224 through 239.
 */
#define is_multicast_ipv4_addr(ipv4_addr) \
	(((rte_be_to_cpu_32((ipv4_addr)) >> 24) & 0x000000F0) == 0xE0)
298
299 /*
300  * Receive a burst of packets, lookup for ICMP echo requets, and, if any,
301  * send back ICMP echo replies.
302  */
303 static void
304 reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
305 {
306         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
307         struct rte_mbuf *pkt;
308         struct ether_hdr *eth_h;
309         struct vlan_hdr *vlan_h;
310         struct arp_hdr  *arp_h;
311         struct ipv4_hdr *ip_h;
312         struct icmp_hdr *icmp_h;
313         struct ether_addr eth_addr;
314         uint32_t ip_addr;
315         uint16_t nb_rx;
316         uint16_t nb_tx;
317         uint16_t nb_replies;
318         uint16_t eth_type;
319         uint16_t vlan_id;
320         uint16_t arp_op;
321         uint16_t arp_pro;
322         uint32_t cksum;
323         uint8_t  i;
324         int l2_len;
325 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
326         uint64_t start_tsc;
327         uint64_t end_tsc;
328         uint64_t core_cycles;
329 #endif
330
331 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
332         start_tsc = rte_rdtsc();
333 #endif
334
335         /*
336          * First, receive a burst of packets.
337          */
338         nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
339                                  nb_pkt_per_burst);
340         if (unlikely(nb_rx == 0))
341                 return;
342
343 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
344         fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
345 #endif
346         fs->rx_packets += nb_rx;
347         nb_replies = 0;
348         for (i = 0; i < nb_rx; i++) {
349                 pkt = pkts_burst[i];
350                 eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
351                 eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type);
352                 l2_len = sizeof(struct ether_hdr);
353                 if (verbose_level > 0) {
354                         printf("\nPort %d pkt-len=%u nb-segs=%u\n",
355                                fs->rx_port, pkt->pkt_len, pkt->nb_segs);
356                         ether_addr_dump("  ETH:  src=", &eth_h->s_addr);
357                         ether_addr_dump(" dst=", &eth_h->d_addr);
358                 }
359                 if (eth_type == ETHER_TYPE_VLAN) {
360                         vlan_h = (struct vlan_hdr *)
361                                 ((char *)eth_h + sizeof(struct ether_hdr));
362                         l2_len  += sizeof(struct vlan_hdr);
363                         eth_type = rte_be_to_cpu_16(vlan_h->eth_proto);
364                         if (verbose_level > 0) {
365                                 vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci)
366                                         & 0xFFF;
367                                 printf(" [vlan id=%u]", vlan_id);
368                         }
369                 }
370                 if (verbose_level > 0) {
371                         printf(" type=0x%04x\n", eth_type);
372                 }
373
374                 /* Reply to ARP requests */
375                 if (eth_type == ETHER_TYPE_ARP) {
376                         arp_h = (struct arp_hdr *) ((char *)eth_h + l2_len);
377                         arp_op = RTE_BE_TO_CPU_16(arp_h->arp_op);
378                         arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_pro);
379                         if (verbose_level > 0) {
380                                 printf("  ARP:  hrd=%d proto=0x%04x hln=%d "
381                                        "pln=%d op=%u (%s)\n",
382                                        RTE_BE_TO_CPU_16(arp_h->arp_hrd),
383                                        arp_pro, arp_h->arp_hln,
384                                        arp_h->arp_pln, arp_op,
385                                        arp_op_name(arp_op));
386                         }
387                         if ((RTE_BE_TO_CPU_16(arp_h->arp_hrd) !=
388                              ARP_HRD_ETHER) ||
389                             (arp_pro != ETHER_TYPE_IPv4) ||
390                             (arp_h->arp_hln != 6) ||
391                             (arp_h->arp_pln != 4)
392                             ) {
393                                 rte_pktmbuf_free(pkt);
394                                 if (verbose_level > 0)
395                                         printf("\n");
396                                 continue;
397                         }
398                         if (verbose_level > 0) {
399                                 ether_addr_copy(&arp_h->arp_data.arp_sha, &eth_addr);
400                                 ether_addr_dump("        sha=", &eth_addr);
401                                 ip_addr = arp_h->arp_data.arp_sip;
402                                 ipv4_addr_dump(" sip=", ip_addr);
403                                 printf("\n");
404                                 ether_addr_copy(&arp_h->arp_data.arp_tha, &eth_addr);
405                                 ether_addr_dump("        tha=", &eth_addr);
406                                 ip_addr = arp_h->arp_data.arp_tip;
407                                 ipv4_addr_dump(" tip=", ip_addr);
408                                 printf("\n");
409                         }
410                         if (arp_op != ARP_OP_REQUEST) {
411                                 rte_pktmbuf_free(pkt);
412                                 continue;
413                         }
414
415                         /*
416                          * Build ARP reply.
417                          */
418
419                         /* Use source MAC address as destination MAC address. */
420                         ether_addr_copy(&eth_h->s_addr, &eth_h->d_addr);
421                         /* Set source MAC address with MAC address of TX port */
422                         ether_addr_copy(&ports[fs->tx_port].eth_addr,
423                                         &eth_h->s_addr);
424
425                         arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
426                         ether_addr_copy(&arp_h->arp_data.arp_tha, &eth_addr);
427                         ether_addr_copy(&arp_h->arp_data.arp_sha, &arp_h->arp_data.arp_tha);
428                         ether_addr_copy(&eth_h->s_addr, &arp_h->arp_data.arp_sha);
429
430                         /* Swap IP addresses in ARP payload */
431                         ip_addr = arp_h->arp_data.arp_sip;
432                         arp_h->arp_data.arp_sip = arp_h->arp_data.arp_tip;
433                         arp_h->arp_data.arp_tip = ip_addr;
434                         pkts_burst[nb_replies++] = pkt;
435                         continue;
436                 }
437
438                 if (eth_type != ETHER_TYPE_IPv4) {
439                         rte_pktmbuf_free(pkt);
440                         continue;
441                 }
442                 ip_h = (struct ipv4_hdr *) ((char *)eth_h + l2_len);
443                 if (verbose_level > 0) {
444                         ipv4_addr_dump("  IPV4: src=", ip_h->src_addr);
445                         ipv4_addr_dump(" dst=", ip_h->dst_addr);
446                         printf(" proto=%d (%s)\n",
447                                ip_h->next_proto_id,
448                                ip_proto_name(ip_h->next_proto_id));
449                 }
450
451                 /*
452                  * Check if packet is a ICMP echo request.
453                  */
454                 icmp_h = (struct icmp_hdr *) ((char *)ip_h +
455                                               sizeof(struct ipv4_hdr));
456                 if (! ((ip_h->next_proto_id == IPPROTO_ICMP) &&
457                        (icmp_h->icmp_type == IP_ICMP_ECHO_REQUEST) &&
458                        (icmp_h->icmp_code == 0))) {
459                         rte_pktmbuf_free(pkt);
460                         continue;
461                 }
462
463                 if (verbose_level > 0)
464                         printf("  ICMP: echo request seq id=%d\n",
465                                rte_be_to_cpu_16(icmp_h->icmp_seq_nb));
466
467                 /*
468                  * Prepare ICMP echo reply to be sent back.
469                  * - switch ethernet source and destinations addresses,
470                  * - use the request IP source address as the reply IP
471                  *    destination address,
472                  * - if the request IP destination address is a multicast
473                  *   address:
474                  *     - choose a reply IP source address different from the
475                  *       request IP source address,
476                  *     - re-compute the IP header checksum.
477                  *   Otherwise:
478                  *     - switch the request IP source and destination
479                  *       addresses in the reply IP header,
480                  *     - keep the IP header checksum unchanged.
481                  * - set IP_ICMP_ECHO_REPLY in ICMP header.
482                  * ICMP checksum is computed by assuming it is valid in the
483                  * echo request and not verified.
484                  */
485                 ether_addr_copy(&eth_h->s_addr, &eth_addr);
486                 ether_addr_copy(&eth_h->d_addr, &eth_h->s_addr);
487                 ether_addr_copy(&eth_addr, &eth_h->d_addr);
488                 ip_addr = ip_h->src_addr;
489                 if (is_multicast_ipv4_addr(ip_h->dst_addr)) {
490                         uint32_t ip_src;
491
492                         ip_src = rte_be_to_cpu_32(ip_addr);
493                         if ((ip_src & 0x00000003) == 1)
494                                 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000002;
495                         else
496                                 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000001;
497                         ip_h->src_addr = rte_cpu_to_be_32(ip_src);
498                         ip_h->dst_addr = ip_addr;
499                         ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h);
500                 } else {
501                         ip_h->src_addr = ip_h->dst_addr;
502                         ip_h->dst_addr = ip_addr;
503                 }
504                 icmp_h->icmp_type = IP_ICMP_ECHO_REPLY;
505                 cksum = ~icmp_h->icmp_cksum & 0xffff;
506                 cksum += ~htons(IP_ICMP_ECHO_REQUEST << 8) & 0xffff;
507                 cksum += htons(IP_ICMP_ECHO_REPLY << 8);
508                 cksum = (cksum & 0xffff) + (cksum >> 16);
509                 cksum = (cksum & 0xffff) + (cksum >> 16);
510                 icmp_h->icmp_cksum = ~cksum;
511                 pkts_burst[nb_replies++] = pkt;
512         }
513
514         /* Send back ICMP echo replies, if any. */
515         if (nb_replies > 0) {
516                 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
517                                          nb_replies);
518                 fs->tx_packets += nb_tx;
519 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
520                 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
521 #endif
522                 if (unlikely(nb_tx < nb_replies)) {
523                         fs->fwd_dropped += (nb_replies - nb_tx);
524                         do {
525                                 rte_pktmbuf_free(pkts_burst[nb_tx]);
526                         } while (++nb_tx < nb_replies);
527                 }
528         }
529
530 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
531         end_tsc = rte_rdtsc();
532         core_cycles = (end_tsc - start_tsc);
533         fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
534 #endif
535 }
536
537 struct fwd_engine icmp_echo_engine = {
538         .fwd_mode_name  = "icmpecho",
539         .port_fwd_begin = NULL,
540         .port_fwd_end   = NULL,
541         .packet_fwd     = reply_to_icmp_echo_rqsts,
542 };