4 * Copyright(c) 2013 6WIND
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 #include <sys/queue.h>
46 #include <rte_common.h>
47 #include <rte_byteorder.h>
49 #include <rte_debug.h>
50 #include <rte_cycles.h>
51 #include <rte_per_lcore.h>
52 #include <rte_lcore.h>
53 #include <rte_atomic.h>
54 #include <rte_branch_prediction.h>
55 #include <rte_memory.h>
56 #include <rte_mempool.h>
58 #include <rte_ether.h>
59 #include <rte_ethdev.h>
63 #include <rte_string_fns.h>
/*
 * Return a printable, statically allocated name for an ARP opcode.
 * The caller in this file passes the opcode after converting it from
 * big-endian. Any opcode not matched by a case label falls through to the
 * final "unknown" string.
 *
 * NOTE(review): this excerpt is missing lines (the return type, the switch
 * header and the case labels that belong to the "Reverse ARP Reply" and
 * "Peer Identify Reply" returns are not visible) - confirm against the
 * complete file before editing.
 * NOTE(review): the fallback string below spells "Unkwown"; it is runtime
 * output and is deliberately left untouched here.
 */
68 arp_op_name(uint16_t arp_op)
75 case ARP_OP_REVREQUEST:
76 return "Reverse ARP Request";
78 return "Reverse ARP Reply";
79 case ARP_OP_INVREQUEST:
80 return "Peer Identify Request";
82 return "Peer Identify Reply";
86 return "Unkwown ARP op";
/*
 * Return a printable name for an IP protocol number.
 *
 * Protocol numbers smaller than the number of entries in ip_proto_names are
 * resolved by direct indexing, so the table is positional: the entry at
 * index N names protocol N. A few other values (PGM, SCTP, DIVERT when the
 * platform defines it, RAW) are handled by the case labels after the table
 * lookup.
 *
 * NOTE(review): several table entries and the strings returned by the case
 * labels are not visible in this excerpt; because the table is indexed by
 * position, do not add or remove entries without checking the complete file.
 */
90 ip_proto_name(uint16_t ip_proto)
92 static const char * ip_proto_names[] = {
93 "IP6HOPOPTS", /**< IP6 hop-by-hop options */
94 "ICMP", /**< control message protocol */
95 "IGMP", /**< group mgmt protocol */
96 "GGP", /**< gateway^2 (deprecated) */
97 "IPv4", /**< IPv4 encapsulation */
100 "TCP", /**< transport control protocol */
101 "ST", /**< Stream protocol II */
102 "EGP", /**< exterior gateway protocol */
103 "PIGP", /**< private interior gateway */
105 "RCC_MON", /**< BBN RCC Monitoring */
106 "NVPII", /**< network voice protocol*/
108 "ARGUS", /**< Argus */
109 "EMCON", /**< EMCON */
111 "XNET", /**< Cross Net Debugger */
112 "CHAOS", /**< Chaos*/
113 "UDP", /**< user datagram protocol */
114 "MUX", /**< Multiplexing */
115 "DCN_MEAS", /**< DCN Measurement Subsystems */
117 "HMP", /**< Host Monitoring */
118 "PRM", /**< Packet Radio Measurement */
119 "XNS_IDP", /**< xns idp */
120 "TRUNK1", /**< Trunk-1 */
121 "TRUNK2", /**< Trunk-2 */
123 "LEAF1", /**< Leaf-1 */
124 "LEAF2", /**< Leaf-2 */
125 "RDP", /**< Reliable Data */
126 "IRTP", /**< Reliable Transaction */
127 "TP4", /**< tp-4 w/ class negotiation */
129 "BLT", /**< Bulk Data Transfer */
130 "NSP", /**< Network Services */
131 "INP", /**< Merit Internodal */
132 "SEP", /**< Sequential Exchange */
133 "3PC", /**< Third Party Connect */
135 "IDPR", /**< InterDomain Policy Routing */
137 "DDP", /**< Datagram Delivery */
138 "CMTP", /**< Control Message Transport */
139 "TPXX", /**< TP++ Transport */
141 "ILTP", /**< IL transport protocol */
142 "IPv6_HDR", /**< IP6 header */
143 "SDRP", /**< Source Demand Routing */
144 "IPv6_RTG", /**< IP6 routing header */
145 "IPv6_FRAG", /**< IP6 fragmentation header */
147 "IDRP", /**< InterDomain Routing*/
148 "RSVP", /**< resource reservation */
149 "GRE", /**< General Routing Encap. */
150 "MHRP", /**< Mobile Host Routing */
153 "ESP", /**< IP6 Encap Sec. Payload */
154 "AH", /**< IP6 Auth Header */
155 "INLSP", /**< Integ. Net Layer Security */
156 "SWIPE", /**< IP with encryption */
157 "NHRP", /**< Next Hop Resolution */
162 "ICMPv6", /**< ICMP6 */
163 "IPv6NONEXT", /**< IP6 no next header */
165 "Ipv6DSTOPTS",/**< IP6 destination option */
166 "AHIP", /**< any host internal protocol */
168 "HELLO", /**< "hello" routing protocol */
169 "SATEXPAK", /**< SATNET/Backroom EXPAK */
171 "KRYPTOLAN", /**< Kryptolan */
172 "RVD", /**< Remote Virtual Disk */
173 "IPPC", /**< Pluribus Packet Core */
174 "ADFS", /**< Any distributed FS */
175 "SATMON", /**< Satnet Monitoring */
177 "VISA", /**< VISA Protocol */
178 "IPCV", /**< Packet Core Utility */
179 "CPNX", /**< Comp. Prot. Net. Executive */
180 "CPHB", /**< Comp. Prot. HeartBeat */
181 "WSN", /**< Wang Span Network */
183 "PVP", /**< Packet Video Protocol */
184 "BRSATMON", /**< BackRoom SATNET Monitoring */
185 "ND", /**< Sun net disk proto (temp.) */
186 "WBMON", /**< WIDEBAND Monitoring */
187 "WBEXPAK", /**< WIDEBAND EXPAK */
189 "EON", /**< ISO cnlp */
191 "SVMTP", /**< Secure VMTP */
192 "VINES", /**< Banyan VINES */
195 "IGP", /**< NSFNET-IGP */
196 "DGP", /**< dissimilar gateway prot. */
198 "IGRP", /**< Cisco/GXS IGRP */
199 "OSPFIGP", /**< OSPFIGP */
201 "SRPC", /**< Strite RPC protocol */
202 "LARP", /**< Locus Address Resolution */
203 "MTP", /**< Multicast Transport */
204 "AX25", /**< AX.25 Frames */
205 "4IN4", /**< IP encapsulated in IP */
207 "MICP", /**< Mobile Int.ing control */
208 "SCCSP", /**< Semaphore Comm. security */
209 "ETHERIP", /**< Ethernet IP encapsulation */
210 "ENCAP", /**< encapsulation header */
211 "AES", /**< any private encr. scheme */
214 "IPCOMP", /**< payload compression (IPComp) */
217 "PIM", /**< Protocol Independent Mcast */
/* Fast path: protocol numbers covered by the table are looked up by index. */
220 if (ip_proto < sizeof(ip_proto_names) / sizeof(ip_proto_names[0]))
221 return ip_proto_names[ip_proto];
/* Protocol numbers beyond the table are matched individually below. */
224 case IPPROTO_PGM: /**< PGM */
227 case IPPROTO_SCTP: /**< Stream Control Transport Protocol */
229 #ifdef IPPROTO_DIVERT
230 case IPPROTO_DIVERT: /**< divert pseudo-protocol */
233 case IPPROTO_RAW: /**< raw IP packet */
/*
 * Write the dotted-decimal text form of a big-endian IPv4 address into buf.
 * The address is first converted to CPU byte order, then printed byte by
 * byte starting with the most significant one.
 *
 * NOTE(review): sprintf() does not bound the write, so the caller must
 * supply a buffer large enough for the longest result ("255.255.255.255"
 * plus the terminating NUL, i.e. 16 bytes).
 * NOTE(review): the local declaration and the last sprintf() argument are
 * not visible in this excerpt.
 */
242 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf)
246 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
247 sprintf(buf, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
248 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
/*
 * Format the Ethernet address `ea` as text, using ether_format_addr() and a
 * local buffer of ETHER_ADDR_FMT_SIZE bytes.
 *
 * NOTE(review): `what` is presumably a label printed in front of the
 * address, but the statement that prints it is not visible in this excerpt -
 * confirm against the complete file.
 */
253 ether_addr_dump(const char *what, const struct ether_addr *ea)
255 char buf[ETHER_ADDR_FMT_SIZE];
257 ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, ea);
/*
 * Convert a big-endian IPv4 address to dotted-decimal text with
 * ipv4_addr_to_dot().
 *
 * NOTE(review): the declaration of `buf` and the statement that prints
 * `what` and the formatted address are not visible in this excerpt; `what`
 * is presumably a label shown before the address - confirm.
 */
264 ipv4_addr_dump(const char *what, uint32_t be_ipv4_addr)
268 ipv4_addr_to_dot(be_ipv4_addr, buf);
/*
 * Compute the checksum of an IPv4 header.
 *
 * The header is read as an array of 16-bit words; words 0-4 and 6-9 are
 * added together (word 5, the checksum field itself, is skipped). The 32-bit
 * sum is folded into 16 bits twice so that any carry produced by the first
 * fold is absorbed, and the result is complemented.
 *
 * Returns the complemented sum, except that a result of 0 is returned as
 * 0xFFFF.
 *
 * NOTE(review): only ten 16-bit words (20 bytes) are summed, so a header
 * carrying IPv4 options is not fully covered by this routine.
 * NOTE(review): the local declarations are not visible in this excerpt.
 */
275 ipv4_hdr_cksum(struct ipv4_hdr *ip_h)
281 * Compute the sum of successive 16-bit words of the IPv4 header,
282 * skipping the checksum field of the header.
284 v16_h = (unaligned_uint16_t *) ip_h;
285 ip_cksum = v16_h[0] + v16_h[1] + v16_h[2] + v16_h[3] +
286 v16_h[4] + v16_h[6] + v16_h[7] + v16_h[8] + v16_h[9];
288 /* reduce 32 bit checksum to 16 bits and complement it */
289 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
290 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
291 ip_cksum = (~ip_cksum) & 0x0000FFFF;
/* A computed value of 0 is reported as 0xFFFF instead. */
292 return (ip_cksum == 0) ? 0xFFFF : (uint16_t) ip_cksum;
/*
 * Evaluate to non-zero when `ipv4_addr` (big-endian / network byte order)
 * is an IPv4 multicast address.
 *
 * IPv4 multicast is the whole 224.0.0.0/4 block, i.e. every address whose
 * four most significant bits are 1110 (first octet 224 through 239).
 * Comparing the full first octet against 0xE0 would recognise 224.x.x.x
 * only, so the test is made on the top nibble instead.
 *
 * The argument is evaluated exactly once.
 */
#define is_multicast_ipv4_addr(ipv4_addr) \
	((rte_be_to_cpu_32((ipv4_addr)) & 0xF0000000) == 0xE0000000)
299 * Receive a burst of packets, look for ICMP echo requests and, if any,
300 * send back ICMP echo replies.
/*
 * Per-burst handler of the "icmpecho" forwarding mode.
 *
 * Steps visible in this routine:
 *  1. Receive a burst from (fs->rx_port, fs->rx_queue) into pkts_burst and
 *     account it in fs->rx_packets.
 *  2. For every received packet, parse the Ethernet header (and one VLAN
 *     header when the Ethernet type is ETHER_TYPE_VLAN), then:
 *       - ARP: validate the header fields, free anything that is not an
 *         ARP request, otherwise rewrite the frame in place into an ARP
 *         reply;
 *       - IPv4: free anything that is not an ICMP echo request with code 0,
 *         otherwise rewrite the frame in place into an ICMP echo reply;
 *       - any other Ethernet type: free the packet.
 *     Packets turned into replies are stored back into pkts_burst at index
 *     nb_replies, i.e. the receive array is reused as the transmit array.
 *  3. Transmit the replies on (fs->tx_port, fs->tx_queue), optionally
 *     retrying, count them in fs->tx_packets, and free and count as dropped
 *     whatever could not be sent.
 * When verbose_level is greater than 0 the parsed headers are printed.
 *
 * NOTE(review): this excerpt is missing many lines (local declarations,
 * closing braces, continue statements, comment delimiters, continuation
 * lines of several calls) - consult the complete file before changing logic.
 * NOTE(review): the token written `ð_h` / `ð_addr` below looks like an
 * encoding-damaged `&eth_h` / `&eth_addr` - confirm against the original.
 */
303 reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
305 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
306 struct rte_mbuf *pkt;
307 struct ether_hdr *eth_h;
308 struct vlan_hdr *vlan_h;
309 struct arp_hdr *arp_h;
310 struct ipv4_hdr *ip_h;
311 struct icmp_hdr *icmp_h;
312 struct ether_addr eth_addr;
325 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
328 uint64_t core_cycles;
331 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
332 start_tsc = rte_rdtsc();
336 * First, receive a burst of packets.
338 nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
340 if (unlikely(nb_rx == 0))
343 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
344 fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
346 fs->rx_packets += nb_rx;
/* Walk the burst; the next packet's data is prefetched while this one is handled. */
348 for (i = 0; i < nb_rx; i++) {
349 if (likely(i < nb_rx - 1))
350 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1],
353 eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
354 eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type);
355 l2_len = sizeof(struct ether_hdr);
356 if (verbose_level > 0) {
357 printf("\nPort %d pkt-len=%u nb-segs=%u\n",
358 fs->rx_port, pkt->pkt_len, pkt->nb_segs);
359 ether_addr_dump(" ETH: src=", ð_h->s_addr);
360 ether_addr_dump(" dst=", ð_h->d_addr);
/* One VLAN tag is skipped: l2_len grows and the inner Ethernet type replaces eth_type. */
362 if (eth_type == ETHER_TYPE_VLAN) {
363 vlan_h = (struct vlan_hdr *)
364 ((char *)eth_h + sizeof(struct ether_hdr));
365 l2_len += sizeof(struct vlan_hdr);
366 eth_type = rte_be_to_cpu_16(vlan_h->eth_proto);
367 if (verbose_level > 0) {
368 vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci)
370 printf(" [vlan id=%u]", vlan_id);
373 if (verbose_level > 0) {
374 printf(" type=0x%04x\n", eth_type);
377 /* Reply to ARP requests */
378 if (eth_type == ETHER_TYPE_ARP) {
379 arp_h = (struct arp_hdr *) ((char *)eth_h + l2_len);
380 arp_op = RTE_BE_TO_CPU_16(arp_h->arp_op);
381 arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_pro);
382 if (verbose_level > 0) {
383 printf(" ARP: hrd=%d proto=0x%04x hln=%d "
384 "pln=%d op=%u (%s)\n",
385 RTE_BE_TO_CPU_16(arp_h->arp_hrd),
386 arp_pro, arp_h->arp_hln,
387 arp_h->arp_pln, arp_op,
388 arp_op_name(arp_op));
/* Packets failing the hardware-type, IPv4-protocol, 6-byte hln or 4-byte pln checks are freed. */
390 if ((RTE_BE_TO_CPU_16(arp_h->arp_hrd) !=
392 (arp_pro != ETHER_TYPE_IPv4) ||
393 (arp_h->arp_hln != 6) ||
394 (arp_h->arp_pln != 4)
396 rte_pktmbuf_free(pkt);
397 if (verbose_level > 0)
401 if (verbose_level > 0) {
402 ether_addr_copy(&arp_h->arp_data.arp_sha, ð_addr);
403 ether_addr_dump(" sha=", ð_addr);
404 ip_addr = arp_h->arp_data.arp_sip;
405 ipv4_addr_dump(" sip=", ip_addr);
407 ether_addr_copy(&arp_h->arp_data.arp_tha, ð_addr);
408 ether_addr_dump(" tha=", ð_addr);
409 ip_addr = arp_h->arp_data.arp_tip;
410 ipv4_addr_dump(" tip=", ip_addr);
/* Only ARP requests are answered; every other opcode is freed. */
413 if (arp_op != ARP_OP_REQUEST) {
414 rte_pktmbuf_free(pkt);
422 /* Use source MAC address as destination MAC address. */
423 ether_addr_copy(ð_h->s_addr, ð_h->d_addr);
424 /* Set source MAC address with MAC address of TX port */
425 ether_addr_copy(&ports[fs->tx_port].eth_addr,
428 arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
429 ether_addr_copy(&arp_h->arp_data.arp_tha, ð_addr);
430 ether_addr_copy(&arp_h->arp_data.arp_sha, &arp_h->arp_data.arp_tha);
431 ether_addr_copy(ð_h->s_addr, &arp_h->arp_data.arp_sha);
433 /* Swap IP addresses in ARP payload */
434 ip_addr = arp_h->arp_data.arp_sip;
435 arp_h->arp_data.arp_sip = arp_h->arp_data.arp_tip;
436 arp_h->arp_data.arp_tip = ip_addr;
437 pkts_burst[nb_replies++] = pkt;
/* From here on only IPv4 is of interest; other Ethernet types are freed. */
441 if (eth_type != ETHER_TYPE_IPv4) {
442 rte_pktmbuf_free(pkt);
445 ip_h = (struct ipv4_hdr *) ((char *)eth_h + l2_len);
446 if (verbose_level > 0) {
447 ipv4_addr_dump(" IPV4: src=", ip_h->src_addr);
448 ipv4_addr_dump(" dst=", ip_h->dst_addr);
449 printf(" proto=%d (%s)\n",
451 ip_proto_name(ip_h->next_proto_id));
455 * Check if packet is a ICMP echo request.
457 icmp_h = (struct icmp_hdr *) ((char *)ip_h +
458 sizeof(struct ipv4_hdr));
/* NOTE(review): icmp_h is placed right after a fixed-size IPv4 header, so IP options are not accounted for. */
459 if (! ((ip_h->next_proto_id == IPPROTO_ICMP) &&
460 (icmp_h->icmp_type == IP_ICMP_ECHO_REQUEST) &&
461 (icmp_h->icmp_code == 0))) {
462 rte_pktmbuf_free(pkt);
466 if (verbose_level > 0)
467 printf(" ICMP: echo request seq id=%d\n",
468 rte_be_to_cpu_16(icmp_h->icmp_seq_nb));
471 * Prepare ICMP echo reply to be sent back.
472 * - switch ethernet source and destinations addresses,
473 * - use the request IP source address as the reply IP
474 * destination address,
475 * - if the request IP destination address is a multicast
477 * - choose a reply IP source address different from the
478 * request IP source address,
479 * - re-compute the IP header checksum.
481 * - switch the request IP source and destination
482 * addresses in the reply IP header,
483 * - keep the IP header checksum unchanged.
484 * - set IP_ICMP_ECHO_REPLY in ICMP header.
485 * ICMP checksum is computed by assuming it is valid in the
486 * echo request and not verified.
488 ether_addr_copy(ð_h->s_addr, ð_addr);
489 ether_addr_copy(ð_h->d_addr, ð_h->s_addr);
490 ether_addr_copy(ð_addr, ð_h->d_addr);
491 ip_addr = ip_h->src_addr;
/* Multicast destination: the reply source is derived from the request source by setting its two low bits to 2 if they were 1, else to 1. */
492 if (is_multicast_ipv4_addr(ip_h->dst_addr)) {
495 ip_src = rte_be_to_cpu_32(ip_addr);
496 if ((ip_src & 0x00000003) == 1)
497 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000002;
499 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000001;
500 ip_h->src_addr = rte_cpu_to_be_32(ip_src);
501 ip_h->dst_addr = ip_addr;
502 ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h);
504 ip_h->src_addr = ip_h->dst_addr;
505 ip_h->dst_addr = ip_addr;
/* Incremental ICMP checksum update: remove the old type word from the un-complemented sum, add the new one, fold carries twice, complement again. */
507 icmp_h->icmp_type = IP_ICMP_ECHO_REPLY;
508 cksum = ~icmp_h->icmp_cksum & 0xffff;
509 cksum += ~htons(IP_ICMP_ECHO_REQUEST << 8) & 0xffff;
510 cksum += htons(IP_ICMP_ECHO_REPLY << 8);
511 cksum = (cksum & 0xffff) + (cksum >> 16);
512 cksum = (cksum & 0xffff) + (cksum >> 16);
513 icmp_h->icmp_cksum = ~cksum;
514 pkts_burst[nb_replies++] = pkt;
517 /* Send back ICMP echo replies, if any. */
518 if (nb_replies > 0) {
519 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
/* With retry enabled, keep offering the unsent tail after a delay, up to burst_tx_retry_num attempts. */
524 if (unlikely(nb_tx < nb_replies) && fs->retry_enabled) {
526 while (nb_tx < nb_replies &&
527 retry++ < burst_tx_retry_num) {
528 rte_delay_us(burst_tx_delay_time);
529 nb_tx += rte_eth_tx_burst(fs->tx_port,
535 fs->tx_packets += nb_tx;
536 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
537 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
/* Replies that still could not be transmitted are counted as dropped and freed. */
539 if (unlikely(nb_tx < nb_replies)) {
540 fs->fwd_dropped += (nb_replies - nb_tx);
542 rte_pktmbuf_free(pkts_burst[nb_tx]);
543 } while (++nb_tx < nb_replies);
547 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
548 end_tsc = rte_rdtsc();
549 core_cycles = (end_tsc - start_tsc);
550 fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
/*
 * Forwarding-engine descriptor for the "icmpecho" mode: no port begin/end
 * hooks are installed, and reply_to_icmp_echo_rqsts is set as the packet_fwd
 * handler.
 * NOTE(review): the closing of this initializer is not visible in this
 * excerpt.
 */
554 struct fwd_engine icmp_echo_engine = {
555 .fwd_mode_name = "icmpecho",
556 .port_fwd_begin = NULL,
557 .port_fwd_end = NULL,
558 .packet_fwd = reply_to_icmp_echo_rqsts,