1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2013 6WIND S.A.
3 */
13 #include <sys/queue.h>
16 #include <rte_common.h>
17 #include <rte_byteorder.h>
19 #include <rte_debug.h>
20 #include <rte_cycles.h>
21 #include <rte_per_lcore.h>
22 #include <rte_lcore.h>
23 #include <rte_atomic.h>
24 #include <rte_branch_prediction.h>
25 #include <rte_memory.h>
26 #include <rte_mempool.h>
28 #include <rte_ether.h>
29 #include <rte_ethdev.h>
33 #include <rte_string_fns.h>
/*
 * arp_op_name(): map an ARP opcode to a printable, constant string.
 *
 * arp_op is compared against the RTE_ARP_OP_* constants; each visible
 * case returns a fixed string literal. The last return supplies the
 * fallback text and is presumably reached when no case matches.
 *
 * NOTE(review): the source numbering embedded in this excerpt skips
 * lines inside this function, so the return type, the switch header,
 * the strings returned for REQUEST/REPLY and any default label are not
 * visible here - confirm against the complete file.
 * NOTE(review): the fallback literal is spelled "Unkwown"; it is emitted
 * at runtime, so it is left untouched here.
 */
39 arp_op_name(uint16_t arp_op)
42 case RTE_ARP_OP_REQUEST:
44 case RTE_ARP_OP_REPLY:
46 case RTE_ARP_OP_REVREQUEST:
47 return "Reverse ARP Request";
48 case RTE_ARP_OP_REVREPLY:
49 return "Reverse ARP Reply";
50 case RTE_ARP_OP_INVREQUEST:
51 return "Peer Identify Request";
52 case RTE_ARP_OP_INVREPLY:
53 return "Peer Identify Reply";
57 return "Unkwown ARP op";
/*
 * ip_proto_name(): map an IP protocol number to a printable name.
 *
 * Values smaller than RTE_DIM(ip_proto_names) are resolved by indexing
 * the static table directly with the protocol number. A few other values
 * (IPPROTO_PGM, IPPROTO_SCTP, IPPROTO_DIVERT when that macro is defined,
 * IPPROTO_RAW) are handled by explicit cases after the table lookup.
 *
 * NOTE(review): the table is positional - entry N names protocol N - so
 * every entry matters for the index of the ones that follow. The source
 * numbering embedded in this excerpt skips lines inside the initializer,
 * the switch and the final return, so some entries and all case bodies
 * are not visible here; confirm against the complete file before
 * editing the table.
 */
61 ip_proto_name(uint16_t ip_proto)
63 static const char * ip_proto_names[] = {
64 "IP6HOPOPTS", /**< IP6 hop-by-hop options */
65 "ICMP", /**< control message protocol */
66 "IGMP", /**< group mgmt protocol */
67 "GGP", /**< gateway^2 (deprecated) */
68 "IPv4", /**< IPv4 encapsulation */
71 "TCP", /**< transport control protocol */
72 "ST", /**< Stream protocol II */
73 "EGP", /**< exterior gateway protocol */
74 "PIGP", /**< private interior gateway */
76 "RCC_MON", /**< BBN RCC Monitoring */
77 "NVPII", /**< network voice protocol*/
79 "ARGUS", /**< Argus */
80 "EMCON", /**< EMCON */
82 "XNET", /**< Cross Net Debugger */
84 "UDP", /**< user datagram protocol */
85 "MUX", /**< Multiplexing */
86 "DCN_MEAS", /**< DCN Measurement Subsystems */
88 "HMP", /**< Host Monitoring */
89 "PRM", /**< Packet Radio Measurement */
90 "XNS_IDP", /**< xns idp */
91 "TRUNK1", /**< Trunk-1 */
92 "TRUNK2", /**< Trunk-2 */
94 "LEAF1", /**< Leaf-1 */
95 "LEAF2", /**< Leaf-2 */
96 "RDP", /**< Reliable Data */
97 "IRTP", /**< Reliable Transaction */
98 "TP4", /**< tp-4 w/ class negotiation */
100 "BLT", /**< Bulk Data Transfer */
101 "NSP", /**< Network Services */
102 "INP", /**< Merit Internodal */
103 "SEP", /**< Sequential Exchange */
104 "3PC", /**< Third Party Connect */
106 "IDPR", /**< InterDomain Policy Routing */
108 "DDP", /**< Datagram Delivery */
109 "CMTP", /**< Control Message Transport */
110 "TPXX", /**< TP++ Transport */
112 "ILTP", /**< IL transport protocol */
113 "IPv6_HDR", /**< IP6 header */
114 "SDRP", /**< Source Demand Routing */
115 "IPv6_RTG", /**< IP6 routing header */
116 "IPv6_FRAG", /**< IP6 fragmentation header */
118 "IDRP", /**< InterDomain Routing*/
119 "RSVP", /**< resource reservation */
120 "GRE", /**< General Routing Encap. */
121 "MHRP", /**< Mobile Host Routing */
124 "ESP", /**< IP6 Encap Sec. Payload */
125 "AH", /**< IP6 Auth Header */
126 "INLSP", /**< Integ. Net Layer Security */
127 "SWIPE", /**< IP with encryption */
128 "NHRP", /**< Next Hop Resolution */
133 "ICMPv6", /**< ICMP6 */
134 "IPv6NONEXT", /**< IP6 no next header */
136 "Ipv6DSTOPTS",/**< IP6 destination option */
137 "AHIP", /**< any host internal protocol */
139 "HELLO", /**< "hello" routing protocol */
140 "SATEXPAK", /**< SATNET/Backroom EXPAK */
142 "KRYPTOLAN", /**< Kryptolan */
143 "RVD", /**< Remote Virtual Disk */
144 "IPPC", /**< Pluribus Packet Core */
145 "ADFS", /**< Any distributed FS */
146 "SATMON", /**< Satnet Monitoring */
148 "VISA", /**< VISA Protocol */
149 "IPCV", /**< Packet Core Utility */
150 "CPNX", /**< Comp. Prot. Net. Executive */
151 "CPHB", /**< Comp. Prot. HeartBeat */
152 "WSN", /**< Wang Span Network */
154 "PVP", /**< Packet Video Protocol */
155 "BRSATMON", /**< BackRoom SATNET Monitoring */
156 "ND", /**< Sun net disk proto (temp.) */
157 "WBMON", /**< WIDEBAND Monitoring */
158 "WBEXPAK", /**< WIDEBAND EXPAK */
160 "EON", /**< ISO cnlp */
162 "SVMTP", /**< Secure VMTP */
163 "VINES", /**< Banyon VINES */
166 "IGP", /**< NSFNET-IGP */
167 "DGP", /**< dissimilar gateway prot. */
169 "IGRP", /**< Cisco/GXS IGRP */
170 "OSPFIGP", /**< OSPFIGP */
172 "SRPC", /**< Strite RPC protocol */
173 "LARP", /**< Locus Address Resolution */
174 "MTP", /**< Multicast Transport */
175 "AX25", /**< AX.25 Frames */
176 "4IN4", /**< IP encapsulated in IP */
178 "MICP", /**< Mobile Int.ing control */
179 "SCCSP", /**< Semaphore Comm. security */
180 "ETHERIP", /**< Ethernet IP encapsulation */
181 "ENCAP", /**< encapsulation header */
182 "AES", /**< any private encr. scheme */
185 "IPCOMP", /**< payload compression (IPComp) */
188 "PIM", /**< Protocol Independent Mcast */
/* Fast path: protocol numbers covered by the table are a plain lookup. */
191 if (ip_proto < RTE_DIM(ip_proto_names))
192 return ip_proto_names[ip_proto];
/* Protocol numbers beyond the table are matched individually. */
195 case IPPROTO_PGM: /**< PGM */
198 case IPPROTO_SCTP: /**< Stream Control Transport Protocol */
/* IPPROTO_DIVERT is not defined on every platform, hence the guard. */
200 #ifdef IPPROTO_DIVERT
201 case IPPROTO_DIVERT: /**< divert pseudo-protocol */
204 case IPPROTO_RAW: /**< raw IP packet */
/*
 * ipv4_addr_to_dot(): write the dotted-decimal text form of an IPv4
 * address into buf.
 *
 * be_ipv4_addr is converted with rte_be_to_cpu_32() and then printed one
 * byte at a time, most significant byte first.
 *
 * NOTE(review): sprintf() is unbounded and no buffer size is passed in;
 * buf must hold at least 16 bytes ("255.255.255.255" plus the NUL).
 * NOTE(review): the format uses %d while the shifted/masked operands are
 * presumably uint32_t (the declaration is not visible here); %u would
 * match the argument type.
 * NOTE(review): the source numbering embedded in this excerpt skips
 * lines, so the local declaration and the last sprintf() argument are
 * not visible.
 */
213 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf)
217 ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
218 sprintf(buf, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
219 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
/*
 * ether_addr_dump(): format the Ethernet address ea into a local buffer
 * of RTE_ETHER_ADDR_FMT_SIZE bytes with rte_ether_format_addr().
 *
 * NOTE(review): the statement that consumes `what` and the formatted
 * buffer is not visible in this excerpt; presumably `what` is a label
 * printed in front of the address - confirm against the complete file.
 */
224 ether_addr_dump(const char *what, const struct rte_ether_addr *ea)
226 char buf[RTE_ETHER_ADDR_FMT_SIZE];
228 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, ea);
/*
 * ipv4_addr_dump(): convert be_ipv4_addr to dotted-decimal text with
 * ipv4_addr_to_dot().
 *
 * NOTE(review): the declaration of buf and the statement that consumes
 * `what` and buf are not visible in this excerpt. ipv4_addr_to_dot()
 * writes without a length limit, so buf needs at least 16 bytes -
 * confirm against the complete file.
 */
235 ipv4_addr_dump(const char *what, uint32_t be_ipv4_addr)
239 ipv4_addr_to_dot(be_ipv4_addr, buf);
/*
 * ipv4_hdr_cksum(): compute the checksum of the IPv4 header at ip_h.
 *
 * The header is read as 16-bit words. Words 0-4 and 6-9 are summed;
 * word 5 (the checksum field itself, per the comment below) is left out.
 * Only these ten words (20 bytes) are covered, so any IPv4 options that
 * follow are not part of the sum. The 32-bit sum is folded to 16 bits
 * and complemented; a result of 0 is returned as 0xFFFF.
 *
 * The words are added as stored in memory, without byte-order
 * conversion.
 */
246 ipv4_hdr_cksum(struct rte_ipv4_hdr *ip_h)
252 * Compute the sum of successive 16-bit words of the IPv4 header,
253 * skipping the checksum field of the header.
255 v16_h = (unaligned_uint16_t *) ip_h;
256 ip_cksum = v16_h[0] + v16_h[1] + v16_h[2] + v16_h[3] +
257 v16_h[4] + v16_h[6] + v16_h[7] + v16_h[8] + v16_h[9];
259 /* reduce 32 bit checksum to 16 bits and complement it */
/* Folding twice absorbs the carry the first fold may itself produce. */
260 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
261 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
262 ip_cksum = (~ip_cksum) & 0x0000FFFF;
/* A computed value of 0 is never returned; 0xFFFF is substituted. */
263 return (ip_cksum == 0) ? 0xFFFF : (uint16_t) ip_cksum;
/*
 * Evaluate to non-zero when ipv4_addr, given in big-endian (network)
 * byte order, is an IPv4 multicast address.
 *
 * IPv4 multicast is the whole 224.0.0.0/4 block (224.0.0.0 through
 * 239.255.255.255), i.e. every address whose four most significant bits
 * are 1110. Testing only the first byte against 0xE0 would recognise
 * 224.x.x.x and miss 225.x.x.x - 239.x.x.x, so the top nibble is
 * compared instead.
 *
 * The argument is evaluated exactly once.
 */
#define is_multicast_ipv4_addr(ipv4_addr) \
	(((rte_be_to_cpu_32((ipv4_addr)) >> 28) & 0x0000000F) == 0xE)
269 /*
270 * Receive a burst of packets, look for ICMP echo requests and, if any,
271 * send back ICMP echo replies.
272 */
/*
 * reply_to_icmp_echo_rqsts(): packet forwarding callback that answers
 * ARP requests and ICMP echo requests in place.
 *
 * A burst is received from fs->rx_port / fs->rx_queue. For each packet:
 *  - the Ethernet header is parsed and one VLAN header, if present, is
 *    stepped over;
 *  - an ARP request for Ethernet/IPv4 is rewritten into an ARP reply;
 *  - an IPv4 ICMP echo request (type ECHO_REQUEST, code 0) is rewritten
 *    into an echo reply;
 *  - every other packet is released with rte_pktmbuf_free().
 * Rewritten packets are packed back into the front of pkts_burst and sent
 * on fs->tx_port / fs->tx_queue; replies that cannot be sent are freed
 * and added to fs->fwd_dropped.
 *
 * NOTE(review): the source numbering embedded in this excerpt skips many
 * lines (local declarations, call continuations, `continue` statements,
 * closing braces, #endif lines), so control flow between the visible
 * statements is partly inferred - confirm against the complete file.
 * NOTE(review): the token `ð_h` / `ð_addr` below looks like `&eth_h` /
 * `&eth_addr` after `&eth` was decoded as an HTML entity; as written it
 * is not valid C. Left byte-identical here - confirm and repair at the
 * source of the extraction.
 * NOTE(review): no check of the packet length against the header sizes
 * is visible before the ARP/IPv4/ICMP headers are dereferenced.
 */
274 reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
276 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
277 struct rte_mbuf *pkt;
278 struct rte_ether_hdr *eth_h;
279 struct rte_vlan_hdr *vlan_h;
280 struct rte_arp_hdr *arp_h;
281 struct rte_ipv4_hdr *ip_h;
282 struct rte_icmp_hdr *icmp_h;
283 struct rte_ether_addr eth_addr;
296 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
299 uint64_t core_cycles;
302 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
303 start_tsc = rte_rdtsc();
307 * First, receive a burst of packets.
309 nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
311 if (unlikely(nb_rx == 0))
314 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
315 fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
317 fs->rx_packets += nb_rx;
319 for (i = 0; i < nb_rx; i++) {
/* Prefetch the next packet's data while the current one is handled. */
320 if (likely(i < nb_rx - 1))
321 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1],
324 eth_h = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
325 eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type);
326 l2_len = sizeof(struct rte_ether_hdr);
327 if (verbose_level > 0) {
328 printf("\nPort %d pkt-len=%u nb-segs=%u\n",
329 fs->rx_port, pkt->pkt_len, pkt->nb_segs);
330 ether_addr_dump(" ETH: src=", ð_h->s_addr);
331 ether_addr_dump(" dst=", ð_h->d_addr);
/*
 * One VLAN header directly after the Ethernet header is stepped over:
 * l2_len grows by its size and eth_type becomes the encapsulated type.
 */
333 if (eth_type == RTE_ETHER_TYPE_VLAN) {
334 vlan_h = (struct rte_vlan_hdr *)
335 ((char *)eth_h + sizeof(struct rte_ether_hdr));
336 l2_len += sizeof(struct rte_vlan_hdr);
337 eth_type = rte_be_to_cpu_16(vlan_h->eth_proto);
338 if (verbose_level > 0) {
339 vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci)
341 printf(" [vlan id=%u]", vlan_id);
344 if (verbose_level > 0) {
345 printf(" type=0x%04x\n", eth_type);
348 /* Reply to ARP requests */
349 if (eth_type == RTE_ETHER_TYPE_ARP) {
350 arp_h = (struct rte_arp_hdr *) ((char *)eth_h + l2_len);
351 arp_op = RTE_BE_TO_CPU_16(arp_h->arp_opcode);
352 arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_protocol);
353 if (verbose_level > 0) {
354 printf(" ARP: hrd=%d proto=0x%04x hln=%d "
355 "pln=%d op=%u (%s)\n",
356 RTE_BE_TO_CPU_16(arp_h->arp_hardware),
357 arp_pro, arp_h->arp_hlen,
358 arp_h->arp_plen, arp_op,
359 arp_op_name(arp_op));
/*
 * Only ARP for Ethernet hardware / IPv4 protocol with 6-byte hardware
 * and 4-byte protocol addresses is handled; anything else is freed.
 */
361 if ((RTE_BE_TO_CPU_16(arp_h->arp_hardware) !=
362 RTE_ARP_HRD_ETHER) ||
363 (arp_pro != RTE_ETHER_TYPE_IPV4) ||
364 (arp_h->arp_hlen != 6) ||
365 (arp_h->arp_plen != 4)
367 rte_pktmbuf_free(pkt);
368 if (verbose_level > 0)
372 if (verbose_level > 0) {
373 rte_ether_addr_copy(&arp_h->arp_data.arp_sha,
375 ether_addr_dump(" sha=", ð_addr);
376 ip_addr = arp_h->arp_data.arp_sip;
377 ipv4_addr_dump(" sip=", ip_addr);
379 rte_ether_addr_copy(&arp_h->arp_data.arp_tha,
381 ether_addr_dump(" tha=", ð_addr);
382 ip_addr = arp_h->arp_data.arp_tip;
383 ipv4_addr_dump(" tip=", ip_addr);
/* ARP packets other than requests are not answered. */
386 if (arp_op != RTE_ARP_OP_REQUEST) {
387 rte_pktmbuf_free(pkt);
395 /* Use source MAC address as destination MAC address. */
396 rte_ether_addr_copy(ð_h->s_addr, ð_h->d_addr);
397 /* Set source MAC address with MAC address of TX port */
398 rte_ether_addr_copy(&ports[fs->tx_port].eth_addr,
/*
 * Turn the ARP payload into a reply: the requester's hardware address
 * becomes the target, and the Ethernet source address set just above
 * becomes the sender hardware address.
 */
401 arp_h->arp_opcode = rte_cpu_to_be_16(RTE_ARP_OP_REPLY);
402 rte_ether_addr_copy(&arp_h->arp_data.arp_tha,
404 rte_ether_addr_copy(&arp_h->arp_data.arp_sha,
405 &arp_h->arp_data.arp_tha);
406 rte_ether_addr_copy(ð_h->s_addr,
407 &arp_h->arp_data.arp_sha);
409 /* Swap IP addresses in ARP payload */
410 ip_addr = arp_h->arp_data.arp_sip;
411 arp_h->arp_data.arp_sip = arp_h->arp_data.arp_tip;
412 arp_h->arp_data.arp_tip = ip_addr;
/* Replies reuse the front of pkts_burst; nb_replies never exceeds i. */
413 pkts_burst[nb_replies++] = pkt;
417 if (eth_type != RTE_ETHER_TYPE_IPV4) {
418 rte_pktmbuf_free(pkt);
421 ip_h = (struct rte_ipv4_hdr *) ((char *)eth_h + l2_len);
422 if (verbose_level > 0) {
423 ipv4_addr_dump(" IPV4: src=", ip_h->src_addr);
424 ipv4_addr_dump(" dst=", ip_h->dst_addr);
425 printf(" proto=%d (%s)\n",
427 ip_proto_name(ip_h->next_proto_id));
431 * Check if packet is a ICMP echo request.
/*
 * NOTE(review): the ICMP header is located at a fixed
 * sizeof(struct rte_ipv4_hdr) past the IPv4 header, so a header that
 * carries IPv4 options would be misparsed.
 */
433 icmp_h = (struct rte_icmp_hdr *) ((char *)ip_h +
434 sizeof(struct rte_ipv4_hdr));
435 if (! ((ip_h->next_proto_id == IPPROTO_ICMP) &&
436 (icmp_h->icmp_type == RTE_IP_ICMP_ECHO_REQUEST) &&
437 (icmp_h->icmp_code == 0))) {
438 rte_pktmbuf_free(pkt);
442 if (verbose_level > 0)
443 printf(" ICMP: echo request seq id=%d\n",
444 rte_be_to_cpu_16(icmp_h->icmp_seq_nb));
447 * Prepare ICMP echo reply to be sent back.
448 * - switch ethernet source and destinations addresses,
449 * - use the request IP source address as the reply IP
450 * destination address,
451 * - if the request IP destination address is a multicast
453 * - choose a reply IP source address different from the
454 * request IP source address,
455 * - re-compute the IP header checksum.
457 * - switch the request IP source and destination
458 * addresses in the reply IP header,
459 * - keep the IP header checksum unchanged.
460 * - set RTE_IP_ICMP_ECHO_REPLY in ICMP header.
461 * ICMP checksum is computed by assuming it is valid in the
462 * echo request and not verified.
/* Swap Ethernet source and destination through the eth_addr temporary. */
464 rte_ether_addr_copy(ð_h->s_addr, ð_addr);
465 rte_ether_addr_copy(ð_h->d_addr, ð_h->s_addr);
466 rte_ether_addr_copy(ð_addr, ð_h->d_addr);
467 ip_addr = ip_h->src_addr;
468 if (is_multicast_ipv4_addr(ip_h->dst_addr)) {
/*
 * The reply source is derived from the requester's address by forcing
 * its two low bits to 2 when they were 1, and to 1 otherwise, so it
 * always differs from the requester's address.
 */
471 ip_src = rte_be_to_cpu_32(ip_addr);
472 if ((ip_src & 0x00000003) == 1)
473 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000002;
475 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000001;
476 ip_h->src_addr = rte_cpu_to_be_32(ip_src);
477 ip_h->dst_addr = ip_addr;
478 ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h);
480 ip_h->src_addr = ip_h->dst_addr;
481 ip_h->dst_addr = ip_addr;
/*
 * Incremental ICMP checksum update: un-complement the stored checksum,
 * add the one's complement of the old type/code word and the new
 * type/code word, fold the carries twice, then complement again.
 */
483 icmp_h->icmp_type = RTE_IP_ICMP_ECHO_REPLY;
484 cksum = ~icmp_h->icmp_cksum & 0xffff;
485 cksum += ~htons(RTE_IP_ICMP_ECHO_REQUEST << 8) & 0xffff;
486 cksum += htons(RTE_IP_ICMP_ECHO_REPLY << 8);
487 cksum = (cksum & 0xffff) + (cksum >> 16);
488 cksum = (cksum & 0xffff) + (cksum >> 16);
489 icmp_h->icmp_cksum = ~cksum;
490 pkts_burst[nb_replies++] = pkt;
493 /* Send back ICMP echo replies, if any. */
494 if (nb_replies > 0) {
495 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
/*
 * When retry is enabled for this stream, a partial transmit is retried
 * up to burst_tx_retry_num times, pausing burst_tx_delay_time
 * microseconds before each attempt.
 */
500 if (unlikely(nb_tx < nb_replies) && fs->retry_enabled) {
502 while (nb_tx < nb_replies &&
503 retry++ < burst_tx_retry_num) {
504 rte_delay_us(burst_tx_delay_time);
505 nb_tx += rte_eth_tx_burst(fs->tx_port,
511 fs->tx_packets += nb_tx;
512 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
513 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
/* Replies that were not transmitted are counted as dropped and freed. */
515 if (unlikely(nb_tx < nb_replies)) {
516 fs->fwd_dropped += (nb_replies - nb_tx);
518 rte_pktmbuf_free(pkts_burst[nb_tx]);
519 } while (++nb_tx < nb_replies);
523 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
524 end_tsc = rte_rdtsc();
525 core_cycles = (end_tsc - start_tsc);
526 fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
530 struct fwd_engine icmp_echo_engine = {
531 .fwd_mode_name = "icmpecho",
532 .port_fwd_begin = NULL,
533 .port_fwd_end = NULL,
534 .packet_fwd = reply_to_icmp_echo_rqsts,