4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include <sys/queue.h>
44 #include <rte_common.h>
45 #include <rte_byteorder.h>
47 #include <rte_debug.h>
48 #include <rte_cycles.h>
49 #include <rte_memory.h>
50 #include <rte_memcpy.h>
51 #include <rte_memzone.h>
52 #include <rte_launch.h>
53 #include <rte_tailq.h>
55 #include <rte_per_lcore.h>
56 #include <rte_lcore.h>
57 #include <rte_atomic.h>
58 #include <rte_branch_prediction.h>
60 #include <rte_memory.h>
61 #include <rte_mempool.h>
63 #include <rte_memcpy.h>
64 #include <rte_interrupts.h>
66 #include <rte_ether.h>
67 #include <rte_ethdev.h>
72 #include <rte_prefetch.h>
73 #include <rte_string_fns.h>
/* Default field values for building an IPv4 header. */
#define IP_DEFTTL  64   /* default time-to-live, from RFC 1340 */
#define IP_VERSION 0x40 /* IP version 4, placed in the high nibble */
#define IP_HDRLEN  0x05 /* default IP header length == five 32-bit words */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN) /* combined version/length byte */
/**
 * Compute the 16-bit one's-complement sum of a byte range.
 *
 * The data is read as 16-bit words exactly as stored in memory, so summing
 * network-order data yields a network-order result.
 *
 * @param ptr16  start of the data to sum
 * @param nr     length of the data in bytes; may be zero or odd
 * @return       the folded 16-bit sum (NOT complemented)
 */
static inline uint16_t
get_16b_sum(uint16_t *ptr16, uint32_t nr)
{
	uint32_t sum = 0;

	while (nr > 1) {
		sum += *ptr16;
		nr -= sizeof(uint16_t);
		ptr16++;
		/* End-around carry: subtracting 0xffff drops the carry bit and adds 1. */
		if (sum > UINT16_MAX)
			sum -= UINT16_MAX;
	}

	/* If length is in odd bytes */
	if (nr) {
		/*
		 * Treat the last byte as a 16-bit word padded with a trailing zero
		 * byte. Writing it into the first byte of a zeroed word places it
		 * correctly on both little- and big-endian hosts.
		 */
		uint16_t tail = 0;

		*(uint8_t *)&tail = *(uint8_t *)ptr16;
		sum += tail;
	}

	/* Fold any remaining carry back into the low 16 bits. */
	sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff);
	sum &= 0x0ffff;
	return (uint16_t)sum;
}
105 static inline uint16_t
106 get_ipv4_cksum(struct ipv4_hdr *ipv4_hdr)
109 cksum = get_16b_sum((uint16_t*)ipv4_hdr, sizeof(struct ipv4_hdr));
110 return (uint16_t)((cksum == 0xffff)?cksum:~cksum);
114 static inline uint16_t
115 get_ipv4_psd_sum (struct ipv4_hdr * ip_hdr)
117 /* Pseudo Header for IPv4/UDP/TCP checksum */
118 union ipv4_psd_header {
120 uint32_t src_addr; /* IP address of source host. */
121 uint32_t dst_addr; /* IP address of destination host(s). */
122 uint8_t zero; /* zero. */
123 uint8_t proto; /* L4 protocol type. */
124 uint16_t len; /* L4 length. */
125 } __attribute__((__packed__));
129 psd_hdr.src_addr = ip_hdr->src_addr;
130 psd_hdr.dst_addr = ip_hdr->dst_addr;
132 psd_hdr.proto = ip_hdr->next_proto_id;
133 psd_hdr.len = rte_cpu_to_be_16((uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length)
134 - sizeof(struct ipv4_hdr)));
135 return get_16b_sum(psd_hdr.u16_arr, sizeof(psd_hdr));
138 static inline uint16_t
139 get_ipv6_psd_sum (struct ipv6_hdr * ip_hdr)
141 /* Pseudo Header for IPv6/UDP/TCP checksum */
142 union ipv6_psd_header {
144 uint8_t src_addr[16]; /* IP address of source host. */
145 uint8_t dst_addr[16]; /* IP address of destination host(s). */
146 uint32_t len; /* L4 length. */
147 uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
148 } __attribute__((__packed__));
150 uint16_t u16_arr[0]; /* allow use as 16-bit values with safe aliasing */
153 rte_memcpy(&psd_hdr.src_addr, ip_hdr->src_addr,
154 sizeof(ip_hdr->src_addr) + sizeof(ip_hdr->dst_addr));
155 psd_hdr.len = ip_hdr->payload_len;
156 psd_hdr.proto = (ip_hdr->proto << 24);
158 return get_16b_sum(psd_hdr.u16_arr, sizeof(psd_hdr));
161 static inline uint16_t
162 get_ipv4_udptcp_checksum(struct ipv4_hdr *ipv4_hdr, uint16_t *l4_hdr)
167 l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - sizeof(struct ipv4_hdr);
169 cksum = get_16b_sum(l4_hdr, l4_len);
170 cksum += get_ipv4_psd_sum(ipv4_hdr);
172 cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
173 cksum = (~cksum) & 0xffff;
176 return (uint16_t)cksum;
180 static inline uint16_t
181 get_ipv6_udptcp_checksum(struct ipv6_hdr *ipv6_hdr, uint16_t *l4_hdr)
186 l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
188 cksum = get_16b_sum(l4_hdr, l4_len);
189 cksum += get_ipv6_psd_sum(ipv6_hdr);
191 cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
192 cksum = (~cksum) & 0xffff;
196 return (uint16_t)cksum;
/*
 * pkt_burst_checksum_forward: receive a burst on fs->rx_port/fs->rx_queue,
 * set checksum offload flags or compute checksums in software for each
 * packet, then transmit the burst on fs->tx_port/fs->tx_queue.
 *
 * NOTE(review): this listing has the original line numbers fused into each
 * line and omits many lines (braces, declarations, else branches, comment
 * openers). The notes added below describe only what is visible; confirm
 * against the complete file before relying on them.
 */
201 * Forwarding of packets. Change the checksum field with HW or SW methods
202 * The HW/SW method selection depends on the ol_flags on every packet
205 pkt_burst_checksum_forward(struct fwd_stream *fs)
207 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
208 struct rte_port *txp;
210 struct ether_hdr *eth_hdr;
211 struct ipv4_hdr *ipv4_hdr;
212 struct ether_hdr *inner_eth_hdr;
213 struct ipv4_hdr *inner_ipv4_hdr = NULL;
214 struct ipv6_hdr *ipv6_hdr;
215 struct ipv6_hdr *inner_ipv6_hdr = NULL;
216 struct udp_hdr *udp_hdr;
217 struct udp_hdr *inner_udp_hdr;
218 struct tcp_hdr *tcp_hdr;
219 struct tcp_hdr *inner_tcp_hdr;
220 struct sctp_hdr *sctp_hdr;
221 struct sctp_hdr *inner_sctp_hdr;
227 uint64_t pkt_ol_flags;
228 uint64_t tx_ol_flags;
230 uint16_t inner_l4_proto = 0;
234 uint8_t inner_l2_len = 0;
235 uint8_t inner_l3_len = 0;
237 uint32_t rx_bad_ip_csum;
238 uint32_t rx_bad_l4_csum;
242 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
245 uint64_t core_cycles;
248 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
249 start_tsc = rte_rdtsc();
253 * Receive a burst of packets and forward them.
255 nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
/* Nothing received: the action taken here is on a line not shown. */
257 if (unlikely(nb_rx == 0))
260 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
261 fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
263 fs->rx_packets += nb_rx;
/*
 * TX offload configuration of the output port. Its bits are tested below as
 * magic numbers: 0x1/0x2/0x4/0x8 select IP/UDP/TCP/SCTP checksum offload for
 * the outer headers, 0x10/0x20/0x40/0x80 the same for the inner (tunnelled)
 * headers, and 0xF0 "any inner offload".
 */
267 txp = &ports[fs->tx_port];
268 tx_ol_flags = txp->tx_ol_flags;
270 for (i = 0; i < nb_rx; i++) {
273 l2_len = sizeof(struct ether_hdr);
274 pkt_ol_flags = mb->ol_flags;
/* Start from the RX flags with the TX L4 checksum request bits cleared. */
275 ol_flags = (pkt_ol_flags & (~PKT_TX_L4_MASK));
/* Tunnel indicators derived from the RX flags (assigned values not shown). */
276 ipv4_tunnel = (pkt_ol_flags & PKT_RX_TUNNEL_IPV4_HDR) ?
278 ipv6_tunnel = (pkt_ol_flags & PKT_RX_TUNNEL_IPV6_HDR) ?
280 eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
281 eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
/*
 * Skip one VLAN tag: the encapsulated EtherType is read sizeof(struct
 * vlan_hdr) bytes after the outer ether_type field.
 * NOTE(review): "ð_hdr" below looks like a mis-encoded "&eth_hdr" - confirm
 * against the upstream file.
 */
282 if (eth_type == ETHER_TYPE_VLAN) {
283 /* Only allow single VLAN label here */
284 l2_len += sizeof(struct vlan_hdr);
285 eth_type = rte_be_to_cpu_16(*(uint16_t *)
286 ((uintptr_t)ð_hdr->ether_type +
287 sizeof(struct vlan_hdr)));
290 /* Update the L3/L4 checksum error packet count */
291 rx_bad_ip_csum += (uint16_t) ((pkt_ol_flags & PKT_RX_IP_CKSUM_BAD) != 0);
292 rx_bad_l4_csum += (uint16_t) ((pkt_ol_flags & PKT_RX_L4_CKSUM_BAD) != 0);
295 * Try to figure out L3 packet type by SW.
/* Only when RX reported no L3 type: fall back to the EtherType. */
297 if ((pkt_ol_flags & (PKT_RX_IPV4_HDR | PKT_RX_IPV4_HDR_EXT |
298 PKT_RX_IPV6_HDR | PKT_RX_IPV6_HDR_EXT)) == 0) {
299 if (eth_type == ETHER_TYPE_IPv4)
300 pkt_ol_flags |= PKT_RX_IPV4_HDR;
301 else if (eth_type == ETHER_TYPE_IPv6)
302 pkt_ol_flags |= PKT_RX_IPV6_HDR;
306 * Simplify the protocol parsing
307 * Assuming the incoming packets format as
308 * Ethernet2 + optional single VLAN
310 * + udp or tcp or sctp or others
/* ---- IPv4 outer header ---- */
312 if (pkt_ol_flags & (PKT_RX_IPV4_HDR | PKT_RX_TUNNEL_IPV4_HDR)) {
314 /* Do not support ipv4 option field */
315 l3_len = sizeof(struct ipv4_hdr) ;
317 ipv4_hdr = (struct ipv4_hdr *) (rte_pktmbuf_mtod(mb,
318 unsigned char *) + l2_len);
320 l4_proto = ipv4_hdr->next_proto_id;
322 /* Do not delete, this is required by HW*/
323 ipv4_hdr->hdr_checksum = 0;
325 if (tx_ol_flags & 0x1) {
327 ol_flags |= PKT_TX_IP_CKSUM;
330 ol_flags |= PKT_TX_IPV4;
331 /* SW checksum calculation */
/*
 * NOTE(review): the source address is incremented before the header
 * checksum is recomputed - presumably so the forwarded packet is visibly
 * modified; confirm intent.
 */
332 ipv4_hdr->src_addr++;
333 ipv4_hdr->hdr_checksum = get_ipv4_cksum(ipv4_hdr);
336 if (l4_proto == IPPROTO_UDP) {
337 udp_hdr = (struct udp_hdr*) (rte_pktmbuf_mtod(mb,
338 unsigned char *) + l2_len + l3_len);
339 if (tx_ol_flags & 0x2) {
341 ol_flags |= PKT_TX_UDP_CKSUM;
343 udp_hdr->dgram_cksum = 0;
345 /* Pseudo header sum need be set properly */
346 udp_hdr->dgram_cksum =
347 get_ipv4_psd_sum(ipv4_hdr);
350 /* SW Implementation, clear checksum field first */
351 udp_hdr->dgram_cksum = 0;
352 udp_hdr->dgram_cksum = get_ipv4_udptcp_checksum(ipv4_hdr,
353 (uint16_t *)udp_hdr);
/*
 * Tunnelled (VXLAN) payload over IPv4/UDP.
 * NOTE(review): the condition guarding this section is not visible here.
 */
360 /* Check if inner L3/L4 checksum flag is set */
361 if (tx_ol_flags & 0xF0)
362 ol_flags |= PKT_TX_VXLAN_CKSUM;
364 inner_l2_len = sizeof(struct ether_hdr);
365 inner_eth_hdr = (struct ether_hdr *) (rte_pktmbuf_mtod(mb,
366 unsigned char *) + l2_len + l3_len
369 eth_type = rte_be_to_cpu_16(inner_eth_hdr->ether_type);
/*
 * NOTE(review): the inner VLAN lookup below dereferences the OUTER eth_hdr
 * although the tag being skipped belongs to inner_eth_hdr - looks like a
 * copy/paste slip; confirm. ("ð_hdr" is again a mis-encoded "&eth_hdr".)
 */
370 if (eth_type == ETHER_TYPE_VLAN) {
371 inner_l2_len += sizeof(struct vlan_hdr);
372 eth_type = rte_be_to_cpu_16(*(uint16_t *)
373 ((uintptr_t)ð_hdr->ether_type +
374 sizeof(struct vlan_hdr)));
/* Byte offset of the inner L3 header from the start of the packet. */
377 len = l2_len + l3_len + ETHER_VXLAN_HLEN + inner_l2_len;
378 if (eth_type == ETHER_TYPE_IPv4) {
379 inner_l3_len = sizeof(struct ipv4_hdr);
380 inner_ipv4_hdr = (struct ipv4_hdr *) (rte_pktmbuf_mtod(mb,
381 unsigned char *) + len);
382 inner_l4_proto = inner_ipv4_hdr->next_proto_id;
384 if (tx_ol_flags & 0x10) {
386 /* Do not delete, this is required by HW*/
387 inner_ipv4_hdr->hdr_checksum = 0;
388 ol_flags |= PKT_TX_IPV4_CSUM;
391 } else if (eth_type == ETHER_TYPE_IPv6) {
392 inner_l3_len = sizeof(struct ipv6_hdr);
393 inner_ipv6_hdr = (struct ipv6_hdr *) (rte_pktmbuf_mtod(mb,
394 unsigned char *) + len);
395 inner_l4_proto = inner_ipv6_hdr->proto;
/*
 * Inner L4: when the matching offload bit is set, flag the packet and seed
 * the UDP/TCP checksum field with the pseudo-header sum of the inner IP
 * header; for SCTP the checksum field is cleared.
 */
397 if ((inner_l4_proto == IPPROTO_UDP) && (tx_ol_flags & 0x20)) {
400 ol_flags |= PKT_TX_UDP_CKSUM;
401 inner_udp_hdr = (struct udp_hdr *) (rte_pktmbuf_mtod(mb,
402 unsigned char *) + len + inner_l3_len);
403 if (eth_type == ETHER_TYPE_IPv4)
404 inner_udp_hdr->dgram_cksum = get_ipv4_psd_sum(inner_ipv4_hdr);
405 else if (eth_type == ETHER_TYPE_IPv6)
406 inner_udp_hdr->dgram_cksum = get_ipv6_psd_sum(inner_ipv6_hdr);
408 } else if ((inner_l4_proto == IPPROTO_TCP) && (tx_ol_flags & 0x40)) {
410 ol_flags |= PKT_TX_TCP_CKSUM;
411 inner_tcp_hdr = (struct tcp_hdr *) (rte_pktmbuf_mtod(mb,
412 unsigned char *) + len + inner_l3_len);
413 if (eth_type == ETHER_TYPE_IPv4)
414 inner_tcp_hdr->cksum = get_ipv4_psd_sum(inner_ipv4_hdr);
415 else if (eth_type == ETHER_TYPE_IPv6)
416 inner_tcp_hdr->cksum = get_ipv6_psd_sum(inner_ipv6_hdr);
417 } else if ((inner_l4_proto == IPPROTO_SCTP) && (tx_ol_flags & 0x80)) {
419 ol_flags |= PKT_TX_SCTP_CKSUM;
420 inner_sctp_hdr = (struct sctp_hdr *) (rte_pktmbuf_mtod(mb,
421 unsigned char *) + len + inner_l3_len);
422 inner_sctp_hdr->cksum = 0;
427 } else if (l4_proto == IPPROTO_TCP) {
428 tcp_hdr = (struct tcp_hdr*) (rte_pktmbuf_mtod(mb,
429 unsigned char *) + l2_len + l3_len);
430 if (tx_ol_flags & 0x4) {
431 ol_flags |= PKT_TX_TCP_CKSUM;
432 tcp_hdr->cksum = get_ipv4_psd_sum(ipv4_hdr);
436 tcp_hdr->cksum = get_ipv4_udptcp_checksum(ipv4_hdr,
439 } else if (l4_proto == IPPROTO_SCTP) {
440 sctp_hdr = (struct sctp_hdr*) (rte_pktmbuf_mtod(mb,
441 unsigned char *) + l2_len + l3_len);
443 if (tx_ol_flags & 0x8) {
444 ol_flags |= PKT_TX_SCTP_CKSUM;
447 /* Sanity check, only number of 4 bytes supported */
448 if ((rte_be_to_cpu_16(ipv4_hdr->total_length) % 4) != 0)
449 printf("sctp payload must be a multiple "
450 "of 4 bytes for checksum offload");
454 /* CRC32c sample code available in RFC3309 */
457 /* End of L4 Handling*/
/* ---- IPv6 outer header: same structure as the IPv4 branch above ---- */
458 } else if (pkt_ol_flags & (PKT_RX_IPV6_HDR | PKT_RX_TUNNEL_IPV6_HDR)) {
459 ipv6_hdr = (struct ipv6_hdr *) (rte_pktmbuf_mtod(mb,
460 unsigned char *) + l2_len);
461 l3_len = sizeof(struct ipv6_hdr) ;
462 l4_proto = ipv6_hdr->proto;
463 ol_flags |= PKT_TX_IPV6;
465 if (l4_proto == IPPROTO_UDP) {
466 udp_hdr = (struct udp_hdr*) (rte_pktmbuf_mtod(mb,
467 unsigned char *) + l2_len + l3_len);
468 if (tx_ol_flags & 0x2) {
470 ol_flags |= PKT_TX_UDP_CKSUM;
472 udp_hdr->dgram_cksum = 0;
474 udp_hdr->dgram_cksum =
475 get_ipv6_psd_sum(ipv6_hdr);
478 /* SW Implementation */
479 /* checksum field need be clear first */
480 udp_hdr->dgram_cksum = 0;
481 udp_hdr->dgram_cksum = get_ipv6_udptcp_checksum(ipv6_hdr,
482 (uint16_t *)udp_hdr);
/*
 * Tunnelled (VXLAN) payload over IPv6/UDP.
 * NOTE(review): the condition guarding this section is not visible here.
 */
489 /* Check if inner L3/L4 checksum flag is set */
490 if (tx_ol_flags & 0xF0)
491 ol_flags |= PKT_TX_VXLAN_CKSUM;
493 inner_l2_len = sizeof(struct ether_hdr);
494 inner_eth_hdr = (struct ether_hdr *) (rte_pktmbuf_mtod(mb,
495 unsigned char *) + l2_len + l3_len + ETHER_VXLAN_HLEN);
496 eth_type = rte_be_to_cpu_16(inner_eth_hdr->ether_type);
/*
 * NOTE(review): as in the IPv4 branch, this reads relative to the OUTER
 * eth_hdr rather than inner_eth_hdr, and "ð_hdr" is a mis-encoded
 * "&eth_hdr" - confirm both.
 */
498 if (eth_type == ETHER_TYPE_VLAN) {
499 inner_l2_len += sizeof(struct vlan_hdr);
500 eth_type = rte_be_to_cpu_16(*(uint16_t *)
501 ((uintptr_t)ð_hdr->ether_type +
502 sizeof(struct vlan_hdr)));
/* Byte offset of the inner L3 header from the start of the packet. */
505 len = l2_len + l3_len + ETHER_VXLAN_HLEN + inner_l2_len;
507 if (eth_type == ETHER_TYPE_IPv4) {
508 inner_l3_len = sizeof(struct ipv4_hdr);
509 inner_ipv4_hdr = (struct ipv4_hdr *) (rte_pktmbuf_mtod(mb,
510 unsigned char *) + len);
511 inner_l4_proto = inner_ipv4_hdr->next_proto_id;
514 if (tx_ol_flags & 0x10) {
516 /* Do not delete, this is required by HW*/
517 inner_ipv4_hdr->hdr_checksum = 0;
518 ol_flags |= PKT_TX_IPV4_CSUM;
520 } else if (eth_type == ETHER_TYPE_IPv6) {
521 inner_l3_len = sizeof(struct ipv6_hdr);
522 inner_ipv6_hdr = (struct ipv6_hdr *) (rte_pktmbuf_mtod(mb,
523 unsigned char *) + len);
524 inner_l4_proto = inner_ipv6_hdr->proto;
527 if ((inner_l4_proto == IPPROTO_UDP) && (tx_ol_flags & 0x20)) {
528 inner_udp_hdr = (struct udp_hdr *) (rte_pktmbuf_mtod(mb,
529 unsigned char *) + len + inner_l3_len);
531 ol_flags |= PKT_TX_UDP_CKSUM;
532 inner_udp_hdr->dgram_cksum = 0;
533 if (eth_type == ETHER_TYPE_IPv4)
534 inner_udp_hdr->dgram_cksum = get_ipv4_psd_sum(inner_ipv4_hdr);
535 else if (eth_type == ETHER_TYPE_IPv6)
536 inner_udp_hdr->dgram_cksum = get_ipv6_psd_sum(inner_ipv6_hdr);
537 } else if ((inner_l4_proto == IPPROTO_TCP) && (tx_ol_flags & 0x40)) {
539 ol_flags |= PKT_TX_TCP_CKSUM;
540 inner_tcp_hdr = (struct tcp_hdr *) (rte_pktmbuf_mtod(mb,
541 unsigned char *) + len + inner_l3_len);
543 if (eth_type == ETHER_TYPE_IPv4)
544 inner_tcp_hdr->cksum = get_ipv4_psd_sum(inner_ipv4_hdr);
545 else if (eth_type == ETHER_TYPE_IPv6)
546 inner_tcp_hdr->cksum = get_ipv6_psd_sum(inner_ipv6_hdr);
548 } else if ((inner_l4_proto == IPPROTO_SCTP) && (tx_ol_flags & 0x80)) {
550 ol_flags |= PKT_TX_SCTP_CKSUM;
551 inner_sctp_hdr = (struct sctp_hdr *) (rte_pktmbuf_mtod(mb,
552 unsigned char *) + len + inner_l3_len);
553 inner_sctp_hdr->cksum = 0;
559 else if (l4_proto == IPPROTO_TCP) {
560 tcp_hdr = (struct tcp_hdr*) (rte_pktmbuf_mtod(mb,
561 unsigned char *) + l2_len + l3_len);
562 if (tx_ol_flags & 0x4) {
563 ol_flags |= PKT_TX_TCP_CKSUM;
564 tcp_hdr->cksum = get_ipv6_psd_sum(ipv6_hdr);
568 tcp_hdr->cksum = get_ipv6_udptcp_checksum(ipv6_hdr,
572 else if (l4_proto == IPPROTO_SCTP) {
573 sctp_hdr = (struct sctp_hdr*) (rte_pktmbuf_mtod(mb,
574 unsigned char *) + l2_len + l3_len);
576 if (tx_ol_flags & 0x8) {
577 ol_flags |= PKT_TX_SCTP_CKSUM;
579 /* Sanity check, only number of 4 bytes supported by HW */
580 if ((rte_be_to_cpu_16(ipv6_hdr->payload_len) % 4) != 0)
581 printf("sctp payload must be a multiple "
582 "of 4 bytes for checksum offload");
585 /* CRC32c sample code available in RFC3309 */
/*
 * NOTE(review): this message does not relate to checksum handling, and the
 * branch that reaches it is not visible in this listing - confirm it is
 * intended.
 */
589 printf("Test flow control for 1G PMD \n");
591 /* End of L6 Handling*/
595 printf("Unhandled packet type: %#hx\n", eth_type);
598 /* Combine the packet header write. VLAN is not considered here */
601 mb->inner_l2_len = inner_l2_len;
602 mb->inner_l3_len = inner_l3_len;
603 mb->ol_flags = ol_flags;
/* Transmit the whole burst and fold the per-burst counters into the stream. */
605 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
606 fs->tx_packets += nb_tx;
607 fs->rx_bad_ip_csum += rx_bad_ip_csum;
608 fs->rx_bad_l4_csum += rx_bad_l4_csum;
610 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
611 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
/* Packets the TX queue did not accept are counted as dropped and freed. */
613 if (unlikely(nb_tx < nb_rx)) {
614 fs->fwd_dropped += (nb_rx - nb_tx);
616 rte_pktmbuf_free(pkts_burst[nb_tx]);
617 } while (++nb_tx < nb_rx);
/* Accumulate the TSC cycles spent in this call into the stream statistics. */
619 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
620 end_tsc = rte_rdtsc();
621 core_cycles = (end_tsc - start_tsc);
622 fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
/*
 * Forwarding engine descriptor named "csum": it installs no per-port
 * begin/end hooks and handles each burst with pkt_burst_checksum_forward().
 */
627 struct fwd_engine csum_fwd_engine = {
628 .fwd_mode_name = "csum",
629 .port_fwd_begin = NULL,
630 .port_fwd_end = NULL,
631 .packet_fwd = pkt_burst_checksum_forward,