4 * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 #include <sys/queue.h>
45 #include <rte_common.h>
46 #include <rte_byteorder.h>
48 #include <rte_debug.h>
49 #include <rte_cycles.h>
50 #include <rte_memory.h>
51 #include <rte_memcpy.h>
52 #include <rte_memzone.h>
53 #include <rte_launch.h>
54 #include <rte_tailq.h>
56 #include <rte_per_lcore.h>
57 #include <rte_lcore.h>
58 #include <rte_atomic.h>
59 #include <rte_branch_prediction.h>
61 #include <rte_memory.h>
62 #include <rte_mempool.h>
64 #include <rte_memcpy.h>
65 #include <rte_interrupts.h>
67 #include <rte_ether.h>
68 #include <rte_ethdev.h>
73 #include <rte_prefetch.h>
74 #include <rte_string_fns.h>
/* Default values used when filling in IPv4 header fields. */
#define IP_DEFTTL  64   /* Default time-to-live, from RFC 1340. */
#define IP_VERSION 0x40 /* Version value OR-ed into the upper nibble of IP_VHL_DEF. */
#define IP_HDRLEN  0x05 /* Default IP header length == five 32-bit words. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN) /* Combined version/header-length byte (0x45). */
/*
 * Pseudo Header for IPv4/UDP/TCP checksum.
 *
 * Packed so that its 12 bytes can be summed directly as six 16-bit words.
 * Fields are stored exactly as they will be summed; get_ipv4_psd_sum()
 * fills src_addr/dst_addr/proto straight from the IPv4 header and stores
 * len already converted to big-endian.
 */
struct psd_header {
	uint32_t src_addr; /* IP address of source host. */
	uint32_t dst_addr; /* IP address of destination host(s). */
	uint8_t  zero;     /* zero. */
	uint8_t  proto;    /* L4 protocol type. */
	uint16_t len;      /* L4 length. */
} __attribute__((__packed__));
/*
 * Pseudo Header for IPv6/UDP/TCP checksum.
 *
 * Packed 40-byte layout: two 16-byte addresses, a 32-bit L4 length,
 * three zero bytes and the L4 protocol number. The whole structure is
 * summed as 16-bit words by get_ipv6_psd_sum().
 */
struct ipv6_psd_header {
	uint8_t  src_addr[16]; /* IP address of source host. */
	uint8_t  dst_addr[16]; /* IP address of destination host(s). */
	uint32_t len;          /* L4 length. */
	uint8_t  zero[3];      /* zero. */
	uint8_t  proto;        /* L4 protocol. */
} __attribute__((__packed__));
/*
 * Compute the 16-bit one's-complement sum of a buffer.
 *
 * ptr16: start of the buffer; it is read as native 16-bit words.
 * nr:    length of the buffer in bytes (may be zero or odd).
 *
 * Returns the folded, NOT complemented, 16-bit sum. The words are summed
 * exactly as they sit in memory, so the result can be written back to a
 * packet without any byte swapping.
 */
static inline uint16_t
get_16b_sum(uint16_t *ptr16, uint32_t nr)
{
	uint32_t sum = 0;

	while (nr > 1) {
		sum += *ptr16;
		nr -= sizeof(uint16_t);
		ptr16++;
		/*
		 * End-around carry: subtracting 0xffff is the same as
		 * dropping bit 16 and adding 1, and keeps sum <= 0xffff.
		 */
		if (sum > UINT16_MAX)
			sum -= UINT16_MAX;
	}

	/* If length is in odd bytes */
	if (nr) {
		/*
		 * Treat the last byte as the first byte of a 16-bit word
		 * padded with a zero byte. Writing it into the first byte of
		 * a zeroed word gives the right value on both little- and
		 * big-endian hosts (adding the bare byte value is only
		 * correct on little-endian).
		 */
		uint16_t tail = 0;

		*(uint8_t *)&tail = *(const uint8_t *)ptr16;
		sum += tail;
	}

	/* Fold the possible final carry back into the low 16 bits. */
	sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff);
	sum &= 0x0ffff;
	return (uint16_t)sum;
}
126 static inline uint16_t
127 get_ipv4_cksum(struct ipv4_hdr *ipv4_hdr)
130 cksum = get_16b_sum((uint16_t*)ipv4_hdr, sizeof(struct ipv4_hdr));
131 return (uint16_t)((cksum == 0xffff)?cksum:~cksum);
135 static inline uint16_t
136 get_ipv4_psd_sum (struct ipv4_hdr * ip_hdr)
138 struct psd_header psd_hdr;
139 psd_hdr.src_addr = ip_hdr->src_addr;
140 psd_hdr.dst_addr = ip_hdr->dst_addr;
142 psd_hdr.proto = ip_hdr->next_proto_id;
143 psd_hdr.len = rte_cpu_to_be_16((uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length)
144 - sizeof(struct ipv4_hdr)));
145 return get_16b_sum((uint16_t*)&psd_hdr, sizeof(struct psd_header));
148 static inline uint16_t
149 get_ipv6_psd_sum (struct ipv6_hdr * ip_hdr)
151 struct ipv6_psd_header psd_hdr;
152 rte_memcpy(&psd_hdr.src_addr, ip_hdr->src_addr, sizeof(ip_hdr->src_addr)
153 + sizeof(ip_hdr->dst_addr));
158 psd_hdr.proto = ip_hdr->proto;
159 psd_hdr.len = ip_hdr->payload_len;
161 return get_16b_sum((uint16_t*)&psd_hdr, sizeof(struct ipv6_psd_header));
164 static inline uint16_t
165 get_ipv4_udptcp_checksum(struct ipv4_hdr *ipv4_hdr, uint16_t *l4_hdr)
170 l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - sizeof(struct ipv4_hdr);
172 cksum = get_16b_sum(l4_hdr, l4_len);
173 cksum += get_ipv4_psd_sum(ipv4_hdr);
175 cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
176 cksum = (~cksum) & 0xffff;
179 return (uint16_t)cksum;
183 static inline uint16_t
184 get_ipv6_udptcp_checksum(struct ipv6_hdr *ipv6_hdr, uint16_t *l4_hdr)
189 l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
191 cksum = get_16b_sum(l4_hdr, l4_len);
192 cksum += get_ipv6_psd_sum(ipv6_hdr);
194 cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
195 cksum = (~cksum) & 0xffff;
199 return (uint16_t)cksum;
/*
 * NOTE(review): this function is reproduced exactly as it appears in the
 * listing. The listing looks lossy: each line carries a fused listing
 * number, and braces, `else` lines, `#endif` lines and several local
 * declarations are not visible. Only comments have been added here;
 * restore the missing lines from the upstream file before building.
 */
/*
 * pkt_burst_checksum_forward: forward one burst of packets for stream `fs`,
 * refreshing the IP and L4 checksums on the way.
 *
 * Visible steps:
 *   1. Receive a burst from fs->rx_port / fs->rx_queue.
 *   2. For each packet, work out the L2 length (Ethernet plus at most one
 *      VLAN tag), classify it as IPv4 or IPv6, then either request a
 *      checksum offload (by setting PKT_TX_* bits) or compute the checksum
 *      in software, depending on the TX port's tx_ol_flags.
 *   3. Store l2_len, l3_len and ol_flags in the mbuf.
 *   4. Transmit on fs->tx_port / fs->tx_queue, free whatever was not sent,
 *      and update the stream counters.
 *
 * tx_ol_flags bits as tested below: 0x1 -> IP checksum offload,
 * 0x2 -> UDP, 0x4 -> TCP, 0x8 -> SCTP.
 */
204 * Forwarding of packets. Change the checksum field with HW or SW methods
205 * The HW/SW method selection depends on the ol_flags on every packet
208 pkt_burst_checksum_forward(struct fwd_stream *fs)
210 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
211 struct rte_port *txp;
213 struct ether_hdr *eth_hdr;
214 struct ipv4_hdr *ipv4_hdr;
215 struct ipv6_hdr *ipv6_hdr;
216 struct udp_hdr *udp_hdr;
217 struct tcp_hdr *tcp_hdr;
218 struct sctp_hdr *sctp_hdr;
/* pkt_ol_flags: flags received with the packet; tx_ol_flags: TX port configuration. */
224 uint16_t pkt_ol_flags;
225 uint16_t tx_ol_flags;
/* Per-burst counters of packets flagged with a bad IP / L4 checksum. */
231 uint32_t rx_bad_ip_csum;
232 uint32_t rx_bad_l4_csum;
234 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
237 uint64_t core_cycles;
240 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
241 start_tsc = rte_rdtsc();
245 * Receive a burst of packets and forward them.
247 nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
/* NOTE(review): the statement guarded by this test is not visible; presumably an early return. */
249 if (unlikely(nb_rx == 0))
252 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
253 fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
255 fs->rx_packets += nb_rx;
/*
 * NOTE(review): no initialisation of rx_bad_ip_csum / rx_bad_l4_csum is
 * visible before they are incremented in the loop; confirm they are
 * zeroed here in the upstream file.
 */
259 txp = &ports[fs->tx_port];
260 tx_ol_flags = txp->tx_ol_flags;
262 for (i = 0; i < nb_rx; i++) {
265 l2_len = sizeof(struct ether_hdr);
266 pkt_ol_flags = mb->ol_flags;
/* Start the TX flags from the received flags with the PKT_TX_L4_MASK bits cleared. */
267 ol_flags = (uint16_t) (pkt_ol_flags & (~PKT_TX_L4_MASK));
269 eth_hdr = (struct ether_hdr *) mb->pkt.data;
270 eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
271 if (eth_type == ETHER_TYPE_VLAN) {
272 /* Only allow single VLAN label here */
273 l2_len += sizeof(struct vlan_hdr);
/*
 * Re-read the EtherType sizeof(struct vlan_hdr) bytes after the outer one.
 * NOTE(review): `ð_hdr` below looks like a mis-decoded `&eth_hdr` (the
 * text `&eth` read as an HTML entity); confirm against the upstream file.
 */
274 eth_type = rte_be_to_cpu_16(*(uint16_t *)
275 ((uintptr_t)ð_hdr->ether_type +
276 sizeof(struct vlan_hdr)));
279 /* Update the L3/L4 checksum error packet count */
280 rx_bad_ip_csum += (uint16_t) ((pkt_ol_flags & PKT_RX_IP_CKSUM_BAD) != 0);
281 rx_bad_l4_csum += (uint16_t) ((pkt_ol_flags & PKT_RX_L4_CKSUM_BAD) != 0);
284 * Try to figure out L3 packet type by SW.
/* Only when none of the IPv4/IPv6 RX flags is set: fall back to the EtherType. */
286 if ((pkt_ol_flags & (PKT_RX_IPV4_HDR | PKT_RX_IPV4_HDR_EXT |
287 PKT_RX_IPV6_HDR | PKT_RX_IPV6_HDR_EXT)) == 0) {
288 if (eth_type == ETHER_TYPE_IPv4)
289 pkt_ol_flags |= PKT_RX_IPV4_HDR;
290 else if (eth_type == ETHER_TYPE_IPv6)
291 pkt_ol_flags |= PKT_RX_IPV6_HDR;
295 * Simplify the protocol parsing
296 * Assuming the incoming packets format as
297 * Ethernet2 + optional single VLAN
299 * + udp or tcp or sctp or others
/* ---- IPv4 branch ---- */
301 if (pkt_ol_flags & PKT_RX_IPV4_HDR) {
303 /* Do not support ipv4 option field */
304 l3_len = sizeof(struct ipv4_hdr) ;
306 ipv4_hdr = (struct ipv4_hdr *) (rte_pktmbuf_mtod(mb,
307 unsigned char *) + l2_len);
309 l4_proto = ipv4_hdr->next_proto_id;
311 /* Do not delete, this is required by HW*/
312 ipv4_hdr->hdr_checksum = 0;
/* Bit 0x1 of tx_ol_flags: request IP checksum offload. */
314 if (tx_ol_flags & 0x1) {
316 ol_flags |= PKT_TX_IP_CKSUM;
319 /* SW checksum calculation */
/*
 * NOTE(review): the source address is incremented before the checksum is
 * recomputed, so the forwarded packet differs from the received one.
 * Presumably deliberate for this test engine; confirm.
 */
320 ipv4_hdr->src_addr++;
321 ipv4_hdr->hdr_checksum = get_ipv4_cksum(ipv4_hdr);
324 if (l4_proto == IPPROTO_UDP) {
325 udp_hdr = (struct udp_hdr*) (rte_pktmbuf_mtod(mb,
326 unsigned char *) + l2_len + l3_len);
/* Bit 0x2 of tx_ol_flags: request UDP checksum offload. */
327 if (tx_ol_flags & 0x2) {
329 ol_flags |= PKT_TX_UDP_CKSUM;
330 /* Pseudo header sum need be set properly */
331 udp_hdr->dgram_cksum = get_ipv4_psd_sum(ipv4_hdr);
334 /* SW Implementation, clear checksum field first */
335 udp_hdr->dgram_cksum = 0;
336 udp_hdr->dgram_cksum = get_ipv4_udptcp_checksum(ipv4_hdr,
340 else if (l4_proto == IPPROTO_TCP){
341 tcp_hdr = (struct tcp_hdr*) (rte_pktmbuf_mtod(mb,
342 unsigned char *) + l2_len + l3_len);
/* Bit 0x4 of tx_ol_flags: request TCP checksum offload; the field is set to the pseudo-header sum. */
343 if (tx_ol_flags & 0x4) {
344 ol_flags |= PKT_TX_TCP_CKSUM;
345 tcp_hdr->cksum = get_ipv4_psd_sum(ipv4_hdr);
349 tcp_hdr->cksum = get_ipv4_udptcp_checksum(ipv4_hdr,
353 else if (l4_proto == IPPROTO_SCTP) {
354 sctp_hdr = (struct sctp_hdr*) (rte_pktmbuf_mtod(mb,
355 unsigned char *) + l2_len + l3_len);
/* Bit 0x8 of tx_ol_flags: request SCTP checksum offload. */
357 if (tx_ol_flags & 0x8) {
358 ol_flags |= PKT_TX_SCTP_CKSUM;
361 /* Sanity check, only number of 4 bytes supported */
/*
 * NOTE(review): the test uses the IP total length, not the SCTP payload
 * length, and the message has no trailing newline.
 */
362 if ((rte_be_to_cpu_16(ipv4_hdr->total_length) % 4) != 0)
363 printf("sctp payload must be a multiple "
364 "of 4 bytes for checksum offload");
/* No software SCTP checksum computation is visible in this listing. */
368 /* CRC32c sample code available in RFC3309 */
371 /* End of L4 Handling*/
/* ---- IPv6 branch: no IP header checksum; only the L4 checksum is handled ---- */
373 else if (pkt_ol_flags & PKT_RX_IPV6_HDR) {
375 ipv6_hdr = (struct ipv6_hdr *) (rte_pktmbuf_mtod(mb,
376 unsigned char *) + l2_len);
/* Fixed header size only; no extension-header handling is visible here. */
377 l3_len = sizeof(struct ipv6_hdr) ;
378 l4_proto = ipv6_hdr->proto;
380 if (l4_proto == IPPROTO_UDP) {
381 udp_hdr = (struct udp_hdr*) (rte_pktmbuf_mtod(mb,
382 unsigned char *) + l2_len + l3_len);
383 if (tx_ol_flags & 0x2) {
385 ol_flags |= PKT_TX_UDP_CKSUM;
386 udp_hdr->dgram_cksum = get_ipv6_psd_sum(ipv6_hdr);
389 /* SW Implementation */
390 /* checksum field need be clear first */
391 udp_hdr->dgram_cksum = 0;
392 udp_hdr->dgram_cksum = get_ipv6_udptcp_checksum(ipv6_hdr,
396 else if (l4_proto == IPPROTO_TCP) {
397 tcp_hdr = (struct tcp_hdr*) (rte_pktmbuf_mtod(mb,
398 unsigned char *) + l2_len + l3_len);
399 if (tx_ol_flags & 0x4) {
400 ol_flags |= PKT_TX_TCP_CKSUM;
401 tcp_hdr->cksum = get_ipv6_psd_sum(ipv6_hdr);
405 tcp_hdr->cksum = get_ipv6_udptcp_checksum(ipv6_hdr,
409 else if (l4_proto == IPPROTO_SCTP) {
410 sctp_hdr = (struct sctp_hdr*) (rte_pktmbuf_mtod(mb,
411 unsigned char *) + l2_len + l3_len);
413 if (tx_ol_flags & 0x8) {
414 ol_flags |= PKT_TX_SCTP_CKSUM;
416 /* Sanity check, only number of 4 bytes supported by HW */
/* NOTE(review): this message also has no trailing newline. */
417 if ((rte_be_to_cpu_16(ipv6_hdr->payload_len) % 4) != 0)
418 printf("sctp payload must be a multiple "
419 "of 4 bytes for checksum offload");
422 /* CRC32c sample code available in RFC3309 */
/*
 * NOTE(review): this message appears to be printed for IPv6 packets whose
 * L4 protocol is none of UDP/TCP/SCTP; its wording seems unrelated to
 * checksum handling. Confirm intent.
 */
426 printf("Test flow control for 1G PMD \n");
428 /* End of L4 handling (IPv6 branch) */
/*
 * Neither IPv4 nor IPv6.
 * NOTE(review): no assignment to l3_len is visible on this path although
 * it is stored into the mbuf below; confirm it is set in the upstream file.
 */
432 printf("Unhandled packet type: %#hx\n", eth_type);
435 /* Combine the packet header write. VLAN is not consider here */
436 mb->pkt.vlan_macip.f.l2_len = l2_len;
437 mb->pkt.vlan_macip.f.l3_len = l3_len;
438 mb->ol_flags = ol_flags;
/* Transmit the whole burst and fold the per-burst counters into the stream statistics. */
440 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
441 fs->tx_packets += nb_tx;
442 fs->rx_bad_ip_csum += rx_bad_ip_csum;
443 fs->rx_bad_l4_csum += rx_bad_l4_csum;
445 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
446 fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
/* Packets not accepted by the TX burst are counted as dropped and their mbufs freed. */
448 if (unlikely(nb_tx < nb_rx)) {
449 fs->fwd_dropped += (nb_rx - nb_tx);
451 rte_pktmbuf_free(pkts_burst[nb_tx]);
452 } while (++nb_tx < nb_rx);
/* Add the TSC cycles spent in this call to the stream total. */
454 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
455 end_tsc = rte_rdtsc();
456 core_cycles = (end_tsc - start_tsc);
457 fs->core_cycles = (uint64_t) (fs->core_cycles + core_cycles);
462 struct fwd_engine csum_fwd_engine = {
463 .fwd_mode_name = "csum",
464 .port_fwd_begin = NULL,
465 .port_fwd_end = NULL,
466 .packet_fwd = pkt_burst_checksum_forward,