e90a61e35de6e7bf4a9ccf595eb258f36a53129a
[dpdk.git] / examples / ip_fragmentation / main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdint.h>
8 #include <inttypes.h>
9 #include <sys/types.h>
10 #include <sys/param.h>
11 #include <string.h>
12 #include <sys/queue.h>
13 #include <stdarg.h>
14 #include <errno.h>
15 #include <getopt.h>
16
17 #include <rte_common.h>
18 #include <rte_byteorder.h>
19 #include <rte_log.h>
20 #include <rte_memory.h>
21 #include <rte_memcpy.h>
22 #include <rte_eal.h>
23 #include <rte_launch.h>
24 #include <rte_atomic.h>
25 #include <rte_cycles.h>
26 #include <rte_prefetch.h>
27 #include <rte_lcore.h>
28 #include <rte_per_lcore.h>
29 #include <rte_branch_prediction.h>
30 #include <rte_interrupts.h>
31 #include <rte_random.h>
32 #include <rte_debug.h>
33 #include <rte_ether.h>
34 #include <rte_ethdev.h>
35 #include <rte_mempool.h>
36 #include <rte_mbuf.h>
37 #include <rte_lpm.h>
38 #include <rte_lpm6.h>
39 #include <rte_ip.h>
40 #include <rte_string_fns.h>
41
42 #include <rte_ip_frag.h>
43
/* Log type used by the RTE_LOG(..., IP_FRAG, ...) calls in this file */
#define RTE_LOGTYPE_IP_FRAG RTE_LOGTYPE_USER1

/* allow max jumbo frame 9.5 KB (0x2600 = 9728 bytes) */
#define JUMBO_FRAME_MAX_SIZE    0x2600

/* Integer division of a by b, rounded up */
#define ROUNDUP_DIV(a, b)       (((a) + (b) - 1) / (b))

/*
 * Default byte size for the IPv4 and IPv6 Maximum Transfer Unit (MTU).
 * These values include the size of the IP header.
 */
#define IPV4_MTU_DEFAULT        ETHER_MTU
#define IPV6_MTU_DEFAULT        ETHER_MTU

/*
 * The overhead from max frame size to MTU.
 * We have to consider the max possible overhead:
 * Ethernet header, CRC and two VLAN headers.
 */
#define MTU_OVERHEAD    \
        (ETHER_HDR_LEN + ETHER_CRC_LEN + 2 * sizeof(struct vlan_hdr))

/*
 * Default payload in bytes for IPv4 and IPv6 packets
 * (default MTU minus the size of the respective IP header).
 */
#define IPV4_DEFAULT_PAYLOAD    (IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr))
#define IPV6_DEFAULT_PAYLOAD    (IPV6_MTU_DEFAULT - sizeof(struct ipv6_hdr))

/*
 * Max number of fragments per packet expected - defined by config file.
 */
#define MAX_PACKET_FRAG RTE_LIBRTE_IP_FRAG_MAX_FRAG

/* Number of mbufs requested for each mempool created in init_mem() */
#define NB_MBUF   8192

/* Max packets per RX burst; also the TX queue fill level that triggers a send */
#define MAX_PKT_BURST   32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET 3

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
91
/* ethernet addresses of ports, indexed by port id */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

/*
 * printf helpers for an IPv4 address held in a 32-bit integer:
 * IPv4_BYTES_FMT is the format string, IPv4_BYTES(addr) expands to the four
 * matching arguments, most significant byte first.
 */
#ifndef IPv4_BYTES
#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8
#define IPv4_BYTES(addr) \
                (uint8_t) (((addr) >> 24) & 0xFF),\
                (uint8_t) (((addr) >> 16) & 0xFF),\
                (uint8_t) (((addr) >> 8) & 0xFF),\
                (uint8_t) ((addr) & 0xFF)
#endif

/*
 * printf helpers for an IPv6 address held in a 16-byte array:
 * IPv6_BYTES_FMT prints eight colon-separated groups of two hex bytes,
 * IPv6_BYTES(addr) expands to the sixteen matching arguments.
 */
#ifndef IPv6_BYTES
#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
                       "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
#define IPv6_BYTES(addr) \
        addr[0],  addr[1], addr[2],  addr[3], \
        addr[4],  addr[5], addr[6],  addr[7], \
        addr[8],  addr[9], addr[10], addr[11],\
        addr[12], addr[13],addr[14], addr[15]
#endif

/* Size in bytes of an IPv6 address */
#define IPV6_ADDR_LEN 16

/* mask of enabled ports (bit N set means port N is enabled; set by -p) */
static int enabled_port_mask = 0;

/* number of RX queues (= ports) per lcore, set by -q */
static int rx_queue_per_lcore = 1;
120
/*
 * Capacity of a per-port TX staging table: twice the larger of the burst
 * size and the maximum number of fragments per packet.
 */
#define MBUF_TABLE_SIZE  (2 * MAX(MAX_PKT_BURST, MAX_PACKET_FRAG))

/* Packets staged for transmission on one port */
struct mbuf_table {
        uint16_t len;                                   /* number of valid entries in m_table */
        struct rte_mbuf *m_table[MBUF_TABLE_SIZE];      /* staged packets */
};
127
/* Per RX queue state used by the forwarding path */
struct rx_queue {
        struct rte_mempool *direct_pool;        /* passed to rte_ipv{4,6}_fragment_packet() as direct pool */
        struct rte_mempool *indirect_pool;      /* passed to rte_ipv{4,6}_fragment_packet() as indirect pool */
        struct rte_lpm *lpm;                    /* IPv4 table used to look up the output port */
        struct rte_lpm6 *lpm6;                  /* IPv6 table used to look up the output port */
        uint16_t portid;                        /* port this queue receives from */
};
135
#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT 16
/* Per-lcore configuration: the RX queues it polls and its TX staging tables */
struct lcore_queue_conf {
        uint16_t n_rx_queue;                                    /* number of valid rx_queue_list entries */
        uint16_t tx_queue_id[RTE_MAX_ETHPORTS];                 /* TX queue to use, indexed by port id */
        struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];  /* RX queues polled by this lcore */
        struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];           /* staged TX packets, indexed by port id */
} __rte_cache_aligned;
/* One configuration entry per lcore, indexed by lcore id */
struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
145
/*
 * Template Ethernet port configuration: RX accepts jumbo frames up to
 * JUMBO_FRAME_MAX_SIZE with checksum and scatter offloads; TX requests
 * IPv4 checksum and multi-segment offloads.
 */
static struct rte_eth_conf port_conf = {
        .rxmode = {
                .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
                .split_hdr_size = 0,
                .offloads = (DEV_RX_OFFLOAD_CHECKSUM |
                             DEV_RX_OFFLOAD_SCATTER |
                             DEV_RX_OFFLOAD_JUMBO_FRAME),
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
                .offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM |
                             DEV_TX_OFFLOAD_MULTI_SEGS),
        },
};
160
/*
 * IPv4 forwarding table
 */
struct l3fwd_ipv4_route {
        uint32_t ip;            /* network prefix, host byte order (built with IPv4()) */
        uint8_t  depth;         /* prefix length in bits */
        uint8_t  if_out;        /* output port id, stored as the LPM next hop */
};

/* Static routes: 100.<10*(N+1)>.0.0/16 is sent out of port N, for N = 0..7 */
struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = {
                {IPv4(100,10,0,0), 16, 0},
                {IPv4(100,20,0,0), 16, 1},
                {IPv4(100,30,0,0), 16, 2},
                {IPv4(100,40,0,0), 16, 3},
                {IPv4(100,50,0,0), 16, 4},
                {IPv4(100,60,0,0), 16, 5},
                {IPv4(100,70,0,0), 16, 6},
                {IPv4(100,80,0,0), 16, 7},
};
180
/*
 * IPv6 forwarding table
 */

struct l3fwd_ipv6_route {
        uint8_t ip[IPV6_ADDR_LEN];      /* network prefix, one byte per array element */
        uint8_t depth;                  /* prefix length in bits */
        uint8_t if_out;                 /* output port id, stored as the LPM6 next hop */
};

/* Static routes: the /48 prefix whose first byte is N+1 is sent out of port N */
static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = {
        {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
        {{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
        {{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
        {{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
        {{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
        {{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
        {{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
        {{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
};
201
/* Sizing of the per-socket LPM (IPv4) and LPM6 (IPv6) tables */
#define LPM_MAX_RULES         1024
#define LPM6_MAX_RULES         1024
#define LPM6_NUMBER_TBL8S (1 << 16)

/* Configuration handed to rte_lpm6_create() for every socket */
struct rte_lpm6_config lpm6_config = {
                .max_rules = LPM6_MAX_RULES,
                .number_tbl8s = LPM6_NUMBER_TBL8S,
                .flags = 0
};
211
/*
 * Per-socket resources, indexed by socket id. Entries start out NULL and are
 * filled in by init_mem() for every socket that has an enabled lcore.
 */
static struct rte_mempool *socket_direct_pool[RTE_MAX_NUMA_NODES];
static struct rte_mempool *socket_indirect_pool[RTE_MAX_NUMA_NODES];
static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES];
static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES];
216
217 /* Send burst of packets on an output interface */
218 static inline int
219 send_burst(struct lcore_queue_conf *qconf, uint16_t n, uint16_t port)
220 {
221         struct rte_mbuf **m_table;
222         int ret;
223         uint16_t queueid;
224
225         queueid = qconf->tx_queue_id[port];
226         m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
227
228         ret = rte_eth_tx_burst(port, queueid, m_table, n);
229         if (unlikely(ret < n)) {
230                 do {
231                         rte_pktmbuf_free(m_table[ret]);
232                 } while (++ret < n);
233         }
234
235         return 0;
236 }
237
238 static inline void
239 l3fwd_simple_forward(struct rte_mbuf *m, struct lcore_queue_conf *qconf,
240                 uint8_t queueid, uint16_t port_in)
241 {
242         struct rx_queue *rxq;
243         uint32_t i, len, next_hop;
244         uint8_t ipv6;
245         uint16_t port_out;
246         int32_t len2;
247
248         ipv6 = 0;
249         rxq = &qconf->rx_queue_list[queueid];
250
251         /* by default, send everything back to the source port */
252         port_out = port_in;
253
254         /* Remove the Ethernet header and trailer from the input packet */
255         rte_pktmbuf_adj(m, (uint16_t)sizeof(struct ether_hdr));
256
257         /* Build transmission burst */
258         len = qconf->tx_mbufs[port_out].len;
259
260         /* if this is an IPv4 packet */
261         if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
262                 struct ipv4_hdr *ip_hdr;
263                 uint32_t ip_dst;
264                 /* Read the lookup key (i.e. ip_dst) from the input packet */
265                 ip_hdr = rte_pktmbuf_mtod(m, struct ipv4_hdr *);
266                 ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr);
267
268                 /* Find destination port */
269                 if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop) == 0 &&
270                                 (enabled_port_mask & 1 << next_hop) != 0) {
271                         port_out = next_hop;
272
273                         /* Build transmission burst for new port */
274                         len = qconf->tx_mbufs[port_out].len;
275                 }
276
277                 /* if we don't need to do any fragmentation */
278                 if (likely (IPV4_MTU_DEFAULT >= m->pkt_len)) {
279                         qconf->tx_mbufs[port_out].m_table[len] = m;
280                         len2 = 1;
281                 } else {
282                         len2 = rte_ipv4_fragment_packet(m,
283                                 &qconf->tx_mbufs[port_out].m_table[len],
284                                 (uint16_t)(MBUF_TABLE_SIZE - len),
285                                 IPV4_MTU_DEFAULT,
286                                 rxq->direct_pool, rxq->indirect_pool);
287
288                         /* Free input packet */
289                         rte_pktmbuf_free(m);
290
291                         /* If we fail to fragment the packet */
292                         if (unlikely (len2 < 0))
293                                 return;
294                 }
295         } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
296                 /* if this is an IPv6 packet */
297                 struct ipv6_hdr *ip_hdr;
298
299                 ipv6 = 1;
300
301                 /* Read the lookup key (i.e. ip_dst) from the input packet */
302                 ip_hdr = rte_pktmbuf_mtod(m, struct ipv6_hdr *);
303
304                 /* Find destination port */
305                 if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr,
306                                                 &next_hop) == 0 &&
307                                 (enabled_port_mask & 1 << next_hop) != 0) {
308                         port_out = next_hop;
309
310                         /* Build transmission burst for new port */
311                         len = qconf->tx_mbufs[port_out].len;
312                 }
313
314                 /* if we don't need to do any fragmentation */
315                 if (likely (IPV6_MTU_DEFAULT >= m->pkt_len)) {
316                         qconf->tx_mbufs[port_out].m_table[len] = m;
317                         len2 = 1;
318                 } else {
319                         len2 = rte_ipv6_fragment_packet(m,
320                                 &qconf->tx_mbufs[port_out].m_table[len],
321                                 (uint16_t)(MBUF_TABLE_SIZE - len),
322                                 IPV6_MTU_DEFAULT,
323                                 rxq->direct_pool, rxq->indirect_pool);
324
325                         /* Free input packet */
326                         rte_pktmbuf_free(m);
327
328                         /* If we fail to fragment the packet */
329                         if (unlikely (len2 < 0))
330                                 return;
331                 }
332         }
333         /* else, just forward the packet */
334         else {
335                 qconf->tx_mbufs[port_out].m_table[len] = m;
336                 len2 = 1;
337         }
338
339         for (i = len; i < len + len2; i ++) {
340                 void *d_addr_bytes;
341
342                 m = qconf->tx_mbufs[port_out].m_table[i];
343                 struct ether_hdr *eth_hdr = (struct ether_hdr *)
344                         rte_pktmbuf_prepend(m, (uint16_t)sizeof(struct ether_hdr));
345                 if (eth_hdr == NULL) {
346                         rte_panic("No headroom in mbuf.\n");
347                 }
348
349                 m->l2_len = sizeof(struct ether_hdr);
350
351                 /* 02:00:00:00:00:xx */
352                 d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
353                 *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)port_out << 40);
354
355                 /* src addr */
356                 ether_addr_copy(&ports_eth_addr[port_out], &eth_hdr->s_addr);
357                 if (ipv6)
358                         eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv6);
359                 else
360                         eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
361         }
362
363         len += len2;
364
365         if (likely(len < MAX_PKT_BURST)) {
366                 qconf->tx_mbufs[port_out].len = (uint16_t)len;
367                 return;
368         }
369
370         /* Transmit packets */
371         send_burst(qconf, (uint16_t)len, port_out);
372         qconf->tx_mbufs[port_out].len = 0;
373 }
374
375 /* main processing loop */
376 static int
377 main_loop(__attribute__((unused)) void *dummy)
378 {
379         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
380         unsigned lcore_id;
381         uint64_t prev_tsc, diff_tsc, cur_tsc;
382         int i, j, nb_rx;
383         uint16_t portid;
384         struct lcore_queue_conf *qconf;
385         const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
386
387         prev_tsc = 0;
388
389         lcore_id = rte_lcore_id();
390         qconf = &lcore_queue_conf[lcore_id];
391
392         if (qconf->n_rx_queue == 0) {
393                 RTE_LOG(INFO, IP_FRAG, "lcore %u has nothing to do\n", lcore_id);
394                 return 0;
395         }
396
397         RTE_LOG(INFO, IP_FRAG, "entering main loop on lcore %u\n", lcore_id);
398
399         for (i = 0; i < qconf->n_rx_queue; i++) {
400
401                 portid = qconf->rx_queue_list[i].portid;
402                 RTE_LOG(INFO, IP_FRAG, " -- lcoreid=%u portid=%d\n", lcore_id,
403                                 portid);
404         }
405
406         while (1) {
407
408                 cur_tsc = rte_rdtsc();
409
410                 /*
411                  * TX burst queue drain
412                  */
413                 diff_tsc = cur_tsc - prev_tsc;
414                 if (unlikely(diff_tsc > drain_tsc)) {
415
416                         /*
417                          * This could be optimized (use queueid instead of
418                          * portid), but it is not called so often
419                          */
420                         for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
421                                 if (qconf->tx_mbufs[portid].len == 0)
422                                         continue;
423                                 send_burst(&lcore_queue_conf[lcore_id],
424                                            qconf->tx_mbufs[portid].len,
425                                            portid);
426                                 qconf->tx_mbufs[portid].len = 0;
427                         }
428
429                         prev_tsc = cur_tsc;
430                 }
431
432                 /*
433                  * Read packet from RX queues
434                  */
435                 for (i = 0; i < qconf->n_rx_queue; i++) {
436
437                         portid = qconf->rx_queue_list[i].portid;
438                         nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst,
439                                                  MAX_PKT_BURST);
440
441                         /* Prefetch first packets */
442                         for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
443                                 rte_prefetch0(rte_pktmbuf_mtod(
444                                                 pkts_burst[j], void *));
445                         }
446
447                         /* Prefetch and forward already prefetched packets */
448                         for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
449                                 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
450                                                 j + PREFETCH_OFFSET], void *));
451                                 l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
452                         }
453
454                         /* Forward remaining prefetched packets */
455                         for (; j < nb_rx; j++) {
456                                 l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
457                         }
458                 }
459         }
460 }
461
/* Print the command-line synopsis of the application to stdout */
static void
print_usage(const char *prgname)
{
        printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n", prgname);
        fputs("  -p PORTMASK: hexadecimal bitmask of ports to configure\n", stdout);
        fputs("  -q NQ: number of queue (=ports) per lcore (default is 1)\n", stdout);
}
471
/*
 * Parse a hexadecimal port mask.
 *
 * portmask - NUL-terminated string holding the mask in base 16
 *
 * Returns the mask as a positive int, or -1 when the string is empty, has
 * trailing characters, is zero, overflows, or does not fit a non-negative
 * int (callers treat a negative return as "invalid", so such a mask could
 * only ever be reported truncated).
 */
static int
parse_portmask(const char *portmask)
{
        char *end = NULL;
        unsigned long pm;

        /* parse hexadecimal string */
        errno = 0;
        pm = strtoul(portmask, &end, 16);
        if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
                return -1;

        /* out of range for strtoul(), or would be truncated by the int return */
        if (errno == ERANGE || pm > (unsigned long)INT32_MAX)
                return -1;

        if (pm == 0)
                return -1;

        return (int)pm;
}
488
489 static int
490 parse_nqueue(const char *q_arg)
491 {
492         char *end = NULL;
493         unsigned long n;
494
495         /* parse hexadecimal string */
496         n = strtoul(q_arg, &end, 10);
497         if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
498                 return -1;
499         if (n == 0)
500                 return -1;
501         if (n >= MAX_RX_QUEUE_PER_LCORE)
502                 return -1;
503
504         return n;
505 }
506
507 /* Parse the argument given in the command line of the application */
508 static int
509 parse_args(int argc, char **argv)
510 {
511         int opt, ret;
512         char **argvopt;
513         int option_index;
514         char *prgname = argv[0];
515         static struct option lgopts[] = {
516                 {NULL, 0, 0, 0}
517         };
518
519         argvopt = argv;
520
521         while ((opt = getopt_long(argc, argvopt, "p:q:",
522                                   lgopts, &option_index)) != EOF) {
523
524                 switch (opt) {
525                 /* portmask */
526                 case 'p':
527                         enabled_port_mask = parse_portmask(optarg);
528                         if (enabled_port_mask < 0) {
529                                 printf("invalid portmask\n");
530                                 print_usage(prgname);
531                                 return -1;
532                         }
533                         break;
534
535                 /* nqueue */
536                 case 'q':
537                         rx_queue_per_lcore = parse_nqueue(optarg);
538                         if (rx_queue_per_lcore < 0) {
539                                 printf("invalid queue number\n");
540                                 print_usage(prgname);
541                                 return -1;
542                         }
543                         break;
544
545                 /* long options */
546                 case 0:
547                         print_usage(prgname);
548                         return -1;
549
550                 default:
551                         print_usage(prgname);
552                         return -1;
553                 }
554         }
555
556         if (enabled_port_mask == 0) {
557                 printf("portmask not specified\n");
558                 print_usage(prgname);
559                 return -1;
560         }
561
562         if (optind >= 0)
563                 argv[optind-1] = prgname;
564
565         ret = optind-1;
566         optind = 1; /* reset getopt lib */
567         return ret;
568 }
569
570 static void
571 print_ethaddr(const char *name, struct ether_addr *eth_addr)
572 {
573         char buf[ETHER_ADDR_FMT_SIZE];
574         ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
575         printf("%s%s", name, buf);
576 }
577
578 /* Check the link status of all ports in up to 9s, and print them finally */
579 static void
580 check_all_ports_link_status(uint32_t port_mask)
581 {
582 #define CHECK_INTERVAL 100 /* 100ms */
583 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
584         uint16_t portid;
585         uint8_t count, all_ports_up, print_flag = 0;
586         struct rte_eth_link link;
587
588         printf("\nChecking link status");
589         fflush(stdout);
590         for (count = 0; count <= MAX_CHECK_TIME; count++) {
591                 all_ports_up = 1;
592                 RTE_ETH_FOREACH_DEV(portid) {
593                         if ((port_mask & (1 << portid)) == 0)
594                                 continue;
595                         memset(&link, 0, sizeof(link));
596                         rte_eth_link_get_nowait(portid, &link);
597                         /* print link status if flag set */
598                         if (print_flag == 1) {
599                                 if (link.link_status)
600                                         printf(
601                                         "Port%d Link Up .Speed %u Mbps - %s\n",
602                                                 portid, link.link_speed,
603                                 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
604                                         ("full-duplex") : ("half-duplex\n"));
605                                 else
606                                         printf("Port %d Link Down\n", portid);
607                                 continue;
608                         }
609                         /* clear all_ports_up flag if any link down */
610                         if (link.link_status == ETH_LINK_DOWN) {
611                                 all_ports_up = 0;
612                                 break;
613                         }
614                 }
615                 /* after finally printing all link status, get out */
616                 if (print_flag == 1)
617                         break;
618
619                 if (all_ports_up == 0) {
620                         printf(".");
621                         fflush(stdout);
622                         rte_delay_ms(CHECK_INTERVAL);
623                 }
624
625                 /* set the print_flag if all ports up or timeout */
626                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
627                         print_flag = 1;
628                         printf("\ndone\n");
629                 }
630         }
631 }
632
633 /* Check L3 packet type detection capablity of the NIC port */
634 static int
635 check_ptype(int portid)
636 {
637         int i, ret;
638         int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0;
639         uint32_t ptype_mask = RTE_PTYPE_L3_MASK;
640
641         ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
642         if (ret <= 0)
643                 return 0;
644
645         uint32_t ptypes[ret];
646
647         ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
648         for (i = 0; i < ret; ++i) {
649                 if (ptypes[i] & RTE_PTYPE_L3_IPV4)
650                         ptype_l3_ipv4 = 1;
651                 if (ptypes[i] & RTE_PTYPE_L3_IPV6)
652                         ptype_l3_ipv6 = 1;
653         }
654
655         if (ptype_l3_ipv4 == 0)
656                 printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
657
658         if (ptype_l3_ipv6 == 0)
659                 printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
660
661         if (ptype_l3_ipv4 && ptype_l3_ipv6)
662                 return 1;
663
664         return 0;
665
666 }
667
668 /* Parse packet type of a packet by SW */
669 static inline void
670 parse_ptype(struct rte_mbuf *m)
671 {
672         struct ether_hdr *eth_hdr;
673         uint32_t packet_type = RTE_PTYPE_UNKNOWN;
674         uint16_t ether_type;
675
676         eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
677         ether_type = eth_hdr->ether_type;
678         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
679                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
680         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
681                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
682
683         m->packet_type = packet_type;
684 }
685
686 /* callback function to detect packet type for a queue of a port */
687 static uint16_t
688 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
689                    struct rte_mbuf *pkts[], uint16_t nb_pkts,
690                    uint16_t max_pkts __rte_unused,
691                    void *user_param __rte_unused)
692 {
693         uint16_t i;
694
695         for (i = 0; i < nb_pkts; ++i)
696                 parse_ptype(pkts[i]);
697
698         return nb_pkts;
699 }
700
701 static int
702 init_routing_table(void)
703 {
704         struct rte_lpm *lpm;
705         struct rte_lpm6 *lpm6;
706         int socket, ret;
707         unsigned i;
708
709         for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
710                 if (socket_lpm[socket]) {
711                         lpm = socket_lpm[socket];
712                         /* populate the LPM table */
713                         for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) {
714                                 ret = rte_lpm_add(lpm,
715                                         l3fwd_ipv4_route_array[i].ip,
716                                         l3fwd_ipv4_route_array[i].depth,
717                                         l3fwd_ipv4_route_array[i].if_out);
718
719                                 if (ret < 0) {
720                                         RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
721                                                 "LPM table\n", i);
722                                         return -1;
723                                 }
724
725                                 RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv4_BYTES_FMT
726                                                 "/%d (port %d)\n",
727                                         socket,
728                                         IPv4_BYTES(l3fwd_ipv4_route_array[i].ip),
729                                         l3fwd_ipv4_route_array[i].depth,
730                                         l3fwd_ipv4_route_array[i].if_out);
731                         }
732                 }
733
734                 if (socket_lpm6[socket]) {
735                         lpm6 = socket_lpm6[socket];
736                         /* populate the LPM6 table */
737                         for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) {
738                                 ret = rte_lpm6_add(lpm6,
739                                         l3fwd_ipv6_route_array[i].ip,
740                                         l3fwd_ipv6_route_array[i].depth,
741                                         l3fwd_ipv6_route_array[i].if_out);
742
743                                 if (ret < 0) {
744                                         RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
745                                                 "LPM6 table\n", i);
746                                         return -1;
747                                 }
748
749                                 RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv6_BYTES_FMT
750                                                 "/%d (port %d)\n",
751                                         socket,
752                                         IPv6_BYTES(l3fwd_ipv6_route_array[i].ip),
753                                         l3fwd_ipv6_route_array[i].depth,
754                                         l3fwd_ipv6_route_array[i].if_out);
755                         }
756                 }
757         }
758         return 0;
759 }
760
761 static int
762 init_mem(void)
763 {
764         char buf[PATH_MAX];
765         struct rte_mempool *mp;
766         struct rte_lpm *lpm;
767         struct rte_lpm6 *lpm6;
768         struct rte_lpm_config lpm_config;
769         int socket;
770         unsigned lcore_id;
771
772         /* traverse through lcores and initialize structures on each socket */
773
774         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
775
776                 if (rte_lcore_is_enabled(lcore_id) == 0)
777                         continue;
778
779                 socket = rte_lcore_to_socket_id(lcore_id);
780
781                 if (socket == SOCKET_ID_ANY)
782                         socket = 0;
783
784                 if (socket_direct_pool[socket] == NULL) {
785                         RTE_LOG(INFO, IP_FRAG, "Creating direct mempool on socket %i\n",
786                                         socket);
787                         snprintf(buf, sizeof(buf), "pool_direct_%i", socket);
788
789                         mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32,
790                                 0, RTE_MBUF_DEFAULT_BUF_SIZE, socket);
791                         if (mp == NULL) {
792                                 RTE_LOG(ERR, IP_FRAG, "Cannot create direct mempool\n");
793                                 return -1;
794                         }
795                         socket_direct_pool[socket] = mp;
796                 }
797
798                 if (socket_indirect_pool[socket] == NULL) {
799                         RTE_LOG(INFO, IP_FRAG, "Creating indirect mempool on socket %i\n",
800                                         socket);
801                         snprintf(buf, sizeof(buf), "pool_indirect_%i", socket);
802
803                         mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32, 0, 0,
804                                 socket);
805                         if (mp == NULL) {
806                                 RTE_LOG(ERR, IP_FRAG, "Cannot create indirect mempool\n");
807                                 return -1;
808                         }
809                         socket_indirect_pool[socket] = mp;
810                 }
811
812                 if (socket_lpm[socket] == NULL) {
813                         RTE_LOG(INFO, IP_FRAG, "Creating LPM table on socket %i\n", socket);
814                         snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket);
815
816                         lpm_config.max_rules = LPM_MAX_RULES;
817                         lpm_config.number_tbl8s = 256;
818                         lpm_config.flags = 0;
819
820                         lpm = rte_lpm_create(buf, socket, &lpm_config);
821                         if (lpm == NULL) {
822                                 RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n");
823                                 return -1;
824                         }
825                         socket_lpm[socket] = lpm;
826                 }
827
828                 if (socket_lpm6[socket] == NULL) {
829                         RTE_LOG(INFO, IP_FRAG, "Creating LPM6 table on socket %i\n", socket);
830                         snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket);
831
832                         lpm6 = rte_lpm6_create(buf, socket, &lpm6_config);
833                         if (lpm6 == NULL) {
834                                 RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n");
835                                 return -1;
836                         }
837                         socket_lpm6[socket] = lpm6;
838                 }
839         }
840
841         return 0;
842 }
843
844 int
845 main(int argc, char **argv)
846 {
847         struct lcore_queue_conf *qconf;
848         struct rte_eth_dev_info dev_info;
849         struct rte_eth_txconf *txconf;
850         struct rx_queue *rxq;
851         int socket, ret;
852         uint16_t nb_ports;
853         uint16_t queueid = 0;
854         unsigned lcore_id = 0, rx_lcore_id = 0;
855         uint32_t n_tx_queue, nb_lcores;
856         uint16_t portid;
857
858         /* init EAL */
859         ret = rte_eal_init(argc, argv);
860         if (ret < 0)
861                 rte_exit(EXIT_FAILURE, "rte_eal_init failed");
862         argc -= ret;
863         argv += ret;
864
865         /* parse application arguments (after the EAL ones) */
866         ret = parse_args(argc, argv);
867         if (ret < 0)
868                 rte_exit(EXIT_FAILURE, "Invalid arguments");
869
870         nb_ports = rte_eth_dev_count_avail();
871         if (nb_ports == 0)
872                 rte_exit(EXIT_FAILURE, "No ports found!\n");
873
874         nb_lcores = rte_lcore_count();
875
876         /* initialize structures (mempools, lpm etc.) */
877         if (init_mem() < 0)
878                 rte_panic("Cannot initialize memory structures!\n");
879
880         /* check if portmask has non-existent ports */
881         if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned)))
882                 rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n");
883
884         /* initialize all ports */
885         RTE_ETH_FOREACH_DEV(portid) {
886                 struct rte_eth_conf local_port_conf = port_conf;
887                 struct rte_eth_rxconf rxq_conf;
888
889                 /* skip ports that are not enabled */
890                 if ((enabled_port_mask & (1 << portid)) == 0) {
891                         printf("Skipping disabled port %d\n", portid);
892                         continue;
893                 }
894
895                 qconf = &lcore_queue_conf[rx_lcore_id];
896
897                 /* limit the frame size to the maximum supported by NIC */
898                 rte_eth_dev_info_get(portid, &dev_info);
899                 local_port_conf.rxmode.max_rx_pkt_len = RTE_MIN(
900                     dev_info.max_rx_pktlen,
901                     local_port_conf.rxmode.max_rx_pkt_len);
902
903                 /* get the lcore_id for this port */
904                 while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
905                        qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) {
906
907                         rx_lcore_id ++;
908                         if (rx_lcore_id >= RTE_MAX_LCORE)
909                                 rte_exit(EXIT_FAILURE, "Not enough cores\n");
910
911                         qconf = &lcore_queue_conf[rx_lcore_id];
912                 }
913
914                 socket = (int) rte_lcore_to_socket_id(rx_lcore_id);
915                 if (socket == SOCKET_ID_ANY)
916                         socket = 0;
917
918                 rxq = &qconf->rx_queue_list[qconf->n_rx_queue];
919                 rxq->portid = portid;
920                 rxq->direct_pool = socket_direct_pool[socket];
921                 rxq->indirect_pool = socket_indirect_pool[socket];
922                 rxq->lpm = socket_lpm[socket];
923                 rxq->lpm6 = socket_lpm6[socket];
924                 qconf->n_rx_queue++;
925
926                 /* init port */
927                 printf("Initializing port %d on lcore %u...", portid,
928                        rx_lcore_id);
929                 fflush(stdout);
930
931                 n_tx_queue = nb_lcores;
932                 if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
933                         n_tx_queue = MAX_TX_QUEUE_PER_PORT;
934                 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
935                         local_port_conf.txmode.offloads |=
936                                 DEV_TX_OFFLOAD_MBUF_FAST_FREE;
937                 ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue,
938                                             &local_port_conf);
939                 if (ret < 0) {
940                         printf("\n");
941                         rte_exit(EXIT_FAILURE, "Cannot configure device: "
942                                 "err=%d, port=%d\n",
943                                 ret, portid);
944                 }
945
946                 /* set the mtu to the maximum received packet size */
947                 ret = rte_eth_dev_set_mtu(portid,
948                         local_port_conf.rxmode.max_rx_pkt_len - MTU_OVERHEAD);
949                 if (ret < 0) {
950                         printf("\n");
951                         rte_exit(EXIT_FAILURE, "Set MTU failed: "
952                                 "err=%d, port=%d\n",
953                         ret, portid);
954                 }
955
956                 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
957                                             &nb_txd);
958                 if (ret < 0) {
959                         printf("\n");
960                         rte_exit(EXIT_FAILURE, "Cannot adjust number of "
961                                 "descriptors: err=%d, port=%d\n", ret, portid);
962                 }
963
964                 /* init one RX queue */
965                 rxq_conf = dev_info.default_rxconf;
966                 rxq_conf.offloads = local_port_conf.rxmode.offloads;
967                 ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
968                                              socket, &rxq_conf,
969                                              socket_direct_pool[socket]);
970                 if (ret < 0) {
971                         printf("\n");
972                         rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
973                                 "err=%d, port=%d\n",
974                                 ret, portid);
975                 }
976
977                 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
978                 print_ethaddr(" Address:", &ports_eth_addr[portid]);
979                 printf("\n");
980
981                 /* init one TX queue per couple (lcore,port) */
982                 queueid = 0;
983                 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
984                         if (rte_lcore_is_enabled(lcore_id) == 0)
985                                 continue;
986
987                         socket = (int) rte_lcore_to_socket_id(lcore_id);
988                         printf("txq=%u,%d ", lcore_id, queueid);
989                         fflush(stdout);
990
991                         txconf = &dev_info.default_txconf;
992                         txconf->offloads = local_port_conf.txmode.offloads;
993                         ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
994                                                      socket, txconf);
995                         if (ret < 0) {
996                                 printf("\n");
997                                 rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
998                                         "err=%d, port=%d\n", ret, portid);
999                         }
1000
1001                         qconf = &lcore_queue_conf[lcore_id];
1002                         qconf->tx_queue_id[portid] = queueid;
1003                         queueid++;
1004                 }
1005
1006                 printf("\n");
1007         }
1008
1009         printf("\n");
1010
1011         /* start ports */
1012         RTE_ETH_FOREACH_DEV(portid) {
1013                 if ((enabled_port_mask & (1 << portid)) == 0) {
1014                         continue;
1015                 }
1016                 /* Start device */
1017                 ret = rte_eth_dev_start(portid);
1018                 if (ret < 0)
1019                         rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
1020                                 ret, portid);
1021
1022                 rte_eth_promiscuous_enable(portid);
1023
1024                 if (check_ptype(portid) == 0) {
1025                         rte_eth_add_rx_callback(portid, 0, cb_parse_ptype, NULL);
1026                         printf("Add Rx callback function to detect L3 packet type by SW :"
1027                                 " port = %d\n", portid);
1028                 }
1029         }
1030
1031         if (init_routing_table() < 0)
1032                 rte_exit(EXIT_FAILURE, "Cannot init routing table\n");
1033
1034         check_all_ports_link_status(enabled_port_mask);
1035
1036         /* launch per-lcore init on every lcore */
1037         rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
1038         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1039                 if (rte_eal_wait_lcore(lcore_id) < 0)
1040                         return -1;
1041         }
1042
1043         return 0;
1044 }