ethdev: fix max Rx packet length
[dpdk.git] / examples / l3fwd-graph / main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2020 Marvell International Ltd.
3  */
4
5 #include <arpa/inet.h>
6 #include <errno.h>
7 #include <getopt.h>
8 #include <inttypes.h>
9 #include <signal.h>
10 #include <stdarg.h>
11 #include <stdbool.h>
12 #include <stdint.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/socket.h>
17 #include <sys/types.h>
18 #include <sys/queue.h>
19 #include <unistd.h>
20
21 #include <rte_branch_prediction.h>
22 #include <rte_common.h>
23 #include <rte_cycles.h>
24 #include <rte_eal.h>
25 #include <rte_ethdev.h>
26 #include <rte_graph_worker.h>
27 #include <rte_launch.h>
28 #include <rte_lcore.h>
29 #include <rte_log.h>
30 #include <rte_mempool.h>
31 #include <rte_node_eth_api.h>
32 #include <rte_node_ip4_api.h>
33 #include <rte_per_lcore.h>
34 #include <rte_string_fns.h>
35 #include <rte_vect.h>
36
37 #include <cmdline_parse.h>
38 #include <cmdline_parse_etheraddr.h>
39
40 /* Log type */
41 #define RTE_LOGTYPE_L3FWD_GRAPH RTE_LOGTYPE_USER1
42
43 /*
44  * Configurable number of RX/TX ring descriptors
45  */
46 #define RTE_TEST_RX_DESC_DEFAULT 1024
47 #define RTE_TEST_TX_DESC_DEFAULT 1024
48
49 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
50 #define MAX_RX_QUEUE_PER_PORT 128
51
52 #define MAX_RX_QUEUE_PER_LCORE 16
53
54 #define MAX_LCORE_PARAMS 1024
55
56 #define NB_SOCKETS 8
57
58 /* Static global variables used within this file. */
59 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
60 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
61
62 /**< Ports set in promiscuous mode off by default. */
63 static int promiscuous_on;
64
65 static int numa_on = 1;   /**< NUMA is enabled by default. */
66 static int per_port_pool; /**< Use separate buffer pools per port; disabled */
67                           /**< by default */
68
69 static volatile bool force_quit;
70
71 /* Ethernet addresses of ports */
72 static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
73 static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
74 xmm_t val_eth[RTE_MAX_ETHPORTS];
75
76 /* Mask of enabled ports */
77 static uint32_t enabled_port_mask;
78
79 struct lcore_rx_queue {
80         uint16_t port_id;
81         uint8_t queue_id;
82         char node_name[RTE_NODE_NAMESIZE];
83 };
84
85 /* Lcore conf */
86 struct lcore_conf {
87         uint16_t n_rx_queue;
88         struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
89
90         struct rte_graph *graph;
91         char name[RTE_GRAPH_NAMESIZE];
92         rte_graph_t graph_id;
93 } __rte_cache_aligned;
94
95 static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
96
97 struct lcore_params {
98         uint16_t port_id;
99         uint8_t queue_id;
100         uint8_t lcore_id;
101 } __rte_cache_aligned;
102
103 static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
104 static struct lcore_params lcore_params_array_default[] = {
105         {0, 0, 2}, {0, 1, 2}, {0, 2, 2}, {1, 0, 2}, {1, 1, 2},
106         {1, 2, 2}, {2, 0, 2}, {3, 0, 3}, {3, 1, 3},
107 };
108
109 static struct lcore_params *lcore_params = lcore_params_array_default;
110 static uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default);
111
112 static struct rte_eth_conf port_conf = {
113         .rxmode = {
114                 .mq_mode = ETH_MQ_RX_RSS,
115                 .split_hdr_size = 0,
116         },
117         .rx_adv_conf = {
118                 .rss_conf = {
119                                 .rss_key = NULL,
120                                 .rss_hf = ETH_RSS_IP,
121                 },
122         },
123         .txmode = {
124                 .mq_mode = ETH_MQ_TX_NONE,
125         },
126 };
127
128 static uint32_t max_pkt_len;
129
130 static struct rte_mempool *pktmbuf_pool[RTE_MAX_ETHPORTS][NB_SOCKETS];
131
132 static struct rte_node_ethdev_config ethdev_conf[RTE_MAX_ETHPORTS];
133
134 struct ipv4_l3fwd_lpm_route {
135         uint32_t ip;
136         uint8_t depth;
137         uint8_t if_out;
138 };
139
140 #define IPV4_L3FWD_LPM_NUM_ROUTES                                              \
141         (sizeof(ipv4_l3fwd_lpm_route_array) /                                  \
142          sizeof(ipv4_l3fwd_lpm_route_array[0]))
143 /* 198.18.0.0/16 are set aside for RFC2544 benchmarking. */
144 static struct ipv4_l3fwd_lpm_route ipv4_l3fwd_lpm_route_array[] = {
145         {RTE_IPV4(198, 18, 0, 0), 24, 0}, {RTE_IPV4(198, 18, 1, 0), 24, 1},
146         {RTE_IPV4(198, 18, 2, 0), 24, 2}, {RTE_IPV4(198, 18, 3, 0), 24, 3},
147         {RTE_IPV4(198, 18, 4, 0), 24, 4}, {RTE_IPV4(198, 18, 5, 0), 24, 5},
148         {RTE_IPV4(198, 18, 6, 0), 24, 6}, {RTE_IPV4(198, 18, 7, 0), 24, 7},
149 };
150
151 static int
152 check_lcore_params(void)
153 {
154         uint8_t queue, lcore;
155         int socketid;
156         uint16_t i;
157
158         for (i = 0; i < nb_lcore_params; ++i) {
159                 queue = lcore_params[i].queue_id;
160                 if (queue >= MAX_RX_QUEUE_PER_PORT) {
161                         printf("Invalid queue number: %hhu\n", queue);
162                         return -1;
163                 }
164                 lcore = lcore_params[i].lcore_id;
165                 if (!rte_lcore_is_enabled(lcore)) {
166                         printf("Error: lcore %hhu is not enabled in lcore mask\n",
167                                lcore);
168                         return -1;
169                 }
170
171                 if (lcore == rte_get_main_lcore()) {
172                         printf("Error: lcore %u is main lcore\n", lcore);
173                         return -1;
174                 }
175                 socketid = rte_lcore_to_socket_id(lcore);
176                 if ((socketid != 0) && (numa_on == 0)) {
177                         printf("Warning: lcore %hhu is on socket %d with numa off\n",
178                                lcore, socketid);
179                 }
180         }
181
182         return 0;
183 }
184
185 static int
186 check_port_config(void)
187 {
188         uint16_t portid;
189         uint16_t i;
190
191         for (i = 0; i < nb_lcore_params; ++i) {
192                 portid = lcore_params[i].port_id;
193                 if ((enabled_port_mask & (1 << portid)) == 0) {
194                         printf("Port %u is not enabled in port mask\n", portid);
195                         return -1;
196                 }
197                 if (!rte_eth_dev_is_valid_port(portid)) {
198                         printf("Port %u is not present on the board\n", portid);
199                         return -1;
200                 }
201         }
202
203         return 0;
204 }
205
206 static uint8_t
207 get_port_n_rx_queues(const uint16_t port)
208 {
209         int queue = -1;
210         uint16_t i;
211
212         for (i = 0; i < nb_lcore_params; ++i) {
213                 if (lcore_params[i].port_id == port) {
214                         if (lcore_params[i].queue_id == queue + 1)
215                                 queue = lcore_params[i].queue_id;
216                         else
217                                 rte_exit(EXIT_FAILURE,
218                                          "Queue ids of the port %d must be"
219                                          " in sequence and must start with 0\n",
220                                          lcore_params[i].port_id);
221                 }
222         }
223
224         return (uint8_t)(++queue);
225 }
226
227 static int
228 init_lcore_rx_queues(void)
229 {
230         uint16_t i, nb_rx_queue;
231         uint8_t lcore;
232
233         for (i = 0; i < nb_lcore_params; ++i) {
234                 lcore = lcore_params[i].lcore_id;
235                 nb_rx_queue = lcore_conf[lcore].n_rx_queue;
236                 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
237                         printf("Error: too many queues (%u) for lcore: %u\n",
238                                (unsigned int)nb_rx_queue + 1,
239                                (unsigned int)lcore);
240                         return -1;
241                 }
242
243                 lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
244                         lcore_params[i].port_id;
245                 lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
246                         lcore_params[i].queue_id;
247                 lcore_conf[lcore].n_rx_queue++;
248         }
249
250         return 0;
251 }
252
/* Write the command-line synopsis and option descriptions to stderr. */
static void
print_usage(const char *prgname)
{
	/* Option descriptions; contains no conversion specifiers */
	static const char option_help[] =
		"  -p PORTMASK: Hexadecimal bitmask of ports to configure\n"
		"  -P : Enable promiscuous mode\n"
		"  --config (port,queue,lcore): Rx queue configuration\n"
		"  --eth-dest=X,MM:MM:MM:MM:MM:MM: Ethernet destination for port X\n"
		"  --max-pkt-len PKTLEN: maximum packet length in decimal (64-9600)\n"
		"  --no-numa: Disable numa awareness\n"
		"  --per-port-pool: Use separate buffer pool per port\n\n";

	/* Synopsis line, prefixed with the program name */
	fprintf(stderr,
		"%s [EAL options] -- -p PORTMASK [-P]"
		" --config (port,queue,lcore)[,(port,queue,lcore)]"
		" [--eth-dest=X,MM:MM:MM:MM:MM:MM]"
		" [--max-pkt-len PKTLEN]"
		" [--no-numa]"
		" [--per-port-pool]\n\n",
		prgname);
	fputs(option_help, stderr);
}
277
/*
 * Parse the --max-pkt-len argument.
 *
 * @param pktlen
 *   Decimal string; the whole string must be a number.
 * @return
 *   The parsed length (always > 0), or -1 when the string is empty, has
 *   trailing characters, is zero, or does not fit the positive range of the
 *   int return type.
 */
static int
parse_max_pkt_len(const char *pktlen)
{
	unsigned long len;
	char *end = NULL;

	/* Parse decimal string */
	len = strtoul(pktlen, &end, 10);
	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	/*
	 * Reject out-of-range values explicitly: narrowing them to int could
	 * otherwise yield 0 (read by the caller as "option not given") or an
	 * arbitrary in-range length.
	 */
	if (len == 0 || len > (unsigned long)INT32_MAX)
		return -1;

	return (int)len;
}
294
/*
 * Parse the -p argument into a port bitmask.
 *
 * @param portmask
 *   Hexadecimal string; the whole string must be a number.
 * @return
 *   The mask, or 0 when the string is empty, has trailing characters, is
 *   out of range for strtoul(), or does not fit in 32 bits.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* Parse hexadecimal string */
	errno = 0;
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	/* The mask is kept in a 32-bit variable: refuse anything wider */
	if (errno != 0 || pm > UINT32_MAX)
		return 0;

	return (int)pm;
}
308
309 static int
310 parse_config(const char *q_arg)
311 {
312         enum fieldnames { FLD_PORT = 0, FLD_QUEUE, FLD_LCORE, _NUM_FLD };
313         unsigned long int_fld[_NUM_FLD];
314         const char *p, *p0 = q_arg;
315         char *str_fld[_NUM_FLD];
316         uint32_t size;
317         char s[256];
318         char *end;
319         int i;
320
321         nb_lcore_params = 0;
322
323         while ((p = strchr(p0, '(')) != NULL) {
324                 ++p;
325                 p0 = strchr(p, ')');
326                 if (p0 == NULL)
327                         return -1;
328
329                 size = p0 - p;
330                 if (size >= sizeof(s))
331                         return -1;
332
333                 memcpy(s, p, size);
334                 s[size] = '\0';
335                 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
336                     _NUM_FLD)
337                         return -1;
338                 for (i = 0; i < _NUM_FLD; i++) {
339                         errno = 0;
340                         int_fld[i] = strtoul(str_fld[i], &end, 0);
341                         if (errno != 0 || end == str_fld[i])
342                                 return -1;
343                 }
344
345                 if (nb_lcore_params >= MAX_LCORE_PARAMS) {
346                         printf("Exceeded max number of lcore params: %hu\n",
347                                nb_lcore_params);
348                         return -1;
349                 }
350
351                 if (int_fld[FLD_PORT] >= RTE_MAX_ETHPORTS ||
352                     int_fld[FLD_LCORE] >= RTE_MAX_LCORE) {
353                         printf("Invalid port/lcore id\n");
354                         return -1;
355                 }
356
357                 lcore_params_array[nb_lcore_params].port_id =
358                         (uint8_t)int_fld[FLD_PORT];
359                 lcore_params_array[nb_lcore_params].queue_id =
360                         (uint8_t)int_fld[FLD_QUEUE];
361                 lcore_params_array[nb_lcore_params].lcore_id =
362                         (uint8_t)int_fld[FLD_LCORE];
363                 ++nb_lcore_params;
364         }
365         lcore_params = lcore_params_array;
366
367         return 0;
368 }
369
370 static void
371 parse_eth_dest(const char *optarg)
372 {
373         uint8_t c, *dest, peer_addr[6];
374         uint16_t portid;
375         char *port_end;
376
377         errno = 0;
378         portid = strtoul(optarg, &port_end, 10);
379         if (errno != 0 || port_end == optarg || *port_end++ != ',')
380                 rte_exit(EXIT_FAILURE, "Invalid eth-dest: %s", optarg);
381         if (portid >= RTE_MAX_ETHPORTS)
382                 rte_exit(EXIT_FAILURE,
383                          "eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n", portid,
384                          RTE_MAX_ETHPORTS);
385
386         if (cmdline_parse_etheraddr(NULL, port_end, &peer_addr,
387                                     sizeof(peer_addr)) < 0)
388                 rte_exit(EXIT_FAILURE, "Invalid ethernet address: %s\n",
389                          port_end);
390         dest = (uint8_t *)&dest_eth_addr[portid];
391         for (c = 0; c < 6; c++)
392                 dest[c] = peer_addr[c];
393         *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
394 }
395
/* Largest value accepted for --max-pkt-len */
#define MAX_JUMBO_PKT_LEN  9600
/* Per-lcore cache size passed when creating the mbuf pools */
#define MEMPOOL_CACHE_SIZE 256

/* Short options: -p <portmask>, -P (promiscuous) */
static const char short_options[] = "p:P";

/* Names of the long-only options */
#define CMD_LINE_OPT_CONFIG        "config"
#define CMD_LINE_OPT_ETH_DEST      "eth-dest"
#define CMD_LINE_OPT_NO_NUMA       "no-numa"
#define CMD_LINE_OPT_MAX_PKT_LEN   "max-pkt-len"
#define CMD_LINE_OPT_PER_PORT_POOL "per-port-pool"

/*
 * Values returned by getopt_long() for the long-only options. They start
 * above 255 so that they cannot collide with a short option character.
 */
enum {
	CMD_LINE_OPT_MIN_NUM = 256,
	CMD_LINE_OPT_CONFIG_NUM,
	CMD_LINE_OPT_ETH_DEST_NUM,
	CMD_LINE_OPT_NO_NUMA_NUM,
	CMD_LINE_OPT_MAX_PKT_LEN_NUM,
	CMD_LINE_OPT_PARSE_PER_PORT_POOL,
};

/* Long option table; terminated by an all-zero entry */
static const struct option lgopts[] = {
	{CMD_LINE_OPT_CONFIG, required_argument, NULL,
	 CMD_LINE_OPT_CONFIG_NUM},
	{CMD_LINE_OPT_ETH_DEST, required_argument, NULL,
	 CMD_LINE_OPT_ETH_DEST_NUM},
	{CMD_LINE_OPT_NO_NUMA, no_argument, NULL,
	 CMD_LINE_OPT_NO_NUMA_NUM},
	{CMD_LINE_OPT_MAX_PKT_LEN, required_argument, NULL,
	 CMD_LINE_OPT_MAX_PKT_LEN_NUM},
	{CMD_LINE_OPT_PER_PORT_POOL, no_argument, NULL,
	 CMD_LINE_OPT_PARSE_PER_PORT_POOL},
	{NULL, 0, NULL, 0},
};
430
/*
 * This expression is used to calculate the number of mbufs needed
 * depending on user input, taking into account memory for rx and
 * tx hardware rings, cache per lcore and mtable per port per lcore.
 * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum
 * value of 8192.
 *
 * The expansion reads nb_rx_queue, n_tx_queue, nb_lcores, nb_rxd and
 * nb_txd from the scope in which the macro is used. The argument is
 * parenthesized so that any expression can be passed as the port count.
 */
#define NB_MBUF(nports)                                                        \
	RTE_MAX(((nports) * nb_rx_queue * nb_rxd +                             \
		 (nports) * nb_lcores * RTE_GRAPH_BURST_SIZE +                 \
		 (nports) * n_tx_queue * nb_txd +                              \
		 nb_lcores * MEMPOOL_CACHE_SIZE), 8192u)
443
444 /* Parse the argument given in the command line of the application */
445 static int
446 parse_args(int argc, char **argv)
447 {
448         char *prgname = argv[0];
449         int option_index;
450         char **argvopt;
451         int opt, ret;
452
453         argvopt = argv;
454
455         /* Error or normal output strings. */
456         while ((opt = getopt_long(argc, argvopt, short_options, lgopts,
457                                   &option_index)) != EOF) {
458
459                 switch (opt) {
460                 /* Portmask */
461                 case 'p':
462                         enabled_port_mask = parse_portmask(optarg);
463                         if (enabled_port_mask == 0) {
464                                 fprintf(stderr, "Invalid portmask\n");
465                                 print_usage(prgname);
466                                 return -1;
467                         }
468                         break;
469
470                 case 'P':
471                         promiscuous_on = 1;
472                         break;
473
474                 /* Long options */
475                 case CMD_LINE_OPT_CONFIG_NUM:
476                         ret = parse_config(optarg);
477                         if (ret) {
478                                 fprintf(stderr, "Invalid config\n");
479                                 print_usage(prgname);
480                                 return -1;
481                         }
482                         break;
483
484                 case CMD_LINE_OPT_ETH_DEST_NUM:
485                         parse_eth_dest(optarg);
486                         break;
487
488                 case CMD_LINE_OPT_NO_NUMA_NUM:
489                         numa_on = 0;
490                         break;
491
492                 case CMD_LINE_OPT_MAX_PKT_LEN_NUM: {
493                         max_pkt_len = parse_max_pkt_len(optarg);
494                         break;
495                 }
496
497                 case CMD_LINE_OPT_PARSE_PER_PORT_POOL:
498                         printf("Per port buffer pool is enabled\n");
499                         per_port_pool = 1;
500                         break;
501
502                 default:
503                         print_usage(prgname);
504                         return -1;
505                 }
506         }
507
508         if (optind >= 0)
509                 argv[optind - 1] = prgname;
510         ret = optind - 1;
511         optind = 1; /* Reset getopt lib */
512
513         return ret;
514 }
515
516 static void
517 print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr)
518 {
519         char buf[RTE_ETHER_ADDR_FMT_SIZE];
520         rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr);
521         printf("%s%s", name, buf);
522 }
523
524 static int
525 init_mem(uint16_t portid, uint32_t nb_mbuf)
526 {
527         uint32_t lcore_id;
528         int socketid;
529         char s[64];
530
531         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
532                 if (rte_lcore_is_enabled(lcore_id) == 0)
533                         continue;
534
535                 if (numa_on)
536                         socketid = rte_lcore_to_socket_id(lcore_id);
537                 else
538                         socketid = 0;
539
540                 if (socketid >= NB_SOCKETS) {
541                         rte_exit(EXIT_FAILURE,
542                                  "Socket %d of lcore %u is out of range %d\n",
543                                  socketid, lcore_id, NB_SOCKETS);
544                 }
545
546                 if (pktmbuf_pool[portid][socketid] == NULL) {
547                         snprintf(s, sizeof(s), "mbuf_pool_%d:%d", portid,
548                                  socketid);
549                         /* Create a pool with priv size of a cacheline */
550                         pktmbuf_pool[portid][socketid] =
551                                 rte_pktmbuf_pool_create(
552                                         s, nb_mbuf, MEMPOOL_CACHE_SIZE,
553                                         RTE_CACHE_LINE_SIZE,
554                                         RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
555                         if (pktmbuf_pool[portid][socketid] == NULL)
556                                 rte_exit(EXIT_FAILURE,
557                                          "Cannot init mbuf pool on socket %d\n",
558                                          socketid);
559                         else
560                                 printf("Allocated mbuf pool on socket %d\n",
561                                        socketid);
562                 }
563         }
564
565         return 0;
566 }
567
568 /* Check the link status of all ports in up to 9s, and print them finally */
569 static void
570 check_all_ports_link_status(uint32_t port_mask)
571 {
572 #define CHECK_INTERVAL 100 /* 100ms */
573 #define MAX_CHECK_TIME 90  /* 9s (90 * 100ms) in total */
574         uint8_t count, all_ports_up, print_flag = 0;
575         struct rte_eth_link link;
576         uint16_t portid;
577         int ret;
578         char link_status_text[RTE_ETH_LINK_MAX_STR_LEN];
579
580         printf("\nChecking link status");
581         fflush(stdout);
582         for (count = 0; count <= MAX_CHECK_TIME; count++) {
583                 if (force_quit)
584                         return;
585                 all_ports_up = 1;
586                 RTE_ETH_FOREACH_DEV(portid)
587                 {
588                         if (force_quit)
589                                 return;
590                         if ((port_mask & (1 << portid)) == 0)
591                                 continue;
592                         memset(&link, 0, sizeof(link));
593                         ret = rte_eth_link_get_nowait(portid, &link);
594                         if (ret < 0) {
595                                 all_ports_up = 0;
596                                 if (print_flag == 1)
597                                         printf("Port %u link get failed: %s\n",
598                                                 portid, rte_strerror(-ret));
599                                 continue;
600                         }
601                         /* Print link status if flag set */
602                         if (print_flag == 1) {
603                                 rte_eth_link_to_str(link_status_text,
604                                         sizeof(link_status_text), &link);
605                                 printf("Port %d %s\n", portid,
606                                        link_status_text);
607                                 continue;
608                         }
609                         /* Clear all_ports_up flag if any link down */
610                         if (link.link_status == ETH_LINK_DOWN) {
611                                 all_ports_up = 0;
612                                 break;
613                         }
614                 }
615                 /* After finally printing all link status, get out */
616                 if (print_flag == 1)
617                         break;
618
619                 if (all_ports_up == 0) {
620                         printf(".");
621                         fflush(stdout);
622                         rte_delay_ms(CHECK_INTERVAL);
623                 }
624
625                 /* Set the print_flag if all ports up or timeout */
626                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
627                         print_flag = 1;
628                         printf("Done\n");
629                 }
630         }
631 }
632
633 static void
634 signal_handler(int signum)
635 {
636         if (signum == SIGINT || signum == SIGTERM) {
637                 printf("\n\nSignal %d received, preparing to exit...\n",
638                        signum);
639                 force_quit = true;
640         }
641 }
642
643 static void
644 print_stats(void)
645 {
646         const char topLeft[] = {27, '[', '1', ';', '1', 'H', '\0'};
647         const char clr[] = {27, '[', '2', 'J', '\0'};
648         struct rte_graph_cluster_stats_param s_param;
649         struct rte_graph_cluster_stats *stats;
650         const char *pattern = "worker_*";
651
652         /* Prepare stats object */
653         memset(&s_param, 0, sizeof(s_param));
654         s_param.f = stdout;
655         s_param.socket_id = SOCKET_ID_ANY;
656         s_param.graph_patterns = &pattern;
657         s_param.nb_graph_patterns = 1;
658
659         stats = rte_graph_cluster_stats_create(&s_param);
660         if (stats == NULL)
661                 rte_exit(EXIT_FAILURE, "Unable to create stats object\n");
662
663         while (!force_quit) {
664                 /* Clear screen and move to top left */
665                 printf("%s%s", clr, topLeft);
666                 rte_graph_cluster_stats_get(stats, 0);
667                 rte_delay_ms(1E3);
668         }
669
670         rte_graph_cluster_stats_destroy(stats);
671 }
672
673 /* Main processing loop. 8< */
674 static int
675 graph_main_loop(void *conf)
676 {
677         struct lcore_conf *qconf;
678         struct rte_graph *graph;
679         uint32_t lcore_id;
680
681         RTE_SET_USED(conf);
682
683         lcore_id = rte_lcore_id();
684         qconf = &lcore_conf[lcore_id];
685         graph = qconf->graph;
686
687         if (!graph) {
688                 RTE_LOG(INFO, L3FWD_GRAPH, "Lcore %u has nothing to do\n",
689                         lcore_id);
690                 return 0;
691         }
692
693         RTE_LOG(INFO, L3FWD_GRAPH,
694                 "Entering main loop on lcore %u, graph %s(%p)\n", lcore_id,
695                 qconf->name, graph);
696
697         while (likely(!force_quit))
698                 rte_graph_walk(graph);
699
700         return 0;
701 }
702 /* >8 End of main processing loop. */
703
704 static uint32_t
705 eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
706 {
707         uint32_t overhead_len;
708
709         if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
710                 overhead_len = max_rx_pktlen - max_mtu;
711         else
712                 overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
713
714         return overhead_len;
715 }
716
717 static int
718 config_port_max_pkt_len(struct rte_eth_conf *conf,
719                 struct rte_eth_dev_info *dev_info)
720 {
721         uint32_t overhead_len;
722
723         if (max_pkt_len == 0)
724                 return 0;
725
726         if (max_pkt_len < RTE_ETHER_MIN_LEN || max_pkt_len > MAX_JUMBO_PKT_LEN)
727                 return -1;
728
729         overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen,
730                         dev_info->max_mtu);
731         conf->rxmode.mtu = max_pkt_len - overhead_len;
732
733         if (conf->rxmode.mtu > RTE_ETHER_MTU) {
734                 conf->txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
735                 conf->rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
736         }
737
738         return 0;
739 }
740
741 int
742 main(int argc, char **argv)
743 {
744         /* Rewrite data of src and dst ether addr */
745         uint8_t rewrite_data[2 * sizeof(struct rte_ether_addr)];
746         /* Graph initialization. 8< */
747         static const char * const default_patterns[] = {
748                 "ip4*",
749                 "ethdev_tx-*",
750                 "pkt_drop",
751         };
752         uint8_t nb_rx_queue, queue, socketid;
753         struct rte_graph_param graph_conf;
754         struct rte_eth_dev_info dev_info;
755         uint32_t nb_ports, nb_conf = 0;
756         uint32_t n_tx_queue, nb_lcores;
757         struct rte_eth_txconf *txconf;
758         uint16_t queueid, portid, i;
759         const char **node_patterns;
760         struct lcore_conf *qconf;
761         uint16_t nb_graphs = 0;
762         uint16_t nb_patterns;
763         uint8_t rewrite_len;
764         uint32_t lcore_id;
765         int ret;
766
767         /* Init EAL */
768         ret = rte_eal_init(argc, argv);
769         if (ret < 0)
770                 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
771         argc -= ret;
772         argv += ret;
773
774         force_quit = false;
775         signal(SIGINT, signal_handler);
776         signal(SIGTERM, signal_handler);
777
778         /* Pre-init dst MACs for all ports to 02:00:00:00:00:xx */
779         for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
780                 dest_eth_addr[portid] =
781                         RTE_ETHER_LOCAL_ADMIN_ADDR + ((uint64_t)portid << 40);
782                 *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
783         }
784
785         /* Parse application arguments (after the EAL ones) */
786         ret = parse_args(argc, argv);
787         if (ret < 0)
788                 rte_exit(EXIT_FAILURE, "Invalid L3FWD_GRAPH parameters\n");
789
790         if (check_lcore_params() < 0)
791                 rte_exit(EXIT_FAILURE, "check_lcore_params() failed\n");
792
793         ret = init_lcore_rx_queues();
794         if (ret < 0)
795                 rte_exit(EXIT_FAILURE, "init_lcore_rx_queues() failed\n");
796
797         if (check_port_config() < 0)
798                 rte_exit(EXIT_FAILURE, "check_port_config() failed\n");
799
800         nb_ports = rte_eth_dev_count_avail();
801         nb_lcores = rte_lcore_count();
802
803         /* Initialize all ports. 8< */
804         RTE_ETH_FOREACH_DEV(portid)
805         {
806                 struct rte_eth_conf local_port_conf = port_conf;
807
808                 /* Skip ports that are not enabled */
809                 if ((enabled_port_mask & (1 << portid)) == 0) {
810                         printf("\nSkipping disabled port %d\n", portid);
811                         continue;
812                 }
813
814                 /* Init port */
815                 printf("Initializing port %d ... ", portid);
816                 fflush(stdout);
817
818                 nb_rx_queue = get_port_n_rx_queues(portid);
819                 n_tx_queue = nb_lcores;
820                 if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
821                         n_tx_queue = MAX_TX_QUEUE_PER_PORT;
822                 printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
823                        nb_rx_queue, n_tx_queue);
824
825                 rte_eth_dev_info_get(portid, &dev_info);
826
827                 ret = config_port_max_pkt_len(&local_port_conf, &dev_info);
828                 if (ret != 0)
829                         rte_exit(EXIT_FAILURE,
830                                 "Invalid max packet length: %u (port %u)\n",
831                                 max_pkt_len, portid);
832
833                 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
834                         local_port_conf.txmode.offloads |=
835                                 DEV_TX_OFFLOAD_MBUF_FAST_FREE;
836
837                 local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
838                         dev_info.flow_type_rss_offloads;
839                 if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
840                     port_conf.rx_adv_conf.rss_conf.rss_hf) {
841                         printf("Port %u modified RSS hash function based on "
842                                "hardware support,"
843                                "requested:%#" PRIx64 " configured:%#" PRIx64
844                                "\n",
845                                portid, port_conf.rx_adv_conf.rss_conf.rss_hf,
846                                local_port_conf.rx_adv_conf.rss_conf.rss_hf);
847                 }
848
849                 ret = rte_eth_dev_configure(portid, nb_rx_queue,
850                                             n_tx_queue, &local_port_conf);
851                 if (ret < 0)
852                         rte_exit(EXIT_FAILURE,
853                                  "Cannot configure device: err=%d, port=%d\n",
854                                  ret, portid);
855
856                 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
857                                                        &nb_txd);
858                 if (ret < 0)
859                         rte_exit(EXIT_FAILURE,
860                                  "Cannot adjust number of descriptors: err=%d, "
861                                  "port=%d\n",
862                                  ret, portid);
863
864                 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
865                 print_ethaddr(" Address:", &ports_eth_addr[portid]);
866                 printf(", ");
867                 print_ethaddr(
868                         "Destination:",
869                         (const struct rte_ether_addr *)&dest_eth_addr[portid]);
870                 printf(", ");
871
872                 /*
873                  * prepare src MACs for each port.
874                  */
875                 rte_ether_addr_copy(
876                         &ports_eth_addr[portid],
877                         (struct rte_ether_addr *)(val_eth + portid) + 1);
878
879                 /* Init memory */
880                 if (!per_port_pool) {
881                         /* portid = 0; this is *not* signifying the first port,
882                          * rather, it signifies that portid is ignored.
883                          */
884                         ret = init_mem(0, NB_MBUF(nb_ports));
885                 } else {
886                         ret = init_mem(portid, NB_MBUF(1));
887                 }
888                 if (ret < 0)
889                         rte_exit(EXIT_FAILURE, "init_mem() failed\n");
890
891                 /* Init one TX queue per couple (lcore,port) */
892                 queueid = 0;
893                 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
894                         if (rte_lcore_is_enabled(lcore_id) == 0)
895                                 continue;
896
897                         qconf = &lcore_conf[lcore_id];
898
899                         if (numa_on)
900                                 socketid = (uint8_t)rte_lcore_to_socket_id(
901                                         lcore_id);
902                         else
903                                 socketid = 0;
904
905                         printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
906                         fflush(stdout);
907
908                         txconf = &dev_info.default_txconf;
909                         txconf->offloads = local_port_conf.txmode.offloads;
910                         ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
911                                                      socketid, txconf);
912                         if (ret < 0)
913                                 rte_exit(EXIT_FAILURE,
914                                          "rte_eth_tx_queue_setup: err=%d, "
915                                          "port=%d\n",
916                                          ret, portid);
917                         queueid++;
918                 }
919
920                 /* Setup ethdev node config */
921                 ethdev_conf[nb_conf].port_id = portid;
922                 ethdev_conf[nb_conf].num_rx_queues = nb_rx_queue;
923                 ethdev_conf[nb_conf].num_tx_queues = n_tx_queue;
924                 if (!per_port_pool)
925                         ethdev_conf[nb_conf].mp = pktmbuf_pool[0];
926
927                 else
928                         ethdev_conf[nb_conf].mp = pktmbuf_pool[portid];
929                 ethdev_conf[nb_conf].mp_count = NB_SOCKETS;
930
931                 nb_conf++;
932                 printf("\n");
933         }
934
935         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
936                 if (rte_lcore_is_enabled(lcore_id) == 0)
937                         continue;
938                 qconf = &lcore_conf[lcore_id];
939                 printf("\nInitializing rx queues on lcore %u ... ", lcore_id);
940                 fflush(stdout);
941                 /* Init RX queues */
942                 for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
943                         struct rte_eth_rxconf rxq_conf;
944
945                         portid = qconf->rx_queue_list[queue].port_id;
946                         queueid = qconf->rx_queue_list[queue].queue_id;
947
948                         if (numa_on)
949                                 socketid = (uint8_t)rte_lcore_to_socket_id(
950                                         lcore_id);
951                         else
952                                 socketid = 0;
953
954                         printf("rxq=%d,%d,%d ", portid, queueid, socketid);
955                         fflush(stdout);
956
957                         rte_eth_dev_info_get(portid, &dev_info);
958                         rxq_conf = dev_info.default_rxconf;
959                         rxq_conf.offloads = port_conf.rxmode.offloads;
960                         if (!per_port_pool)
961                                 ret = rte_eth_rx_queue_setup(
962                                         portid, queueid, nb_rxd, socketid,
963                                         &rxq_conf, pktmbuf_pool[0][socketid]);
964                         else
965                                 ret = rte_eth_rx_queue_setup(
966                                         portid, queueid, nb_rxd, socketid,
967                                         &rxq_conf,
968                                         pktmbuf_pool[portid][socketid]);
969                         if (ret < 0)
970                                 rte_exit(EXIT_FAILURE,
971                                          "rte_eth_rx_queue_setup: err=%d, "
972                                          "port=%d\n",
973                                          ret, portid);
974
975                         /* Add this queue node to its graph */
976                         snprintf(qconf->rx_queue_list[queue].node_name,
977                                  RTE_NODE_NAMESIZE, "ethdev_rx-%u-%u", portid,
978                                  queueid);
979                 }
980
981                 /* Alloc a graph to this lcore only if source exists  */
982                 if (qconf->n_rx_queue)
983                         nb_graphs++;
984         }
985
986         printf("\n");
987
988         /* Ethdev node config, skip rx queue mapping */
989         ret = rte_node_eth_config(ethdev_conf, nb_conf, nb_graphs);
990         /* >8 End of graph creation. */
991         if (ret)
992                 rte_exit(EXIT_FAILURE, "rte_node_eth_config: err=%d\n", ret);
993
994         /* Start ports */
995         RTE_ETH_FOREACH_DEV(portid)
996         {
997                 if ((enabled_port_mask & (1 << portid)) == 0)
998                         continue;
999
1000                 /* Start device */
1001                 ret = rte_eth_dev_start(portid);
1002                 if (ret < 0)
1003                         rte_exit(EXIT_FAILURE,
1004                                  "rte_eth_dev_start: err=%d, port=%d\n", ret,
1005                                  portid);
1006
1007                 /*
1008                  * If enabled, put device in promiscuous mode.
1009                  * This allows IO forwarding mode to forward packets
1010                  * to itself through 2 cross-connected  ports of the
1011                  * target machine.
1012                  */
1013                 if (promiscuous_on)
1014                         rte_eth_promiscuous_enable(portid);
1015         }
1016
1017         printf("\n");
1018
1019         check_all_ports_link_status(enabled_port_mask);
1020
1021         /* Graph Initialization */
1022         nb_patterns = RTE_DIM(default_patterns);
1023         node_patterns = malloc((MAX_RX_QUEUE_PER_LCORE + nb_patterns) *
1024                                sizeof(*node_patterns));
1025         if (!node_patterns)
1026                 return -ENOMEM;
1027         memcpy(node_patterns, default_patterns,
1028                nb_patterns * sizeof(*node_patterns));
1029
1030         memset(&graph_conf, 0, sizeof(graph_conf));
1031         graph_conf.node_patterns = node_patterns;
1032
1033         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1034                 rte_graph_t graph_id;
1035                 rte_edge_t i;
1036
1037                 if (rte_lcore_is_enabled(lcore_id) == 0)
1038                         continue;
1039
1040                 qconf = &lcore_conf[lcore_id];
1041
1042                 /* Skip graph creation if no source exists */
1043                 if (!qconf->n_rx_queue)
1044                         continue;
1045
1046                 /* Add rx node patterns of this lcore */
1047                 for (i = 0; i < qconf->n_rx_queue; i++) {
1048                         graph_conf.node_patterns[nb_patterns + i] =
1049                                 qconf->rx_queue_list[i].node_name;
1050                 }
1051
1052                 graph_conf.nb_node_patterns = nb_patterns + i;
1053                 graph_conf.socket_id = rte_lcore_to_socket_id(lcore_id);
1054
1055                 snprintf(qconf->name, sizeof(qconf->name), "worker_%u",
1056                          lcore_id);
1057
1058                 graph_id = rte_graph_create(qconf->name, &graph_conf);
1059                 if (graph_id == RTE_GRAPH_ID_INVALID)
1060                         rte_exit(EXIT_FAILURE,
1061                                  "rte_graph_create(): graph_id invalid"
1062                                  " for lcore %u\n", lcore_id);
1063
1064                 qconf->graph_id = graph_id;
1065                 qconf->graph = rte_graph_lookup(qconf->name);
1066                 /* >8 End of graph initialization. */
1067                 if (!qconf->graph)
1068                         rte_exit(EXIT_FAILURE,
1069                                  "rte_graph_lookup(): graph %s not found\n",
1070                                  qconf->name);
1071         }
1072
1073         memset(&rewrite_data, 0, sizeof(rewrite_data));
1074         rewrite_len = sizeof(rewrite_data);
1075
1076         /* Add route to ip4 graph infra. 8< */
1077         for (i = 0; i < IPV4_L3FWD_LPM_NUM_ROUTES; i++) {
1078                 char route_str[INET6_ADDRSTRLEN * 4];
1079                 char abuf[INET6_ADDRSTRLEN];
1080                 struct in_addr in;
1081                 uint32_t dst_port;
1082
1083                 /* Skip unused ports */
1084                 if ((1 << ipv4_l3fwd_lpm_route_array[i].if_out &
1085                      enabled_port_mask) == 0)
1086                         continue;
1087
1088                 dst_port = ipv4_l3fwd_lpm_route_array[i].if_out;
1089
1090                 in.s_addr = htonl(ipv4_l3fwd_lpm_route_array[i].ip);
1091                 snprintf(route_str, sizeof(route_str), "%s / %d (%d)",
1092                          inet_ntop(AF_INET, &in, abuf, sizeof(abuf)),
1093                          ipv4_l3fwd_lpm_route_array[i].depth,
1094                          ipv4_l3fwd_lpm_route_array[i].if_out);
1095
1096                 /* Use route index 'i' as next hop id */
1097                 ret = rte_node_ip4_route_add(
1098                         ipv4_l3fwd_lpm_route_array[i].ip,
1099                         ipv4_l3fwd_lpm_route_array[i].depth, i,
1100                         RTE_NODE_IP4_LOOKUP_NEXT_REWRITE);
1101
1102                 if (ret < 0)
1103                         rte_exit(EXIT_FAILURE,
1104                                  "Unable to add ip4 route %s to graph\n",
1105                                  route_str);
1106
1107                 memcpy(rewrite_data, val_eth + dst_port, rewrite_len);
1108
1109                 /* Add next hop rewrite data for id 'i' */
1110                 ret = rte_node_ip4_rewrite_add(i, rewrite_data,
1111                                                rewrite_len, dst_port);
1112                 if (ret < 0)
1113                         rte_exit(EXIT_FAILURE,
1114                                  "Unable to add next hop %u for "
1115                                  "route %s\n", i, route_str);
1116
1117                 RTE_LOG(INFO, L3FWD_GRAPH, "Added route %s, next_hop %u\n",
1118                         route_str, i);
1119         }
1120         /* >8 End of adding route to ip4 graph infa. */
1121
1122         /* Launch per-lcore init on every worker lcore */
1123         rte_eal_mp_remote_launch(graph_main_loop, NULL, SKIP_MAIN);
1124
1125         /* Accumulate and print stats on main until exit */
1126         if (rte_graph_has_stats_feature())
1127                 print_stats();
1128
1129         /* Wait for worker cores to exit */
1130         ret = 0;
1131         RTE_LCORE_FOREACH_WORKER(lcore_id) {
1132                 ret = rte_eal_wait_lcore(lcore_id);
1133                 /* Destroy graph */
1134                 if (ret < 0 || rte_graph_destroy(
1135                         rte_graph_from_name(lcore_conf[lcore_id].name))) {
1136                         ret = -1;
1137                         break;
1138                 }
1139         }
1140         free(node_patterns);
1141
1142         /* Stop ports */
1143         RTE_ETH_FOREACH_DEV(portid) {
1144                 if ((enabled_port_mask & (1 << portid)) == 0)
1145                         continue;
1146                 printf("Closing port %d...", portid);
1147                 ret = rte_eth_dev_stop(portid);
1148                 if (ret != 0)
1149                         printf("Failed to stop port %u: %s\n",
1150                                portid, rte_strerror(-ret));
1151                 rte_eth_dev_close(portid);
1152                 printf(" Done\n");
1153         }
1154
1155         /* clean up the EAL */
1156         rte_eal_cleanup();
1157         printf("Bye...\n");
1158
1159         return ret;
1160 }