app/flow-perf: add packet forwarding support
[dpdk.git] / app / test-flow-perf / main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  *
4  * This file is the main file of the application.
5  * This application provides the user the ability to test the
6  * insertion rate for a specific rte_flow rule under stress state (~4M rules).
7  *
8  * Then it will also provide packet per second measurement after installing
9  * all rules, the user may send traffic to test the PPS that match the rules
10  * after all rules are installed, to check performance or functionality after
11  * the stress.
12  *
13  * The flows insertion will go for all ports first, then it will print the
14  * results, after that the application will go into forwarding packets mode
15  * it will start receiving traffic if any and then forwarding it back and
16  * gives packet per second measurement.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <stdint.h>
23 #include <inttypes.h>
24 #include <stdarg.h>
25 #include <errno.h>
26 #include <getopt.h>
27 #include <stdbool.h>
28 #include <sys/time.h>
29 #include <signal.h>
30 #include <unistd.h>
31
32 #include <rte_malloc.h>
33 #include <rte_mempool.h>
34 #include <rte_mbuf.h>
35 #include <rte_ethdev.h>
36 #include <rte_flow.h>
37
38 #include "config.h"
39 #include "flow_gen.h"
40
/* Number of slots in the per-iteration timing arrays. */
#define MAX_ITERATIONS      100
/* NOTE(review): presumably the default for --flows-count (4,000,000) - confirm. */
#define DEFAULT_RULES_COUNT 4000000
/* NOTE(review): presumably the default flows-per-iteration batch - confirm. */
#define DEFAULT_ITERATION   100000
44
/* Handle of the flow rule most recently returned by generate_flow(). */
struct rte_flow *flow;
/* Group the generated rules are inserted into (--group, default 0). */
static uint8_t flow_group;

/* Bitmaps of the items, actions and attributes selected on the command line. */
static uint64_t flow_items;
static uint64_t flow_actions;
static uint64_t flow_attrs;

/* Set from the signal handler to stop the insertion and stats loops. */
static volatile bool force_quit;
/* Control switches, one per command line flag of the same name. */
static bool dump_iterations;
static bool delete_flag;
static bool dump_socket_mem_flag;
static bool enable_fwd;

static struct rte_mempool *mbuf_mp;
static uint32_t nb_lcores;
/* Number of flows to insert per port (--flows-count). */
static uint32_t flows_count;
/* Number of flows that make up one timed iteration. */
static uint32_t iterations_number;
/* Hairpin queue count taken from --hairpin-queue / --hairpin-rss. */
static uint32_t hairpinq;
64
/* Maximum packets per rx/tx burst; also the size of lcore_info.pkts[]. */
#define MAX_PKT_BURST     32
/* Values stored in lcore_info.mode. */
#define LCORE_MODE_PKT    1
#define LCORE_MODE_STATS  2
/* Capacity of lcore_info.streams[]. */
#define MAX_STREAMS       64
/* Capacity of the lcore_infos[] table. */
#define MAX_LCORES        64
70
/* One forwarding stream: the rx port/queue pair and the tx port/queue pair. */
struct stream {
        int tx_port;   /* port used for transmission */
        int tx_queue;  /* queue on tx_port */
        int rx_port;   /* port used for reception */
        int rx_queue;  /* queue on rx_port */
};
77
78 struct lcore_info {
79         int mode;
80         int streams_nb;
81         struct stream streams[MAX_STREAMS];
82         /* stats */
83         uint64_t tx_pkts;
84         uint64_t tx_drops;
85         uint64_t rx_pkts;
86         struct rte_mbuf *pkts[MAX_PKT_BURST];
87 } __attribute__((__aligned__(64))); /* let it be cacheline aligned */
88
89 static struct lcore_info lcore_infos[MAX_LCORES];
90
/*
 * Print the command line help on stdout.
 *
 * progname: program name shown on the "usage:" line.
 */
static void
usage(char *progname)
{
        /* Help text after the usage line, one printed line per entry. */
        static const char *const help_lines[] = {
                "\nControl configurations:\n",
                "  --flows-count=N: to set the number of needed"
                " flows to insert, default is 4,000,000\n",
                "  --dump-iterations: To print rates for each"
                " iteration\n",
                "  --deletion-rate: Enable deletion rate"
                " calculations\n",
                "  --dump-socket-mem: To dump all socket memory\n",
                "  --enable-fwd: To enable packets forwarding"
                " after insertion\n",

                "To set flow attributes:\n",
                "  --ingress: set ingress attribute in flows\n",
                "  --egress: set egress attribute in flows\n",
                "  --transfer: set transfer attribute in flows\n",
                "  --group=N: set group for all flows,"
                " default is 0\n",

                "To set flow items:\n",
                "  --ether: add ether layer in flow items\n",
                "  --vlan: add vlan layer in flow items\n",
                "  --ipv4: add ipv4 layer in flow items\n",
                "  --ipv6: add ipv6 layer in flow items\n",
                "  --tcp: add tcp layer in flow items\n",
                "  --udp: add udp layer in flow items\n",
                "  --vxlan: add vxlan layer in flow items\n",
                "  --vxlan-gpe: add vxlan-gpe layer in flow items\n",
                "  --gre: add gre layer in flow items\n",
                "  --geneve: add geneve layer in flow items\n",
                "  --gtp: add gtp layer in flow items\n",
                "  --meta: add meta layer in flow items\n",
                "  --tag: add tag layer in flow items\n",

                "To set flow actions:\n",
                "  --port-id: add port-id action in flow actions\n",
                "  --rss: add rss action in flow actions\n",
                "  --queue: add queue action in flow actions\n",
                "  --jump: add jump action in flow actions\n",
                "  --mark: add mark action in flow actions\n",
                "  --count: add count action in flow actions\n",
                "  --set-meta: add set meta action in flow actions\n",
                "  --set-tag: add set tag action in flow actions\n",
                "  --drop: add drop action in flow actions\n",
                "  --hairpin-queue=N: add hairpin-queue action in flow actions\n",
                /* Typo fixed: this line used to read "hairping-rss". */
                "  --hairpin-rss=N: add hairpin-rss action in flow actions\n",
        };
        size_t line;

        printf("\nusage: %s\n", progname);
        for (line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++)
                fputs(help_lines[line], stdout);
}
141
142 static void
143 args_parse(int argc, char **argv)
144 {
145         char **argvopt;
146         int n, opt;
147         int opt_idx;
148         size_t i;
149
150         static const struct option_dict {
151                 const char *str;
152                 const uint64_t mask;
153                 uint64_t *bitmap;
154         } flow_options[] = {
155                 {
156                         .str = "ether",
157                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH),
158                         .bitmap = &flow_items
159                 },
160                 {
161                         .str = "ipv4",
162                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV4),
163                         .bitmap = &flow_items
164                 },
165                 {
166                         .str = "ipv6",
167                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV6),
168                         .bitmap = &flow_items
169                 },
170                 {
171                         .str = "vlan",
172                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VLAN),
173                         .bitmap = &flow_items
174                 },
175                 {
176                         .str = "tcp",
177                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TCP),
178                         .bitmap = &flow_items
179                 },
180                 {
181                         .str = "udp",
182                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_UDP),
183                         .bitmap = &flow_items
184                 },
185                 {
186                         .str = "vxlan",
187                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN),
188                         .bitmap = &flow_items
189                 },
190                 {
191                         .str = "vxlan-gpe",
192                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
193                         .bitmap = &flow_items
194                 },
195                 {
196                         .str = "gre",
197                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GRE),
198                         .bitmap = &flow_items
199                 },
200                 {
201                         .str = "geneve",
202                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GENEVE),
203                         .bitmap = &flow_items
204                 },
205                 {
206                         .str = "gtp",
207                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GTP),
208                         .bitmap = &flow_items
209                 },
210                 {
211                         .str = "meta",
212                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_META),
213                         .bitmap = &flow_items
214                 },
215                 {
216                         .str = "tag",
217                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TAG),
218                         .bitmap = &flow_items
219                 },
220                 {
221                         .str = "ingress",
222                         .mask = INGRESS,
223                         .bitmap = &flow_attrs
224                 },
225                 {
226                         .str = "egress",
227                         .mask = EGRESS,
228                         .bitmap = &flow_attrs
229                 },
230                 {
231                         .str = "transfer",
232                         .mask = TRANSFER,
233                         .bitmap = &flow_attrs
234                 },
235                 {
236                         .str = "port-id",
237                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_PORT_ID),
238                         .bitmap = &flow_actions
239                 },
240                 {
241                         .str = "rss",
242                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_RSS),
243                         .bitmap = &flow_actions
244                 },
245                 {
246                         .str = "queue",
247                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_QUEUE),
248                         .bitmap = &flow_actions
249                 },
250                 {
251                         .str = "jump",
252                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_JUMP),
253                         .bitmap = &flow_actions
254                 },
255                 {
256                         .str = "mark",
257                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_MARK),
258                         .bitmap = &flow_actions
259                 },
260                 {
261                         .str = "count",
262                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_COUNT),
263                         .bitmap = &flow_actions
264                 },
265                 {
266                         .str = "set-meta",
267                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_SET_META),
268                         .bitmap = &flow_actions
269                 },
270                 {
271                         .str = "set-tag",
272                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_SET_TAG),
273                         .bitmap = &flow_actions
274                 },
275                 {
276                         .str = "drop",
277                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_DROP),
278                         .bitmap = &flow_actions
279                 }
280         };
281
282         static const struct option lgopts[] = {
283                 /* Control */
284                 { "help",                       0, 0, 0 },
285                 { "flows-count",                1, 0, 0 },
286                 { "dump-iterations",            0, 0, 0 },
287                 { "deletion-rate",              0, 0, 0 },
288                 { "dump-socket-mem",            0, 0, 0 },
289                 { "enable-fwd",                 0, 0, 0 },
290                 /* Attributes */
291                 { "ingress",                    0, 0, 0 },
292                 { "egress",                     0, 0, 0 },
293                 { "transfer",                   0, 0, 0 },
294                 { "group",                      1, 0, 0 },
295                 /* Items */
296                 { "ether",                      0, 0, 0 },
297                 { "vlan",                       0, 0, 0 },
298                 { "ipv4",                       0, 0, 0 },
299                 { "ipv6",                       0, 0, 0 },
300                 { "tcp",                        0, 0, 0 },
301                 { "udp",                        0, 0, 0 },
302                 { "vxlan",                      0, 0, 0 },
303                 { "vxlan-gpe",                  0, 0, 0 },
304                 { "gre",                        0, 0, 0 },
305                 { "geneve",                     0, 0, 0 },
306                 { "gtp",                        0, 0, 0 },
307                 { "meta",                       0, 0, 0 },
308                 { "tag",                        0, 0, 0 },
309                 /* Actions */
310                 { "port-id",                    0, 0, 0 },
311                 { "rss",                        0, 0, 0 },
312                 { "queue",                      0, 0, 0 },
313                 { "jump",                       0, 0, 0 },
314                 { "mark",                       0, 0, 0 },
315                 { "count",                      0, 0, 0 },
316                 { "set-meta",                   0, 0, 0 },
317                 { "set-tag",                    0, 0, 0 },
318                 { "drop",                       0, 0, 0 },
319                 { "hairpin-queue",              1, 0, 0 },
320                 { "hairpin-rss",                1, 0, 0 },
321         };
322
323         flow_items = 0;
324         flow_actions = 0;
325         flow_attrs = 0;
326         hairpinq = 0;
327         argvopt = argv;
328
329         printf(":: Flow -> ");
330         while ((opt = getopt_long(argc, argvopt, "",
331                                 lgopts, &opt_idx)) != EOF) {
332                 switch (opt) {
333                 case 0:
334                         if (strcmp(lgopts[opt_idx].name, "help") == 0) {
335                                 usage(argv[0]);
336                                 rte_exit(EXIT_SUCCESS, "Displayed help\n");
337                         }
338
339                         if (strcmp(lgopts[opt_idx].name, "group") == 0) {
340                                 n = atoi(optarg);
341                                 if (n >= 0)
342                                         flow_group = n;
343                                 else
344                                         rte_exit(EXIT_SUCCESS,
345                                                 "flow group should be >= 0\n");
346                                 printf("group %d ", flow_group);
347                         }
348
349                         for (i = 0; i < RTE_DIM(flow_options); i++)
350                                 if (strcmp(lgopts[opt_idx].name,
351                                                 flow_options[i].str) == 0) {
352                                         *flow_options[i].bitmap |=
353                                                 flow_options[i].mask;
354                                         printf("%s / ", flow_options[i].str);
355                                 }
356
357                         if (strcmp(lgopts[opt_idx].name,
358                                         "hairpin-rss") == 0) {
359                                 n = atoi(optarg);
360                                 if (n > 0)
361                                         hairpinq = n;
362                                 else
363                                         rte_exit(EXIT_SUCCESS,
364                                                 "Hairpin queues should be > 0\n");
365
366                                 flow_actions |= HAIRPIN_RSS_ACTION;
367                                 printf("hairpin-rss / ");
368                         }
369                         if (strcmp(lgopts[opt_idx].name,
370                                         "hairpin-queue") == 0) {
371                                 n = atoi(optarg);
372                                 if (n > 0)
373                                         hairpinq = n;
374                                 else
375                                         rte_exit(EXIT_SUCCESS,
376                                                 "Hairpin queues should be > 0\n");
377
378                                 flow_actions |= HAIRPIN_QUEUE_ACTION;
379                                 printf("hairpin-queue / ");
380                         }
381
382                         /* Control */
383                         if (strcmp(lgopts[opt_idx].name,
384                                         "flows-count") == 0) {
385                                 n = atoi(optarg);
386                                 if (n > (int) iterations_number)
387                                         flows_count = n;
388                                 else {
389                                         printf("\n\nflows_count should be > %d\n",
390                                                 iterations_number);
391                                         rte_exit(EXIT_SUCCESS, " ");
392                                 }
393                         }
394                         if (strcmp(lgopts[opt_idx].name,
395                                         "dump-iterations") == 0)
396                                 dump_iterations = true;
397                         if (strcmp(lgopts[opt_idx].name,
398                                         "deletion-rate") == 0)
399                                 delete_flag = true;
400                         if (strcmp(lgopts[opt_idx].name,
401                                         "dump-socket-mem") == 0)
402                                 dump_socket_mem_flag = true;
403                         if (strcmp(lgopts[opt_idx].name,
404                                         "enable-fwd") == 0)
405                                 enable_fwd = true;
406                         break;
407                 default:
408                         fprintf(stderr, "Invalid option: %s\n", argv[optind]);
409                         usage(argv[0]);
410                         rte_exit(EXIT_SUCCESS, "Invalid option\n");
411                         break;
412                 }
413         }
414         printf("end_flow\n");
415 }
416
417 /* Dump the socket memory statistics on console */
418 static size_t
419 dump_socket_mem(FILE *f)
420 {
421         struct rte_malloc_socket_stats socket_stats;
422         unsigned int i = 0;
423         size_t total = 0;
424         size_t alloc = 0;
425         size_t free = 0;
426         unsigned int n_alloc = 0;
427         unsigned int n_free = 0;
428         bool active_nodes = false;
429
430
431         for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
432                 if (rte_malloc_get_socket_stats(i, &socket_stats) ||
433                     !socket_stats.heap_totalsz_bytes)
434                         continue;
435                 active_nodes = true;
436                 total += socket_stats.heap_totalsz_bytes;
437                 alloc += socket_stats.heap_allocsz_bytes;
438                 free += socket_stats.heap_freesz_bytes;
439                 n_alloc += socket_stats.alloc_count;
440                 n_free += socket_stats.free_count;
441                 if (dump_socket_mem_flag) {
442                         fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
443                         fprintf(f,
444                                 "\nSocket %u:\nsize(M) total: %.6lf\nalloc:"
445                                 " %.6lf(%.3lf%%)\nfree: %.6lf"
446                                 "\nmax: %.6lf"
447                                 "\ncount alloc: %u\nfree: %u\n",
448                                 i,
449                                 socket_stats.heap_totalsz_bytes / 1.0e6,
450                                 socket_stats.heap_allocsz_bytes / 1.0e6,
451                                 (double)socket_stats.heap_allocsz_bytes * 100 /
452                                 (double)socket_stats.heap_totalsz_bytes,
453                                 socket_stats.heap_freesz_bytes / 1.0e6,
454                                 socket_stats.greatest_free_size / 1.0e6,
455                                 socket_stats.alloc_count,
456                                 socket_stats.free_count);
457                                 fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
458                 }
459         }
460         if (dump_socket_mem_flag && active_nodes) {
461                 fprintf(f,
462                         "\nTotal: size(M)\ntotal: %.6lf"
463                         "\nalloc: %.6lf(%.3lf%%)\nfree: %.6lf"
464                         "\ncount alloc: %u\nfree: %u\n",
465                         total / 1.0e6, alloc / 1.0e6,
466                         (double)alloc * 100 / (double)total, free / 1.0e6,
467                         n_alloc, n_free);
468                 fprintf(f, "::::::::::::::::::::::::::::::::::::::::\n");
469         }
470         return alloc;
471 }
472
473 static void
474 print_flow_error(struct rte_flow_error error)
475 {
476         printf("Flow can't be created %d message: %s\n",
477                 error.type,
478                 error.message ? error.message : "(no stated reason)");
479 }
480
481 static inline void
482 destroy_flows(int port_id, struct rte_flow **flow_list)
483 {
484         struct rte_flow_error error;
485         clock_t start_iter, end_iter;
486         double cpu_time_used = 0;
487         double flows_rate;
488         double cpu_time_per_iter[MAX_ITERATIONS];
489         double delta;
490         uint32_t i;
491         int iter_id;
492
493         for (i = 0; i < MAX_ITERATIONS; i++)
494                 cpu_time_per_iter[i] = -1;
495
496         if (iterations_number > flows_count)
497                 iterations_number = flows_count;
498
499         /* Deletion Rate */
500         printf("Flows Deletion on port = %d\n", port_id);
501         start_iter = clock();
502         for (i = 0; i < flows_count; i++) {
503                 if (flow_list[i] == 0)
504                         break;
505
506                 memset(&error, 0x33, sizeof(error));
507                 if (rte_flow_destroy(port_id, flow_list[i], &error)) {
508                         print_flow_error(error);
509                         rte_exit(EXIT_FAILURE, "Error in deleting flow");
510                 }
511
512                 if (i && !((i + 1) % iterations_number)) {
513                         /* Save the deletion rate of each iter */
514                         end_iter = clock();
515                         delta = (double) (end_iter - start_iter);
516                         iter_id = ((i + 1) / iterations_number) - 1;
517                         cpu_time_per_iter[iter_id] =
518                                 delta / CLOCKS_PER_SEC;
519                         cpu_time_used += cpu_time_per_iter[iter_id];
520                         start_iter = clock();
521                 }
522         }
523
524         /* Deletion rate per iteration */
525         if (dump_iterations)
526                 for (i = 0; i < MAX_ITERATIONS; i++) {
527                         if (cpu_time_per_iter[i] == -1)
528                                 continue;
529                         delta = (double)(iterations_number /
530                                 cpu_time_per_iter[i]);
531                         flows_rate = delta / 1000;
532                         printf(":: Iteration #%d: %d flows "
533                                 "in %f sec[ Rate = %f K/Sec ]\n",
534                                 i, iterations_number,
535                                 cpu_time_per_iter[i], flows_rate);
536                 }
537
538         /* Deletion rate for all flows */
539         flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
540         printf("\n:: Total flow deletion rate -> %f K/Sec\n",
541                 flows_rate);
542         printf(":: The time for deleting %d in flows %f seconds\n",
543                 flows_count, cpu_time_used);
544 }
545
546 static inline void
547 flows_handler(void)
548 {
549         struct rte_flow **flow_list;
550         struct rte_flow_error error;
551         clock_t start_iter, end_iter;
552         double cpu_time_used;
553         double flows_rate;
554         double cpu_time_per_iter[MAX_ITERATIONS];
555         double delta;
556         uint16_t nr_ports;
557         uint32_t i;
558         int port_id;
559         int iter_id;
560         uint32_t flow_index;
561
562         nr_ports = rte_eth_dev_count_avail();
563
564         for (i = 0; i < MAX_ITERATIONS; i++)
565                 cpu_time_per_iter[i] = -1;
566
567         if (iterations_number > flows_count)
568                 iterations_number = flows_count;
569
570         printf(":: Flows Count per port: %d\n", flows_count);
571
572         flow_list = rte_zmalloc("flow_list",
573                 (sizeof(struct rte_flow *) * flows_count) + 1, 0);
574         if (flow_list == NULL)
575                 rte_exit(EXIT_FAILURE, "No Memory available!");
576
577         for (port_id = 0; port_id < nr_ports; port_id++) {
578                 cpu_time_used = 0;
579                 flow_index = 0;
580                 if (flow_group > 0) {
581                         /*
582                          * Create global rule to jump into flow_group,
583                          * this way the app will avoid the default rules.
584                          *
585                          * Global rule:
586                          * group 0 eth / end actions jump group <flow_group>
587                          *
588                          */
589                         flow = generate_flow(port_id, 0, flow_attrs,
590                                 FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH),
591                                 FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP),
592                                 flow_group, 0, 0, &error);
593
594                         if (flow == NULL) {
595                                 print_flow_error(error);
596                                 rte_exit(EXIT_FAILURE, "error in creating flow");
597                         }
598                         flow_list[flow_index++] = flow;
599                 }
600
601                 /* Insertion Rate */
602                 printf("Flows insertion on port = %d\n", port_id);
603                 start_iter = clock();
604                 for (i = 0; i < flows_count; i++) {
605                         flow = generate_flow(port_id, flow_group,
606                                 flow_attrs, flow_items, flow_actions,
607                                 JUMP_ACTION_TABLE, i, hairpinq, &error);
608
609                         if (force_quit)
610                                 i = flows_count;
611
612                         if (!flow) {
613                                 print_flow_error(error);
614                                 rte_exit(EXIT_FAILURE, "error in creating flow");
615                         }
616
617                         flow_list[flow_index++] = flow;
618
619                         if (i && !((i + 1) % iterations_number)) {
620                                 /* Save the insertion rate of each iter */
621                                 end_iter = clock();
622                                 delta = (double) (end_iter - start_iter);
623                                 iter_id = ((i + 1) / iterations_number) - 1;
624                                 cpu_time_per_iter[iter_id] =
625                                         delta / CLOCKS_PER_SEC;
626                                 cpu_time_used += cpu_time_per_iter[iter_id];
627                                 start_iter = clock();
628                         }
629                 }
630
631                 /* Iteration rate per iteration */
632                 if (dump_iterations)
633                         for (i = 0; i < MAX_ITERATIONS; i++) {
634                                 if (cpu_time_per_iter[i] == -1)
635                                         continue;
636                                 delta = (double)(iterations_number /
637                                         cpu_time_per_iter[i]);
638                                 flows_rate = delta / 1000;
639                                 printf(":: Iteration #%d: %d flows "
640                                         "in %f sec[ Rate = %f K/Sec ]\n",
641                                         i, iterations_number,
642                                         cpu_time_per_iter[i], flows_rate);
643                         }
644
645                 /* Insertion rate for all flows */
646                 flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
647                 printf("\n:: Total flow insertion rate -> %f K/Sec\n",
648                                                 flows_rate);
649                 printf(":: The time for creating %d in flows %f seconds\n",
650                                                 flows_count, cpu_time_used);
651
652                 if (delete_flag)
653                         destroy_flows(port_id, flow_list);
654         }
655 }
656
657 static void
658 signal_handler(int signum)
659 {
660         if (signum == SIGINT || signum == SIGTERM) {
661                 printf("\n\nSignal %d received, preparing to exit...\n",
662                                         signum);
663                 printf("Error: Stats are wrong due to sudden signal!\n\n");
664                 force_quit = true;
665         }
666 }
667
668 static inline uint16_t
669 do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
670 {
671         uint16_t cnt = 0;
672         cnt = rte_eth_rx_burst(rx_port, rx_queue, li->pkts, MAX_PKT_BURST);
673         li->rx_pkts += cnt;
674         return cnt;
675 }
676
677 static inline void
678 do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
679                         uint16_t tx_queue)
680 {
681         uint16_t nr_tx = 0;
682         uint16_t i;
683
684         nr_tx = rte_eth_tx_burst(tx_port, tx_queue, li->pkts, cnt);
685         li->tx_pkts  += nr_tx;
686         li->tx_drops += cnt - nr_tx;
687
688         for (i = nr_tx; i < cnt; i++)
689                 rte_pktmbuf_free(li->pkts[i]);
690 }
691
/*
 * Format a number with a comma after every three digits so that it is
 * easier to read.
 *
 * For example if n = 1799321, the output will be 1,799,321.
 *
 * n:   value to format.
 * buf: destination buffer; it must hold at least 27 bytes, enough for
 *      the longest result "18,446,744,073,709,551,615" and its
 *      terminator.
 *
 * Returns buf.
 */
static char *
pretty_number(uint64_t n, char *buf)
{
        /* A 64-bit value has at most 20 digits, i.e. 7 groups of 3. */
        char p[7][4];
        int i = 0;
        int off = 0;

        /*
         * Peel off the low three digits while more remain. The test must
         * be ">=": with ">" an exact 1000 was left for the leading group
         * and did not fit its 4-byte slot.
         */
        while (n >= 1000) {
                snprintf(p[i], sizeof(p[i]), "%03d", (int)(n % 1000));
                n /= 1000;
                i += 1;
        }

        /* Leading group, printed without zero padding. */
        snprintf(p[i], sizeof(p[i]), "%d", (int)n);
        i += 1;

        /* Emit the groups from most to least significant. */
        while (i--)
                off += sprintf(buf + off, "%s,", p[i]);
        /* Drop the trailing comma. */
        buf[off - 1] = '\0';

        return buf;
}
721
722 static void
723 packet_per_second_stats(void)
724 {
725         struct lcore_info *old;
726         struct lcore_info *li, *oli;
727         int nr_lines = 0;
728         int i;
729
730         old = rte_zmalloc("old",
731                 sizeof(struct lcore_info) * MAX_LCORES, 0);
732         if (old == NULL)
733                 rte_exit(EXIT_FAILURE, "No Memory available!");
734
735         memcpy(old, lcore_infos,
736                 sizeof(struct lcore_info) * MAX_LCORES);
737
738         while (!force_quit) {
739                 uint64_t total_tx_pkts = 0;
740                 uint64_t total_rx_pkts = 0;
741                 uint64_t total_tx_drops = 0;
742                 uint64_t tx_delta, rx_delta, drops_delta;
743                 char buf[3][32];
744                 int nr_valid_core = 0;
745
746                 sleep(1);
747
748                 if (nr_lines) {
749                         char go_up_nr_lines[16];
750
751                         sprintf(go_up_nr_lines, "%c[%dA\r", 27, nr_lines);
752                         printf("%s\r", go_up_nr_lines);
753                 }
754
755                 printf("\n%6s %16s %16s %16s\n", "core", "tx", "tx drops", "rx");
756                 printf("%6s %16s %16s %16s\n", "------", "----------------",
757                         "----------------", "----------------");
758                 nr_lines = 3;
759                 for (i = 0; i < MAX_LCORES; i++) {
760                         li  = &lcore_infos[i];
761                         oli = &old[i];
762                         if (li->mode != LCORE_MODE_PKT)
763                                 continue;
764
765                         tx_delta    = li->tx_pkts  - oli->tx_pkts;
766                         rx_delta    = li->rx_pkts  - oli->rx_pkts;
767                         drops_delta = li->tx_drops - oli->tx_drops;
768                         printf("%6d %16s %16s %16s\n", i,
769                                 pretty_number(tx_delta,    buf[0]),
770                                 pretty_number(drops_delta, buf[1]),
771                                 pretty_number(rx_delta,    buf[2]));
772
773                         total_tx_pkts  += tx_delta;
774                         total_rx_pkts  += rx_delta;
775                         total_tx_drops += drops_delta;
776
777                         nr_valid_core++;
778                         nr_lines += 1;
779                 }
780
781                 if (nr_valid_core > 1) {
782                         printf("%6s %16s %16s %16s\n", "total",
783                                 pretty_number(total_tx_pkts,  buf[0]),
784                                 pretty_number(total_tx_drops, buf[1]),
785                                 pretty_number(total_rx_pkts,  buf[2]));
786                         nr_lines += 1;
787                 }
788
789                 memcpy(old, lcore_infos,
790                         sizeof(struct lcore_info) * MAX_LCORES);
791         }
792 }
793
794 static int
795 start_forwarding(void *data __rte_unused)
796 {
797         int lcore = rte_lcore_id();
798         int stream_id;
799         uint16_t cnt;
800         struct lcore_info *li = &lcore_infos[lcore];
801
802         if (!li->mode)
803                 return 0;
804
805         if (li->mode == LCORE_MODE_STATS) {
806                 printf(":: started stats on lcore %u\n", lcore);
807                 packet_per_second_stats();
808                 return 0;
809         }
810
811         while (!force_quit)
812                 for (stream_id = 0; stream_id < MAX_STREAMS; stream_id++) {
813                         if (li->streams[stream_id].rx_port == -1)
814                                 continue;
815
816                         cnt = do_rx(li,
817                                         li->streams[stream_id].rx_port,
818                                         li->streams[stream_id].rx_queue);
819                         if (cnt)
820                                 do_tx(li, cnt,
821                                         li->streams[stream_id].tx_port,
822                                         li->streams[stream_id].tx_queue);
823                 }
824         return 0;
825 }
826
827 static void
828 init_lcore_info(void)
829 {
830         int i, j;
831         unsigned int lcore;
832         uint16_t nr_port;
833         uint16_t queue;
834         int port;
835         int stream_id = 0;
836         int streams_per_core;
837         int unassigned_streams;
838         int nb_fwd_streams;
839         nr_port = rte_eth_dev_count_avail();
840
841         /* First logical core is reserved for stats printing */
842         lcore = rte_get_next_lcore(-1, 0, 0);
843         lcore_infos[lcore].mode = LCORE_MODE_STATS;
844
845         /*
846          * Initialize all cores
847          * All cores at first must have -1 value in all streams
848          * This means that this stream is not used, or not set
849          * yet.
850          */
851         for (i = 0; i < MAX_LCORES; i++)
852                 for (j = 0; j < MAX_STREAMS; j++) {
853                         lcore_infos[i].streams[j].tx_port = -1;
854                         lcore_infos[i].streams[j].rx_port = -1;
855                         lcore_infos[i].streams[j].tx_queue = -1;
856                         lcore_infos[i].streams[j].rx_queue = -1;
857                         lcore_infos[i].streams_nb = 0;
858                 }
859
860         /*
861          * Calculate the total streams count.
862          * Also distribute those streams count between the available
863          * logical cores except first core, since it's reserved for
864          * stats prints.
865          */
866         nb_fwd_streams = nr_port * RXQ_NUM;
867         if ((int)(nb_lcores - 1) >= nb_fwd_streams)
868                 for (i = 0; i < (int)(nb_lcores - 1); i++) {
869                         lcore = rte_get_next_lcore(lcore, 0, 0);
870                         lcore_infos[lcore].streams_nb = 1;
871                 }
872         else {
873                 streams_per_core = nb_fwd_streams / (nb_lcores - 1);
874                 unassigned_streams = nb_fwd_streams % (nb_lcores - 1);
875                 for (i = 0; i < (int)(nb_lcores - 1); i++) {
876                         lcore = rte_get_next_lcore(lcore, 0, 0);
877                         lcore_infos[lcore].streams_nb = streams_per_core;
878                         if (unassigned_streams) {
879                                 lcore_infos[lcore].streams_nb++;
880                                 unassigned_streams--;
881                         }
882                 }
883         }
884
885         /*
886          * Set the streams for the cores according to each logical
887          * core stream count.
888          * The streams is built on the design of what received should
889          * forward as well, this means that if you received packets on
890          * port 0 queue 0 then the same queue should forward the
891          * packets, using the same logical core.
892          */
893         lcore = rte_get_next_lcore(-1, 0, 0);
894         for (port = 0; port < nr_port; port++) {
895                 /* Create FWD stream */
896                 for (queue = 0; queue < RXQ_NUM; queue++) {
897                         if (!lcore_infos[lcore].streams_nb ||
898                                 !(stream_id % lcore_infos[lcore].streams_nb)) {
899                                 lcore = rte_get_next_lcore(lcore, 0, 0);
900                                 lcore_infos[lcore].mode = LCORE_MODE_PKT;
901                                 stream_id = 0;
902                         }
903                         lcore_infos[lcore].streams[stream_id].rx_queue = queue;
904                         lcore_infos[lcore].streams[stream_id].tx_queue = queue;
905                         lcore_infos[lcore].streams[stream_id].rx_port = port;
906                         lcore_infos[lcore].streams[stream_id].tx_port = port;
907                         stream_id++;
908                 }
909         }
910
911         /* Print all streams */
912         printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
913         for (i = 0; i < MAX_LCORES; i++)
914                 for (j = 0; j < MAX_STREAMS; j++) {
915                         /* No streams for this core */
916                         if (lcore_infos[i].streams[j].tx_port == -1)
917                                 break;
918                         printf("Stream -> core id[%d]: (%d,%d)->(%d,%d)\n",
919                                 i,
920                                 lcore_infos[i].streams[j].rx_port,
921                                 lcore_infos[i].streams[j].rx_queue,
922                                 lcore_infos[i].streams[j].tx_port,
923                                 lcore_infos[i].streams[j].tx_queue);
924                 }
925 }
926
927 static void
928 init_port(void)
929 {
930         int ret;
931         uint16_t std_queue;
932         uint16_t hairpin_q;
933         uint16_t port_id;
934         uint16_t nr_ports;
935         uint16_t nr_queues;
936         struct rte_eth_hairpin_conf hairpin_conf = {
937                 .peer_count = 1,
938         };
939         struct rte_eth_conf port_conf = {
940                 .rx_adv_conf = {
941                         .rss_conf.rss_hf =
942                                 GET_RSS_HF(),
943                 }
944         };
945         struct rte_eth_txconf txq_conf;
946         struct rte_eth_rxconf rxq_conf;
947         struct rte_eth_dev_info dev_info;
948
949         nr_queues = RXQ_NUM;
950         if (hairpinq != 0)
951                 nr_queues = RXQ_NUM + hairpinq;
952
953         nr_ports = rte_eth_dev_count_avail();
954         if (nr_ports == 0)
955                 rte_exit(EXIT_FAILURE, "Error: no port detected\n");
956
957         mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
958                                         TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
959                                         0, MBUF_SIZE,
960                                         rte_socket_id());
961         if (mbuf_mp == NULL)
962                 rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
963
964         for (port_id = 0; port_id < nr_ports; port_id++) {
965                 ret = rte_eth_dev_info_get(port_id, &dev_info);
966                 if (ret != 0)
967                         rte_exit(EXIT_FAILURE,
968                                 "Error during getting device"
969                                 " (port %u) info: %s\n",
970                                 port_id, strerror(-ret));
971
972                 port_conf.txmode.offloads &= dev_info.tx_offload_capa;
973                 port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
974
975                 printf(":: initializing port: %d\n", port_id);
976
977                 ret = rte_eth_dev_configure(port_id, nr_queues,
978                                 nr_queues, &port_conf);
979                 if (ret < 0)
980                         rte_exit(EXIT_FAILURE,
981                                 ":: cannot configure device: err=%d, port=%u\n",
982                                 ret, port_id);
983
984                 rxq_conf = dev_info.default_rxconf;
985                 for (std_queue = 0; std_queue < RXQ_NUM; std_queue++) {
986                         ret = rte_eth_rx_queue_setup(port_id, std_queue, NR_RXD,
987                                         rte_eth_dev_socket_id(port_id),
988                                         &rxq_conf,
989                                         mbuf_mp);
990                         if (ret < 0)
991                                 rte_exit(EXIT_FAILURE,
992                                         ":: Rx queue setup failed: err=%d, port=%u\n",
993                                         ret, port_id);
994                 }
995
996                 txq_conf = dev_info.default_txconf;
997                 for (std_queue = 0; std_queue < TXQ_NUM; std_queue++) {
998                         ret = rte_eth_tx_queue_setup(port_id, std_queue, NR_TXD,
999                                         rte_eth_dev_socket_id(port_id),
1000                                         &txq_conf);
1001                         if (ret < 0)
1002                                 rte_exit(EXIT_FAILURE,
1003                                         ":: Tx queue setup failed: err=%d, port=%u\n",
1004                                         ret, port_id);
1005                 }
1006
1007                 /* Catch all packets from traffic generator. */
1008                 ret = rte_eth_promiscuous_enable(port_id);
1009                 if (ret != 0)
1010                         rte_exit(EXIT_FAILURE,
1011                                 ":: promiscuous mode enable failed: err=%s, port=%u\n",
1012                                 rte_strerror(-ret), port_id);
1013
1014                 if (hairpinq != 0) {
1015                         for (hairpin_q = RXQ_NUM, std_queue = 0;
1016                                         std_queue < nr_queues;
1017                                         hairpin_q++, std_queue++) {
1018                                 hairpin_conf.peers[0].port = port_id;
1019                                 hairpin_conf.peers[0].queue =
1020                                         std_queue + TXQ_NUM;
1021                                 ret = rte_eth_rx_hairpin_queue_setup(
1022                                                 port_id, hairpin_q,
1023                                                 NR_RXD, &hairpin_conf);
1024                                 if (ret != 0)
1025                                         rte_exit(EXIT_FAILURE,
1026                                                 ":: Hairpin rx queue setup failed: err=%d, port=%u\n",
1027                                                 ret, port_id);
1028                         }
1029
1030                         for (hairpin_q = TXQ_NUM, std_queue = 0;
1031                                         std_queue < nr_queues;
1032                                         hairpin_q++, std_queue++) {
1033                                 hairpin_conf.peers[0].port = port_id;
1034                                 hairpin_conf.peers[0].queue =
1035                                         std_queue + RXQ_NUM;
1036                                 ret = rte_eth_tx_hairpin_queue_setup(
1037                                                 port_id, hairpin_q,
1038                                                 NR_TXD, &hairpin_conf);
1039                                 if (ret != 0)
1040                                         rte_exit(EXIT_FAILURE,
1041                                                 ":: Hairpin tx queue setup failed: err=%d, port=%u\n",
1042                                                 ret, port_id);
1043                         }
1044                 }
1045
1046                 ret = rte_eth_dev_start(port_id);
1047                 if (ret < 0)
1048                         rte_exit(EXIT_FAILURE,
1049                                 "rte_eth_dev_start:err=%d, port=%u\n",
1050                                 ret, port_id);
1051
1052                 printf(":: initializing port: %d done\n", port_id);
1053         }
1054 }
1055
1056 int
1057 main(int argc, char **argv)
1058 {
1059         int ret;
1060         uint16_t port;
1061         struct rte_flow_error error;
1062         int64_t alloc, last_alloc;
1063
1064         ret = rte_eal_init(argc, argv);
1065         if (ret < 0)
1066                 rte_exit(EXIT_FAILURE, "EAL init failed\n");
1067
1068         force_quit = false;
1069         dump_iterations = false;
1070         flows_count = DEFAULT_RULES_COUNT;
1071         iterations_number = DEFAULT_ITERATION;
1072         delete_flag = false;
1073         dump_socket_mem_flag = false;
1074         flow_group = 0;
1075
1076         signal(SIGINT, signal_handler);
1077         signal(SIGTERM, signal_handler);
1078
1079         argc -= ret;
1080         argv += ret;
1081         if (argc > 1)
1082                 args_parse(argc, argv);
1083
1084         init_port();
1085
1086         nb_lcores = rte_lcore_count();
1087         if (nb_lcores <= 1)
1088                 rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
1089
1090         last_alloc = (int64_t)dump_socket_mem(stdout);
1091         flows_handler();
1092         alloc = (int64_t)dump_socket_mem(stdout);
1093
1094         if (last_alloc)
1095                 fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
1096                 (alloc - last_alloc) / 1.0e6);
1097
1098         if (enable_fwd) {
1099                 init_lcore_info();
1100                 rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MASTER);
1101         }
1102
1103         RTE_ETH_FOREACH_DEV(port) {
1104                 rte_flow_flush(port, &error);
1105                 rte_eth_dev_stop(port);
1106                 rte_eth_dev_close(port);
1107         }
1108         return 0;
1109 }