test mbuf attach
[dpdk.git] / app / test-flow-perf / main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  *
4  * This file contains the application's main file.
5  * This application provides the user the ability to test the
6  * insertion rate for specific rte_flow rules under stress (~4M rules).
7  *
8  * Then it will also provide packet per second measurement after installing
9  * all rules, the user may send traffic to test the PPS that match the rules
10  * after all rules are installed, to check performance or functionality after
11  * the stress.
12  *
13  * The flows insertion will go for all ports first, then it will print the
14  * results, after that the application will go into forwarding packets mode
15  * it will start receiving traffic if any and then forwarding it back and
16  * gives packet per second measurement.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <stdint.h>
23 #include <inttypes.h>
24 #include <stdarg.h>
25 #include <errno.h>
26 #include <getopt.h>
27 #include <stdbool.h>
28 #include <sys/time.h>
29 #include <signal.h>
30 #include <unistd.h>
31
32 #include <rte_malloc.h>
33 #include <rte_mempool.h>
34 #include <rte_mbuf.h>
35 #include <rte_ethdev.h>
36 #include <rte_flow.h>
37
38 #include "config.h"
39 #include "flow_gen.h"
40
/* Capacity of the per-iteration timing arrays used by the rate reports. */
#define MAX_ITERATIONS             100
/* Named defaults; they are not referenced in this part of the file. */
#define DEFAULT_RULES_COUNT    4000000
#define DEFAULT_ITERATION       100000

/* Handle of the flow rule most recently returned by generate_flow(). */
struct rte_flow *flow;
/* Group the measured rules are inserted into (--group, default 0). */
static uint8_t flow_group;

/* Bitmaps filled by args_parse() from the item/action/attribute options. */
static uint64_t flow_items;
static uint64_t flow_actions;
static uint64_t flow_attrs;

/* Set by signal_handler() on SIGINT/SIGTERM; polled by the worker loops. */
static volatile bool force_quit;
/* Command line switches, see usage(). */
static bool dump_iterations;
static bool delete_flag;
static bool dump_socket_mem_flag;
static bool enable_fwd;

static struct rte_mempool *mbuf_mp;
/*
 * nb_lcores used to be declared a second time at the end of this list;
 * the redundant tentative definition was dropped.
 */
static uint32_t nb_lcores;
/* Number of rules inserted per port (--flows-count). */
static uint32_t flows_count;
/* Number of rules that make up one timed iteration. */
static uint32_t iterations_number;
static uint32_t hairpin_queues_num; /* total hairpin q number - default: 0 */
64
/* Sizing limits: packets per burst, streams per lcore, lcore table size. */
#define MAX_PKT_BURST     32
#define MAX_STREAMS       64
#define MAX_LCORES        64

/*
 * Values for lcore_info.mode. LCORE_MODE_PKT marks the lcores that are
 * listed by the packet-per-second report; LCORE_MODE_STATS is presumably
 * the mode of the lcore printing that report (not visible here).
 */
#define LCORE_MODE_PKT     1
#define LCORE_MODE_STATS   2
70
/*
 * Port/queue identifiers for one stream: a transmit side and a
 * receive side.
 */
struct stream {
        /* Transmit side. */
        int tx_port;
        int tx_queue;
        /* Receive side. */
        int rx_port;
        int rx_queue;
};
77
78 struct lcore_info {
79         int mode;
80         int streams_nb;
81         struct stream streams[MAX_STREAMS];
82         /* stats */
83         uint64_t tx_pkts;
84         uint64_t tx_drops;
85         uint64_t rx_pkts;
86         struct rte_mbuf *pkts[MAX_PKT_BURST];
87 } __rte_cache_aligned;
88
89 static struct lcore_info lcore_infos[MAX_LCORES];
90
/*
 * Print the command line help on stdout.
 *
 * progname: program name shown in the "usage:" line.
 */
static void
usage(char *progname)
{
        /* The option list never changes, so it lives in a single literal. */
        static const char help_text[] =
                "\nControl configurations:\n"
                "  --flows-count=N: to set the number of needed"
                " flows to insert, default is 4,000,000\n"
                "  --dump-iterations: To print rates for each"
                " iteration\n"
                "  --deletion-rate: Enable deletion rate"
                " calculations\n"
                "  --dump-socket-mem: To dump all socket memory\n"
                "  --enable-fwd: To enable packets forwarding"
                " after insertion\n"
                "To set flow attributes:\n"
                "  --ingress: set ingress attribute in flows\n"
                "  --egress: set egress attribute in flows\n"
                "  --transfer: set transfer attribute in flows\n"
                "  --group=N: set group for all flows,"
                " default is 0\n"
                "To set flow items:\n"
                "  --ether: add ether layer in flow items\n"
                "  --vlan: add vlan layer in flow items\n"
                "  --ipv4: add ipv4 layer in flow items\n"
                "  --ipv6: add ipv6 layer in flow items\n"
                "  --tcp: add tcp layer in flow items\n"
                "  --udp: add udp layer in flow items\n"
                "  --vxlan: add vxlan layer in flow items\n"
                "  --vxlan-gpe: add vxlan-gpe layer in flow items\n"
                "  --gre: add gre layer in flow items\n"
                "  --geneve: add geneve layer in flow items\n"
                "  --gtp: add gtp layer in flow items\n"
                "  --meta: add meta layer in flow items\n"
                "  --tag: add tag layer in flow items\n"
                "To set flow actions:\n"
                "  --port-id: add port-id action in flow actions\n"
                "  --rss: add rss action in flow actions\n"
                "  --queue: add queue action in flow actions\n"
                "  --jump: add jump action in flow actions\n"
                "  --mark: add mark action in flow actions\n"
                "  --count: add count action in flow actions\n"
                "  --set-meta: add set meta action in flow actions\n"
                "  --set-tag: add set tag action in flow actions\n"
                "  --drop: add drop action in flow actions\n"
                "  --hairpin-queue=N: add hairpin-queue action in flow actions\n"
                "  --hairpin-rss=N: add hairpin-rss action in flow actions\n";

        printf("\nusage: %s\n", progname);
        fputs(help_text, stdout);
}
141
142 static void
143 args_parse(int argc, char **argv)
144 {
145         char **argvopt;
146         int n, opt;
147         int opt_idx;
148         size_t i;
149
150         static const struct option_dict {
151                 const char *str;
152                 const uint64_t mask;
153                 uint64_t *bitmap;
154         } flow_options[] = {
155                 {
156                         .str = "ether",
157                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH),
158                         .bitmap = &flow_items
159                 },
160                 {
161                         .str = "ipv4",
162                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV4),
163                         .bitmap = &flow_items
164                 },
165                 {
166                         .str = "ipv6",
167                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_IPV6),
168                         .bitmap = &flow_items
169                 },
170                 {
171                         .str = "vlan",
172                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VLAN),
173                         .bitmap = &flow_items
174                 },
175                 {
176                         .str = "tcp",
177                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TCP),
178                         .bitmap = &flow_items
179                 },
180                 {
181                         .str = "udp",
182                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_UDP),
183                         .bitmap = &flow_items
184                 },
185                 {
186                         .str = "vxlan",
187                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN),
188                         .bitmap = &flow_items
189                 },
190                 {
191                         .str = "vxlan-gpe",
192                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
193                         .bitmap = &flow_items
194                 },
195                 {
196                         .str = "gre",
197                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GRE),
198                         .bitmap = &flow_items
199                 },
200                 {
201                         .str = "geneve",
202                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GENEVE),
203                         .bitmap = &flow_items
204                 },
205                 {
206                         .str = "gtp",
207                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_GTP),
208                         .bitmap = &flow_items
209                 },
210                 {
211                         .str = "meta",
212                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_META),
213                         .bitmap = &flow_items
214                 },
215                 {
216                         .str = "tag",
217                         .mask = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_TAG),
218                         .bitmap = &flow_items
219                 },
220                 {
221                         .str = "ingress",
222                         .mask = INGRESS,
223                         .bitmap = &flow_attrs
224                 },
225                 {
226                         .str = "egress",
227                         .mask = EGRESS,
228                         .bitmap = &flow_attrs
229                 },
230                 {
231                         .str = "transfer",
232                         .mask = TRANSFER,
233                         .bitmap = &flow_attrs
234                 },
235                 {
236                         .str = "port-id",
237                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_PORT_ID),
238                         .bitmap = &flow_actions
239                 },
240                 {
241                         .str = "rss",
242                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_RSS),
243                         .bitmap = &flow_actions
244                 },
245                 {
246                         .str = "queue",
247                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_QUEUE),
248                         .bitmap = &flow_actions
249                 },
250                 {
251                         .str = "jump",
252                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_JUMP),
253                         .bitmap = &flow_actions
254                 },
255                 {
256                         .str = "mark",
257                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_MARK),
258                         .bitmap = &flow_actions
259                 },
260                 {
261                         .str = "count",
262                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_COUNT),
263                         .bitmap = &flow_actions
264                 },
265                 {
266                         .str = "set-meta",
267                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_SET_META),
268                         .bitmap = &flow_actions
269                 },
270                 {
271                         .str = "set-tag",
272                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_SET_TAG),
273                         .bitmap = &flow_actions
274                 },
275                 {
276                         .str = "drop",
277                         .mask = FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_DROP),
278                         .bitmap = &flow_actions
279                 }
280         };
281
282         static const struct option lgopts[] = {
283                 /* Control */
284                 { "help",                       0, 0, 0 },
285                 { "flows-count",                1, 0, 0 },
286                 { "dump-iterations",            0, 0, 0 },
287                 { "deletion-rate",              0, 0, 0 },
288                 { "dump-socket-mem",            0, 0, 0 },
289                 { "enable-fwd",                 0, 0, 0 },
290                 /* Attributes */
291                 { "ingress",                    0, 0, 0 },
292                 { "egress",                     0, 0, 0 },
293                 { "transfer",                   0, 0, 0 },
294                 { "group",                      1, 0, 0 },
295                 /* Items */
296                 { "ether",                      0, 0, 0 },
297                 { "vlan",                       0, 0, 0 },
298                 { "ipv4",                       0, 0, 0 },
299                 { "ipv6",                       0, 0, 0 },
300                 { "tcp",                        0, 0, 0 },
301                 { "udp",                        0, 0, 0 },
302                 { "vxlan",                      0, 0, 0 },
303                 { "vxlan-gpe",                  0, 0, 0 },
304                 { "gre",                        0, 0, 0 },
305                 { "geneve",                     0, 0, 0 },
306                 { "gtp",                        0, 0, 0 },
307                 { "meta",                       0, 0, 0 },
308                 { "tag",                        0, 0, 0 },
309                 /* Actions */
310                 { "port-id",                    0, 0, 0 },
311                 { "rss",                        0, 0, 0 },
312                 { "queue",                      0, 0, 0 },
313                 { "jump",                       0, 0, 0 },
314                 { "mark",                       0, 0, 0 },
315                 { "count",                      0, 0, 0 },
316                 { "set-meta",                   0, 0, 0 },
317                 { "set-tag",                    0, 0, 0 },
318                 { "drop",                       0, 0, 0 },
319                 { "hairpin-queue",              1, 0, 0 },
320                 { "hairpin-rss",                1, 0, 0 },
321         };
322
323         flow_items = 0;
324         flow_actions = 0;
325         flow_attrs = 0;
326         hairpin_queues_num = 0;
327         argvopt = argv;
328
329         printf(":: Flow -> ");
330         while ((opt = getopt_long(argc, argvopt, "",
331                                 lgopts, &opt_idx)) != EOF) {
332                 switch (opt) {
333                 case 0:
334                         if (strcmp(lgopts[opt_idx].name, "help") == 0) {
335                                 usage(argv[0]);
336                                 rte_exit(EXIT_SUCCESS, "Displayed help\n");
337                         }
338
339                         if (strcmp(lgopts[opt_idx].name, "group") == 0) {
340                                 n = atoi(optarg);
341                                 if (n >= 0)
342                                         flow_group = n;
343                                 else
344                                         rte_exit(EXIT_SUCCESS,
345                                                 "flow group should be >= 0\n");
346                                 printf("group %d ", flow_group);
347                         }
348
349                         for (i = 0; i < RTE_DIM(flow_options); i++)
350                                 if (strcmp(lgopts[opt_idx].name,
351                                                 flow_options[i].str) == 0) {
352                                         *flow_options[i].bitmap |=
353                                                 flow_options[i].mask;
354                                         printf("%s / ", flow_options[i].str);
355                                 }
356
357                         if (strcmp(lgopts[opt_idx].name,
358                                         "hairpin-rss") == 0) {
359                                 n = atoi(optarg);
360                                 if (n > 0)
361                                         hairpin_queues_num = n;
362                                 else
363                                         rte_exit(EXIT_SUCCESS,
364                                                 "Hairpin queues should be > 0\n");
365
366                                 flow_actions |= HAIRPIN_RSS_ACTION;
367                                 printf("hairpin-rss / ");
368                         }
369                         if (strcmp(lgopts[opt_idx].name,
370                                         "hairpin-queue") == 0) {
371                                 n = atoi(optarg);
372                                 if (n > 0)
373                                         hairpin_queues_num = n;
374                                 else
375                                         rte_exit(EXIT_SUCCESS,
376                                                 "Hairpin queues should be > 0\n");
377
378                                 flow_actions |= HAIRPIN_QUEUE_ACTION;
379                                 printf("hairpin-queue / ");
380                         }
381
382                         /* Control */
383                         if (strcmp(lgopts[opt_idx].name,
384                                         "flows-count") == 0) {
385                                 n = atoi(optarg);
386                                 if (n > (int) iterations_number)
387                                         flows_count = n;
388                                 else {
389                                         printf("\n\nflows_count should be > %d\n",
390                                                 iterations_number);
391                                         rte_exit(EXIT_SUCCESS, " ");
392                                 }
393                         }
394                         if (strcmp(lgopts[opt_idx].name,
395                                         "dump-iterations") == 0)
396                                 dump_iterations = true;
397                         if (strcmp(lgopts[opt_idx].name,
398                                         "deletion-rate") == 0)
399                                 delete_flag = true;
400                         if (strcmp(lgopts[opt_idx].name,
401                                         "dump-socket-mem") == 0)
402                                 dump_socket_mem_flag = true;
403                         if (strcmp(lgopts[opt_idx].name,
404                                         "enable-fwd") == 0)
405                                 enable_fwd = true;
406                         break;
407                 default:
408                         fprintf(stderr, "Invalid option: %s\n", argv[optind]);
409                         usage(argv[0]);
410                         rte_exit(EXIT_SUCCESS, "Invalid option\n");
411                         break;
412                 }
413         }
414         printf("end_flow\n");
415 }
416
417 /* Dump the socket memory statistics on console */
418 static size_t
419 dump_socket_mem(FILE *f)
420 {
421         struct rte_malloc_socket_stats socket_stats;
422         unsigned int i = 0;
423         size_t total = 0;
424         size_t alloc = 0;
425         size_t free = 0;
426         unsigned int n_alloc = 0;
427         unsigned int n_free = 0;
428         bool active_nodes = false;
429
430
431         for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
432                 if (rte_malloc_get_socket_stats(i, &socket_stats) ||
433                     !socket_stats.heap_totalsz_bytes)
434                         continue;
435                 active_nodes = true;
436                 total += socket_stats.heap_totalsz_bytes;
437                 alloc += socket_stats.heap_allocsz_bytes;
438                 free += socket_stats.heap_freesz_bytes;
439                 n_alloc += socket_stats.alloc_count;
440                 n_free += socket_stats.free_count;
441                 if (dump_socket_mem_flag) {
442                         fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
443                         fprintf(f,
444                                 "\nSocket %u:\nsize(M) total: %.6lf\nalloc:"
445                                 " %.6lf(%.3lf%%)\nfree: %.6lf"
446                                 "\nmax: %.6lf"
447                                 "\ncount alloc: %u\nfree: %u\n",
448                                 i,
449                                 socket_stats.heap_totalsz_bytes / 1.0e6,
450                                 socket_stats.heap_allocsz_bytes / 1.0e6,
451                                 (double)socket_stats.heap_allocsz_bytes * 100 /
452                                 (double)socket_stats.heap_totalsz_bytes,
453                                 socket_stats.heap_freesz_bytes / 1.0e6,
454                                 socket_stats.greatest_free_size / 1.0e6,
455                                 socket_stats.alloc_count,
456                                 socket_stats.free_count);
457                                 fprintf(f, "::::::::::::::::::::::::::::::::::::::::");
458                 }
459         }
460         if (dump_socket_mem_flag && active_nodes) {
461                 fprintf(f,
462                         "\nTotal: size(M)\ntotal: %.6lf"
463                         "\nalloc: %.6lf(%.3lf%%)\nfree: %.6lf"
464                         "\ncount alloc: %u\nfree: %u\n",
465                         total / 1.0e6, alloc / 1.0e6,
466                         (double)alloc * 100 / (double)total, free / 1.0e6,
467                         n_alloc, n_free);
468                 fprintf(f, "::::::::::::::::::::::::::::::::::::::::\n");
469         }
470         return alloc;
471 }
472
473 static void
474 print_flow_error(struct rte_flow_error error)
475 {
476         printf("Flow can't be created %d message: %s\n",
477                 error.type,
478                 error.message ? error.message : "(no stated reason)");
479 }
480
481 static inline void
482 destroy_flows(int port_id, struct rte_flow **flow_list)
483 {
484         struct rte_flow_error error;
485         clock_t start_iter, end_iter;
486         double cpu_time_used = 0;
487         double flows_rate;
488         double cpu_time_per_iter[MAX_ITERATIONS];
489         double delta;
490         uint32_t i;
491         int iter_id;
492
493         for (i = 0; i < MAX_ITERATIONS; i++)
494                 cpu_time_per_iter[i] = -1;
495
496         if (iterations_number > flows_count)
497                 iterations_number = flows_count;
498
499         /* Deletion Rate */
500         printf("Flows Deletion on port = %d\n", port_id);
501         start_iter = clock();
502         for (i = 0; i < flows_count; i++) {
503                 if (flow_list[i] == 0)
504                         break;
505
506                 memset(&error, 0x33, sizeof(error));
507                 if (rte_flow_destroy(port_id, flow_list[i], &error)) {
508                         print_flow_error(error);
509                         rte_exit(EXIT_FAILURE, "Error in deleting flow");
510                 }
511
512                 if (i && !((i + 1) % iterations_number)) {
513                         /* Save the deletion rate of each iter */
514                         end_iter = clock();
515                         delta = (double) (end_iter - start_iter);
516                         iter_id = ((i + 1) / iterations_number) - 1;
517                         cpu_time_per_iter[iter_id] =
518                                 delta / CLOCKS_PER_SEC;
519                         cpu_time_used += cpu_time_per_iter[iter_id];
520                         start_iter = clock();
521                 }
522         }
523
524         /* Deletion rate per iteration */
525         if (dump_iterations)
526                 for (i = 0; i < MAX_ITERATIONS; i++) {
527                         if (cpu_time_per_iter[i] == -1)
528                                 continue;
529                         delta = (double)(iterations_number /
530                                 cpu_time_per_iter[i]);
531                         flows_rate = delta / 1000;
532                         printf(":: Iteration #%d: %d flows "
533                                 "in %f sec[ Rate = %f K/Sec ]\n",
534                                 i, iterations_number,
535                                 cpu_time_per_iter[i], flows_rate);
536                 }
537
538         /* Deletion rate for all flows */
539         flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
540         printf("\n:: Total flow deletion rate -> %f K/Sec\n",
541                 flows_rate);
542         printf(":: The time for deleting %d in flows %f seconds\n",
543                 flows_count, cpu_time_used);
544 }
545
546 static inline void
547 flows_handler(void)
548 {
549         struct rte_flow **flow_list;
550         struct rte_flow_error error;
551         clock_t start_iter, end_iter;
552         double cpu_time_used;
553         double flows_rate;
554         double cpu_time_per_iter[MAX_ITERATIONS];
555         double delta;
556         uint16_t nr_ports;
557         uint32_t i;
558         int port_id;
559         int iter_id;
560         uint32_t flow_index;
561
562         nr_ports = rte_eth_dev_count_avail();
563
564         for (i = 0; i < MAX_ITERATIONS; i++)
565                 cpu_time_per_iter[i] = -1;
566
567         if (iterations_number > flows_count)
568                 iterations_number = flows_count;
569
570         printf(":: Flows Count per port: %d\n", flows_count);
571
572         flow_list = rte_zmalloc("flow_list",
573                 (sizeof(struct rte_flow *) * flows_count) + 1, 0);
574         if (flow_list == NULL)
575                 rte_exit(EXIT_FAILURE, "No Memory available!");
576
577         for (port_id = 0; port_id < nr_ports; port_id++) {
578                 cpu_time_used = 0;
579                 flow_index = 0;
580                 if (flow_group > 0) {
581                         /*
582                          * Create global rule to jump into flow_group,
583                          * this way the app will avoid the default rules.
584                          *
585                          * Global rule:
586                          * group 0 eth / end actions jump group <flow_group>
587                          *
588                          */
589                         flow = generate_flow(port_id, 0, flow_attrs,
590                                 FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH),
591                                 FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP),
592                                 flow_group, 0, 0, &error);
593
594                         if (flow == NULL) {
595                                 print_flow_error(error);
596                                 rte_exit(EXIT_FAILURE, "error in creating flow");
597                         }
598                         flow_list[flow_index++] = flow;
599                 }
600
601                 /* Insertion Rate */
602                 printf("Flows insertion on port = %d\n", port_id);
603                 start_iter = clock();
604                 for (i = 0; i < flows_count; i++) {
605                         flow = generate_flow(port_id, flow_group,
606                                 flow_attrs, flow_items, flow_actions,
607                                 JUMP_ACTION_TABLE, i,
608                                 hairpin_queues_num, &error);
609
610                         if (force_quit)
611                                 i = flows_count;
612
613                         if (!flow) {
614                                 print_flow_error(error);
615                                 rte_exit(EXIT_FAILURE, "error in creating flow");
616                         }
617
618                         flow_list[flow_index++] = flow;
619
620                         if (i && !((i + 1) % iterations_number)) {
621                                 /* Save the insertion rate of each iter */
622                                 end_iter = clock();
623                                 delta = (double) (end_iter - start_iter);
624                                 iter_id = ((i + 1) / iterations_number) - 1;
625                                 cpu_time_per_iter[iter_id] =
626                                         delta / CLOCKS_PER_SEC;
627                                 cpu_time_used += cpu_time_per_iter[iter_id];
628                                 start_iter = clock();
629                         }
630                 }
631
632                 /* Iteration rate per iteration */
633                 if (dump_iterations)
634                         for (i = 0; i < MAX_ITERATIONS; i++) {
635                                 if (cpu_time_per_iter[i] == -1)
636                                         continue;
637                                 delta = (double)(iterations_number /
638                                         cpu_time_per_iter[i]);
639                                 flows_rate = delta / 1000;
640                                 printf(":: Iteration #%d: %d flows "
641                                         "in %f sec[ Rate = %f K/Sec ]\n",
642                                         i, iterations_number,
643                                         cpu_time_per_iter[i], flows_rate);
644                         }
645
646                 /* Insertion rate for all flows */
647                 flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
648                 printf("\n:: Total flow insertion rate -> %f K/Sec\n",
649                                                 flows_rate);
650                 printf(":: The time for creating %d in flows %f seconds\n",
651                                                 flows_count, cpu_time_used);
652
653                 if (delete_flag)
654                         destroy_flows(port_id, flow_list);
655         }
656 }
657
658 static void
659 signal_handler(int signum)
660 {
661         if (signum == SIGINT || signum == SIGTERM) {
662                 printf("\n\nSignal %d received, preparing to exit...\n",
663                                         signum);
664                 printf("Error: Stats are wrong due to sudden signal!\n\n");
665                 force_quit = true;
666         }
667 }
668
669 static inline uint16_t
670 do_rx(struct lcore_info *li, uint16_t rx_port, uint16_t rx_queue)
671 {
672         uint16_t cnt = 0;
673         cnt = rte_eth_rx_burst(rx_port, rx_queue, li->pkts, MAX_PKT_BURST);
674         li->rx_pkts += cnt;
675         return cnt;
676 }
677
678 static inline void
679 do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port,
680                         uint16_t tx_queue)
681 {
682         uint16_t nr_tx = 0;
683         uint16_t i;
684
685         nr_tx = rte_eth_tx_burst(tx_port, tx_queue, li->pkts, cnt);
686         li->tx_pkts  += nr_tx;
687         li->tx_drops += cnt - nr_tx;
688
689         for (i = nr_tx; i < cnt; i++)
690                 rte_pktmbuf_free(li->pkts[i]);
691 }
692
/*
 * Format n in decimal with a comma after every group of three digits,
 * counted from the right, e.g. 1799321 -> "1,799,321" and 1000 -> "1,000".
 *
 * n:   value to format; the whole uint64_t range is supported.
 * buf: destination buffer; it must hold at least 27 bytes, enough for
 *      "18,446,744,073,709,551,615" plus the terminating NUL.
 *
 * Returns buf.
 */
static char *
pretty_number(uint64_t n, char *buf)
{
        /* 20 digits is the longest uint64_t value, plus the NUL. */
        char digits[21];
        int len;
        int off = 0;
        int i;

        len = snprintf(digits, sizeof(digits), "%" PRIu64, n);

        for (i = 0; i < len; i++) {
                /* A comma goes before every digit that opens a full group. */
                if (i != 0 && (len - i) % 3 == 0)
                        buf[off++] = ',';
                buf[off++] = digits[i];
        }
        buf[off] = '\0';

        return buf;
}
722
723 static void
724 packet_per_second_stats(void)
725 {
726         struct lcore_info *old;
727         struct lcore_info *li, *oli;
728         int nr_lines = 0;
729         int i;
730
731         old = rte_zmalloc("old",
732                 sizeof(struct lcore_info) * MAX_LCORES, 0);
733         if (old == NULL)
734                 rte_exit(EXIT_FAILURE, "No Memory available!");
735
736         memcpy(old, lcore_infos,
737                 sizeof(struct lcore_info) * MAX_LCORES);
738
739         while (!force_quit) {
740                 uint64_t total_tx_pkts = 0;
741                 uint64_t total_rx_pkts = 0;
742                 uint64_t total_tx_drops = 0;
743                 uint64_t tx_delta, rx_delta, drops_delta;
744                 char buf[3][32];
745                 int nr_valid_core = 0;
746
747                 sleep(1);
748
749                 if (nr_lines) {
750                         char go_up_nr_lines[16];
751
752                         sprintf(go_up_nr_lines, "%c[%dA\r", 27, nr_lines);
753                         printf("%s\r", go_up_nr_lines);
754                 }
755
756                 printf("\n%6s %16s %16s %16s\n", "core", "tx", "tx drops", "rx");
757                 printf("%6s %16s %16s %16s\n", "------", "----------------",
758                         "----------------", "----------------");
759                 nr_lines = 3;
760                 for (i = 0; i < MAX_LCORES; i++) {
761                         li  = &lcore_infos[i];
762                         oli = &old[i];
763                         if (li->mode != LCORE_MODE_PKT)
764                                 continue;
765
766                         tx_delta    = li->tx_pkts  - oli->tx_pkts;
767                         rx_delta    = li->rx_pkts  - oli->rx_pkts;
768                         drops_delta = li->tx_drops - oli->tx_drops;
769                         printf("%6d %16s %16s %16s\n", i,
770                                 pretty_number(tx_delta,    buf[0]),
771                                 pretty_number(drops_delta, buf[1]),
772                                 pretty_number(rx_delta,    buf[2]));
773
774                         total_tx_pkts  += tx_delta;
775                         total_rx_pkts  += rx_delta;
776                         total_tx_drops += drops_delta;
777
778                         nr_valid_core++;
779                         nr_lines += 1;
780                 }
781
782                 if (nr_valid_core > 1) {
783                         printf("%6s %16s %16s %16s\n", "total",
784                                 pretty_number(total_tx_pkts,  buf[0]),
785                                 pretty_number(total_tx_drops, buf[1]),
786                                 pretty_number(total_rx_pkts,  buf[2]));
787                         nr_lines += 1;
788                 }
789
790                 memcpy(old, lcore_infos,
791                         sizeof(struct lcore_info) * MAX_LCORES);
792         }
793 }
794
795 static int
796 start_forwarding(void *data __rte_unused)
797 {
798         int lcore = rte_lcore_id();
799         int stream_id;
800         uint16_t cnt;
801         struct lcore_info *li = &lcore_infos[lcore];
802
803         if (!li->mode)
804                 return 0;
805
806         if (li->mode == LCORE_MODE_STATS) {
807                 printf(":: started stats on lcore %u\n", lcore);
808                 packet_per_second_stats();
809                 return 0;
810         }
811
812         while (!force_quit)
813                 for (stream_id = 0; stream_id < MAX_STREAMS; stream_id++) {
814                         if (li->streams[stream_id].rx_port == -1)
815                                 continue;
816
817                         cnt = do_rx(li,
818                                         li->streams[stream_id].rx_port,
819                                         li->streams[stream_id].rx_queue);
820                         if (cnt)
821                                 do_tx(li, cnt,
822                                         li->streams[stream_id].tx_port,
823                                         li->streams[stream_id].tx_queue);
824                 }
825         return 0;
826 }
827
828 static void
829 init_lcore_info(void)
830 {
831         int i, j;
832         unsigned int lcore;
833         uint16_t nr_port;
834         uint16_t queue;
835         int port;
836         int stream_id = 0;
837         int streams_per_core;
838         int unassigned_streams;
839         int nb_fwd_streams;
840         nr_port = rte_eth_dev_count_avail();
841
842         /* First logical core is reserved for stats printing */
843         lcore = rte_get_next_lcore(-1, 0, 0);
844         lcore_infos[lcore].mode = LCORE_MODE_STATS;
845
846         /*
847          * Initialize all cores
848          * All cores at first must have -1 value in all streams
849          * This means that this stream is not used, or not set
850          * yet.
851          */
852         for (i = 0; i < MAX_LCORES; i++)
853                 for (j = 0; j < MAX_STREAMS; j++) {
854                         lcore_infos[i].streams[j].tx_port = -1;
855                         lcore_infos[i].streams[j].rx_port = -1;
856                         lcore_infos[i].streams[j].tx_queue = -1;
857                         lcore_infos[i].streams[j].rx_queue = -1;
858                         lcore_infos[i].streams_nb = 0;
859                 }
860
861         /*
862          * Calculate the total streams count.
863          * Also distribute those streams count between the available
864          * logical cores except first core, since it's reserved for
865          * stats prints.
866          */
867         nb_fwd_streams = nr_port * RXQ_NUM;
868         if ((int)(nb_lcores - 1) >= nb_fwd_streams)
869                 for (i = 0; i < (int)(nb_lcores - 1); i++) {
870                         lcore = rte_get_next_lcore(lcore, 0, 0);
871                         lcore_infos[lcore].streams_nb = 1;
872                 }
873         else {
874                 streams_per_core = nb_fwd_streams / (nb_lcores - 1);
875                 unassigned_streams = nb_fwd_streams % (nb_lcores - 1);
876                 for (i = 0; i < (int)(nb_lcores - 1); i++) {
877                         lcore = rte_get_next_lcore(lcore, 0, 0);
878                         lcore_infos[lcore].streams_nb = streams_per_core;
879                         if (unassigned_streams) {
880                                 lcore_infos[lcore].streams_nb++;
881                                 unassigned_streams--;
882                         }
883                 }
884         }
885
886         /*
887          * Set the streams for the cores according to each logical
888          * core stream count.
889          * The streams is built on the design of what received should
890          * forward as well, this means that if you received packets on
891          * port 0 queue 0 then the same queue should forward the
892          * packets, using the same logical core.
893          */
894         lcore = rte_get_next_lcore(-1, 0, 0);
895         for (port = 0; port < nr_port; port++) {
896                 /* Create FWD stream */
897                 for (queue = 0; queue < RXQ_NUM; queue++) {
898                         if (!lcore_infos[lcore].streams_nb ||
899                                 !(stream_id % lcore_infos[lcore].streams_nb)) {
900                                 lcore = rte_get_next_lcore(lcore, 0, 0);
901                                 lcore_infos[lcore].mode = LCORE_MODE_PKT;
902                                 stream_id = 0;
903                         }
904                         lcore_infos[lcore].streams[stream_id].rx_queue = queue;
905                         lcore_infos[lcore].streams[stream_id].tx_queue = queue;
906                         lcore_infos[lcore].streams[stream_id].rx_port = port;
907                         lcore_infos[lcore].streams[stream_id].tx_port = port;
908                         stream_id++;
909                 }
910         }
911
912         /* Print all streams */
913         printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
914         for (i = 0; i < MAX_LCORES; i++)
915                 for (j = 0; j < MAX_STREAMS; j++) {
916                         /* No streams for this core */
917                         if (lcore_infos[i].streams[j].tx_port == -1)
918                                 break;
919                         printf("Stream -> core id[%d]: (%d,%d)->(%d,%d)\n",
920                                 i,
921                                 lcore_infos[i].streams[j].rx_port,
922                                 lcore_infos[i].streams[j].rx_queue,
923                                 lcore_infos[i].streams[j].tx_port,
924                                 lcore_infos[i].streams[j].tx_queue);
925                 }
926 }
927
928 static void
929 init_port(void)
930 {
931         int ret;
932         uint16_t std_queue;
933         uint16_t hairpin_queue;
934         uint16_t port_id;
935         uint16_t nr_ports;
936         uint16_t nr_queues;
937         struct rte_eth_hairpin_conf hairpin_conf = {
938                 .peer_count = 1,
939         };
940         struct rte_eth_conf port_conf = {
941                 .rx_adv_conf = {
942                         .rss_conf.rss_hf =
943                                 GET_RSS_HF(),
944                 }
945         };
946         struct rte_eth_txconf txq_conf;
947         struct rte_eth_rxconf rxq_conf;
948         struct rte_eth_dev_info dev_info;
949
950         nr_queues = RXQ_NUM;
951         if (hairpin_queues_num != 0)
952                 nr_queues = RXQ_NUM + hairpin_queues_num;
953
954         nr_ports = rte_eth_dev_count_avail();
955         if (nr_ports == 0)
956                 rte_exit(EXIT_FAILURE, "Error: no port detected\n");
957
958         mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool",
959                                         TOTAL_MBUF_NUM, MBUF_CACHE_SIZE,
960                                         0, MBUF_SIZE,
961                                         rte_socket_id());
962         if (mbuf_mp == NULL)
963                 rte_exit(EXIT_FAILURE, "Error: can't init mbuf pool\n");
964
965         for (port_id = 0; port_id < nr_ports; port_id++) {
966                 ret = rte_eth_dev_info_get(port_id, &dev_info);
967                 if (ret != 0)
968                         rte_exit(EXIT_FAILURE,
969                                 "Error during getting device"
970                                 " (port %u) info: %s\n",
971                                 port_id, strerror(-ret));
972
973                 port_conf.txmode.offloads &= dev_info.tx_offload_capa;
974                 port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
975
976                 printf(":: initializing port: %d\n", port_id);
977
978                 ret = rte_eth_dev_configure(port_id, nr_queues,
979                                 nr_queues, &port_conf);
980                 if (ret < 0)
981                         rte_exit(EXIT_FAILURE,
982                                 ":: cannot configure device: err=%d, port=%u\n",
983                                 ret, port_id);
984
985                 rxq_conf = dev_info.default_rxconf;
986                 for (std_queue = 0; std_queue < RXQ_NUM; std_queue++) {
987                         ret = rte_eth_rx_queue_setup(port_id, std_queue, NR_RXD,
988                                         rte_eth_dev_socket_id(port_id),
989                                         &rxq_conf,
990                                         mbuf_mp);
991                         if (ret < 0)
992                                 rte_exit(EXIT_FAILURE,
993                                         ":: Rx queue setup failed: err=%d, port=%u\n",
994                                         ret, port_id);
995                 }
996
997                 txq_conf = dev_info.default_txconf;
998                 for (std_queue = 0; std_queue < TXQ_NUM; std_queue++) {
999                         ret = rte_eth_tx_queue_setup(port_id, std_queue, NR_TXD,
1000                                         rte_eth_dev_socket_id(port_id),
1001                                         &txq_conf);
1002                         if (ret < 0)
1003                                 rte_exit(EXIT_FAILURE,
1004                                         ":: Tx queue setup failed: err=%d, port=%u\n",
1005                                         ret, port_id);
1006                 }
1007
1008                 /* Catch all packets from traffic generator. */
1009                 ret = rte_eth_promiscuous_enable(port_id);
1010                 if (ret != 0)
1011                         rte_exit(EXIT_FAILURE,
1012                                 ":: promiscuous mode enable failed: err=%s, port=%u\n",
1013                                 rte_strerror(-ret), port_id);
1014
1015                 if (hairpin_queues_num != 0) {
1016                         /*
1017                          * Configure peer which represents hairpin Tx.
1018                          * Hairpin queue numbers start after standard queues
1019                          * (RXQ_NUM and TXQ_NUM).
1020                          */
1021                         for (hairpin_queue = RXQ_NUM, std_queue = 0;
1022                                         hairpin_queue < nr_queues;
1023                                         hairpin_queue++, std_queue++) {
1024                                 hairpin_conf.peers[0].port = port_id;
1025                                 hairpin_conf.peers[0].queue =
1026                                         std_queue + TXQ_NUM;
1027                                 ret = rte_eth_rx_hairpin_queue_setup(
1028                                                 port_id, hairpin_queue,
1029                                                 NR_RXD, &hairpin_conf);
1030                                 if (ret != 0)
1031                                         rte_exit(EXIT_FAILURE,
1032                                                 ":: Hairpin rx queue setup failed: err=%d, port=%u\n",
1033                                                 ret, port_id);
1034                         }
1035
1036                         for (hairpin_queue = TXQ_NUM, std_queue = 0;
1037                                         hairpin_queue < nr_queues;
1038                                         hairpin_queue++, std_queue++) {
1039                                 hairpin_conf.peers[0].port = port_id;
1040                                 hairpin_conf.peers[0].queue =
1041                                         std_queue + RXQ_NUM;
1042                                 ret = rte_eth_tx_hairpin_queue_setup(
1043                                                 port_id, hairpin_queue,
1044                                                 NR_TXD, &hairpin_conf);
1045                                 if (ret != 0)
1046                                         rte_exit(EXIT_FAILURE,
1047                                                 ":: Hairpin tx queue setup failed: err=%d, port=%u\n",
1048                                                 ret, port_id);
1049                         }
1050                 }
1051
1052                 ret = rte_eth_dev_start(port_id);
1053                 if (ret < 0)
1054                         rte_exit(EXIT_FAILURE,
1055                                 "rte_eth_dev_start:err=%d, port=%u\n",
1056                                 ret, port_id);
1057
1058                 printf(":: initializing port: %d done\n", port_id);
1059         }
1060 }
1061
1062 int
1063 main(int argc, char **argv)
1064 {
1065         int ret;
1066         uint16_t port;
1067         struct rte_flow_error error;
1068         int64_t alloc, last_alloc;
1069
1070         ret = rte_eal_init(argc, argv);
1071         if (ret < 0)
1072                 rte_exit(EXIT_FAILURE, "EAL init failed\n");
1073
1074         force_quit = false;
1075         dump_iterations = false;
1076         flows_count = DEFAULT_RULES_COUNT;
1077         iterations_number = DEFAULT_ITERATION;
1078         delete_flag = false;
1079         dump_socket_mem_flag = false;
1080         flow_group = 0;
1081
1082         signal(SIGINT, signal_handler);
1083         signal(SIGTERM, signal_handler);
1084
1085         argc -= ret;
1086         argv += ret;
1087         if (argc > 1)
1088                 args_parse(argc, argv);
1089
1090         init_port();
1091
1092         nb_lcores = rte_lcore_count();
1093         if (nb_lcores <= 1)
1094                 rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
1095
1096         last_alloc = (int64_t)dump_socket_mem(stdout);
1097         flows_handler();
1098         alloc = (int64_t)dump_socket_mem(stdout);
1099
1100         if (last_alloc)
1101                 fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
1102                 (alloc - last_alloc) / 1.0e6);
1103
1104         if (enable_fwd) {
1105                 init_lcore_info();
1106                 rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MASTER);
1107         }
1108
1109         RTE_ETH_FOREACH_DEV(port) {
1110                 rte_flow_flush(port, &error);
1111                 rte_eth_dev_stop(port);
1112                 rte_eth_dev_close(port);
1113         }
1114         return 0;
1115 }