net/ice/base: expose link configuration error
[dpdk.git] / app / test-flow-perf / main.c
index 5ec9a15..99d0463 100644 (file)
@@ -34,6 +34,7 @@
 #include <rte_mbuf.h>
 #include <rte_ethdev.h>
 #include <rte_flow.h>
+#include <rte_mtr.h>
 
 #include "config.h"
 #include "flow_gen.h"
@@ -72,7 +73,8 @@ static uint32_t nb_lcores;
 #define LCORE_MODE_PKT    1
 #define LCORE_MODE_STATS  2
 #define MAX_STREAMS      64
-#define MAX_LCORES       64
+#define METER_CREATE     1
+#define METER_DELETE     2
 
 struct stream {
        int tx_port;
@@ -92,7 +94,25 @@ struct lcore_info {
        struct rte_mbuf *pkts[MAX_PKT_BURST];
 } __rte_cache_aligned;
 
-static struct lcore_info lcore_infos[MAX_LCORES];
+static struct lcore_info lcore_infos[RTE_MAX_LCORE];
+
+/*
+ * Time, in seconds, that each core spent on one kind of rule.
+ * Both tables are indexed as [port id][core index].
+ */
+struct used_cpu_time {
+       double insertion[MAX_PORTS][RTE_MAX_LCORE];
+       double deletion[MAX_PORTS][RTE_MAX_LCORE];
+};
+
+/* State shared by the cores that insert and delete rules. */
+struct multi_cores_pool {
+       /* Number of cores used for rules insertion (--cores option). */
+       uint32_t cores_count;
+       /* Copy of rules_count, used when printing the totals. */
+       uint32_t rules_count;
+       /* Per port, per core time spent creating/deleting meters. */
+       struct used_cpu_time create_meter;
+       /* Per port, per core time spent inserting/deleting flows. */
+       struct used_cpu_time create_flow;
+       /* dump_socket_mem() value taken by each core before insertion. */
+       int64_t last_alloc[RTE_MAX_LCORE];
+       /* dump_socket_mem() value taken by each core after insertion. */
+       int64_t current_alloc[RTE_MAX_LCORE];
+} __rte_cache_aligned;
+
+/* The single pool instance; one inserting core unless --cores changes it. */
+static struct multi_cores_pool mc_pool = {
+       .cores_count = 1,
+};
 
 static void
 usage(char *progname)
@@ -118,6 +138,8 @@ usage(char *progname)
        printf("  --transfer: set transfer attribute in flows\n");
        printf("  --group=N: set group for all flows,"
                " default is %d\n", DEFAULT_GROUP);
+       printf("  --cores=N: to set the number of needed "
+               "cores to insert rte_flow rules, default is 1\n");
 
        printf("To set flow items:\n");
        printf("  --ether: add ether layer in flow items\n");
@@ -181,6 +203,7 @@ usage(char *progname)
        printf("  --set-ipv6-dscp: add set ipv6 dscp action to flow actions\n"
                "ipv6 dscp value to be set is random each flow\n");
        printf("  --flag: add flag action to flow actions\n");
+       printf("  --meter: add meter action to flow actions\n");
        printf("  --raw-encap=<data>: add raw encap action to flow actions\n"
                "Data is the data needed to be encaped\n"
                "Example: raw-encap=ether,ipv4,udp,vxlan\n");
@@ -509,6 +532,14 @@ args_parse(int argc, char **argv)
                        .map = &flow_actions[0],
                        .map_idx = &actions_idx
                },
+               {
+                       .str = "meter",
+                       .mask = FLOW_ACTION_MASK(
+                               RTE_FLOW_ACTION_TYPE_METER
+                       ),
+                       .map = &flow_actions[0],
+                       .map_idx = &actions_idx
+               },
                {
                        .str = "vxlan-encap",
                        .mask = FLOW_ACTION_MASK(
@@ -537,6 +568,7 @@ args_parse(int argc, char **argv)
                { "dump-socket-mem",            0, 0, 0 },
                { "enable-fwd",                 0, 0, 0 },
                { "portmask",                   1, 0, 0 },
+               { "cores",                      1, 0, 0 },
                /* Attributes */
                { "ingress",                    0, 0, 0 },
                { "egress",                     0, 0, 0 },
@@ -587,6 +619,7 @@ args_parse(int argc, char **argv)
                { "set-ipv4-dscp",              0, 0, 0 },
                { "set-ipv6-dscp",              0, 0, 0 },
                { "flag",                       0, 0, 0 },
+               { "meter",                      0, 0, 0 },
                { "raw-encap",                  1, 0, 0 },
                { "raw-decap",                  1, 0, 0 },
                { "vxlan-encap",                0, 0, 0 },
@@ -750,6 +783,21 @@ args_parse(int argc, char **argv)
                                        rte_exit(EXIT_FAILURE, "Invalid fwd port mask\n");
                                ports_mask = pm;
                        }
+                       if (strcmp(lgopts[opt_idx].name, "cores") == 0) {
+                               n = atoi(optarg);
+                               if ((int) rte_lcore_count() <= n) {
+                                       printf("\nError: you need %d cores to run on multi-cores\n"
+                                               "Existing cores are: %d\n", n, rte_lcore_count());
+                                       rte_exit(EXIT_FAILURE, " ");
+                               }
+                               if (n <= RTE_MAX_LCORE && n > 0)
+                                       mc_pool.cores_count = n;
+                               else {
+                                       printf("Error: cores count must be > 0 "
+                                               " and < %d\n", RTE_MAX_LCORE);
+                                       rte_exit(EXIT_FAILURE, " ");
+                               }
+                       }
                        break;
                default:
                        fprintf(stderr, "Invalid option: %s\n", argv[optind]);
@@ -844,8 +892,187 @@ print_rules_batches(double *cpu_time_per_batch)
        }
 }
 
+
+/*
+ * Report whether the meter action was requested on the command line.
+ *
+ * Returns 1 when any used entry of flow_actions[] has the METER bit
+ * set, 0 otherwise.
+ */
+static inline int
+has_meter(void)
+{
+       int idx = 0;
+
+       /* The first zero entry marks the end of the used slots. */
+       while (idx < MAX_ACTIONS_NUM && flow_actions[idx] != 0) {
+               if (flow_actions[idx]
+                               & FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_METER))
+                       return 1;
+               idx++;
+       }
+       return 0;
+}
+
+/*
+ * Create one meter object on a port and exit the application on failure.
+ *
+ * port_id: port on which the meter is created.
+ * counter: id given to the new meter.
+ *
+ * The meter uses the profile registered by create_meter_profile() and
+ * maps the policer actions as GREEN -> COLOR_GREEN,
+ * YELLOW -> COLOR_YELLOW and RED -> DROP.
+ */
+static void
+create_meter_rule(int port_id, uint32_t counter)
+{
+       int ret;
+       struct rte_mtr_params params;
+       struct rte_mtr_error error;
+
+       /* Zeroed so the report below never reads an unset message. */
+       memset(&error, 0, sizeof(error));
+       memset(&params, 0, sizeof(params));
+       params.meter_enable = 1;
+       params.stats_mask = 0xffff;
+       params.use_prev_mtr_color = 0;
+       params.dscp_table = NULL;
+
+       /* Must be the id under which the profile was registered. */
+       params.meter_profile_id = DEFAULT_METER_PROF_ID;
+       params.action[RTE_COLOR_GREEN] =
+               MTR_POLICER_ACTION_COLOR_GREEN;
+       params.action[RTE_COLOR_YELLOW] =
+               MTR_POLICER_ACTION_COLOR_YELLOW;
+       params.action[RTE_COLOR_RED] =
+               MTR_POLICER_ACTION_DROP;
+
+       /* The fourth argument (shared) is set to 1. */
+       ret = rte_mtr_create(port_id, counter, &params, 1, &error);
+       if (ret != 0) {
+               printf("Port %d create meter idx(%u) error(%d) message: %s\n",
+                       port_id, counter, error.type,
+                       error.message ? error.message : "(no stated reason)");
+               rte_exit(EXIT_FAILURE, "error in creating meter");
+       }
+}
+
+/*
+ * Destroy the meter with id 'counter' on 'port_id'.
+ * Prints the reported error and exits the application on failure.
+ */
+static void
+destroy_meter_rule(int port_id, uint32_t counter)
+{
+       struct rte_mtr_error error;
+       const char *reason;
+       int ret;
+
+       ret = rte_mtr_destroy(port_id, counter, &error);
+       if (ret == 0)
+               return;
+
+       reason = error.message ? error.message : "(no stated reason)";
+       printf("Port %u destroy meter(%d) error(%d) message: %s\n",
+               port_id, counter, error.type, reason);
+       rte_exit(EXIT_FAILURE, "Error in deleting meter rule");
+}
+
+/*
+ * Create or delete this core's share of meters on one port and record
+ * how long it took.
+ *
+ * port_id: port the meters belong to.
+ * core_id: index of the calling core; it selects the range of meter ids
+ *          [core_id * share, (core_id + 1) * share).
+ * ops:     METER_CREATE to create the meters, anything else deletes them.
+ *
+ * The elapsed time is accumulated once per completed batch of
+ * rules_batch meters and stored in mc_pool.create_meter.
+ */
+static void
+meters_handler(int port_id, uint8_t core_id, uint8_t ops)
+{
+       double batch_times[MAX_BATCHES_COUNT] = { 0 };
+       double total_time = 0;
+       double rate;
+       uint64_t batch_start;
+       uint32_t first_id, last_id, id;
+       int per_core, batch_idx;
+       const int creating = (ops == METER_CREATE);
+
+       per_core = rules_count / mc_pool.cores_count;
+
+       /* Boundaries of the meter ids handled by this core. */
+       first_id = core_id * per_core;
+       last_id = (core_id + 1) * per_core;
+
+       batch_start = rte_rdtsc();
+       for (id = first_id; id < last_id; id++) {
+               if (creating)
+                       create_meter_rule(port_id, id);
+               else
+                       destroy_meter_rule(port_id, id);
+
+               /* Only a completed batch closes a timing window. */
+               if ((id + 1) % rules_batch != 0)
+                       continue;
+
+               batch_idx = ((id + 1) / rules_batch) - 1;
+               batch_times[batch_idx] =
+                       ((double)(rte_rdtsc() - batch_start))
+                       / rte_get_tsc_hz();
+               total_time += batch_times[batch_idx];
+               batch_start = rte_rdtsc();
+       }
+
+       /* Per-batch figures are printed only on request. */
+       if (dump_iterations)
+               print_rules_batches(batch_times);
+
+       rate = ((double) (per_core / total_time) / 1000);
+
+       /* Rate for all the meters handled by this core. */
+       printf(":: Port %d :: Core %d Meter %s :: start @[%d] - end @[%d],"
+               " use:%.02fs, rate:%.02fk Rule/Sec\n",
+               port_id, core_id, creating ? "create" : "delete",
+               first_id, last_id - 1,
+               total_time, rate);
+
+       if (creating)
+               mc_pool.create_meter.insertion[port_id][core_id]
+                       = total_time;
+       else
+               mc_pool.create_meter.deletion[port_id][core_id]
+                       = total_time;
+}
+
+/*
+ * Delete the default meter profile from every port selected by
+ * ports_mask. Exits the application on the first failure.
+ */
+static void
+destroy_meter_profile(void)
+{
+       struct rte_mtr_error error;
+       uint16_t nr_ports = rte_eth_dev_count_avail();
+       int port_id;
+       int ret;
+
+       for (port_id = 0; port_id < nr_ports; port_id++) {
+               /* Skip the ports that are not part of the portmask. */
+               if (((ports_mask >> port_id) & 0x1) == 0)
+                       continue;
+
+               ret = rte_mtr_meter_profile_delete(port_id,
+                               DEFAULT_METER_PROF_ID, &error);
+               if (ret == 0)
+                       continue;
+
+               printf("Port %u del profile error(%d) message: %s\n",
+                       port_id, error.type,
+                       error.message ? error.message : "(no stated reason)");
+               rte_exit(EXIT_FAILURE, "Error: Destroy meter profile Failed!\n");
+       }
+}
+
+/*
+ * Register the default meter profile on every port selected by
+ * ports_mask. Exits the application on the first failure.
+ *
+ * Only one profile is created per port:
+ * 1 meter profile -> N meter rules -> N rte flows
+ */
+static void
+create_meter_profile(void)
+{
+       struct rte_mtr_meter_profile profile;
+       struct rte_mtr_error error;
+       uint16_t nr_ports;
+       int port_id;
+       int ret;
+
+       memset(&profile, 0, sizeof(struct rte_mtr_meter_profile));
+       nr_ports = rte_eth_dev_count_avail();
+
+       for (port_id = 0; port_id < nr_ports; port_id++) {
+               /* Skip the ports that are not part of the portmask. */
+               if (((ports_mask >> port_id) & 0x1) == 0)
+                       continue;
+
+               /* srTCM (RFC 2697) profile; refreshed before each add. */
+               profile.alg = RTE_MTR_SRTCM_RFC2697;
+               profile.srtcm_rfc2697.cir = METER_CIR;
+               profile.srtcm_rfc2697.cbs = METER_CIR / 8;
+               profile.srtcm_rfc2697.ebs = 0;
+
+               ret = rte_mtr_meter_profile_add(port_id,
+                               DEFAULT_METER_PROF_ID, &profile, &error);
+               if (ret == 0)
+                       continue;
+
+               printf("Port %u create Profile error(%d) message: %s\n",
+                       port_id, error.type,
+                       error.message ? error.message : "(no stated reason)");
+               rte_exit(EXIT_FAILURE, "Error: Creation meter profile Failed!\n");
+       }
+}
+
 static inline void
-destroy_flows(int port_id, struct rte_flow **flows_list)
+destroy_flows(int port_id, uint8_t core_id, struct rte_flow **flows_list)
 {
        struct rte_flow_error error;
        clock_t start_batch, end_batch;
@@ -855,12 +1082,15 @@ destroy_flows(int port_id, struct rte_flow **flows_list)
        double delta;
        uint32_t i;
        int rules_batch_idx;
+       int rules_count_per_core;
 
-       /* Deletion Rate */
-       printf("\nRules Deletion on port = %d\n", port_id);
+       rules_count_per_core = rules_count / mc_pool.cores_count;
+       /* If group > 0 , should add 1 flow which created in group 0 */
+       if (flow_group > 0 && core_id == 0)
+               rules_count_per_core++;
 
-       start_batch = clock();
-       for (i = 0; i < rules_count; i++) {
+       start_batch = rte_rdtsc();
+       for (i = 0; i < (uint32_t) rules_count_per_core; i++) {
                if (flows_list[i] == 0)
                        break;
 
@@ -877,12 +1107,12 @@ destroy_flows(int port_id, struct rte_flow **flows_list)
                 * for this batch.
                 */
                if (!((i + 1) % rules_batch)) {
-                       end_batch = clock();
+                       end_batch = rte_rdtsc();
                        delta = (double) (end_batch - start_batch);
                        rules_batch_idx = ((i + 1) / rules_batch) - 1;
-                       cpu_time_per_batch[rules_batch_idx] = delta / CLOCKS_PER_SEC;
+                       cpu_time_per_batch[rules_batch_idx] = delta / rte_get_tsc_hz();
                        cpu_time_used += cpu_time_per_batch[rules_batch_idx];
-                       start_batch = clock();
+                       start_batch = rte_rdtsc();
                }
        }
 
@@ -891,15 +1121,17 @@ destroy_flows(int port_id, struct rte_flow **flows_list)
                print_rules_batches(cpu_time_per_batch);
 
        /* Deletion rate for all rules */
-       deletion_rate = ((double) (rules_count / cpu_time_used) / 1000);
-       printf(":: Total rules deletion rate -> %f K Rule/Sec\n",
-               deletion_rate);
-       printf(":: The time for deleting %d in rules %f seconds\n",
-               rules_count, cpu_time_used);
+       deletion_rate = ((double) (rules_count_per_core / cpu_time_used) / 1000);
+       printf(":: Port %d :: Core %d :: Rules deletion rate -> %f K Rule/Sec\n",
+               port_id, core_id, deletion_rate);
+       printf(":: Port %d :: Core %d :: The time for deleting %d rules is %f seconds\n",
+               port_id, core_id, rules_count_per_core, cpu_time_used);
+
+       mc_pool.create_flow.deletion[port_id][core_id] = cpu_time_used;
 }
 
 static struct rte_flow **
-insert_flows(int port_id)
+insert_flows(int port_id, uint8_t core_id)
 {
        struct rte_flow **flows_list;
        struct rte_flow_error error;
@@ -909,32 +1141,42 @@ insert_flows(int port_id)
        double cpu_time_per_batch[MAX_BATCHES_COUNT] = { 0 };
        double delta;
        uint32_t flow_index;
-       uint32_t counter;
+       uint32_t counter, start_counter = 0, end_counter;
        uint64_t global_items[MAX_ITEMS_NUM] = { 0 };
        uint64_t global_actions[MAX_ACTIONS_NUM] = { 0 };
        int rules_batch_idx;
+       int rules_count_per_core;
+
+       rules_count_per_core = rules_count / mc_pool.cores_count;
+
+       /* Set boundaries of rules for each core. */
+       if (core_id)
+               start_counter = core_id * rules_count_per_core;
+       end_counter = (core_id + 1) * rules_count_per_core;
 
        global_items[0] = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH);
        global_actions[0] = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP);
 
        flows_list = rte_zmalloc("flows_list",
-               (sizeof(struct rte_flow *) * rules_count) + 1, 0);
+               (sizeof(struct rte_flow *) * rules_count_per_core) + 1, 0);
        if (flows_list == NULL)
                rte_exit(EXIT_FAILURE, "No Memory available!");
 
        cpu_time_used = 0;
        flow_index = 0;
-       if (flow_group > 0) {
+       if (flow_group > 0 && core_id == 0) {
                /*
                 * Create global rule to jump into flow_group,
                 * this way the app will avoid the default rules.
                 *
+                * This rule will be created only once.
+                *
                 * Global rule:
                 * group 0 eth / end actions jump group <flow_group>
                 */
                flow = generate_flow(port_id, 0, flow_attrs,
                        global_items, global_actions,
-                       flow_group, 0, 0, 0, 0, &error);
+                       flow_group, 0, 0, 0, 0, core_id, &error);
 
                if (flow == NULL) {
                        print_flow_error(error);
@@ -943,19 +1185,17 @@ insert_flows(int port_id)
                flows_list[flow_index++] = flow;
        }
 
-       /* Insertion Rate */
-       printf("Rules insertion on port = %d\n", port_id);
-       start_batch = clock();
-       for (counter = 0; counter < rules_count; counter++) {
+       start_batch = rte_rdtsc();
+       for (counter = start_counter; counter < end_counter; counter++) {
                flow = generate_flow(port_id, flow_group,
                        flow_attrs, flow_items, flow_actions,
                        JUMP_ACTION_TABLE, counter,
                        hairpin_queues_num,
                        encap_data, decap_data,
-                       &error);
+                       core_id, &error);
 
                if (force_quit)
-                       counter = rules_count;
+                       counter = end_counter;
 
                if (!flow) {
                        print_flow_error(error);
@@ -971,12 +1211,12 @@ insert_flows(int port_id)
                 * for this batch.
                 */
                if (!((counter + 1) % rules_batch)) {
-                       end_batch = clock();
+                       end_batch = rte_rdtsc();
                        delta = (double) (end_batch - start_batch);
                        rules_batch_idx = ((counter + 1) / rules_batch) - 1;
-                       cpu_time_per_batch[rules_batch_idx] = delta / CLOCKS_PER_SEC;
+                       cpu_time_per_batch[rules_batch_idx] = delta / rte_get_tsc_hz();
                        cpu_time_used += cpu_time_per_batch[rules_batch_idx];
-                       start_batch = clock();
+                       start_batch = rte_rdtsc();
                }
        }
 
@@ -984,23 +1224,25 @@ insert_flows(int port_id)
        if (dump_iterations)
                print_rules_batches(cpu_time_per_batch);
 
-       /* Insertion rate for all rules */
-       insertion_rate = ((double) (rules_count / cpu_time_used) / 1000);
-       printf(":: Total flow insertion rate -> %f K Rule/Sec\n",
-                       insertion_rate);
-       printf(":: The time for creating %d in flows %f seconds\n",
-                       rules_count, cpu_time_used);
+       printf(":: Port %d :: Core %d boundaries :: start @[%d] - end @[%d]\n",
+               port_id, core_id, start_counter, end_counter - 1);
 
+       /* Insertion rate for all rules in one core */
+       insertion_rate = ((double) (rules_count_per_core / cpu_time_used) / 1000);
+       printf(":: Port %d :: Core %d :: Rules insertion rate -> %f K Rule/Sec\n",
+               port_id, core_id, insertion_rate);
+       printf(":: Port %d :: Core %d :: The time for creating %d in rules %f seconds\n",
+               port_id, core_id, rules_count_per_core, cpu_time_used);
+
+       mc_pool.create_flow.insertion[port_id][core_id] = cpu_time_used;
        return flows_list;
 }
 
-static inline void
-flows_handler(void)
+static void
+flows_handler(uint8_t core_id)
 {
        struct rte_flow **flows_list;
        uint16_t nr_ports;
-       int64_t alloc, last_alloc;
-       int flow_size_in_bytes;
        int port_id;
 
        nr_ports = rte_eth_dev_count_avail();
@@ -1016,23 +1258,189 @@ flows_handler(void)
                        continue;
 
                /* Insertion part. */
-               last_alloc = (int64_t)dump_socket_mem(stdout);
-               flows_list = insert_flows(port_id);
-               alloc = (int64_t)dump_socket_mem(stdout);
+               mc_pool.last_alloc[core_id] = (int64_t)dump_socket_mem(stdout);
+               if (has_meter())
+                       meters_handler(port_id, core_id, METER_CREATE);
+               flows_list = insert_flows(port_id, core_id);
+               if (flows_list == NULL)
+                       rte_exit(EXIT_FAILURE, "Error: Insertion Failed!\n");
+               mc_pool.current_alloc[core_id] = (int64_t)dump_socket_mem(stdout);
 
                /* Deletion part. */
-               if (delete_flag)
-                       destroy_flows(port_id, flows_list);
-
-               /* Report rte_flow size in huge pages. */
-               if (last_alloc) {
-                       flow_size_in_bytes = (alloc - last_alloc) / rules_count;
-                       printf("\n:: rte_flow size in DPDK layer: %d Bytes",
-                               flow_size_in_bytes);
+               if (delete_flag) {
+                       destroy_flows(port_id, core_id, flows_list);
+                       if (has_meter())
+                               meters_handler(port_id, core_id, METER_DELETE);
                }
        }
 }
 
+/*
+ * Print the aggregated insertion (and, with delete_flag, deletion)
+ * figures of all cores for one port.
+ *
+ * item:      heading printed before the figures.
+ * port:      port whose per-core times are aggregated.
+ * used_time: per-port, per-core times to aggregate.
+ *
+ * Latency: total count of rules divided by the longest time used by
+ * any single core.
+ *
+ * Throughput: total count of rules divided by the average of the
+ * times consumed by all cores.
+ */
+static void
+dump_used_cpu_time(const char *item,
+               uint16_t port, struct used_cpu_time *used_time)
+{
+       const double *ins = used_time->insertion[port];
+       const double *del = used_time->deletion[port];
+       /* Longest single-core time, seeded with the first core. */
+       double ins_max = ins[0];
+       double del_max = del[0];
+       /* Sum of all core times, turned into an average below. */
+       double ins_avg = ins[0];
+       double del_avg = del[0];
+       double ins_latency, ins_throughput;
+       double del_latency, del_throughput;
+       uint32_t left, core;
+
+       /* Walk the remaining cores from the last one down to core 1. */
+       for (left = mc_pool.cores_count; left > 1; left--) {
+               core = left - 1;
+               ins_avg += ins[core];
+               del_avg += del[core];
+               if (ins_max < ins[core])
+                       ins_max = ins[core];
+               if (del_max < del[core])
+                       del_max = del[core];
+       }
+
+       ins_avg /= mc_pool.cores_count;
+       del_avg /= mc_pool.cores_count;
+
+       /* Rates are expressed in thousands of rules per second. */
+       ins_latency = ((double) (mc_pool.rules_count / ins_max) / 1000);
+       del_latency = ((double) (mc_pool.rules_count / del_max) / 1000);
+       ins_throughput = ((double) (mc_pool.rules_count / ins_avg) / 1000);
+       del_throughput = ((double) (mc_pool.rules_count / del_avg) / 1000);
+
+       /* Insertion, latency view */
+       printf("\n%s\n:: [Latency | Insertion] All Cores :: Port %d :: ",
+               item, port);
+       printf("Total flows insertion rate -> %f K Rules/Sec\n",
+               ins_latency);
+       printf(":: [Latency | Insertion] All Cores :: Port %d :: ", port);
+       printf("The time for creating %d rules is %f seconds\n",
+               mc_pool.rules_count, ins_max);
+
+       /* Insertion, throughput view */
+       printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port);
+       printf("Total flows insertion rate -> %f K Rules/Sec\n",
+               ins_throughput);
+       printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port);
+       printf("The average time for creating %d rules is %f seconds\n",
+               mc_pool.rules_count, ins_avg);
+
+       /* Deletion figures only make sense when rules were deleted. */
+       if (!delete_flag)
+               return;
+
+       /* Deletion, latency view */
+       printf(":: [Latency | Deletion] All Cores :: Port %d :: Total "
+               "deletion rate -> %f K Rules/Sec\n",
+               port, del_latency);
+       printf(":: [Latency | Deletion] All Cores :: Port %d :: ",
+               port);
+       printf("The time for deleting %d rules is %f seconds\n",
+               mc_pool.rules_count, del_max);
+
+       /* Deletion, throughput view */
+       printf(":: [Throughput | Deletion] All Cores :: Port %d :: Total "
+               "deletion rate -> %f K Rules/Sec\n",
+               port, del_throughput);
+       printf(":: [Throughput | Deletion] All Cores :: Port %d :: ",
+               port);
+       printf("The average time for deleting %d rules is %f seconds\n",
+               mc_pool.rules_count, del_avg);
+}
+
+/*
+ * Print the average memory used per rule, in bytes.
+ *
+ * port: port id shown in the report line.
+ *
+ * The figure is (highest current_alloc - lowest last_alloc) over all
+ * used cores, divided by the total rules count. Nothing is printed
+ * when the rules count is zero.
+ */
+static void
+dump_used_mem(uint16_t port)
+{
+       uint32_t i;
+       int64_t last_alloc, current_alloc;
+       int flow_size_in_bytes;
+
+       /* No rules means no average; also avoids dividing by zero. */
+       if (mc_pool.rules_count == 0)
+               return;
+
+       last_alloc = mc_pool.last_alloc[0];
+       current_alloc = mc_pool.current_alloc[0];
+
+       /* Keep the smallest "before" and the largest "after" value. */
+       i = mc_pool.cores_count;
+       while (i-- > 1) {
+               if (last_alloc > mc_pool.last_alloc[i])
+                       last_alloc = mc_pool.last_alloc[i];
+               if (current_alloc < mc_pool.current_alloc[i])
+                       current_alloc = mc_pool.current_alloc[i];
+       }
+
+       flow_size_in_bytes = (current_alloc - last_alloc) / mc_pool.rules_count;
+       printf("\n:: Port %d :: rte_flow size in DPDK layer: %d Bytes\n",
+               port, flow_size_in_bytes);
+}
+
+/*
+ * Per-lcore entry point of the insertion/deletion test.
+ *
+ * The calling lcore is mapped to its position among the enabled
+ * lcores. Lcores whose position is not below mc_pool.cores_count
+ * return at once. The others run flows_handler() with that position
+ * as core index. The lcore mapped to index 0 then waits for all the
+ * other lcores and prints the totals for every port selected by
+ * ports_mask.
+ *
+ * data: unused.
+ *
+ * Always returns 0.
+ */
+static int
+run_rte_flow_handler_cores(void *data __rte_unused)
+{
+       uint16_t port;
+       int lcore_counter = 0;
+       int lcore_id = rte_lcore_id();
+       int i;
+
+       /* Find the position of this lcore among the enabled lcores. */
+       RTE_LCORE_FOREACH(i) {
+               if (lcore_id == i) {
+                       printf(":: lcore %d mapped with index %d\n", lcore_id, lcore_counter);
+                       break;
+               }
+               lcore_counter++;
+       }
+       lcore_id = lcore_counter;
+
+       /* If core not needed return. */
+       if (lcore_id >= (int) mc_pool.cores_count)
+               return 0;
+
+       mc_pool.rules_count = rules_count;
+
+       flows_handler(lcore_id);
+
+       /* Only main core to print total results. */
+       if (lcore_id != 0)
+               return 0;
+
+       /* Make sure all cores finished insertion/deletion process. */
+       rte_eal_mp_wait_lcore();
+
+       RTE_ETH_FOREACH_DEV(port) {
+               /* Ports outside the portmask were not tested. */
+               if (!((ports_mask >> port) & 0x1))
+                       continue;
+
+               if (has_meter())
+                       dump_used_cpu_time("Meters:",
+                               port, &mc_pool.create_meter);
+               dump_used_cpu_time("Flows:",
+                       port, &mc_pool.create_flow);
+               dump_used_mem(port);
+       }
+
+       return 0;
+}
+
 static void
 signal_handler(int signum)
 {
@@ -1107,12 +1515,12 @@ packet_per_second_stats(void)
        int i;
 
        old = rte_zmalloc("old",
-               sizeof(struct lcore_info) * MAX_LCORES, 0);
+               sizeof(struct lcore_info) * RTE_MAX_LCORE, 0);
        if (old == NULL)
                rte_exit(EXIT_FAILURE, "No Memory available!");
 
        memcpy(old, lcore_infos,
-               sizeof(struct lcore_info) * MAX_LCORES);
+               sizeof(struct lcore_info) * RTE_MAX_LCORE);
 
        while (!force_quit) {
                uint64_t total_tx_pkts = 0;
@@ -1135,7 +1543,7 @@ packet_per_second_stats(void)
                printf("%6s %16s %16s %16s\n", "------", "----------------",
                        "----------------", "----------------");
                nr_lines = 3;
-               for (i = 0; i < MAX_LCORES; i++) {
+               for (i = 0; i < RTE_MAX_LCORE; i++) {
                        li  = &lcore_infos[i];
                        oli = &old[i];
                        if (li->mode != LCORE_MODE_PKT)
@@ -1166,7 +1574,7 @@ packet_per_second_stats(void)
                }
 
                memcpy(old, lcore_infos,
-                       sizeof(struct lcore_info) * MAX_LCORES);
+                       sizeof(struct lcore_info) * RTE_MAX_LCORE);
        }
 }
 
@@ -1227,7 +1635,7 @@ init_lcore_info(void)
         * This means that this stream is not used, or not set
         * yet.
         */
-       for (i = 0; i < MAX_LCORES; i++)
+       for (i = 0; i < RTE_MAX_LCORE; i++)
                for (j = 0; j < MAX_STREAMS; j++) {
                        lcore_infos[i].streams[j].tx_port = -1;
                        lcore_infos[i].streams[j].rx_port = -1;
@@ -1289,7 +1697,7 @@ init_lcore_info(void)
 
        /* Print all streams */
        printf(":: Stream -> core id[N]: (rx_port, rx_queue)->(tx_port, tx_queue)\n");
-       for (i = 0; i < MAX_LCORES; i++)
+       for (i = 0; i < RTE_MAX_LCORE; i++)
                for (j = 0; j < MAX_STREAMS; j++) {
                        /* No streams for this core */
                        if (lcore_infos[i].streams[j].tx_port == -1)
@@ -1470,12 +1878,19 @@ main(int argc, char **argv)
        if (nb_lcores <= 1)
                rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
-       flows_handler();
+
+       printf(":: Flows Count per port: %d\n\n", rules_count);
+
+       if (has_meter())
+               create_meter_profile();
+       rte_eal_mp_remote_launch(run_rte_flow_handler_cores, NULL, CALL_MAIN);
 
        if (enable_fwd) {
                init_lcore_info();
                rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MAIN);
        }
+       if (has_meter() && delete_flag)
+               destroy_meter_profile();
 
        RTE_ETH_FOREACH_DEV(port) {
                rte_flow_flush(port, &error);