examples/service_cores: fix lcore count check
[dpdk.git] / app / test-regex / main.c
index 0d35f45..8e665df 100644 (file)
@@ -33,6 +33,45 @@ enum app_args {
        ARG_NUM_OF_JOBS,
        ARG_PERF_MODE,
        ARG_NUM_OF_ITERATIONS,
+       ARG_NUM_OF_QPS,
+       ARG_NUM_OF_LCORES,
+       ARG_NUM_OF_MBUF_SEGS,
+};
+
+struct job_ctx {
+       struct rte_mbuf *mbuf;
+};
+
+struct qp_params {
+       uint32_t total_enqueue;
+       uint32_t total_dequeue;
+       uint32_t total_matches;
+       struct rte_regex_ops **ops;
+       struct job_ctx *jobs_ctx;
+       char *buf;
+       uint64_t start;
+       uint64_t cycles;
+};
+
+struct qps_per_lcore {
+       unsigned int lcore_id;
+       int socket;
+       uint16_t qp_id_base;
+       uint16_t nb_qps;
+};
+
+struct regex_conf {
+       uint32_t nb_jobs;
+       bool perf_mode;
+       uint32_t nb_iterations;
+       char *data_file;
+       uint8_t nb_max_matches;
+       uint32_t nb_qps;
+       uint16_t qp_id_base;
+       char *data_buf;
+       long data_len;
+       long job_len;
+       uint32_t nb_segs;
 };
 
 static void
@@ -43,13 +82,17 @@ usage(const char *prog_name)
                " --data NAME: data file to use\n"
                " --nb_jobs: number of jobs to use\n"
                " --perf N: only outputs the performance data\n"
-               " --nb_iter N: number of iteration to run\n",
+               " --nb_iter N: number of iteration to run\n"
+               " --nb_qps N: number of queues to use\n"
+               " --nb_lcores N: number of lcores to use\n"
+               " --nb_segs N: number of mbuf segments\n",
                prog_name);
 }
 
 static void
 args_parse(int argc, char **argv, char *rules_file, char *data_file,
-          uint32_t *nb_jobs, bool *perf_mode, uint32_t *nb_iterations)
+          uint32_t *nb_jobs, bool *perf_mode, uint32_t *nb_iterations,
+          uint32_t *nb_qps, uint32_t *nb_lcores, uint32_t *nb_segs)
 {
        char **argvopt;
        int opt;
@@ -66,7 +109,15 @@ args_parse(int argc, char **argv, char *rules_file, char *data_file,
                /* Perf test only */
                { "perf", 0, 0, ARG_PERF_MODE},
                /* Number of iterations to run with perf test */
-               { "nb_iter", 1, 0, ARG_NUM_OF_ITERATIONS}
+               { "nb_iter", 1, 0, ARG_NUM_OF_ITERATIONS},
+               /* Number of QPs. */
+               { "nb_qps", 1, 0, ARG_NUM_OF_QPS},
+               /* Number of lcores. */
+               { "nb_lcores", 1, 0, ARG_NUM_OF_LCORES},
+               /* Number of mbuf segments. */
+               { "nb_segs", 1, 0, ARG_NUM_OF_MBUF_SEGS},
+               /* End of options */
+               { 0, 0, 0, 0 }
        };
 
        argvopt = argv;
@@ -98,13 +149,21 @@ args_parse(int argc, char **argv, char *rules_file, char *data_file,
                case ARG_NUM_OF_ITERATIONS:
                        *nb_iterations = atoi(optarg);
                        break;
+               case ARG_NUM_OF_QPS:
+                       *nb_qps = atoi(optarg);
+                       break;
+               case ARG_NUM_OF_LCORES:
+                       *nb_lcores = atoi(optarg);
+                       break;
+               case ARG_NUM_OF_MBUF_SEGS:
+                       *nb_segs = atoi(optarg);
+                       break;
                case ARG_HELP:
-                       usage("RegEx test app");
+                       usage(argv[0]);
                        break;
                default:
-                       fprintf(stderr, "Invalid option: %s\n", argv[optind]);
-                       usage("RegEx test app");
-                       rte_exit(EXIT_FAILURE, "Invalid option\n");
+                       usage(argv[0]);
+                       rte_exit(EXIT_FAILURE, "Invalid option: %s\n", argv[optind]);
                        break;
                }
        }
@@ -157,21 +216,35 @@ error:
 }
 
 static int
-init_port(struct rte_mempool **mbuf_mp, uint32_t nb_jobs,
-         uint16_t *nb_max_payload, char *rules_file, uint8_t *nb_max_matches)
+clone_buf(char *data_buf, char **buf, long data_len)
+{
+       char *dest_buf;
+       dest_buf =
+               rte_malloc(NULL, sizeof(char) * (data_len + 1), 4096);
+       if (!dest_buf)
+               return -ENOMEM;
+       memcpy(dest_buf, data_buf, data_len + 1);
+       *buf = dest_buf;
+       return 0;
+}
+
+static int
+init_port(uint16_t *nb_max_payload, char *rules_file, uint8_t *nb_max_matches,
+         uint32_t nb_qps)
 {
        uint16_t id;
+       uint16_t qp_id;
        uint16_t num_devs;
        char *rules = NULL;
        long rules_len;
        struct rte_regexdev_info info;
        struct rte_regexdev_config dev_conf = {
-               .nb_queue_pairs = 1,
+               .nb_queue_pairs = nb_qps,
                .nb_groups = 1,
        };
        struct rte_regexdev_qp_conf qp_conf = {
                .nb_desc = 1024,
-               .qp_conf_flags = RTE_REGEX_QUEUE_PAIR_CFG_OOS_F,
+               .qp_conf_flags = 0,
        };
        int res = 0;
 
@@ -181,14 +254,6 @@ init_port(struct rte_mempool **mbuf_mp, uint32_t nb_jobs,
                return -EINVAL;
        }
 
-       *mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool", nb_jobs, 0,
-                                         0, MBUF_SIZE, rte_socket_id());
-       if (*mbuf_mp == NULL) {
-               printf("Error, can't create memory pool\n");
-               res = -ENOMEM;
-               goto error;
-       }
-
        rules_len = read_file(rules_file, &rules);
        if (rules_len < 0) {
                printf("Error, can't read rules files.\n");
@@ -206,7 +271,8 @@ init_port(struct rte_mempool **mbuf_mp, uint32_t nb_jobs,
                *nb_max_matches = info.max_matches;
                *nb_max_payload = info.max_payload_size;
                if (info.regexdev_capa & RTE_REGEXDEV_SUPP_MATCH_AS_END_F)
-                       dev_conf.dev_cfg_flags |= RTE_REGEXDEV_CFG_MATCH_AS_END_F;
+                       dev_conf.dev_cfg_flags |=
+                       RTE_REGEXDEV_CFG_MATCH_AS_END_F;
                dev_conf.nb_max_matches = info.max_matches;
                dev_conf.nb_rules_per_group = info.max_rules_per_group;
                dev_conf.rule_db_len = rules_len;
@@ -216,11 +282,17 @@ init_port(struct rte_mempool **mbuf_mp, uint32_t nb_jobs,
                        printf("Error, can't configure device %d.\n", id);
                        goto error;
                }
-               res = rte_regexdev_queue_pair_setup(id, 0, &qp_conf);
-               if (res < 0) {
-                       printf("Error, can't setup queue pair for device %d.\n",
-                              id);
-                       goto error;
+               if (info.regexdev_capa & RTE_REGEXDEV_CAPA_QUEUE_PAIR_OOS_F)
+                       qp_conf.qp_conf_flags |=
+                       RTE_REGEX_QUEUE_PAIR_CFG_OOS_F;
+               for (qp_id = 0; qp_id < nb_qps; qp_id++) {
+                       res = rte_regexdev_queue_pair_setup(id, qp_id,
+                                                           &qp_conf);
+                       if (res < 0) {
+                               printf("Error, can't setup queue pair %u for "
+                                      "device %d.\n", qp_id, id);
+                               goto error;
+                       }
                }
                printf(":: initializing device: %d done\n", id);
        }
@@ -229,8 +301,6 @@ init_port(struct rte_mempool **mbuf_mp, uint32_t nb_jobs,
 error:
        if (rules)
                rte_free(rules);
-       if (*mbuf_mp)
-               rte_mempool_free(*mbuf_mp);
        return res;
 }
 
@@ -239,176 +309,397 @@ extbuf_free_cb(void *addr __rte_unused, void *fcb_opaque __rte_unused)
 {
 }
 
+static inline struct rte_mbuf *
+regex_create_segmented_mbuf(struct rte_mempool *mbuf_pool, int pkt_len,
+               int nb_segs, void *buf) {
+
+       struct rte_mbuf *m = NULL, *mbuf = NULL;
+       uint8_t *dst;
+       char *src = buf;
+       int data_len = 0;
+       int i, size;
+       int t_len;
+
+       if (pkt_len < 1) {
+               printf("Packet size must be 1 or more (is %d)\n", pkt_len);
+               return NULL;
+       }
+
+       if (nb_segs < 1) {
+               printf("Number of segments must be 1 or more (is %d)\n",
+                               nb_segs);
+               return NULL;
+       }
+
+       t_len = pkt_len >= nb_segs ? (pkt_len / nb_segs +
+                                    !!(pkt_len % nb_segs)) : 1;
+       size = pkt_len;
+
+       /* Create chained mbuf_src and fill it with buf data */
+       for (i = 0; size > 0; i++) {
+
+               m = rte_pktmbuf_alloc(mbuf_pool);
+               if (i == 0)
+                       mbuf = m;
+
+               if (m == NULL) {
+                       printf("Cannot create segment for source mbuf");
+                       goto fail;
+               }
+
+               data_len = size > t_len ? t_len : size;
+               memset(rte_pktmbuf_mtod(m, uint8_t *), 0,
+                               rte_pktmbuf_tailroom(m));
+               memcpy(rte_pktmbuf_mtod(m, uint8_t *), src, data_len);
+               dst = (uint8_t *)rte_pktmbuf_append(m, data_len);
+               if (dst == NULL) {
+                       printf("Cannot append %d bytes to the mbuf\n",
+                                       data_len);
+                       goto fail;
+               }
+
+               if (mbuf != m)
+                       rte_pktmbuf_chain(mbuf, m);
+               src += data_len;
+               size -= data_len;
+
+       }
+       return mbuf;
+
+fail:
+       if (mbuf)
+               rte_pktmbuf_free(mbuf);
+       return NULL;
+}
+
 static int
-run_regex(struct rte_mempool *mbuf_mp, uint32_t nb_jobs,
-         uint16_t nb_max_payload, bool perf_mode, uint32_t nb_iterations,
-         char *data_file, uint8_t nb_max_matches)
+run_regex(void *args)
 {
+       struct regex_conf *rgxc = args;
+       uint32_t nb_jobs = rgxc->nb_jobs;
+       uint32_t nb_segs = rgxc->nb_segs;
+       uint32_t nb_iterations = rgxc->nb_iterations;
+       uint8_t nb_max_matches = rgxc->nb_max_matches;
+       uint32_t nb_qps = rgxc->nb_qps;
+       uint16_t qp_id_base  = rgxc->qp_id_base;
+       char *data_buf = rgxc->data_buf;
+       long data_len = rgxc->data_len;
+       long job_len = rgxc->job_len;
+
        char *buf = NULL;
-       long buf_len;
-       long job_len;
        uint32_t actual_jobs = 0;
        uint32_t i;
-       struct rte_regex_ops **ops;
+       uint16_t qp_id;
        uint16_t dev_id = 0;
-       uint16_t qp_id = 0;
        uint8_t nb_matches;
        struct rte_regexdev_match *match;
-       long pos = 0;
+       long pos;
        unsigned long d_ind = 0;
        struct rte_mbuf_ext_shared_info shinfo;
-       uint32_t total_enqueue = 0;
-       uint32_t total_dequeue = 0;
-       uint32_t total_matches = 0;
        int res = 0;
-       time_t start;
-       time_t end;
-       double time;
+       long double time;
+       struct rte_mempool *mbuf_mp;
+       struct qp_params *qp;
+       struct qp_params *qps = NULL;
+       bool update;
+       uint16_t qps_used = 0;
+       char mbuf_pool[16];
 
        shinfo.free_cb = extbuf_free_cb;
-
-       ops = rte_malloc(NULL, sizeof(*ops) * nb_jobs, 0);
-       if (!ops) {
-               printf("Error, can't allocate memory for ops.\n");
+       snprintf(mbuf_pool,
+                sizeof(mbuf_pool),
+                "mbuf_pool_%2u", qp_id_base);
+       mbuf_mp = rte_pktmbuf_pool_create(mbuf_pool,
+                       rte_align32pow2(nb_jobs * nb_qps * nb_segs),
+                       0, 0, (nb_segs == 1) ? MBUF_SIZE :
+                       (rte_align32pow2(job_len) / nb_segs +
+                       RTE_PKTMBUF_HEADROOM),
+                       rte_socket_id());
+       if (mbuf_mp == NULL) {
+               printf("Error, can't create memory pool\n");
                return -ENOMEM;
        }
 
-       /* Allocate the jobs and assign each job with an mbuf. */
-       for (i = 0; i < nb_jobs; i++) {
-               ops[i] = rte_malloc(NULL, sizeof(*ops[0]) + nb_max_matches *
-                                   sizeof(struct rte_regexdev_match), 0);
-               if (!ops[i]) {
-                       printf("Error, can't allocate memory for op.\n");
+       qps = rte_malloc(NULL, sizeof(*qps) * nb_qps, 0);
+       if (!qps) {
+               printf("Error, can't allocate memory for QPs\n");
+               res = -ENOMEM;
+               goto end;
+       }
+
+       for (qp_id = 0; qp_id < nb_qps; qp_id++) {
+               struct rte_regex_ops **ops;
+               struct job_ctx *jobs_ctx;
+
+               qps_used++;
+               qp = &qps[qp_id];
+               qp->jobs_ctx = NULL;
+               qp->buf = NULL;
+               qp->ops = ops = rte_malloc(NULL, sizeof(*ops) * nb_jobs, 0);
+               if (!ops) {
+                       printf("Error, can't allocate memory for ops.\n");
                        res = -ENOMEM;
                        goto end;
                }
-               ops[i]->mbuf = rte_pktmbuf_alloc(mbuf_mp);
-               if (!ops[i]->mbuf) {
-                       printf("Error, can't attach mbuf.\n");
+
+               qp->jobs_ctx = jobs_ctx =
+                       rte_malloc(NULL, sizeof(*jobs_ctx) * nb_jobs, 0);
+               if (!jobs_ctx) {
+                       printf("Error, can't allocate memory for jobs_ctx.\n");
                        res = -ENOMEM;
                        goto end;
                }
-       }
 
-       buf_len = read_file(data_file, &buf);
-       if (buf_len <= 0) {
-               printf("Error, can't read file, or file is empty.\n");
-               res = -EXIT_FAILURE;
-               goto end;
-       }
+               if (clone_buf(data_buf, &buf, data_len)) {
+                       printf("Error, can't clone buf.\n");
+                       res = -EXIT_FAILURE;
+                       goto end;
+               }
 
-       job_len = buf_len / nb_jobs;
-       if (job_len == 0) {
-               printf("Error, To many jobs, for the given input.\n");
-               res = -EXIT_FAILURE;
-               goto end;
-       }
+               /* Assign each mbuf with the data to handle. */
+               actual_jobs = 0;
+               pos = 0;
+               /* Allocate the jobs and assign each job with an mbuf. */
+               for (i = 0; (pos < data_len) && (i < nb_jobs) ; i++) {
+                       long act_job_len = RTE_MIN(job_len, data_len - pos);
+
+                       ops[i] = rte_malloc(NULL, sizeof(*ops[0]) +
+                                       nb_max_matches *
+                                       sizeof(struct rte_regexdev_match), 0);
+                       if (!ops[i]) {
+                               printf("Error, can't allocate "
+                                      "memory for op.\n");
+                               res = -ENOMEM;
+                               goto end;
+                       }
+                       if (nb_segs > 1) {
+                               ops[i]->mbuf = regex_create_segmented_mbuf
+                                                       (mbuf_mp, act_job_len,
+                                                        nb_segs, &buf[pos]);
+                       } else {
+                               ops[i]->mbuf = rte_pktmbuf_alloc(mbuf_mp);
+                               if (ops[i]->mbuf) {
+                                       rte_pktmbuf_attach_extbuf(ops[i]->mbuf,
+                                       &buf[pos], 0, act_job_len, &shinfo);
+                                       ops[i]->mbuf->data_len = job_len;
+                                       ops[i]->mbuf->pkt_len = act_job_len;
+                               }
+                       }
+                       if (!ops[i]->mbuf) {
+                               printf("Error, can't add mbuf.\n");
+                               res = -ENOMEM;
+                               goto end;
+                       }
 
-       if (job_len > nb_max_payload) {
-               printf("Error, not enough jobs to cover input.\n");
-               res = -EXIT_FAILURE;
-               goto end;
-       }
+                       jobs_ctx[i].mbuf = ops[i]->mbuf;
+                       ops[i]->user_id = i;
+                       ops[i]->group_id0 = 1;
+                       pos += act_job_len;
+                       actual_jobs++;
+               }
 
-       /* Assign each mbuf with the data to handle. */
-       for (i = 0; (pos < buf_len) && (i < nb_jobs) ; i++) {
-               long act_job_len = RTE_MIN(job_len, buf_len - pos);
-               rte_pktmbuf_attach_extbuf(ops[i]->mbuf, &buf[pos], 0,
-                                         act_job_len, &shinfo);
-               ops[i]->mbuf->data_len = job_len;
-               ops[i]->mbuf->pkt_len = act_job_len;
-               ops[i]->user_id = i;
-               ops[i]->group_id0 = 1;
-               pos += act_job_len;
-               actual_jobs++;
+               qp->buf = buf;
+               qp->total_matches = 0;
+               qp->start = 0;
+               qp->cycles = 0;
        }
 
-       start = clock();
        for (i = 0; i < nb_iterations; i++) {
-               total_enqueue = 0;
-               total_dequeue = 0;
-               while (total_dequeue < actual_jobs) {
-                       struct rte_regex_ops **cur_ops_to_enqueue = ops +
-                               total_enqueue;
-                       struct rte_regex_ops **cur_ops_to_dequeue = ops +
-                               total_dequeue;
-
-                       if (actual_jobs - total_enqueue)
-                               total_enqueue += rte_regexdev_enqueue_burst
-                                       (dev_id, qp_id, cur_ops_to_enqueue,
-                                        actual_jobs - total_enqueue);
-
-                       total_dequeue += rte_regexdev_dequeue_burst
-                               (dev_id, qp_id, cur_ops_to_dequeue,
-                                total_enqueue - total_dequeue);
+               for (qp_id = 0; qp_id < nb_qps; qp_id++) {
+                       qp = &qps[qp_id];
+                       qp->total_enqueue = 0;
+                       qp->total_dequeue = 0;
                }
+               do {
+                       update = false;
+                       for (qp_id = 0; qp_id < nb_qps; qp_id++) {
+                               qp = &qps[qp_id];
+                               if (qp->total_dequeue < actual_jobs) {
+                                       qp->start = rte_rdtsc_precise();
+                                       struct rte_regex_ops **
+                                               cur_ops_to_enqueue = qp->ops +
+                                               qp->total_enqueue;
+
+                                       if (actual_jobs - qp->total_enqueue)
+                                               qp->total_enqueue +=
+                                               rte_regexdev_enqueue_burst
+                                                       (dev_id,
+                                                       qp_id_base + qp_id,
+                                                       cur_ops_to_enqueue,
+                                                       actual_jobs -
+                                                       qp->total_enqueue);
+                               }
+                       }
+                       for (qp_id = 0; qp_id < nb_qps; qp_id++) {
+                               qp = &qps[qp_id];
+                               if (qp->total_dequeue < actual_jobs) {
+                                       struct rte_regex_ops **
+                                               cur_ops_to_dequeue = qp->ops +
+                                               qp->total_dequeue;
+
+                                       qp->total_dequeue +=
+                                               rte_regexdev_dequeue_burst
+                                                       (dev_id,
+                                                       qp_id_base + qp_id,
+                                                       cur_ops_to_dequeue,
+                                                       qp->total_enqueue -
+                                                       qp->total_dequeue);
+                                       qp->cycles +=
+                                            (rte_rdtsc_precise() - qp->start);
+                                       update = true;
+                               }
+                       }
+               } while (update);
+       }
+       for (qp_id = 0; qp_id < nb_qps; qp_id++) {
+               qp = &qps[qp_id];
+               time = (long double)qp->cycles / rte_get_timer_hz();
+               printf("Core=%u QP=%u Job=%ld Bytes Time=%Lf sec Perf=%Lf "
+                      "Gbps\n", rte_lcore_id(), qp_id + qp_id_base,
+                      job_len, time,
+                      (((double)actual_jobs * job_len * nb_iterations * 8)
+                      / time) / 1000000000.0);
        }
-       end = clock();
-       time = ((double)end - start) / CLOCKS_PER_SEC;
-       printf("Job len = %ld Bytes\n",  job_len);
-       printf("Time = %lf sec\n",  time);
-       printf("Perf = %lf Gbps\n",
-              (((double)actual_jobs * job_len * nb_iterations * 8) / time) /
-               1000000000.0);
-
-       if (!perf_mode) {
+
+       if (rgxc->perf_mode)
+               goto end;
+       for (qp_id = 0; qp_id < nb_qps; qp_id++) {
+               printf("\n############ Core=%u QP=%u ############\n",
+                      rte_lcore_id(), qp_id + qp_id_base);
+               qp = &qps[qp_id];
                /* Log results per job. */
-               for (d_ind = 0; d_ind < total_dequeue; d_ind++) {
-                       nb_matches = ops[d_ind % actual_jobs]->nb_matches;
+               for (d_ind = 0; d_ind < qp->total_dequeue; d_ind++) {
+                       nb_matches = qp->ops[d_ind % actual_jobs]->nb_matches;
                        printf("Job id %"PRIu64" number of matches = %d\n",
-                              ops[d_ind]->user_id, nb_matches);
-                       total_matches += nb_matches;
-                       match = ops[d_ind % actual_jobs]->matches;
+                                       qp->ops[d_ind]->user_id, nb_matches);
+                       qp->total_matches += nb_matches;
+                       match = qp->ops[d_ind % actual_jobs]->matches;
                        for (i = 0; i < nb_matches; i++) {
-                               printf("match %d, rule = %d, start = %d,len = %d\n",
+                               printf("match %d, rule = %d, "
+                                      "start = %d,len = %d\n",
                                       i, match->rule_id, match->start_offset,
                                       match->len);
                                match++;
                        }
                }
-               printf("Total matches = %d\n", total_matches);
+               printf("Total matches = %d\n", qp->total_matches);
                printf("All Matches:\n");
-
                /* Log absolute results. */
-               for (d_ind = 0; d_ind < total_dequeue; d_ind++) {
-                       nb_matches = ops[d_ind % actual_jobs]->nb_matches;
-                       total_matches += nb_matches;
-                       match = ops[d_ind % actual_jobs]->matches;
+               for (d_ind = 0; d_ind < qp->total_dequeue; d_ind++) {
+                       nb_matches = qp->ops[d_ind % actual_jobs]->nb_matches;
+                       qp->total_matches += nb_matches;
+                       match = qp->ops[d_ind % actual_jobs]->matches;
                        for (i = 0; i < nb_matches; i++) {
                                printf("start = %ld, len = %d, rule = %d\n",
-                                      match->start_offset + d_ind * job_len,
-                                      match->len, match->rule_id);
+                                               match->start_offset +
+                                               d_ind * job_len,
+                                               match->len, match->rule_id);
                                match++;
                        }
                }
        }
 end:
-       for (i = 0; i < actual_jobs; i++) {
-               if (ops[i]) {
-                       if (ops[i]->mbuf)
-                               rte_pktmbuf_free(ops[i]->mbuf);
-                       rte_free(ops[i]);
-               }
+       for (qp_id = 0; qp_id < qps_used; qp_id++) {
+               qp = &qps[qp_id];
+               for (i = 0; i < actual_jobs && qp->ops; i++)
+                       rte_free(qp->ops[i]);
+               rte_free(qp->ops);
+               qp->ops = NULL;
+               for (i = 0; i < actual_jobs && qp->jobs_ctx; i++)
+                       rte_pktmbuf_free(qp->jobs_ctx[i].mbuf);
+               rte_free(qp->jobs_ctx);
+               qp->jobs_ctx = NULL;
+               rte_free(qp->buf);
+               qp->buf = NULL;
        }
-       rte_free(ops);
-       if (buf)
-               rte_free(buf);
+       if (mbuf_mp)
+               rte_mempool_free(mbuf_mp);
+       rte_free(qps);
        return res;
 }
 
+static int
+distribute_qps_to_lcores(uint32_t nb_cores, uint32_t nb_qps,
+                        struct qps_per_lcore **qpl)
+{
+       int socket;
+       unsigned lcore_id;
+       uint32_t i;
+       uint16_t min_qp_id;
+       uint16_t max_qp_id;
+       struct qps_per_lcore *qps_per_lcore;
+       uint32_t detected_lcores;
+
+       if (nb_qps < nb_cores) {
+               nb_cores = nb_qps;
+               printf("Reducing number of cores to number of QPs (%u)\n",
+                      nb_cores);
+       }
+       /* Allocate qps_per_lcore array */
+       qps_per_lcore =
+               rte_malloc(NULL, sizeof(*qps_per_lcore) * nb_cores, 0);
+       if (!qps_per_lcore)
+               rte_exit(EXIT_FAILURE, "Failed to create qps_per_lcore array\n");
+       *qpl = qps_per_lcore;
+       detected_lcores = 0;
+       min_qp_id = 0;
+
+       RTE_LCORE_FOREACH_WORKER(lcore_id) {
+               if (detected_lcores >= nb_cores)
+                       break;
+               qps_per_lcore[detected_lcores].lcore_id = lcore_id;
+               socket = rte_lcore_to_socket_id(lcore_id);
+               if (socket == SOCKET_ID_ANY)
+                       socket = 0;
+               qps_per_lcore[detected_lcores].socket = socket;
+               qps_per_lcore[detected_lcores].qp_id_base = min_qp_id;
+               max_qp_id = min_qp_id + nb_qps / nb_cores - 1;
+               if (nb_qps % nb_cores > detected_lcores)
+                       max_qp_id++;
+               qps_per_lcore[detected_lcores].nb_qps = max_qp_id -
+                                                       min_qp_id + 1;
+               min_qp_id = max_qp_id + 1;
+               detected_lcores++;
+       }
+       if (detected_lcores != nb_cores)
+               return -1;
+
+       for (i = 0; i < detected_lcores; i++) {
+               printf("===> Core %d: allocated queues: ",
+                      qps_per_lcore[i].lcore_id);
+               min_qp_id = qps_per_lcore[i].qp_id_base;
+               max_qp_id =
+                       qps_per_lcore[i].qp_id_base + qps_per_lcore[i].nb_qps;
+               while (min_qp_id < max_qp_id) {
+                       printf("%u ", min_qp_id);
+                       min_qp_id++;
+               }
+               printf("\n");
+       }
+       return 0;
+}
+
 int
 main(int argc, char **argv)
 {
        char rules_file[MAX_FILE_NAME];
        char data_file[MAX_FILE_NAME];
-       struct rte_mempool *mbuf_mp = NULL;
        uint32_t nb_jobs = 0;
-       uint16_t nb_max_payload = 0;
        bool perf_mode = 0;
        uint32_t nb_iterations = 0;
-       uint8_t nb_max_matches = 0;
        int ret;
+       uint16_t nb_max_payload = 0;
+       uint8_t nb_max_matches = 0;
+       uint32_t nb_qps = 1;
+       char *data_buf;
+       long data_len;
+       long job_len;
+       uint32_t nb_lcores = 1, nb_segs = 1;
+       struct regex_conf *rgxc;
+       uint32_t i;
+       struct qps_per_lcore *qps_per_lcore;
 
+       /* Init EAL. */
        ret = rte_eal_init(argc, argv);
        if (ret < 0)
                rte_exit(EXIT_FAILURE, "EAL init failed\n");
@@ -416,18 +707,53 @@ main(int argc, char **argv)
        argv += ret;
        if (argc > 1)
                args_parse(argc, argv, rules_file, data_file, &nb_jobs,
-                          &perf_mode, &nb_iterations);
-
-       ret = init_port(&mbuf_mp, nb_jobs, &nb_max_payload, rules_file,
-                       &nb_max_matches);
+                               &perf_mode, &nb_iterations, &nb_qps,
+                               &nb_lcores, &nb_segs);
+
+       if (nb_qps == 0)
+               rte_exit(EXIT_FAILURE, "Number of QPs must be greater than 0\n");
+       if (nb_lcores == 0)
+               rte_exit(EXIT_FAILURE, "Number of lcores must be greater than 0\n");
+       if (distribute_qps_to_lcores(nb_lcores, nb_qps, &qps_per_lcore) < 0)
+               rte_exit(EXIT_FAILURE, "Failed to distribute queues to lcores!\n");
+       ret = init_port(&nb_max_payload, rules_file,
+                       &nb_max_matches, nb_qps);
        if (ret < 0)
                rte_exit(EXIT_FAILURE, "init port failed\n");
-       ret = run_regex(mbuf_mp, nb_jobs, nb_max_payload, perf_mode,
-                       nb_iterations, data_file, nb_max_matches);
-       if (ret < 0) {
-               rte_mempool_free(mbuf_mp);
-               rte_exit(EXIT_FAILURE, "RegEx function failed\n");
+
+       data_len = read_file(data_file, &data_buf);
+       if (data_len <= 0)
+               rte_exit(EXIT_FAILURE, "Error, can't read file, or file is empty.\n");
+
+       job_len = data_len / nb_jobs;
+       if (job_len == 0)
+               rte_exit(EXIT_FAILURE, "Error, To many jobs, for the given input.\n");
+
+       if (job_len > nb_max_payload)
+               rte_exit(EXIT_FAILURE, "Error, not enough jobs to cover input.\n");
+
+       rgxc = rte_malloc(NULL, sizeof(*rgxc) * nb_lcores, 0);
+       if (!rgxc)
+               rte_exit(EXIT_FAILURE, "Failed to create Regex Conf\n");
+       for (i = 0; i < nb_lcores; i++) {
+               rgxc[i] = (struct regex_conf){
+                       .nb_jobs = nb_jobs,
+                       .nb_segs = nb_segs,
+                       .perf_mode = perf_mode,
+                       .nb_iterations = nb_iterations,
+                       .nb_max_matches = nb_max_matches,
+                       .nb_qps = qps_per_lcore[i].nb_qps,
+                       .qp_id_base = qps_per_lcore[i].qp_id_base,
+                       .data_buf = data_buf,
+                       .data_len = data_len,
+                       .job_len = job_len,
+               };
+               rte_eal_remote_launch(run_regex, &rgxc[i],
+                                     qps_per_lcore[i].lcore_id);
        }
-       rte_mempool_free(mbuf_mp);
+       rte_eal_mp_wait_lcore();
+       rte_free(data_buf);
+       rte_free(rgxc);
+       rte_free(qps_per_lcore);
        return EXIT_SUCCESS;
 }