* Copyright(c) 2017 Cavium, Inc
*/
+#include <math.h>
+
#include "test_perf_common.h"
int
total += t->worker[i].processed_pkts;
for (i = 0; i < t->nb_workers; i++)
printf("Worker %d packets: "CLGRN"%"PRIx64" "CLNRM"percentage:"
- CLGRN" %3.2f\n"CLNRM, i,
+ CLGRN" %3.2f"CLNRM"\n", i,
t->worker[i].processed_pkts,
(((double)t->worker[i].processed_pkts)/total)
* 100);
static inline int
perf_producer(void *arg)
{
+ int i;
struct prod_data *p = arg;
struct test_perf *t = p->t;
struct evt_options *opt = t->opt;
const uint32_t nb_flows = t->nb_flows;
uint32_t flow_counter = 0;
uint64_t count = 0;
- struct perf_elt *m;
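+ /* Events are now taken from the mempool in bursts of BURST_SIZE */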
+ struct perf_elt *m[BURST_SIZE + 1] = {NULL};
struct rte_event ev;
if (opt->verbose_level > 1)
ev.sub_event_type = 0; /* stage 0 */
while (count < nb_pkts && t->done == false) {
- if (rte_mempool_get(pool, (void **)&m) < 0)
+ if (rte_mempool_get_bulk(pool, (void **)m, BURST_SIZE) < 0)
continue;
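+ /* Send the burst one event at a time, reusing a single rte_event */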
+ for (i = 0; i < BURST_SIZE; i++) {
+ ev.flow_id = flow_counter++ % nb_flows;
+ ev.event_ptr = m[i];
+ m[i]->timestamp = rte_get_timer_cycles();
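+ /* Retry until the event is enqueued or the test is stopped */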
+ while (rte_event_enqueue_burst(dev_id,
+ port, &ev, 1) != 1) {
+ if (t->done)
+ break;
+ rte_pause();
+ m[i]->timestamp = rte_get_timer_cycles();
+ }
+ }
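+ /* The producer may overshoot nb_pkts by up to BURST_SIZE - 1 events */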
+ count += BURST_SIZE;
+ }
+
+ return 0;
+}
+
+static inline int
+perf_producer_burst(void *arg)
+{
+ uint32_t i;
+ uint64_t timestamp;
+ struct rte_event_dev_info dev_info;
+ struct prod_data *p = arg;
+ struct test_perf *t = p->t;
+ struct evt_options *opt = t->opt;
+ const uint8_t dev_id = p->dev_id;
+ const uint8_t port = p->port_id;
+ struct rte_mempool *pool = t->pool;
+ const uint64_t nb_pkts = t->nb_pkts;
+ const uint32_t nb_flows = t->nb_flows;
+ uint32_t flow_counter = 0;
+ uint16_t enq = 0;
+ uint64_t count = 0;
+ struct perf_elt *m[MAX_PROD_ENQ_BURST_SIZE + 1];
+ struct rte_event ev[MAX_PROD_ENQ_BURST_SIZE + 1];
+ uint32_t burst_size = opt->prod_enq_burst_sz;
+
+ memset(m, 0, sizeof(*m) * (MAX_PROD_ENQ_BURST_SIZE + 1));
+ rte_event_dev_info_get(dev_id, &dev_info);
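+ /* Cap the burst at the port's maximum enqueue depth */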
+ if (dev_info.max_event_port_enqueue_depth < burst_size)
+ burst_size = dev_info.max_event_port_enqueue_depth;
+
+ if (opt->verbose_level > 1)
+ printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
+ rte_lcore_id(), dev_id, port, p->queue_id);
+
+ for (i = 0; i < burst_size; i++) {
+ ev[i].op = RTE_EVENT_OP_NEW;
+ ev[i].queue_id = p->queue_id;
+ ev[i].sched_type = t->opt->sched_type_list[0];
+ ev[i].priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
+ ev[i].event_type = RTE_EVENT_TYPE_CPU;
+ ev[i].sub_event_type = 0; /* stage 0 */
+ }
- ev.flow_id = flow_counter++ % nb_flows;
- ev.event_ptr = m;
- m->timestamp = rte_get_timer_cycles();
- while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
+ while (count < nb_pkts && t->done == false) {
+ if (rte_mempool_get_bulk(pool, (void **)m, burst_size) < 0)
+ continue;
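+ /* One timer read stamps the whole burst */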
+ timestamp = rte_get_timer_cycles();
+ for (i = 0; i < burst_size; i++) {
+ ev[i].flow_id = flow_counter++ % nb_flows;
+ ev[i].event_ptr = m[i];
+ m[i]->timestamp = timestamp;
+ }
+ enq = rte_event_enqueue_burst(dev_id, port, ev, burst_size);
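+ /* Retry the unsent tail until the whole burst is enqueued */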
+ while (enq < burst_size) {
+ enq += rte_event_enqueue_burst(dev_id, port,
+ ev + enq,
+ burst_size - enq);
if (t->done)
break;
rte_pause();
- m->timestamp = rte_get_timer_cycles();
+ timestamp = rte_get_timer_cycles();
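+ /* Refresh timestamps on events still awaiting enqueue */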
+ for (i = enq; i < burst_size; i++)
+ m[i]->timestamp = timestamp;
}
- count++;
+ count += burst_size;
}
-
return 0;
}
static inline int
perf_event_timer_producer(void *arg)
{
+ int i;
struct prod_data *p = arg;
struct test_perf *t = p->t;
struct evt_options *opt = t->opt;
const uint32_t nb_flows = t->nb_flows;
const uint64_t nb_timers = opt->nb_timers;
struct rte_mempool *pool = t->pool;
- struct perf_elt *m;
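+ /* BURST_SIZE + 1 entries: the trailing NULL is a safe prefetch target */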
+ struct perf_elt *m[BURST_SIZE + 1] = {NULL};
struct rte_event_timer_adapter **adptr = t->timer_adptr;
struct rte_event_timer tim;
uint64_t timeout_ticks = opt->expiry_nsec / opt->timer_tick_nsec;
memset(&tim, 0, sizeof(struct rte_event_timer));
- timeout_ticks = opt->optm_timer_tick_nsec ?
- (timeout_ticks * opt->timer_tick_nsec)
- / opt->optm_timer_tick_nsec : timeout_ticks;
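+ /* Round up when rescaling so timers never expire earlier than requested */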
+ timeout_ticks =
+ opt->optm_timer_tick_nsec
+ ? ceil((double)(timeout_ticks * opt->timer_tick_nsec) /
+ opt->optm_timer_tick_nsec)
+ : timeout_ticks;
timeout_ticks += timeout_ticks ? 0 : 1;
- tim.ev.event_type =  RTE_EVENT_TYPE_TIMER;
+ tim.ev.event_type = RTE_EVENT_TYPE_TIMER;
tim.ev.op = RTE_EVENT_OP_NEW;
tim.ev.sched_type = t->opt->sched_type_list[0];
tim.ev.queue_id = p->queue_id;
printf("%s(): lcore %d\n", __func__, rte_lcore_id());
while (count < nb_timers && t->done == false) {
- if (rte_mempool_get(pool, (void **)&m) < 0)
+ if (rte_mempool_get_bulk(pool, (void **)m, BURST_SIZE) < 0)
continue;
-
- m->tim = tim;
- m->tim.ev.flow_id = flow_counter++ % nb_flows;
- m->tim.ev.event_ptr = m;
- m->timestamp = rte_get_timer_cycles();
- while (rte_event_timer_arm_burst(
- adptr[flow_counter % nb_timer_adptrs],
- (struct rte_event_timer **)&m, 1) != 1) {
- if (t->done)
- break;
- rte_pause();
- m->timestamp = rte_get_timer_cycles();
+ for (i = 0; i < BURST_SIZE; i++) {
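+ /* Prefetch the next element; the NULL sentinel keeps the last iteration in bounds */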
+ rte_prefetch0(m[i + 1]);
+ m[i]->tim = tim;
+ m[i]->tim.ev.flow_id = flow_counter++ % nb_flows;
+ m[i]->tim.ev.event_ptr = m[i];
+ m[i]->timestamp = rte_get_timer_cycles();
+ while (rte_event_timer_arm_burst(
+ adptr[flow_counter % nb_timer_adptrs],
+ (struct rte_event_timer **)&m[i], 1) != 1) {
+ if (t->done)
+ break;
+ m[i]->timestamp = rte_get_timer_cycles();
+ }
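+ /* Measure the latency of the final, successful arm attempt */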
+ arm_latency += rte_get_timer_cycles() - m[i]->timestamp;
}
- arm_latency += rte_get_timer_cycles() - m->timestamp;
- count++;
+ count += BURST_SIZE;
}
fflush(stdout);
rte_delay_ms(1000);
printf("%s(): lcore %d Average event timer arm latency = %.3f us\n",
- __func__, rte_lcore_id(), (float)(arm_latency / count) /
- (rte_get_timer_hz() / 1000000));
+ __func__, rte_lcore_id(),
+ count ? (float)(arm_latency / count) /
+ (rte_get_timer_hz() / 1000000) : 0);
return 0;
}
uint64_t timeout_ticks = opt->expiry_nsec / opt->timer_tick_nsec;
memset(&tim, 0, sizeof(struct rte_event_timer));
- timeout_ticks = opt->optm_timer_tick_nsec ?
- (timeout_ticks * opt->timer_tick_nsec)
- / opt->optm_timer_tick_nsec : timeout_ticks;
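+ /* Round up when rescaling so timers never expire earlier than requested */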
+ timeout_ticks =
+ opt->optm_timer_tick_nsec
+ ? ceil((double)(timeout_ticks * opt->timer_tick_nsec) /
+ opt->optm_timer_tick_nsec)
+ : timeout_ticks;
timeout_ticks += timeout_ticks ? 0 : 1;
- tim.ev.event_type =  RTE_EVENT_TYPE_TIMER;
+ tim.ev.event_type = RTE_EVENT_TYPE_TIMER;
tim.ev.op = RTE_EVENT_OP_NEW;
tim.ev.sched_type = t->opt->sched_type_list[0];
tim.ev.queue_id = p->queue_id;
fflush(stdout);
rte_delay_ms(1000);
printf("%s(): lcore %d Average event timer arm latency = %.3f us\n",
- __func__, rte_lcore_id(), (float)(arm_latency / count) /
- (rte_get_timer_hz() / 1000000));
+ __func__, rte_lcore_id(),
+ count ? (float)(arm_latency / count) /
+ (rte_get_timer_hz() / 1000000) : 0);
return 0;
}
{
struct prod_data *p = arg;
struct test_perf *t = p->t;
- /* Launch the producer function only in case of synthetic producer. */
- if (t->opt->prod_type == EVT_PROD_TYPE_SYNT)
+ bool burst = evt_has_burst_mode(p->dev_id);
+
+ /* For the synthetic producer, launch perf_producer or
+ * perf_producer_burst depending on the producer enqueue burst size.
+ */
+ if (t->opt->prod_type == EVT_PROD_TYPE_SYNT &&
+ t->opt->prod_enq_burst_sz == 1)
return perf_producer(arg);
+ else if (t->opt->prod_type == EVT_PROD_TYPE_SYNT &&
+ t->opt->prod_enq_burst_sz > 1) {
+ if (!burst)
+ evt_err("This event device does not support burst mode");
+ else
+ return perf_producer_burst(arg);
+ }
else if (t->opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR &&
!t->opt->timdev_use_burst)
return perf_event_timer_producer(arg);
uint8_t i;
uint64_t total = 0;
- rte_smp_rmb();
for (i = 0; i < t->nb_workers; i++)
total += t->worker[i].processed_pkts;
uint8_t i;
uint64_t total = 0;
- rte_smp_rmb();
for (i = 0; i < t->nb_workers; i++)
total += t->worker[i].latency;
int port_idx = 0;
/* launch workers */
- RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ RTE_LCORE_FOREACH_WORKER(lcore_id) {
if (!(opt->wlcores[lcore_id]))
continue;
}
/* launch producers */
- RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ RTE_LCORE_FOREACH_WORKER(lcore_id) {
if (!(opt->plcores[lcore_id]))
continue;
opt->prod_type ==
EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
t->done = true;
- rte_smp_wmb();
break;
}
}
rte_event_dev_dump(opt->dev_id, stdout);
evt_err("No schedules for seconds, deadlock");
t->done = true;
- rte_smp_wmb();
break;
}
dead_lock_remaining = remaining;
if (!(adapter_info.caps &
RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT)) {
- uint32_t service_id;
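+ /* Preset to an invalid id in case the lookup fails */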
+ uint32_t service_id = -1U;
rte_event_timer_adapter_service_id_get(wl,
&service_id);
w->processed_pkts = 0;
w->latency = 0;
- ret = rte_event_port_setup(opt->dev_id, port, port_conf);
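+ /* Hint that worker ports mostly forward existing events */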
+ struct rte_event_port_conf conf = *port_conf;
+ conf.event_port_cfg |= RTE_EVENT_PORT_CFG_HINT_WORKER;
+
+ ret = rte_event_port_setup(opt->dev_id, port, &conf);
if (ret) {
evt_err("failed to setup port %d", port);
return ret;
p->t = t;
}
- ret = perf_event_rx_adapter_setup(opt, stride, *port_conf);
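+ /* Hint that Rx adapter ports mostly enqueue new events */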
+ struct rte_event_port_conf conf = *port_conf;
+ conf.event_port_cfg |= RTE_EVENT_PORT_CFG_HINT_PRODUCER;
+
+ ret = perf_event_rx_adapter_setup(opt, stride, conf);
if (ret)
return ret;
} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
p->queue_id = prod * stride;
p->t = t;
- ret = rte_event_port_setup(opt->dev_id, port,
- port_conf);
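+ /* Timer adapter ports both enqueue and dequeue, so hint both */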
+ struct rte_event_port_conf conf = *port_conf;
+ conf.event_port_cfg |=
+ RTE_EVENT_PORT_CFG_HINT_PRODUCER |
+ RTE_EVENT_PORT_CFG_HINT_CONSUMER;
+
+ ret = rte_event_port_setup(opt->dev_id, port, &conf);
if (ret) {
evt_err("failed to setup port %d", port);
return ret;
{
unsigned int lcores;
- /* N producer + N worker + 1 master when producer cores are used
- * Else N worker + 1 master when Rx adapter is used
+ /* N producer + N worker + 1 main lcore when producer cores are used.
+ * Else N worker + 1 main lcore when the Rx adapter is used.
*/
lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;
}
/* Validate worker lcores */
- if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
- evt_err("worker lcores overlaps with master lcore");
+ if (evt_lcores_has_overlap(opt->wlcores, rte_get_main_lcore())) {
+ evt_err("worker lcores overlaps with main lcore");
return -1;
}
if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
/* Validate producer lcores */
if (evt_lcores_has_overlap(opt->plcores,
- rte_get_master_lcore())) {
- evt_err("producer lcores overlaps with master lcore");
+ rte_get_main_lcore())) {
+ evt_err("producer lcores overlaps with main lcore");
return -1;
}
if (evt_has_disabled_lcore(opt->plcores)) {
evt_dump_queue_priority(opt);
evt_dump_sched_type_list(opt);
evt_dump_producer_type(opt);
+ evt_dump("prod_enq_burst_sz", "%d", opt->prod_enq_burst_sz);
}
int
perf_ethdev_setup(struct evt_test *test, struct evt_options *opt)
{
uint16_t i;
+ int ret;
struct test_perf *t = evt_test_priv(test);
struct rte_eth_conf port_conf = {
.rxmode = {
.mq_mode = ETH_MQ_RX_RSS,
- .max_rx_pkt_len = RTE_ETHER_MAX_LEN,
.split_hdr_size = 0,
},
.rx_adv_conf = {
struct rte_eth_dev_info dev_info;
struct rte_eth_conf local_port_conf = port_conf;
- rte_eth_dev_info_get(i, &dev_info);
+ ret = rte_eth_dev_info_get(i, &dev_info);
+ if (ret != 0) {
+ evt_err("Error during getting device (port %u) info: %s\n",
+ i, strerror(-ret));
+ return ret;
+ }
local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
dev_info.flow_type_rss_offloads;
return -EINVAL;
}
- rte_eth_promiscuous_enable(i);
+ ret = rte_eth_promiscuous_enable(i);
+ if (ret != 0) {
+ evt_err("Failed to enable promiscuous mode for eth port [%d]: %s",
+ i, rte_strerror(-ret));
+ return ret;
+ }
}
return 0;