373c9ce5c66b2f83d2d1ce710faf1af23c68a8b2
[dpdk.git] / app/test-eventdev/test_perf_common.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"

int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);
        struct test_perf *t = evt_test_priv(test);

        return t->result;
}

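/*
 * Synthetic producer loop: each producer lcore injects nb_pkts new events
 * into its queue, cycling the flow id over nb_flows and timestamping every
 * mempool element so the workers can account forwarding latency.
 */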
static inline int
perf_producer(void *arg)
{
        struct prod_data *p = arg;
        struct test_perf *t = p->t;
        struct evt_options *opt = t->opt;
        const uint8_t dev_id = p->dev_id;
        const uint8_t port = p->port_id;
        struct rte_mempool *pool = t->pool;
        const uint64_t nb_pkts = t->nb_pkts;
        const uint32_t nb_flows = t->nb_flows;
        uint32_t flow_counter = 0;
        uint64_t count = 0;
        struct perf_elt *m;
        struct rte_event ev;

        if (opt->verbose_level > 1)
                printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
                                rte_lcore_id(), dev_id, port, p->queue_id);

        ev.event = 0;
        ev.op = RTE_EVENT_OP_NEW;
        ev.queue_id = p->queue_id;
        ev.sched_type = t->opt->sched_type_list[0];
        ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
        ev.event_type = RTE_EVENT_TYPE_CPU;
        ev.sub_event_type = 0; /* stage 0 */

        while (count < nb_pkts && t->done == false) {
                if (rte_mempool_get(pool, (void **)&m) < 0)
                        continue;

                ev.flow_id = flow_counter++ % nb_flows;
                ev.event_ptr = m;
                m->timestamp = rte_get_timer_cycles();
                while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
                        if (t->done)
                                break;
                        rte_pause();
                        m->timestamp = rte_get_timer_cycles();
                }
                count++;
        }

        return 0;
}

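/* Sum of events processed so far across all worker lcores. */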
static inline uint64_t
processed_pkts(struct test_perf *t)
{
        uint8_t i;
        uint64_t total = 0;

        rte_smp_rmb();
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].processed_pkts;

        return total;
}

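/* Sum of the per-worker accumulated forwarding latency, in timer cycles. */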
static inline uint64_t
total_latency(struct test_perf *t)
{
        uint8_t i;
        uint64_t total = 0;

        rte_smp_rmb();
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].latency;

        return total;
}

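/*
 * Launch the worker and producer lcores, then poll from the master lcore:
 * print the throughput (and average forwarding latency) once per second,
 * stop when all outstanding packets have been processed, and abort with a
 * device dump if no progress is seen for 5 consecutive seconds.
 */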
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
                int (*worker)(void *))
{
        int ret, lcore_id;
        struct test_perf *t = evt_test_priv(test);

        int port_idx = 0;
        /* launch workers */
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                if (!(opt->wlcores[lcore_id]))
                        continue;

                ret = rte_eal_remote_launch(worker,
                                 &t->worker[port_idx], lcore_id);
                if (ret) {
                        evt_err("failed to launch worker %d", lcore_id);
                        return ret;
                }
                port_idx++;
        }

        /* launch producers */
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                if (!(opt->plcores[lcore_id]))
                        continue;

                ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
                                         lcore_id);
                if (ret) {
                        evt_err("failed to launch perf_producer %d", lcore_id);
                        return ret;
                }
                port_idx++;
        }

        const uint64_t total_pkts = opt->nb_pkts *
                        evt_nr_active_lcores(opt->plcores);

        uint64_t dead_lock_cycles = rte_get_timer_cycles();
        int64_t dead_lock_remaining = total_pkts;
        const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

        uint64_t perf_cycles = rte_get_timer_cycles();
        int64_t perf_remaining = total_pkts;
        const uint64_t perf_sample = rte_get_timer_hz();

        static float total_mpps;
        static uint64_t samples;

        const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
        int64_t remaining = t->outstand_pkts - processed_pkts(t);

        while (t->done == false) {
                const uint64_t new_cycles = rte_get_timer_cycles();

                /* report throughput (and latency) once per second */
                if ((new_cycles - perf_cycles) > perf_sample) {
                        const uint64_t latency = total_latency(t);
                        const uint64_t pkts = processed_pkts(t);

                        remaining = t->outstand_pkts - pkts;
                        float mpps = (float)(perf_remaining-remaining)/1000000;

                        perf_remaining = remaining;
                        perf_cycles = new_cycles;
                        total_mpps += mpps;
                        ++samples;
                        if (opt->fwd_latency && pkts > 0) {
                                printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
                                        mpps, total_mpps/samples,
                                        (float)(latency/pkts)/freq_mhz);
                        } else {
                                printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
                                        mpps, total_mpps/samples);
                        }
                        fflush(stdout);

                        if (remaining <= 0) {
                                t->done = true;
                                t->result = EVT_TEST_SUCCESS;
                                rte_smp_wmb();
                                break;
                        }
                }

                /* abort if no events were processed in the last 5 seconds */
                if (new_cycles - dead_lock_cycles > dead_lock_sample) {
                        remaining = t->outstand_pkts - processed_pkts(t);
                        if (dead_lock_remaining == remaining) {
                                rte_event_dev_dump(opt->dev_id, stdout);
                                evt_err("no schedules for 5 seconds, deadlock");
                                t->done = true;
                                rte_smp_wmb();
                                break;
                        }
                        dead_lock_remaining = remaining;
                        dead_lock_cycles = new_cycles;
                }
        }
        printf("\n");
        return 0;
}

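/*
 * Set up one event port per worker lcore, linked to all queues, followed by
 * one unlinked port per producer; 'stride' spaces the producer queue ids.
 */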
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
                                uint8_t stride, uint8_t nb_queues)
{
        struct test_perf *t = evt_test_priv(test);
        uint8_t port, prod;
        int ret = -1;

        /* port configuration */
        const struct rte_event_port_conf wkr_p_conf = {
                        .dequeue_depth = opt->wkr_deq_dep,
                        .enqueue_depth = 64,
                        .new_event_threshold = 4096,
        };

        /* setup one port per worker, linking to all queues */
        for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
                                port++) {
                struct worker_data *w = &t->worker[port];

                w->dev_id = opt->dev_id;
                w->port_id = port;
                w->t = t;
                w->processed_pkts = 0;
                w->latency = 0;

                ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
                if (ret) {
                        evt_err("failed to setup port %d", port);
                        return ret;
                }

                ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
                if (ret != nb_queues) {
                        evt_err("failed to link all queues to port %d", port);
                        return -EINVAL;
                }
        }

        /* port for producers, no links */
        const struct rte_event_port_conf prod_conf = {
                        .dequeue_depth = 8,
                        .enqueue_depth = 32,
                        .new_event_threshold = 1200,
        };
        prod = 0;
        for ( ; port < perf_nb_event_ports(opt); port++) {
                struct prod_data *p = &t->prod[port];

                p->dev_id = opt->dev_id;
                p->port_id = port;
                p->queue_id = prod * stride;
                p->t = t;

                ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
                if (ret) {
                        evt_err("failed to setup port %d", port);
                        return ret;
                }
                prod++;
        }

        return ret;
}

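/*
 * Validate the command line options: lcore masks must not overlap the master
 * lcore or each other, stage/sched-type/queue/port counts must be in range,
 * and a few option combinations are fixed up with a notice.
 */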
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
        unsigned int lcores;

        /* N producers + N workers + 1 master when synthetic producer cores
         * are used, else N workers + 1 master when the Rx adapter is used
         */
        lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;

        if (rte_lcore_count() < lcores) {
                evt_err("test needs a minimum of %d lcores", lcores);
                return -1;
        }

        /* Validate worker lcores */
        if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
                evt_err("worker lcores overlap with master lcore");
                return -1;
        }
        if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
                evt_err("worker lcores overlap with producer lcores");
                return -1;
        }
        if (evt_has_disabled_lcore(opt->wlcores)) {
                evt_err("one or more worker lcores are not enabled");
                return -1;
        }
        if (!evt_has_active_lcore(opt->wlcores)) {
                evt_err("minimum one worker is required");
                return -1;
        }

        if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
                /* Validate producer lcores */
                if (evt_lcores_has_overlap(opt->plcores,
                                        rte_get_master_lcore())) {
                        evt_err("producer lcores overlap with master lcore");
                        return -1;
                }
                if (evt_has_disabled_lcore(opt->plcores)) {
                        evt_err("one or more producer lcores are not enabled");
                        return -1;
                }
                if (!evt_has_active_lcore(opt->plcores)) {
                        evt_err("minimum one producer is required");
                        return -1;
                }
        }

        if (evt_has_invalid_stage(opt))
                return -1;

        if (evt_has_invalid_sched_type(opt))
                return -1;

        if (nb_queues > EVT_MAX_QUEUES) {
                evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
                return -1;
        }
        if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
                evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
                return -1;
        }

        /* Fixups */
        if (opt->nb_stages == 1 && opt->fwd_latency) {
                evt_info("fwd_latency is valid only when nb_stages > 1, disabling");
                opt->fwd_latency = 0;
        }
        if (opt->fwd_latency && !opt->q_priority) {
                evt_info("enabled queue priority for latency measurement");
                opt->q_priority = 1;
        }
        if (opt->nb_pkts == 0)
                opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

        return 0;
}

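/* Print the effective test configuration before the run starts. */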
void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
        evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
        evt_dump_producer_lcores(opt);
        evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
        evt_dump_worker_lcores(opt);
        evt_dump_nb_stages(opt);
        evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
        evt_dump("nb_evdev_queues", "%d", nb_queues);
        evt_dump_queue_priority(opt);
        evt_dump_sched_type_list(opt);
        evt_dump_producer_type(opt);
}

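/* Stop and close the event device once the test has finished. */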
void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(test);

        rte_event_dev_stop(opt->dev_id);
        rte_event_dev_close(opt->dev_id);
}

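/* Mempool object constructor: zero every perf_elt at pool creation time. */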
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
            void *obj, unsigned i __rte_unused)
{
        memset(obj, 0, mp->elt_size);
}

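/*
 * Create the mempool that backs the synthetic events; each element is a
 * struct perf_elt carrying the enqueue timestamp used for latency accounting.
 */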
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
        struct test_perf *t = evt_test_priv(test);

        t->pool = rte_mempool_create(test->name, /* mempool name */
                                opt->pool_sz, /* number of elements */
                                sizeof(struct perf_elt), /* element size */
                                512, /* cache size */
                                0, NULL, NULL,
                                perf_elt_init, /* obj constructor */
                                NULL, opt->socket_id, 0); /* flags */
        if (t->pool == NULL) {
                evt_err("failed to create mempool");
                return -ENOMEM;
        }

        return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);
        struct test_perf *t = evt_test_priv(test);

        rte_mempool_free(t->pool);
}

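/*
 * Allocate the per-test private state (struct test_perf) from the requested
 * socket and derive the packet, flow and worker counts from the options.
 */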
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
        void *test_perf;

        test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
                                RTE_CACHE_LINE_SIZE, opt->socket_id);
        if (test_perf == NULL) {
                evt_err("failed to allocate test_perf memory");
                goto nomem;
        }
        test->test_priv = test_perf;

        struct test_perf *t = evt_test_priv(test);

        t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
        t->nb_workers = evt_nr_active_lcores(opt->wlcores);
        t->done = false;
        t->nb_pkts = opt->nb_pkts;
        t->nb_flows = opt->nb_flows;
        t->result = EVT_TEST_FAILED;
        t->opt = opt;
        memcpy(t->sched_type_list, opt->sched_type_list,
                        sizeof(opt->sched_type_list));
        return 0;
nomem:
        return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);

        rte_free(test->test_priv);
}