examples/load_balancer/runtime.c

   1 /*-
   2  *   BSD LICENSE
   3  *
   4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
   5  *   All rights reserved.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following conditions
   9  *   are met:
  10  *
  11  *     * Redistributions of source code must retain the above copyright
  12  *       notice, this list of conditions and the following disclaimer.
  13  *     * Redistributions in binary form must reproduce the above copyright
  14  *       notice, this list of conditions and the following disclaimer in
  15  *       the documentation and/or other materials provided with the
  16  *       distribution.
  17  *     * Neither the name of Intel Corporation nor the names of its
  18  *       contributors may be used to endorse or promote products derived
  19  *       from this software without specific prior written permission.
  20  *
  21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32  */
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <stdint.h>
  37 #include <inttypes.h>
  38 #include <sys/types.h>
  39 #include <string.h>
  40 #include <sys/queue.h>
  41 #include <stdarg.h>
  42 #include <errno.h>
  43 #include <getopt.h>
  44
  45 #include <rte_common.h>
  46 #include <rte_byteorder.h>
  47 #include <rte_log.h>
  48 #include <rte_memory.h>
  49 #include <rte_memcpy.h>
  50 #include <rte_eal.h>
  51 #include <rte_launch.h>
  52 #include <rte_atomic.h>
  53 #include <rte_cycles.h>
  54 #include <rte_prefetch.h>
  55 #include <rte_lcore.h>
  56 #include <rte_per_lcore.h>
  57 #include <rte_branch_prediction.h>
  58 #include <rte_interrupts.h>
  59 #include <rte_random.h>
  60 #include <rte_debug.h>
  61 #include <rte_ether.h>
  62 #include <rte_ethdev.h>
  63 #include <rte_ring.h>
  64 #include <rte_mempool.h>
  65 #include <rte_mbuf.h>
  66 #include <rte_ip.h>
  67 #include <rte_tcp.h>
  68 #include <rte_lpm.h>
  69
  70 #include "main.h"
  71
  72 #ifndef APP_LCORE_IO_FLUSH
  73 #define APP_LCORE_IO_FLUSH           1000000
  74 #endif
  75
  76 #ifndef APP_LCORE_WORKER_FLUSH
  77 #define APP_LCORE_WORKER_FLUSH       1000000
  78 #endif
  79
  80 #ifndef APP_STATS
  81 #define APP_STATS                    1000000
  82 #endif
  83
  84 #define APP_IO_RX_DROP_ALL_PACKETS   0
  85 #define APP_WORKER_DROP_ALL_PACKETS  0
  86 #define APP_IO_TX_DROP_ALL_PACKETS   0
  87
  88 #ifndef APP_IO_RX_PREFETCH_ENABLE
  89 #define APP_IO_RX_PREFETCH_ENABLE    1
  90 #endif
  91
  92 #ifndef APP_WORKER_PREFETCH_ENABLE
  93 #define APP_WORKER_PREFETCH_ENABLE   1
  94 #endif
  95
  96 #ifndef APP_IO_TX_PREFETCH_ENABLE
  97 #define APP_IO_TX_PREFETCH_ENABLE    1
  98 #endif
  99
 100 #if APP_IO_RX_PREFETCH_ENABLE
 101 #define APP_IO_RX_PREFETCH0(p)       rte_prefetch0(p)
 102 #define APP_IO_RX_PREFETCH1(p)       rte_prefetch1(p)
 103 #else
 104 #define APP_IO_RX_PREFETCH0(p)
 105 #define APP_IO_RX_PREFETCH1(p)
 106 #endif
 107
 108 #if APP_WORKER_PREFETCH_ENABLE
 109 #define APP_WORKER_PREFETCH0(p)      rte_prefetch0(p)
 110 #define APP_WORKER_PREFETCH1(p)      rte_prefetch1(p)
 111 #else
 112 #define APP_WORKER_PREFETCH0(p)
 113 #define APP_WORKER_PREFETCH1(p)
 114 #endif
 115
 116 #if APP_IO_TX_PREFETCH_ENABLE
 117 #define APP_IO_TX_PREFETCH0(p)       rte_prefetch0(p)
 118 #define APP_IO_TX_PREFETCH1(p)       rte_prefetch1(p)
 119 #else
 120 #define APP_IO_TX_PREFETCH0(p)
 121 #define APP_IO_TX_PREFETCH1(p)
 122 #endif
 123
 124 static inline void
 125 app_lcore_io_rx_buffer_to_send (
 126         struct app_lcore_params_io *lp,
 127         uint32_t worker,
 128         struct rte_mbuf *mbuf,
 129         uint32_t bsz)
 130 {
 131         uint32_t pos;
 132         int ret;
 133
 134         pos = lp->rx.mbuf_out[worker].n_mbufs;
 135         lp->rx.mbuf_out[worker].array[pos ++] = mbuf;
 136         if (likely(pos < bsz)) {
 137                 lp->rx.mbuf_out[worker].n_mbufs = pos;
 138                 return;
 139         }
 140
 141         ret = rte_ring_sp_enqueue_bulk(
 142                 lp->rx.rings[worker],
 143                 (void **) lp->rx.mbuf_out[worker].array,
 144                 bsz,
 145                 NULL);
 146
 147         if (unlikely(ret == 0)) {
 148                 uint32_t k;
 149                 for (k = 0; k < bsz; k ++) {
 150                         struct rte_mbuf *m = lp->rx.mbuf_out[worker].array[k];
 151                         rte_pktmbuf_free(m);
 152                 }
 153         }
 154
 155         lp->rx.mbuf_out[worker].n_mbufs = 0;
 156         lp->rx.mbuf_out_flush[worker] = 0;
 157
 158 #if APP_STATS
 159         lp->rx.rings_iters[worker] ++;
 160         if (likely(ret == 0)) {
 161                 lp->rx.rings_count[worker] ++;
 162         }
 163         if (unlikely(lp->rx.rings_iters[worker] == APP_STATS)) {
 164                 unsigned lcore = rte_lcore_id();
 165
 166                 printf("\tI/O RX %u out (worker %u): enq success rate = %.2f\n",
 167                         lcore,
 168                         (unsigned)worker,
 169                         ((double) lp->rx.rings_count[worker]) / ((double) lp->rx.rings_iters[worker]));
 170                 lp->rx.rings_iters[worker] = 0;
 171                 lp->rx.rings_count[worker] = 0;
 172         }
 173 #endif
 174 }
 175
 176 static inline void
 177 app_lcore_io_rx(
 178         struct app_lcore_params_io *lp,
 179         uint32_t n_workers,
 180         uint32_t bsz_rd,
 181         uint32_t bsz_wr,
 182         uint8_t pos_lb)
 183 {
 184         struct rte_mbuf *mbuf_1_0, *mbuf_1_1, *mbuf_2_0, *mbuf_2_1;
 185         uint8_t *data_1_0, *data_1_1 = NULL;
 186         uint32_t i;
 187
 188         for (i = 0; i < lp->rx.n_nic_queues; i ++) {
 189                 uint16_t port = lp->rx.nic_queues[i].port;
 190                 uint8_t queue = lp->rx.nic_queues[i].queue;
 191                 uint32_t n_mbufs, j;
 192
 193                 n_mbufs = rte_eth_rx_burst(
 194                         port,
 195                         queue,
 196                         lp->rx.mbuf_in.array,
 197                         (uint16_t) bsz_rd);
 198
 199                 if (unlikely(n_mbufs == 0)) {
 200                         continue;
 201                 }
 202
 203 #if APP_STATS
 204                 lp->rx.nic_queues_iters[i] ++;
 205                 lp->rx.nic_queues_count[i] += n_mbufs;
 206                 if (unlikely(lp->rx.nic_queues_iters[i] == APP_STATS)) {
 207                         struct rte_eth_stats stats;
 208                         unsigned lcore = rte_lcore_id();
 209
 210                         rte_eth_stats_get(port, &stats);
 211
 212                         printf("I/O RX %u in (NIC port %u): NIC drop ratio = %.2f avg burst size = %.2f\n",
 213                                 lcore,
 214                                 port,
 215                                 (double) stats.imissed / (double) (stats.imissed + stats.ipackets),
 216                                 ((double) lp->rx.nic_queues_count[i]) / ((double) lp->rx.nic_queues_iters[i]));
 217                         lp->rx.nic_queues_iters[i] = 0;
 218                         lp->rx.nic_queues_count[i] = 0;
 219                 }
 220 #endif
 221
 222 #if APP_IO_RX_DROP_ALL_PACKETS
 223                 for (j = 0; j < n_mbufs; j ++) {
 224                         struct rte_mbuf *pkt = lp->rx.mbuf_in.array[j];
 225                         rte_pktmbuf_free(pkt);
 226                 }
 227
 228                 continue;
 229 #endif
 230
 231                 mbuf_1_0 = lp->rx.mbuf_in.array[0];
 232                 mbuf_1_1 = lp->rx.mbuf_in.array[1];
 233                 data_1_0 = rte_pktmbuf_mtod(mbuf_1_0, uint8_t *);
 234                 if (likely(n_mbufs > 1)) {
 235                         data_1_1 = rte_pktmbuf_mtod(mbuf_1_1, uint8_t *);
 236                 }
 237
 238                 mbuf_2_0 = lp->rx.mbuf_in.array[2];
 239                 mbuf_2_1 = lp->rx.mbuf_in.array[3];
 240                 APP_IO_RX_PREFETCH0(mbuf_2_0);
 241                 APP_IO_RX_PREFETCH0(mbuf_2_1);
 242
 243                 for (j = 0; j + 3 < n_mbufs; j += 2) {
 244                         struct rte_mbuf *mbuf_0_0, *mbuf_0_1;
 245                         uint8_t *data_0_0, *data_0_1;
 246                         uint32_t worker_0, worker_1;
 247
 248                         mbuf_0_0 = mbuf_1_0;
 249                         mbuf_0_1 = mbuf_1_1;
 250                         data_0_0 = data_1_0;
 251                         data_0_1 = data_1_1;
 252
 253                         mbuf_1_0 = mbuf_2_0;
 254                         mbuf_1_1 = mbuf_2_1;
 255                         data_1_0 = rte_pktmbuf_mtod(mbuf_2_0, uint8_t *);
 256                         data_1_1 = rte_pktmbuf_mtod(mbuf_2_1, uint8_t *);
 257                         APP_IO_RX_PREFETCH0(data_1_0);
 258                         APP_IO_RX_PREFETCH0(data_1_1);
 259
 260                         mbuf_2_0 = lp->rx.mbuf_in.array[j+4];
 261                         mbuf_2_1 = lp->rx.mbuf_in.array[j+5];
 262                         APP_IO_RX_PREFETCH0(mbuf_2_0);
 263                         APP_IO_RX_PREFETCH0(mbuf_2_1);
 264
 265                         worker_0 = data_0_0[pos_lb] & (n_workers - 1);
 266                         worker_1 = data_0_1[pos_lb] & (n_workers - 1);
 267
 268                         app_lcore_io_rx_buffer_to_send(lp, worker_0, mbuf_0_0, bsz_wr);
 269                         app_lcore_io_rx_buffer_to_send(lp, worker_1, mbuf_0_1, bsz_wr);
 270                 }
 271
 272                 /* Handle the last 1, 2 (when n_mbufs is even) or 3 (when n_mbufs is odd) packets  */
 273                 for ( ; j < n_mbufs; j += 1) {
 274                         struct rte_mbuf *mbuf;
 275                         uint8_t *data;
 276                         uint32_t worker;
 277
 278                         mbuf = mbuf_1_0;
 279                         mbuf_1_0 = mbuf_1_1;
 280                         mbuf_1_1 = mbuf_2_0;
 281                         mbuf_2_0 = mbuf_2_1;
 282
 283                         data = rte_pktmbuf_mtod(mbuf, uint8_t *);
 284
 285                         APP_IO_RX_PREFETCH0(mbuf_1_0);
 286
 287                         worker = data[pos_lb] & (n_workers - 1);
 288
 289                         app_lcore_io_rx_buffer_to_send(lp, worker, mbuf, bsz_wr);
 290                 }
 291         }
 292 }
 293
 294 static inline void
 295 app_lcore_io_rx_flush(struct app_lcore_params_io *lp, uint32_t n_workers)
 296 {
 297         uint32_t worker;
 298
 299         for (worker = 0; worker < n_workers; worker ++) {
 300                 int ret;
 301
 302                 if (likely((lp->rx.mbuf_out_flush[worker] == 0) ||
 303                            (lp->rx.mbuf_out[worker].n_mbufs == 0))) {
 304                         lp->rx.mbuf_out_flush[worker] = 1;
 305                         continue;
 306                 }
 307
 308                 ret = rte_ring_sp_enqueue_bulk(
 309                         lp->rx.rings[worker],
 310                         (void **) lp->rx.mbuf_out[worker].array,
 311                         lp->rx.mbuf_out[worker].n_mbufs,
 312                         NULL);
 313
 314                 if (unlikely(ret == 0)) {
 315                         uint32_t k;
 316                         for (k = 0; k < lp->rx.mbuf_out[worker].n_mbufs; k ++) {
 317                                 struct rte_mbuf *pkt_to_free = lp->rx.mbuf_out[worker].array[k];
 318                                 rte_pktmbuf_free(pkt_to_free);
 319                         }
 320                 }
 321
 322                 lp->rx.mbuf_out[worker].n_mbufs = 0;
 323                 lp->rx.mbuf_out_flush[worker] = 1;
 324         }
 325 }
 326
 327 static inline void
 328 app_lcore_io_tx(
 329         struct app_lcore_params_io *lp,
 330         uint32_t n_workers,
 331         uint32_t bsz_rd,
 332         uint32_t bsz_wr)
 333 {
 334         uint32_t worker;
 335
 336         for (worker = 0; worker < n_workers; worker ++) {
 337                 uint32_t i;
 338
 339                 for (i = 0; i < lp->tx.n_nic_ports; i ++) {
 340                         uint16_t port = lp->tx.nic_ports[i];
 341                         struct rte_ring *ring = lp->tx.rings[port][worker];
 342                         uint32_t n_mbufs, n_pkts;
 343                         int ret;
 344
 345                         n_mbufs = lp->tx.mbuf_out[port].n_mbufs;
 346                         ret = rte_ring_sc_dequeue_bulk(
 347                                 ring,
 348                                 (void **) &lp->tx.mbuf_out[port].array[n_mbufs],
 349                                 bsz_rd,
 350                                 NULL);
 351
 352                         if (unlikely(ret == 0))
 353                                 continue;
 354
 355                         n_mbufs += bsz_rd;
 356
 357 #if APP_IO_TX_DROP_ALL_PACKETS
 358                         {
 359                                 uint32_t j;
 360                                 APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[0]);
 361                                 APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[1]);
 362
 363                                 for (j = 0; j < n_mbufs; j ++) {
 364                                         if (likely(j < n_mbufs - 2)) {
 365                                                 APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[j + 2]);
 366                                         }
 367
 368                                         rte_pktmbuf_free(lp->tx.mbuf_out[port].array[j]);
 369                                 }
 370
 371                                 lp->tx.mbuf_out[port].n_mbufs = 0;
 372
 373                                 continue;
 374                         }
 375 #endif
 376
 377                         if (unlikely(n_mbufs < bsz_wr)) {
 378                                 lp->tx.mbuf_out[port].n_mbufs = n_mbufs;
 379                                 continue;
 380                         }
 381
 382                         n_pkts = rte_eth_tx_burst(
 383                                 port,
 384                                 0,
 385                                 lp->tx.mbuf_out[port].array,
 386                                 (uint16_t) n_mbufs);
 387
 388 #if APP_STATS
 389                         lp->tx.nic_ports_iters[port] ++;
 390                         lp->tx.nic_ports_count[port] += n_pkts;
 391                         if (unlikely(lp->tx.nic_ports_iters[port] == APP_STATS)) {
 392                                 unsigned lcore = rte_lcore_id();
 393
 394                                 printf("\t\t\tI/O TX %u out (port %u): avg burst size = %.2f\n",
 395                                         lcore,
 396                                         port,
 397                                         ((double) lp->tx.nic_ports_count[port]) / ((double) lp->tx.nic_ports_iters[port]));
 398                                 lp->tx.nic_ports_iters[port] = 0;
 399                                 lp->tx.nic_ports_count[port] = 0;
 400                         }
 401 #endif
 402
 403                         if (unlikely(n_pkts < n_mbufs)) {
 404                                 uint32_t k;
 405                                 for (k = n_pkts; k < n_mbufs; k ++) {
 406                                         struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k];
 407                                         rte_pktmbuf_free(pkt_to_free);
 408                                 }
 409                         }
 410                         lp->tx.mbuf_out[port].n_mbufs = 0;
 411                         lp->tx.mbuf_out_flush[port] = 0;
 412                 }
 413         }
 414 }
 415
 416 static inline void
 417 app_lcore_io_tx_flush(struct app_lcore_params_io *lp)
 418 {
 419         uint16_t port;
 420         uint32_t i;
 421
 422         for (i = 0; i < lp->tx.n_nic_ports; i++) {
 423                 uint32_t n_pkts;
 424
 425                 port = lp->tx.nic_ports[i];
 426                 if (likely((lp->tx.mbuf_out_flush[port] == 0) ||
 427                            (lp->tx.mbuf_out[port].n_mbufs == 0))) {
 428                         lp->tx.mbuf_out_flush[port] = 1;
 429                         continue;
 430                 }
 431
 432                 n_pkts = rte_eth_tx_burst(
 433                         port,
 434                         0,
 435                         lp->tx.mbuf_out[port].array,
 436                         (uint16_t) lp->tx.mbuf_out[port].n_mbufs);
 437
 438                 if (unlikely(n_pkts < lp->tx.mbuf_out[port].n_mbufs)) {
 439                         uint32_t k;
 440                         for (k = n_pkts; k < lp->tx.mbuf_out[port].n_mbufs; k ++) {
 441                                 struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k];
 442                                 rte_pktmbuf_free(pkt_to_free);
 443                         }
 444                 }
 445
 446                 lp->tx.mbuf_out[port].n_mbufs = 0;
 447                 lp->tx.mbuf_out_flush[port] = 1;
 448         }
 449 }
 450
 451 static void
 452 app_lcore_main_loop_io(void)
 453 {
 454         uint32_t lcore = rte_lcore_id();
 455         struct app_lcore_params_io *lp = &app.lcore_params[lcore].io;
 456         uint32_t n_workers = app_get_lcores_worker();
 457         uint64_t i = 0;
 458
 459         uint32_t bsz_rx_rd = app.burst_size_io_rx_read;
 460         uint32_t bsz_rx_wr = app.burst_size_io_rx_write;
 461         uint32_t bsz_tx_rd = app.burst_size_io_tx_read;
 462         uint32_t bsz_tx_wr = app.burst_size_io_tx_write;
 463
 464         uint8_t pos_lb = app.pos_lb;
 465
 466         for ( ; ; ) {
 467                 if (APP_LCORE_IO_FLUSH && (unlikely(i == APP_LCORE_IO_FLUSH))) {
 468                         if (likely(lp->rx.n_nic_queues > 0)) {
 469                                 app_lcore_io_rx_flush(lp, n_workers);
 470                         }
 471
 472                         if (likely(lp->tx.n_nic_ports > 0)) {
 473                                 app_lcore_io_tx_flush(lp);
 474                         }
 475
 476                         i = 0;
 477                 }
 478
 479                 if (likely(lp->rx.n_nic_queues > 0)) {
 480                         app_lcore_io_rx(lp, n_workers, bsz_rx_rd, bsz_rx_wr, pos_lb);
 481                 }
 482
 483                 if (likely(lp->tx.n_nic_ports > 0)) {
 484                         app_lcore_io_tx(lp, n_workers, bsz_tx_rd, bsz_tx_wr);
 485                 }
 486
 487                 i ++;
 488         }
 489 }
 490
 491 static inline void
 492 app_lcore_worker(
 493         struct app_lcore_params_worker *lp,
 494         uint32_t bsz_rd,
 495         uint32_t bsz_wr)
 496 {
 497         uint32_t i;
 498
 499         for (i = 0; i < lp->n_rings_in; i ++) {
 500                 struct rte_ring *ring_in = lp->rings_in[i];
 501                 uint32_t j;
 502                 int ret;
 503
 504                 ret = rte_ring_sc_dequeue_bulk(
 505                         ring_in,
 506                         (void **) lp->mbuf_in.array,
 507                         bsz_rd,
 508                         NULL);
 509
 510                 if (unlikely(ret == 0))
 511                         continue;
 512
 513 #if APP_WORKER_DROP_ALL_PACKETS
 514                 for (j = 0; j < bsz_rd; j ++) {
 515                         struct rte_mbuf *pkt = lp->mbuf_in.array[j];
 516                         rte_pktmbuf_free(pkt);
 517                 }
 518
 519                 continue;
 520 #endif
 521
 522                 APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[0], unsigned char *));
 523                 APP_WORKER_PREFETCH0(lp->mbuf_in.array[1]);
 524
 525                 for (j = 0; j < bsz_rd; j ++) {
 526                         struct rte_mbuf *pkt;
 527                         struct ipv4_hdr *ipv4_hdr;
 528                         uint32_t ipv4_dst, pos;
 529                         uint32_t port;
 530
 531                         if (likely(j < bsz_rd - 1)) {
 532                                 APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[j+1], unsigned char *));
 533                         }
 534                         if (likely(j < bsz_rd - 2)) {
 535                                 APP_WORKER_PREFETCH0(lp->mbuf_in.array[j+2]);
 536                         }
 537
 538                         pkt = lp->mbuf_in.array[j];
 539                         ipv4_hdr = rte_pktmbuf_mtod_offset(pkt,
 540                                                            struct ipv4_hdr *,
 541                                                            sizeof(struct ether_hdr));
 542                         ipv4_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
 543
 544                         if (unlikely(rte_lpm_lookup(lp->lpm_table, ipv4_dst, &port) != 0)) {
 545                                 port = pkt->port;
 546                         }
 547
 548                         pos = lp->mbuf_out[port].n_mbufs;
 549
 550                         lp->mbuf_out[port].array[pos ++] = pkt;
 551                         if (likely(pos < bsz_wr)) {
 552                                 lp->mbuf_out[port].n_mbufs = pos;
 553                                 continue;
 554                         }
 555
 556                         ret = rte_ring_sp_enqueue_bulk(
 557                                 lp->rings_out[port],
 558                                 (void **) lp->mbuf_out[port].array,
 559                                 bsz_wr,
 560                                 NULL);
 561
 562 #if APP_STATS
 563                         lp->rings_out_iters[port] ++;
 564                         if (ret > 0) {
 565                                 lp->rings_out_count[port] += 1;
 566                         }
 567                         if (lp->rings_out_iters[port] == APP_STATS){
 568                                 printf("\t\tWorker %u out (NIC port %u): enq success rate = %.2f\n",
 569                                         (unsigned) lp->worker_id,
 570                                         port,
 571                                         ((double) lp->rings_out_count[port]) / ((double) lp->rings_out_iters[port]));
 572                                 lp->rings_out_iters[port] = 0;
 573                                 lp->rings_out_count[port] = 0;
 574                         }
 575 #endif
 576
 577                         if (unlikely(ret == 0)) {
 578                                 uint32_t k;
 579                                 for (k = 0; k < bsz_wr; k ++) {
 580                                         struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k];
 581                                         rte_pktmbuf_free(pkt_to_free);
 582                                 }
 583                         }
 584
 585                         lp->mbuf_out[port].n_mbufs = 0;
 586                         lp->mbuf_out_flush[port] = 0;
 587                 }
 588         }
 589 }
 590
 591 static inline void
 592 app_lcore_worker_flush(struct app_lcore_params_worker *lp)
 593 {
 594         uint32_t port;
 595
 596         for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
 597                 int ret;
 598
 599                 if (unlikely(lp->rings_out[port] == NULL)) {
 600                         continue;
 601                 }
 602
 603                 if (likely((lp->mbuf_out_flush[port] == 0) ||
 604                            (lp->mbuf_out[port].n_mbufs == 0))) {
 605                         lp->mbuf_out_flush[port] = 1;
 606                         continue;
 607                 }
 608
 609                 ret = rte_ring_sp_enqueue_bulk(
 610                         lp->rings_out[port],
 611                         (void **) lp->mbuf_out[port].array,
 612                         lp->mbuf_out[port].n_mbufs,
 613                         NULL);
 614
 615                 if (unlikely(ret == 0)) {
 616                         uint32_t k;
 617                         for (k = 0; k < lp->mbuf_out[port].n_mbufs; k ++) {
 618                                 struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k];
 619                                 rte_pktmbuf_free(pkt_to_free);
 620                         }
 621                 }
 622
 623                 lp->mbuf_out[port].n_mbufs = 0;
 624                 lp->mbuf_out_flush[port] = 1;
 625         }
 626 }
 627
 628 static void
 629 app_lcore_main_loop_worker(void) {
 630         uint32_t lcore = rte_lcore_id();
 631         struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker;
 632         uint64_t i = 0;
 633
 634         uint32_t bsz_rd = app.burst_size_worker_read;
 635         uint32_t bsz_wr = app.burst_size_worker_write;
 636
 637         for ( ; ; ) {
 638                 if (APP_LCORE_WORKER_FLUSH && (unlikely(i == APP_LCORE_WORKER_FLUSH))) {
 639                         app_lcore_worker_flush(lp);
 640                         i = 0;
 641                 }
 642
 643                 app_lcore_worker(lp, bsz_rd, bsz_wr);
 644
 645                 i ++;
 646         }
 647 }
 648
 649 int
 650 app_lcore_main_loop(__attribute__((unused)) void *arg)
 651 {
 652         struct app_lcore_params *lp;
 653         unsigned lcore;
 654
 655         lcore = rte_lcore_id();
 656         lp = &app.lcore_params[lcore];
 657
 658         if (lp->type == e_APP_LCORE_IO) {
 659                 printf("Logical core %u (I/O) main loop.\n", lcore);
 660                 app_lcore_main_loop_io();
 661         }
 662
 663         if (lp->type == e_APP_LCORE_WORKER) {
 664                 printf("Logical core %u (worker %u) main loop.\n",
 665                         lcore,
 666                         (unsigned) lp->worker.worker_id);
 667                 app_lcore_main_loop_worker();
 668         }
 669
 670         return 0;
 671 }