examples/load_balancer/runtime.c

   1 /*-
   2  *   BSD LICENSE
   3  *
   4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
   5  *   All rights reserved.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following conditions
   9  *   are met:
  10  *
  11  *     * Redistributions of source code must retain the above copyright
  12  *       notice, this list of conditions and the following disclaimer.
  13  *     * Redistributions in binary form must reproduce the above copyright
  14  *       notice, this list of conditions and the following disclaimer in
  15  *       the documentation and/or other materials provided with the
  16  *       distribution.
  17  *     * Neither the name of Intel Corporation nor the names of its
  18  *       contributors may be used to endorse or promote products derived
  19  *       from this software without specific prior written permission.
  20  *
  21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32  */
  33
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <stdint.h>
  37 #include <inttypes.h>
  38 #include <sys/types.h>
  39 #include <string.h>
  40 #include <sys/queue.h>
  41 #include <stdarg.h>
  42 #include <errno.h>
  43 #include <getopt.h>
  44
  45 #include <rte_common.h>
  46 #include <rte_byteorder.h>
  47 #include <rte_log.h>
  48 #include <rte_memory.h>
  49 #include <rte_memcpy.h>
  50 #include <rte_memzone.h>
  51 #include <rte_eal.h>
  52 #include <rte_per_lcore.h>
  53 #include <rte_launch.h>
  54 #include <rte_atomic.h>
  55 #include <rte_cycles.h>
  56 #include <rte_prefetch.h>
  57 #include <rte_lcore.h>
  58 #include <rte_per_lcore.h>
  59 #include <rte_branch_prediction.h>
  60 #include <rte_interrupts.h>
  61 #include <rte_pci.h>
  62 #include <rte_random.h>
  63 #include <rte_debug.h>
  64 #include <rte_ether.h>
  65 #include <rte_ethdev.h>
  66 #include <rte_ring.h>
  67 #include <rte_mempool.h>
  68 #include <rte_mbuf.h>
  69 #include <rte_ip.h>
  70 #include <rte_tcp.h>
  71 #include <rte_lpm.h>
  72
  73 #include "main.h"
  74
  75 #ifndef APP_LCORE_IO_FLUSH
  76 #define APP_LCORE_IO_FLUSH           1000000
  77 #endif
  78
  79 #ifndef APP_LCORE_WORKER_FLUSH
  80 #define APP_LCORE_WORKER_FLUSH       1000000
  81 #endif
  82
  83 #ifndef APP_STATS
  84 #define APP_STATS                    1000000
  85 #endif
  86
  87 #define APP_IO_RX_DROP_ALL_PACKETS   0
  88 #define APP_WORKER_DROP_ALL_PACKETS  0
  89 #define APP_IO_TX_DROP_ALL_PACKETS   0
  90
  91 #ifndef APP_IO_RX_PREFETCH_ENABLE
  92 #define APP_IO_RX_PREFETCH_ENABLE    1
  93 #endif
  94
  95 #ifndef APP_WORKER_PREFETCH_ENABLE
  96 #define APP_WORKER_PREFETCH_ENABLE   1
  97 #endif
  98
  99 #ifndef APP_IO_TX_PREFETCH_ENABLE
 100 #define APP_IO_TX_PREFETCH_ENABLE    1
 101 #endif
 102
 103 #if APP_IO_RX_PREFETCH_ENABLE
 104 #define APP_IO_RX_PREFETCH0(p)       rte_prefetch0(p)
 105 #define APP_IO_RX_PREFETCH1(p)       rte_prefetch1(p)
 106 #else
 107 #define APP_IO_RX_PREFETCH0(p)
 108 #define APP_IO_RX_PREFETCH1(p)
 109 #endif
 110
 111 #if APP_WORKER_PREFETCH_ENABLE
 112 #define APP_WORKER_PREFETCH0(p)      rte_prefetch0(p)
 113 #define APP_WORKER_PREFETCH1(p)      rte_prefetch1(p)
 114 #else
 115 #define APP_WORKER_PREFETCH0(p)
 116 #define APP_WORKER_PREFETCH1(p)
 117 #endif
 118
 119 #if APP_IO_TX_PREFETCH_ENABLE
 120 #define APP_IO_TX_PREFETCH0(p)       rte_prefetch0(p)
 121 #define APP_IO_TX_PREFETCH1(p)       rte_prefetch1(p)
 122 #else
 123 #define APP_IO_TX_PREFETCH0(p)
 124 #define APP_IO_TX_PREFETCH1(p)
 125 #endif
 126
 127 static inline void
 128 app_lcore_io_rx_buffer_to_send (
 129         struct app_lcore_params_io *lp,
 130         uint32_t worker,
 131         struct rte_mbuf *mbuf,
 132         uint32_t bsz)
 133 {
 134         uint32_t pos;
 135         int ret;
 136
 137         pos = lp->rx.mbuf_out[worker].n_mbufs;
 138         lp->rx.mbuf_out[worker].array[pos ++] = mbuf;
 139         if (likely(pos < bsz)) {
 140                 lp->rx.mbuf_out[worker].n_mbufs = pos;
 141                 return;
 142         }
 143
 144         ret = rte_ring_sp_enqueue_bulk(
 145                 lp->rx.rings[worker],
 146                 (void **) lp->rx.mbuf_out[worker].array,
 147                 bsz);
 148
 149         if (unlikely(ret == -ENOBUFS)) {
 150                 uint32_t k;
 151                 for (k = 0; k < bsz; k ++) {
 152                         struct rte_mbuf *m = lp->rx.mbuf_out[worker].array[k];
 153                         rte_pktmbuf_free(m);
 154                 }
 155         }
 156
 157         lp->rx.mbuf_out[worker].n_mbufs = 0;
 158         lp->rx.mbuf_out_flush[worker] = 0;
 159
 160 #if APP_STATS
 161         lp->rx.rings_iters[worker] ++;
 162         if (likely(ret == 0)) {
 163                 lp->rx.rings_count[worker] ++;
 164         }
 165         if (unlikely(lp->rx.rings_iters[worker] == APP_STATS)) {
 166                 unsigned lcore = rte_lcore_id();
 167
 168                 printf("\tI/O RX %u out (worker %u): enq success rate = %.2f\n",
 169                         lcore,
 170                         (unsigned)worker,
 171                         ((double) lp->rx.rings_count[worker]) / ((double) lp->rx.rings_iters[worker]));
 172                 lp->rx.rings_iters[worker] = 0;
 173                 lp->rx.rings_count[worker] = 0;
 174         }
 175 #endif
 176 }
 177
 178 static inline void
 179 app_lcore_io_rx(
 180         struct app_lcore_params_io *lp,
 181         uint32_t n_workers,
 182         uint32_t bsz_rd,
 183         uint32_t bsz_wr,
 184         uint8_t pos_lb)
 185 {
 186         struct rte_mbuf *mbuf_1_0, *mbuf_1_1, *mbuf_2_0, *mbuf_2_1;
 187         uint8_t *data_1_0, *data_1_1 = NULL;
 188         uint32_t i;
 189
 190         for (i = 0; i < lp->rx.n_nic_queues; i ++) {
 191                 uint8_t port = lp->rx.nic_queues[i].port;
 192                 uint8_t queue = lp->rx.nic_queues[i].queue;
 193                 uint32_t n_mbufs, j;
 194
 195                 n_mbufs = rte_eth_rx_burst(
 196                         port,
 197                         queue,
 198                         lp->rx.mbuf_in.array,
 199                         (uint16_t) bsz_rd);
 200
 201                 if (unlikely(n_mbufs == 0)) {
 202                         continue;
 203                 }
 204
 205 #if APP_STATS
 206                 lp->rx.nic_queues_iters[i] ++;
 207                 lp->rx.nic_queues_count[i] += n_mbufs;
 208                 if (unlikely(lp->rx.nic_queues_iters[i] == APP_STATS)) {
 209                         struct rte_eth_stats stats;
 210                         unsigned lcore = rte_lcore_id();
 211
 212                         rte_eth_stats_get(port, &stats);
 213
 214                         printf("I/O RX %u in (NIC port %u): NIC drop ratio = %.2f avg burst size = %.2f\n",
 215                                 lcore,
 216                                 (unsigned) port,
 217                                 (double) stats.imissed / (double) (stats.imissed + stats.ipackets),
 218                                 ((double) lp->rx.nic_queues_count[i]) / ((double) lp->rx.nic_queues_iters[i]));
 219                         lp->rx.nic_queues_iters[i] = 0;
 220                         lp->rx.nic_queues_count[i] = 0;
 221                 }
 222 #endif
 223
 224 #if APP_IO_RX_DROP_ALL_PACKETS
 225                 for (j = 0; j < n_mbufs; j ++) {
 226                         struct rte_mbuf *pkt = lp->rx.mbuf_in.array[j];
 227                         rte_pktmbuf_free(pkt);
 228                 }
 229
 230                 continue;
 231 #endif
 232
 233                 mbuf_1_0 = lp->rx.mbuf_in.array[0];
 234                 mbuf_1_1 = lp->rx.mbuf_in.array[1];
 235                 data_1_0 = rte_pktmbuf_mtod(mbuf_1_0, uint8_t *);
 236                 if (likely(n_mbufs > 1)) {
 237                         data_1_1 = rte_pktmbuf_mtod(mbuf_1_1, uint8_t *);
 238                 }
 239
 240                 mbuf_2_0 = lp->rx.mbuf_in.array[2];
 241                 mbuf_2_1 = lp->rx.mbuf_in.array[3];
 242                 APP_IO_RX_PREFETCH0(mbuf_2_0);
 243                 APP_IO_RX_PREFETCH0(mbuf_2_1);
 244
 245                 for (j = 0; j + 3 < n_mbufs; j += 2) {
 246                         struct rte_mbuf *mbuf_0_0, *mbuf_0_1;
 247                         uint8_t *data_0_0, *data_0_1;
 248                         uint32_t worker_0, worker_1;
 249
 250                         mbuf_0_0 = mbuf_1_0;
 251                         mbuf_0_1 = mbuf_1_1;
 252                         data_0_0 = data_1_0;
 253                         data_0_1 = data_1_1;
 254
 255                         mbuf_1_0 = mbuf_2_0;
 256                         mbuf_1_1 = mbuf_2_1;
 257                         data_1_0 = rte_pktmbuf_mtod(mbuf_2_0, uint8_t *);
 258                         data_1_1 = rte_pktmbuf_mtod(mbuf_2_1, uint8_t *);
 259                         APP_IO_RX_PREFETCH0(data_1_0);
 260                         APP_IO_RX_PREFETCH0(data_1_1);
 261
 262                         mbuf_2_0 = lp->rx.mbuf_in.array[j+4];
 263                         mbuf_2_1 = lp->rx.mbuf_in.array[j+5];
 264                         APP_IO_RX_PREFETCH0(mbuf_2_0);
 265                         APP_IO_RX_PREFETCH0(mbuf_2_1);
 266
 267                         worker_0 = data_0_0[pos_lb] & (n_workers - 1);
 268                         worker_1 = data_0_1[pos_lb] & (n_workers - 1);
 269
 270                         app_lcore_io_rx_buffer_to_send(lp, worker_0, mbuf_0_0, bsz_wr);
 271                         app_lcore_io_rx_buffer_to_send(lp, worker_1, mbuf_0_1, bsz_wr);
 272                 }
 273
 274                 /* Handle the last 1, 2 (when n_mbufs is even) or 3 (when n_mbufs is odd) packets  */
 275                 for ( ; j < n_mbufs; j += 1) {
 276                         struct rte_mbuf *mbuf;
 277                         uint8_t *data;
 278                         uint32_t worker;
 279
 280                         mbuf = mbuf_1_0;
 281                         mbuf_1_0 = mbuf_1_1;
 282                         mbuf_1_1 = mbuf_2_0;
 283                         mbuf_2_0 = mbuf_2_1;
 284
 285                         data = rte_pktmbuf_mtod(mbuf, uint8_t *);
 286
 287                         APP_IO_RX_PREFETCH0(mbuf_1_0);
 288
 289                         worker = data[pos_lb] & (n_workers - 1);
 290
 291                         app_lcore_io_rx_buffer_to_send(lp, worker, mbuf, bsz_wr);
 292                 }
 293         }
 294 }
 295
 296 static inline void
 297 app_lcore_io_rx_flush(struct app_lcore_params_io *lp, uint32_t n_workers)
 298 {
 299         uint32_t worker;
 300
 301         for (worker = 0; worker < n_workers; worker ++) {
 302                 int ret;
 303
 304                 if (likely((lp->rx.mbuf_out_flush[worker] == 0) ||
 305                            (lp->rx.mbuf_out[worker].n_mbufs == 0))) {
 306                         lp->rx.mbuf_out_flush[worker] = 1;
 307                         continue;
 308                 }
 309
 310                 ret = rte_ring_sp_enqueue_bulk(
 311                         lp->rx.rings[worker],
 312                         (void **) lp->rx.mbuf_out[worker].array,
 313                         lp->rx.mbuf_out[worker].n_mbufs);
 314
 315                 if (unlikely(ret < 0)) {
 316                         uint32_t k;
 317                         for (k = 0; k < lp->rx.mbuf_out[worker].n_mbufs; k ++) {
 318                                 struct rte_mbuf *pkt_to_free = lp->rx.mbuf_out[worker].array[k];
 319                                 rte_pktmbuf_free(pkt_to_free);
 320                         }
 321                 }
 322
 323                 lp->rx.mbuf_out[worker].n_mbufs = 0;
 324                 lp->rx.mbuf_out_flush[worker] = 1;
 325         }
 326 }
 327
 328 static inline void
 329 app_lcore_io_tx(
 330         struct app_lcore_params_io *lp,
 331         uint32_t n_workers,
 332         uint32_t bsz_rd,
 333         uint32_t bsz_wr)
 334 {
 335         uint32_t worker;
 336
 337         for (worker = 0; worker < n_workers; worker ++) {
 338                 uint32_t i;
 339
 340                 for (i = 0; i < lp->tx.n_nic_ports; i ++) {
 341                         uint8_t port = lp->tx.nic_ports[i];
 342                         struct rte_ring *ring = lp->tx.rings[port][worker];
 343                         uint32_t n_mbufs, n_pkts;
 344                         int ret;
 345
 346                         n_mbufs = lp->tx.mbuf_out[port].n_mbufs;
 347                         ret = rte_ring_sc_dequeue_bulk(
 348                                 ring,
 349                                 (void **) &lp->tx.mbuf_out[port].array[n_mbufs],
 350                                 bsz_rd);
 351
 352                         if (unlikely(ret == -ENOENT)) {
 353                                 continue;
 354                         }
 355
 356                         n_mbufs += bsz_rd;
 357
 358 #if APP_IO_TX_DROP_ALL_PACKETS
 359                         {
 360                                 uint32_t j;
 361                                 APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[0]);
 362                                 APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[1]);
 363
 364                                 for (j = 0; j < n_mbufs; j ++) {
 365                                         if (likely(j < n_mbufs - 2)) {
 366                                                 APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[j + 2]);
 367                                         }
 368
 369                                         rte_pktmbuf_free(lp->tx.mbuf_out[port].array[j]);
 370                                 }
 371
 372                                 lp->tx.mbuf_out[port].n_mbufs = 0;
 373
 374                                 continue;
 375                         }
 376 #endif
 377
 378                         if (unlikely(n_mbufs < bsz_wr)) {
 379                                 lp->tx.mbuf_out[port].n_mbufs = n_mbufs;
 380                                 continue;
 381                         }
 382
 383                         n_pkts = rte_eth_tx_burst(
 384                                 port,
 385                                 0,
 386                                 lp->tx.mbuf_out[port].array,
 387                                 (uint16_t) n_mbufs);
 388
 389 #if APP_STATS
 390                         lp->tx.nic_ports_iters[port] ++;
 391                         lp->tx.nic_ports_count[port] += n_pkts;
 392                         if (unlikely(lp->tx.nic_ports_iters[port] == APP_STATS)) {
 393                                 unsigned lcore = rte_lcore_id();
 394
 395                                 printf("\t\t\tI/O TX %u out (port %u): avg burst size = %.2f\n",
 396                                         lcore,
 397                                         (unsigned) port,
 398                                         ((double) lp->tx.nic_ports_count[port]) / ((double) lp->tx.nic_ports_iters[port]));
 399                                 lp->tx.nic_ports_iters[port] = 0;
 400                                 lp->tx.nic_ports_count[port] = 0;
 401                         }
 402 #endif
 403
 404                         if (unlikely(n_pkts < n_mbufs)) {
 405                                 uint32_t k;
 406                                 for (k = n_pkts; k < n_mbufs; k ++) {
 407                                         struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k];
 408                                         rte_pktmbuf_free(pkt_to_free);
 409                                 }
 410                         }
 411                         lp->tx.mbuf_out[port].n_mbufs = 0;
 412                         lp->tx.mbuf_out_flush[port] = 0;
 413                 }
 414         }
 415 }
 416
 417 static inline void
 418 app_lcore_io_tx_flush(struct app_lcore_params_io *lp)
 419 {
 420         uint8_t port;
 421
 422         for (port = 0; port < lp->tx.n_nic_ports; port ++) {
 423                 uint32_t n_pkts;
 424
 425                 if (likely((lp->tx.mbuf_out_flush[port] == 0) ||
 426                            (lp->tx.mbuf_out[port].n_mbufs == 0))) {
 427                         lp->tx.mbuf_out_flush[port] = 1;
 428                         continue;
 429                 }
 430
 431                 n_pkts = rte_eth_tx_burst(
 432                         port,
 433                         0,
 434                         lp->tx.mbuf_out[port].array,
 435                         (uint16_t) lp->tx.mbuf_out[port].n_mbufs);
 436
 437                 if (unlikely(n_pkts < lp->tx.mbuf_out[port].n_mbufs)) {
 438                         uint32_t k;
 439                         for (k = n_pkts; k < lp->tx.mbuf_out[port].n_mbufs; k ++) {
 440                                 struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k];
 441                                 rte_pktmbuf_free(pkt_to_free);
 442                         }
 443                 }
 444
 445                 lp->tx.mbuf_out[port].n_mbufs = 0;
 446                 lp->tx.mbuf_out_flush[port] = 1;
 447         }
 448 }
 449
 450 static void
 451 app_lcore_main_loop_io(void)
 452 {
 453         uint32_t lcore = rte_lcore_id();
 454         struct app_lcore_params_io *lp = &app.lcore_params[lcore].io;
 455         uint32_t n_workers = app_get_lcores_worker();
 456         uint64_t i = 0;
 457
 458         uint32_t bsz_rx_rd = app.burst_size_io_rx_read;
 459         uint32_t bsz_rx_wr = app.burst_size_io_rx_write;
 460         uint32_t bsz_tx_rd = app.burst_size_io_tx_read;
 461         uint32_t bsz_tx_wr = app.burst_size_io_tx_write;
 462
 463         uint8_t pos_lb = app.pos_lb;
 464
 465         for ( ; ; ) {
 466                 if (APP_LCORE_IO_FLUSH && (unlikely(i == APP_LCORE_IO_FLUSH))) {
 467                         if (likely(lp->rx.n_nic_queues > 0)) {
 468                                 app_lcore_io_rx_flush(lp, n_workers);
 469                         }
 470
 471                         if (likely(lp->tx.n_nic_ports > 0)) {
 472                                 app_lcore_io_tx_flush(lp);
 473                         }
 474
 475                         i = 0;
 476                 }
 477
 478                 if (likely(lp->rx.n_nic_queues > 0)) {
 479                         app_lcore_io_rx(lp, n_workers, bsz_rx_rd, bsz_rx_wr, pos_lb);
 480                 }
 481
 482                 if (likely(lp->tx.n_nic_ports > 0)) {
 483                         app_lcore_io_tx(lp, n_workers, bsz_tx_rd, bsz_tx_wr);
 484                 }
 485
 486                 i ++;
 487         }
 488 }
 489
 490 static inline void
 491 app_lcore_worker(
 492         struct app_lcore_params_worker *lp,
 493         uint32_t bsz_rd,
 494         uint32_t bsz_wr)
 495 {
 496         uint32_t i;
 497
 498         for (i = 0; i < lp->n_rings_in; i ++) {
 499                 struct rte_ring *ring_in = lp->rings_in[i];
 500                 uint32_t j;
 501                 int ret;
 502
 503                 ret = rte_ring_sc_dequeue_bulk(
 504                         ring_in,
 505                         (void **) lp->mbuf_in.array,
 506                         bsz_rd);
 507
 508                 if (unlikely(ret == -ENOENT)) {
 509                         continue;
 510                 }
 511
 512 #if APP_WORKER_DROP_ALL_PACKETS
 513                 for (j = 0; j < bsz_rd; j ++) {
 514                         struct rte_mbuf *pkt = lp->mbuf_in.array[j];
 515                         rte_pktmbuf_free(pkt);
 516                 }
 517
 518                 continue;
 519 #endif
 520
 521                 APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[0], unsigned char *));
 522                 APP_WORKER_PREFETCH0(lp->mbuf_in.array[1]);
 523
 524                 for (j = 0; j < bsz_rd; j ++) {
 525                         struct rte_mbuf *pkt;
 526                         struct ipv4_hdr *ipv4_hdr;
 527                         uint32_t ipv4_dst, pos;
 528                         uint32_t port;
 529
 530                         if (likely(j < bsz_rd - 1)) {
 531                                 APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[j+1], unsigned char *));
 532                         }
 533                         if (likely(j < bsz_rd - 2)) {
 534                                 APP_WORKER_PREFETCH0(lp->mbuf_in.array[j+2]);
 535                         }
 536
 537                         pkt = lp->mbuf_in.array[j];
 538                         ipv4_hdr = rte_pktmbuf_mtod_offset(pkt,
 539                                                            struct ipv4_hdr *,
 540                                                            sizeof(struct ether_hdr));
 541                         ipv4_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
 542
 543                         if (unlikely(rte_lpm_lookup(lp->lpm_table, ipv4_dst, &port) != 0)) {
 544                                 port = pkt->port;
 545                         }
 546
 547                         pos = lp->mbuf_out[port].n_mbufs;
 548
 549                         lp->mbuf_out[port].array[pos ++] = pkt;
 550                         if (likely(pos < bsz_wr)) {
 551                                 lp->mbuf_out[port].n_mbufs = pos;
 552                                 continue;
 553                         }
 554
 555                         ret = rte_ring_sp_enqueue_bulk(
 556                                 lp->rings_out[port],
 557                                 (void **) lp->mbuf_out[port].array,
 558                                 bsz_wr);
 559
 560 #if APP_STATS
 561                         lp->rings_out_iters[port] ++;
 562                         if (ret == 0) {
 563                                 lp->rings_out_count[port] += 1;
 564                         }
 565                         if (lp->rings_out_iters[port] == APP_STATS){
 566                                 printf("\t\tWorker %u out (NIC port %u): enq success rate = %.2f\n",
 567                                         (unsigned) lp->worker_id,
 568                                         (unsigned) port,
 569                                         ((double) lp->rings_out_count[port]) / ((double) lp->rings_out_iters[port]));
 570                                 lp->rings_out_iters[port] = 0;
 571                                 lp->rings_out_count[port] = 0;
 572                         }
 573 #endif
 574
 575                         if (unlikely(ret == -ENOBUFS)) {
 576                                 uint32_t k;
 577                                 for (k = 0; k < bsz_wr; k ++) {
 578                                         struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k];
 579                                         rte_pktmbuf_free(pkt_to_free);
 580                                 }
 581                         }
 582
 583                         lp->mbuf_out[port].n_mbufs = 0;
 584                         lp->mbuf_out_flush[port] = 0;
 585                 }
 586         }
 587 }
 588
 589 static inline void
 590 app_lcore_worker_flush(struct app_lcore_params_worker *lp)
 591 {
 592         uint32_t port;
 593
 594         for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
 595                 int ret;
 596
 597                 if (unlikely(lp->rings_out[port] == NULL)) {
 598                         continue;
 599                 }
 600
 601                 if (likely((lp->mbuf_out_flush[port] == 0) ||
 602                            (lp->mbuf_out[port].n_mbufs == 0))) {
 603                         lp->mbuf_out_flush[port] = 1;
 604                         continue;
 605                 }
 606
 607                 ret = rte_ring_sp_enqueue_bulk(
 608                         lp->rings_out[port],
 609                         (void **) lp->mbuf_out[port].array,
 610                         lp->mbuf_out[port].n_mbufs);
 611
 612                 if (unlikely(ret < 0)) {
 613                         uint32_t k;
 614                         for (k = 0; k < lp->mbuf_out[port].n_mbufs; k ++) {
 615                                 struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k];
 616                                 rte_pktmbuf_free(pkt_to_free);
 617                         }
 618                 }
 619
 620                 lp->mbuf_out[port].n_mbufs = 0;
 621                 lp->mbuf_out_flush[port] = 1;
 622         }
 623 }
 624
 625 static void
 626 app_lcore_main_loop_worker(void) {
 627         uint32_t lcore = rte_lcore_id();
 628         struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker;
 629         uint64_t i = 0;
 630
 631         uint32_t bsz_rd = app.burst_size_worker_read;
 632         uint32_t bsz_wr = app.burst_size_worker_write;
 633
 634         for ( ; ; ) {
 635                 if (APP_LCORE_WORKER_FLUSH && (unlikely(i == APP_LCORE_WORKER_FLUSH))) {
 636                         app_lcore_worker_flush(lp);
 637                         i = 0;
 638                 }
 639
 640                 app_lcore_worker(lp, bsz_rd, bsz_wr);
 641
 642                 i ++;
 643         }
 644 }
 645
 646 int
 647 app_lcore_main_loop(__attribute__((unused)) void *arg)
 648 {
 649         struct app_lcore_params *lp;
 650         unsigned lcore;
 651
 652         lcore = rte_lcore_id();
 653         lp = &app.lcore_params[lcore];
 654
 655         if (lp->type == e_APP_LCORE_IO) {
 656                 printf("Logical core %u (I/O) main loop.\n", lcore);
 657                 app_lcore_main_loop_io();
 658         }
 659
 660         if (lp->type == e_APP_LCORE_WORKER) {
 661                 printf("Logical core %u (worker %u) main loop.\n",
 662                         lcore,
 663                         (unsigned) lp->worker.worker_id);
 664                 app_lcore_main_loop_worker();
 665         }
 666
 667         return 0;
 668 }