/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *  version: DPDK.L.1.2.3-3
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_lpm.h>

#include "main.h"
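
/*
 * Tunables: APP_LCORE_IO_FLUSH and APP_LCORE_WORKER_FLUSH set how many
 * main-loop iterations may pass before partially filled output buffers are
 * force-flushed; APP_STATS sets how many bursts/iterations are accumulated
 * between statistics printouts. The *_DROP_ALL_PACKETS switches enable
 * debug paths that drop traffic at each stage, and the *_PREFETCH_ENABLE
 * switches control the software prefetch macros used in the fast paths.
 */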
#ifndef APP_LCORE_IO_FLUSH
#define APP_LCORE_IO_FLUSH          1000000
#endif

#ifndef APP_LCORE_WORKER_FLUSH
#define APP_LCORE_WORKER_FLUSH      1000000
#endif

#ifndef APP_STATS
#define APP_STATS                   1000000
#endif

#define APP_IO_RX_DROP_ALL_PACKETS  0
#define APP_WORKER_DROP_ALL_PACKETS 0
#define APP_IO_TX_DROP_ALL_PACKETS  0

#ifndef APP_IO_RX_PREFETCH_ENABLE
#define APP_IO_RX_PREFETCH_ENABLE   1
#endif

#ifndef APP_WORKER_PREFETCH_ENABLE
#define APP_WORKER_PREFETCH_ENABLE  1
#endif

#ifndef APP_IO_TX_PREFETCH_ENABLE
#define APP_IO_TX_PREFETCH_ENABLE   1
#endif

#if APP_IO_RX_PREFETCH_ENABLE
#define APP_IO_RX_PREFETCH0(p)      rte_prefetch0(p)
#define APP_IO_RX_PREFETCH1(p)      rte_prefetch1(p)
#else
#define APP_IO_RX_PREFETCH0(p)
#define APP_IO_RX_PREFETCH1(p)
#endif

#if APP_WORKER_PREFETCH_ENABLE
#define APP_WORKER_PREFETCH0(p)     rte_prefetch0(p)
#define APP_WORKER_PREFETCH1(p)     rte_prefetch1(p)
#else
#define APP_WORKER_PREFETCH0(p)
#define APP_WORKER_PREFETCH1(p)
#endif

#if APP_IO_TX_PREFETCH_ENABLE
#define APP_IO_TX_PREFETCH0(p)      rte_prefetch0(p)
#define APP_IO_TX_PREFETCH1(p)      rte_prefetch1(p)
#else
#define APP_IO_TX_PREFETCH0(p)
#define APP_IO_TX_PREFETCH1(p)
#endif
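
/*
 * Buffer one mbuf toward the given worker. The mbuf is appended to the
 * per-worker output buffer; only when the buffer reaches the write burst
 * size (bsz) is the whole burst pushed to the worker ring with a single
 * rte_ring_sp_enqueue_bulk() call. If the ring is full, the burst is dropped.
 */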
static inline void
app_lcore_io_rx_buffer_to_send (
    struct app_lcore_params_io *lp,
    uint32_t worker,
    struct rte_mbuf *mbuf,
    uint32_t bsz)
{
    uint32_t pos;
    int ret;

    pos = lp->rx.mbuf_out[worker].n_mbufs;
    lp->rx.mbuf_out[worker].array[pos ++] = mbuf;
    if (likely(pos < bsz)) {
        lp->rx.mbuf_out[worker].n_mbufs = pos;
        return;
    }

    ret = rte_ring_sp_enqueue_bulk(
        lp->rx.rings[worker],
        (void **) lp->rx.mbuf_out[worker].array,
        bsz);

    if (unlikely(ret == -ENOBUFS)) {
        uint32_t k;
        for (k = 0; k < bsz; k ++) {
            struct rte_mbuf *m = lp->rx.mbuf_out[worker].array[k];
            rte_pktmbuf_free(m);
        }
    }

    lp->rx.mbuf_out[worker].n_mbufs = 0;
    lp->rx.mbuf_out_flush[worker] = 0;

    lp->rx.rings_iters[worker] ++;
    if (likely(ret == 0)) {
        lp->rx.rings_count[worker] ++;
    }
    if (unlikely(lp->rx.rings_iters[worker] == APP_STATS)) {
        uint32_t lcore = rte_lcore_id();
        printf("\tI/O RX %u out (worker %u): enq success rate = %.2f\n",
            lcore,
            worker,
            ((double) lp->rx.rings_count[worker]) / ((double) lp->rx.rings_iters[worker]));
        lp->rx.rings_iters[worker] = 0;
        lp->rx.rings_count[worker] = 0;
    }
}
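
/*
 * I/O RX: read packet bursts from every NIC RX queue owned by this I/O
 * lcore, keep a two-stage software prefetch pipeline over the mbufs and
 * their packet data, and dispatch each packet to a worker selected from one
 * byte of the packet (offset pos_lb). The AND with (n_workers - 1) assumes
 * the number of workers is a power of two.
 */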
static inline void
app_lcore_io_rx(
    struct app_lcore_params_io *lp,
    uint32_t n_workers,
    uint32_t bsz_rd,
    uint32_t bsz_wr,
    uint8_t pos_lb)
{
    struct rte_mbuf *mbuf_1_0, *mbuf_1_1, *mbuf_2_0, *mbuf_2_1;
    uint8_t *data_1_0, *data_1_1;
    uint32_t i;

    for (i = 0; i < lp->rx.n_nic_queues; i ++) {
        uint8_t port = lp->rx.nic_queues[i].port;
        uint8_t queue = lp->rx.nic_queues[i].queue;
        uint32_t n_mbufs, j;

        n_mbufs = rte_eth_rx_burst(
            port,
            queue,
            lp->rx.mbuf_in.array,
            (uint16_t) bsz_rd);

        if (unlikely(n_mbufs == 0)) {
            continue;
        }

        lp->rx.nic_queues_iters[i] ++;
        lp->rx.nic_queues_count[i] += n_mbufs;
        if (unlikely(lp->rx.nic_queues_iters[i] == APP_STATS)) {
            struct rte_eth_stats stats;
            uint32_t lcore = rte_lcore_id();

            rte_eth_stats_get(port, &stats);
            printf("I/O RX %u in (NIC port %u): NIC drop ratio = %.2f avg burst size = %.2f\n",
                lcore,
                (uint32_t) port,
                (double) stats.ierrors / (double) (stats.ierrors + stats.ipackets),
                ((double) lp->rx.nic_queues_count[i]) / ((double) lp->rx.nic_queues_iters[i]));
            lp->rx.nic_queues_iters[i] = 0;
            lp->rx.nic_queues_count[i] = 0;
        }

#if APP_IO_RX_DROP_ALL_PACKETS
        for (j = 0; j < n_mbufs; j ++) {
            struct rte_mbuf *pkt = lp->rx.mbuf_in.array[j];
            rte_pktmbuf_free(pkt);
        }
        continue;
#endif

        mbuf_1_0 = lp->rx.mbuf_in.array[0];
        mbuf_1_1 = lp->rx.mbuf_in.array[1];
        data_1_0 = rte_pktmbuf_mtod(mbuf_1_0, uint8_t *);
        if (likely(n_mbufs > 1)) {
            data_1_1 = rte_pktmbuf_mtod(mbuf_1_1, uint8_t *);
        }

        mbuf_2_0 = lp->rx.mbuf_in.array[2];
        mbuf_2_1 = lp->rx.mbuf_in.array[3];
        APP_IO_RX_PREFETCH0(mbuf_2_0);
        APP_IO_RX_PREFETCH0(mbuf_2_1);

        for (j = 0; j + 3 < n_mbufs; j += 2) {
            struct rte_mbuf *mbuf_0_0, *mbuf_0_1;
            uint8_t *data_0_0, *data_0_1;
            uint32_t worker_0, worker_1;

            /* Advance the prefetch pipeline: stage 1 becomes stage 0,
               stage 2 becomes stage 1, then prefetch stage 2 ahead. */
            mbuf_0_0 = mbuf_1_0;
            mbuf_0_1 = mbuf_1_1;
            data_0_0 = data_1_0;
            data_0_1 = data_1_1;

            mbuf_1_0 = mbuf_2_0;
            mbuf_1_1 = mbuf_2_1;
            data_1_0 = rte_pktmbuf_mtod(mbuf_2_0, uint8_t *);
            data_1_1 = rte_pktmbuf_mtod(mbuf_2_1, uint8_t *);
            APP_IO_RX_PREFETCH0(data_1_0);
            APP_IO_RX_PREFETCH0(data_1_1);

            mbuf_2_0 = lp->rx.mbuf_in.array[j+4];
            mbuf_2_1 = lp->rx.mbuf_in.array[j+5];
            APP_IO_RX_PREFETCH0(mbuf_2_0);
            APP_IO_RX_PREFETCH0(mbuf_2_1);

            worker_0 = data_0_0[pos_lb] & (n_workers - 1);
            worker_1 = data_0_1[pos_lb] & (n_workers - 1);

            app_lcore_io_rx_buffer_to_send(lp, worker_0, mbuf_0_0, bsz_wr);
            app_lcore_io_rx_buffer_to_send(lp, worker_1, mbuf_0_1, bsz_wr);
        }

        /* Handle the last 1, 2 (when n_mbufs is even) or 3 (when n_mbufs is odd) packets */
        for ( ; j < n_mbufs; j += 1) {
            struct rte_mbuf *mbuf;
            uint8_t *data;
            uint32_t worker;

            mbuf = mbuf_1_0;
            mbuf_1_0 = mbuf_1_1;
            mbuf_1_1 = mbuf_2_0;
            mbuf_2_0 = mbuf_2_1;
            data = rte_pktmbuf_mtod(mbuf, uint8_t *);
            APP_IO_RX_PREFETCH0(mbuf_1_0);

            worker = data[pos_lb] & (n_workers - 1);
            app_lcore_io_rx_buffer_to_send(lp, worker, mbuf, bsz_wr);
        }
    }
}
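
/*
 * I/O RX flush: push out any partially filled per-worker buffers that have
 * not been refilled since the previous flush interval, so packets do not sit
 * in the buffers indefinitely under low traffic.
 */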
static inline void
app_lcore_io_rx_flush(struct app_lcore_params_io *lp, uint32_t n_workers)
{
    uint32_t worker;

    for (worker = 0; worker < n_workers; worker ++) {
        int ret;

        if (likely((lp->rx.mbuf_out_flush[worker] == 0) ||
                   (lp->rx.mbuf_out[worker].n_mbufs == 0))) {
            lp->rx.mbuf_out_flush[worker] = 1;
            continue;
        }

        ret = rte_ring_sp_enqueue_bulk(
            lp->rx.rings[worker],
            (void **) lp->rx.mbuf_out[worker].array,
            lp->rx.mbuf_out[worker].n_mbufs);

        if (unlikely(ret < 0)) {
            uint32_t k;
            for (k = 0; k < lp->rx.mbuf_out[worker].n_mbufs; k ++) {
                struct rte_mbuf *pkt_to_free = lp->rx.mbuf_out[worker].array[k];
                rte_pktmbuf_free(pkt_to_free);
            }
        }
        lp->rx.mbuf_out[worker].n_mbufs = 0;
        lp->rx.mbuf_out_flush[worker] = 1;
    }
}
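
/*
 * I/O TX: for each (NIC TX port, worker) pair, dequeue bursts of mbufs from
 * the worker's TX ring into a per-port buffer and, once at least bsz_wr
 * mbufs have accumulated, transmit them with rte_eth_tx_burst(); any mbufs
 * the NIC did not accept are freed.
 */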
static inline void
app_lcore_io_tx(
    struct app_lcore_params_io *lp,
    uint32_t n_workers,
    uint32_t bsz_rd,
    uint32_t bsz_wr)
{
    uint32_t worker, i;

    for (worker = 0; worker < n_workers; worker ++) {
        for (i = 0; i < lp->tx.n_nic_ports; i ++) {
            uint8_t port = lp->tx.nic_ports[i];
            struct rte_ring *ring = lp->tx.rings[port][worker];
            uint32_t n_mbufs, n_pkts;
            int ret;

            n_mbufs = lp->tx.mbuf_out[port].n_mbufs;
            ret = rte_ring_sc_dequeue_bulk(
                ring,
                (void **) &lp->tx.mbuf_out[port].array[n_mbufs],
                bsz_rd);
            if (unlikely(ret == -ENOENT)) {
                continue;
            }
            /* The dequeued burst is appended after the mbufs already buffered */
            n_mbufs += bsz_rd;
#if APP_IO_TX_DROP_ALL_PACKETS
            {
                uint32_t j;
                APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[0]);
                APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[1]);
                for (j = 0; j < n_mbufs; j ++) {
                    if (likely(j < n_mbufs - 2)) {
                        APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[j + 2]);
                    }
                    rte_pktmbuf_free(lp->tx.mbuf_out[port].array[j]);
                }
                lp->tx.mbuf_out[port].n_mbufs = 0;
                continue;
            }
#endif
            if (unlikely(n_mbufs < bsz_wr)) {
                lp->tx.mbuf_out[port].n_mbufs = n_mbufs;
                continue;
            }

            n_pkts = rte_eth_tx_burst(
                port,
                0,
                lp->tx.mbuf_out[port].array,
                (uint16_t) n_mbufs);

            lp->tx.nic_ports_iters[port] ++;
            lp->tx.nic_ports_count[port] += n_pkts;
            if (unlikely(lp->tx.nic_ports_iters[port] == APP_STATS)) {
                uint32_t lcore = rte_lcore_id();
                printf("\t\t\tI/O TX %u out (port %u): avg burst size = %.2f\n",
                    lcore,
                    (uint32_t) port,
                    ((double) lp->tx.nic_ports_count[port]) / ((double) lp->tx.nic_ports_iters[port]));
                lp->tx.nic_ports_iters[port] = 0;
                lp->tx.nic_ports_count[port] = 0;
            }

            if (unlikely(n_pkts < n_mbufs)) {
                uint32_t k;
                for (k = n_pkts; k < n_mbufs; k ++) {
                    struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k];
                    rte_pktmbuf_free(pkt_to_free);
                }
            }
            lp->tx.mbuf_out[port].n_mbufs = 0;
            lp->tx.mbuf_out_flush[port] = 0;
        }
    }
}
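
/*
 * I/O TX flush: transmit whatever is left in the per-port TX buffers once
 * the flush interval expires, freeing any mbufs the NIC did not accept.
 */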
static inline void
app_lcore_io_tx_flush(struct app_lcore_params_io *lp)
{
    uint8_t port;

    for (port = 0; port < lp->tx.n_nic_ports; port ++) {
        uint32_t n_pkts;

        if (likely((lp->tx.mbuf_out_flush[port] == 0) ||
                   (lp->tx.mbuf_out[port].n_mbufs == 0))) {
            lp->tx.mbuf_out_flush[port] = 1;
            continue;
        }
        n_pkts = rte_eth_tx_burst(
            port,
            0,
            lp->tx.mbuf_out[port].array,
            (uint16_t) lp->tx.mbuf_out[port].n_mbufs);
        if (unlikely(n_pkts < lp->tx.mbuf_out[port].n_mbufs)) {
            uint32_t k;
            for (k = n_pkts; k < lp->tx.mbuf_out[port].n_mbufs; k ++) {
                struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k];
                rte_pktmbuf_free(pkt_to_free);
            }
        }
        lp->tx.mbuf_out[port].n_mbufs = 0;
        lp->tx.mbuf_out_flush[port] = 1;
    }
}
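
/*
 * Main loop of an I/O lcore: alternate between RX work (NIC -> worker rings)
 * and TX work (worker rings -> NIC), and force-flush the partially filled
 * output buffers every APP_LCORE_IO_FLUSH iterations.
 */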
static void
app_lcore_main_loop_io(void)
{
    uint32_t lcore = rte_lcore_id();
    struct app_lcore_params_io *lp = &app.lcore_params[lcore].io;
    uint32_t n_workers = app_get_lcores_worker();
    uint64_t i = 0;

    uint32_t bsz_rx_rd = app.burst_size_io_rx_read;
    uint32_t bsz_rx_wr = app.burst_size_io_rx_write;
    uint32_t bsz_tx_rd = app.burst_size_io_tx_read;
    uint32_t bsz_tx_wr = app.burst_size_io_tx_write;
    uint8_t pos_lb = app.pos_lb;

    for ( ; ; ) {
        if (APP_LCORE_IO_FLUSH && (unlikely(i == APP_LCORE_IO_FLUSH))) {
            if (likely(lp->rx.n_nic_queues > 0)) {
                app_lcore_io_rx_flush(lp, n_workers);
            }
            if (likely(lp->tx.n_nic_ports > 0)) {
                app_lcore_io_tx_flush(lp);
            }
            i = 0;
        }

        if (likely(lp->rx.n_nic_queues > 0)) {
            app_lcore_io_rx(lp, n_workers, bsz_rx_rd, bsz_rx_wr, pos_lb);
        }
        if (likely(lp->tx.n_nic_ports > 0)) {
            app_lcore_io_tx(lp, n_workers, bsz_tx_rd, bsz_tx_wr);
        }
        i ++;
    }
}
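
/*
 * Worker: dequeue packet bursts from the input rings filled by the I/O RX
 * lcores, look up the IPv4 destination address in the LPM table to pick the
 * output NIC port (falling back to the input port on lookup miss), and
 * buffer each packet toward that port, enqueuing a full burst at a time.
 */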
static inline void
app_lcore_worker(
    struct app_lcore_params_worker *lp,
    uint32_t bsz_rd,
    uint32_t bsz_wr)
{
    uint32_t i;

    for (i = 0; i < lp->n_rings_in; i ++) {
        struct rte_ring *ring_in = lp->rings_in[i];
        uint32_t j;
        int ret;

        ret = rte_ring_sc_dequeue_bulk(
            ring_in,
            (void **) lp->mbuf_in.array,
            bsz_rd);
        if (unlikely(ret == -ENOENT)) {
            continue;
        }
#if APP_WORKER_DROP_ALL_PACKETS
        for (j = 0; j < bsz_rd; j ++) {
            struct rte_mbuf *pkt = lp->mbuf_in.array[j];
            rte_pktmbuf_free(pkt);
        }
        continue;
#endif
        APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[0], unsigned char *));
        APP_WORKER_PREFETCH0(lp->mbuf_in.array[1]);

        for (j = 0; j < bsz_rd; j ++) {
            struct rte_mbuf *pkt;
            struct ipv4_hdr *ipv4_hdr;
            uint32_t ipv4_dst, pos;
            uint8_t port;

            if (likely(j < bsz_rd - 1)) {
                APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[j+1], unsigned char *));
            }
            if (likely(j < bsz_rd - 2)) {
                APP_WORKER_PREFETCH0(lp->mbuf_in.array[j+2]);
            }

            pkt = lp->mbuf_in.array[j];
            ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, unsigned char *) + sizeof(struct ether_hdr));
            ipv4_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
            if (unlikely(rte_lpm_lookup(lp->lpm_table, ipv4_dst, &port) != 0)) {
                port = pkt->pkt.in_port;
            }

            pos = lp->mbuf_out[port].n_mbufs;
            lp->mbuf_out[port].array[pos ++] = pkt;
            if (likely(pos < bsz_wr)) {
                lp->mbuf_out[port].n_mbufs = pos;
                continue;
            }

            ret = rte_ring_sp_enqueue_bulk(
                lp->rings_out[port],
                (void **) lp->mbuf_out[port].array,
                bsz_wr);

            lp->rings_out_iters[port] ++;
            if (ret == 0) {
                lp->rings_out_count[port] += 1;
            }
            if (lp->rings_out_iters[port] == APP_STATS) {
                printf("\t\tWorker %u out (NIC port %u): enq success rate = %.2f\n",
                    (uint32_t) lp->worker_id,
                    (uint32_t) port,
                    ((double) lp->rings_out_count[port]) / ((double) lp->rings_out_iters[port]));
                lp->rings_out_iters[port] = 0;
                lp->rings_out_count[port] = 0;
            }

            if (unlikely(ret == -ENOBUFS)) {
                uint32_t k;
                for (k = 0; k < bsz_wr; k ++) {
                    struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k];
                    rte_pktmbuf_free(pkt_to_free);
                }
            }
            lp->mbuf_out[port].n_mbufs = 0;
            lp->mbuf_out_flush[port] = 0;
        }
    }
}
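
/*
 * Worker flush: enqueue any partially filled per-port output buffers onto
 * the corresponding TX rings once the flush interval expires; the burst is
 * dropped if the ring is full.
 */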
static inline void
app_lcore_worker_flush(struct app_lcore_params_worker *lp)
{
    uint32_t port;

    for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
        int ret;

        if (unlikely(lp->rings_out[port] == NULL)) {
            continue;
        }
        if (likely((lp->mbuf_out_flush[port] == 0) ||
                   (lp->mbuf_out[port].n_mbufs == 0))) {
            lp->mbuf_out_flush[port] = 1;
            continue;
        }
        ret = rte_ring_sp_enqueue_bulk(
            lp->rings_out[port],
            (void **) lp->mbuf_out[port].array,
            lp->mbuf_out[port].n_mbufs);
        if (unlikely(ret < 0)) {
            uint32_t k;
            for (k = 0; k < lp->mbuf_out[port].n_mbufs; k ++) {
                struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k];
                rte_pktmbuf_free(pkt_to_free);
            }
        }
        lp->mbuf_out[port].n_mbufs = 0;
        lp->mbuf_out_flush[port] = 1;
    }
}
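
/*
 * Main loop of a worker lcore: process bursts from the input rings and
 * force-flush the output buffers every APP_LCORE_WORKER_FLUSH iterations.
 */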
static void
app_lcore_main_loop_worker(void) {
    uint32_t lcore = rte_lcore_id();
    struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker;
    uint64_t i = 0;
    uint32_t bsz_rd = app.burst_size_worker_read;
    uint32_t bsz_wr = app.burst_size_worker_write;

    for ( ; ; ) {
        if (APP_LCORE_WORKER_FLUSH && (unlikely(i == APP_LCORE_WORKER_FLUSH))) {
            app_lcore_worker_flush(lp);
            i = 0;
        }
        app_lcore_worker(lp, bsz_rd, bsz_wr);
        i ++;
    }
}
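
/*
 * Per-lcore entry point launched by the EAL: dispatch to the I/O or the
 * worker main loop according to the role assigned to this lcore.
 */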
int
app_lcore_main_loop(__attribute__((unused)) void *arg)
{
    struct app_lcore_params *lp;
    uint32_t lcore;

    lcore = rte_lcore_id();
    lp = &app.lcore_params[lcore];

    if (lp->type == e_APP_LCORE_IO) {
        printf("Logical core %u (I/O) main loop.\n", lcore);
        app_lcore_main_loop_io();
    }

    if (lp->type == e_APP_LCORE_WORKER) {
        printf("Logical core %u (worker %u) main loop.\n",
            lcore,
            lp->worker.worker_id);
        app_lcore_main_loop_worker();
    }
    return 0;
}