/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdint.h>
#include <stdlib.h>
#include <inttypes.h>
#include <unistd.h>
#include <signal.h>
#include <getopt.h>

#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_debug.h>
#include <rte_prefetch.h>
#include <rte_distributor.h>
#define RX_RING_SIZE 256
#define TX_RING_SIZE 512
#define NUM_MBUFS ((64*1024)-1)
#define MBUF_CACHE_SIZE 250
#define BURST_SIZE 32	/* assumed value; the original definition was lost */
#define RTE_RING_SZ 1024

#define RTE_LOGTYPE_DISTRAPP RTE_LOGTYPE_USER1

#define ANSI_COLOR_RED		"\x1b[31m"
#define ANSI_COLOR_RESET	"\x1b[0m"
/* mask of enabled ports */
static uint32_t enabled_port_mask;
volatile uint8_t quit_signal;
volatile uint8_t quit_signal_rx;
volatile uint8_t quit_signal_dist;
volatile uint8_t quit_signal_work;
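
/*
 * Shutdown sequence: the SIGINT handler raises quit_signal_rx; the RX core
 * then raises quit_signal (watched by the worker and TX cores) and finally
 * quit_signal_dist so the stats loop in main() can exit. quit_signal_work
 * is reserved but not polled in this version.
 */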
static volatile struct app_stats {
	struct {
		uint64_t rx_pkts;
		uint64_t returned_pkts;
		uint64_t enqueued_pkts;
		uint64_t enqdrop_pkts;
	} rx __rte_cache_aligned;
	int pad1 __rte_cache_aligned;

	struct {
		uint64_t in_pkts;
		uint64_t ret_pkts;
		uint64_t sent_pkts;
		uint64_t enqdrop_pkts;
	} dist __rte_cache_aligned;
	int pad2 __rte_cache_aligned;

	struct {
		uint64_t dequeue_pkts;
		uint64_t tx_pkts;
		uint64_t enqdrop_pkts;
	} tx __rte_cache_aligned;
	int pad3 __rte_cache_aligned;

	uint64_t worker_pkts[64] __rte_cache_aligned;

	int pad4 __rte_cache_aligned;

	uint64_t worker_bursts[64][8] __rte_cache_aligned;

	int pad5 __rte_cache_aligned;

	uint64_t port_rx_pkts[64] __rte_cache_aligned;
	uint64_t port_tx_pkts[64] __rte_cache_aligned;
} app_stats;

struct app_stats prev_app_stats;
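
/*
 * Each group of counters above is cache-line aligned, with pad fields in
 * between, so the RX, distributor, TX and worker cores each write their own
 * cache line and do not false-share. prev_app_stats holds the snapshot from
 * the previous print_stats() call, so stats are reported as per-interval
 * deltas rather than running totals.
 */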
static const struct rte_eth_conf port_conf_default = {
	.rxmode = {
		.mq_mode = ETH_MQ_RX_RSS,
		.max_rx_pkt_len = ETHER_MAX_LEN,
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_hf = ETH_RSS_IP | ETH_RSS_UDP |
				ETH_RSS_TCP | ETH_RSS_SCTP,
		}
	},
};
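
/*
 * RSS is enabled so the NIC writes a flow hash into each mbuf (hash.rss).
 * The distributor uses that hash as the flow tag, keeping all packets of a
 * flow on one worker while different flows spread across workers.
 */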
struct output_buffer {
	unsigned count;
	struct rte_mbuf *mbufs[BURST_SIZE];
};

static void print_stats(void);
/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as a parameter.
 */
static inline int
port_init(uint8_t port, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_conf port_conf = port_conf_default;
	const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1;
	int retval;
	uint16_t q;

	if (port >= rte_eth_dev_count())
		return -1;

	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
	if (retval != 0)
		return retval;

	for (q = 0; q < rxRings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
						rte_eth_dev_socket_id(port),
						NULL, mbuf_pool);
		if (retval < 0)
			return retval;
	}

	for (q = 0; q < txRings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
						rte_eth_dev_socket_id(port),
						NULL);
		if (retval < 0)
			return retval;
	}

	retval = rte_eth_dev_start(port);
	if (retval < 0)
		return retval;

	struct rte_eth_link link;
	rte_eth_link_get_nowait(port, &link);
	while (!link.link_status) {
		printf("Waiting for Link up on port %"PRIu8"\n", port);
		sleep(1);
		rte_eth_link_get_nowait(port, &link);
	}

	if (!link.link_status) {
		printf("Link down on port %"PRIu8"\n", port);
		return 0;
	}

	struct ether_addr addr;
	rte_eth_macaddr_get(port, &addr);
	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			(unsigned)port,
			addr.addr_bytes[0], addr.addr_bytes[1],
			addr.addr_bytes[2], addr.addr_bytes[3],
			addr.addr_bytes[4], addr.addr_bytes[5]);

	rte_eth_promiscuous_enable(port);

	return 0;
}
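
/*
 * Note: a TX queue is set up for every lcore (txRings = lcore_count - 1)
 * even though this application only transmits on queue 0 from the TX core;
 * the extra queues are unused but would allow per-worker TX variants.
 */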
struct lcore_params {
	unsigned worker_id;
	struct rte_distributor *d;
	struct rte_ring *r;
	struct rte_mempool *mem_pool;
};
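
/*
 * Workers block in rte_distributor_get_pkt() once the distributor stops
 * running, so for a clean shutdown we push one dummy mbuf per worker
 * through the distributor, each with a distinct flow tag (i << 1), so the
 * packets spread across the workers and each wakes up to see the quit flag.
 */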
static int
quit_workers(struct rte_distributor *d, struct rte_mempool *p)
{
	const unsigned num_workers = rte_lcore_count() - 2;
	unsigned i;
	struct rte_mbuf *bufs[num_workers];

	if (rte_mempool_get_bulk(p, (void *)bufs, num_workers) != 0) {
		printf("line %d: Error getting mbufs from pool\n", __LINE__);
		return -1;
	}

	for (i = 0; i < num_workers; i++)
		bufs[i]->hash.rss = i << 1;

	rte_distributor_process(d, bufs, num_workers);
	rte_mempool_put_bulk(p, (void *)bufs, num_workers);

	return 0;
}
static int
lcore_rx(struct lcore_params *p)
{
	struct rte_distributor *d = p->d;
	struct rte_mempool *mem_pool = p->mem_pool;
	struct rte_ring *r = p->r;
	const uint8_t nb_ports = rte_eth_dev_count();
	const int socket_id = rte_socket_id();
	uint8_t port;

	for (port = 0; port < nb_ports; port++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << port)) == 0)
			continue;

		if (rte_eth_dev_socket_id(port) > 0 &&
				rte_eth_dev_socket_id(port) != socket_id)
			printf("WARNING, port %u is on remote NUMA node to "
					"RX thread.\n\tPerformance will not "
					"be optimal.\n", port);
	}

	printf("\nCore %u doing packet RX.\n", rte_lcore_id());
	port = 0;
	while (!quit_signal_rx) {

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << port)) == 0) {
			if (++port == nb_ports)
				port = 0;
			continue;
		}
		struct rte_mbuf *bufs[BURST_SIZE*2];
		const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs,
				BURST_SIZE);
		if (unlikely(nb_rx == 0)) {
			if (++port == nb_ports)
				port = 0;
			continue;
		}
		app_stats.rx.rx_pkts += nb_rx;

		rte_distributor_process(d, bufs, nb_rx);
		const uint16_t nb_ret = rte_distributor_returned_pkts(d,
				bufs, BURST_SIZE*2);
		app_stats.rx.returned_pkts += nb_ret;
		if (unlikely(nb_ret == 0)) {
			if (++port == nb_ports)
				port = 0;
			continue;
		}

		uint16_t sent = rte_ring_enqueue_burst(r, (void *)bufs,
				nb_ret);
		app_stats.rx.enqueued_pkts += sent;
		if (unlikely(sent < nb_ret)) {
			RTE_LOG_DP(DEBUG, DISTRAPP,
					"%s:Packet loss due to full ring\n", __func__);
			app_stats.rx.enqdrop_pkts += nb_ret - sent;
			while (sent < nb_ret)
				rte_pktmbuf_free(bufs[sent++]);
		}
		if (++port == nb_ports)
			port = 0;
	}
	rte_distributor_process(d, NULL, 0);
	/* flush distributor to bring it to a known state */
	rte_distributor_flush(d);
	/* set worker & tx threads quit flag */
	quit_signal = 1;
	/*
	 * Workers may be blocked in rte_distributor_get_pkt() now that the
	 * distributor loop has stopped, so keep feeding them packets until
	 * quit_signal has actually been seen and they have shut down
	 * gracefully.
	 */
	if (quit_workers(d, mem_pool) != 0)
		return -1;
	/* let the stats loop in main() exit; the rx thread quits last */
	quit_signal_dist = 1;
	return 0;
}
static inline void
flush_one_port(struct output_buffer *outbuf, uint8_t outp)
{
	unsigned nb_tx = rte_eth_tx_burst(outp, 0, outbuf->mbufs,
			outbuf->count);
	app_stats.tx.tx_pkts += nb_tx;

	if (unlikely(nb_tx < outbuf->count)) {
		RTE_LOG_DP(DEBUG, DISTRAPP,
			"%s:Packet loss with tx_burst\n", __func__);
		app_stats.tx.enqdrop_pkts += outbuf->count - nb_tx;
		do {
			rte_pktmbuf_free(outbuf->mbufs[nb_tx]);
		} while (++nb_tx < outbuf->count);
	}
	outbuf->count = 0;
}

static inline void
flush_all_ports(struct output_buffer *tx_buffers, uint8_t nb_ports)
{
	uint8_t outp;

	for (outp = 0; outp < nb_ports; outp++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << outp)) == 0)
			continue;

		if (tx_buffers[outp].count == 0)
			continue;

		flush_one_port(&tx_buffers[outp], outp);
	}
}
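
/*
 * Packets are buffered per output port and handed to rte_eth_tx_burst()
 * in BURST_SIZE batches (or whenever the ring runs dry), which amortises
 * the per-call overhead of the PMD transmit path.
 */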
static int
lcore_tx(struct rte_ring *in_r)
{
	static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
	const uint8_t nb_ports = rte_eth_dev_count();
	const int socket_id = rte_socket_id();
	uint8_t port;

	for (port = 0; port < nb_ports; port++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << port)) == 0)
			continue;

		if (rte_eth_dev_socket_id(port) > 0 &&
				rte_eth_dev_socket_id(port) != socket_id)
			printf("WARNING, port %u is on remote NUMA node to "
					"TX thread.\n\tPerformance will not "
					"be optimal.\n", port);
	}

	printf("\nCore %u doing packet TX.\n", rte_lcore_id());
	while (!quit_signal) {

		for (port = 0; port < nb_ports; port++) {
			/* skip ports that are not enabled */
			if ((enabled_port_mask & (1 << port)) == 0)
				continue;

			struct rte_mbuf *bufs[BURST_SIZE];
			const uint16_t nb_rx = rte_ring_dequeue_burst(in_r,
					(void *)bufs, BURST_SIZE);
			app_stats.tx.dequeue_pkts += nb_rx;

			/* if we get no traffic, flush anything we have */
			if (unlikely(nb_rx == 0)) {
				flush_all_ports(tx_buffers, nb_ports);
				continue;
			}

			/* for traffic we receive, queue it up for transmit */
			uint16_t i;
			rte_prefetch_non_temporal((void *)bufs[0]);
			rte_prefetch_non_temporal((void *)bufs[1]);
			rte_prefetch_non_temporal((void *)bufs[2]);
			for (i = 0; i < nb_rx; i++) {
				struct output_buffer *outbuf;
				uint8_t outp;
				rte_prefetch_non_temporal((void *)bufs[i + 3]);
				/*
				 * workers should update the mbuf port field
				 * to hold the output port value
				 */
				outp = bufs[i]->port;
				/* skip ports that are not enabled */
				if ((enabled_port_mask & (1 << outp)) == 0)
					continue;

				outbuf = &tx_buffers[outp];
				outbuf->mbufs[outbuf->count++] = bufs[i];
				if (outbuf->count == BURST_SIZE)
					flush_one_port(outbuf, outp);
			}
		}
	}
	return 0;
}
static void
int_handler(int sig_num)
{
	printf("Exiting on signal %d\n", sig_num);
	/* set quit flag for rx thread to exit */
	quit_signal_rx = 1;
}

static void
print_stats(void)
{
	struct rte_eth_stats eth_stats;
	unsigned int i, j;
	/* all lcores except the RX/distribution core and TX core are workers */
	const unsigned int num_workers = rte_lcore_count() - 2;

	for (i = 0; i < rte_eth_dev_count(); i++) {
		rte_eth_stats_get(i, &eth_stats);
		app_stats.port_rx_pkts[i] = eth_stats.ipackets;
		app_stats.port_tx_pkts[i] = eth_stats.opackets;
	}

	printf("\n\nRX Thread:\n");
	for (i = 0; i < rte_eth_dev_count(); i++) {
		printf("Port %u Pktsin : %5.2f\n", i,
				(app_stats.port_rx_pkts[i] -
				prev_app_stats.port_rx_pkts[i])/1000000.0);
		prev_app_stats.port_rx_pkts[i] = app_stats.port_rx_pkts[i];
	}
	printf(" - Received:    %5.2f\n",
			(app_stats.rx.rx_pkts -
			prev_app_stats.rx.rx_pkts)/1000000.0);
	printf(" - Returned:    %5.2f\n",
			(app_stats.rx.returned_pkts -
			prev_app_stats.rx.returned_pkts)/1000000.0);
	printf(" - Enqueued:    %5.2f\n",
			(app_stats.rx.enqueued_pkts -
			prev_app_stats.rx.enqueued_pkts)/1000000.0);
	printf(" - Dropped:     %s%5.2f%s\n", ANSI_COLOR_RED,
			(app_stats.rx.enqdrop_pkts -
			prev_app_stats.rx.enqdrop_pkts)/1000000.0,
			ANSI_COLOR_RESET);

	printf("Distributor thread:\n");
	printf(" - In:          %5.2f\n",
			(app_stats.dist.in_pkts -
			prev_app_stats.dist.in_pkts)/1000000.0);
	printf(" - Returned:    %5.2f\n",
			(app_stats.dist.ret_pkts -
			prev_app_stats.dist.ret_pkts)/1000000.0);
	printf(" - Sent:        %5.2f\n",
			(app_stats.dist.sent_pkts -
			prev_app_stats.dist.sent_pkts)/1000000.0);
	printf(" - Dropped:     %s%5.2f%s\n", ANSI_COLOR_RED,
			(app_stats.dist.enqdrop_pkts -
			prev_app_stats.dist.enqdrop_pkts)/1000000.0,
			ANSI_COLOR_RESET);

	printf("TX thread:\n");
	printf(" - Dequeued:    %5.2f\n",
			(app_stats.tx.dequeue_pkts -
			prev_app_stats.tx.dequeue_pkts)/1000000.0);
	for (i = 0; i < rte_eth_dev_count(); i++) {
		printf("Port %u Pktsout: %5.2f\n",
				i, (app_stats.port_tx_pkts[i] -
				prev_app_stats.port_tx_pkts[i])/1000000.0);
		prev_app_stats.port_tx_pkts[i] = app_stats.port_tx_pkts[i];
	}
	printf(" - Transmitted: %5.2f\n",
			(app_stats.tx.tx_pkts -
			prev_app_stats.tx.tx_pkts)/1000000.0);
	printf(" - Dropped:     %s%5.2f%s\n", ANSI_COLOR_RED,
			(app_stats.tx.enqdrop_pkts -
			prev_app_stats.tx.enqdrop_pkts)/1000000.0,
			ANSI_COLOR_RESET);

	prev_app_stats.rx.rx_pkts = app_stats.rx.rx_pkts;
	prev_app_stats.rx.returned_pkts = app_stats.rx.returned_pkts;
	prev_app_stats.rx.enqueued_pkts = app_stats.rx.enqueued_pkts;
	prev_app_stats.rx.enqdrop_pkts = app_stats.rx.enqdrop_pkts;
	prev_app_stats.dist.in_pkts = app_stats.dist.in_pkts;
	prev_app_stats.dist.ret_pkts = app_stats.dist.ret_pkts;
	prev_app_stats.dist.sent_pkts = app_stats.dist.sent_pkts;
	prev_app_stats.dist.enqdrop_pkts = app_stats.dist.enqdrop_pkts;
	prev_app_stats.tx.dequeue_pkts = app_stats.tx.dequeue_pkts;
	prev_app_stats.tx.tx_pkts = app_stats.tx.tx_pkts;
	prev_app_stats.tx.enqdrop_pkts = app_stats.tx.enqdrop_pkts;

	for (i = 0; i < num_workers; i++) {
		printf("Worker %02u Pkts: %5.2f. Bursts(1-8): ", i,
				(app_stats.worker_pkts[i] -
				prev_app_stats.worker_pkts[i])/1000000.0);
		for (j = 0; j < 8; j++) {
			printf("%"PRIu64" ", app_stats.worker_bursts[i][j]);
			app_stats.worker_bursts[i][j] = 0;
		}
		printf("\n");
		prev_app_stats.worker_pkts[i] = app_stats.worker_pkts[i];
	}
}
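
/*
 * Worker handshake: rte_distributor_get_pkt() hands the previous burst back
 * (oldpkt) and blocks until a new burst of up to 8 packets (the size of
 * buf[]) is available, so a single array serves as both the return and
 * receive buffer.
 */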
static int
lcore_worker(struct lcore_params *p)
{
	struct rte_distributor *d = p->d;
	const unsigned id = p->worker_id;
	unsigned int num = 0;
	unsigned int i;

	/*
	 * for single port, xor_val will be zero so we won't modify the output
	 * port, otherwise we send traffic from 0 to 1, 2 to 3, and vice versa
	 */
	const unsigned xor_val = (rte_eth_dev_count() > 1);
	struct rte_mbuf *buf[8] __rte_cache_aligned;

	for (i = 0; i < 8; i++)
		buf[i] = NULL;

	printf("\nCore %u acting as worker core.\n", rte_lcore_id());
	while (!quit_signal) {
		num = rte_distributor_get_pkt(d, id, buf, buf, num);
		/* Do a little bit of work for each packet */
		for (i = 0; i < num; i++) {
			uint64_t t = rte_rdtsc() + 100;

			while (rte_rdtsc() < t)
				rte_pause();
			buf[i]->port ^= xor_val;
		}

		app_stats.worker_pkts[id] += num;
		if (num > 0)
			app_stats.worker_bursts[id][num - 1]++;
	}
	return 0;
}
/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
			"  -p PORTMASK: hexadecimal bitmask of ports to configure\n",
			prgname);
}

static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}
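
/*
 * Example: "-p 0x3" enables ports 0 and 1; "-p f" enables ports 0-3.
 * A zero or malformed mask is rejected above.
 */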
/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:",
			lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	if (optind <= 1) {
		print_usage(prgname);
		return -1;
	}

	argv[optind-1] = prgname;

	optind = 1; /* reset getopt lib */
	return 0;
}
/* Main function, does initialization and calls the per-lcore functions */
int
main(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	struct rte_distributor *d;
	struct rte_ring *output_ring;
	unsigned lcore_id, worker_id = 0;
	unsigned nb_ports;
	uint8_t portid;
	uint8_t nb_ports_available;
	uint64_t t, freq;

	/* catch ctrl-c so we can print on exit */
	signal(SIGINT, int_handler);

	/* init EAL */
	int ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
	argc -= ret;
	argv += ret;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid distributor parameters\n");

	if (rte_lcore_count() < 3)
		rte_exit(EXIT_FAILURE, "Error, This application needs at "
				"least 3 logical cores to run:\n"
				"1 lcore for packet RX and distribution\n"
				"1 lcore for packet TX\n"
				"and at least 1 lcore for worker threads\n");

	nb_ports = rte_eth_dev_count();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
	if (nb_ports != 1 && (nb_ports & 1))
		rte_exit(EXIT_FAILURE, "Error: number of ports must be even, "
				"except when using a single port\n");

	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
			NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0,
			RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
	nb_ports_available = nb_ports;
	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			nb_ports_available--;
			continue;
		}
		/* init port */
		printf("Initializing port %u... done\n", (unsigned) portid);

		if (port_init(portid, mbuf_pool) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
					portid);
	}

	if (!nb_ports_available) {
		rte_exit(EXIT_FAILURE,
				"All available ports are disabled. Please set portmask.\n");
	}
	d = rte_distributor_create("PKT_DIST", rte_socket_id(),
			rte_lcore_count() - 2,
			RTE_DIST_ALG_BURST); /* assumed: burst mode, matching
					      * the burst get_pkt() API used
					      * by the workers */
	if (d == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create distributor\n");

	/*
	 * scheduler ring is read only by the transmitter core, but written to
	 * by multiple threads
	 */
	output_ring = rte_ring_create("Output_ring", RTE_RING_SZ,
			rte_socket_id(), RING_F_SC_DEQ);
	if (output_ring == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create output ring\n");
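
	/*
	 * RING_F_SC_DEQ makes dequeues single-consumer (only the TX core
	 * reads the ring); enqueues remain multi-producer safe by default.
	 */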
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (worker_id == rte_lcore_count() - 2)
			rte_eal_remote_launch((lcore_function_t *)lcore_tx,
					output_ring, lcore_id);
		else {
			struct lcore_params *p =
					rte_malloc(NULL, sizeof(*p), 0);
			if (!p)
				rte_panic("malloc failure\n");
			*p = (struct lcore_params){worker_id, d,
					output_ring, mbuf_pool};

			rte_eal_remote_launch((lcore_function_t *)lcore_worker,
					p, lcore_id);
		}
		worker_id++;
	}
	/* call lcore_rx on master core only */
	struct lcore_params p = { 0, d, output_ring, mbuf_pool};

	if (lcore_rx(&p) != 0)
		return -1;

	freq = rte_get_timer_hz();
	t = rte_rdtsc() + freq;
	while (!quit_signal_dist) {
		if (t < rte_rdtsc()) {
			print_stats();
			t = rte_rdtsc() + freq;
		}
		usleep(100);
	}

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	print_stats();
	return 0;
}