/*
 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
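/*
 * Sample application demonstrating the DPDK packet distributor library:
 * one lcore receives packets and feeds them to a distributor, a set of
 * worker lcores process and return them, and a final lcore dequeues the
 * returned packets from a ring and transmits them. An illustrative
 * invocation (core mask and port mask values are examples only):
 *
 *   ./build/distributor_app -c 0x1f -n 4 -- -p 0x3
 */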
#include <stdint.h>
#include <stdlib.h>
#include <inttypes.h>
#include <unistd.h>
#include <signal.h>
#include <getopt.h>

#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_debug.h>
#include <rte_distributor.h>
#define RX_RING_SIZE 256
#define TX_RING_SIZE 512
#define NUM_MBUFS ((64*1024)-1)
#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define MBUF_CACHE_SIZE 250
#define BURST_SIZE 32 /* rx/tx burst size; value assumed, elided from the excerpt */
#define RTE_RING_SZ 1024
/* uncomment below line to enable debug logs */
/* #define DEBUG */

#ifdef DEBUG
#define LOG_LEVEL RTE_LOG_DEBUG
#define LOG_DEBUG(log_type, fmt, args...) do { \
	RTE_LOG(DEBUG, log_type, fmt, ##args); \
} while (0)
#else
#define LOG_LEVEL RTE_LOG_INFO
#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
#endif
#define RTE_LOGTYPE_DISTRAPP RTE_LOGTYPE_USER1
/* mask of enabled ports */
static uint32_t enabled_port_mask;
volatile uint8_t quit_signal;
volatile uint8_t quit_signal_rx;
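/*
 * Application-wide statistics. The rx and tx counter groups are each
 * cache-aligned so that the rx and tx lcores update separate cache
 * lines, avoiding false sharing between the two threads.
 */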
static volatile struct app_stats {
	struct {
		uint64_t rx_pkts;
		uint64_t returned_pkts;
		uint64_t enqueued_pkts;
	} rx __rte_cache_aligned;

	struct {
		uint64_t dequeue_pkts;
		uint64_t tx_pkts;
	} tx __rte_cache_aligned;
} app_stats;
static const struct rte_eth_conf port_conf_default = {
	.rxmode = {
		.mq_mode = ETH_MQ_RX_RSS,
		.max_rx_pkt_len = ETHER_MAX_LEN,
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV6 |
				ETH_RSS_IPV4_TCP | ETH_RSS_IPV4_UDP |
				ETH_RSS_IPV6_TCP | ETH_RSS_IPV6_UDP,
		},
	},
};
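/*
 * Software tx buffer, one per output port: packets are accumulated here
 * by the tx lcore and flushed to the NIC in bursts of up to BURST_SIZE.
 */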
struct output_buffer {
	unsigned count;
	struct rte_mbuf *mbufs[BURST_SIZE];
};
/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as parameter.
 */
static inline int
port_init(uint8_t port, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_conf port_conf = port_conf_default;
	const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1;
	int retval;
	uint16_t q;

	if (port >= rte_eth_dev_count())
		return -1;

	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
	if (retval != 0)
		return retval;

	for (q = 0; q < rxRings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
				rte_eth_dev_socket_id(port),
				NULL, mbuf_pool);
		if (retval < 0)
			return retval;
	}

	for (q = 0; q < txRings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
				rte_eth_dev_socket_id(port),
				NULL);
		if (retval < 0)
			return retval;
	}

	retval = rte_eth_dev_start(port);
	if (retval < 0)
		return retval;

	struct rte_eth_link link;
	rte_eth_link_get_nowait(port, &link);
	if (!link.link_status) {
		sleep(1); /* brief pause before re-checking the link */
		rte_eth_link_get_nowait(port, &link);
	}

	if (!link.link_status) {
		printf("Link down on port %"PRIu8"\n", port);
		return 0;
	}

	struct ether_addr addr;
	rte_eth_macaddr_get(port, &addr);
	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			(unsigned)port,
			addr.addr_bytes[0], addr.addr_bytes[1],
			addr.addr_bytes[2], addr.addr_bytes[3],
			addr.addr_bytes[4], addr.addr_bytes[5]);

	rte_eth_promiscuous_enable(port);

	return 0;
}
struct lcore_params {
	unsigned worker_id;
	struct rte_distributor *d;
	struct rte_ring *r;
	struct rte_mempool *mem_pool;
};
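/*
 * Unblock the workers at shutdown: hand the distributor one dummy mbuf
 * per worker, each with a distinct flow hash, so every worker blocked in
 * rte_distributor_get_pkt() receives a packet and can notice quit_signal.
 */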
static void
quit_workers(struct rte_distributor *d, struct rte_mempool *p)
{
	const unsigned num_workers = rte_lcore_count() - 2;
	unsigned i;
	struct rte_mbuf *bufs[num_workers];

	rte_mempool_get_bulk(p, (void *)bufs, num_workers);

	for (i = 0; i < num_workers; i++)
		bufs[i]->hash.rss = i << 1;

	rte_distributor_process(d, bufs, num_workers);
	rte_mempool_put_bulk(p, (void *)bufs, num_workers);
}
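/*
 * RX lcore: polls the enabled ports round-robin, feeds received bursts to
 * the distributor, collects the packets returned by the workers and
 * enqueues them on the ring consumed by the tx lcore.
 */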
static int
lcore_rx(struct lcore_params *p)
{
	struct rte_distributor *d = p->d;
	struct rte_mempool *mem_pool = p->mem_pool;
	struct rte_ring *r = p->r;
	const uint8_t nb_ports = rte_eth_dev_count();
	const int socket_id = rte_socket_id();
	uint8_t port;

	for (port = 0; port < nb_ports; port++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << port)) == 0)
			continue;

		if (rte_eth_dev_socket_id(port) > 0 &&
				rte_eth_dev_socket_id(port) != socket_id)
			printf("WARNING, port %u is on remote NUMA node to "
					"RX thread.\n\tPerformance will not "
					"be optimal.\n", port);
	}

	printf("\nCore %u doing packet RX.\n", rte_lcore_id());
	port = 0;
	while (!quit_signal_rx) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << port)) == 0) {
			if (++port == nb_ports)
				port = 0;
			continue;
		}
		struct rte_mbuf *bufs[BURST_SIZE*2];
		const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs,
				BURST_SIZE);
		app_stats.rx.rx_pkts += nb_rx;

		rte_distributor_process(d, bufs, nb_rx);
		const uint16_t nb_ret = rte_distributor_returned_pkts(d,
				bufs, BURST_SIZE*2);
		app_stats.rx.returned_pkts += nb_ret;
		if (unlikely(nb_ret == 0))
			continue;

		uint16_t sent = rte_ring_enqueue_burst(r, (void *)bufs, nb_ret);
		app_stats.rx.enqueued_pkts += sent;
		if (unlikely(sent < nb_ret)) {
			LOG_DEBUG(DISTRAPP, "%s:Packet loss due to full ring\n",
					__func__);
			while (sent < nb_ret)
				rte_pktmbuf_free(bufs[sent++]);
		}
		if (++port == nb_ports)
			port = 0;
	}
	rte_distributor_process(d, NULL, 0);
	/* flush distributor to bring it to a known state */
	rte_distributor_flush(d);
	/* set worker & tx threads quit flag */
	quit_signal = 1;
	/*
	 * worker threads may hang in get packet as the distributor process
	 * is no longer running, so make sure workers keep getting packets
	 * until quit_signal has actually been received and they shut down
	 * gracefully
	 */
	quit_workers(d, mem_pool);
	/* the rx thread should quit last */
	return 0;
}
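/*
 * Transmit one port's buffered packets, freeing any that the NIC did not
 * accept, and reset the buffer.
 */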
static inline void
flush_one_port(struct output_buffer *outbuf, uint8_t outp)
{
	unsigned nb_tx = rte_eth_tx_burst(outp, 0, outbuf->mbufs,
			outbuf->count);
	app_stats.tx.tx_pkts += nb_tx;

	if (unlikely(nb_tx < outbuf->count)) {
		LOG_DEBUG(DISTRAPP, "%s:Packet loss with tx_burst\n", __func__);
		do {
			rte_pktmbuf_free(outbuf->mbufs[nb_tx]);
		} while (++nb_tx < outbuf->count);
	}
	outbuf->count = 0;
}
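/* Flush the pending tx buffers of all enabled ports. */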
static inline void
flush_all_ports(struct output_buffer *tx_buffers, uint8_t nb_ports)
{
	uint8_t outp;

	for (outp = 0; outp < nb_ports; outp++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << outp)) == 0)
			continue;
		/* skip ports with nothing buffered */
		if (tx_buffers[outp].count == 0)
			continue;

		flush_one_port(&tx_buffers[outp], outp);
	}
}
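/*
 * TX lcore: dequeues worker-processed packets from the ring, buffers them
 * per output port and transmits full bursts; flushes all buffers whenever
 * the ring runs empty so packets are not held back indefinitely.
 */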
static int
lcore_tx(struct rte_ring *in_r)
{
	static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
	const uint8_t nb_ports = rte_eth_dev_count();
	const int socket_id = rte_socket_id();
	uint8_t port;

	for (port = 0; port < nb_ports; port++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << port)) == 0)
			continue;

		if (rte_eth_dev_socket_id(port) > 0 &&
				rte_eth_dev_socket_id(port) != socket_id)
			printf("WARNING, port %u is on remote NUMA node to "
					"TX thread.\n\tPerformance will not "
					"be optimal.\n", port);
	}

	printf("\nCore %u doing packet TX.\n", rte_lcore_id());
	while (!quit_signal) {
		for (port = 0; port < nb_ports; port++) {
			/* skip ports that are not enabled */
			if ((enabled_port_mask & (1 << port)) == 0)
				continue;

			struct rte_mbuf *bufs[BURST_SIZE];
			const uint16_t nb_rx = rte_ring_dequeue_burst(in_r,
					(void *)bufs, BURST_SIZE);
			app_stats.tx.dequeue_pkts += nb_rx;

			/* if we get no traffic, flush anything we have */
			if (unlikely(nb_rx == 0)) {
				flush_all_ports(tx_buffers, nb_ports);
				continue;
			}

			/* for traffic we receive, queue it up for transmit */
			uint16_t i;
			_mm_prefetch(bufs[0], 0);
			_mm_prefetch(bufs[1], 0);
			_mm_prefetch(bufs[2], 0);
			for (i = 0; i < nb_rx; i++) {
				struct output_buffer *outbuf;
				uint8_t outp;

				_mm_prefetch(bufs[i + 3], 0);
				/*
				 * workers should update in_port to hold the
				 * output port value
				 */
				outp = bufs[i]->port;
				/* skip ports that are not enabled */
				if ((enabled_port_mask & (1 << outp)) == 0)
					continue;

				outbuf = &tx_buffers[outp];
				outbuf->mbufs[outbuf->count++] = bufs[i];
				if (outbuf->count == BURST_SIZE)
					flush_one_port(outbuf, outp);
			}
		}
	}
	return 0;
}
static void
int_handler(int sig_num)
{
	printf("Exiting on signal %d\n", sig_num);
	/* set quit flag for rx thread to exit */
	quit_signal_rx = 1;
}
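/* Print final application and per-port statistics on exit. */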
static void
print_stats(void)
{
	struct rte_eth_stats eth_stats;
	unsigned i;

	printf("\nRX thread stats:\n");
	printf(" - Received:    %"PRIu64"\n", app_stats.rx.rx_pkts);
	printf(" - Processed:   %"PRIu64"\n", app_stats.rx.returned_pkts);
	printf(" - Enqueued:    %"PRIu64"\n", app_stats.rx.enqueued_pkts);

	printf("\nTX thread stats:\n");
	printf(" - Dequeued:    %"PRIu64"\n", app_stats.tx.dequeue_pkts);
	printf(" - Transmitted: %"PRIu64"\n", app_stats.tx.tx_pkts);

	for (i = 0; i < rte_eth_dev_count(); i++) {
		rte_eth_stats_get(i, &eth_stats);
		printf("\nPort %u stats:\n", i);
		printf(" - Pkts in:   %"PRIu64"\n", eth_stats.ipackets);
		printf(" - Pkts out:  %"PRIu64"\n", eth_stats.opackets);
		printf(" - In Errs:   %"PRIu64"\n", eth_stats.ierrors);
		printf(" - Out Errs:  %"PRIu64"\n", eth_stats.oerrors);
		printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);
	}
}
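/*
 * Worker lcore: repeatedly asks the distributor for a packet (handing back
 * the previous one) and sets its output port by XORing the input port,
 * pairing port 0 with 1, port 2 with 3, and so on.
 */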
static int
lcore_worker(struct lcore_params *p)
{
	struct rte_distributor *d = p->d;
	const unsigned id = p->worker_id;
	/*
	 * for a single port, xor_val will be zero so we won't modify the
	 * output port; otherwise we send traffic from 0 to 1, 2 to 3, and
	 * vice versa
	 */
	const unsigned xor_val = (rte_eth_dev_count() > 1);
	struct rte_mbuf *buf = NULL;

	printf("\nCore %u acting as worker core.\n", rte_lcore_id());
	while (!quit_signal) {
		buf = rte_distributor_get_pkt(d, id, buf);
		buf->port ^= xor_val;
	}
	return 0;
}
/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
			"  -p PORTMASK: hexadecimal bitmask of ports to configure\n",
			prgname);
}
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}
/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt;
	int option_index;
	char **argvopt = argv;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{NULL, 0, 0, 0}
	};

	while ((opt = getopt_long(argc, argvopt, "p:",
			lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	if (optind <= 1) {
		print_usage(prgname);
		return -1;
	}

	argv[optind-1] = prgname;

	optind = 0; /* reset getopt lib */
	return 0;
}
/* Main function, does initialization and calls the per-lcore functions */
int
MAIN(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	struct rte_distributor *d;
	struct rte_ring *output_ring;
	unsigned lcore_id, worker_id = 0;
	unsigned nb_ports;
	uint8_t portid;
	uint8_t nb_ports_available;

	/* catch ctrl-c so we can print stats on exit */
	signal(SIGINT, int_handler);

	/* init EAL */
	int ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
	argc -= ret;
	argv += ret;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid distributor parameters\n");
	if (rte_lcore_count() < 3)
		rte_exit(EXIT_FAILURE, "Error, this application needs at "
				"least 3 logical cores to run:\n"
				"1 lcore for packet RX and distribution\n"
				"1 lcore for packet TX\n"
				"and at least 1 lcore for worker threads\n");

	nb_ports = rte_eth_dev_count();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
	if (nb_ports != 1 && (nb_ports & 1))
		rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
				"when using a single port\n");
	mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
			MBUF_SIZE, MBUF_CACHE_SIZE,
			sizeof(struct rte_pktmbuf_pool_private),
			rte_pktmbuf_pool_init, NULL,
			rte_pktmbuf_init, NULL,
			rte_socket_id(), 0);
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
	nb_ports_available = nb_ports;
	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			nb_ports_available--;
			continue;
		}
		printf("Initializing port %u...\n", (unsigned) portid);

		if (port_init(portid, mbuf_pool) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
					portid);
	}

	if (!nb_ports_available) {
		rte_exit(EXIT_FAILURE,
				"All available ports are disabled. Please set portmask.\n");
	}
	d = rte_distributor_create("PKT_DIST", rte_socket_id(),
			rte_lcore_count() - 2);
	if (d == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create distributor\n");

	/*
	 * scheduler ring is read only by the transmitter core, but written to
	 * by multiple threads
	 */
	output_ring = rte_ring_create("Output_ring", RTE_RING_SZ,
			rte_socket_id(), RING_F_SC_DEQ);
	if (output_ring == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create output ring\n");
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (worker_id == rte_lcore_count() - 2) {
			/* last slave lcore does packet TX */
			rte_eal_remote_launch((lcore_function_t *)lcore_tx,
					output_ring, lcore_id);
		} else {
			struct lcore_params *p =
					rte_malloc(NULL, sizeof(*p), 0);
			if (!p)
				rte_panic("malloc failure\n");
			*p = (struct lcore_params){worker_id, d, output_ring, mbuf_pool};

			rte_eal_remote_launch((lcore_function_t *)lcore_worker,
					p, lcore_id);
		}
		worker_id++;
	}

	/* call lcore_rx on the master core only */
	struct lcore_params p = { 0, d, output_ring, mbuf_pool};
	lcore_rx(&p);

	print_stats();

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}