4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_common.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
51 #include <rte_launch.h>
52 #include <rte_atomic.h>
53 #include <rte_cycles.h>
54 #include <rte_prefetch.h>
55 #include <rte_lcore.h>
56 #include <rte_per_lcore.h>
57 #include <rte_branch_prediction.h>
58 #include <rte_interrupts.h>
59 #include <rte_random.h>
60 #include <rte_debug.h>
61 #include <rte_ether.h>
62 #include <rte_ethdev.h>
63 #include <rte_mempool.h>
/*
 * NOTE(review): stray original-file line numbers are fused into every line
 * below and several lines (comment openers, struct braces) appear to have
 * been dropped by extraction. Only comments are added here; all code tokens
 * are left byte-identical pending recovery of the pristine source.
 */
66 /* basic constants used in application */
67 #define MAX_QUEUES 1024
69 * 1024 queues require to meet the needs of a large number of vmdq_pools.
70 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
/* Mbuf count sized so the pool can fill every descriptor of every queue. */
72 #define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
73 RTE_TEST_TX_DESC_DEFAULT))
74 #define MBUF_CACHE_SIZE 64
76 #define MAX_PKT_BURST 32
79 * Configurable number of RX/TX ring descriptors
81 #define RTE_TEST_RX_DESC_DEFAULT 128
82 #define RTE_TEST_TX_DESC_DEFAULT 512
/* Sentinel stored into ports[] by check_ports_num() for out-of-range IDs. */
84 #define INVALID_PORT_ID 0xFF
86 /* mask of enabled ports */
87 static uint32_t enabled_port_mask;
/* Enabled port IDs in mask order; pairs (0<->1, 2<->3...) forward to each other. */
88 static uint16_t ports[RTE_MAX_ETHPORTS];
89 static unsigned num_ports;
91 /* number of pools (if user does not specify any, 32 by default */
92 static enum rte_eth_nb_pools num_pools = ETH_32_POOLS;
93 static enum rte_eth_nb_tcs num_tcs = ETH_4_TCS;
/* Derived per-port in port_init() from dev_info queue/pool bases. */
94 static uint16_t num_queues, num_vmdq_queues;
95 static uint16_t vmdq_pool_base, vmdq_queue_base;
/* Set by the --enable-rss command-line flag; 0 (disabled) by default. */
96 static uint8_t rss_enable;
98 /* empty vmdq+dcb configuration structure. Filled in programatically */
99 static const struct rte_eth_conf vmdq_dcb_conf_default = {
101 .mq_mode = ETH_MQ_RX_VMDQ_DCB,
103 .header_split = 0, /**< Header Split disabled */
104 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
105 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
106 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
109 .mq_mode = ETH_MQ_TX_VMDQ_DCB,
112 * should be overridden separately in code with
/* Placeholder pool counts below; get_eth_conf() overwrites them with num_pools. */
117 .nb_queue_pools = ETH_32_POOLS,
118 .enable_default_pool = 0,
121 .pool_map = {{0, 0},},
126 /** Traffic class each UP mapped to. */
130 .nb_queue_pools = ETH_32_POOLS,
131 .enable_default_pool = 0,
134 .pool_map = {{0, 0},},
138 .vmdq_dcb_tx_conf = {
139 .nb_queue_pools = ETH_32_POOLS,
145 /* array used for printing out statistics */
/* Per-queue RX packet counters; read by sighup_handler(), written by lcore_main(). */
146 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
/* One VLAN tag per VMDQ pool; indexed by pool number in get_eth_conf(). */
148 const uint16_t vlan_tags[] = {
149 0, 1, 2, 3, 4, 5, 6, 7,
150 8, 9, 10, 11, 12, 13, 14, 15,
151 16, 17, 18, 19, 20, 21, 22, 23,
152 24, 25, 26, 27, 28, 29, 30, 31
155 const uint16_t num_vlans = RTE_DIM(vlan_tags);
156 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
157 static struct ether_addr pool_addr_template = {
158 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
161 /* ethernet addresses of ports */
162 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
164 /* Builds up the correct configuration for vmdq+dcb based on the vlan tags array
165 * given above, and the number of traffic classes available for use. */
/*
 * NOTE(review): the return-type line, braces, local declaration of `i`, and
 * the return statement(s) of this function are missing from this extraction;
 * comments only are added below. Presumably returns 0 on success — confirm
 * against the pristine source.
 */
167 get_eth_conf(struct rte_eth_conf *eth_conf)
169 struct rte_eth_vmdq_dcb_conf conf;
170 struct rte_eth_vmdq_rx_conf vmdq_conf;
171 struct rte_eth_dcb_rx_conf dcb_conf;
172 struct rte_eth_vmdq_dcb_tx_conf tx_conf;
/* Propagate the user-selected pool count into every multi-queue config. */
175 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
176 vmdq_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
177 tx_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
178 conf.nb_pool_maps = num_pools;
179 vmdq_conf.nb_pool_maps = num_pools;
180 conf.enable_default_pool = 0;
181 vmdq_conf.enable_default_pool = 0;
182 conf.default_pool = 0; /* set explicit value, even if not used */
183 vmdq_conf.default_pool = 0;
/* Map VLAN tag i exclusively to pool i (one-hot pool bitmask). */
185 for (i = 0; i < conf.nb_pool_maps; i++) {
186 conf.pool_map[i].vlan_id = vlan_tags[i];
187 vmdq_conf.pool_map[i].vlan_id = vlan_tags[i];
188 conf.pool_map[i].pools = 1UL << i;
189 vmdq_conf.pool_map[i].pools = 1UL << i;
/* Spread the 8 user priorities round-robin over the configured TCs. */
191 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++){
192 conf.dcb_tc[i] = i % num_tcs;
193 dcb_conf.dcb_tc[i] = i % num_tcs;
194 tx_conf.dcb_tc[i] = i % num_tcs;
196 dcb_conf.nb_tcs = (enum rte_eth_nb_tcs)num_tcs;
/* Start from the static defaults, then overlay the per-run configs. */
197 (void)(rte_memcpy(eth_conf, &vmdq_dcb_conf_default, sizeof(*eth_conf)));
198 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_dcb_conf, &conf,
200 (void)(rte_memcpy(ð_conf->rx_adv_conf.dcb_rx_conf, &dcb_conf,
202 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &vmdq_conf,
204 (void)(rte_memcpy(ð_conf->tx_adv_conf.vmdq_dcb_tx_conf, &tx_conf,
/* NOTE(review): presumably guarded by `if (rss_enable)` on a dropped line. */
207 eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
208 eth_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP |
217 * Initialises a given port using global settings and with the rx buffers
218 * coming from the mbuf_pool passed as parameter
/*
 * NOTE(review): error-return lines (`return -1;` etc.), several braces, and
 * local declarations are missing from this extraction; comments only added.
 * Side effects visible here: sets the globals vmdq_queue_base/vmdq_pool_base/
 * num_vmdq_queues/num_queues, starts the port, and programs one MAC per pool.
 */
221 port_init(uint16_t port, struct rte_mempool *mbuf_pool)
223 struct rte_eth_dev_info dev_info;
224 struct rte_eth_conf port_conf = {0};
225 uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
226 uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
229 uint16_t queues_per_pool;
230 uint32_t max_nb_pools;
233 * The max pool number from dev_info will be used to validate the pool
234 * number specified in cmd line
236 rte_eth_dev_info_get(port, &dev_info);
237 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
239 * We allow to process part of VMDQ pools specified by num_pools in
242 if (num_pools > max_nb_pools) {
243 printf("num_pools %d >max_nb_pools %d\n",
244 num_pools, max_nb_pools);
249 * NIC queues are divided into pf queues and vmdq queues.
250 * There is assumption here all ports have the same configuration!
252 vmdq_queue_base = dev_info.vmdq_queue_base;
253 vmdq_pool_base = dev_info.vmdq_pool_base;
254 printf("vmdq queue base: %d pool base %d\n",
255 vmdq_queue_base, vmdq_pool_base);
/* pool base 0 => no PF-reserved queues: all RX queues belong to VMDQ. */
256 if (vmdq_pool_base == 0) {
257 num_vmdq_queues = dev_info.max_rx_queues;
258 num_queues = dev_info.max_rx_queues;
/* With no PF queues, TCs-per-pool must divide the queue count exactly. */
259 if (num_tcs != num_vmdq_queues / num_pools) {
260 printf("nb_tcs %d is invalid considering with"
261 " nb_pools %d, nb_tcs * nb_pools should = %d\n",
262 num_tcs, num_pools, num_vmdq_queues);
266 queues_per_pool = dev_info.vmdq_queue_num /
267 dev_info.max_vmdq_pools;
268 if (num_tcs > queues_per_pool) {
269 printf("num_tcs %d > num of queues per pool %d\n",
270 num_tcs, queues_per_pool);
273 num_vmdq_queues = num_pools * queues_per_pool;
274 num_queues = vmdq_queue_base + num_vmdq_queues;
275 printf("Configured vmdq pool num: %u,"
276 " each vmdq pool has %u queues\n",
277 num_pools, queues_per_pool);
280 if (port >= rte_eth_dev_count())
283 retval = get_eth_conf(&port_conf);
288 * Though in this example, all queues including pf queues are setup.
289 * This is because VMDQ queues doesn't always start from zero, and the
290 * PMD layer doesn't support selectively initialising part of rx/tx
293 retval = rte_eth_dev_configure(port, num_queues, num_queues, &port_conf);
/* Let the driver clamp ring sizes to what the hardware supports. */
297 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
/* Mbuf pool was sized from the DEFAULT descriptor counts; refuse growth. */
301 if (RTE_MAX(rxRingSize, txRingSize) >
302 RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, RTE_TEST_TX_DESC_DEFAULT)) {
303 printf("Mbuf pool has an insufficient size for port %u.\n",
308 for (q = 0; q < num_queues; q++) {
309 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
310 rte_eth_dev_socket_id(port),
314 printf("initialize rx queue %d failed\n", q);
319 for (q = 0; q < num_queues; q++) {
320 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
321 rte_eth_dev_socket_id(port),
324 printf("initialize tx queue %d failed\n", q);
329 retval = rte_eth_dev_start(port);
331 printf("port %d start failed\n", port);
335 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
336 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
337 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
339 vmdq_ports_eth_addr[port].addr_bytes[0],
340 vmdq_ports_eth_addr[port].addr_bytes[1],
341 vmdq_ports_eth_addr[port].addr_bytes[2],
342 vmdq_ports_eth_addr[port].addr_bytes[3],
343 vmdq_ports_eth_addr[port].addr_bytes[4],
344 vmdq_ports_eth_addr[port].addr_bytes[5]);
346 /* Set mac for each pool.*/
347 for (q = 0; q < num_pools; q++) {
348 struct ether_addr mac;
/* Per-pool MAC = template with last two bytes set to port# and pool#. */
350 mac = pool_addr_template;
351 mac.addr_bytes[4] = port;
352 mac.addr_bytes[5] = q;
353 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
355 mac.addr_bytes[0], mac.addr_bytes[1],
356 mac.addr_bytes[2], mac.addr_bytes[3],
357 mac.addr_bytes[4], mac.addr_bytes[5]);
358 retval = rte_eth_dev_mac_addr_add(port, &mac,
361 printf("mac addr add failed at pool %d\n", q);
369 /* Check num_pools parameter and set it if OK*/
/*
 * NOTE(review): return-type line, local declarations (`char *end; int n;`)
 * and return statements are missing from this extraction. Visible logic:
 * accepts only the literal values 16 or 32 and stores the matching
 * enum into the global num_pools; presumably returns 0/-1 — confirm.
 */
371 vmdq_parse_num_pools(const char *q_arg)
376 /* parse number string */
377 n = strtol(q_arg, &end, 10);
/* Reject empty input or trailing garbage after the number. */
378 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
380 if (n != 16 && n != 32)
383 num_pools = ETH_16_POOLS;
385 num_pools = ETH_32_POOLS;
390 /* Check num_tcs parameter and set it if OK*/
/*
 * NOTE(review): declarations, return statements, and the assignments to
 * num_tcs are on lines dropped by extraction. Visible logic: accepts only
 * 4 or 8 traffic classes, mirroring vmdq_parse_num_pools() above.
 */
392 vmdq_parse_num_tcs(const char *q_arg)
397 /* parse number string */
398 n = strtol(q_arg, &end, 10);
/* Reject empty input or trailing garbage after the number. */
399 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
402 if (n != 4 && n != 8)
/*
 * NOTE(review): return-type line, declarations (`char *end; unsigned long
 * pm;`) and return statements were dropped by extraction. Parses the -p
 * argument as a hexadecimal port bitmask; presumably returns the mask, with
 * an error sentinel for bad input — confirm against pristine source.
 */
413 parse_portmask(const char *portmask)
418 /* parse hexadecimal string */
419 pm = strtoul(portmask, &end, 16);
/* Reject empty input or trailing garbage after the hex number. */
420 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
/* Print command-line usage to stdout; prgname is argv[0]. */
431 vmdq_usage(const char *prgname)
433 printf("%s [EAL options] -- -p PORTMASK]\n"
434 " --nb-pools NP: number of pools (32 default, 16)\n"
435 " --nb-tcs NP: number of TCs (4 default, 8)\n"
436 " --enable-rss: enable RSS (disabled by default)\n",
440 /* Parse the argument (num_pools) given in the command line of the application */
/*
 * NOTE(review): the switch/case skeleton, error `return -1` lines, and the
 * vmdq_usage() calls on failure are missing from this extraction. Visible
 * behavior: handles -p (portmask) plus long options --nb-pools, --nb-tcs and
 * --enable-rss, then builds the ports[] array from the mask and requires an
 * even count of at least 2 ports (pairs forward to each other).
 */
442 vmdq_parse_args(int argc, char **argv)
447 const char *prgname = argv[0];
448 static struct option long_option[] = {
449 {"nb-pools", required_argument, NULL, 0},
450 {"nb-tcs", required_argument, NULL, 0},
451 {"enable-rss", 0, NULL, 0},
455 /* Parse command line */
456 while ((opt = getopt_long(argc, argv, "p:", long_option,
457 &option_index)) != EOF) {
461 enabled_port_mask = parse_portmask(optarg);
462 if (enabled_port_mask == 0) {
463 printf("invalid portmask\n");
469 if (!strcmp(long_option[option_index].name, "nb-pools")) {
470 if (vmdq_parse_num_pools(optarg) == -1) {
471 printf("invalid number of pools\n");
476 if (!strcmp(long_option[option_index].name, "nb-tcs")) {
477 if (vmdq_parse_num_tcs(optarg) == -1) {
478 printf("invalid number of tcs\n");
483 if (!strcmp(long_option[option_index].name, "enable-rss"))
/* Expand the bitmask into the ordered ports[] array. */
493 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
494 if (enabled_port_mask & (1 << i))
495 ports[num_ports++] = (uint8_t)i;
/* Forwarding works on port pairs, so an odd or <2 count is unusable. */
498 if (num_ports < 2 || num_ports % 2) {
499 printf("Current enabled port number is %u,"
500 " but it should be even and at least 2\n", num_ports);
/*
 * Rewrite the packet's Ethernet header before forwarding: destination MAC
 * becomes the synthetic 02:00:00:00:00:<dst_port>, source MAC becomes the
 * real MAC of the output port.
 *
 * NOTE(review): the 64-bit store below writes 8 bytes through a uint8_t*
 * into a 6-byte MAC field — it relies on the 2-byte ethertype following
 * d_addr being overwritten-then-unread or on specific layout/endianness,
 * and is an unaligned/strict-aliasing hazard on some targets. Flagged only;
 * not changed here because surrounding lines are missing from this view.
 */
508 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
510 struct ether_hdr *eth;
513 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
515 /* 02:00:00:00:00:xx */
516 tmp = ð->d_addr.addr_bytes[0];
517 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
520 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
523 /* When we receive a HUP signal, print out our stats */
/*
 * Walks the VMDQ queue range and prints rxPackets[] grouped by pool
 * (num_vmdq_queues / num_pools queues per pool).
 * NOTE(review): printf from a signal handler is not async-signal-safe;
 * acceptable for a sample app, but worth noting.
 */
525 sighup_handler(int signum)
527 unsigned q = vmdq_queue_base;
529 for (; q < num_queues; q++) {
/* Start a new output line each time a pool boundary is crossed. */
530 if (q % (num_vmdq_queues / num_pools) == 0)
531 printf("\nPool %u: ", (q - vmdq_queue_base) /
532 (num_vmdq_queues / num_pools));
533 printf("%lu ", rxPackets[q]);
535 printf("\nFinished handling signal %d\n", signum);
539 * Main thread that does the work, reading from INPUT_PORT
540 * and writing to OUTPUT_PORT
/*
 * Per-lcore forwarding loop. Each core is assigned a contiguous slice of
 * the VMDQ queues (remainder queues spread over the first cores), then
 * loops forever: burst-RX from port ports[p], rewrite MACs, burst-TX to
 * the paired port ports[p^1], freeing any mbufs the TX burst rejected.
 * NOTE(review): the `for (;;)` line, several braces, and the conditional
 * that selects between the two queue-partitioning arms are missing from
 * this extraction — presumably `if (remainder != 0)` around L280-L286.
 */
543 lcore_main(void *arg)
545 const uintptr_t core_num = (uintptr_t)arg;
546 const unsigned num_cores = rte_lcore_count();
547 uint16_t startQueue, endQueue;
549 const uint16_t quot = (uint16_t)(num_vmdq_queues / num_cores);
550 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
/* First `remainder` cores take quot+1 queues; the rest take quot. */
554 if (core_num < remainder) {
555 startQueue = (uint16_t)(core_num * (quot + 1));
556 endQueue = (uint16_t)(startQueue + quot + 1);
558 startQueue = (uint16_t)(core_num * quot + remainder);
559 endQueue = (uint16_t)(startQueue + quot);
/* Even split when the queue count divides evenly across cores. */
562 startQueue = (uint16_t)(core_num * quot);
563 endQueue = (uint16_t)(startQueue + quot);
566 /* vmdq queue idx doesn't always start from zero.*/
567 startQueue += vmdq_queue_base;
568 endQueue += vmdq_queue_base;
569 printf("Core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_num,
570 rte_lcore_id(), startQueue, endQueue - 1);
/* More cores than queues: this core got an empty slice. */
572 if (startQueue == endQueue) {
573 printf("lcore %u has nothing to do\n", (unsigned)core_num);
578 struct rte_mbuf *buf[MAX_PKT_BURST];
579 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
580 for (p = 0; p < num_ports; p++) {
581 const uint8_t src = ports[p];
582 const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */
584 if ((src == INVALID_PORT_ID) || (dst == INVALID_PORT_ID))
587 for (q = startQueue; q < endQueue; q++) {
588 const uint16_t rxCount = rte_eth_rx_burst(src,
591 if (unlikely(rxCount == 0))
/* Safe without atomics: each queue is polled by exactly one core. */
594 rxPackets[q] += rxCount;
596 for (i = 0; i < rxCount; i++)
597 update_mac_address(buf[i], dst);
599 const uint16_t txCount = rte_eth_tx_burst(dst,
/* Free whatever the TX ring could not accept to avoid mbuf leaks. */
601 if (txCount != rxCount) {
602 for (i = txCount; i < rxCount; i++)
603 rte_pktmbuf_free(buf[i]);
611 * Update the global var NUM_PORTS and array PORTS according to system ports number
612 * and return valid ports number
/*
 * Clamps the requested port count to what the system actually has and
 * replaces any out-of-range port IDs in ports[] with INVALID_PORT_ID
 * (lcore_main() skips such entries).
 * NOTE(review): valid_num_ports is captured BEFORE num_ports is clamped
 * and before invalid entries are marked; a decrement of valid_num_ports
 * in the invalid-ID branch may exist on a dropped line — confirm.
 */
614 static unsigned check_ports_num(unsigned nb_ports)
616 unsigned valid_num_ports = num_ports;
619 if (num_ports > nb_ports) {
620 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
621 num_ports, nb_ports);
622 num_ports = nb_ports;
625 for (portid = 0; portid < num_ports; portid++) {
626 if (ports[portid] >= nb_ports) {
627 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
628 ports[portid], (nb_ports - 1));
629 ports[portid] = INVALID_PORT_ID;
633 return valid_num_ports;
637 /* Main function, does initialisation and calls the per-lcore functions */
/*
 * Startup sequence: install SIGHUP stats handler, init EAL, parse app args,
 * validate core and port counts, create one shared mbuf pool, initialise
 * every enabled port, then launch lcore_main() on all slave lcores and run
 * it on the master as well.
 * NOTE(review): declarations of `ret`, `cores`, `lcore_id`, `portid`, `i`,
 * the `continue` in the skip-port branch, and the final return are on
 * lines dropped by extraction.
 */
639 main(int argc, char *argv[])
642 struct rte_mempool *mbuf_pool;
646 unsigned nb_ports, valid_num_ports;
649 signal(SIGHUP, sighup_handler);
652 ret = rte_eal_init(argc, argv);
654 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
658 /* parse app arguments */
659 ret = vmdq_parse_args(argc, argv);
661 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
/* (cores & (cores-1)) != 0 rejects non-power-of-two core counts,
 * despite the error message saying "even" — message/check mismatch
 * noted; not changed here since surrounding lines are missing. */
663 cores = rte_lcore_count();
664 if ((cores & (cores - 1)) != 0 || cores > RTE_MAX_LCORE) {
665 rte_exit(EXIT_FAILURE,"This program can only run on an even"
666 " number of cores(1-%d)\n\n", RTE_MAX_LCORE);
669 nb_ports = rte_eth_dev_count();
672 * Update the global var NUM_PORTS and global array PORTS
673 * and get value of var VALID_NUM_PORTS according to system ports number
675 valid_num_ports = check_ports_num(nb_ports);
677 if (valid_num_ports < 2 || valid_num_ports % 2) {
678 printf("Current valid ports number is %u\n", valid_num_ports);
679 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
/* Single mempool shared by all ports, sized for every queue's descriptors. */
682 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
683 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
684 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
685 if (mbuf_pool == NULL)
686 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
688 /* initialize all ports */
689 for (portid = 0; portid < nb_ports; portid++) {
690 /* skip ports that are not enabled */
691 if ((enabled_port_mask & (1 << portid)) == 0) {
692 printf("\nSkipping disabled port %d\n", portid);
695 if (port_init(portid, mbuf_pool) != 0)
696 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
699 /* call lcore_main() on every slave lcore */
/* Each lcore gets a distinct logical core index i via the void* argument. */
701 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
702 rte_eal_remote_launch(lcore_main, (void*)i++, lcore_id);
704 /* call on master too */
705 (void) lcore_main((void*)i);