4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_common.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
51 #include <rte_launch.h>
52 #include <rte_atomic.h>
53 #include <rte_cycles.h>
54 #include <rte_prefetch.h>
55 #include <rte_lcore.h>
56 #include <rte_per_lcore.h>
57 #include <rte_branch_prediction.h>
58 #include <rte_interrupts.h>
60 #include <rte_random.h>
61 #include <rte_debug.h>
62 #include <rte_ether.h>
63 #include <rte_ethdev.h>
64 #include <rte_mempool.h>
/* basic constants used in application */
#define MAX_QUEUES 1024
/*
 * 1024 queues require to meet the needs of a large number of vmdq_pools.
 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

#define INVALID_PORT_ID 0xFF
87 /* mask of enabled ports */
88 static uint32_t enabled_port_mask;
89 static uint16_t ports[RTE_MAX_ETHPORTS];
90 static unsigned num_ports;
92 /* number of pools (if user does not specify any, 32 by default */
93 static enum rte_eth_nb_pools num_pools = ETH_32_POOLS;
94 static enum rte_eth_nb_tcs num_tcs = ETH_4_TCS;
95 static uint16_t num_queues, num_vmdq_queues;
96 static uint16_t vmdq_pool_base, vmdq_queue_base;
97 static uint8_t rss_enable;
99 /* empty vmdq+dcb configuration structure. Filled in programatically */
100 static const struct rte_eth_conf vmdq_dcb_conf_default = {
102 .mq_mode = ETH_MQ_RX_VMDQ_DCB,
104 .header_split = 0, /**< Header Split disabled */
105 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
106 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
107 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
110 .mq_mode = ETH_MQ_TX_VMDQ_DCB,
113 * should be overridden separately in code with
118 .nb_queue_pools = ETH_32_POOLS,
119 .enable_default_pool = 0,
122 .pool_map = {{0, 0},},
127 /** Traffic class each UP mapped to. */
131 .nb_queue_pools = ETH_32_POOLS,
132 .enable_default_pool = 0,
135 .pool_map = {{0, 0},},
139 .vmdq_dcb_tx_conf = {
140 .nb_queue_pools = ETH_32_POOLS,
146 /* array used for printing out statistics */
147 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
149 const uint16_t vlan_tags[] = {
150 0, 1, 2, 3, 4, 5, 6, 7,
151 8, 9, 10, 11, 12, 13, 14, 15,
152 16, 17, 18, 19, 20, 21, 22, 23,
153 24, 25, 26, 27, 28, 29, 30, 31
156 const uint16_t num_vlans = RTE_DIM(vlan_tags);
157 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
158 static struct ether_addr pool_addr_template = {
159 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
162 /* ethernet addresses of ports */
163 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
165 /* Builds up the correct configuration for vmdq+dcb based on the vlan tags array
166 * given above, and the number of traffic classes available for use. */
168 get_eth_conf(struct rte_eth_conf *eth_conf)
170 struct rte_eth_vmdq_dcb_conf conf;
171 struct rte_eth_vmdq_rx_conf vmdq_conf;
172 struct rte_eth_dcb_rx_conf dcb_conf;
173 struct rte_eth_vmdq_dcb_tx_conf tx_conf;
176 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
177 vmdq_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
178 tx_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
179 conf.nb_pool_maps = num_pools;
180 vmdq_conf.nb_pool_maps = num_pools;
181 conf.enable_default_pool = 0;
182 vmdq_conf.enable_default_pool = 0;
183 conf.default_pool = 0; /* set explicit value, even if not used */
184 vmdq_conf.default_pool = 0;
186 for (i = 0; i < conf.nb_pool_maps; i++) {
187 conf.pool_map[i].vlan_id = vlan_tags[i];
188 vmdq_conf.pool_map[i].vlan_id = vlan_tags[i];
189 conf.pool_map[i].pools = 1UL << i;
190 vmdq_conf.pool_map[i].pools = 1UL << i;
192 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++){
193 conf.dcb_tc[i] = i % num_tcs;
194 dcb_conf.dcb_tc[i] = i % num_tcs;
195 tx_conf.dcb_tc[i] = i % num_tcs;
197 dcb_conf.nb_tcs = (enum rte_eth_nb_tcs)num_tcs;
198 (void)(rte_memcpy(eth_conf, &vmdq_dcb_conf_default, sizeof(*eth_conf)));
199 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_dcb_conf, &conf,
201 (void)(rte_memcpy(ð_conf->rx_adv_conf.dcb_rx_conf, &dcb_conf,
203 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &vmdq_conf,
205 (void)(rte_memcpy(ð_conf->tx_adv_conf.vmdq_dcb_tx_conf, &tx_conf,
208 eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
209 eth_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP |
218 * Initialises a given port using global settings and with the rx buffers
219 * coming from the mbuf_pool passed as parameter
222 port_init(uint16_t port, struct rte_mempool *mbuf_pool)
224 struct rte_eth_dev_info dev_info;
225 struct rte_eth_conf port_conf = {0};
226 uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
227 uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
230 uint16_t queues_per_pool;
231 uint32_t max_nb_pools;
234 * The max pool number from dev_info will be used to validate the pool
235 * number specified in cmd line
237 rte_eth_dev_info_get(port, &dev_info);
238 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
240 * We allow to process part of VMDQ pools specified by num_pools in
243 if (num_pools > max_nb_pools) {
244 printf("num_pools %d >max_nb_pools %d\n",
245 num_pools, max_nb_pools);
250 * NIC queues are divided into pf queues and vmdq queues.
251 * There is assumption here all ports have the same configuration!
253 vmdq_queue_base = dev_info.vmdq_queue_base;
254 vmdq_pool_base = dev_info.vmdq_pool_base;
255 printf("vmdq queue base: %d pool base %d\n",
256 vmdq_queue_base, vmdq_pool_base);
257 if (vmdq_pool_base == 0) {
258 num_vmdq_queues = dev_info.max_rx_queues;
259 num_queues = dev_info.max_rx_queues;
260 if (num_tcs != num_vmdq_queues / num_pools) {
261 printf("nb_tcs %d is invalid considering with"
262 " nb_pools %d, nb_tcs * nb_pools should = %d\n",
263 num_tcs, num_pools, num_vmdq_queues);
267 queues_per_pool = dev_info.vmdq_queue_num /
268 dev_info.max_vmdq_pools;
269 if (num_tcs > queues_per_pool) {
270 printf("num_tcs %d > num of queues per pool %d\n",
271 num_tcs, queues_per_pool);
274 num_vmdq_queues = num_pools * queues_per_pool;
275 num_queues = vmdq_queue_base + num_vmdq_queues;
276 printf("Configured vmdq pool num: %u,"
277 " each vmdq pool has %u queues\n",
278 num_pools, queues_per_pool);
281 if (port >= rte_eth_dev_count())
284 retval = get_eth_conf(&port_conf);
289 * Though in this example, all queues including pf queues are setup.
290 * This is because VMDQ queues doesn't always start from zero, and the
291 * PMD layer doesn't support selectively initialising part of rx/tx
294 retval = rte_eth_dev_configure(port, num_queues, num_queues, &port_conf);
298 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
302 if (RTE_MAX(rxRingSize, txRingSize) >
303 RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, RTE_TEST_TX_DESC_DEFAULT)) {
304 printf("Mbuf pool has an insufficient size for port %u.\n",
309 for (q = 0; q < num_queues; q++) {
310 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
311 rte_eth_dev_socket_id(port),
315 printf("initialize rx queue %d failed\n", q);
320 for (q = 0; q < num_queues; q++) {
321 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
322 rte_eth_dev_socket_id(port),
325 printf("initialize tx queue %d failed\n", q);
330 retval = rte_eth_dev_start(port);
332 printf("port %d start failed\n", port);
336 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
337 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
338 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
340 vmdq_ports_eth_addr[port].addr_bytes[0],
341 vmdq_ports_eth_addr[port].addr_bytes[1],
342 vmdq_ports_eth_addr[port].addr_bytes[2],
343 vmdq_ports_eth_addr[port].addr_bytes[3],
344 vmdq_ports_eth_addr[port].addr_bytes[4],
345 vmdq_ports_eth_addr[port].addr_bytes[5]);
347 /* Set mac for each pool.*/
348 for (q = 0; q < num_pools; q++) {
349 struct ether_addr mac;
351 mac = pool_addr_template;
352 mac.addr_bytes[4] = port;
353 mac.addr_bytes[5] = q;
354 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
356 mac.addr_bytes[0], mac.addr_bytes[1],
357 mac.addr_bytes[2], mac.addr_bytes[3],
358 mac.addr_bytes[4], mac.addr_bytes[5]);
359 retval = rte_eth_dev_mac_addr_add(port, &mac,
362 printf("mac addr add failed at pool %d\n", q);
370 /* Check num_pools parameter and set it if OK*/
372 vmdq_parse_num_pools(const char *q_arg)
377 /* parse number string */
378 n = strtol(q_arg, &end, 10);
379 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
381 if (n != 16 && n != 32)
384 num_pools = ETH_16_POOLS;
386 num_pools = ETH_32_POOLS;
391 /* Check num_tcs parameter and set it if OK*/
393 vmdq_parse_num_tcs(const char *q_arg)
398 /* parse number string */
399 n = strtol(q_arg, &end, 10);
400 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
403 if (n != 4 && n != 8)
/* Parse the hexadecimal port mask; returns the mask, or -1 on bad input.
 * An all-zero mask is rejected since no port would be enabled. */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}
/* Display usage string for the application's command-line options. */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK]\n"
	       "  --nb-pools NP: number of pools (32 default, 16)\n"
	       "  --nb-tcs NP: number of TCs (4 default, 8)\n"
	       "  --enable-rss: enable RSS (disabled by default)\n",
	       prgname);
}
441 /* Parse the argument (num_pools) given in the command line of the application */
443 vmdq_parse_args(int argc, char **argv)
448 const char *prgname = argv[0];
449 static struct option long_option[] = {
450 {"nb-pools", required_argument, NULL, 0},
451 {"nb-tcs", required_argument, NULL, 0},
452 {"enable-rss", 0, NULL, 0},
456 /* Parse command line */
457 while ((opt = getopt_long(argc, argv, "p:", long_option,
458 &option_index)) != EOF) {
462 enabled_port_mask = parse_portmask(optarg);
463 if (enabled_port_mask == 0) {
464 printf("invalid portmask\n");
470 if (!strcmp(long_option[option_index].name, "nb-pools")) {
471 if (vmdq_parse_num_pools(optarg) == -1) {
472 printf("invalid number of pools\n");
477 if (!strcmp(long_option[option_index].name, "nb-tcs")) {
478 if (vmdq_parse_num_tcs(optarg) == -1) {
479 printf("invalid number of tcs\n");
484 if (!strcmp(long_option[option_index].name, "enable-rss"))
494 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
495 if (enabled_port_mask & (1 << i))
496 ports[num_ports++] = (uint8_t)i;
499 if (num_ports < 2 || num_ports % 2) {
500 printf("Current enabled port number is %u,"
501 " but it should be even and at least 2\n", num_ports);
509 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
511 struct ether_hdr *eth;
514 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
516 /* 02:00:00:00:00:xx */
517 tmp = ð->d_addr.addr_bytes[0];
518 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
521 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
524 /* When we receive a HUP signal, print out our stats */
526 sighup_handler(int signum)
528 unsigned q = vmdq_queue_base;
530 for (; q < num_queues; q++) {
531 if (q % (num_vmdq_queues / num_pools) == 0)
532 printf("\nPool %u: ", (q - vmdq_queue_base) /
533 (num_vmdq_queues / num_pools));
534 printf("%lu ", rxPackets[q]);
536 printf("\nFinished handling signal %d\n", signum);
540 * Main thread that does the work, reading from INPUT_PORT
541 * and writing to OUTPUT_PORT
544 lcore_main(void *arg)
546 const uintptr_t core_num = (uintptr_t)arg;
547 const unsigned num_cores = rte_lcore_count();
548 uint16_t startQueue, endQueue;
550 const uint16_t quot = (uint16_t)(num_vmdq_queues / num_cores);
551 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
555 if (core_num < remainder) {
556 startQueue = (uint16_t)(core_num * (quot + 1));
557 endQueue = (uint16_t)(startQueue + quot + 1);
559 startQueue = (uint16_t)(core_num * quot + remainder);
560 endQueue = (uint16_t)(startQueue + quot);
563 startQueue = (uint16_t)(core_num * quot);
564 endQueue = (uint16_t)(startQueue + quot);
567 /* vmdq queue idx doesn't always start from zero.*/
568 startQueue += vmdq_queue_base;
569 endQueue += vmdq_queue_base;
570 printf("Core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_num,
571 rte_lcore_id(), startQueue, endQueue - 1);
573 if (startQueue == endQueue) {
574 printf("lcore %u has nothing to do\n", (unsigned)core_num);
579 struct rte_mbuf *buf[MAX_PKT_BURST];
580 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
581 for (p = 0; p < num_ports; p++) {
582 const uint8_t src = ports[p];
583 const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */
585 if ((src == INVALID_PORT_ID) || (dst == INVALID_PORT_ID))
588 for (q = startQueue; q < endQueue; q++) {
589 const uint16_t rxCount = rte_eth_rx_burst(src,
592 if (unlikely(rxCount == 0))
595 rxPackets[q] += rxCount;
597 for (i = 0; i < rxCount; i++)
598 update_mac_address(buf[i], dst);
600 const uint16_t txCount = rte_eth_tx_burst(dst,
602 if (txCount != rxCount) {
603 for (i = txCount; i < rxCount; i++)
604 rte_pktmbuf_free(buf[i]);
612 * Update the global var NUM_PORTS and array PORTS according to system ports number
613 * and return valid ports number
615 static unsigned check_ports_num(unsigned nb_ports)
617 unsigned valid_num_ports = num_ports;
620 if (num_ports > nb_ports) {
621 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
622 num_ports, nb_ports);
623 num_ports = nb_ports;
626 for (portid = 0; portid < num_ports; portid++) {
627 if (ports[portid] >= nb_ports) {
628 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
629 ports[portid], (nb_ports - 1));
630 ports[portid] = INVALID_PORT_ID;
634 return valid_num_ports;
638 /* Main function, does initialisation and calls the per-lcore functions */
640 main(int argc, char *argv[])
643 struct rte_mempool *mbuf_pool;
647 unsigned nb_ports, valid_num_ports;
650 signal(SIGHUP, sighup_handler);
653 ret = rte_eal_init(argc, argv);
655 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
659 /* parse app arguments */
660 ret = vmdq_parse_args(argc, argv);
662 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
664 cores = rte_lcore_count();
665 if ((cores & (cores - 1)) != 0 || cores > RTE_MAX_LCORE) {
666 rte_exit(EXIT_FAILURE,"This program can only run on an even"
667 " number of cores(1-%d)\n\n", RTE_MAX_LCORE);
670 nb_ports = rte_eth_dev_count();
673 * Update the global var NUM_PORTS and global array PORTS
674 * and get value of var VALID_NUM_PORTS according to system ports number
676 valid_num_ports = check_ports_num(nb_ports);
678 if (valid_num_ports < 2 || valid_num_ports % 2) {
679 printf("Current valid ports number is %u\n", valid_num_ports);
680 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
683 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
684 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
685 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
686 if (mbuf_pool == NULL)
687 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
689 /* initialize all ports */
690 for (portid = 0; portid < nb_ports; portid++) {
691 /* skip ports that are not enabled */
692 if ((enabled_port_mask & (1 << portid)) == 0) {
693 printf("\nSkipping disabled port %d\n", portid);
696 if (port_init(portid, mbuf_pool) != 0)
697 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
700 /* call lcore_main() on every slave lcore */
702 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
703 rte_eal_remote_launch(lcore_main, (void*)i++, lcore_id);
705 /* call on master too */
706 (void) lcore_main((void*)i);