4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_common.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
50 #include <rte_memzone.h>
52 #include <rte_per_lcore.h>
53 #include <rte_launch.h>
54 #include <rte_atomic.h>
55 #include <rte_cycles.h>
56 #include <rte_prefetch.h>
57 #include <rte_lcore.h>
58 #include <rte_per_lcore.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_interrupts.h>
62 #include <rte_random.h>
63 #include <rte_debug.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
67 #include <rte_mempool.h>
69 #include <rte_memcpy.h>
71 /* basic constants used in application */
/* upper bound on per-port queues; also sizes rxPackets[] and the mbuf pool */
72 #define MAX_QUEUES 1024
/* NOTE(review): the next two lines are the body of a block comment whose
 * open/close delimiters were lost in extraction; they explain the sizing
 * of NUM_MBUFS_PER_PORT below. */
74 * 1024 queues require to meet the needs of a large number of vmdq_pools.
75 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
77 #define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
78 RTE_TEST_TX_DESC_DEFAULT))
/* per-lcore cache size passed to rte_pktmbuf_pool_create() in main() */
79 #define MBUF_CACHE_SIZE 64
/* max packets handled per rte_eth_rx_burst()/rte_eth_tx_burst() call */
81 #define MAX_PKT_BURST 32
84 * Configurable number of RX/TX ring descriptors
86 #define RTE_TEST_RX_DESC_DEFAULT 128
87 #define RTE_TEST_TX_DESC_DEFAULT 512
/* sentinel stored in ports[] for IDs rejected by check_ports_num() */
89 #define INVALID_PORT_ID 0xFF
91 /* mask of enabled ports */
92 static uint32_t enabled_port_mask;
/* port IDs selected by the mask, in ascending order; filled in
 * vmdq_parse_args(), num_ports entries are valid */
93 static uint8_t ports[RTE_MAX_ETHPORTS];
94 static unsigned num_ports;
96 /* number of pools (if user does not specify any, 32 by default) */
97 static enum rte_eth_nb_pools num_pools = ETH_32_POOLS;
/* number of DCB traffic classes (4 by default, selectable via --nb-tcs) */
98 static enum rte_eth_nb_tcs num_tcs = ETH_4_TCS;
/* total queues and VMDq-only queue count per port, derived in port_init() */
99 static uint16_t num_queues, num_vmdq_queues;
/* first VMDq pool/queue index reported by the device in port_init() */
100 static uint16_t vmdq_pool_base, vmdq_queue_base;
/* presumably set non-zero by the --enable-rss option (assignment is not
 * visible in this extract — confirm in vmdq_parse_args) */
101 static uint8_t rss_enable;
103 /* empty vmdq+dcb configuration structure. Filled in programatically */
/* NOTE(review): this initializer is missing many lines in this extract
 * (nested-struct braces and several field groups are absent); the code is
 * kept verbatim.  The leading numbers are original source line numbers. */
104 static const struct rte_eth_conf vmdq_dcb_conf_default = {
/* .rxmode: RX multi-queue mode plus per-feature offload flags */
106 .mq_mode = ETH_MQ_RX_VMDQ_DCB,
108 .header_split = 0, /**< Header Split disabled */
109 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
110 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
111 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
/* .txmode: TX side also runs in VMDq+DCB mode */
114 .mq_mode = ETH_MQ_TX_VMDQ_DCB,
117 * should be overridden separately in code with
/* RX VMDq+DCB pool defaults; get_eth_conf() overwrites these at run time */
122 .nb_queue_pools = ETH_32_POOLS,
123 .enable_default_pool = 0,
126 .pool_map = {{0, 0},},
131 /** Traffic class each UP mapped to. */
/* second pool-config group (presumably the plain VMDq RX conf — the
 * enclosing field name is not visible in this extract; confirm upstream) */
135 .nb_queue_pools = ETH_32_POOLS,
136 .enable_default_pool = 0,
139 .pool_map = {{0, 0},},
/* TX-side VMDq+DCB pool count */
143 .vmdq_dcb_tx_conf = {
144 .nb_queue_pools = ETH_32_POOLS,
150 /* array used for printing out statistics */
/* per-RX-queue packet counters: incremented in lcore_main(), dumped by
 * sighup_handler() when the process receives SIGHUP */
151 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
/* VLAN tags assigned to pools: get_eth_conf() maps vlan_tags[i] -> pool i */
153 const uint16_t vlan_tags[] = {
154 0, 1, 2, 3, 4, 5, 6, 7,
155 8, 9, 10, 11, 12, 13, 14, 15,
156 16, 17, 18, 19, 20, 21, 22, 23,
157 24, 25, 26, 27, 28, 29, 30, 31
160 const uint16_t num_vlans = RTE_DIM(vlan_tags);
161 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
162 static struct ether_addr pool_addr_template = {
163 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
166 /* ethernet addresses of ports */
/* filled by rte_eth_macaddr_get() in port_init(); read when rewriting the
 * source MAC in update_mac_address() */
167 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
169 /* Builds up the correct configuration for vmdq+dcb based on the vlan tags array
170 * given above, and the number of traffic classes available for use. */
172 get_eth_conf(struct rte_eth_conf *eth_conf)
174 struct rte_eth_vmdq_dcb_conf conf;
175 struct rte_eth_vmdq_rx_conf vmdq_conf;
176 struct rte_eth_dcb_rx_conf dcb_conf;
177 struct rte_eth_vmdq_dcb_tx_conf tx_conf;
180 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
181 vmdq_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
182 tx_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
183 conf.nb_pool_maps = num_pools;
184 vmdq_conf.nb_pool_maps = num_pools;
185 conf.enable_default_pool = 0;
186 vmdq_conf.enable_default_pool = 0;
187 conf.default_pool = 0; /* set explicit value, even if not used */
188 vmdq_conf.default_pool = 0;
190 for (i = 0; i < conf.nb_pool_maps; i++) {
191 conf.pool_map[i].vlan_id = vlan_tags[i];
192 vmdq_conf.pool_map[i].vlan_id = vlan_tags[i];
193 conf.pool_map[i].pools = 1UL << i;
194 vmdq_conf.pool_map[i].pools = 1UL << i;
196 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++){
197 conf.dcb_tc[i] = i % num_tcs;
198 dcb_conf.dcb_tc[i] = i % num_tcs;
199 tx_conf.dcb_tc[i] = i % num_tcs;
201 dcb_conf.nb_tcs = (enum rte_eth_nb_tcs)num_tcs;
202 (void)(rte_memcpy(eth_conf, &vmdq_dcb_conf_default, sizeof(*eth_conf)));
203 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_dcb_conf, &conf,
205 (void)(rte_memcpy(ð_conf->rx_adv_conf.dcb_rx_conf, &dcb_conf,
207 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &vmdq_conf,
209 (void)(rte_memcpy(ð_conf->tx_adv_conf.vmdq_dcb_tx_conf, &tx_conf,
212 eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
213 eth_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP |
222 * Initialises a given port using global settings and with the rx buffers
223 * coming from the mbuf_pool passed as parameter
/* NOTE(review): many lines of this function are missing from this extract
 * (braces, returns, some arguments); code is kept verbatim.  The leading
 * numbers are original source line numbers. */
226 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
228 struct rte_eth_dev_info dev_info;
229 struct rte_eth_conf port_conf = {0};
230 uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
231 uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
234 uint16_t queues_per_pool;
235 uint32_t max_nb_pools;
238 * The max pool number from dev_info will be used to validate the pool
239 * number specified in cmd line
241 rte_eth_dev_info_get(port, &dev_info);
242 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
244 * We allow to process part of VMDQ pools specified by num_pools in
/* Reject a --nb-pools value larger than the device supports. */
247 if (num_pools > max_nb_pools) {
248 printf("num_pools %d >max_nb_pools %d\n",
249 num_pools, max_nb_pools);
254 * NIC queues are divided into pf queues and vmdq queues.
255 * There is assumption here all ports have the same configuration!
/* Record where the VMDq queue/pool range starts on this device; these
 * globals are read by sighup_handler() and lcore_main(). */
257 vmdq_queue_base = dev_info.vmdq_queue_base;
258 vmdq_pool_base = dev_info.vmdq_pool_base;
259 printf("vmdq queue base: %d pool base %d\n",
260 vmdq_queue_base, vmdq_pool_base);
/* NOTE(review): pool base 0 is treated as "all RX queues belong to VMDq"
 * and requires num_tcs * num_pools == total queues — confirm against the
 * PMDs this example targets. */
261 if (vmdq_pool_base == 0) {
262 num_vmdq_queues = dev_info.max_rx_queues;
263 num_queues = dev_info.max_rx_queues;
264 if (num_tcs != num_vmdq_queues / num_pools) {
265 printf("nb_tcs %d is invalid considering with"
266 " nb_pools %d, nb_tcs * nb_pools should = %d\n",
267 num_tcs, num_pools, num_vmdq_queues);
/* Otherwise size the VMDq range from the device's per-pool queue count. */
271 queues_per_pool = dev_info.vmdq_queue_num /
272 dev_info.max_vmdq_pools;
273 if (num_tcs > queues_per_pool) {
274 printf("num_tcs %d > num of queues per pool %d\n",
275 num_tcs, queues_per_pool);
278 num_vmdq_queues = num_pools * queues_per_pool;
279 num_queues = vmdq_queue_base + num_vmdq_queues;
280 printf("Configured vmdq pool num: %u,"
281 " each vmdq pool has %u queues\n",
282 num_pools, queues_per_pool);
285 if (port >= rte_eth_dev_count())
/* Build the full VMDq+DCB rte_eth_conf from the command-line globals. */
288 retval = get_eth_conf(&port_conf);
293 * Though in this example, all queues including pf queues are setup.
294 * This is because VMDQ queues doesn't always start from zero, and the
295 * PMD layer doesn't support selectively initialising part of rx/tx
298 retval = rte_eth_dev_configure(port, num_queues, num_queues, &port_conf);
/* Let the PMD clamp the descriptor counts to what the HW supports ... */
302 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
/* ... and fail early if they grew beyond what the mbuf pool was sized for. */
306 if (RTE_MAX(rxRingSize, txRingSize) >
307 RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, RTE_TEST_TX_DESC_DEFAULT)) {
308 printf("Mbuf pool has an insufficient size for port %u.\n",
/* Set up every RX queue (PF + VMDq) on the port's NUMA socket. */
313 for (q = 0; q < num_queues; q++) {
314 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
315 rte_eth_dev_socket_id(port),
319 printf("initialize rx queue %d failed\n", q);
/* Same for every TX queue. */
324 for (q = 0; q < num_queues; q++) {
325 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
326 rte_eth_dev_socket_id(port),
329 printf("initialize tx queue %d failed\n", q);
334 retval = rte_eth_dev_start(port);
336 printf("port %d start failed\n", port);
/* Cache the port MAC (used later as the forwarding source address). */
340 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
341 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
342 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
344 vmdq_ports_eth_addr[port].addr_bytes[0],
345 vmdq_ports_eth_addr[port].addr_bytes[1],
346 vmdq_ports_eth_addr[port].addr_bytes[2],
347 vmdq_ports_eth_addr[port].addr_bytes[3],
348 vmdq_ports_eth_addr[port].addr_bytes[4],
349 vmdq_ports_eth_addr[port].addr_bytes[5]);
351 /* Set mac for each pool.*/
/* Per-pool MAC = template with byte 4 = port and byte 5 = pool index. */
352 for (q = 0; q < num_pools; q++) {
353 struct ether_addr mac;
355 mac = pool_addr_template;
356 mac.addr_bytes[4] = port;
357 mac.addr_bytes[5] = q;
358 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
360 mac.addr_bytes[0], mac.addr_bytes[1],
361 mac.addr_bytes[2], mac.addr_bytes[3],
362 mac.addr_bytes[4], mac.addr_bytes[5]);
363 retval = rte_eth_dev_mac_addr_add(port, &mac,
366 printf("mac addr add failed at pool %d\n", q);
374 /* Check num_pools parameter and set it if OK*/
376 vmdq_parse_num_pools(const char *q_arg)
381 /* parse number string */
382 n = strtol(q_arg, &end, 10);
383 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
385 if (n != 16 && n != 32)
388 num_pools = ETH_16_POOLS;
390 num_pools = ETH_32_POOLS;
395 /* Check num_tcs parameter and set it if OK*/
397 vmdq_parse_num_tcs(const char *q_arg)
402 /* parse number string */
403 n = strtol(q_arg, &end, 10);
404 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
407 if (n != 4 && n != 8)
/* Parse @portmask as a hexadecimal bitmask of enabled port IDs.
 *
 * Returns the mask (> 0) on success, or -1 when the string is empty,
 * contains non-hex characters, or parses to zero.  Note the unsigned
 * long result is narrowed to int on return, so only the low 31 bits
 * of the mask are usable.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}
/* Print command-line usage for the application-specific options.
 *
 * Fix: removed a stray ']' that followed PORTMASK in the usage string.
 */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
	" --nb-pools NP: number of pools (32 default, 16)\n"
	" --nb-tcs NP: number of TCs (4 default, 8)\n"
	" --enable-rss: enable RSS (disabled by default)\n",
	       prgname);
}
445 /* Parse the argument (num_pools) given in the command line of the application */
/* NOTE(review): the getopt switch/case scaffolding and several closing
 * braces of this function are missing from this extract; code is kept
 * verbatim.  The leading numbers are original source line numbers. */
447 vmdq_parse_args(int argc, char **argv)
452 const char *prgname = argv[0];
/* Long options: --nb-pools and --nb-tcs take a value, --enable-rss is a flag. */
453 static struct option long_option[] = {
454 {"nb-pools", required_argument, NULL, 0},
455 {"nb-tcs", required_argument, NULL, 0},
456 {"enable-rss", 0, NULL, 0},
460 /* Parse command line */
461 while ((opt = getopt_long(argc, argv, "p:", long_option,
462 &option_index)) != EOF) {
/* -p: hexadecimal mask of ports to use. */
466 enabled_port_mask = parse_portmask(optarg);
467 if (enabled_port_mask == 0) {
468 printf("invalid portmask\n");
/* Long options are dispatched by name via option_index. */
474 if (!strcmp(long_option[option_index].name, "nb-pools")) {
475 if (vmdq_parse_num_pools(optarg) == -1) {
476 printf("invalid number of pools\n");
481 if (!strcmp(long_option[option_index].name, "nb-tcs")) {
482 if (vmdq_parse_num_tcs(optarg) == -1) {
483 printf("invalid number of tcs\n");
488 if (!strcmp(long_option[option_index].name, "enable-rss"))
/* Expand the mask into the ports[] array and count the enabled ports. */
498 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
499 if (enabled_port_mask & (1 << i))
500 ports[num_ports++] = (uint8_t)i;
/* Forwarding pairs ports p <-> p^1, so an even count >= 2 is required. */
503 if (num_ports < 2 || num_ports % 2) {
504 printf("Current enabled port number is %u,"
505 " but it should be even and at least 2\n", num_ports);
513 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
515 struct ether_hdr *eth;
518 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
520 /* 02:00:00:00:00:xx */
521 tmp = ð->d_addr.addr_bytes[0];
522 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
525 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
528 /* When we receive a HUP signal, print out our stats */
530 sighup_handler(int signum)
532 unsigned q = vmdq_queue_base;
534 for (; q < num_queues; q++) {
535 if (q % (num_vmdq_queues / num_pools) == 0)
536 printf("\nPool %u: ", (q - vmdq_queue_base) /
537 (num_vmdq_queues / num_pools));
538 printf("%lu ", rxPackets[q]);
540 printf("\nFinished handling signal %d\n", signum);
544 * Main thread that does the work, reading from INPUT_PORT
545 * and writing to OUTPUT_PORT
/* NOTE(review): the if/else scaffolding around the queue-range split and
 * the main forwarding loop's braces are missing from this extract; code is
 * kept verbatim.  The leading numbers are original source line numbers. */
548 lcore_main(void *arg)
550 const uintptr_t core_num = (uintptr_t)arg;
551 const unsigned num_cores = rte_lcore_count();
552 uint16_t startQueue, endQueue;
/* Split num_vmdq_queues across the cores; the first `remainder` cores
 * each take one extra queue. */
554 const uint16_t quot = (uint16_t)(num_vmdq_queues / num_cores);
555 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
559 if (core_num < remainder) {
560 startQueue = (uint16_t)(core_num * (quot + 1));
561 endQueue = (uint16_t)(startQueue + quot + 1);
563 startQueue = (uint16_t)(core_num * quot + remainder);
564 endQueue = (uint16_t)(startQueue + quot);
/* (even-split branch — the enclosing condition is not visible here) */
567 startQueue = (uint16_t)(core_num * quot);
568 endQueue = (uint16_t)(startQueue + quot);
571 /* vmdq queue idx doesn't always start from zero.*/
572 startQueue += vmdq_queue_base;
573 endQueue += vmdq_queue_base;
574 printf("Core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_num,
575 rte_lcore_id(), startQueue, endQueue - 1);
/* More cores than queues: this core got an empty range. */
577 if (startQueue == endQueue) {
578 printf("lcore %u has nothing to do\n", (unsigned)core_num);
583 struct rte_mbuf *buf[MAX_PKT_BURST];
584 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
/* Forward between port pairs: p and p^1 (0<->1, 2<->3, ...). */
585 for (p = 0; p < num_ports; p++) {
586 const uint8_t src = ports[p];
587 const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */
589 if ((src == INVALID_PORT_ID) || (dst == INVALID_PORT_ID))
/* Poll each queue assigned to this core, count, rewrite MACs, send. */
592 for (q = startQueue; q < endQueue; q++) {
593 const uint16_t rxCount = rte_eth_rx_burst(src,
596 if (unlikely(rxCount == 0))
599 rxPackets[q] += rxCount;
601 for (i = 0; i < rxCount; i++)
602 update_mac_address(buf[i], dst);
604 const uint16_t txCount = rte_eth_tx_burst(dst,
/* Free any mbufs the TX ring could not accept. */
606 if (txCount != rxCount) {
607 for (i = txCount; i < rxCount; i++)
608 rte_pktmbuf_free(buf[i]);
616 * Update the global var NUM_PORTS and array PORTS according to system ports number
617 * and return valid ports number
619 static unsigned check_ports_num(unsigned nb_ports)
621 unsigned valid_num_ports = num_ports;
624 if (num_ports > nb_ports) {
625 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
626 num_ports, nb_ports);
627 num_ports = nb_ports;
630 for (portid = 0; portid < num_ports; portid++) {
631 if (ports[portid] >= nb_ports) {
632 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
633 ports[portid], (nb_ports - 1));
634 ports[portid] = INVALID_PORT_ID;
638 return valid_num_ports;
642 /* Main function, does initialisation and calls the per-lcore functions */
/* NOTE(review): declarations, braces and several statements of main() are
 * missing from this extract; code is kept verbatim.  The leading numbers
 * are original source line numbers. */
644 main(int argc, char *argv[])
647 struct rte_mempool *mbuf_pool;
651 unsigned nb_ports, valid_num_ports;
/* SIGHUP dumps per-queue stats (see sighup_handler). */
654 signal(SIGHUP, sighup_handler);
657 ret = rte_eal_init(argc, argv);
659 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
663 /* parse app arguments */
664 ret = vmdq_parse_args(argc, argv);
666 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
668 cores = rte_lcore_count();
/* NOTE(review): (cores & (cores - 1)) != 0 tests for a power of two, but
 * the error message says "even number of cores" — check and message
 * disagree; confirm which is intended. */
669 if ((cores & (cores - 1)) != 0 || cores > RTE_MAX_LCORE) {
670 rte_exit(EXIT_FAILURE,"This program can only run on an even"
671 " number of cores(1-%d)\n\n", RTE_MAX_LCORE);
674 nb_ports = rte_eth_dev_count();
677 * Update the global var NUM_PORTS and global array PORTS
678 * and get value of var VALID_NUM_PORTS according to system ports number
680 valid_num_ports = check_ports_num(nb_ports);
682 if (valid_num_ports < 2 || valid_num_ports % 2) {
683 printf("Current valid ports number is %u\n", valid_num_ports);
684 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
/* One shared mbuf pool sized for every port's RX+TX rings. */
687 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
688 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
689 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
690 if (mbuf_pool == NULL)
691 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
693 /* initialize all ports */
694 for (portid = 0; portid < nb_ports; portid++) {
695 /* skip ports that are not enabled */
696 if ((enabled_port_mask & (1 << portid)) == 0) {
697 printf("\nSkipping disabled port %d\n", portid);
700 if (port_init(portid, mbuf_pool) != 0)
701 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
704 /* call lcore_main() on every slave lcore */
/* Each worker gets a sequential core index via the (void *) argument. */
706 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
707 rte_eal_remote_launch(lcore_main, (void*)i++, lcore_id);
709 /* call on master too */
710 (void) lcore_main((void*)i);