/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <errno.h>
#include <getopt.h>
#include <signal.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
72 /* basic constants used in application */
73 #define MAX_QUEUES 1024
75 * 1024 queues require to meet the needs of a large number of vmdq_pools.
76 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
78 #define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
79 RTE_TEST_TX_DESC_DEFAULT))
80 #define MBUF_CACHE_SIZE 64
82 #define MAX_PKT_BURST 32
85 * Configurable number of RX/TX ring descriptors
87 #define RTE_TEST_RX_DESC_DEFAULT 128
88 #define RTE_TEST_TX_DESC_DEFAULT 512
90 #define INVALID_PORT_ID 0xFF
92 /* mask of enabled ports */
93 static uint32_t enabled_port_mask;
94 static uint8_t ports[RTE_MAX_ETHPORTS];
95 static unsigned num_ports;
97 /* number of pools (if user does not specify any, 32 by default */
98 static enum rte_eth_nb_pools num_pools = ETH_32_POOLS;
99 static enum rte_eth_nb_tcs num_tcs = ETH_4_TCS;
100 static uint16_t num_queues, num_vmdq_queues;
101 static uint16_t vmdq_pool_base, vmdq_queue_base;
102 static uint8_t rss_enable;
104 /* empty vmdq+dcb configuration structure. Filled in programatically */
105 static const struct rte_eth_conf vmdq_dcb_conf_default = {
107 .mq_mode = ETH_MQ_RX_VMDQ_DCB,
109 .header_split = 0, /**< Header Split disabled */
110 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
111 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
112 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
115 .mq_mode = ETH_MQ_TX_VMDQ_DCB,
118 * should be overridden separately in code with
123 .nb_queue_pools = ETH_32_POOLS,
124 .enable_default_pool = 0,
127 .pool_map = {{0, 0},},
132 /** Traffic class each UP mapped to. */
136 .nb_queue_pools = ETH_32_POOLS,
137 .enable_default_pool = 0,
140 .pool_map = {{0, 0},},
144 .vmdq_dcb_tx_conf = {
145 .nb_queue_pools = ETH_32_POOLS,
151 /* array used for printing out statistics */
152 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
154 const uint16_t vlan_tags[] = {
155 0, 1, 2, 3, 4, 5, 6, 7,
156 8, 9, 10, 11, 12, 13, 14, 15,
157 16, 17, 18, 19, 20, 21, 22, 23,
158 24, 25, 26, 27, 28, 29, 30, 31
161 const uint16_t num_vlans = RTE_DIM(vlan_tags);
162 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
163 static struct ether_addr pool_addr_template = {
164 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
167 /* ethernet addresses of ports */
168 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
170 /* Builds up the correct configuration for vmdq+dcb based on the vlan tags array
171 * given above, and the number of traffic classes available for use. */
173 get_eth_conf(struct rte_eth_conf *eth_conf)
175 struct rte_eth_vmdq_dcb_conf conf;
176 struct rte_eth_vmdq_rx_conf vmdq_conf;
177 struct rte_eth_dcb_rx_conf dcb_conf;
178 struct rte_eth_vmdq_dcb_tx_conf tx_conf;
181 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
182 vmdq_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
183 tx_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
184 conf.nb_pool_maps = num_pools;
185 vmdq_conf.nb_pool_maps = num_pools;
186 conf.enable_default_pool = 0;
187 vmdq_conf.enable_default_pool = 0;
188 conf.default_pool = 0; /* set explicit value, even if not used */
189 vmdq_conf.default_pool = 0;
191 for (i = 0; i < conf.nb_pool_maps; i++) {
192 conf.pool_map[i].vlan_id = vlan_tags[i];
193 vmdq_conf.pool_map[i].vlan_id = vlan_tags[i];
194 conf.pool_map[i].pools = 1UL << i;
195 vmdq_conf.pool_map[i].pools = 1UL << i;
197 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++){
198 conf.dcb_tc[i] = i % num_tcs;
199 dcb_conf.dcb_tc[i] = i % num_tcs;
200 tx_conf.dcb_tc[i] = i % num_tcs;
202 dcb_conf.nb_tcs = (enum rte_eth_nb_tcs)num_tcs;
203 (void)(rte_memcpy(eth_conf, &vmdq_dcb_conf_default, sizeof(*eth_conf)));
204 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_dcb_conf, &conf,
206 (void)(rte_memcpy(ð_conf->rx_adv_conf.dcb_rx_conf, &dcb_conf,
208 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &vmdq_conf,
210 (void)(rte_memcpy(ð_conf->tx_adv_conf.vmdq_dcb_tx_conf, &tx_conf,
213 eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
214 eth_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP |
223 * Initialises a given port using global settings and with the rx buffers
224 * coming from the mbuf_pool passed as parameter
227 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
229 struct rte_eth_dev_info dev_info;
230 struct rte_eth_conf port_conf = {0};
231 const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
232 const uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
235 uint16_t queues_per_pool;
236 uint32_t max_nb_pools;
239 * The max pool number from dev_info will be used to validate the pool
240 * number specified in cmd line
242 rte_eth_dev_info_get(port, &dev_info);
243 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
245 * We allow to process part of VMDQ pools specified by num_pools in
248 if (num_pools > max_nb_pools) {
249 printf("num_pools %d >max_nb_pools %d\n",
250 num_pools, max_nb_pools);
255 * NIC queues are divided into pf queues and vmdq queues.
256 * There is assumption here all ports have the same configuration!
258 vmdq_queue_base = dev_info.vmdq_queue_base;
259 vmdq_pool_base = dev_info.vmdq_pool_base;
260 printf("vmdq queue base: %d pool base %d\n",
261 vmdq_queue_base, vmdq_pool_base);
262 if (vmdq_pool_base == 0) {
263 num_vmdq_queues = dev_info.max_rx_queues;
264 num_queues = dev_info.max_rx_queues;
265 if (num_tcs != num_vmdq_queues / num_pools) {
266 printf("nb_tcs %d is invalid considering with"
267 " nb_pools %d, nb_tcs * nb_pools should = %d\n",
268 num_tcs, num_pools, num_vmdq_queues);
272 queues_per_pool = dev_info.vmdq_queue_num /
273 dev_info.max_vmdq_pools;
274 if (num_tcs > queues_per_pool) {
275 printf("num_tcs %d > num of queues per pool %d\n",
276 num_tcs, queues_per_pool);
279 num_vmdq_queues = num_pools * queues_per_pool;
280 num_queues = vmdq_queue_base + num_vmdq_queues;
281 printf("Configured vmdq pool num: %u,"
282 " each vmdq pool has %u queues\n",
283 num_pools, queues_per_pool);
286 if (port >= rte_eth_dev_count())
289 retval = get_eth_conf(&port_conf);
294 * Though in this example, all queues including pf queues are setup.
295 * This is because VMDQ queues doesn't always start from zero, and the
296 * PMD layer doesn't support selectively initialising part of rx/tx
299 retval = rte_eth_dev_configure(port, num_queues, num_queues, &port_conf);
303 for (q = 0; q < num_queues; q++) {
304 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
305 rte_eth_dev_socket_id(port),
309 printf("initialize rx queue %d failed\n", q);
314 for (q = 0; q < num_queues; q++) {
315 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
316 rte_eth_dev_socket_id(port),
319 printf("initialize tx queue %d failed\n", q);
324 retval = rte_eth_dev_start(port);
326 printf("port %d start failed\n", port);
330 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
331 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
332 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
334 vmdq_ports_eth_addr[port].addr_bytes[0],
335 vmdq_ports_eth_addr[port].addr_bytes[1],
336 vmdq_ports_eth_addr[port].addr_bytes[2],
337 vmdq_ports_eth_addr[port].addr_bytes[3],
338 vmdq_ports_eth_addr[port].addr_bytes[4],
339 vmdq_ports_eth_addr[port].addr_bytes[5]);
341 /* Set mac for each pool.*/
342 for (q = 0; q < num_pools; q++) {
343 struct ether_addr mac;
345 mac = pool_addr_template;
346 mac.addr_bytes[4] = port;
347 mac.addr_bytes[5] = q;
348 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
350 mac.addr_bytes[0], mac.addr_bytes[1],
351 mac.addr_bytes[2], mac.addr_bytes[3],
352 mac.addr_bytes[4], mac.addr_bytes[5]);
353 retval = rte_eth_dev_mac_addr_add(port, &mac,
356 printf("mac addr add failed at pool %d\n", q);
364 /* Check num_pools parameter and set it if OK*/
366 vmdq_parse_num_pools(const char *q_arg)
371 /* parse number string */
372 n = strtol(q_arg, &end, 10);
373 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
375 if (n != 16 && n != 32)
378 num_pools = ETH_16_POOLS;
380 num_pools = ETH_32_POOLS;
385 /* Check num_tcs parameter and set it if OK*/
387 vmdq_parse_num_tcs(const char *q_arg)
392 /* parse number string */
393 n = strtol(q_arg, &end, 10);
394 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
397 if (n != 4 && n != 8)
408 parse_portmask(const char *portmask)
413 /* parse hexadecimal string */
414 pm = strtoul(portmask, &end, 16);
415 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
426 vmdq_usage(const char *prgname)
428 printf("%s [EAL options] -- -p PORTMASK]\n"
429 " --nb-pools NP: number of pools (32 default, 16)\n"
430 " --nb-tcs NP: number of TCs (4 default, 8)\n"
431 " --enable-rss: enable RSS (disabled by default)\n",
435 /* Parse the argument (num_pools) given in the command line of the application */
437 vmdq_parse_args(int argc, char **argv)
442 const char *prgname = argv[0];
443 static struct option long_option[] = {
444 {"nb-pools", required_argument, NULL, 0},
445 {"nb-tcs", required_argument, NULL, 0},
446 {"enable-rss", 0, NULL, 0},
450 /* Parse command line */
451 while ((opt = getopt_long(argc, argv, "p:", long_option,
452 &option_index)) != EOF) {
456 enabled_port_mask = parse_portmask(optarg);
457 if (enabled_port_mask == 0) {
458 printf("invalid portmask\n");
464 if (!strcmp(long_option[option_index].name, "nb-pools")) {
465 if (vmdq_parse_num_pools(optarg) == -1) {
466 printf("invalid number of pools\n");
471 if (!strcmp(long_option[option_index].name, "nb-tcs")) {
472 if (vmdq_parse_num_tcs(optarg) == -1) {
473 printf("invalid number of tcs\n");
478 if (!strcmp(long_option[option_index].name, "enable-rss"))
488 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
489 if (enabled_port_mask & (1 << i))
490 ports[num_ports++] = (uint8_t)i;
493 if (num_ports < 2 || num_ports % 2) {
494 printf("Current enabled port number is %u,"
495 " but it should be even and at least 2\n", num_ports);
503 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
505 struct ether_hdr *eth;
508 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
510 /* 02:00:00:00:00:xx */
511 tmp = ð->d_addr.addr_bytes[0];
512 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
515 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
518 /* When we receive a HUP signal, print out our stats */
520 sighup_handler(int signum)
522 unsigned q = vmdq_queue_base;
524 for (; q < num_queues; q++) {
525 if (q % (num_vmdq_queues / num_pools) == 0)
526 printf("\nPool %u: ", (q - vmdq_queue_base) /
527 (num_vmdq_queues / num_pools));
528 printf("%lu ", rxPackets[q]);
530 printf("\nFinished handling signal %d\n", signum);
534 * Main thread that does the work, reading from INPUT_PORT
535 * and writing to OUTPUT_PORT
538 lcore_main(void *arg)
540 const uintptr_t core_num = (uintptr_t)arg;
541 const unsigned num_cores = rte_lcore_count();
542 uint16_t startQueue, endQueue;
544 const uint16_t quot = (uint16_t)(num_vmdq_queues / num_cores);
545 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
549 if (core_num < remainder) {
550 startQueue = (uint16_t)(core_num * (quot + 1));
551 endQueue = (uint16_t)(startQueue + quot + 1);
553 startQueue = (uint16_t)(core_num * quot + remainder);
554 endQueue = (uint16_t)(startQueue + quot);
557 startQueue = (uint16_t)(core_num * quot);
558 endQueue = (uint16_t)(startQueue + quot);
561 /* vmdq queue idx doesn't always start from zero.*/
562 startQueue += vmdq_queue_base;
563 endQueue += vmdq_queue_base;
564 printf("Core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_num,
565 rte_lcore_id(), startQueue, endQueue - 1);
567 if (startQueue == endQueue) {
568 printf("lcore %u has nothing to do\n", (unsigned)core_num);
573 struct rte_mbuf *buf[MAX_PKT_BURST];
574 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
575 for (p = 0; p < num_ports; p++) {
576 const uint8_t src = ports[p];
577 const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */
579 if ((src == INVALID_PORT_ID) || (dst == INVALID_PORT_ID))
582 for (q = startQueue; q < endQueue; q++) {
583 const uint16_t rxCount = rte_eth_rx_burst(src,
586 if (unlikely(rxCount == 0))
589 rxPackets[q] += rxCount;
591 for (i = 0; i < rxCount; i++)
592 update_mac_address(buf[i], dst);
594 const uint16_t txCount = rte_eth_tx_burst(dst,
596 if (txCount != rxCount) {
597 for (i = txCount; i < rxCount; i++)
598 rte_pktmbuf_free(buf[i]);
606 * Update the global var NUM_PORTS and array PORTS according to system ports number
607 * and return valid ports number
609 static unsigned check_ports_num(unsigned nb_ports)
611 unsigned valid_num_ports = num_ports;
614 if (num_ports > nb_ports) {
615 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
616 num_ports, nb_ports);
617 num_ports = nb_ports;
620 for (portid = 0; portid < num_ports; portid++) {
621 if (ports[portid] >= nb_ports) {
622 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
623 ports[portid], (nb_ports - 1));
624 ports[portid] = INVALID_PORT_ID;
628 return valid_num_ports;
632 /* Main function, does initialisation and calls the per-lcore functions */
634 main(int argc, char *argv[])
637 struct rte_mempool *mbuf_pool;
641 unsigned nb_ports, valid_num_ports;
644 signal(SIGHUP, sighup_handler);
647 ret = rte_eal_init(argc, argv);
649 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
653 /* parse app arguments */
654 ret = vmdq_parse_args(argc, argv);
656 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
658 cores = rte_lcore_count();
659 if ((cores & (cores - 1)) != 0 || cores > RTE_MAX_LCORE) {
660 rte_exit(EXIT_FAILURE,"This program can only run on an even"
661 " number of cores(1-%d)\n\n", RTE_MAX_LCORE);
664 nb_ports = rte_eth_dev_count();
667 * Update the global var NUM_PORTS and global array PORTS
668 * and get value of var VALID_NUM_PORTS according to system ports number
670 valid_num_ports = check_ports_num(nb_ports);
672 if (valid_num_ports < 2 || valid_num_ports % 2) {
673 printf("Current valid ports number is %u\n", valid_num_ports);
674 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
677 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
678 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
679 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
680 if (mbuf_pool == NULL)
681 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
683 /* initialize all ports */
684 for (portid = 0; portid < nb_ports; portid++) {
685 /* skip ports that are not enabled */
686 if ((enabled_port_mask & (1 << portid)) == 0) {
687 printf("\nSkipping disabled port %d\n", portid);
690 if (port_init(portid, mbuf_pool) != 0)
691 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
694 /* call lcore_main() on every slave lcore */
696 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
697 rte_eal_remote_launch(lcore_main, (void*)i++, lcore_id);
699 /* call on master too */
700 (void) lcore_main((void*)i);