/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
/* basic constants used in application */
#define MAX_QUEUES 1024
/*
 * 1024 queues are required to meet the needs of a large number of vmdq_pools.
 * The mbuf budget per port is
 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb).
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

/* size of the mbuf array handed to each rx burst call */
#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

/* marker stored in ports[] for a requested port that does not exist */
#define INVALID_PORT_ID 0xFF
88 /* mask of enabled ports */
89 static uint32_t enabled_port_mask;
90 static uint8_t ports[RTE_MAX_ETHPORTS];
91 static unsigned num_ports;
93 /* number of pools (if user does not specify any, 32 by default */
94 static enum rte_eth_nb_pools num_pools = ETH_32_POOLS;
95 static enum rte_eth_nb_tcs num_tcs = ETH_4_TCS;
96 static uint16_t num_queues, num_vmdq_queues;
97 static uint16_t vmdq_pool_base, vmdq_queue_base;
98 static uint8_t rss_enable;
100 /* empty vmdq+dcb configuration structure. Filled in programatically */
101 static const struct rte_eth_conf vmdq_dcb_conf_default = {
103 .mq_mode = ETH_MQ_RX_VMDQ_DCB,
105 .header_split = 0, /**< Header Split disabled */
106 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
107 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
108 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
111 .mq_mode = ETH_MQ_TX_VMDQ_DCB,
114 * should be overridden separately in code with
119 .nb_queue_pools = ETH_32_POOLS,
120 .enable_default_pool = 0,
123 .pool_map = {{0, 0},},
128 /** Traffic class each UP mapped to. */
132 .nb_queue_pools = ETH_32_POOLS,
133 .enable_default_pool = 0,
136 .pool_map = {{0, 0},},
140 .vmdq_dcb_tx_conf = {
141 .nb_queue_pools = ETH_32_POOLS,
147 /* array used for printing out statistics */
148 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
150 const uint16_t vlan_tags[] = {
151 0, 1, 2, 3, 4, 5, 6, 7,
152 8, 9, 10, 11, 12, 13, 14, 15,
153 16, 17, 18, 19, 20, 21, 22, 23,
154 24, 25, 26, 27, 28, 29, 30, 31
157 const uint16_t num_vlans = RTE_DIM(vlan_tags);
158 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
159 static struct ether_addr pool_addr_template = {
160 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
163 /* ethernet addresses of ports */
164 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
166 /* Builds up the correct configuration for vmdq+dcb based on the vlan tags array
167 * given above, and the number of traffic classes available for use. */
169 get_eth_conf(struct rte_eth_conf *eth_conf)
171 struct rte_eth_vmdq_dcb_conf conf;
172 struct rte_eth_vmdq_rx_conf vmdq_conf;
173 struct rte_eth_dcb_rx_conf dcb_conf;
174 struct rte_eth_vmdq_dcb_tx_conf tx_conf;
177 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
178 vmdq_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
179 tx_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
180 conf.nb_pool_maps = num_pools;
181 vmdq_conf.nb_pool_maps = num_pools;
182 conf.enable_default_pool = 0;
183 vmdq_conf.enable_default_pool = 0;
184 conf.default_pool = 0; /* set explicit value, even if not used */
185 vmdq_conf.default_pool = 0;
187 for (i = 0; i < conf.nb_pool_maps; i++) {
188 conf.pool_map[i].vlan_id = vlan_tags[i];
189 vmdq_conf.pool_map[i].vlan_id = vlan_tags[i];
190 conf.pool_map[i].pools = 1UL << i;
191 vmdq_conf.pool_map[i].pools = 1UL << i;
193 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++){
194 conf.dcb_tc[i] = i % num_tcs;
195 dcb_conf.dcb_tc[i] = i % num_tcs;
196 tx_conf.dcb_tc[i] = i % num_tcs;
198 dcb_conf.nb_tcs = (enum rte_eth_nb_tcs)num_tcs;
199 (void)(rte_memcpy(eth_conf, &vmdq_dcb_conf_default, sizeof(*eth_conf)));
200 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_dcb_conf, &conf,
202 (void)(rte_memcpy(ð_conf->rx_adv_conf.dcb_rx_conf, &dcb_conf,
204 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &vmdq_conf,
206 (void)(rte_memcpy(ð_conf->tx_adv_conf.vmdq_dcb_tx_conf, &tx_conf,
209 eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
210 eth_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP |
219 * Initialises a given port using global settings and with the rx buffers
220 * coming from the mbuf_pool passed as parameter
223 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
225 struct rte_eth_dev_info dev_info;
226 struct rte_eth_conf port_conf = {0};
227 uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
228 uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
231 uint16_t queues_per_pool;
232 uint32_t max_nb_pools;
235 * The max pool number from dev_info will be used to validate the pool
236 * number specified in cmd line
238 rte_eth_dev_info_get(port, &dev_info);
239 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
241 * We allow to process part of VMDQ pools specified by num_pools in
244 if (num_pools > max_nb_pools) {
245 printf("num_pools %d >max_nb_pools %d\n",
246 num_pools, max_nb_pools);
251 * NIC queues are divided into pf queues and vmdq queues.
252 * There is assumption here all ports have the same configuration!
254 vmdq_queue_base = dev_info.vmdq_queue_base;
255 vmdq_pool_base = dev_info.vmdq_pool_base;
256 printf("vmdq queue base: %d pool base %d\n",
257 vmdq_queue_base, vmdq_pool_base);
258 if (vmdq_pool_base == 0) {
259 num_vmdq_queues = dev_info.max_rx_queues;
260 num_queues = dev_info.max_rx_queues;
261 if (num_tcs != num_vmdq_queues / num_pools) {
262 printf("nb_tcs %d is invalid considering with"
263 " nb_pools %d, nb_tcs * nb_pools should = %d\n",
264 num_tcs, num_pools, num_vmdq_queues);
268 queues_per_pool = dev_info.vmdq_queue_num /
269 dev_info.max_vmdq_pools;
270 if (num_tcs > queues_per_pool) {
271 printf("num_tcs %d > num of queues per pool %d\n",
272 num_tcs, queues_per_pool);
275 num_vmdq_queues = num_pools * queues_per_pool;
276 num_queues = vmdq_queue_base + num_vmdq_queues;
277 printf("Configured vmdq pool num: %u,"
278 " each vmdq pool has %u queues\n",
279 num_pools, queues_per_pool);
282 if (port >= rte_eth_dev_count())
285 retval = get_eth_conf(&port_conf);
290 * Though in this example, all queues including pf queues are setup.
291 * This is because VMDQ queues doesn't always start from zero, and the
292 * PMD layer doesn't support selectively initialising part of rx/tx
295 retval = rte_eth_dev_configure(port, num_queues, num_queues, &port_conf);
299 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
303 if (RTE_MAX(rxRingSize, txRingSize) >
304 RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, RTE_TEST_TX_DESC_DEFAULT)) {
305 printf("Mbuf pool has an insufficient size for port %u.\n",
310 for (q = 0; q < num_queues; q++) {
311 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
312 rte_eth_dev_socket_id(port),
316 printf("initialize rx queue %d failed\n", q);
321 for (q = 0; q < num_queues; q++) {
322 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
323 rte_eth_dev_socket_id(port),
326 printf("initialize tx queue %d failed\n", q);
331 retval = rte_eth_dev_start(port);
333 printf("port %d start failed\n", port);
337 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
338 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
339 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
341 vmdq_ports_eth_addr[port].addr_bytes[0],
342 vmdq_ports_eth_addr[port].addr_bytes[1],
343 vmdq_ports_eth_addr[port].addr_bytes[2],
344 vmdq_ports_eth_addr[port].addr_bytes[3],
345 vmdq_ports_eth_addr[port].addr_bytes[4],
346 vmdq_ports_eth_addr[port].addr_bytes[5]);
348 /* Set mac for each pool.*/
349 for (q = 0; q < num_pools; q++) {
350 struct ether_addr mac;
352 mac = pool_addr_template;
353 mac.addr_bytes[4] = port;
354 mac.addr_bytes[5] = q;
355 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
357 mac.addr_bytes[0], mac.addr_bytes[1],
358 mac.addr_bytes[2], mac.addr_bytes[3],
359 mac.addr_bytes[4], mac.addr_bytes[5]);
360 retval = rte_eth_dev_mac_addr_add(port, &mac,
363 printf("mac addr add failed at pool %d\n", q);
371 /* Check num_pools parameter and set it if OK*/
373 vmdq_parse_num_pools(const char *q_arg)
378 /* parse number string */
379 n = strtol(q_arg, &end, 10);
380 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
382 if (n != 16 && n != 32)
385 num_pools = ETH_16_POOLS;
387 num_pools = ETH_32_POOLS;
392 /* Check num_tcs parameter and set it if OK*/
394 vmdq_parse_num_tcs(const char *q_arg)
399 /* parse number string */
400 n = strtol(q_arg, &end, 10);
401 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
404 if (n != 4 && n != 8)
/*
 * Parse a hexadecimal port mask.
 *
 * Returns the mask, or 0 when the string is empty, contains non-hex
 * characters, does not fit in 32 bits, or encodes an empty mask.  The
 * caller stores the result in an unsigned 32-bit mask and treats 0 as
 * "invalid", so 0 (not -1, which would read back as "all ports") is the
 * error value.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	errno = 0;
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	/* out-of-range input must not be silently truncated to 32 bits */
	if (errno == ERANGE || pm > UINT32_MAX)
		return 0;

	return (int)pm;
}
/*
 * Display usage: prints the command-line synopsis, with prgname as the
 * program name, to standard output.
 */
static void
vmdq_usage(const char *prgname)
{
	/* the synopsis used to end in an unmatched ']' after PORTMASK */
	printf("%s [EAL options] -- -p PORTMASK\n"
	" --nb-pools NP: number of pools (32 default, 16)\n"
	" --nb-tcs NP: number of TCs (4 default, 8)\n"
	" --enable-rss: enable RSS (disabled by default)\n",
	       prgname);
}
442 /* Parse the argument (num_pools) given in the command line of the application */
444 vmdq_parse_args(int argc, char **argv)
449 const char *prgname = argv[0];
450 static struct option long_option[] = {
451 {"nb-pools", required_argument, NULL, 0},
452 {"nb-tcs", required_argument, NULL, 0},
453 {"enable-rss", 0, NULL, 0},
457 /* Parse command line */
458 while ((opt = getopt_long(argc, argv, "p:", long_option,
459 &option_index)) != EOF) {
463 enabled_port_mask = parse_portmask(optarg);
464 if (enabled_port_mask == 0) {
465 printf("invalid portmask\n");
471 if (!strcmp(long_option[option_index].name, "nb-pools")) {
472 if (vmdq_parse_num_pools(optarg) == -1) {
473 printf("invalid number of pools\n");
478 if (!strcmp(long_option[option_index].name, "nb-tcs")) {
479 if (vmdq_parse_num_tcs(optarg) == -1) {
480 printf("invalid number of tcs\n");
485 if (!strcmp(long_option[option_index].name, "enable-rss"))
495 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
496 if (enabled_port_mask & (1 << i))
497 ports[num_ports++] = (uint8_t)i;
500 if (num_ports < 2 || num_ports % 2) {
501 printf("Current enabled port number is %u,"
502 " but it should be even and at least 2\n", num_ports);
510 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
512 struct ether_hdr *eth;
515 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
517 /* 02:00:00:00:00:xx */
518 tmp = ð->d_addr.addr_bytes[0];
519 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
522 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
525 /* When we receive a HUP signal, print out our stats */
527 sighup_handler(int signum)
529 unsigned q = vmdq_queue_base;
531 for (; q < num_queues; q++) {
532 if (q % (num_vmdq_queues / num_pools) == 0)
533 printf("\nPool %u: ", (q - vmdq_queue_base) /
534 (num_vmdq_queues / num_pools));
535 printf("%lu ", rxPackets[q]);
537 printf("\nFinished handling signal %d\n", signum);
541 * Main thread that does the work, reading from INPUT_PORT
542 * and writing to OUTPUT_PORT
545 lcore_main(void *arg)
547 const uintptr_t core_num = (uintptr_t)arg;
548 const unsigned num_cores = rte_lcore_count();
549 uint16_t startQueue, endQueue;
551 const uint16_t quot = (uint16_t)(num_vmdq_queues / num_cores);
552 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
556 if (core_num < remainder) {
557 startQueue = (uint16_t)(core_num * (quot + 1));
558 endQueue = (uint16_t)(startQueue + quot + 1);
560 startQueue = (uint16_t)(core_num * quot + remainder);
561 endQueue = (uint16_t)(startQueue + quot);
564 startQueue = (uint16_t)(core_num * quot);
565 endQueue = (uint16_t)(startQueue + quot);
568 /* vmdq queue idx doesn't always start from zero.*/
569 startQueue += vmdq_queue_base;
570 endQueue += vmdq_queue_base;
571 printf("Core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_num,
572 rte_lcore_id(), startQueue, endQueue - 1);
574 if (startQueue == endQueue) {
575 printf("lcore %u has nothing to do\n", (unsigned)core_num);
580 struct rte_mbuf *buf[MAX_PKT_BURST];
581 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
582 for (p = 0; p < num_ports; p++) {
583 const uint8_t src = ports[p];
584 const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */
586 if ((src == INVALID_PORT_ID) || (dst == INVALID_PORT_ID))
589 for (q = startQueue; q < endQueue; q++) {
590 const uint16_t rxCount = rte_eth_rx_burst(src,
593 if (unlikely(rxCount == 0))
596 rxPackets[q] += rxCount;
598 for (i = 0; i < rxCount; i++)
599 update_mac_address(buf[i], dst);
601 const uint16_t txCount = rte_eth_tx_burst(dst,
603 if (txCount != rxCount) {
604 for (i = txCount; i < rxCount; i++)
605 rte_pktmbuf_free(buf[i]);
613 * Update the global var NUM_PORTS and array PORTS according to system ports number
614 * and return valid ports number
616 static unsigned check_ports_num(unsigned nb_ports)
618 unsigned valid_num_ports = num_ports;
621 if (num_ports > nb_ports) {
622 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
623 num_ports, nb_ports);
624 num_ports = nb_ports;
627 for (portid = 0; portid < num_ports; portid++) {
628 if (ports[portid] >= nb_ports) {
629 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
630 ports[portid], (nb_ports - 1));
631 ports[portid] = INVALID_PORT_ID;
635 return valid_num_ports;
639 /* Main function, does initialisation and calls the per-lcore functions */
641 main(int argc, char *argv[])
644 struct rte_mempool *mbuf_pool;
648 unsigned nb_ports, valid_num_ports;
651 signal(SIGHUP, sighup_handler);
654 ret = rte_eal_init(argc, argv);
656 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
660 /* parse app arguments */
661 ret = vmdq_parse_args(argc, argv);
663 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
665 cores = rte_lcore_count();
666 if ((cores & (cores - 1)) != 0 || cores > RTE_MAX_LCORE) {
667 rte_exit(EXIT_FAILURE,"This program can only run on an even"
668 " number of cores(1-%d)\n\n", RTE_MAX_LCORE);
671 nb_ports = rte_eth_dev_count();
674 * Update the global var NUM_PORTS and global array PORTS
675 * and get value of var VALID_NUM_PORTS according to system ports number
677 valid_num_ports = check_ports_num(nb_ports);
679 if (valid_num_ports < 2 || valid_num_ports % 2) {
680 printf("Current valid ports number is %u\n", valid_num_ports);
681 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
684 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
685 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
686 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
687 if (mbuf_pool == NULL)
688 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
690 /* initialize all ports */
691 for (portid = 0; portid < nb_ports; portid++) {
692 /* skip ports that are not enabled */
693 if ((enabled_port_mask & (1 << portid)) == 0) {
694 printf("\nSkipping disabled port %d\n", portid);
697 if (port_init(portid, mbuf_pool) != 0)
698 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
701 /* call lcore_main() on every slave lcore */
703 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
704 rte_eal_remote_launch(lcore_main, (void*)i++, lcore_id);
706 /* call on master too */
707 (void) lcore_main((void*)i);