/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <inttypes.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>

/* Sizes the per-queue statistics array and, via NUM_MBUFS_PER_PORT, the mbuf pool. */
#define MAX_QUEUES 1024
/*
 * 1024 queues require to meet the needs of a large number of vmdq_pools.
 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

/* Maximum number of packets fetched by one receive burst. */
#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

/* Marker written into ports[] for a port id that does not exist on the system. */
#define INVALID_PORT_ID 0xFF

/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of queues and pools (if user does not specify any, 8 by default) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;

92 /* empty vmdq configuration structure. Filled in programatically */
93 static const struct rte_eth_conf vmdq_conf_default = {
95 .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
97 .header_split = 0, /**< Header Split disabled */
98 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
99 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
100 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
104 .mq_mode = ETH_MQ_TX_NONE,
108 * should be overridden separately in code with
112 .nb_queue_pools = ETH_8_POOLS,
113 .enable_default_pool = 0,
116 .pool_map = {{0, 0},},
121 static unsigned lcore_ids[RTE_MAX_LCORE];
122 static uint16_t ports[RTE_MAX_ETHPORTS];
123 static unsigned num_ports; /**< The number of ports specified in command line */
125 /* array used for printing out statistics */
126 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
128 const uint16_t vlan_tags[] = {
129 0, 1, 2, 3, 4, 5, 6, 7,
130 8, 9, 10, 11, 12, 13, 14, 15,
131 16, 17, 18, 19, 20, 21, 22, 23,
132 24, 25, 26, 27, 28, 29, 30, 31,
133 32, 33, 34, 35, 36, 37, 38, 39,
134 40, 41, 42, 43, 44, 45, 46, 47,
135 48, 49, 50, 51, 52, 53, 54, 55,
136 56, 57, 58, 59, 60, 61, 62, 63,
138 const uint16_t num_vlans = RTE_DIM(vlan_tags);
139 static uint16_t num_pf_queues, num_vmdq_queues;
140 static uint16_t vmdq_pool_base, vmdq_queue_base;
141 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
142 static struct ether_addr pool_addr_template = {
143 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
146 /* ethernet addresses of ports */
147 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
/* Queue, pool-map and pool limits for the 10G and 1G cases. */
#define MAX_QUEUE_NUM_10G 128
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_10G 64
#define MAX_POOL_NUM_1G 8
156 * Builds up the correct configuration for vmdq based on the vlan tags array
157 * given above, and determine the queue number and pool map number according to
161 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
163 struct rte_eth_vmdq_rx_conf conf;
166 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
167 conf.nb_pool_maps = num_pools;
168 conf.enable_default_pool = 0;
169 conf.default_pool = 0; /* set explicit value, even if not used */
171 for (i = 0; i < conf.nb_pool_maps; i++) {
172 conf.pool_map[i].vlan_id = vlan_tags[i];
173 conf.pool_map[i].pools = (1UL << (i % num_pools));
176 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
177 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf,
178 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
183 * Initialises a given port using global settings and with the rx buffers
184 * coming from the mbuf_pool passed as parameter
187 port_init(uint16_t port, struct rte_mempool *mbuf_pool)
189 struct rte_eth_dev_info dev_info;
190 struct rte_eth_rxconf *rxconf;
191 struct rte_eth_conf port_conf;
192 uint16_t rxRings, txRings;
193 uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
194 uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
197 uint16_t queues_per_pool;
198 uint32_t max_nb_pools;
201 * The max pool number from dev_info will be used to validate the pool
202 * number specified in cmd line
204 rte_eth_dev_info_get(port, &dev_info);
205 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
207 * We allow to process part of VMDQ pools specified by num_pools in
210 if (num_pools > max_nb_pools) {
211 printf("num_pools %d >max_nb_pools %d\n",
212 num_pools, max_nb_pools);
215 retval = get_eth_conf(&port_conf, max_nb_pools);
220 * NIC queues are divided into pf queues and vmdq queues.
222 /* There is assumption here all ports have the same configuration! */
223 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
224 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
225 num_vmdq_queues = num_pools * queues_per_pool;
226 num_queues = num_pf_queues + num_vmdq_queues;
227 vmdq_queue_base = dev_info.vmdq_queue_base;
228 vmdq_pool_base = dev_info.vmdq_pool_base;
230 printf("pf queue num: %u, configured vmdq pool num: %u,"
231 " each vmdq pool has %u queues\n",
232 num_pf_queues, num_pools, queues_per_pool);
233 printf("vmdq queue base: %d pool base %d\n",
234 vmdq_queue_base, vmdq_pool_base);
235 if (port >= rte_eth_dev_count())
239 * Though in this example, we only receive packets from the first queue
240 * of each pool and send packets through first rte_lcore_count() tx
241 * queues of vmdq queues, all queues including pf queues are setup.
242 * This is because VMDQ queues doesn't always start from zero, and the
243 * PMD layer doesn't support selectively initialising part of rx/tx
246 rxRings = (uint16_t)dev_info.max_rx_queues;
247 txRings = (uint16_t)dev_info.max_tx_queues;
248 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
252 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
256 if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
257 RTE_TEST_TX_DESC_DEFAULT)) {
258 printf("Mbuf pool has an insufficient size for port %u.\n",
263 rte_eth_dev_info_get(port, &dev_info);
264 rxconf = &dev_info.default_rxconf;
265 rxconf->rx_drop_en = 1;
266 for (q = 0; q < rxRings; q++) {
267 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
268 rte_eth_dev_socket_id(port),
272 printf("initialise rx queue %d failed\n", q);
277 for (q = 0; q < txRings; q++) {
278 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
279 rte_eth_dev_socket_id(port),
282 printf("initialise tx queue %d failed\n", q);
287 retval = rte_eth_dev_start(port);
289 printf("port %d start failed\n", port);
293 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
294 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
295 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
297 vmdq_ports_eth_addr[port].addr_bytes[0],
298 vmdq_ports_eth_addr[port].addr_bytes[1],
299 vmdq_ports_eth_addr[port].addr_bytes[2],
300 vmdq_ports_eth_addr[port].addr_bytes[3],
301 vmdq_ports_eth_addr[port].addr_bytes[4],
302 vmdq_ports_eth_addr[port].addr_bytes[5]);
305 * Set mac for each pool.
306 * There is no default mac for the pools in i40.
307 * Removes this after i40e fixes this issue.
309 for (q = 0; q < num_pools; q++) {
310 struct ether_addr mac;
311 mac = pool_addr_template;
312 mac.addr_bytes[4] = port;
313 mac.addr_bytes[5] = q;
314 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
316 mac.addr_bytes[0], mac.addr_bytes[1],
317 mac.addr_bytes[2], mac.addr_bytes[3],
318 mac.addr_bytes[4], mac.addr_bytes[5]);
319 retval = rte_eth_dev_mac_addr_add(port, &mac,
322 printf("mac addr add failed at pool %d\n", q);
330 /* Check num_pools parameter and set it if OK*/
332 vmdq_parse_num_pools(const char *q_arg)
337 /* parse number string */
338 n = strtol(q_arg, &end, 10);
339 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
342 if (num_pools > num_vlans) {
343 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
/*
 * Parse a hexadecimal port mask.
 *
 * portmask: string such as "3" or "0x3".
 *
 * Returns the mask, or 0 when the string is empty, is not a hexadecimal
 * number, evaluates to zero, or does not fit in 32 bits. Zero is used as the
 * failure value because the result is stored in an unsigned mask and tested
 * against zero; a negative error code would read back as "all ports".
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	/* The port mask is kept in a 32-bit variable. */
	if (pm > 0xFFFFFFFFUL)
		return 0;

	return (int)(uint32_t)pm;
}

/* Display usage: prints the accepted application options to stdout. */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK [--nb-pools NP]\n"
	"  -p PORTMASK: hexadecimal bitmask of ports to use\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}

379 /* Parse the argument (num_pools) given in the command line of the application */
381 vmdq_parse_args(int argc, char **argv)
386 const char *prgname = argv[0];
387 static struct option long_option[] = {
388 {"nb-pools", required_argument, NULL, 0},
392 /* Parse command line */
393 while ((opt = getopt_long(argc, argv, "p:", long_option,
394 &option_index)) != EOF) {
398 enabled_port_mask = parse_portmask(optarg);
399 if (enabled_port_mask == 0) {
400 printf("invalid portmask\n");
406 if (vmdq_parse_num_pools(optarg) == -1) {
407 printf("invalid number of pools\n");
419 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
420 if (enabled_port_mask & (1 << i))
421 ports[num_ports++] = (uint8_t)i;
424 if (num_ports < 2 || num_ports % 2) {
425 printf("Current enabled port number is %u,"
426 "but it should be even and at least 2\n", num_ports);
434 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
436 struct ether_hdr *eth;
439 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
441 /* 02:00:00:00:00:xx */
442 tmp = ð->d_addr.addr_bytes[0];
443 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
446 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
449 /* When we receive a HUP signal, print out our stats */
451 sighup_handler(int signum)
454 for (q = 0; q < num_queues; q++) {
455 if (q % (num_queues/num_pools) == 0)
456 printf("\nPool %u: ", q/(num_queues/num_pools));
457 printf("%lu ", rxPackets[q]);
459 printf("\nFinished handling signal %d\n", signum);
463 * Main thread that does the work, reading from INPUT_PORT
464 * and writing to OUTPUT_PORT
467 lcore_main(__attribute__((__unused__)) void *dummy)
469 const uint16_t lcore_id = (uint16_t)rte_lcore_id();
470 const uint16_t num_cores = (uint16_t)rte_lcore_count();
471 uint16_t core_id = 0;
472 uint16_t startQueue, endQueue;
474 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
476 for (i = 0; i < num_cores; i++)
477 if (lcore_ids[i] == lcore_id) {
482 if (remainder != 0) {
483 if (core_id < remainder) {
484 startQueue = (uint16_t)(core_id *
485 (num_vmdq_queues / num_cores + 1));
486 endQueue = (uint16_t)(startQueue +
487 (num_vmdq_queues / num_cores) + 1);
489 startQueue = (uint16_t)(core_id *
490 (num_vmdq_queues / num_cores) +
492 endQueue = (uint16_t)(startQueue +
493 (num_vmdq_queues / num_cores));
496 startQueue = (uint16_t)(core_id *
497 (num_vmdq_queues / num_cores));
498 endQueue = (uint16_t)(startQueue +
499 (num_vmdq_queues / num_cores));
502 /* vmdq queue idx doesn't always start from zero.*/
503 startQueue += vmdq_queue_base;
504 endQueue += vmdq_queue_base;
505 printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
506 (unsigned)lcore_id, startQueue, endQueue - 1);
508 if (startQueue == endQueue) {
509 printf("lcore %u has nothing to do\n", lcore_id);
514 struct rte_mbuf *buf[MAX_PKT_BURST];
515 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
517 for (p = 0; p < num_ports; p++) {
518 const uint8_t sport = ports[p];
519 /* 0 <-> 1, 2 <-> 3 etc */
520 const uint8_t dport = ports[p ^ 1];
521 if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
524 for (q = startQueue; q < endQueue; q++) {
525 const uint16_t rxCount = rte_eth_rx_burst(sport,
528 if (unlikely(rxCount == 0))
531 rxPackets[q] += rxCount;
533 for (i = 0; i < rxCount; i++)
534 update_mac_address(buf[i], dport);
536 const uint16_t txCount = rte_eth_tx_burst(dport,
537 vmdq_queue_base + core_id,
541 if (txCount != rxCount) {
542 for (i = txCount; i < rxCount; i++)
543 rte_pktmbuf_free(buf[i]);
551 * Update the global var NUM_PORTS and array PORTS according to system ports number
552 * and return valid ports number
554 static unsigned check_ports_num(unsigned nb_ports)
556 unsigned valid_num_ports = num_ports;
559 if (num_ports > nb_ports) {
560 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
561 num_ports, nb_ports);
562 num_ports = nb_ports;
565 for (portid = 0; portid < num_ports; portid++) {
566 if (ports[portid] >= nb_ports) {
567 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
568 ports[portid], (nb_ports - 1));
569 ports[portid] = INVALID_PORT_ID;
573 return valid_num_ports;
576 /* Main function, does initialisation and calls the per-lcore functions */
578 main(int argc, char *argv[])
580 struct rte_mempool *mbuf_pool;
581 unsigned lcore_id, core_id = 0;
583 unsigned nb_ports, valid_num_ports;
586 signal(SIGHUP, sighup_handler);
589 ret = rte_eal_init(argc, argv);
591 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
595 /* parse app arguments */
596 ret = vmdq_parse_args(argc, argv);
598 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
600 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
601 if (rte_lcore_is_enabled(lcore_id))
602 lcore_ids[core_id++] = lcore_id;
604 if (rte_lcore_count() > RTE_MAX_LCORE)
605 rte_exit(EXIT_FAILURE, "Not enough cores\n");
607 nb_ports = rte_eth_dev_count();
610 * Update the global var NUM_PORTS and global array PORTS
611 * and get value of var VALID_NUM_PORTS according to system ports number
613 valid_num_ports = check_ports_num(nb_ports);
615 if (valid_num_ports < 2 || valid_num_ports % 2) {
616 printf("Current valid ports number is %u\n", valid_num_ports);
617 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
620 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
621 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
622 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
623 if (mbuf_pool == NULL)
624 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
626 /* initialize all ports */
627 for (portid = 0; portid < nb_ports; portid++) {
628 /* skip ports that are not enabled */
629 if ((enabled_port_mask & (1 << portid)) == 0) {
630 printf("\nSkipping disabled port %d\n", portid);
633 if (port_init(portid, mbuf_pool) != 0)
634 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
637 /* call lcore_main() on every lcore */
638 rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
639 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
640 if (rte_eal_wait_lcore(lcore_id) < 0)