/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdint.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
#include <inttypes.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
/* Upper bound on the number of queues tracked per port (sizes rxPackets). */
#define MAX_QUEUES 1024
/*
 * 1024 queues are required to meet the needs of a large number of vmdq_pools:
 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) mbufs per port.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

/* Maximum number of packets handled by one rx/tx burst call. */
#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

/* Marker stored in ports[] for an entry that must not be used. */
#define INVALID_PORT_ID 0xFF
/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of queues and pools (if the user does not specify any, 8 by default) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;
93 /* empty vmdq configuration structure. Filled in programatically */
94 static const struct rte_eth_conf vmdq_conf_default = {
96 .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
98 .header_split = 0, /**< Header Split disabled */
99 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
100 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
101 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
105 .mq_mode = ETH_MQ_TX_NONE,
109 * should be overridden separately in code with
113 .nb_queue_pools = ETH_8_POOLS,
114 .enable_default_pool = 0,
117 .pool_map = {{0, 0},},
122 static unsigned lcore_ids[RTE_MAX_LCORE];
123 static uint16_t ports[RTE_MAX_ETHPORTS];
124 static unsigned num_ports; /**< The number of ports specified in command line */
126 /* array used for printing out statistics */
127 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
/* VLAN ids, one per pool-map entry: entry i is mapped to pool i */
const uint16_t vlan_tags[] = {
	0,  1,  2,  3,  4,  5,  6,  7,
	8,  9,  10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55,
	56, 57, 58, 59, 60, 61, 62, 63,
};
139 const uint16_t num_vlans = RTE_DIM(vlan_tags);
140 static uint16_t num_pf_queues, num_vmdq_queues;
141 static uint16_t vmdq_pool_base, vmdq_queue_base;
142 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
143 static struct ether_addr pool_addr_template = {
144 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
147 /* ethernet addresses of ports */
148 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
/* Queue, pool-map and pool limits for 10G and 1G NICs. */
#define MAX_QUEUE_NUM_10G 128
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_10G 64
#define MAX_POOL_NUM_1G 8
157 * Builds up the correct configuration for vmdq based on the vlan tags array
158 * given above, and determine the queue number and pool map number according to
162 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
164 struct rte_eth_vmdq_rx_conf conf;
167 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
168 conf.nb_pool_maps = num_pools;
169 conf.enable_default_pool = 0;
170 conf.default_pool = 0; /* set explicit value, even if not used */
172 for (i = 0; i < conf.nb_pool_maps; i++) {
173 conf.pool_map[i].vlan_id = vlan_tags[i];
174 conf.pool_map[i].pools = (1UL << (i % num_pools));
177 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
178 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf,
179 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
184 * Initialises a given port using global settings and with the rx buffers
185 * coming from the mbuf_pool passed as parameter
188 port_init(uint16_t port, struct rte_mempool *mbuf_pool)
190 struct rte_eth_dev_info dev_info;
191 struct rte_eth_rxconf *rxconf;
192 struct rte_eth_conf port_conf;
193 uint16_t rxRings, txRings;
194 uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
195 uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
198 uint16_t queues_per_pool;
199 uint32_t max_nb_pools;
202 * The max pool number from dev_info will be used to validate the pool
203 * number specified in cmd line
205 rte_eth_dev_info_get(port, &dev_info);
206 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
208 * We allow to process part of VMDQ pools specified by num_pools in
211 if (num_pools > max_nb_pools) {
212 printf("num_pools %d >max_nb_pools %d\n",
213 num_pools, max_nb_pools);
216 retval = get_eth_conf(&port_conf, max_nb_pools);
221 * NIC queues are divided into pf queues and vmdq queues.
223 /* There is assumption here all ports have the same configuration! */
224 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
225 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
226 num_vmdq_queues = num_pools * queues_per_pool;
227 num_queues = num_pf_queues + num_vmdq_queues;
228 vmdq_queue_base = dev_info.vmdq_queue_base;
229 vmdq_pool_base = dev_info.vmdq_pool_base;
231 printf("pf queue num: %u, configured vmdq pool num: %u,"
232 " each vmdq pool has %u queues\n",
233 num_pf_queues, num_pools, queues_per_pool);
234 printf("vmdq queue base: %d pool base %d\n",
235 vmdq_queue_base, vmdq_pool_base);
236 if (port >= rte_eth_dev_count())
240 * Though in this example, we only receive packets from the first queue
241 * of each pool and send packets through first rte_lcore_count() tx
242 * queues of vmdq queues, all queues including pf queues are setup.
243 * This is because VMDQ queues doesn't always start from zero, and the
244 * PMD layer doesn't support selectively initialising part of rx/tx
247 rxRings = (uint16_t)dev_info.max_rx_queues;
248 txRings = (uint16_t)dev_info.max_tx_queues;
249 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
253 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
257 if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
258 RTE_TEST_TX_DESC_DEFAULT)) {
259 printf("Mbuf pool has an insufficient size for port %u.\n",
264 rte_eth_dev_info_get(port, &dev_info);
265 rxconf = &dev_info.default_rxconf;
266 rxconf->rx_drop_en = 1;
267 for (q = 0; q < rxRings; q++) {
268 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
269 rte_eth_dev_socket_id(port),
273 printf("initialise rx queue %d failed\n", q);
278 for (q = 0; q < txRings; q++) {
279 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
280 rte_eth_dev_socket_id(port),
283 printf("initialise tx queue %d failed\n", q);
288 retval = rte_eth_dev_start(port);
290 printf("port %d start failed\n", port);
294 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
295 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
296 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
298 vmdq_ports_eth_addr[port].addr_bytes[0],
299 vmdq_ports_eth_addr[port].addr_bytes[1],
300 vmdq_ports_eth_addr[port].addr_bytes[2],
301 vmdq_ports_eth_addr[port].addr_bytes[3],
302 vmdq_ports_eth_addr[port].addr_bytes[4],
303 vmdq_ports_eth_addr[port].addr_bytes[5]);
306 * Set mac for each pool.
307 * There is no default mac for the pools in i40.
308 * Removes this after i40e fixes this issue.
310 for (q = 0; q < num_pools; q++) {
311 struct ether_addr mac;
312 mac = pool_addr_template;
313 mac.addr_bytes[4] = port;
314 mac.addr_bytes[5] = q;
315 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
317 mac.addr_bytes[0], mac.addr_bytes[1],
318 mac.addr_bytes[2], mac.addr_bytes[3],
319 mac.addr_bytes[4], mac.addr_bytes[5]);
320 retval = rte_eth_dev_mac_addr_add(port, &mac,
323 printf("mac addr add failed at pool %d\n", q);
331 /* Check num_pools parameter and set it if OK*/
333 vmdq_parse_num_pools(const char *q_arg)
338 /* parse number string */
339 n = strtol(q_arg, &end, 10);
340 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
343 if (num_pools > num_vlans) {
344 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
/*
 * Parse a hexadecimal port mask.
 *
 * portmask: string of hexadecimal digits, with an optional 0x prefix as
 *           accepted by strtoul() in base 16.
 *
 * Returns the mask, or 0 when the string is NULL or empty, does not start
 * with a hexadecimal digit, contains trailing characters, or does not fit in
 * 32 bits. 0 is the error value because an empty mask selects no port and
 * the caller rejects a zero mask.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* reject the leading whitespace and sign that strtoul would accept */
	if (portmask == NULL || !isxdigit((unsigned char)portmask[0]))
		return 0;

	/* parse hexadecimal string */
	errno = 0;
	pm = strtoul(portmask, &end, 16);
	if ((errno != 0) || (end == portmask) || (*end != '\0'))
		return 0;

	/* the mask is stored in a 32-bit variable by the caller */
	if (pm > UINT32_MAX)
		return 0;

	return (int)(uint32_t)pm;
}
/*
 * Display usage.
 *
 * prgname: program name printed at the start of the synopsis line.
 */
static void
vmdq_usage(const char *prgname)
{
	/* the synopsis used to carry an unbalanced ']' after PORTMASK */
	printf("%s [EAL options] -- -p PORTMASK [--nb-pools NP]\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}
380 /* Parse the argument (num_pools) given in the command line of the application */
382 vmdq_parse_args(int argc, char **argv)
387 const char *prgname = argv[0];
388 static struct option long_option[] = {
389 {"nb-pools", required_argument, NULL, 0},
393 /* Parse command line */
394 while ((opt = getopt_long(argc, argv, "p:", long_option,
395 &option_index)) != EOF) {
399 enabled_port_mask = parse_portmask(optarg);
400 if (enabled_port_mask == 0) {
401 printf("invalid portmask\n");
407 if (vmdq_parse_num_pools(optarg) == -1) {
408 printf("invalid number of pools\n");
420 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
421 if (enabled_port_mask & (1 << i))
422 ports[num_ports++] = (uint8_t)i;
425 if (num_ports < 2 || num_ports % 2) {
426 printf("Current enabled port number is %u,"
427 "but it should be even and at least 2\n", num_ports);
435 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
437 struct ether_hdr *eth;
440 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
442 /* 02:00:00:00:00:xx */
443 tmp = ð->d_addr.addr_bytes[0];
444 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
447 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
450 /* When we receive a HUP signal, print out our stats */
452 sighup_handler(int signum)
455 for (q = 0; q < num_queues; q++) {
456 if (q % (num_queues/num_pools) == 0)
457 printf("\nPool %u: ", q/(num_queues/num_pools));
458 printf("%lu ", rxPackets[q]);
460 printf("\nFinished handling signal %d\n", signum);
464 * Main thread that does the work, reading from INPUT_PORT
465 * and writing to OUTPUT_PORT
468 lcore_main(__attribute__((__unused__)) void *dummy)
470 const uint16_t lcore_id = (uint16_t)rte_lcore_id();
471 const uint16_t num_cores = (uint16_t)rte_lcore_count();
472 uint16_t core_id = 0;
473 uint16_t startQueue, endQueue;
475 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
477 for (i = 0; i < num_cores; i++)
478 if (lcore_ids[i] == lcore_id) {
483 if (remainder != 0) {
484 if (core_id < remainder) {
485 startQueue = (uint16_t)(core_id *
486 (num_vmdq_queues / num_cores + 1));
487 endQueue = (uint16_t)(startQueue +
488 (num_vmdq_queues / num_cores) + 1);
490 startQueue = (uint16_t)(core_id *
491 (num_vmdq_queues / num_cores) +
493 endQueue = (uint16_t)(startQueue +
494 (num_vmdq_queues / num_cores));
497 startQueue = (uint16_t)(core_id *
498 (num_vmdq_queues / num_cores));
499 endQueue = (uint16_t)(startQueue +
500 (num_vmdq_queues / num_cores));
503 /* vmdq queue idx doesn't always start from zero.*/
504 startQueue += vmdq_queue_base;
505 endQueue += vmdq_queue_base;
506 printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
507 (unsigned)lcore_id, startQueue, endQueue - 1);
509 if (startQueue == endQueue) {
510 printf("lcore %u has nothing to do\n", lcore_id);
515 struct rte_mbuf *buf[MAX_PKT_BURST];
516 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
518 for (p = 0; p < num_ports; p++) {
519 const uint8_t sport = ports[p];
520 /* 0 <-> 1, 2 <-> 3 etc */
521 const uint8_t dport = ports[p ^ 1];
522 if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
525 for (q = startQueue; q < endQueue; q++) {
526 const uint16_t rxCount = rte_eth_rx_burst(sport,
529 if (unlikely(rxCount == 0))
532 rxPackets[q] += rxCount;
534 for (i = 0; i < rxCount; i++)
535 update_mac_address(buf[i], dport);
537 const uint16_t txCount = rte_eth_tx_burst(dport,
538 vmdq_queue_base + core_id,
542 if (txCount != rxCount) {
543 for (i = txCount; i < rxCount; i++)
544 rte_pktmbuf_free(buf[i]);
552 * Update the global var NUM_PORTS and array PORTS according to system ports number
553 * and return valid ports number
555 static unsigned check_ports_num(unsigned nb_ports)
557 unsigned valid_num_ports = num_ports;
560 if (num_ports > nb_ports) {
561 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
562 num_ports, nb_ports);
563 num_ports = nb_ports;
566 for (portid = 0; portid < num_ports; portid++) {
567 if (ports[portid] >= nb_ports) {
568 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
569 ports[portid], (nb_ports - 1));
570 ports[portid] = INVALID_PORT_ID;
574 return valid_num_ports;
577 /* Main function, does initialisation and calls the per-lcore functions */
579 main(int argc, char *argv[])
581 struct rte_mempool *mbuf_pool;
582 unsigned lcore_id, core_id = 0;
584 unsigned nb_ports, valid_num_ports;
587 signal(SIGHUP, sighup_handler);
590 ret = rte_eal_init(argc, argv);
592 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
596 /* parse app arguments */
597 ret = vmdq_parse_args(argc, argv);
599 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
601 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
602 if (rte_lcore_is_enabled(lcore_id))
603 lcore_ids[core_id++] = lcore_id;
605 if (rte_lcore_count() > RTE_MAX_LCORE)
606 rte_exit(EXIT_FAILURE, "Not enough cores\n");
608 nb_ports = rte_eth_dev_count();
611 * Update the global var NUM_PORTS and global array PORTS
612 * and get value of var VALID_NUM_PORTS according to system ports number
614 valid_num_ports = check_ports_num(nb_ports);
616 if (valid_num_ports < 2 || valid_num_ports % 2) {
617 printf("Current valid ports number is %u\n", valid_num_ports);
618 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
621 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
622 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
623 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
624 if (mbuf_pool == NULL)
625 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
627 /* initialize all ports */
628 for (portid = 0; portid < nb_ports; portid++) {
629 /* skip ports that are not enabled */
630 if ((enabled_port_mask & (1 << portid)) == 0) {
631 printf("\nSkipping disabled port %d\n", portid);
634 if (port_init(portid, mbuf_pool) != 0)
635 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
638 /* call lcore_main() on every lcore */
639 rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
640 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
641 if (rte_eal_wait_lcore(lcore_id) < 0)