4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_common.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
50 #include <rte_memzone.h>
52 #include <rte_per_lcore.h>
53 #include <rte_launch.h>
54 #include <rte_atomic.h>
55 #include <rte_cycles.h>
56 #include <rte_prefetch.h>
57 #include <rte_lcore.h>
58 #include <rte_per_lcore.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_interrupts.h>
62 #include <rte_random.h>
63 #include <rte_debug.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
67 #include <rte_mempool.h>
69 #include <rte_memcpy.h>
/* Size of the per-queue statistics array and factor used for mbuf pool sizing. */
#define MAX_QUEUES 1024
/*
 * 1024 queues require to meet the needs of a large number of vmdq_pools.
 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
/* Per-lcore cache size handed to the mbuf pool constructor. */
#define MBUF_CACHE_SIZE 64

/* Number of mbuf slots in the array passed to a single RX burst. */
#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

/* Marker stored in ports[] for a port id that does not exist on the system. */
#define INVALID_PORT_ID 0xFF
/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of queues and pools (if the user does not specify any, 8 by default) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;
97 /* empty vmdq configuration structure. Filled in programatically */
98 static const struct rte_eth_conf vmdq_conf_default = {
100 .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
102 .header_split = 0, /**< Header Split disabled */
103 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
104 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
105 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
109 .mq_mode = ETH_MQ_TX_NONE,
113 * should be overridden separately in code with
117 .nb_queue_pools = ETH_8_POOLS,
118 .enable_default_pool = 0,
121 .pool_map = {{0, 0},},
126 static unsigned lcore_ids[RTE_MAX_LCORE];
127 static uint8_t ports[RTE_MAX_ETHPORTS];
128 static unsigned num_ports; /**< The number of ports specified in command line */
130 /* array used for printing out statistics */
131 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
/* VLAN ids used to build the VMDQ pool map: entry i is the tag of map entry i. */
const uint16_t vlan_tags[] = {
	0,  1,  2,  3,  4,  5,  6,  7,
	8,  9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55,
	56, 57, 58, 59, 60, 61, 62, 63,
};
143 const uint16_t num_vlans = RTE_DIM(vlan_tags);
144 static uint16_t num_pf_queues, num_vmdq_queues;
145 static uint16_t vmdq_pool_base, vmdq_queue_base;
146 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
147 static struct ether_addr pool_addr_template = {
148 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
151 /* ethernet addresses of ports */
152 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
/* Queue, pool-map and pool limits for 10G and 1G devices. */
#define MAX_QUEUE_NUM_10G 128
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_10G 64
#define MAX_POOL_NUM_1G 8
161 * Builds up the correct configuration for vmdq based on the vlan tags array
162 * given above, and determine the queue number and pool map number according to
166 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
168 struct rte_eth_vmdq_rx_conf conf;
171 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
172 conf.nb_pool_maps = num_pools;
173 conf.enable_default_pool = 0;
174 conf.default_pool = 0; /* set explicit value, even if not used */
176 for (i = 0; i < conf.nb_pool_maps; i++) {
177 conf.pool_map[i].vlan_id = vlan_tags[i];
178 conf.pool_map[i].pools = (1UL << (i % num_pools));
181 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
182 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf,
183 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
188 * Initialises a given port using global settings and with the rx buffers
189 * coming from the mbuf_pool passed as parameter
192 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
194 struct rte_eth_dev_info dev_info;
195 struct rte_eth_rxconf *rxconf;
196 struct rte_eth_conf port_conf;
197 uint16_t rxRings, txRings;
198 const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT, txRingSize = RTE_TEST_TX_DESC_DEFAULT;
201 uint16_t queues_per_pool;
202 uint32_t max_nb_pools;
205 * The max pool number from dev_info will be used to validate the pool
206 * number specified in cmd line
208 rte_eth_dev_info_get(port, &dev_info);
209 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
211 * We allow to process part of VMDQ pools specified by num_pools in
214 if (num_pools > max_nb_pools) {
215 printf("num_pools %d >max_nb_pools %d\n",
216 num_pools, max_nb_pools);
219 retval = get_eth_conf(&port_conf, max_nb_pools);
224 * NIC queues are divided into pf queues and vmdq queues.
226 /* There is assumption here all ports have the same configuration! */
227 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
228 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
229 num_vmdq_queues = num_pools * queues_per_pool;
230 num_queues = num_pf_queues + num_vmdq_queues;
231 vmdq_queue_base = dev_info.vmdq_queue_base;
232 vmdq_pool_base = dev_info.vmdq_pool_base;
234 printf("pf queue num: %u, configured vmdq pool num: %u,"
235 " each vmdq pool has %u queues\n",
236 num_pf_queues, num_pools, queues_per_pool);
237 printf("vmdq queue base: %d pool base %d\n",
238 vmdq_queue_base, vmdq_pool_base);
239 if (port >= rte_eth_dev_count())
243 * Though in this example, we only receive packets from the first queue
244 * of each pool and send packets through first rte_lcore_count() tx
245 * queues of vmdq queues, all queues including pf queues are setup.
246 * This is because VMDQ queues doesn't always start from zero, and the
247 * PMD layer doesn't support selectively initialising part of rx/tx
250 rxRings = (uint16_t)dev_info.max_rx_queues;
251 txRings = (uint16_t)dev_info.max_tx_queues;
252 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
256 rte_eth_dev_info_get(port, &dev_info);
257 rxconf = &dev_info.default_rxconf;
258 rxconf->rx_drop_en = 1;
259 for (q = 0; q < rxRings; q++) {
260 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
261 rte_eth_dev_socket_id(port),
265 printf("initialise rx queue %d failed\n", q);
270 for (q = 0; q < txRings; q++) {
271 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
272 rte_eth_dev_socket_id(port),
275 printf("initialise tx queue %d failed\n", q);
280 retval = rte_eth_dev_start(port);
282 printf("port %d start failed\n", port);
286 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
287 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
288 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
290 vmdq_ports_eth_addr[port].addr_bytes[0],
291 vmdq_ports_eth_addr[port].addr_bytes[1],
292 vmdq_ports_eth_addr[port].addr_bytes[2],
293 vmdq_ports_eth_addr[port].addr_bytes[3],
294 vmdq_ports_eth_addr[port].addr_bytes[4],
295 vmdq_ports_eth_addr[port].addr_bytes[5]);
298 * Set mac for each pool.
299 * There is no default mac for the pools in i40.
300 * Removes this after i40e fixes this issue.
302 for (q = 0; q < num_pools; q++) {
303 struct ether_addr mac;
304 mac = pool_addr_template;
305 mac.addr_bytes[4] = port;
306 mac.addr_bytes[5] = q;
307 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
309 mac.addr_bytes[0], mac.addr_bytes[1],
310 mac.addr_bytes[2], mac.addr_bytes[3],
311 mac.addr_bytes[4], mac.addr_bytes[5]);
312 retval = rte_eth_dev_mac_addr_add(port, &mac,
315 printf("mac addr add failed at pool %d\n", q);
323 /* Check num_pools parameter and set it if OK*/
325 vmdq_parse_num_pools(const char *q_arg)
330 /* parse number string */
331 n = strtol(q_arg, &end, 10);
332 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
335 if (num_pools > num_vlans) {
336 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
/*
 * Parse a hexadecimal port mask.
 *
 * portmask: string from the command line (an optional "0x" prefix is
 *           accepted by strtoul with base 16).
 *
 * Returns the mask, or 0 when the string is empty, contains trailing
 * characters, does not fit in 32 bits, or encodes an empty mask. 0 is the
 * value the caller tests for, so an invalid string can never be mistaken for
 * an "all ports" mask.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	if (portmask == NULL || portmask[0] == '\0')
		return 0;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((end == NULL) || (*end != '\0'))
		return 0;

	/* The mask is stored in a 32-bit variable by the caller. */
	if (pm > 0xFFFFFFFFUL)
		return 0;

	return (int)pm;
}
/* Display usage: prints the accepted application options for prgname. */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK [--nb-pools NP]\n"
	"  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}
372 /* Parse the argument (num_pools) given in the command line of the application */
374 vmdq_parse_args(int argc, char **argv)
379 const char *prgname = argv[0];
380 static struct option long_option[] = {
381 {"nb-pools", required_argument, NULL, 0},
385 /* Parse command line */
386 while ((opt = getopt_long(argc, argv, "p:", long_option,
387 &option_index)) != EOF) {
391 enabled_port_mask = parse_portmask(optarg);
392 if (enabled_port_mask == 0) {
393 printf("invalid portmask\n");
399 if (vmdq_parse_num_pools(optarg) == -1) {
400 printf("invalid number of pools\n");
412 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
413 if (enabled_port_mask & (1 << i))
414 ports[num_ports++] = (uint8_t)i;
417 if (num_ports < 2 || num_ports % 2) {
418 printf("Current enabled port number is %u,"
419 "but it should be even and at least 2\n", num_ports);
427 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
429 struct ether_hdr *eth;
432 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
434 /* 02:00:00:00:00:xx */
435 tmp = ð->d_addr.addr_bytes[0];
436 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
439 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
442 /* When we receive a HUP signal, print out our stats */
444 sighup_handler(int signum)
447 for (q = 0; q < num_queues; q++) {
448 if (q % (num_queues/num_pools) == 0)
449 printf("\nPool %u: ", q/(num_queues/num_pools));
450 printf("%lu ", rxPackets[q]);
452 printf("\nFinished handling signal %d\n", signum);
456 * Main thread that does the work, reading from INPUT_PORT
457 * and writing to OUTPUT_PORT
460 lcore_main(__attribute__((__unused__)) void *dummy)
462 const uint16_t lcore_id = (uint16_t)rte_lcore_id();
463 const uint16_t num_cores = (uint16_t)rte_lcore_count();
464 uint16_t core_id = 0;
465 uint16_t startQueue, endQueue;
467 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
469 for (i = 0; i < num_cores; i++)
470 if (lcore_ids[i] == lcore_id) {
475 if (remainder != 0) {
476 if (core_id < remainder) {
477 startQueue = (uint16_t)(core_id *
478 (num_vmdq_queues / num_cores + 1));
479 endQueue = (uint16_t)(startQueue +
480 (num_vmdq_queues / num_cores) + 1);
482 startQueue = (uint16_t)(core_id *
483 (num_vmdq_queues / num_cores) +
485 endQueue = (uint16_t)(startQueue +
486 (num_vmdq_queues / num_cores));
489 startQueue = (uint16_t)(core_id *
490 (num_vmdq_queues / num_cores));
491 endQueue = (uint16_t)(startQueue +
492 (num_vmdq_queues / num_cores));
495 /* vmdq queue idx doesn't always start from zero.*/
496 startQueue += vmdq_queue_base;
497 endQueue += vmdq_queue_base;
498 printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
499 (unsigned)lcore_id, startQueue, endQueue - 1);
501 if (startQueue == endQueue) {
502 printf("lcore %u has nothing to do\n", lcore_id);
507 struct rte_mbuf *buf[MAX_PKT_BURST];
508 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
510 for (p = 0; p < num_ports; p++) {
511 const uint8_t sport = ports[p];
512 /* 0 <-> 1, 2 <-> 3 etc */
513 const uint8_t dport = ports[p ^ 1];
514 if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
517 for (q = startQueue; q < endQueue; q++) {
518 const uint16_t rxCount = rte_eth_rx_burst(sport,
521 if (unlikely(rxCount == 0))
524 rxPackets[q] += rxCount;
526 for (i = 0; i < rxCount; i++)
527 update_mac_address(buf[i], dport);
529 const uint16_t txCount = rte_eth_tx_burst(dport,
530 vmdq_queue_base + core_id,
534 if (txCount != rxCount) {
535 for (i = txCount; i < rxCount; i++)
536 rte_pktmbuf_free(buf[i]);
544 * Update the global var NUM_PORTS and array PORTS according to system ports number
545 * and return valid ports number
547 static unsigned check_ports_num(unsigned nb_ports)
549 unsigned valid_num_ports = num_ports;
552 if (num_ports > nb_ports) {
553 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
554 num_ports, nb_ports);
555 num_ports = nb_ports;
558 for (portid = 0; portid < num_ports; portid++) {
559 if (ports[portid] >= nb_ports) {
560 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
561 ports[portid], (nb_ports - 1));
562 ports[portid] = INVALID_PORT_ID;
566 return valid_num_ports;
569 /* Main function, does initialisation and calls the per-lcore functions */
571 main(int argc, char *argv[])
573 struct rte_mempool *mbuf_pool;
574 unsigned lcore_id, core_id = 0;
576 unsigned nb_ports, valid_num_ports;
579 signal(SIGHUP, sighup_handler);
582 ret = rte_eal_init(argc, argv);
584 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
588 /* parse app arguments */
589 ret = vmdq_parse_args(argc, argv);
591 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
593 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
594 if (rte_lcore_is_enabled(lcore_id))
595 lcore_ids[core_id++] = lcore_id;
597 if (rte_lcore_count() > RTE_MAX_LCORE)
598 rte_exit(EXIT_FAILURE, "Not enough cores\n");
600 nb_ports = rte_eth_dev_count();
603 * Update the global var NUM_PORTS and global array PORTS
604 * and get value of var VALID_NUM_PORTS according to system ports number
606 valid_num_ports = check_ports_num(nb_ports);
608 if (valid_num_ports < 2 || valid_num_ports % 2) {
609 printf("Current valid ports number is %u\n", valid_num_ports);
610 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
613 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
614 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
615 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
616 if (mbuf_pool == NULL)
617 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
619 /* initialize all ports */
620 for (portid = 0; portid < nb_ports; portid++) {
621 /* skip ports that are not enabled */
622 if ((enabled_port_mask & (1 << portid)) == 0) {
623 printf("\nSkipping disabled port %d\n", portid);
626 if (port_init(portid, mbuf_pool) != 0)
627 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
630 /* call lcore_main() on every lcore */
631 rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
632 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
633 if (rte_eal_wait_lcore(lcore_id) < 0)