4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_common.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
50 #include <rte_memzone.h>
52 #include <rte_launch.h>
53 #include <rte_atomic.h>
54 #include <rte_cycles.h>
55 #include <rte_prefetch.h>
56 #include <rte_lcore.h>
57 #include <rte_per_lcore.h>
58 #include <rte_branch_prediction.h>
59 #include <rte_interrupts.h>
61 #include <rte_random.h>
62 #include <rte_debug.h>
63 #include <rte_ether.h>
64 #include <rte_ethdev.h>
65 #include <rte_mempool.h>
/* Upper bound on per-port queues; also sizes the rxPackets statistics array. */
#define MAX_QUEUES 1024
/*
 * 1024 queues are required to meet the needs of a large number of vmdq_pools.
 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

/* Maximum number of packets handled per RX/TX burst */
#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

/* Marker stored in ports[] for a port id that does not exist on the system */
#define INVALID_PORT_ID 0xFF
/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of queues and pools (if the user does not specify any, 8 by default) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;
94 /* empty vmdq configuration structure. Filled in programatically */
95 static const struct rte_eth_conf vmdq_conf_default = {
97 .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
99 .header_split = 0, /**< Header Split disabled */
100 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
101 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
102 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
106 .mq_mode = ETH_MQ_TX_NONE,
110 * should be overridden separately in code with
114 .nb_queue_pools = ETH_8_POOLS,
115 .enable_default_pool = 0,
118 .pool_map = {{0, 0},},
123 static unsigned lcore_ids[RTE_MAX_LCORE];
124 static uint8_t ports[RTE_MAX_ETHPORTS];
125 static unsigned num_ports; /**< The number of ports specified in command line */
127 /* array used for printing out statistics */
128 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
130 const uint16_t vlan_tags[] = {
131 0, 1, 2, 3, 4, 5, 6, 7,
132 8, 9, 10, 11, 12, 13, 14, 15,
133 16, 17, 18, 19, 20, 21, 22, 23,
134 24, 25, 26, 27, 28, 29, 30, 31,
135 32, 33, 34, 35, 36, 37, 38, 39,
136 40, 41, 42, 43, 44, 45, 46, 47,
137 48, 49, 50, 51, 52, 53, 54, 55,
138 56, 57, 58, 59, 60, 61, 62, 63,
140 const uint16_t num_vlans = RTE_DIM(vlan_tags);
141 static uint16_t num_pf_queues, num_vmdq_queues;
142 static uint16_t vmdq_pool_base, vmdq_queue_base;
143 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
144 static struct ether_addr pool_addr_template = {
145 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
148 /* ethernet addresses of ports */
149 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
151 #define MAX_QUEUE_NUM_10G 128
152 #define MAX_QUEUE_NUM_1G 8
153 #define MAX_POOL_MAP_NUM_10G 64
154 #define MAX_POOL_MAP_NUM_1G 32
155 #define MAX_POOL_NUM_10G 64
156 #define MAX_POOL_NUM_1G 8
158 * Builds up the correct configuration for vmdq based on the vlan tags array
159 * given above, and determine the queue number and pool map number according to
163 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
165 struct rte_eth_vmdq_rx_conf conf;
168 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
169 conf.nb_pool_maps = num_pools;
170 conf.enable_default_pool = 0;
171 conf.default_pool = 0; /* set explicit value, even if not used */
173 for (i = 0; i < conf.nb_pool_maps; i++) {
174 conf.pool_map[i].vlan_id = vlan_tags[i];
175 conf.pool_map[i].pools = (1UL << (i % num_pools));
178 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
179 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf,
180 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
185 * Initialises a given port using global settings and with the rx buffers
186 * coming from the mbuf_pool passed as parameter
189 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
191 struct rte_eth_dev_info dev_info;
192 struct rte_eth_rxconf *rxconf;
193 struct rte_eth_conf port_conf;
194 uint16_t rxRings, txRings;
195 uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
196 uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
199 uint16_t queues_per_pool;
200 uint32_t max_nb_pools;
203 * The max pool number from dev_info will be used to validate the pool
204 * number specified in cmd line
206 rte_eth_dev_info_get(port, &dev_info);
207 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
209 * We allow to process part of VMDQ pools specified by num_pools in
212 if (num_pools > max_nb_pools) {
213 printf("num_pools %d >max_nb_pools %d\n",
214 num_pools, max_nb_pools);
217 retval = get_eth_conf(&port_conf, max_nb_pools);
222 * NIC queues are divided into pf queues and vmdq queues.
224 /* There is assumption here all ports have the same configuration! */
225 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
226 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
227 num_vmdq_queues = num_pools * queues_per_pool;
228 num_queues = num_pf_queues + num_vmdq_queues;
229 vmdq_queue_base = dev_info.vmdq_queue_base;
230 vmdq_pool_base = dev_info.vmdq_pool_base;
232 printf("pf queue num: %u, configured vmdq pool num: %u,"
233 " each vmdq pool has %u queues\n",
234 num_pf_queues, num_pools, queues_per_pool);
235 printf("vmdq queue base: %d pool base %d\n",
236 vmdq_queue_base, vmdq_pool_base);
237 if (port >= rte_eth_dev_count())
241 * Though in this example, we only receive packets from the first queue
242 * of each pool and send packets through first rte_lcore_count() tx
243 * queues of vmdq queues, all queues including pf queues are setup.
244 * This is because VMDQ queues doesn't always start from zero, and the
245 * PMD layer doesn't support selectively initialising part of rx/tx
248 rxRings = (uint16_t)dev_info.max_rx_queues;
249 txRings = (uint16_t)dev_info.max_tx_queues;
250 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
254 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
258 if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
259 RTE_TEST_TX_DESC_DEFAULT)) {
260 printf("Mbuf pool has an insufficient size for port %u.\n",
265 rte_eth_dev_info_get(port, &dev_info);
266 rxconf = &dev_info.default_rxconf;
267 rxconf->rx_drop_en = 1;
268 for (q = 0; q < rxRings; q++) {
269 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
270 rte_eth_dev_socket_id(port),
274 printf("initialise rx queue %d failed\n", q);
279 for (q = 0; q < txRings; q++) {
280 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
281 rte_eth_dev_socket_id(port),
284 printf("initialise tx queue %d failed\n", q);
289 retval = rte_eth_dev_start(port);
291 printf("port %d start failed\n", port);
295 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
296 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
297 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
299 vmdq_ports_eth_addr[port].addr_bytes[0],
300 vmdq_ports_eth_addr[port].addr_bytes[1],
301 vmdq_ports_eth_addr[port].addr_bytes[2],
302 vmdq_ports_eth_addr[port].addr_bytes[3],
303 vmdq_ports_eth_addr[port].addr_bytes[4],
304 vmdq_ports_eth_addr[port].addr_bytes[5]);
307 * Set mac for each pool.
308 * There is no default mac for the pools in i40.
309 * Removes this after i40e fixes this issue.
311 for (q = 0; q < num_pools; q++) {
312 struct ether_addr mac;
313 mac = pool_addr_template;
314 mac.addr_bytes[4] = port;
315 mac.addr_bytes[5] = q;
316 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
318 mac.addr_bytes[0], mac.addr_bytes[1],
319 mac.addr_bytes[2], mac.addr_bytes[3],
320 mac.addr_bytes[4], mac.addr_bytes[5]);
321 retval = rte_eth_dev_mac_addr_add(port, &mac,
324 printf("mac addr add failed at pool %d\n", q);
332 /* Check num_pools parameter and set it if OK*/
334 vmdq_parse_num_pools(const char *q_arg)
339 /* parse number string */
340 n = strtol(q_arg, &end, 10);
341 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
344 if (num_pools > num_vlans) {
345 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
/*
 * Parse a hexadecimal port mask.
 *
 * portmask: string holding the mask in base 16 (an optional "0x" prefix is
 *           accepted by strtoul).
 *
 * Returns the mask, or 0 when the string is empty, contains trailing
 * characters, or encodes an empty mask. 0 is the value the caller tests for
 * to report "invalid portmask".
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	return (int)pm;
}
/* Display usage; prgname is the program name printed at the start of the line */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}
381 /* Parse the argument (num_pools) given in the command line of the application */
383 vmdq_parse_args(int argc, char **argv)
388 const char *prgname = argv[0];
389 static struct option long_option[] = {
390 {"nb-pools", required_argument, NULL, 0},
394 /* Parse command line */
395 while ((opt = getopt_long(argc, argv, "p:", long_option,
396 &option_index)) != EOF) {
400 enabled_port_mask = parse_portmask(optarg);
401 if (enabled_port_mask == 0) {
402 printf("invalid portmask\n");
408 if (vmdq_parse_num_pools(optarg) == -1) {
409 printf("invalid number of pools\n");
421 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
422 if (enabled_port_mask & (1 << i))
423 ports[num_ports++] = (uint8_t)i;
426 if (num_ports < 2 || num_ports % 2) {
427 printf("Current enabled port number is %u,"
428 "but it should be even and at least 2\n", num_ports);
436 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
438 struct ether_hdr *eth;
441 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
443 /* 02:00:00:00:00:xx */
444 tmp = ð->d_addr.addr_bytes[0];
445 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
448 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
451 /* When we receive a HUP signal, print out our stats */
453 sighup_handler(int signum)
456 for (q = 0; q < num_queues; q++) {
457 if (q % (num_queues/num_pools) == 0)
458 printf("\nPool %u: ", q/(num_queues/num_pools));
459 printf("%lu ", rxPackets[q]);
461 printf("\nFinished handling signal %d\n", signum);
465 * Main thread that does the work, reading from INPUT_PORT
466 * and writing to OUTPUT_PORT
469 lcore_main(__attribute__((__unused__)) void *dummy)
471 const uint16_t lcore_id = (uint16_t)rte_lcore_id();
472 const uint16_t num_cores = (uint16_t)rte_lcore_count();
473 uint16_t core_id = 0;
474 uint16_t startQueue, endQueue;
476 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
478 for (i = 0; i < num_cores; i++)
479 if (lcore_ids[i] == lcore_id) {
484 if (remainder != 0) {
485 if (core_id < remainder) {
486 startQueue = (uint16_t)(core_id *
487 (num_vmdq_queues / num_cores + 1));
488 endQueue = (uint16_t)(startQueue +
489 (num_vmdq_queues / num_cores) + 1);
491 startQueue = (uint16_t)(core_id *
492 (num_vmdq_queues / num_cores) +
494 endQueue = (uint16_t)(startQueue +
495 (num_vmdq_queues / num_cores));
498 startQueue = (uint16_t)(core_id *
499 (num_vmdq_queues / num_cores));
500 endQueue = (uint16_t)(startQueue +
501 (num_vmdq_queues / num_cores));
504 /* vmdq queue idx doesn't always start from zero.*/
505 startQueue += vmdq_queue_base;
506 endQueue += vmdq_queue_base;
507 printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
508 (unsigned)lcore_id, startQueue, endQueue - 1);
510 if (startQueue == endQueue) {
511 printf("lcore %u has nothing to do\n", lcore_id);
516 struct rte_mbuf *buf[MAX_PKT_BURST];
517 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
519 for (p = 0; p < num_ports; p++) {
520 const uint8_t sport = ports[p];
521 /* 0 <-> 1, 2 <-> 3 etc */
522 const uint8_t dport = ports[p ^ 1];
523 if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
526 for (q = startQueue; q < endQueue; q++) {
527 const uint16_t rxCount = rte_eth_rx_burst(sport,
530 if (unlikely(rxCount == 0))
533 rxPackets[q] += rxCount;
535 for (i = 0; i < rxCount; i++)
536 update_mac_address(buf[i], dport);
538 const uint16_t txCount = rte_eth_tx_burst(dport,
539 vmdq_queue_base + core_id,
543 if (txCount != rxCount) {
544 for (i = txCount; i < rxCount; i++)
545 rte_pktmbuf_free(buf[i]);
553 * Update the global var NUM_PORTS and array PORTS according to system ports number
554 * and return valid ports number
556 static unsigned check_ports_num(unsigned nb_ports)
558 unsigned valid_num_ports = num_ports;
561 if (num_ports > nb_ports) {
562 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
563 num_ports, nb_ports);
564 num_ports = nb_ports;
567 for (portid = 0; portid < num_ports; portid++) {
568 if (ports[portid] >= nb_ports) {
569 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
570 ports[portid], (nb_ports - 1));
571 ports[portid] = INVALID_PORT_ID;
575 return valid_num_ports;
578 /* Main function, does initialisation and calls the per-lcore functions */
580 main(int argc, char *argv[])
582 struct rte_mempool *mbuf_pool;
583 unsigned lcore_id, core_id = 0;
585 unsigned nb_ports, valid_num_ports;
588 signal(SIGHUP, sighup_handler);
591 ret = rte_eal_init(argc, argv);
593 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
597 /* parse app arguments */
598 ret = vmdq_parse_args(argc, argv);
600 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
602 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
603 if (rte_lcore_is_enabled(lcore_id))
604 lcore_ids[core_id++] = lcore_id;
606 if (rte_lcore_count() > RTE_MAX_LCORE)
607 rte_exit(EXIT_FAILURE, "Not enough cores\n");
609 nb_ports = rte_eth_dev_count();
612 * Update the global var NUM_PORTS and global array PORTS
613 * and get value of var VALID_NUM_PORTS according to system ports number
615 valid_num_ports = check_ports_num(nb_ports);
617 if (valid_num_ports < 2 || valid_num_ports % 2) {
618 printf("Current valid ports number is %u\n", valid_num_ports);
619 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
622 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
623 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
624 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
625 if (mbuf_pool == NULL)
626 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
628 /* initialize all ports */
629 for (portid = 0; portid < nb_ports; portid++) {
630 /* skip ports that are not enabled */
631 if ((enabled_port_mask & (1 << portid)) == 0) {
632 printf("\nSkipping disabled port %d\n", portid);
635 if (port_init(portid, mbuf_pool) != 0)
636 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
639 /* call lcore_main() on every lcore */
640 rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
641 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
642 if (rte_eal_wait_lcore(lcore_id) < 0)