4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_common.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
50 #include <rte_memzone.h>
52 #include <rte_per_lcore.h>
53 #include <rte_launch.h>
54 #include <rte_atomic.h>
55 #include <rte_cycles.h>
56 #include <rte_prefetch.h>
57 #include <rte_lcore.h>
58 #include <rte_per_lcore.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_interrupts.h>
62 #include <rte_random.h>
63 #include <rte_debug.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
67 #include <rte_mempool.h>
69 #include <rte_memcpy.h>
71 #define MAX_QUEUES 1024
73 * 1024 queues require to meet the needs of a large number of vmdq_pools.
74 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
76 #define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
77 RTE_TEST_TX_DESC_DEFAULT))
78 #define MBUF_CACHE_SIZE 64
80 #define MAX_PKT_BURST 32
83 * Configurable number of RX/TX ring descriptors
85 #define RTE_TEST_RX_DESC_DEFAULT 128
86 #define RTE_TEST_TX_DESC_DEFAULT 512
88 #define INVALID_PORT_ID 0xFF
90 /* mask of enabled ports */
91 static uint32_t enabled_port_mask;
93 /* number of pools (if user does not specify any, 8 by default) */
94 static uint32_t num_queues = 8;
95 static uint32_t num_pools = 8;
97 /* empty vmdq configuration structure. Filled in programmatically */
/*
 * Template port configuration: receive in pure VMDq mode, no TX multi-queue,
 * all RX offloads disabled.  get_eth_conf() copies this into the per-port
 * config and then overwrites rx_adv_conf.vmdq_rx_conf with computed values,
 * so the pool fields below are placeholders only.
 */
98 static const struct rte_eth_conf vmdq_conf_default = {
100 .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
102 .header_split = 0, /**< Header Split disabled */
103 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
104 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
105 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
109 .mq_mode = ETH_MQ_TX_NONE,
/*
113 * placeholder pool settings; should be overridden separately in code with
 * the real values (see get_eth_conf()).
 */
117 .nb_queue_pools = ETH_8_POOLS,
118 .enable_default_pool = 0,
121 .pool_map = {{0, 0},},
126 static unsigned lcore_ids[RTE_MAX_LCORE];
127 static uint8_t ports[RTE_MAX_ETHPORTS];
128 static unsigned num_ports; /**< The number of ports specified in command line */
130 /* array used for printing out statistics */
131 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
133 const uint16_t vlan_tags[] = {
134 0, 1, 2, 3, 4, 5, 6, 7,
135 8, 9, 10, 11, 12, 13, 14, 15,
136 16, 17, 18, 19, 20, 21, 22, 23,
137 24, 25, 26, 27, 28, 29, 30, 31,
138 32, 33, 34, 35, 36, 37, 38, 39,
139 40, 41, 42, 43, 44, 45, 46, 47,
140 48, 49, 50, 51, 52, 53, 54, 55,
141 56, 57, 58, 59, 60, 61, 62, 63,
143 const uint16_t num_vlans = RTE_DIM(vlan_tags);
144 static uint16_t num_pf_queues, num_vmdq_queues;
145 static uint16_t vmdq_pool_base, vmdq_queue_base;
146 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
147 static struct ether_addr pool_addr_template = {
148 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
151 /* ethernet addresses of ports */
152 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
154 #define MAX_QUEUE_NUM_10G 128
155 #define MAX_QUEUE_NUM_1G 8
156 #define MAX_POOL_MAP_NUM_10G 64
157 #define MAX_POOL_MAP_NUM_1G 32
158 #define MAX_POOL_NUM_10G 64
159 #define MAX_POOL_NUM_1G 8
161 * Builds up the correct configuration for vmdq based on the vlan tags array
162 * given above, and determine the queue number and pool map number according to
166 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
168 struct rte_eth_vmdq_rx_conf conf;
171 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
172 conf.nb_pool_maps = num_pools;
173 conf.enable_default_pool = 0;
174 conf.default_pool = 0; /* set explicit value, even if not used */
176 for (i = 0; i < conf.nb_pool_maps; i++) {
177 conf.pool_map[i].vlan_id = vlan_tags[i];
178 conf.pool_map[i].pools = (1UL << (i % num_pools));
181 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
182 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf,
183 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
/*
188 * Initialises a given port using global settings and with the rx buffers
189 * coming from the mbuf_pool passed as parameter.
 *
 * Configures the device for VMDq, sets up every RX/TX queue, starts the
 * port, and programs one MAC address per pool.  Returns 0 on success,
 * negative on any failure (early-return lines sit between the calls below).
 */
192 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
194 struct rte_eth_dev_info dev_info;
195 struct rte_eth_rxconf *rxconf;
196 struct rte_eth_conf port_conf;
197 uint16_t rxRings, txRings;
198 uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
199 uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
202 uint16_t queues_per_pool;
203 uint32_t max_nb_pools;
/*
206 * The max pool number from dev_info will be used to validate the pool
207 * number specified in cmd line
 */
209 rte_eth_dev_info_get(port, &dev_info);
210 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
/*
212 * We allow to process part of VMDQ pools specified by num_pools in
 * the command line; num_pools must not exceed the device limit.
 */
215 if (num_pools > max_nb_pools) {
216 printf("num_pools %d >max_nb_pools %d\n",
217 num_pools, max_nb_pools);
/* Build port_conf for the device's maximum pool count (not num_pools). */
220 retval = get_eth_conf(&port_conf, max_nb_pools);
/*
225 * NIC queues are divided into pf queues and vmdq queues.
 * Derive the global queue/pool layout from the device info.
 */
227 /* There is assumption here all ports have the same configuration! */
228 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
229 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
230 num_vmdq_queues = num_pools * queues_per_pool;
231 num_queues = num_pf_queues + num_vmdq_queues;
232 vmdq_queue_base = dev_info.vmdq_queue_base;
233 vmdq_pool_base = dev_info.vmdq_pool_base;
235 printf("pf queue num: %u, configured vmdq pool num: %u,"
236 " each vmdq pool has %u queues\n",
237 num_pf_queues, num_pools, queues_per_pool);
238 printf("vmdq queue base: %d pool base %d\n",
239 vmdq_queue_base, vmdq_pool_base);
/* Reject port indices beyond the number of detected devices. */
240 if (port >= rte_eth_dev_count())
/*
244 * Though in this example, we only receive packets from the first queue
245 * of each pool and send packets through first rte_lcore_count() tx
246 * queues of vmdq queues, all queues including pf queues are setup.
247 * This is because VMDQ queues don't always start from zero, and the
248 * PMD layer doesn't support selectively initialising part of rx/tx
 * queues.
 */
251 rxRings = (uint16_t)dev_info.max_rx_queues;
252 txRings = (uint16_t)dev_info.max_tx_queues;
253 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
/* Let the driver clamp descriptor counts to what the HW supports. */
257 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
/* Mbuf pool was sized from the DEFAULT descriptor counts; refuse growth. */
261 if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
262 RTE_TEST_TX_DESC_DEFAULT)) {
263 printf("Mbuf pool has an insufficient size for port %u.\n",
/* Re-read dev_info after configure; enable drop-on-no-descriptor. */
268 rte_eth_dev_info_get(port, &dev_info);
269 rxconf = &dev_info.default_rxconf;
270 rxconf->rx_drop_en = 1;
271 for (q = 0; q < rxRings; q++) {
272 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
273 rte_eth_dev_socket_id(port),
277 printf("initialise rx queue %d failed\n", q);
282 for (q = 0; q < txRings; q++) {
283 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
284 rte_eth_dev_socket_id(port),
287 printf("initialise tx queue %d failed\n", q);
292 retval = rte_eth_dev_start(port);
294 printf("port %d start failed\n", port);
/* Record and print the port's own MAC address. */
298 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
299 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
300 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
302 vmdq_ports_eth_addr[port].addr_bytes[0],
303 vmdq_ports_eth_addr[port].addr_bytes[1],
304 vmdq_ports_eth_addr[port].addr_bytes[2],
305 vmdq_ports_eth_addr[port].addr_bytes[3],
306 vmdq_ports_eth_addr[port].addr_bytes[4],
307 vmdq_ports_eth_addr[port].addr_bytes[5]);
/*
310 * Set mac for each pool.
311 * There is no default mac for the pools in i40e.
312 * Remove this after i40e fixes this issue.
 */
314 for (q = 0; q < num_pools; q++) {
315 struct ether_addr mac;
316 mac = pool_addr_template;
317 mac.addr_bytes[4] = port;
318 mac.addr_bytes[5] = q;
319 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
321 mac.addr_bytes[0], mac.addr_bytes[1],
322 mac.addr_bytes[2], mac.addr_bytes[3],
323 mac.addr_bytes[4], mac.addr_bytes[5]);
324 retval = rte_eth_dev_mac_addr_add(port, &mac,
327 printf("mac addr add failed at pool %d\n", q);
335 /* Check num_pools parameter and set it if OK */
/*
 * Parse the --nb-pools argument (decimal string).  Returns 0 on success,
 * -1 on a malformed number or when the requested pool count exceeds the
 * number of available VLAN tags (num_vlans).
 * NOTE(review): the strtol result n is presumably assigned to the global
 * num_pools on a line between these — confirm against the full source.
 */
337 vmdq_parse_num_pools(const char *q_arg)
342 /* parse number string */
343 n = strtol(q_arg, &end, 10);
/* reject empty strings and trailing garbage after the digits */
344 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
347 if (num_pools > num_vlans) {
348 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
/*
 * Parse the -p port mask (hexadecimal string).  Returns the mask on
 * success; a return of 0 (empty string, trailing garbage, or literal 0)
 * is treated as invalid by the caller.
 */
359 parse_portmask(const char *portmask)
364 /* parse hexadecimal string */
365 pm = strtoul(portmask, &end, 16);
/* reject empty strings and trailing garbage after the hex digits */
366 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
/* Print command-line usage for this application. */
377 vmdq_usage(const char *prgname)
/* NOTE(review): stray ']' after PORTMASK in the usage string — cosmetic
 * upstream typo; left unchanged here since it is runtime output text. */
379 printf("%s [EAL options] -- -p PORTMASK]\n"
380 " --nb-pools NP: number of pools\n",
384 /* Parse the argument (num_pools) given in the command line of the application */
/*
 * Parse application (non-EAL) arguments: -p PORTMASK and --nb-pools NP.
 * Fills the global `ports` array from the mask and validates that an even
 * number (>= 2) of ports is enabled, since ports are paired 0<->1, 2<->3.
 * Returns 0 on success, negative on error.
 */
386 vmdq_parse_args(int argc, char **argv)
391 const char *prgname = argv[0];
392 static struct option long_option[] = {
393 {"nb-pools", required_argument, NULL, 0},
397 /* Parse command line */
/* getopt_long returns -1 at end of options; EOF == -1 on this platform */
398 while ((opt = getopt_long(argc, argv, "p:", long_option,
399 &option_index)) != EOF) {
/* 'p': hexadecimal mask of ports to enable */
403 enabled_port_mask = parse_portmask(optarg);
404 if (enabled_port_mask == 0) {
405 printf("invalid portmask\n");
/* long option 0: --nb-pools */
411 if (vmdq_parse_num_pools(optarg) == -1) {
412 printf("invalid number of pools\n");
/* expand the bit mask into the ordered list of enabled port ids */
424 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
425 if (enabled_port_mask & (1 << i))
426 ports[num_ports++] = (uint8_t)i;
429 if (num_ports < 2 || num_ports % 2) {
430 printf("Current enabled port number is %u,"
431 "but it should be even and at least 2\n", num_ports);
439 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
441 struct ether_hdr *eth;
444 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
446 /* 02:00:00:00:00:xx */
447 tmp = ð->d_addr.addr_bytes[0];
448 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
451 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
454 /* When we receive a HUP signal, print out our stats */
/*
 * SIGHUP handler: dump the per-queue RX packet counters, grouped by pool
 * (num_queues/num_pools consecutive queues belong to one pool).
 * NOTE(review): printf() is not async-signal-safe per POSIX; acceptable
 * for a sample app but not for production signal handlers.
 */
456 sighup_handler(int signum)
459 for (q = 0; q < num_queues; q++) {
/* start a new output line at each pool boundary */
460 if (q % (num_queues/num_pools) == 0)
461 printf("\nPool %u: ", q/(num_queues/num_pools));
462 printf("%lu ", rxPackets[q]);
464 printf("\nFinished handling signal %d\n", signum);
/*
468 * Main thread that does the work, reading from INPUT_PORT
469 * and writing to OUTPUT_PORT.
 *
 * Per-lcore worker: each core takes a contiguous slice of the VMDq RX
 * queues, polls them on every enabled port, rewrites MAC addresses and
 * forwards to the paired port (0<->1, 2<->3, ...).
 */
472 lcore_main(__attribute__((__unused__)) void *dummy)
474 const uint16_t lcore_id = (uint16_t)rte_lcore_id();
475 const uint16_t num_cores = (uint16_t)rte_lcore_count();
476 uint16_t core_id = 0;
477 uint16_t startQueue, endQueue;
/* leftover queues when num_vmdq_queues does not divide evenly by cores */
479 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
/* find this lcore's dense index (core_id) in the enabled-lcore list */
481 for (i = 0; i < num_cores; i++)
482 if (lcore_ids[i] == lcore_id) {
/*
 * Split num_vmdq_queues across cores: the first `remainder` cores get
 * one extra queue each; the rest get the even share.
 */
487 if (remainder != 0) {
488 if (core_id < remainder) {
489 startQueue = (uint16_t)(core_id *
490 (num_vmdq_queues / num_cores + 1));
491 endQueue = (uint16_t)(startQueue +
492 (num_vmdq_queues / num_cores) + 1);
494 startQueue = (uint16_t)(core_id *
495 (num_vmdq_queues / num_cores) +
497 endQueue = (uint16_t)(startQueue +
498 (num_vmdq_queues / num_cores));
501 startQueue = (uint16_t)(core_id *
502 (num_vmdq_queues / num_cores));
503 endQueue = (uint16_t)(startQueue +
504 (num_vmdq_queues / num_cores));
507 /* vmdq queue idx doesn't always start from zero.*/
508 startQueue += vmdq_queue_base;
509 endQueue += vmdq_queue_base;
510 printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
511 (unsigned)lcore_id, startQueue, endQueue - 1);
/* more cores than queues: this core has an empty slice */
513 if (startQueue == endQueue) {
514 printf("lcore %u has nothing to do\n", lcore_id);
/* per-iteration RX burst buffer (forever-loop opener not shown here) */
519 struct rte_mbuf *buf[MAX_PKT_BURST];
520 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
522 for (p = 0; p < num_ports; p++) {
523 const uint8_t sport = ports[p];
524 /* 0 <-> 1, 2 <-> 3 etc */
525 const uint8_t dport = ports[p ^ 1];
526 if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
529 for (q = startQueue; q < endQueue; q++) {
530 const uint16_t rxCount = rte_eth_rx_burst(sport,
533 if (unlikely(rxCount == 0))
/* stats read by sighup_handler(); plain += is fine: one writer per queue */
536 rxPackets[q] += rxCount;
538 for (i = 0; i < rxCount; i++)
539 update_mac_address(buf[i], dport);
/* TX on a per-core queue offset into the VMDq queue range */
541 const uint16_t txCount = rte_eth_tx_burst(dport,
542 vmdq_queue_base + core_id,
/* free any packets the TX ring could not accept */
546 if (txCount != rxCount) {
547 for (i = txCount; i < rxCount; i++)
548 rte_pktmbuf_free(buf[i]);
/*
556 * Update the global var NUM_PORTS and array PORTS according to system ports number
557 * and return valid ports number.
 *
 * Clamps num_ports to the detected device count, marks out-of-range port
 * ids as INVALID_PORT_ID, and returns the count of valid ports.
 * NOTE(review): valid_num_ports is captured before the clamp below, so if
 * num_ports is reduced the return value still reflects the old count —
 * quirk inherited from upstream; callers only check evenness/minimum.
 */
559 static unsigned check_ports_num(unsigned nb_ports)
561 unsigned valid_num_ports = num_ports;
564 if (num_ports > nb_ports) {
565 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
566 num_ports, nb_ports);
567 num_ports = nb_ports;
/* invalidate entries whose port id does not exist on this system */
570 for (portid = 0; portid < num_ports; portid++) {
571 if (ports[portid] >= nb_ports) {
572 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
573 ports[portid], (nb_ports - 1));
574 ports[portid] = INVALID_PORT_ID;
578 return valid_num_ports;
581 /* Main function, does initialisation and calls the per-lcore functions */
583 main(int argc, char *argv[])
585 struct rte_mempool *mbuf_pool;
586 unsigned lcore_id, core_id = 0;
588 unsigned nb_ports, valid_num_ports;
/* SIGHUP triggers a statistics dump (see sighup_handler) */
591 signal(SIGHUP, sighup_handler);
/* initialise the EAL first; it consumes its own argv prefix */
594 ret = rte_eal_init(argc, argv);
596 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
600 /* parse app arguments */
601 ret = vmdq_parse_args(argc, argv);
603 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
/* build the dense lcore_ids[] index used by lcore_main() */
605 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
606 if (rte_lcore_is_enabled(lcore_id))
607 lcore_ids[core_id++] = lcore_id;
609 if (rte_lcore_count() > RTE_MAX_LCORE)
610 rte_exit(EXIT_FAILURE, "Not enough cores\n");
612 nb_ports = rte_eth_dev_count();
/*
615 * Update the global var NUM_PORTS and global array PORTS
616 * and get value of var VALID_NUM_PORTS according to system ports number
 */
618 valid_num_ports = check_ports_num(nb_ports);
/* forwarding pairs ports 0<->1, 2<->3, so an even count >= 2 is required */
620 if (valid_num_ports < 2 || valid_num_ports % 2) {
621 printf("Current valid ports number is %u\n", valid_num_ports);
622 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
/* one shared mbuf pool sized for every queue on every detected port */
625 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
626 NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
627 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
628 if (mbuf_pool == NULL)
629 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
631 /* initialize all ports */
632 for (portid = 0; portid < nb_ports; portid++) {
633 /* skip ports that are not enabled */
634 if ((enabled_port_mask & (1 << portid)) == 0) {
635 printf("\nSkipping disabled port %d\n", portid);
638 if (port_init(portid, mbuf_pool) != 0)
639 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
642 /* call lcore_main() on every lcore */
643 rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
644 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
645 if (rte_eal_wait_lcore(lcore_id) < 0)