4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <inttypes.h>
#include <getopt.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#define MAX_QUEUES 128
/*
 * For 10 GbE, 128 queues require roughly
 * 128*512 (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
 */
#define NUM_MBUFS_PER_PORT (128*512)
#define MBUF_CACHE_SIZE 64
/* data room + mbuf header + headroom; matches the old single-segment layout */
#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)

#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

/* sentinel stored in ports[] for an out-of-range port id */
#define INVALID_PORT_ID 0xFF

/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of pools (if user does not specify any, 8 by default) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;
99 /* empty vmdq configuration structure. Filled in programatically */
100 static const struct rte_eth_conf vmdq_conf_default = {
102 .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
104 .header_split = 0, /**< Header Split disabled */
105 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
106 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
107 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
111 .mq_mode = ETH_MQ_TX_NONE,
115 * should be overridden separately in code with
119 .nb_queue_pools = ETH_8_POOLS,
120 .enable_default_pool = 0,
123 .pool_map = {{0, 0},},
128 static unsigned lcore_ids[RTE_MAX_LCORE];
129 static uint8_t ports[RTE_MAX_ETHPORTS];
130 static unsigned num_ports; /**< The number of ports specified in command line */
132 /* array used for printing out statistics */
133 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
135 const uint16_t vlan_tags[] = {
136 0, 1, 2, 3, 4, 5, 6, 7,
137 8, 9, 10, 11, 12, 13, 14, 15,
138 16, 17, 18, 19, 20, 21, 22, 23,
139 24, 25, 26, 27, 28, 29, 30, 31,
140 32, 33, 34, 35, 36, 37, 38, 39,
141 40, 41, 42, 43, 44, 45, 46, 47,
142 48, 49, 50, 51, 52, 53, 54, 55,
143 56, 57, 58, 59, 60, 61, 62, 63,
145 const uint16_t num_vlans = RTE_DIM(vlan_tags);
146 static uint16_t num_pf_queues, num_vmdq_queues;
147 static uint16_t vmdq_pool_base, vmdq_queue_base;
148 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
149 static struct ether_addr pool_addr_template = {
150 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
153 /* ethernet addresses of ports */
154 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
156 #define MAX_QUEUE_NUM_10G 128
157 #define MAX_QUEUE_NUM_1G 8
158 #define MAX_POOL_MAP_NUM_10G 64
159 #define MAX_POOL_MAP_NUM_1G 32
160 #define MAX_POOL_NUM_10G 64
161 #define MAX_POOL_NUM_1G 8
163 * Builds up the correct configuration for vmdq based on the vlan tags array
164 * given above, and determine the queue number and pool map number according to
168 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
170 struct rte_eth_vmdq_rx_conf conf;
173 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
174 conf.nb_pool_maps = num_pools;
175 conf.enable_default_pool = 0;
176 conf.default_pool = 0; /* set explicit value, even if not used */
178 for (i = 0; i < conf.nb_pool_maps; i++) {
179 conf.pool_map[i].vlan_id = vlan_tags[i];
180 conf.pool_map[i].pools = (1UL << (i % num_pools));
183 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
184 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf,
185 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
190 * Initialises a given port using global settings and with the rx buffers
191 * coming from the mbuf_pool passed as parameter
194 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
196 struct rte_eth_dev_info dev_info;
197 struct rte_eth_rxconf *rxconf;
198 struct rte_eth_conf port_conf;
199 uint16_t rxRings, txRings;
200 const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT, txRingSize = RTE_TEST_TX_DESC_DEFAULT;
203 uint16_t queues_per_pool;
204 uint32_t max_nb_pools;
207 * The max pool number from dev_info will be used to validate the pool
208 * number specified in cmd line
210 rte_eth_dev_info_get(port, &dev_info);
211 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
213 * We allow to process part of VMDQ pools specified by num_pools in
216 if (num_pools > max_nb_pools) {
217 printf("num_pools %d >max_nb_pools %d\n",
218 num_pools, max_nb_pools);
221 retval = get_eth_conf(&port_conf, max_nb_pools);
226 * NIC queues are divided into pf queues and vmdq queues.
228 /* There is assumption here all ports have the same configuration! */
229 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
230 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
231 num_vmdq_queues = num_pools * queues_per_pool;
232 num_queues = num_pf_queues + num_vmdq_queues;
233 vmdq_queue_base = dev_info.vmdq_queue_base;
234 vmdq_pool_base = dev_info.vmdq_pool_base;
236 printf("pf queue num: %u, configured vmdq pool num: %u,"
237 " each vmdq pool has %u queues\n",
238 num_pf_queues, num_pools, queues_per_pool);
239 printf("vmdq queue base: %d pool base %d\n",
240 vmdq_queue_base, vmdq_pool_base);
241 if (port >= rte_eth_dev_count())
245 * Though in this example, we only receive packets from the first queue
246 * of each pool and send packets through first rte_lcore_count() tx
247 * queues of vmdq queues, all queues including pf queues are setup.
248 * This is because VMDQ queues doesn't always start from zero, and the
249 * PMD layer doesn't support selectively initialising part of rx/tx
252 rxRings = (uint16_t)dev_info.max_rx_queues;
253 txRings = (uint16_t)dev_info.max_tx_queues;
254 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
258 rte_eth_dev_info_get(port, &dev_info);
259 rxconf = &dev_info.default_rxconf;
260 rxconf->rx_drop_en = 1;
261 for (q = 0; q < rxRings; q++) {
262 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
263 rte_eth_dev_socket_id(port),
267 printf("initialise rx queue %d failed\n", q);
272 for (q = 0; q < txRings; q++) {
273 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
274 rte_eth_dev_socket_id(port),
277 printf("initialise tx queue %d failed\n", q);
282 retval = rte_eth_dev_start(port);
284 printf("port %d start failed\n", port);
288 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
289 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
290 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
292 vmdq_ports_eth_addr[port].addr_bytes[0],
293 vmdq_ports_eth_addr[port].addr_bytes[1],
294 vmdq_ports_eth_addr[port].addr_bytes[2],
295 vmdq_ports_eth_addr[port].addr_bytes[3],
296 vmdq_ports_eth_addr[port].addr_bytes[4],
297 vmdq_ports_eth_addr[port].addr_bytes[5]);
300 * Set mac for each pool.
301 * There is no default mac for the pools in i40.
302 * Removes this after i40e fixes this issue.
304 for (q = 0; q < num_pools; q++) {
305 struct ether_addr mac;
306 mac = pool_addr_template;
307 mac.addr_bytes[4] = port;
308 mac.addr_bytes[5] = q;
309 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
311 mac.addr_bytes[0], mac.addr_bytes[1],
312 mac.addr_bytes[2], mac.addr_bytes[3],
313 mac.addr_bytes[4], mac.addr_bytes[5]);
314 retval = rte_eth_dev_mac_addr_add(port, &mac,
317 printf("mac addr add failed at pool %d\n", q);
325 /* Check num_pools parameter and set it if OK*/
327 vmdq_parse_num_pools(const char *q_arg)
332 /* parse number string */
333 n = strtol(q_arg, &end, 10);
334 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
337 if (num_pools > num_vlans) {
338 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
/*
 * Parse the hexadecimal portmask given on the command line.
 * Returns the mask, or -1 on malformed input or an empty mask.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}
/* Display usage of the application */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK]\n"
	" --nb-pools NP: number of pools\n",
	       prgname);
}
374 /* Parse the argument (num_pools) given in the command line of the application */
376 vmdq_parse_args(int argc, char **argv)
381 const char *prgname = argv[0];
382 static struct option long_option[] = {
383 {"nb-pools", required_argument, NULL, 0},
387 /* Parse command line */
388 while ((opt = getopt_long(argc, argv, "p:", long_option,
389 &option_index)) != EOF) {
393 enabled_port_mask = parse_portmask(optarg);
394 if (enabled_port_mask == 0) {
395 printf("invalid portmask\n");
401 if (vmdq_parse_num_pools(optarg) == -1) {
402 printf("invalid number of pools\n");
414 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
415 if (enabled_port_mask & (1 << i))
416 ports[num_ports++] = (uint8_t)i;
419 if (num_ports < 2 || num_ports % 2) {
420 printf("Current enabled port number is %u,"
421 "but it should be even and at least 2\n", num_ports);
429 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
431 struct ether_hdr *eth;
434 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
436 /* 02:00:00:00:00:xx */
437 tmp = ð->d_addr.addr_bytes[0];
438 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
441 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
444 /* When we receive a HUP signal, print out our stats */
446 sighup_handler(int signum)
449 for (q = 0; q < num_queues; q++) {
450 if (q % (num_queues/num_pools) == 0)
451 printf("\nPool %u: ", q/(num_queues/num_pools));
452 printf("%lu ", rxPackets[q]);
454 printf("\nFinished handling signal %d\n", signum);
458 * Main thread that does the work, reading from INPUT_PORT
459 * and writing to OUTPUT_PORT
462 lcore_main(__attribute__((__unused__)) void *dummy)
464 const uint16_t lcore_id = (uint16_t)rte_lcore_id();
465 const uint16_t num_cores = (uint16_t)rte_lcore_count();
466 uint16_t core_id = 0;
467 uint16_t startQueue, endQueue;
469 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
471 for (i = 0; i < num_cores; i++)
472 if (lcore_ids[i] == lcore_id) {
477 if (remainder != 0) {
478 if (core_id < remainder) {
479 startQueue = (uint16_t)(core_id *
480 (num_vmdq_queues / num_cores + 1));
481 endQueue = (uint16_t)(startQueue +
482 (num_vmdq_queues / num_cores) + 1);
484 startQueue = (uint16_t)(core_id *
485 (num_vmdq_queues / num_cores) +
487 endQueue = (uint16_t)(startQueue +
488 (num_vmdq_queues / num_cores));
491 startQueue = (uint16_t)(core_id *
492 (num_vmdq_queues / num_cores));
493 endQueue = (uint16_t)(startQueue +
494 (num_vmdq_queues / num_cores));
497 /* vmdq queue idx doesn't always start from zero.*/
498 startQueue += vmdq_queue_base;
499 endQueue += vmdq_queue_base;
500 printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
501 (unsigned)lcore_id, startQueue, endQueue - 1);
503 if (startQueue == endQueue) {
504 printf("lcore %u has nothing to do\n", lcore_id);
509 struct rte_mbuf *buf[MAX_PKT_BURST];
510 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
512 for (p = 0; p < num_ports; p++) {
513 const uint8_t sport = ports[p];
514 /* 0 <-> 1, 2 <-> 3 etc */
515 const uint8_t dport = ports[p ^ 1];
516 if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
519 for (q = startQueue; q < endQueue; q++) {
520 const uint16_t rxCount = rte_eth_rx_burst(sport,
523 if (unlikely(rxCount == 0))
526 rxPackets[q] += rxCount;
528 for (i = 0; i < rxCount; i++)
529 update_mac_address(buf[i], dport);
531 const uint16_t txCount = rte_eth_tx_burst(dport,
532 vmdq_queue_base + core_id,
536 if (txCount != rxCount) {
537 for (i = txCount; i < rxCount; i++)
538 rte_pktmbuf_free(buf[i]);
546 * Update the global var NUM_PORTS and array PORTS according to system ports number
547 * and return valid ports number
549 static unsigned check_ports_num(unsigned nb_ports)
551 unsigned valid_num_ports = num_ports;
554 if (num_ports > nb_ports) {
555 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
556 num_ports, nb_ports);
557 num_ports = nb_ports;
560 for (portid = 0; portid < num_ports; portid++) {
561 if (ports[portid] >= nb_ports) {
562 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
563 ports[portid], (nb_ports - 1));
564 ports[portid] = INVALID_PORT_ID;
568 return valid_num_ports;
571 /* Main function, does initialisation and calls the per-lcore functions */
573 main(int argc, char *argv[])
575 struct rte_mempool *mbuf_pool;
576 unsigned lcore_id, core_id = 0;
578 unsigned nb_ports, valid_num_ports;
581 signal(SIGHUP, sighup_handler);
584 ret = rte_eal_init(argc, argv);
586 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
590 /* parse app arguments */
591 ret = vmdq_parse_args(argc, argv);
593 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
595 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
596 if (rte_lcore_is_enabled(lcore_id))
597 lcore_ids[core_id++] = lcore_id;
599 if (rte_lcore_count() > RTE_MAX_LCORE)
600 rte_exit(EXIT_FAILURE, "Not enough cores\n");
602 nb_ports = rte_eth_dev_count();
603 if (nb_ports > RTE_MAX_ETHPORTS)
604 nb_ports = RTE_MAX_ETHPORTS;
607 * Update the global var NUM_PORTS and global array PORTS
608 * and get value of var VALID_NUM_PORTS according to system ports number
610 valid_num_ports = check_ports_num(nb_ports);
612 if (valid_num_ports < 2 || valid_num_ports % 2) {
613 printf("Current valid ports number is %u\n", valid_num_ports);
614 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
617 mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS_PER_PORT * nb_ports,
618 MBUF_SIZE, MBUF_CACHE_SIZE,
619 sizeof(struct rte_pktmbuf_pool_private),
620 rte_pktmbuf_pool_init, NULL,
621 rte_pktmbuf_init, NULL,
623 if (mbuf_pool == NULL)
624 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
626 /* initialize all ports */
627 for (portid = 0; portid < nb_ports; portid++) {
628 /* skip ports that are not enabled */
629 if ((enabled_port_mask & (1 << portid)) == 0) {
630 printf("\nSkipping disabled port %d\n", portid);
633 if (port_init(portid, mbuf_pool) != 0)
634 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
637 /* call lcore_main() on every lcore */
638 rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
639 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
640 if (rte_eal_wait_lcore(lcore_id) < 0)