4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_common.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
50 #include <rte_memzone.h>
51 #include <rte_tailq.h>
53 #include <rte_per_lcore.h>
54 #include <rte_launch.h>
55 #include <rte_atomic.h>
56 #include <rte_cycles.h>
57 #include <rte_prefetch.h>
58 #include <rte_lcore.h>
59 #include <rte_per_lcore.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_interrupts.h>
63 #include <rte_random.h>
64 #include <rte_debug.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
69 #include <rte_mempool.h>
71 #include <rte_memcpy.h>
75 #define MAX_QUEUES 128
77 * For 10 GbE, 128 queues require roughly
78 * 128*512 (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
/* Per-port mbuf budget: one ring's worth of buffers for each of the 128 queues. */
80 #define NUM_MBUFS_PER_PORT (128*512)
81 #define MBUF_CACHE_SIZE 64
/* Total mbuf size: 2 KB data room plus the mbuf header and reserved headroom. */
82 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
/* Maximum packets pulled per rte_eth_rx_burst() call in lcore_main(). */
84 #define MAX_PKT_BURST 32
87 * Configurable number of RX/TX ring descriptors
89 #define RTE_TEST_RX_DESC_DEFAULT 128
90 #define RTE_TEST_TX_DESC_DEFAULT 512
/* Sentinel written into ports[] by check_ports_num() for out-of-range IDs. */
92 #define INVALID_PORT_ID 0xFF
94 /* mask of enabled ports */
95 static uint32_t enabled_port_mask = 0;
97 /* number of queues/pools (if the user does not specify any, 8 by default) */
98 static uint32_t num_queues = 8;
99 static uint32_t num_pools = 8;
101 /* Empty VMDQ configuration structure. Filled in programmatically by get_eth_conf(). */
/* NOTE(review): several initializer lines appear missing from this excerpt
 * (e.g. the ".rxmode = {" / ".txmode = {" openers and closing braces) —
 * compare against the upstream DPDK vmdq example before editing. */
102 static const struct rte_eth_conf vmdq_conf_default = {
104 .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
106 .header_split = 0, /**< Header Split disabled */
107 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
108 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
109 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
113 .mq_mode = ETH_MQ_TX_NONE,
/* Placeholder pool settings below are overwritten by get_eth_conf(). */
117 * should be overridden separately in code with
121 .nb_queue_pools = ETH_8_POOLS,
122 .enable_default_pool = 0,
125 .pool_map = {{0, 0},},
130 static unsigned lcore_ids[RTE_MAX_LCORE];
131 static uint8_t ports[RTE_MAX_ETHPORTS];
132 static unsigned num_ports = 0; /**< The number of ports specified in command line */
134 /* array used for printing out statistics */
135 volatile unsigned long rxPackets[ MAX_QUEUES ] = {0};
/* VLAN tag table; get_eth_conf() maps entry i to pool (i % num_pools). */
137 const uint16_t vlan_tags[] = {
138 0, 1, 2, 3, 4, 5, 6, 7,
139 8, 9, 10, 11, 12, 13, 14, 15,
140 16, 17, 18, 19, 20, 21, 22, 23,
141 24, 25, 26, 27, 28, 29, 30, 31,
142 32, 33, 34, 35, 36, 37, 38, 39,
143 40, 41, 42, 43, 44, 45, 46, 47,
144 48, 49, 50, 51, 52, 53, 54, 55,
145 56, 57, 58, 59, 60, 61, 62, 63,
/* NOTE(review): the closing "};" of vlan_tags[] is not visible in this excerpt. */
147 const uint16_t num_vlans = RTE_DIM(vlan_tags);
/* Queue/pool layout discovered from dev_info in port_init(). */
148 static uint16_t num_pf_queues, num_vmdq_queues;
149 static uint16_t vmdq_pool_base, vmdq_queue_base;
150 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
151 static struct ether_addr pool_addr_template = {
152 .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
155 /* ethernet addresses of ports */
156 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
/* Hardware limits for 10G vs 1G NICs (queues, pool-map entries, pools). */
158 #define MAX_QUEUE_NUM_10G 128
159 #define MAX_QUEUE_NUM_1G 8
160 #define MAX_POOL_MAP_NUM_10G 64
161 #define MAX_POOL_MAP_NUM_1G 32
162 #define MAX_POOL_NUM_10G 64
163 #define MAX_POOL_NUM_1G 8
164 /* Builds up the correct configuration for VMDQ based on the vlan_tags array
165 * given above, and determines the queue number and pool-map number according to the valid pool number. */
/* NOTE(review): the return-type line, the "int i;" declaration, braces and the
 * trailing "return 0;" of this function are not visible in this excerpt. */
167 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
169 struct rte_eth_vmdq_rx_conf conf;
/* One pool-map entry per pool; default pool disabled, value set for clarity only. */
172 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
173 conf.nb_pool_maps = num_pools;
174 conf.enable_default_pool = 0;
175 conf.default_pool = 0; /* set explicit value, even if not used */
/* Spread the VLAN tags across pools round-robin: tag i -> pool (i % num_pools). */
177 for (i = 0; i < conf.nb_pool_maps; i++){
178 conf.pool_map[i].vlan_id = vlan_tags[ i ];
179 conf.pool_map[i].pools = (1UL << (i % num_pools));
/* Start from the default template, then overlay the VMDQ RX section.
 * NOTE(review): "ð_conf" on the next call is mojibake for "&eth_conf"
 * (the HTML entity "&eth" was decoded to 'ð') — fix the encoding. */
182 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
183 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf,
184 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
189 * Initialises a given port using global settings and with the RX buffers
190 * coming from the mbuf_pool passed as parameter.
/* NOTE(review): the "static inline int" line, the declarations of q/retval,
 * braces, and the error-return lines after each failing call are not visible
 * in this excerpt — compare against the upstream DPDK vmdq example. */
193 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
195 struct rte_eth_dev_info dev_info;
196 struct rte_eth_rxconf *rxconf;
197 struct rte_eth_conf port_conf;
198 uint16_t rxRings, txRings;
199 const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT, txRingSize = RTE_TEST_TX_DESC_DEFAULT;
202 uint16_t queues_per_pool;
203 uint32_t max_nb_pools;
205 /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
206 rte_eth_dev_info_get (port, &dev_info);
207 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
209 * We allow to process part of VMDQ pools specified by num_pools in
/* Reject a pool count the device cannot provide. */
212 if (num_pools > max_nb_pools) {
213 printf("num_pools %d >max_nb_pools %d\n",
214 num_pools, max_nb_pools);
/* Configure for the device maximum; only num_pools of them are serviced. */
217 retval = get_eth_conf(&port_conf, max_nb_pools);
222 * NIC queues are divided into pf queues and vmdq queues.
224 /* There is assumption here all ports have the same configuration! */
225 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
226 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
227 num_vmdq_queues = num_pools * queues_per_pool;
228 num_queues = num_pf_queues + num_vmdq_queues;
229 vmdq_queue_base = dev_info.vmdq_queue_base;
230 vmdq_pool_base = dev_info.vmdq_pool_base;
232 printf("pf queue num: %u, configured vmdq pool num: %u,"
233 " each vmdq pool has %u queues\n",
234 num_pf_queues, num_pools, queues_per_pool);
235 printf("vmdq queue base: %d pool base %d\n",
236 vmdq_queue_base, vmdq_pool_base);
237 if (port >= rte_eth_dev_count()) return -1;
240 * Though in this example, we only receive packets from the first queue
241 * of each pool and send packets through first rte_lcore_count() tx
242 * queues of vmdq queues, all queues including pf queues are setup.
243 * This is because VMDQ queues doesn't always start from zero, and the
244 * PMD layer doesn't support selectively initialising part of rx/tx
/* Configure every RX/TX queue the device exposes (see comment above). */
247 rxRings = (uint16_t)dev_info.max_rx_queues;
248 txRings = (uint16_t)dev_info.max_tx_queues;
249 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
/* Re-read dev_info after configure and enable drop-on-no-descriptor for RX. */
253 rte_eth_dev_info_get(port, &dev_info);
254 rxconf = &dev_info.default_rxconf;
255 rxconf->rx_drop_en = 1;
256 for (q = 0; q < rxRings; q ++) {
257 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
258 rte_eth_dev_socket_id(port),
262 printf("initialise rx queue %d failed\n", q);
267 for (q = 0; q < txRings; q ++) {
268 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
269 rte_eth_dev_socket_id(port),
272 printf("initialise tx queue %d failed\n", q);
277 retval = rte_eth_dev_start(port);
279 printf("port %d start failed\n", port);
/* Record and print the port's own MAC address. */
283 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
284 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
285 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
287 vmdq_ports_eth_addr[port].addr_bytes[0],
288 vmdq_ports_eth_addr[port].addr_bytes[1],
289 vmdq_ports_eth_addr[port].addr_bytes[2],
290 vmdq_ports_eth_addr[port].addr_bytes[3],
291 vmdq_ports_eth_addr[port].addr_bytes[4],
292 vmdq_ports_eth_addr[port].addr_bytes[5]);
295 * Set MAC for each pool.
296 * There is no default MAC for the pools in i40e.
297 * Remove this after i40e fixes this issue.
/* Derive each pool's MAC from the template: bytes 4/5 = port#/pool#. */
299 for (q = 0; q < num_pools; q++) {
300 struct ether_addr mac;
301 mac = pool_addr_template;
302 mac.addr_bytes[4] = port;
303 mac.addr_bytes[5] = q;
304 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
306 mac.addr_bytes[0], mac.addr_bytes[1],
307 mac.addr_bytes[2], mac.addr_bytes[3],
308 mac.addr_bytes[4], mac.addr_bytes[5]);
309 retval = rte_eth_dev_mac_addr_add(port, &mac,
312 printf("mac addr add failed at pool %d\n", q);
320 /* Check the num_pools parameter and set the global if it is OK. */
/* NOTE(review): the return type, the declarations of n/end, the assignment of
 * the parsed value into num_pools, and the return statements are not visible
 * in this excerpt — the num_pools vs num_vlans check below presumably runs
 * after "num_pools = n;" (verify against the upstream example). */
322 vmdq_parse_num_pools(const char *q_arg)
327 /* parse number string */
328 n = strtol(q_arg, &end, 10);
329 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
/* Each pool consumes one vlan_tags[] entry, so more pools than tags is invalid. */
332 if (num_pools > num_vlans) {
333 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
/* Parse the hexadecimal port mask from the command line.
 * NOTE(review): the return type, the pm/end declarations, and the return
 * statements (error sentinel and parsed value) are not visible in this excerpt. */
344 parse_portmask(const char *portmask)
349 /* parse hexadecimal string */
350 pm = strtoul(portmask, &end, 16);
351 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
/* Print command-line usage for the application.
 * NOTE(review): the stray ']' after PORTMASK in the format string looks like a
 * typo (probably meant "-p PORTMASK"); it is a runtime string, so it is only
 * flagged here, not changed. */
362 vmdq_usage(const char *prgname)
364 printf("%s [EAL options] -- -p PORTMASK]\n"
365 " --nb-pools NP: number of pools\n",
369 /* Parse the arguments (portmask, num_pools) given on the command line of the application. */
/* NOTE(review): the return type, the opt/option_index/i declarations, the
 * switch/case framing around the option handlers, and the error-path calls to
 * vmdq_usage() are not visible in this excerpt. */
371 vmdq_parse_args(int argc, char **argv)
376 const char *prgname = argv[0];
377 static struct option long_option[] = {
378 {"nb-pools", required_argument, NULL, 0},
382 /* Parse command line */
383 while ((opt = getopt_long(argc, argv, "p:",long_option,&option_index)) != EOF) {
/* -p: hexadecimal mask of ports to use; zero means invalid input. */
387 enabled_port_mask = parse_portmask(optarg);
388 if (enabled_port_mask == 0) {
389 printf("invalid portmask\n");
/* --nb-pools: number of VMDQ pools. */
395 if (vmdq_parse_num_pools(optarg) == -1){
396 printf("invalid number of pools\n");
/* Expand the mask into the ports[] array and count the enabled ports. */
408 for(i = 0; i < RTE_MAX_ETHPORTS; i++) {
409 if (enabled_port_mask & (1 << i))
410 ports[num_ports++] = (uint8_t)i;
/* Ports are forwarded in pairs (p <-> p^1), so an even count >= 2 is required. */
413 if (num_ports < 2 || num_ports % 2) {
414 printf("Current enabled port number is %u,"
415 "but it should be even and at least 2\n",num_ports);
/* Rewrite the Ethernet header of m: destination MAC becomes the synthetic
 * 02:00:00:00:00:<dst_port>, source MAC becomes the real address of dst_port.
 * NOTE(review): the "void *tmp;" declaration and the function's braces are not
 * visible in this excerpt. The uint64_t store through a casted byte pointer
 * below violates strict aliasing (and writes 2 bytes past the 6-byte MAC into
 * the adjacent source-MAC field, which is then overwritten) — a memcpy-based
 * rewrite would be cleaner. "ð->" is mojibake for "&eth->" (HTML entity
 * "&eth" decoded to 'ð') — fix the encoding. */
423 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
425 struct ether_hdr *eth;
428 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
430 /* 02:00:00:00:00:xx */
431 tmp = ð->d_addr.addr_bytes[0];
432 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
435 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], ð->s_addr);
438 #ifndef RTE_EXEC_ENV_BAREMETAL
439 /* When we receive a HUP signal, print out our stats */
/* NOTE(review): the "static void" line, the loop-variable declaration, and the
 * function braces are not visible in this excerpt. Assumes num_pools divides
 * num_queues evenly enough that num_queues/num_pools is nonzero — TODO confirm. */
441 sighup_handler(int signum)
/* Print per-queue RX counters, grouped by the pool each queue belongs to. */
444 for (q = 0; q < num_queues; q ++) {
445 if (q % (num_queues/num_pools) == 0)
446 printf("\nPool %u: ", q/(num_queues/num_pools));
447 printf("%lu ", rxPackets[ q ]);
449 printf("\nFinished handling signal %d\n", signum);
454 * Main thread that does the work, reading from INPUT_PORT
455 * and writing to OUTPUT_PORT.
/* NOTE(review): the return type, several declarations (p, q, i, the "for (;;)"
 * outer loop opener) and closing braces are not visible in this excerpt. */
458 lcore_main(__attribute__((__unused__)) void* dummy)
460 const uint16_t lcore_id = (uint16_t)rte_lcore_id();
461 const uint16_t num_cores = (uint16_t)rte_lcore_count();
462 uint16_t core_id = 0;
463 uint16_t startQueue, endQueue;
465 const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
/* Find this lcore's position in lcore_ids[] to derive its queue slice. */
467 for (i = 0; i < num_cores; i ++)
468 if (lcore_ids[i] == lcore_id) {
/* Split num_vmdq_queues across cores; the first "remainder" cores take one
 * extra queue each so every queue is covered. */
473 if (remainder != 0) {
474 if (core_id < remainder) {
475 startQueue = (uint16_t)(core_id *
476 (num_vmdq_queues / num_cores + 1));
477 endQueue = (uint16_t)(startQueue +
478 (num_vmdq_queues / num_cores) + 1);
480 startQueue = (uint16_t)(core_id *
481 (num_vmdq_queues / num_cores) +
483 endQueue = (uint16_t)(startQueue +
484 (num_vmdq_queues / num_cores));
487 startQueue = (uint16_t)(core_id *
488 (num_vmdq_queues / num_cores));
489 endQueue = (uint16_t)(startQueue +
490 (num_vmdq_queues / num_cores));
493 /* vmdq queue idx doesn't always start from zero.*/
494 startQueue += vmdq_queue_base;
495 endQueue += vmdq_queue_base;
496 printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
497 (unsigned)lcore_id, startQueue, endQueue - 1);
/* More cores than queues: this lcore has an empty slice and exits early. */
499 if (startQueue == endQueue) {
500 printf("lcore %u has nothing to do\n", lcore_id);
505 struct rte_mbuf *buf[MAX_PKT_BURST];
506 const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
/* Forward between port pairs: 0 <-> 1, 2 <-> 3, etc. */
508 for (p = 0; p < num_ports; p++) {
509 const uint8_t sport = ports[p];
510 const uint8_t dport = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */
512 if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
515 for (q = startQueue; q < endQueue; q++) {
516 const uint16_t rxCount = rte_eth_rx_burst(sport,
519 if (unlikely(rxCount == 0))
/* rxPackets[] is read by sighup_handler(); updated without atomics. */
522 rxPackets[q] += rxCount;
524 for (i = 0; i < rxCount; i++)
525 update_mac_address(buf[i], dport);
527 const uint16_t txCount = rte_eth_tx_burst(dport,
528 vmdq_queue_base + core_id,
/* Free any mbufs the TX burst could not enqueue. */
532 if (txCount != rxCount) {
533 for (i = txCount; i < rxCount; i++)
534 rte_pktmbuf_free(buf[i]);
542 * Update the global var NUM_PORTS and array PORTS according to system ports number
543 * and return valid ports number.
/* NOTE(review): the "unsigned portid;" declaration and function braces are not
 * visible in this excerpt. Also note valid_num_ports is captured BEFORE
 * num_ports is clamped and before invalid entries are marked, so the returned
 * count can exceed the number of usable ports — confirm against upstream
 * whether this is intentional. */
545 static unsigned check_ports_num(unsigned nb_ports)
547 unsigned valid_num_ports = num_ports;
/* Clamp the requested port count to what the system actually has. */
550 if (num_ports > nb_ports) {
551 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
552 num_ports, nb_ports);
553 num_ports = nb_ports;
/* Invalidate any port ID beyond the system range; lcore_main() skips these. */
556 for (portid = 0; portid < num_ports; portid ++) {
557 if (ports[portid] >= nb_ports) {
558 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
559 ports[portid], (nb_ports - 1));
560 ports[portid] = INVALID_PORT_ID;
564 return valid_num_ports;
567 /* Main function, does initialisation and calls the per-lcore functions */
569 MAIN(int argc, char *argv[])
571 struct rte_mempool *mbuf_pool;
572 unsigned lcore_id, core_id = 0;
574 unsigned nb_ports, valid_num_ports;
577 #ifndef RTE_EXEC_ENV_BAREMETAL
578 signal(SIGHUP, sighup_handler);
582 ret = rte_eal_init(argc, argv);
584 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
588 /* parse app arguments */
589 ret = vmdq_parse_args(argc, argv);
591 rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
593 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++)
594 if (rte_lcore_is_enabled(lcore_id))
595 lcore_ids[core_id ++] = lcore_id;
597 if (rte_lcore_count() > RTE_MAX_LCORE)
598 rte_exit(EXIT_FAILURE,"Not enough cores\n");
600 nb_ports = rte_eth_dev_count();
601 if (nb_ports > RTE_MAX_ETHPORTS)
602 nb_ports = RTE_MAX_ETHPORTS;
605 * Update the global var NUM_PORTS and global array PORTS
606 * and get value of var VALID_NUM_PORTS according to system ports number
608 valid_num_ports = check_ports_num(nb_ports);
610 if (valid_num_ports < 2 || valid_num_ports % 2) {
611 printf("Current valid ports number is %u\n", valid_num_ports);
612 rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
615 mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS_PER_PORT * nb_ports,
616 MBUF_SIZE, MBUF_CACHE_SIZE,
617 sizeof(struct rte_pktmbuf_pool_private),
618 rte_pktmbuf_pool_init, NULL,
619 rte_pktmbuf_init, NULL,
621 if (mbuf_pool == NULL)
622 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
624 /* initialize all ports */
625 for (portid = 0; portid < nb_ports; portid++) {
626 /* skip ports that are not enabled */
627 if ((enabled_port_mask & (1 << portid)) == 0) {
628 printf("\nSkipping disabled port %d\n", portid);
631 if (port_init(portid, mbuf_pool) != 0)
632 rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
635 /* call lcore_main() on every lcore */
636 rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
637 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
638 if (rte_eal_wait_lcore(lcore_id) < 0)