4 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <arpa/inet.h>
36 #include <linux/if_ether.h>
37 #include <linux/if_vlan.h>
38 #include <linux/virtio_net.h>
39 #include <linux/virtio_ring.h>
42 #include <sys/eventfd.h>
43 #include <sys/param.h>
46 #include <rte_atomic.h>
47 #include <rte_cycles.h>
48 #include <rte_ethdev.h>
50 #include <rte_string_fns.h>
51 #include <rte_malloc.h>
52 #include <rte_virtio_net.h>
/*
 * NOTE(review): compile-time constants for the vhost switch example.
 * This extraction dropped some lines; in particular the virtio device
 * state '#define DEVICE_RX 1' that should sit between DEVICE_MAC_LEARNING
 * and DEVICE_SAFE_REMOVE appears to be missing -- confirm against the
 * full file (DEVICE_RX is referenced by link_vmdq()/unlink_vmdq() below).
 */
59 #define MAX_QUEUES 128
62 /* the maximum number of external ports supported */
63 #define MAX_SUP_PORTS 1
66 * Calculate the number of buffers needed per port
/*
 * NOTE(review): uses macros (RTE_TEST_*_DESC_DEFAULT, MBUF_CACHE_SIZE)
 * defined further down -- fine for a macro, it is expanded at use sites.
 */
68 #define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \
69 (num_switching_cores*MAX_PKT_BURST) + \
70 (num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\
71 ((num_switching_cores+1)*MBUF_CACHE_SIZE))
73 #define MBUF_CACHE_SIZE 128
74 #define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE
76 #define MAX_PKT_BURST 32 /* Max burst size for RX/TX */
77 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
79 #define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */
80 #define BURST_RX_RETRIES 4 /* Number of retries on RX. */
82 #define JUMBO_FRAME_MAX_SIZE 0x2600
84 /* State of virtio device. */
85 #define DEVICE_MAC_LEARNING 0
87 #define DEVICE_SAFE_REMOVE 2
89 /* Config_core_flag status definitions. */
90 #define REQUEST_DEV_REMOVAL 1
91 #define ACK_DEV_REMOVAL 0
93 /* Configurable number of RX/TX ring descriptors */
94 #define RTE_TEST_RX_DESC_DEFAULT 1024
95 #define RTE_TEST_TX_DESC_DEFAULT 512
97 #define INVALID_PORT_ID 0xFF
99 /* Max number of devices. Limited by vmdq. */
100 #define MAX_DEVICES 64
102 /* Size of buffers used for snprintfs. */
103 #define MAX_PRINT_BUFF 6072
105 /* Maximum character device basename size. */
106 #define MAX_BASENAME_SZ 10
108 /* Maximum long option length for option parsing. */
109 #define MAX_LONG_OPT_SZ 64
111 /* Used to compare MAC addresses. */
/* 48-bit mask: only the 6 MAC-address bytes of an 8-byte load are compared */
112 #define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL
114 /* Number of descriptors per cacheline. */
115 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
/*
 * NOTE(review): file-scope state, mostly set once during argument parsing
 * in us_vhost_parse_args() and then read by the data-path cores. The
 * vm2vm_type enum definition (original lines ~137-142) is missing from
 * this extraction -- VM2VM_SOFTWARE / VM2VM_HARDWARE / VM2VM_LAST are
 * referenced below; confirm against the full file.
 */
117 /* mask of enabled ports */
118 static uint32_t enabled_port_mask = 0;
120 /* Promiscuous mode */
121 static uint32_t promiscuous;
123 /*Number of switching cores enabled*/
124 static uint32_t num_switching_cores = 0;
126 /* number of devices/queues to support*/
127 static uint32_t num_queues = 0;
128 static uint32_t num_devices;
/* single mbuf pool shared by all RX queues and vhost dequeue paths */
130 static struct rte_mempool *mbuf_pool;
131 static int mergeable;
133 /* Do vlan strip on host, enabled on default */
134 static uint32_t vlan_strip = 1;
136 /* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
143 static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;
/* Enable stats printing; value is the refresh period in seconds (0 = off). */
146 static uint32_t enable_stats = 0;
147 /* Enable retries on RX. */
148 static uint32_t enable_retry = 1;
150 /* Disable TX checksum offload */
151 static uint32_t enable_tx_csum;
153 /* Disable TSO offload */
154 static uint32_t enable_tso;
156 /* Specify timeout (in useconds) between retries on RX. */
157 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
158 /* Specify the number of retries on RX. */
159 static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
161 /* Character device basename. Can be set by user. */
162 static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";
164 /* empty vmdq configuration structure. Filled in programatically */
/*
 * NOTE(review): the initializer below is gappy in this extraction --
 * intermediate lines such as '.rxmode = {', '.txmode = {',
 * '.rx_adv_conf = { .vmdq_rx_conf = {' and the closing braces are
 * missing. Only the visible field assignments are authoritative here.
 */
165 static struct rte_eth_conf vmdq_conf_default = {
167 .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
169 .header_split = 0, /**< Header Split disabled */
170 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
171 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
173 * It is necessary for 1G NIC such as I350,
174 * this fixes bug of ipv4 forwarding in guest can't
175 * forward pakets from one virtio dev to another virtio dev.
177 .hw_vlan_strip = 1, /**< VLAN strip enabled. */
178 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
179 .hw_strip_crc = 0, /**< CRC stripped by hardware */
183 .mq_mode = ETH_MQ_TX_NONE,
187 * should be overridden separately in code with
191 .nb_queue_pools = ETH_8_POOLS,
192 .enable_default_pool = 0,
195 .pool_map = {{0, 0},},
/* per-lcore id table filled in main(); indexed 0..rte_lcore_count()-1 */
200 static unsigned lcore_ids[RTE_MAX_LCORE];
201 static uint8_t ports[RTE_MAX_ETHPORTS];
202 static unsigned num_ports = 0; /**< The number of ports specified in command line */
203 static uint16_t num_pf_queues, num_vmdq_queues;
204 static uint16_t vmdq_pool_base, vmdq_queue_base;
205 static uint16_t queues_per_pool;
/* One VLAN tag per VMDQ pool; indexed by virtio device_fh in link_vmdq(). */
207 const uint16_t vlan_tags[] = {
208 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
209 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015,
210 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
211 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
212 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
213 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
214 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
215 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
218 /* ethernet addresses of ports */
219 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
221 /* heads for the main used and free linked lists for the data path. */
222 static struct virtio_net_data_ll *ll_root_used = NULL;
223 static struct virtio_net_data_ll *ll_root_free = NULL;
225 /* Array of data core structures containing information on individual core linked lists. */
226 static struct lcore_info lcore_info[RTE_MAX_LCORE];
228 /* Used for queueing bursts of TX packets. */
/* NOTE(review): the 'struct mbuf_table {' opening and its len/txq_id
 * fields are missing from this extraction; only m_table[] is visible. */
232 struct rte_mbuf *m_table[MAX_PKT_BURST];
235 /* TX queue for each data core. */
236 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
238 /* Vlan header struct used to insert vlan tags on TX. */
240 unsigned char h_dest[ETH_ALEN];
241 unsigned char h_source[ETH_ALEN];
244 __be16 h_vlan_encapsulated_proto;
247 /* Header lengths. */
249 #define VLAN_ETH_HLEN 18
251 /* Per-device statistics struct */
252 struct device_statistics {
/* NOTE(review): tx/tx_total (non-atomic, updated only by the owning
 * lcore) are missing lines here; rx counters are atomic because both
 * data cores and the local-VM TX path update them. */
254 rte_atomic64_t rx_total_atomic;
256 rte_atomic64_t rx_atomic;
257 } __rte_cache_aligned;
258 struct device_statistics dev_statistics[MAX_DEVICES];
261 * Builds up the correct configuration for VMDQ VLAN pool map
262 * according to the pool & queue limits.
265 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
267 struct rte_eth_vmdq_rx_conf conf;
268 struct rte_eth_vmdq_rx_conf *def_conf =
269 &vmdq_conf_default.rx_adv_conf.vmdq_rx_conf;
272 memset(&conf, 0, sizeof(conf));
273 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
274 conf.nb_pool_maps = num_devices;
275 conf.enable_loop_back = def_conf->enable_loop_back;
276 conf.rx_mode = def_conf->rx_mode;
278 for (i = 0; i < conf.nb_pool_maps; i++) {
279 conf.pool_map[i].vlan_id = vlan_tags[ i ];
280 conf.pool_map[i].pools = (1UL << i);
283 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
284 (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf,
285 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
290 * Validate the device number according to the max pool number gotten form
291 * dev_info. If the device number is invalid, give the error message and
292 * return -1. Each device must have its own pool.
/* NOTE(review): 'static inline int', braces and the 'return -1;' /
 * 'return 0;' statements are missing from this extraction -- confirm
 * against the full file. Reads the global num_devices. */
295 validate_num_devices(uint32_t max_nb_devices)
297 if (num_devices > max_nb_devices) {
298 RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n");
305 * Initialises a given port using global settings and with the rx buffers
306 * coming from the mbuf_pool passed as parameter
/*
 * NOTE(review): body is gappy in this extraction (variable declarations,
 * several 'if (retval != 0) return retval;' checks, queue-setup arguments
 * and closing braces are missing). Comments below only describe what the
 * visible lines establish. Returns 0 on success, negative on failure.
 */
309 port_init(uint8_t port)
311 struct rte_eth_dev_info dev_info;
312 struct rte_eth_conf port_conf;
313 struct rte_eth_rxconf *rxconf;
314 struct rte_eth_txconf *txconf;
315 int16_t rx_rings, tx_rings;
316 uint16_t rx_ring_size, tx_ring_size;
320 /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
321 rte_eth_dev_info_get (port, &dev_info);
323 if (dev_info.max_rx_queues > MAX_QUEUES) {
324 rte_exit(EXIT_FAILURE,
325 "please define MAX_QUEUES no less than %u in %s\n",
326 dev_info.max_rx_queues, __FILE__);
329 rxconf = &dev_info.default_rxconf;
330 txconf = &dev_info.default_txconf;
/* Drop packets when RX descriptors run out instead of back-pressuring. */
331 rxconf->rx_drop_en = 1;
333 /* Enable vlan offload */
334 txconf->txq_flags &= ~ETH_TXQ_FLAGS_NOVLANOFFL;
336 /*configure the number of supported virtio devices based on VMDQ limits */
337 num_devices = dev_info.max_vmdq_pools;
339 rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
340 tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
/* One TX queue per lcore so the data cores never contend on TX. */
341 tx_rings = (uint16_t)rte_lcore_count();
343 retval = validate_num_devices(MAX_DEVICES);
347 /* Get port configuration. */
348 retval = get_eth_conf(&port_conf, num_devices);
351 /* NIC queues are divided into pf queues and vmdq queues. */
352 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
353 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
354 num_vmdq_queues = num_devices * queues_per_pool;
355 num_queues = num_pf_queues + num_vmdq_queues;
356 vmdq_queue_base = dev_info.vmdq_queue_base;
357 vmdq_pool_base = dev_info.vmdq_pool_base;
358 printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n",
359 num_pf_queues, num_devices, queues_per_pool);
361 if (port >= rte_eth_dev_count()) return -1;
/* Mask out vhost features the user disabled so guests do not use them. */
363 if (enable_tx_csum == 0)
364 rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM);
366 if (enable_tso == 0) {
367 rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4);
368 rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6);
371 rx_rings = (uint16_t)dev_info.max_rx_queues;
372 /* Configure ethernet device. */
373 retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
377 /* Setup the queues. */
378 for (q = 0; q < rx_rings; q ++) {
379 retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
380 rte_eth_dev_socket_id(port),
386 for (q = 0; q < tx_rings; q ++) {
387 retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
388 rte_eth_dev_socket_id(port),
394 /* Start the device. */
395 retval = rte_eth_dev_start(port);
397 RTE_LOG(ERR, VHOST_DATA, "Failed to start the device.\n");
402 rte_eth_promiscuous_enable(port);
404 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
405 RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
406 RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
407 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
409 vmdq_ports_eth_addr[port].addr_bytes[0],
410 vmdq_ports_eth_addr[port].addr_bytes[1],
411 vmdq_ports_eth_addr[port].addr_bytes[2],
412 vmdq_ports_eth_addr[port].addr_bytes[3],
413 vmdq_ports_eth_addr[port].addr_bytes[4],
414 vmdq_ports_eth_addr[port].addr_bytes[5]);
420 * Set character device basename.
423 us_vhost_parse_basename(const char *q_arg)
425 /* parse number string */
427 if (strnlen(q_arg, MAX_BASENAME_SZ) > MAX_BASENAME_SZ)
430 snprintf((char*)&dev_basename, MAX_BASENAME_SZ, "%s", q_arg);
436 * Parse the portmask provided at run time.
/* NOTE(review): return type, locals ('char *end;', 'unsigned long pm;',
 * 'errno = 0;') and the return statements are missing from this
 * extraction. A mask of 0 is treated as invalid by the caller. */
439 parse_portmask(const char *portmask)
446 /* parse hexadecimal string */
447 pm = strtoul(portmask, &end, 16);
448 if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
459 * Parse num options at run time.
/* NOTE(review): returns the parsed value, or -1 on malformed input or
 * when the value exceeds max_valid_value; the declaration lines and
 * return statements are missing from this extraction. */
462 parse_num_opt(const char *q_arg, uint32_t max_valid_value)
469 /* parse unsigned int string */
470 num = strtoul(q_arg, &end, 10);
471 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
474 if (num > max_valid_value)
/* Display usage. Printed when the command line is malformed. */
485 us_vhost_usage(const char *prgname)
487 RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
489 " --rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n"
490 " --dev-basename <name>\n"
492 " -p PORTMASK: Set mask for ports to be used by application\n"
493 " --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n"
494 " --rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destintation queue is full\n"
495 " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\n"
496 " --rx-retry-num [0-N]: the number of retries on rx. This makes effect only if retries on rx enabled\n"
497 " --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
498 " --vlan-strip [0|1]: disable/enable(default) RX VLAN strip on host\n"
499 " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
500 " --dev-basename: The basename to be used for the character device.\n"
501 " --tx-csum [0|1] disable/enable TX checksum offload.\n"
502 " --tso [0|1] disable/enable TCP segment offload.\n",
507 * Parse the arguments given in the command line of the application.
/*
 * NOTE(review): long getopt_long() loop; this extraction drops the
 * 'switch (opt)'/'case' scaffolding, most closing braces, the error
 * 'return -1;' statements after each us_vhost_usage() call, and some
 * assignments (e.g. enable_retry, enable_tso, mergeable, vlan_strip,
 * enable_stats). Fills the file-scope option globals and the ports[]
 * array; returns 0 on success, -1 on any invalid option.
 */
510 us_vhost_parse_args(int argc, char **argv)
515 const char *prgname = argv[0];
516 static struct option long_option[] = {
517 {"vm2vm", required_argument, NULL, 0},
518 {"rx-retry", required_argument, NULL, 0},
519 {"rx-retry-delay", required_argument, NULL, 0},
520 {"rx-retry-num", required_argument, NULL, 0},
521 {"mergeable", required_argument, NULL, 0},
522 {"vlan-strip", required_argument, NULL, 0},
523 {"stats", required_argument, NULL, 0},
524 {"dev-basename", required_argument, NULL, 0},
525 {"tx-csum", required_argument, NULL, 0},
526 {"tso", required_argument, NULL, 0},
530 /* Parse command line */
531 while ((opt = getopt_long(argc, argv, "p:P",
532 long_option, &option_index)) != EOF) {
/* -p: portmask. */
536 enabled_port_mask = parse_portmask(optarg);
537 if (enabled_port_mask == 0) {
538 RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
539 us_vhost_usage(prgname);
/* -P: promiscuous VMDQ pools -- accept broadcast/multicast too. */
546 vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
547 ETH_VMDQ_ACCEPT_BROADCAST |
548 ETH_VMDQ_ACCEPT_MULTICAST;
549 rte_vhost_feature_enable(1ULL << VIRTIO_NET_F_CTRL_RX);
554 /* Enable/disable vm2vm comms. */
555 if (!strncmp(long_option[option_index].name, "vm2vm",
557 ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
559 RTE_LOG(INFO, VHOST_CONFIG,
560 "Invalid argument for "
562 us_vhost_usage(prgname);
565 vm2vm_mode = (vm2vm_type)ret;
569 /* Enable/disable retries on RX. */
570 if (!strncmp(long_option[option_index].name, "rx-retry", MAX_LONG_OPT_SZ)) {
571 ret = parse_num_opt(optarg, 1);
573 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
574 us_vhost_usage(prgname);
581 /* Enable/disable TX checksum offload. */
582 if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) {
583 ret = parse_num_opt(optarg, 1);
585 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
586 us_vhost_usage(prgname);
589 enable_tx_csum = ret;
592 /* Enable/disable TSO offload. */
593 if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) {
594 ret = parse_num_opt(optarg, 1);
596 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
597 us_vhost_usage(prgname);
603 /* Specify the retries delay time (in useconds) on RX. */
604 if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) {
605 ret = parse_num_opt(optarg, INT32_MAX);
607 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
608 us_vhost_usage(prgname);
611 burst_rx_delay_time = ret;
615 /* Specify the retries number on RX. */
616 if (!strncmp(long_option[option_index].name, "rx-retry-num", MAX_LONG_OPT_SZ)) {
617 ret = parse_num_opt(optarg, INT32_MAX);
619 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
620 us_vhost_usage(prgname);
623 burst_rx_retry_num = ret;
627 /* Enable/disable RX mergeable buffers. */
628 if (!strncmp(long_option[option_index].name, "mergeable", MAX_LONG_OPT_SZ)) {
629 ret = parse_num_opt(optarg, 1);
631 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
632 us_vhost_usage(prgname);
/* Mergeable buffers need jumbo-sized mbufs on the NIC side. */
637 vmdq_conf_default.rxmode.jumbo_frame = 1;
638 vmdq_conf_default.rxmode.max_rx_pkt_len
639 = JUMBO_FRAME_MAX_SIZE;
644 /* Enable/disable RX VLAN strip on host. */
645 if (!strncmp(long_option[option_index].name,
646 "vlan-strip", MAX_LONG_OPT_SZ)) {
647 ret = parse_num_opt(optarg, 1);
649 RTE_LOG(INFO, VHOST_CONFIG,
650 "Invalid argument for VLAN strip [0|1]\n");
651 us_vhost_usage(prgname);
655 vmdq_conf_default.rxmode.hw_vlan_strip =
660 /* Enable/disable stats. */
661 if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) {
662 ret = parse_num_opt(optarg, INT32_MAX);
664 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for stats [0..N]\n");
665 us_vhost_usage(prgname);
672 /* Set character device basename. */
673 if (!strncmp(long_option[option_index].name, "dev-basename", MAX_LONG_OPT_SZ)) {
674 if (us_vhost_parse_basename(optarg) == -1) {
675 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for character device basename (Max %d characters)\n", MAX_BASENAME_SZ);
676 us_vhost_usage(prgname);
683 /* Invalid option - print options. */
685 us_vhost_usage(prgname);
/* Translate the bitmask into the ports[] array. */
690 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
691 if (enabled_port_mask & (1 << i))
692 ports[num_ports++] = (uint8_t)i;
695 if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) {
696 RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
697 "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS);
705 * Update the global var NUM_PORTS and array PORTS according to system ports number
706 * and return valid ports number
708 static unsigned check_ports_num(unsigned nb_ports)
/* Snapshot taken BEFORE any clamping below. NOTE(review): entries set to
 * INVALID_PORT_ID in the loop do not decrement this count here; the
 * visible lines do not show a decrement -- confirm against the full file. */
710 unsigned valid_num_ports = num_ports;
713 if (num_ports > nb_ports) {
714 RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
715 num_ports, nb_ports);
716 num_ports = nb_ports;
719 for (portid = 0; portid < num_ports; portid ++) {
720 if (ports[portid] >= nb_ports) {
721 RTE_LOG(INFO, VHOST_PORT, "\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
722 ports[portid], (nb_ports - 1));
723 ports[portid] = INVALID_PORT_ID;
727 return valid_num_ports;
731 * Compares a packet destination MAC address to a device MAC address.
733 static inline int __attribute__((always_inline))
734 ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb)
/* NOTE(review): loads 8 bytes from a 6-byte ether_addr and masks off the
 * top two with MAC_ADDR_CMP -- technically an out-of-bounds read and a
 * strict-aliasing violation; presumably safe only because these addresses
 * live inside larger cache-aligned structures. Verify before reusing. */
736 return ((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0;
740 * This function learns the MAC address of the device and registers this along with a
741 * vlan tag to a VMDQ.
/*
 * NOTE(review): gappy extraction -- locals ('int i, ret;'), some braces
 * and the 'return -1;' error paths are missing. On success, sets
 * vdev->ready = DEVICE_RX and returns 0.
 */
744 link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
746 struct ether_hdr *pkt_hdr;
747 struct virtio_net_data_ll *dev_ll;
748 struct virtio_net *dev = vdev->dev;
751 /* Learn MAC address of guest device from packet */
752 pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
754 dev_ll = ll_root_used;
/* Refuse to register a source MAC already claimed by another device. */
756 while (dev_ll != NULL) {
757 if (ether_addr_cmp(&(pkt_hdr->s_addr), &dev_ll->vdev->mac_address)) {
758 RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: This device is using an existing MAC address and has not been registered.\n", dev->device_fh);
761 dev_ll = dev_ll->next;
764 for (i = 0; i < ETHER_ADDR_LEN; i++)
765 vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];
767 /* vlan_tag currently uses the device_id. */
768 vdev->vlan_tag = vlan_tags[dev->device_fh];
770 /* Print out VMDQ registration info. */
771 RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\n",
773 vdev->mac_address.addr_bytes[0], vdev->mac_address.addr_bytes[1],
774 vdev->mac_address.addr_bytes[2], vdev->mac_address.addr_bytes[3],
775 vdev->mac_address.addr_bytes[4], vdev->mac_address.addr_bytes[5],
778 /* Register the MAC address. */
779 ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
780 (uint32_t)dev->device_fh + vmdq_pool_base);
782 RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n",
785 /* Enable stripping of the vlan tag as we handle routing. */
787 rte_eth_dev_set_vlan_strip_on_queue(ports[0],
788 (uint16_t)vdev->vmdq_rx_q, 1);
790 /* Set device as ready for RX. */
791 vdev->ready = DEVICE_RX;
797 * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX
798 * queue before disabling RX on the device.
/* NOTE(review): loop scaffolding around the drain (the 'while (rx_count)'
 * wrapper re-polling rte_eth_rx_burst) is partially missing from this
 * extraction; only the visible calls are authoritative. */
801 unlink_vmdq(struct vhost_dev *vdev)
805 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
807 if (vdev->ready == DEVICE_RX) {
808 /*clear MAC and VLAN settings*/
809 rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address);
810 for (i = 0; i < 6; i++)
811 vdev->mac_address.addr_bytes[i] = 0;
815 /*Clear out the receive buffers*/
816 rx_count = rte_eth_rx_burst(ports[0],
817 (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
820 for (i = 0; i < rx_count; i++)
821 rte_pktmbuf_free(pkts_burst[i]);
823 rx_count = rte_eth_rx_burst(ports[0],
824 (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
/* Back to MAC-learning state so the next TX packet re-registers it. */
827 vdev->ready = DEVICE_MAC_LEARNING;
832 * Check if the packet destination MAC address is for a local device. If so then put
833 * the packet on that devices RX queue. If not then return.
/*
 * NOTE(review): returns 0 when the packet was consumed locally (delivered
 * or deliberately dropped) per the caller's check in virtio_tx_route();
 * the explicit 'return' statements and several braces are missing from
 * this extraction.
 */
835 static inline int __attribute__((always_inline))
836 virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
838 struct virtio_net_data_ll *dev_ll;
839 struct ether_hdr *pkt_hdr;
841 struct virtio_net *dev = vdev->dev;
842 struct virtio_net *tdev; /* destination virito device */
844 pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
846 /*get the used devices list*/
847 dev_ll = ll_root_used;
849 while (dev_ll != NULL) {
850 if ((dev_ll->vdev->ready == DEVICE_RX) && ether_addr_cmp(&(pkt_hdr->d_addr),
851 &dev_ll->vdev->mac_address)) {
853 /* Drop the packet if the TX packet is destined for the TX device. */
854 if (dev_ll->vdev->dev->device_fh == dev->device_fh) {
855 RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") TX: "
856 "Source and destination MAC addresses are the same. "
857 "Dropping packet.\n",
861 tdev = dev_ll->vdev->dev;
864 RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") TX: "
865 "MAC address is local\n", tdev->device_fh);
867 if (unlikely(dev_ll->vdev->remove)) {
868 /*drop the packet if the device is marked for removal*/
869 RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") "
870 "Device is marked for removal\n", tdev->device_fh);
872 /*send the packet to the local virtio device*/
873 ret = rte_vhost_enqueue_burst(tdev, VIRTIO_RXQ, &m, 1);
/* Stats: rx counters are atomic because another lcore owns tdev. */
876 &dev_statistics[tdev->device_fh].rx_total_atomic,
879 &dev_statistics[tdev->device_fh].rx_atomic,
881 dev_statistics[dev->device_fh].tx_total++;
882 dev_statistics[dev->device_fh].tx += ret;
888 dev_ll = dev_ll->next;
895 * Check if the destination MAC of a packet is one local VM,
896 * and get its vlan tag, and offset if it is.
/*
 * NOTE(review): the '*offset = VLAN_HLEN;' assignment and the return
 * statements are among the lines missing from this extraction. Returns
 * non-zero when the packet must be dropped (self-destined), 0 otherwise;
 * writes *vlan_tag for the destination VM when found.
 */
898 static inline int __attribute__((always_inline))
899 find_local_dest(struct virtio_net *dev, struct rte_mbuf *m,
900 uint32_t *offset, uint16_t *vlan_tag)
902 struct virtio_net_data_ll *dev_ll = ll_root_used;
903 struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
905 while (dev_ll != NULL) {
906 if ((dev_ll->vdev->ready == DEVICE_RX)
907 && ether_addr_cmp(&(pkt_hdr->d_addr),
908 &dev_ll->vdev->mac_address)) {
910 * Drop the packet if the TX packet is
911 * destined for the TX device.
913 if (dev_ll->vdev->dev->device_fh == dev->device_fh) {
914 RTE_LOG(DEBUG, VHOST_DATA,
915 "(%"PRIu64") TX: Source and destination"
916 " MAC addresses are the same. Dropping "
918 dev_ll->vdev->dev->device_fh);
923 * HW vlan strip will reduce the packet length
924 * by minus length of vlan tag, so need restore
925 * the packet length by plus it.
930 vlan_tags[(uint16_t)dev_ll->vdev->dev->device_fh];
932 RTE_LOG(DEBUG, VHOST_DATA,
933 "(%"PRIu64") TX: pkt to local VM device id:"
934 "(%"PRIu64") vlan tag: %d.\n",
935 dev->device_fh, dev_ll->vdev->dev->device_fh,
940 dev_ll = dev_ll->next;
/*
 * Pseudo-header checksum for TSO: dispatch on the mbuf's IPv4/IPv6
 * offload flag. NOTE(review): the 'static uint16_t' return-type line and
 * braces are missing from this extraction.
 */
946 get_psd_sum(void *l3_hdr, uint64_t ol_flags)
948 if (ol_flags & PKT_TX_IPV4)
949 return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
950 else /* assume ethertype == ETHER_TYPE_IPv6 */
951 return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
/*
 * Prepare a guest TSO packet for NIC offload: zero the IPv4 header
 * checksum and seed the TCP checksum with the pseudo-header sum.
 * NOTE(review): the 'ipv4_hdr = (struct ipv4_hdr *)l3_hdr;' assignment
 * (original line ~964) and the 'void *l3_hdr' declaration line are
 * missing from this extraction -- as shown, ipv4_hdr would still be NULL
 * at line 965; confirm against the full file.
 */
954 static void virtio_tx_offload(struct rte_mbuf *m)
957 struct ipv4_hdr *ipv4_hdr = NULL;
958 struct tcp_hdr *tcp_hdr = NULL;
959 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
961 l3_hdr = (char *)eth_hdr + m->l2_len;
963 if (m->ol_flags & PKT_TX_IPV4) {
965 ipv4_hdr->hdr_checksum = 0;
966 m->ol_flags |= PKT_TX_IP_CKSUM;
969 tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len);
970 tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
974 * This function routes the TX packet to the correct interface. This may be a local device
975 * or the physical port.
/*
 * NOTE(review): gappy extraction -- the 'len = tx_q->len;' read, the
 * 'tx_q->len = len;' write-back, several braces and the final return are
 * missing. Packets are buffered in the per-lcore tx_q and flushed to the
 * physical port when the burst fills up.
 */
977 static inline void __attribute__((always_inline))
978 virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
980 struct mbuf_table *tx_q;
981 struct rte_mbuf **m_table;
982 unsigned len, ret, offset = 0;
983 const uint16_t lcore_id = rte_lcore_id();
984 struct virtio_net *dev = vdev->dev;
985 struct ether_hdr *nh;
987 /*check if destination is local VM*/
988 if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
993 if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
994 if (unlikely(find_local_dest(dev, m, &offset, &vlan_tag) != 0)) {
1000 RTE_LOG(DEBUG, VHOST_DATA, "(%" PRIu64 ") TX: "
1001 "MAC address is external\n", dev->device_fh);
1003 /*Add packet to the port tx queue*/
1004 tx_q = &lcore_tx_queue[lcore_id];
1007 nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
1008 if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
1009 /* Guest has inserted the vlan tag. */
1010 struct vlan_hdr *vh = (struct vlan_hdr *) (nh + 1);
1011 uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
/* For HW vm2vm, rewrite the tag so the NIC loops it to the right pool. */
1012 if ((vm2vm_mode == VM2VM_HARDWARE) &&
1013 (vh->vlan_tci != vlan_tag_be))
1014 vh->vlan_tci = vlan_tag_be;
/* No guest tag: let the NIC insert vlan_tag (set into m->vlan_tci below). */
1016 m->ol_flags |= PKT_TX_VLAN_PKT;
1019 * Find the right seg to adjust the data len when offset is
1020 * bigger than tail room size.
1022 if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
1023 if (likely(offset <= rte_pktmbuf_tailroom(m)))
1024 m->data_len += offset;
1026 struct rte_mbuf *seg = m;
1028 while ((seg->next != NULL) &&
1029 (offset > rte_pktmbuf_tailroom(seg)))
1032 seg->data_len += offset;
1034 m->pkt_len += offset;
1037 m->vlan_tci = vlan_tag;
1040 if (m->ol_flags & PKT_TX_TCP_SEG)
1041 virtio_tx_offload(m);
1043 tx_q->m_table[len] = m;
1046 dev_statistics[dev->device_fh].tx_total++;
1047 dev_statistics[dev->device_fh].tx++;
/* Flush a full burst; free whatever the NIC did not accept. */
1050 if (unlikely(len == MAX_PKT_BURST)) {
1051 m_table = (struct rte_mbuf **)tx_q->m_table;
1052 ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
1053 /* Free any buffers not handled by TX and update the port stats. */
1054 if (unlikely(ret < len)) {
1056 rte_pktmbuf_free(m_table[ret]);
1057 } while (++ret < len);
1067 * This function is called by each data core. It handles all RX/TX registered with the
1068 * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
1069 * with all devices in the main linked list.
/*
 * NOTE(review): main per-lcore polling loop. This extraction drops the
 * outer 'while (1)' scaffolding, the tx_q->len updates, several braces
 * and 'continue' statements -- statement order below is authoritative
 * only for the visible lines.
 */
1072 switch_worker(__attribute__((unused)) void *arg)
1074 struct virtio_net *dev = NULL;
1075 struct vhost_dev *vdev = NULL;
1076 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
1077 struct virtio_net_data_ll *dev_ll;
1078 struct mbuf_table *tx_q;
1079 volatile struct lcore_ll_info *lcore_ll;
/* TSC ticks corresponding to BURST_TX_DRAIN_US (~100us). */
1080 const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
1081 uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
1083 const uint16_t lcore_id = rte_lcore_id();
1084 const uint16_t num_cores = (uint16_t)rte_lcore_count();
1085 uint16_t rx_count = 0;
1089 RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id);
1090 lcore_ll = lcore_info[lcore_id].lcore_ll;
1093 tx_q = &lcore_tx_queue[lcore_id];
1094 for (i = 0; i < num_cores; i ++) {
1095 if (lcore_ids[i] == lcore_id) {
1102 cur_tsc = rte_rdtsc();
1104 * TX burst queue drain
1106 diff_tsc = cur_tsc - prev_tsc;
1107 if (unlikely(diff_tsc > drain_tsc)) {
1110 RTE_LOG(DEBUG, VHOST_DATA,
1111 "TX queue drained after timeout with burst size %u\n",
1114 /*Tx any packets in the queue*/
1115 ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
1116 (struct rte_mbuf **)tx_q->m_table,
1117 (uint16_t)tx_q->len);
1118 if (unlikely(ret < tx_q->len)) {
1120 rte_pktmbuf_free(tx_q->m_table[ret]);
1121 } while (++ret < tx_q->len);
1131 rte_prefetch0(lcore_ll->ll_root_used);
1133 * Inform the configuration core that we have exited the linked list and that no devices are
1134 * in use if requested.
1136 if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)
1137 lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
1142 dev_ll = lcore_ll->ll_root_used;
1144 while (dev_ll != NULL) {
1145 /*get virtio device ID*/
1146 vdev = dev_ll->vdev;
1149 if (unlikely(vdev->remove)) {
1150 dev_ll = dev_ll->next;
/* unlink_vmdq() call is among the missing lines here. */
1152 vdev->ready = DEVICE_SAFE_REMOVE;
1155 if (likely(vdev->ready == DEVICE_RX)) {
1157 rx_count = rte_eth_rx_burst(ports[0],
1158 vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
1162 * Retry is enabled and the queue is full then we wait and retry to avoid packet loss
1163 * Here MAX_PKT_BURST must be less than virtio queue size
1165 if (enable_retry && unlikely(rx_count > rte_vring_available_entries(dev, VIRTIO_RXQ))) {
1166 for (retry = 0; retry < burst_rx_retry_num; retry++) {
1167 rte_delay_us(burst_rx_delay_time);
1168 if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
1172 ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_burst, rx_count);
1175 &dev_statistics[dev_ll->vdev->dev->device_fh].rx_total_atomic,
1178 &dev_statistics[dev_ll->vdev->dev->device_fh].rx_atomic, ret_count);
/* Free the whole burst: enqueue copied the data into guest buffers. */
1180 while (likely(rx_count)) {
1182 rte_pktmbuf_free(pkts_burst[rx_count]);
1188 if (likely(!vdev->remove)) {
1189 /* Handle guest TX*/
1190 tx_count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool, pkts_burst, MAX_PKT_BURST);
1191 /* If this is the first received packet we need to learn the MAC and setup VMDQ */
1192 if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
1193 if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) {
1195 rte_pktmbuf_free(pkts_burst[--tx_count]);
1198 for (i = 0; i < tx_count; ++i) {
1199 virtio_tx_route(vdev, pkts_burst[i],
1200 vlan_tags[(uint16_t)dev->device_fh]);
1204 /*move to the next device in the list*/
1205 dev_ll = dev_ll->next;
1213 * Add an entry to a used linked list. A free entry must first be found
1214 * in the free linked list using get_data_ll_free_entry();
/* NOTE(review): the 'if (ll == NULL)' branch body and the tail append
 * ('ll->next = ll_dev;') are among the missing lines; only the
 * empty-list assignment at the bottom is visible. */
1217 add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,
1218 struct virtio_net_data_ll *ll_dev)
1220 struct virtio_net_data_ll *ll = *ll_root_addr;
1222 /* Set next as NULL and use a compiler barrier to avoid reordering. */
1223 ll_dev->next = NULL;
1224 rte_compiler_barrier();
1226 /* If ll == NULL then this is the first device. */
1228 /* Increment to the tail of the linked list. */
1229 while ((ll->next != NULL) )
1234 *ll_root_addr = ll_dev;
1239 * Remove an entry from a used linked list. The entry must then be added to
1240 * the free linked list using put_data_ll_free_entry().
/* NOTE(review): ll_dev_last is the node preceding ll_dev (NULL when
 * ll_dev is the head). The 'if (ll == ll_dev)' head-check line is
 * missing from this extraction. */
1243 rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,
1244 struct virtio_net_data_ll *ll_dev,
1245 struct virtio_net_data_ll *ll_dev_last)
1247 struct virtio_net_data_ll *ll = *ll_root_addr;
1249 if (unlikely((ll == NULL) || (ll_dev == NULL)))
/* Removing the head: advance the root pointer. */
1253 *ll_root_addr = ll_dev->next;
/* Otherwise splice the predecessor past the removed node. */
1255 if (likely(ll_dev_last != NULL))
1256 ll_dev_last->next = ll_dev->next;
1258 RTE_LOG(ERR, VHOST_CONFIG, "Remove entry form ll failed.\n");
1262 * Find and return an entry from the free linked list.
1264 static struct virtio_net_data_ll *
1265 get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr)
1267 struct virtio_net_data_ll *ll_free = *ll_root_addr;
1268 struct virtio_net_data_ll *ll_dev;
1270 if (ll_free == NULL)
1274 *ll_root_addr = ll_free->next;
1280 * Place an entry back on to the free linked list.
1283 put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr,
1284 struct virtio_net_data_ll *ll_dev)
1286 struct virtio_net_data_ll *ll_free = *ll_root_addr;
1291 ll_dev->next = ll_free;
1292 *ll_root_addr = ll_dev;
1296 * Creates a linked list of a given size.
1298 static struct virtio_net_data_ll *
1299 alloc_data_ll(uint32_t size)
1301 struct virtio_net_data_ll *ll_new;
1304 /* Malloc and then chain the linked list. */
1305 ll_new = malloc(size * sizeof(struct virtio_net_data_ll));
1306 if (ll_new == NULL) {
1307 RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for ll_new.\n");
1311 for (i = 0; i < size - 1; i++) {
1312 ll_new[i].vdev = NULL;
1313 ll_new[i].next = &ll_new[i+1];
1315 ll_new[i].next = NULL;
1321 * Create the main linked list along with each individual cores linked list. A used and a free list
1322 * are created to manage entries.
1329 RTE_LCORE_FOREACH_SLAVE(lcore) {
1330 lcore_info[lcore].lcore_ll = malloc(sizeof(struct lcore_ll_info));
1331 if (lcore_info[lcore].lcore_ll == NULL) {
1332 RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for lcore_ll.\n");
1336 lcore_info[lcore].lcore_ll->device_num = 0;
1337 lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
1338 lcore_info[lcore].lcore_ll->ll_root_used = NULL;
1339 if (num_devices % num_switching_cores)
1340 lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll((num_devices / num_switching_cores) + 1);
1342 lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll(num_devices / num_switching_cores);
1345 /* Allocate devices up to a maximum of MAX_DEVICES. */
1346 ll_root_free = alloc_data_ll(MIN((num_devices), MAX_DEVICES));
1352 * Remove a device from the specific data core linked list and from the main linked list. Synchonization
1353 * occurs through the use of the lcore dev_removal_flag. Device is made volatile here to avoid re-ordering
1354 * of dev->remove=1 which can cause an infinite loop in the rte_pause loop.
1357 destroy_device (volatile struct virtio_net *dev)
1359 struct virtio_net_data_ll *ll_lcore_dev_cur;
1360 struct virtio_net_data_ll *ll_main_dev_cur;
1361 struct virtio_net_data_ll *ll_lcore_dev_last = NULL;
1362 struct virtio_net_data_ll *ll_main_dev_last = NULL;
1363 struct vhost_dev *vdev;
1366 dev->flags &= ~VIRTIO_DEV_RUNNING;
1368 vdev = (struct vhost_dev *)dev->priv;
1369 /*set the remove flag. */
1371 while(vdev->ready != DEVICE_SAFE_REMOVE) {
1375 /* Search for entry to be removed from lcore ll */
1376 ll_lcore_dev_cur = lcore_info[vdev->coreid].lcore_ll->ll_root_used;
1377 while (ll_lcore_dev_cur != NULL) {
1378 if (ll_lcore_dev_cur->vdev == vdev) {
1381 ll_lcore_dev_last = ll_lcore_dev_cur;
1382 ll_lcore_dev_cur = ll_lcore_dev_cur->next;
1386 if (ll_lcore_dev_cur == NULL) {
1387 RTE_LOG(ERR, VHOST_CONFIG,
1388 "(%"PRIu64") Failed to find the dev to be destroy.\n",
1393 /* Search for entry to be removed from main ll */
1394 ll_main_dev_cur = ll_root_used;
1395 ll_main_dev_last = NULL;
1396 while (ll_main_dev_cur != NULL) {
1397 if (ll_main_dev_cur->vdev == vdev) {
1400 ll_main_dev_last = ll_main_dev_cur;
1401 ll_main_dev_cur = ll_main_dev_cur->next;
1405 /* Remove entries from the lcore and main ll. */
1406 rm_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last);
1407 rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);
1409 /* Set the dev_removal_flag on each lcore. */
1410 RTE_LCORE_FOREACH_SLAVE(lcore) {
1411 lcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL;
1415 * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that
1416 * they can no longer access the device removed from the linked lists and that the devices
1417 * are no longer in use.
1419 RTE_LCORE_FOREACH_SLAVE(lcore) {
1420 while (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL) {
1425 /* Add the entries back to the lcore and main free ll.*/
1426 put_data_ll_free_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur);
1427 put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);
1429 /* Decrement number of device on the lcore. */
1430 lcore_info[vdev->coreid].lcore_ll->device_num--;
1432 RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed from data core\n", dev->device_fh);
1439 * A new device is added to a data core. First the device is added to the main linked list
1440 * and the allocated to a specific data core.
1443 new_device (struct virtio_net *dev)
1445 struct virtio_net_data_ll *ll_dev;
1446 int lcore, core_add = 0;
1447 uint32_t device_num_min = num_devices;
1448 struct vhost_dev *vdev;
1450 vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
1452 RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Couldn't allocate memory for vhost dev\n",
1459 /* Add device to main ll */
1460 ll_dev = get_data_ll_free_entry(&ll_root_free);
1461 if (ll_dev == NULL) {
1462 RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in linked list. Device limit "
1463 "of %d devices per core has been reached\n",
1464 dev->device_fh, num_devices);
1468 ll_dev->vdev = vdev;
1469 add_data_ll_entry(&ll_root_used, ll_dev);
1471 = dev->device_fh * queues_per_pool + vmdq_queue_base;
1473 /*reset ready flag*/
1474 vdev->ready = DEVICE_MAC_LEARNING;
1477 /* Find a suitable lcore to add the device. */
1478 RTE_LCORE_FOREACH_SLAVE(lcore) {
1479 if (lcore_info[lcore].lcore_ll->device_num < device_num_min) {
1480 device_num_min = lcore_info[lcore].lcore_ll->device_num;
1484 /* Add device to lcore ll */
1485 ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free);
1486 if (ll_dev == NULL) {
1487 RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh);
1488 vdev->ready = DEVICE_SAFE_REMOVE;
1489 destroy_device(dev);
1493 ll_dev->vdev = vdev;
1494 vdev->coreid = core_add;
1496 add_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_dev);
1498 /* Initialize device stats */
1499 memset(&dev_statistics[dev->device_fh], 0, sizeof(struct device_statistics));
1501 /* Disable notifications. */
1502 rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);
1503 rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);
1504 lcore_info[vdev->coreid].lcore_ll->device_num++;
1505 dev->flags |= VIRTIO_DEV_RUNNING;
1507 RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, vdev->coreid);
1513 * These callback allow devices to be added to the data core when configuration
1514 * has been fully complete.
1516 static const struct virtio_net_device_ops virtio_net_device_ops =
1518 .new_device = new_device,
1519 .destroy_device = destroy_device,
1523 * This is a thread will wake up after a period to print stats if the user has
1529 struct virtio_net_data_ll *dev_ll;
1530 uint64_t tx_dropped, rx_dropped;
1531 uint64_t tx, tx_total, rx, rx_total;
1533 const char clr[] = { 27, '[', '2', 'J', '\0' };
1534 const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' };
1537 sleep(enable_stats);
1539 /* Clear screen and move to top left */
1540 printf("%s%s", clr, top_left);
1542 printf("\nDevice statistics ====================================");
1544 dev_ll = ll_root_used;
1545 while (dev_ll != NULL) {
1546 device_fh = (uint32_t)dev_ll->vdev->dev->device_fh;
1547 tx_total = dev_statistics[device_fh].tx_total;
1548 tx = dev_statistics[device_fh].tx;
1549 tx_dropped = tx_total - tx;
1550 rx_total = rte_atomic64_read(
1551 &dev_statistics[device_fh].rx_total_atomic);
1552 rx = rte_atomic64_read(
1553 &dev_statistics[device_fh].rx_atomic);
1554 rx_dropped = rx_total - rx;
1556 printf("\nStatistics for device %"PRIu32" ------------------------------"
1557 "\nTX total: %"PRIu64""
1558 "\nTX dropped: %"PRIu64""
1559 "\nTX successful: %"PRIu64""
1560 "\nRX total: %"PRIu64""
1561 "\nRX dropped: %"PRIu64""
1562 "\nRX successful: %"PRIu64"",
1571 dev_ll = dev_ll->next;
1573 printf("\n======================================================\n");
1577 /* When we receive a INT signal, unregister vhost driver */
1579 sigint_handler(__rte_unused int signum)
1581 /* Unregister vhost driver. */
1582 int ret = rte_vhost_driver_unregister((char *)&dev_basename);
1584 rte_exit(EXIT_FAILURE, "vhost driver unregister failure.\n");
1589 * Main function, does initialisation and calls the per-lcore functions. The CUSE
1590 * device is also registered here to handle the IOCTLs.
1593 main(int argc, char *argv[])
1595 unsigned lcore_id, core_id = 0;
1596 unsigned nb_ports, valid_num_ports;
1599 static pthread_t tid;
1600 char thread_name[RTE_MAX_THREAD_NAME_LEN];
1602 signal(SIGINT, sigint_handler);
1605 ret = rte_eal_init(argc, argv);
1607 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
1611 /* parse app arguments */
1612 ret = us_vhost_parse_args(argc, argv);
1614 rte_exit(EXIT_FAILURE, "Invalid argument\n");
1616 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++)
1617 if (rte_lcore_is_enabled(lcore_id))
1618 lcore_ids[core_id ++] = lcore_id;
1620 if (rte_lcore_count() > RTE_MAX_LCORE)
1621 rte_exit(EXIT_FAILURE,"Not enough cores\n");
1623 /*set the number of swithcing cores available*/
1624 num_switching_cores = rte_lcore_count()-1;
1626 /* Get the number of physical ports. */
1627 nb_ports = rte_eth_dev_count();
1628 if (nb_ports > RTE_MAX_ETHPORTS)
1629 nb_ports = RTE_MAX_ETHPORTS;
1632 * Update the global var NUM_PORTS and global array PORTS
1633 * and get value of var VALID_NUM_PORTS according to system ports number
1635 valid_num_ports = check_ports_num(nb_ports);
1637 if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) {
1638 RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
1639 "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS);
1643 /* Create the mbuf pool. */
1644 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
1645 NUM_MBUFS_PER_PORT * valid_num_ports, MBUF_CACHE_SIZE,
1646 0, MBUF_DATA_SIZE, rte_socket_id());
1647 if (mbuf_pool == NULL)
1648 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
1650 if (vm2vm_mode == VM2VM_HARDWARE) {
1651 /* Enable VT loop back to let L2 switch to do it. */
1652 vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
1653 RTE_LOG(DEBUG, VHOST_CONFIG,
1654 "Enable loop back for L2 switch in vmdq.\n");
1657 /* initialize all ports */
1658 for (portid = 0; portid < nb_ports; portid++) {
1659 /* skip ports that are not enabled */
1660 if ((enabled_port_mask & (1 << portid)) == 0) {
1661 RTE_LOG(INFO, VHOST_PORT,
1662 "Skipping disabled port %d\n", portid);
1665 if (port_init(portid) != 0)
1666 rte_exit(EXIT_FAILURE,
1667 "Cannot initialize network ports\n");
1670 /* Initialise all linked lists. */
1671 if (init_data_ll() == -1)
1672 rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n");
1674 /* Initialize device stats */
1675 memset(&dev_statistics, 0, sizeof(dev_statistics));
1677 /* Enable stats if the user option is set. */
1679 ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);
1681 rte_exit(EXIT_FAILURE,
1682 "Cannot create print-stats thread\n");
1684 /* Set thread_name for aid in debugging. */
1685 snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats");
1686 ret = rte_thread_setname(tid, thread_name);
1688 RTE_LOG(ERR, VHOST_CONFIG,
1689 "Cannot set print-stats name\n");
1692 /* Launch all data cores. */
1693 RTE_LCORE_FOREACH_SLAVE(lcore_id)
1694 rte_eal_remote_launch(switch_worker, NULL, lcore_id);
1697 rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
1699 /* Register vhost(cuse or user) driver to handle vhost messages. */
1700 ret = rte_vhost_driver_register((char *)&dev_basename);
1702 rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
1704 rte_vhost_driver_callback_register(&virtio_net_device_ops);
1706 /* Start CUSE session. */
1707 rte_vhost_driver_session_start();