 * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <arpa/inet.h>

#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/virtio_ring.h>

#include <sys/eventfd.h>
#include <sys/param.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_string_fns.h>
#include <rte_malloc.h>
#include <rte_vhost.h>
#define MAX_QUEUES 128

/* the maximum number of external ports supported */
#define MAX_SUP_PORTS 1

#define MBUF_CACHE_SIZE 128
#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE

#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */
#define BURST_RX_RETRIES 4 /* Number of retries on RX. */
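/*
 * With the defaults above, an RX burst that finds the guest ring full
 * waits at most BURST_RX_RETRIES * BURST_RX_WAIT_US = 4 * 15 = 60 us
 * before the packets that still do not fit are dropped (see
 * drain_eth_rx() below).
 */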
#define JUMBO_FRAME_MAX_SIZE 0x2600

/* State of virtio device. */
#define DEVICE_MAC_LEARNING 0
#define DEVICE_SAFE_REMOVE 2

/* Configurable number of RX/TX ring descriptors */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 512
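/*
 * Note: port_init() may use a smaller TX ring than this default when
 * dequeue zero copy is enabled; see the comment there.
 */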
#define INVALID_PORT_ID 0xFF

/* Max number of devices. Limited by vmdq. */
#define MAX_DEVICES 64

/* Size of buffers used for snprintfs. */
#define MAX_PRINT_BUFF 6072

/* Maximum long option length for option parsing. */
#define MAX_LONG_OPT_SZ 64
/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;

/* Promiscuous mode */
static uint32_t promiscuous;

/* number of devices/queues to support */
static uint32_t num_queues = 0;
static uint32_t num_devices;

static struct rte_mempool *mbuf_pool;
static int mergeable;

/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;

static uint32_t enable_stats = 0;
/* Enable retries on RX. */
static uint32_t enable_retry = 1;

/* Disable TX checksum offload */
static uint32_t enable_tx_csum;

/* Disable TSO offload */
static uint32_t enable_tso;

static int client_mode;
static int dequeue_zero_copy;

static int builtin_net_driver;

/* Specify timeout (in microseconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;

/* Socket file paths. Can be set by user */
static char *socket_files;
static int nb_sockets;
/* empty vmdq configuration structure. Filled in programmatically */
static struct rte_eth_conf vmdq_conf_default = {
    .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
    .header_split = 0, /**< Header Split disabled */
    .hw_ip_checksum = 0, /**< IP checksum offload disabled */
    .hw_vlan_filter = 0, /**< VLAN filtering disabled */
    /*
     * VLAN strip is necessary for 1G NICs such as the I350;
     * it fixes a bug where IPv4 forwarding in the guest could not
     * forward packets from one virtio device to another.
     */
    .hw_vlan_strip = 1, /**< VLAN strip enabled. */
    .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
    .hw_strip_crc = 1, /**< CRC stripped by hardware */

    .mq_mode = ETH_MQ_TX_NONE,

        /* should be overridden separately in code with appropriate values */
        .nb_queue_pools = ETH_8_POOLS,
        .enable_default_pool = 0,
        .pool_map = {{0, 0},},
static unsigned lcore_ids[RTE_MAX_LCORE];
static uint8_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports = 0; /**< The number of ports specified in command line */
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
static uint16_t queues_per_pool;

const uint16_t vlan_tags[] = {
    1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
    1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015,
    1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
    1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
    1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
    1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
    1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
    1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
/* ethernet addresses of ports */
static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

static struct vhost_dev_tailq_list vhost_dev_list =
    TAILQ_HEAD_INITIALIZER(vhost_dev_list);

static struct lcore_info lcore_info[RTE_MAX_LCORE];

/* Used for queueing bursts of TX packets. */
    struct rte_mbuf *m_table[MAX_PKT_BURST];

/* TX queue for each data core. */
struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];

#define MBUF_TABLE_DRAIN_TSC ((rte_get_tsc_hz() + US_PER_S - 1) \
    / US_PER_S * BURST_TX_DRAIN_US)
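/*
 * Worked example, assuming a hypothetical 2 GHz TSC:
 * MBUF_TABLE_DRAIN_TSC = 2,000,000,000 / 1,000,000 * 100 = 200,000
 * cycles, i.e. the TX mbuf table is flushed roughly every 100 us.
 */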
/*
 * Builds up the correct configuration for VMDQ VLAN pool map
 * according to the pool & queue limits.
 */
get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
    struct rte_eth_vmdq_rx_conf conf;
    struct rte_eth_vmdq_rx_conf *def_conf =
        &vmdq_conf_default.rx_adv_conf.vmdq_rx_conf;

    memset(&conf, 0, sizeof(conf));
    conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
    conf.nb_pool_maps = num_devices;
    conf.enable_loop_back = def_conf->enable_loop_back;
    conf.rx_mode = def_conf->rx_mode;

    for (i = 0; i < conf.nb_pool_maps; i++) {
        conf.pool_map[i].vlan_id = vlan_tags[i];
        conf.pool_map[i].pools = (1UL << i);

    (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
    (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
        sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
/*
 * Validate the device number according to the max pool number obtained from
 * dev_info. If the device number is invalid, print an error message and
 * return -1. Each device must have its own pool.
 */
validate_num_devices(uint32_t max_nb_devices)
    if (num_devices > max_nb_devices) {
        RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n");
/*
 * Initialises a given port using global settings and with the RX buffers
 * coming from the mbuf_pool passed as a parameter.
 */
port_init(uint8_t port)
    struct rte_eth_dev_info dev_info;
    struct rte_eth_conf port_conf;
    struct rte_eth_rxconf *rxconf;
    struct rte_eth_txconf *txconf;
    int16_t rx_rings, tx_rings;
    uint16_t rx_ring_size, tx_ring_size;

    /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
    rte_eth_dev_info_get(port, &dev_info);

    if (dev_info.max_rx_queues > MAX_QUEUES) {
        rte_exit(EXIT_FAILURE,
            "please define MAX_QUEUES no less than %u in %s\n",
            dev_info.max_rx_queues, __FILE__);

    rxconf = &dev_info.default_rxconf;
    txconf = &dev_info.default_txconf;
    rxconf->rx_drop_en = 1;

    /* Enable vlan offload */
    txconf->txq_flags &= ~ETH_TXQ_FLAGS_NOVLANOFFL;

    /* configure the number of supported virtio devices based on VMDQ limits */
    num_devices = dev_info.max_vmdq_pools;

    rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
    tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;

    /*
     * When dequeue zero copy is enabled, the guest Tx used vring will be
     * updated only when the corresponding mbuf is freed. Thus, the nb_tx_desc
     * (tx_ring_size here) must be small enough so that the driver will
     * hit the free threshold easily and free mbufs in time. Otherwise,
     * the guest Tx vring would be starved.
     */
    if (dequeue_zero_copy)

    tx_rings = (uint16_t)rte_lcore_count();

    retval = validate_num_devices(MAX_DEVICES);

    /* Get port configuration. */
    retval = get_eth_conf(&port_conf, num_devices);

    /* NIC queues are divided into pf queues and vmdq queues. */
    num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
    queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
    num_vmdq_queues = num_devices * queues_per_pool;
    num_queues = num_pf_queues + num_vmdq_queues;
    vmdq_queue_base = dev_info.vmdq_queue_base;
    vmdq_pool_base = dev_info.vmdq_pool_base;
    printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n",
        num_pf_queues, num_devices, queues_per_pool);
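    /*
     * Device-dependent example (hypothetical values): a NIC reporting
     * max_rx_queues = 128, vmdq_queue_num = 128 and max_vmdq_pools = 64
     * yields num_pf_queues = 0, queues_per_pool = 2 and
     * num_vmdq_queues = 64 * 2 = 128.
     */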
    if (port >= rte_eth_dev_count())
        return -1;

    rx_rings = (uint16_t)dev_info.max_rx_queues;
    /* Configure ethernet device. */
    retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
        RTE_LOG(ERR, VHOST_PORT, "Failed to configure port %u: %s.\n",
            port, strerror(-retval));

    /* Setup the queues. */
    for (q = 0; q < rx_rings; q++) {
        retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
            rte_eth_dev_socket_id(port),
            RTE_LOG(ERR, VHOST_PORT,
                "Failed to setup rx queue %u of port %u: %s.\n",
                q, port, strerror(-retval));

    for (q = 0; q < tx_rings; q++) {
        retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
            rte_eth_dev_socket_id(port),
            RTE_LOG(ERR, VHOST_PORT,
                "Failed to setup tx queue %u of port %u: %s.\n",
                q, port, strerror(-retval));

    /* Start the device. */
    retval = rte_eth_dev_start(port);
        RTE_LOG(ERR, VHOST_PORT, "Failed to start port %u: %s\n",
            port, strerror(-retval));

        rte_eth_promiscuous_enable(port);

    rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
    RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
    RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
        " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
        vmdq_ports_eth_addr[port].addr_bytes[0],
        vmdq_ports_eth_addr[port].addr_bytes[1],
        vmdq_ports_eth_addr[port].addr_bytes[2],
        vmdq_ports_eth_addr[port].addr_bytes[3],
        vmdq_ports_eth_addr[port].addr_bytes[4],
        vmdq_ports_eth_addr[port].addr_bytes[5]);
/*
 * Set socket file path.
 */
us_vhost_parse_socket_path(const char *q_arg)
    char *old;

    /* reject paths that do not fit in a PATH_MAX-sized slot */
    if (strnlen(q_arg, PATH_MAX) == PATH_MAX)
        return -1;

    old = socket_files;
    socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1));
    if (socket_files == NULL) {
        free(old);
        return -1;
    }
    snprintf(socket_files + nb_sockets * PATH_MAX, PATH_MAX, "%s", q_arg);
/*
 * Parse the portmask provided at run time.
 */
parse_portmask(const char *portmask)

    /* parse hexadecimal string */
    pm = strtoul(portmask, &end, 16);
    if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))

/*
 * Parse num options at run time.
 */
parse_num_opt(const char *q_arg, uint32_t max_valid_value)

    /* parse unsigned int string */
    num = strtoul(q_arg, &end, 10);
    if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))

    if (num > max_valid_value)
us_vhost_usage(const char *prgname)
    RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
    " --rx-retry [0|1] --mergeable [0|1] --stats [0-N]\n"
    " --socket-file <path>\n"
    " -p PORTMASK: Set mask for ports to be used by application\n"
    " --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n"
    " --rx-retry [0|1]: disable/enable(default) retries on RX. Enable retry if destination queue is full\n"
    " --rx-retry-delay [0-N]: timeout (in microseconds) between retries on RX. Effective only if RX retries are enabled\n"
    " --rx-retry-num [0-N]: the number of retries on RX. Effective only if RX retries are enabled\n"
    " --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
    " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
    " --socket-file: The path of the socket file.\n"
    " --tx-csum [0|1] disable/enable TX checksum offload.\n"
    " --tso [0|1] disable/enable TCP segmentation offload.\n"
    " --client register a vhost-user socket as client mode.\n"
    " --dequeue-zero-copy enables dequeue zero copy\n",
/*
 * Parse the arguments given in the command line of the application.
 */
us_vhost_parse_args(int argc, char **argv)
    const char *prgname = argv[0];
    static struct option long_option[] = {
        {"vm2vm", required_argument, NULL, 0},
        {"rx-retry", required_argument, NULL, 0},
        {"rx-retry-delay", required_argument, NULL, 0},
        {"rx-retry-num", required_argument, NULL, 0},
        {"mergeable", required_argument, NULL, 0},
        {"stats", required_argument, NULL, 0},
        {"socket-file", required_argument, NULL, 0},
        {"tx-csum", required_argument, NULL, 0},
        {"tso", required_argument, NULL, 0},
        {"client", no_argument, &client_mode, 1},
        {"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
        {"builtin-net-driver", no_argument, &builtin_net_driver, 1},

    /* Parse command line */
    while ((opt = getopt_long(argc, argv, "p:P",
            long_option, &option_index)) != EOF) {

            enabled_port_mask = parse_portmask(optarg);
            if (enabled_port_mask == 0) {
                RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
                us_vhost_usage(prgname);

            vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
                ETH_VMDQ_ACCEPT_BROADCAST |
                ETH_VMDQ_ACCEPT_MULTICAST;
        /* Enable/disable vm2vm comms. */
        if (!strncmp(long_option[option_index].name, "vm2vm",
            ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
                RTE_LOG(INFO, VHOST_CONFIG,
                    "Invalid argument for "
                us_vhost_usage(prgname);
                vm2vm_mode = (vm2vm_type)ret;

        /* Enable/disable retries on RX. */
        if (!strncmp(long_option[option_index].name, "rx-retry", MAX_LONG_OPT_SZ)) {
            ret = parse_num_opt(optarg, 1);
                RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
                us_vhost_usage(prgname);

        /* Enable/disable TX checksum offload. */
        if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) {
            ret = parse_num_opt(optarg, 1);
                RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
                us_vhost_usage(prgname);
                enable_tx_csum = ret;

        /* Enable/disable TSO offload. */
        if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) {
            ret = parse_num_opt(optarg, 1);
                RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
                us_vhost_usage(prgname);

        /* Specify the retry delay time (in microseconds) on RX. */
        if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) {
            ret = parse_num_opt(optarg, INT32_MAX);
                RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
                us_vhost_usage(prgname);
                burst_rx_delay_time = ret;

        /* Specify the number of retries on RX. */
        if (!strncmp(long_option[option_index].name, "rx-retry-num", MAX_LONG_OPT_SZ)) {
            ret = parse_num_opt(optarg, INT32_MAX);
                RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
                us_vhost_usage(prgname);
                burst_rx_retry_num = ret;

        /* Enable/disable RX mergeable buffers. */
        if (!strncmp(long_option[option_index].name, "mergeable", MAX_LONG_OPT_SZ)) {
            ret = parse_num_opt(optarg, 1);
                RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
                us_vhost_usage(prgname);
                vmdq_conf_default.rxmode.jumbo_frame = 1;
                vmdq_conf_default.rxmode.max_rx_pkt_len
                    = JUMBO_FRAME_MAX_SIZE;
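                /*
                 * Rationale: mergeable RX buffers let the guest chain
                 * several descriptors per packet, so frames larger than
                 * a single mbuf can be received; jumbo frames up to
                 * JUMBO_FRAME_MAX_SIZE (0x2600 = 9728 bytes) are
                 * therefore enabled on the NIC as well.
                 */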
        /* Enable/disable stats. */
        if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) {
            ret = parse_num_opt(optarg, INT32_MAX);
                RTE_LOG(INFO, VHOST_CONFIG,
                    "Invalid argument for stats [0..N]\n");
                us_vhost_usage(prgname);

        /* Set socket file path. */
        if (!strncmp(long_option[option_index].name,
                "socket-file", MAX_LONG_OPT_SZ)) {
            if (us_vhost_parse_socket_path(optarg) == -1) {
                RTE_LOG(INFO, VHOST_CONFIG,
                    "Invalid argument for socket name (Max %d characters)\n",
                us_vhost_usage(prgname);

        /* Invalid option - print options. */
            us_vhost_usage(prgname);
    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
        if (enabled_port_mask & (1 << i))
            ports[num_ports++] = (uint8_t)i;

    if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) {
        RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
            "but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
/*
 * Update the global var NUM_PORTS and array PORTS according to the number of
 * system ports, and return the number of valid ports.
 */
static unsigned check_ports_num(unsigned nb_ports)
    unsigned valid_num_ports = num_ports;

    if (num_ports > nb_ports) {
        RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number (%u) exceeds total system port number (%u)\n",
            num_ports, nb_ports);
        num_ports = nb_ports;

    for (portid = 0; portid < num_ports; portid++) {
        if (ports[portid] >= nb_ports) {
            RTE_LOG(INFO, VHOST_PORT, "\nSpecified port ID (%u) exceeds max system port ID (%u)\n",
                ports[portid], (nb_ports - 1));
            ports[portid] = INVALID_PORT_ID;

    return valid_num_ports;
static __rte_always_inline struct vhost_dev *
find_vhost_dev(struct ether_addr *mac)
    struct vhost_dev *vdev;

    TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
        if (vdev->ready == DEVICE_RX &&
                is_same_ether_addr(mac, &vdev->mac_address))
/*
 * This function learns the MAC address of the device and registers it,
 * along with a vlan tag, with a VMDQ pool.
 */
link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
    struct ether_hdr *pkt_hdr;

    /* Learn MAC address of guest device from packet */
    pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

    if (find_vhost_dev(&pkt_hdr->s_addr)) {
        RTE_LOG(ERR, VHOST_DATA,
            "(%d) device is using a registered MAC!\n",

    for (i = 0; i < ETHER_ADDR_LEN; i++)
        vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];

    /* vlan_tag currently uses the device_id. */
    vdev->vlan_tag = vlan_tags[vdev->vid];

    /* Print out VMDQ registration info. */
    RTE_LOG(INFO, VHOST_DATA,
        "(%d) mac %02x:%02x:%02x:%02x:%02x:%02x and vlan %d registered\n",
        vdev->mac_address.addr_bytes[0], vdev->mac_address.addr_bytes[1],
        vdev->mac_address.addr_bytes[2], vdev->mac_address.addr_bytes[3],
        vdev->mac_address.addr_bytes[4], vdev->mac_address.addr_bytes[5],

    /* Register the MAC address. */
    ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
        (uint32_t)vdev->vid + vmdq_pool_base);
        RTE_LOG(ERR, VHOST_DATA,
            "(%d) failed to add device MAC address to VMDQ\n",

    rte_eth_dev_set_vlan_strip_on_queue(ports[0], vdev->vmdq_rx_q, 1);

    /* Set device as ready for RX. */
    vdev->ready = DEVICE_RX;
/*
 * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX
 * queue before disabling RX on the device.
 */
unlink_vmdq(struct vhost_dev *vdev)
    struct rte_mbuf *pkts_burst[MAX_PKT_BURST];

    if (vdev->ready == DEVICE_RX) {
        /* clear MAC and VLAN settings */
        rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address);
        for (i = 0; i < 6; i++)
            vdev->mac_address.addr_bytes[i] = 0;

        /* Clear out the receive buffers */
        rx_count = rte_eth_rx_burst(ports[0],
            (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);

            for (i = 0; i < rx_count; i++)
                rte_pktmbuf_free(pkts_burst[i]);

            rx_count = rte_eth_rx_burst(ports[0],
                (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);

        vdev->ready = DEVICE_MAC_LEARNING;
static __rte_always_inline void
virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,

    if (builtin_net_driver) {
        ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
        ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);

        rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
        rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
        src_vdev->stats.tx_total++;
        src_vdev->stats.tx += ret;
/*
 * Check if the packet destination MAC address is for a local device. If so then put
 * the packet on that device's RX queue. If not then return.
 */
static __rte_always_inline int
virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
    struct ether_hdr *pkt_hdr;
    struct vhost_dev *dst_vdev;

    pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

    dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);

    if (vdev->vid == dst_vdev->vid) {
        RTE_LOG_DP(DEBUG, VHOST_DATA,
            "(%d) TX: src and dst MAC is same. Dropping packet.\n",

    RTE_LOG_DP(DEBUG, VHOST_DATA,
        "(%d) TX: MAC address is local\n", dst_vdev->vid);

    if (unlikely(dst_vdev->remove)) {
        RTE_LOG_DP(DEBUG, VHOST_DATA,
            "(%d) device is marked for removal\n", dst_vdev->vid);

    virtio_xmit(dst_vdev, vdev, m);
/*
 * Check if the destination MAC of a packet belongs to a local VM, and
 * if so, get its vlan tag and the length offset.
 */
static __rte_always_inline int
find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
    uint32_t *offset, uint16_t *vlan_tag)
    struct vhost_dev *dst_vdev;
    struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

    dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);

    if (vdev->vid == dst_vdev->vid) {
        RTE_LOG_DP(DEBUG, VHOST_DATA,
            "(%d) TX: src and dst MAC is same. Dropping packet.\n",

    /*
     * HW vlan strip will reduce the packet length by the length of
     * the vlan tag, so we need to restore the packet length by
     * adding it back.
     */
    *vlan_tag = vlan_tags[vdev->vid];

    RTE_LOG_DP(DEBUG, VHOST_DATA,
        "(%d) TX: pkt to local VM device id: (%d), vlan tag: %u.\n",
        vdev->vid, dst_vdev->vid, *vlan_tag);
get_psd_sum(void *l3_hdr, uint64_t ol_flags)
    if (ol_flags & PKT_TX_IPV4)
        return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
    else /* assume ethertype == ETHER_TYPE_IPv6 */
        return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);

static void virtio_tx_offload(struct rte_mbuf *m)
    struct ipv4_hdr *ipv4_hdr = NULL;
    struct tcp_hdr *tcp_hdr = NULL;
    struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

    l3_hdr = (char *)eth_hdr + m->l2_len;

    if (m->ol_flags & PKT_TX_IPV4) {
        ipv4_hdr->hdr_checksum = 0;
        m->ol_flags |= PKT_TX_IP_CKSUM;

    tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len);
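    /*
     * For TSO, the hardware expects the TCP checksum field to be seeded
     * with the pseudo-header checksum (computed without the payload);
     * the NIC then fills in the final checksum of every segment.
     */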
    tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);

free_pkts(struct rte_mbuf **pkts, uint16_t n)
        rte_pktmbuf_free(pkts[n]);

static __rte_always_inline void
do_drain_mbuf_table(struct mbuf_table *tx_q)

    count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
        tx_q->m_table, tx_q->len);
    if (unlikely(count < tx_q->len))
        free_pkts(&tx_q->m_table[count], tx_q->len - count);
/*
 * This function routes the TX packet to the correct interface. This
 * may be a local device or the physical port.
 */
static __rte_always_inline void
virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
    struct mbuf_table *tx_q;
    const uint16_t lcore_id = rte_lcore_id();
    struct ether_hdr *nh;

    nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
    if (unlikely(is_broadcast_ether_addr(&nh->d_addr))) {
        struct vhost_dev *vdev2;

        TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
            virtio_xmit(vdev2, vdev, m);

    /* check if destination is a local VM */
    if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {

    if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
        if (unlikely(find_local_dest(vdev, m, &offset,

    RTE_LOG_DP(DEBUG, VHOST_DATA,
        "(%d) TX: MAC address is external\n", vdev->vid);

    /* Add packet to the port tx queue */
    tx_q = &lcore_tx_queue[lcore_id];

    nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
    if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
        /* Guest has inserted the vlan tag. */
        struct vlan_hdr *vh = (struct vlan_hdr *) (nh + 1);
        uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
        if ((vm2vm_mode == VM2VM_HARDWARE) &&
                (vh->vlan_tci != vlan_tag_be))
            vh->vlan_tci = vlan_tag_be;

        m->ol_flags |= PKT_TX_VLAN_PKT;

        /*
         * Find the right seg to adjust the data len when offset is
         * bigger than tail room size.
         */
        if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
            if (likely(offset <= rte_pktmbuf_tailroom(m)))
                m->data_len += offset;
                struct rte_mbuf *seg = m;

                while ((seg->next != NULL) &&
                        (offset > rte_pktmbuf_tailroom(seg)))

                seg->data_len += offset;
            m->pkt_len += offset;

        m->vlan_tci = vlan_tag;

    if (m->ol_flags & PKT_TX_TCP_SEG)
        virtio_tx_offload(m);

    tx_q->m_table[tx_q->len++] = m;
        vdev->stats.tx_total++;

    if (unlikely(tx_q->len == MAX_PKT_BURST))
        do_drain_mbuf_table(tx_q);
static __rte_always_inline void
drain_mbuf_table(struct mbuf_table *tx_q)
    static uint64_t prev_tsc;

    cur_tsc = rte_rdtsc();
    if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {

        RTE_LOG_DP(DEBUG, VHOST_DATA,
            "TX queue drained after timeout with burst size %u\n",
        do_drain_mbuf_table(tx_q);
static __rte_always_inline void
drain_eth_rx(struct vhost_dev *vdev)
    uint16_t rx_count, enqueue_count;
    struct rte_mbuf *pkts[MAX_PKT_BURST];

    rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
        pkts, MAX_PKT_BURST);

    /*
     * When "enable_retry" is set, we wait and retry here when there
     * are not enough free slots in the queue to hold @rx_count
     * packets, to reduce packet loss.
     */
        unlikely(rx_count > rte_vhost_avail_entries(vdev->vid,

        for (retry = 0; retry < burst_rx_retry_num; retry++) {
            rte_delay_us(burst_rx_delay_time);
            if (rx_count <= rte_vhost_avail_entries(vdev->vid,

    if (builtin_net_driver) {
        enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
        enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,

        rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
        rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);

    free_pkts(pkts, rx_count);
static __rte_always_inline void
drain_virtio_tx(struct vhost_dev *vdev)
    struct rte_mbuf *pkts[MAX_PKT_BURST];

    if (builtin_net_driver) {
        count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
            pkts, MAX_PKT_BURST);
        count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,
            mbuf_pool, pkts, MAX_PKT_BURST);

    /* setup VMDq for the first packet */
    if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
        if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1)
            free_pkts(pkts, count);

    for (i = 0; i < count; ++i)
        virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]);
/*
 * Main function of vhost-switch. It basically does:
 *
 * for each vhost device {
 *    - drain_eth_rx()
 *
 *      Which drains the host eth Rx queue linked to the vhost device,
 *      and delivers all of the packets to the guest virtio Rx ring
 *      associated with this vhost device.
 *
 *    - drain_virtio_tx()
 *
 *      Which drains the guest virtio Tx queue and delivers all of the
 *      packets to the target, which could be another vhost device, or
 *      the physical eth dev. The routing is done in the function
 *      "virtio_tx_route".
 * }
 */
switch_worker(void *arg __rte_unused)
    unsigned lcore_id = rte_lcore_id();
    struct vhost_dev *vdev;
    struct mbuf_table *tx_q;

    RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id);

    tx_q = &lcore_tx_queue[lcore_id];
    for (i = 0; i < rte_lcore_count(); i++) {
        if (lcore_ids[i] == lcore_id) {

        drain_mbuf_table(tx_q);

        /*
         * Inform the configuration core that we have exited the
         * linked list and that no devices are in use, if requested.
         */
        if (lcore_info[lcore_id].dev_removal_flag == REQUEST_DEV_REMOVAL)
            lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL;

        /*
         * Process vhost devices
         */
        TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list,
            if (unlikely(vdev->remove)) {
                vdev->ready = DEVICE_SAFE_REMOVE;

            if (likely(vdev->ready == DEVICE_RX))

            if (likely(!vdev->remove))
                drain_virtio_tx(vdev);
/*
 * Remove a device from the specific data core linked list and from the
 * main linked list. Synchronization occurs through the use of the
 * lcore dev_removal_flag. Device is made volatile here to avoid re-ordering
 * of dev->remove=1 which can cause an infinite loop in the rte_pause loop.
 */
destroy_device(int vid)
    struct vhost_dev *vdev = NULL;

    TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
        if (vdev->vid == vid)

    /* set the remove flag. */
    while (vdev->ready != DEVICE_SAFE_REMOVE) {

    if (builtin_net_driver)
        vs_vhost_net_remove(vdev);

    TAILQ_REMOVE(&lcore_info[vdev->coreid].vdev_list, vdev,
    TAILQ_REMOVE(&vhost_dev_list, vdev, global_vdev_entry);

    /* Set the dev_removal_flag on each lcore. */
    RTE_LCORE_FOREACH_SLAVE(lcore)
        lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;

    /*
     * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL
     * we can be sure that they can no longer access the device removed
     * from the linked lists and that the devices are no longer in use.
     */
    RTE_LCORE_FOREACH_SLAVE(lcore) {
        while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)

    lcore_info[vdev->coreid].device_num--;

    RTE_LOG(INFO, VHOST_DATA,
        "(%d) device has been removed from data core\n",
/*
 * A new device is added to a data core. First the device is added to the main linked list
 * and then allocated to a specific data core.
 */
    int lcore, core_add = 0;
    uint32_t device_num_min = num_devices;
    struct vhost_dev *vdev;

    vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
        RTE_LOG(INFO, VHOST_DATA,
            "(%d) couldn't allocate memory for vhost dev\n",

    if (builtin_net_driver)
        vs_vhost_net_setup(vdev);

    TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry);
    vdev->vmdq_rx_q = vid * queues_per_pool + vmdq_queue_base;

    /* reset ready flag */
    vdev->ready = DEVICE_MAC_LEARNING;

    /* Find a suitable lcore to add the device. */
    RTE_LCORE_FOREACH_SLAVE(lcore) {
        if (lcore_info[lcore].device_num < device_num_min) {
            device_num_min = lcore_info[lcore].device_num;

    vdev->coreid = core_add;

    TAILQ_INSERT_TAIL(&lcore_info[vdev->coreid].vdev_list, vdev,
    lcore_info[vdev->coreid].device_num++;

    /* Disable notifications. */
    rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
    rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);

    RTE_LOG(INFO, VHOST_DATA,
        "(%d) device has been added to data core %d\n",
/*
 * These callbacks allow devices to be added to the data core when configuration
 * has fully completed.
 */
static const struct vhost_device_ops virtio_net_device_ops =
    .new_device = new_device,
    .destroy_device = destroy_device,
/*
 * This thread wakes up after a period to print stats if the user has
 * enabled them.
 */
    struct vhost_dev *vdev;
    uint64_t tx_dropped, rx_dropped;
    uint64_t tx, tx_total, rx, rx_total;
    const char clr[] = { 27, '[', '2', 'J', '\0' };
    const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };

        sleep(enable_stats);

        /* Clear screen and move to top left */
        printf("%s%s\n", clr, top_left);
        printf("Device statistics =================================\n");

        TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
            tx_total = vdev->stats.tx_total;
            tx = vdev->stats.tx;
            tx_dropped = tx_total - tx;

            rx_total = rte_atomic64_read(&vdev->stats.rx_total_atomic);
            rx = rte_atomic64_read(&vdev->stats.rx_atomic);
            rx_dropped = rx_total - rx;

            printf("Statistics for device %d\n"
                "-----------------------\n"
                "TX total:      %" PRIu64 "\n"
                "TX dropped:    %" PRIu64 "\n"
                "TX successful: %" PRIu64 "\n"
                "RX total:      %" PRIu64 "\n"
                "RX dropped:    %" PRIu64 "\n"
                "RX successful: %" PRIu64 "\n",
                tx_total, tx_dropped, tx,
                rx_total, rx_dropped, rx);

        printf("===================================================\n");
unregister_drivers(int socket_num)

    for (i = 0; i < socket_num; i++) {
        ret = rte_vhost_driver_unregister(socket_files + i * PATH_MAX);
            RTE_LOG(ERR, VHOST_CONFIG,
                "Fail to unregister vhost driver for %s.\n",
                socket_files + i * PATH_MAX);

/* When we receive a SIGINT signal, unregister the vhost driver */
sigint_handler(__rte_unused int signum)
    /* Unregister vhost driver. */
    unregister_drivers(nb_sockets);
/*
 * While creating an mbuf pool, one key thing is to figure out how
 * many mbuf entries are enough for our use. FYI, here are some
 * guidelines:
 *
 * - Each rx queue would reserve @nr_rx_desc mbufs at queue setup stage
 *
 * - For each switch core (a CPU core that does the packet switching), we
 *   also need to make some reservation for receiving the packets from the
 *   virtio Tx queue. How many is enough depends on the usage. It's normally
 *   a simple calculation like the following:
 *
 *       MAX_PKT_BURST * max packet size / mbuf size
 *
 *   So, we definitely need to allocate more mbufs when TSO is enabled.
 *
 * - Similarly, for each switching core, we should reserve @nr_rx_desc
 *   mbufs for receiving the packets from the physical NIC device.
 *
 * - We also need to make sure, for each switch core, we have allocated
 *   enough mbufs to fill up the mbuf cache.
 */
create_mbuf_pool(uint16_t nr_port, uint32_t nr_switch_core, uint32_t mbuf_size,
    uint32_t nr_queues, uint32_t nr_rx_desc, uint32_t nr_mbuf_cache)
    uint32_t nr_mbufs_per_core;
    uint32_t mtu = 1500;

    nr_mbufs_per_core = (mtu + mbuf_size) * MAX_PKT_BURST /
        (mbuf_size - RTE_PKTMBUF_HEADROOM);
    nr_mbufs_per_core += nr_rx_desc;
    nr_mbufs_per_core = RTE_MAX(nr_mbufs_per_core, nr_mbuf_cache);

    nr_mbufs = nr_queues * nr_rx_desc;
    nr_mbufs += nr_mbufs_per_core * nr_switch_core;
    nr_mbufs *= nr_port;
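    /*
     * Worked example under assumed defaults (MBUF_DATA_SIZE = 2176,
     * RTE_PKTMBUF_HEADROOM = 128, MAX_PKT_BURST = 32, nr_rx_desc = 1024,
     * nr_queues = 128): nr_mbufs_per_core = (1500 + 2176) * 32 / 2048
     * + 1024 = 1081, so nr_mbufs = (128 * 1024 + 1081 * nr_switch_core)
     * * nr_port.
     */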
    mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", nr_mbufs,
        nr_mbuf_cache, 0, mbuf_size,
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
/*
 * Main function, does initialisation and calls the per-lcore functions.
 */
main(int argc, char *argv[])
    unsigned lcore_id, core_id = 0;
    unsigned nb_ports, valid_num_ports;
    static pthread_t tid;
    char thread_name[RTE_MAX_THREAD_NAME_LEN];

    signal(SIGINT, sigint_handler);

    ret = rte_eal_init(argc, argv);
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

    /* parse app arguments */
    ret = us_vhost_parse_args(argc, argv);
        rte_exit(EXIT_FAILURE, "Invalid argument\n");

    for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
        TAILQ_INIT(&lcore_info[lcore_id].vdev_list);

        if (rte_lcore_is_enabled(lcore_id))
            lcore_ids[core_id++] = lcore_id;

    if (rte_lcore_count() > RTE_MAX_LCORE)
        rte_exit(EXIT_FAILURE, "Not enough cores\n");

    /* Get the number of physical ports. */
    nb_ports = rte_eth_dev_count();

    /*
     * Update the global var NUM_PORTS and global array PORTS
     * and get the value of var VALID_NUM_PORTS according to the
     * number of system ports.
     */
    valid_num_ports = check_ports_num(nb_ports);

    if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) {
        RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
            "but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);

    /*
     * FIXME: here we are trying to allocate mbufs big enough for
     * @MAX_QUEUES, but the truth is we're never going to use that
     * many queues here. We probably should only do allocation for
     * those queues we are going to use.
     */
    create_mbuf_pool(valid_num_ports, rte_lcore_count() - 1, MBUF_DATA_SIZE,
        MAX_QUEUES, RTE_TEST_RX_DESC_DEFAULT, MBUF_CACHE_SIZE);

    if (vm2vm_mode == VM2VM_HARDWARE) {
        /* Enable VT loopback to let the L2 switch do it. */
        vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
        RTE_LOG(DEBUG, VHOST_CONFIG,
            "Enable loop back for L2 switch in vmdq.\n");

    /* initialize all ports */
    for (portid = 0; portid < nb_ports; portid++) {
        /* skip ports that are not enabled */
        if ((enabled_port_mask & (1 << portid)) == 0) {
            RTE_LOG(INFO, VHOST_PORT,
                "Skipping disabled port %d\n", portid);

        if (port_init(portid) != 0)
            rte_exit(EXIT_FAILURE,
                "Cannot initialize network ports\n");

    /* Enable stats if the user option is set. */
        ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);
            rte_exit(EXIT_FAILURE,
                "Cannot create print-stats thread\n");

        /* Set thread_name to aid in debugging. */
        snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats");
        ret = rte_thread_setname(tid, thread_name);
            RTE_LOG(DEBUG, VHOST_CONFIG,
                "Cannot set print-stats name\n");

    /* Launch all data cores. */
    RTE_LCORE_FOREACH_SLAVE(lcore_id)
        rte_eal_remote_launch(switch_worker, NULL, lcore_id);
        flags |= RTE_VHOST_USER_CLIENT;

    if (dequeue_zero_copy)
        flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;

    /* Register vhost user driver to handle vhost messages. */
    for (i = 0; i < nb_sockets; i++) {
        char *file = socket_files + i * PATH_MAX;
        ret = rte_vhost_driver_register(file, flags);
            unregister_drivers(i);
            rte_exit(EXIT_FAILURE,
                "vhost driver register failure.\n");

        if (builtin_net_driver)
            rte_vhost_driver_set_features(file, VIRTIO_NET_FEATURES);

        if (mergeable == 0) {
            rte_vhost_driver_disable_features(file,
                1ULL << VIRTIO_NET_F_MRG_RXBUF);

        if (enable_tx_csum == 0) {
            rte_vhost_driver_disable_features(file,
                1ULL << VIRTIO_NET_F_CSUM);

        if (enable_tso == 0) {
            rte_vhost_driver_disable_features(file,
                1ULL << VIRTIO_NET_F_HOST_TSO4);
            rte_vhost_driver_disable_features(file,
                1ULL << VIRTIO_NET_F_HOST_TSO6);
            rte_vhost_driver_disable_features(file,
                1ULL << VIRTIO_NET_F_GUEST_TSO4);
            rte_vhost_driver_disable_features(file,
                1ULL << VIRTIO_NET_F_GUEST_TSO6);

            rte_vhost_driver_enable_features(file,
                1ULL << VIRTIO_NET_F_CTRL_RX);

        ret = rte_vhost_driver_callback_register(file,
            &virtio_net_device_ops);
            rte_exit(EXIT_FAILURE,
                "failed to register vhost driver callbacks.\n");

        if (rte_vhost_driver_start(file) < 0) {
            rte_exit(EXIT_FAILURE,
                "failed to start vhost driver.\n");

    RTE_LCORE_FOREACH_SLAVE(lcore_id)
        rte_eal_wait_lcore(lcore_id);