mbuf: add rte prefix to offload flags
[dpdk.git] / examples / vhost / main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <arpa/inet.h>
6 #include <getopt.h>
7 #include <linux/if_ether.h>
8 #include <linux/if_vlan.h>
9 #include <linux/virtio_net.h>
10 #include <linux/virtio_ring.h>
11 #include <signal.h>
12 #include <stdint.h>
13 #include <sys/eventfd.h>
14 #include <sys/param.h>
15 #include <unistd.h>
16
17 #include <rte_cycles.h>
18 #include <rte_ethdev.h>
19 #include <rte_log.h>
20 #include <rte_string_fns.h>
21 #include <rte_malloc.h>
22 #include <rte_net.h>
23 #include <rte_vhost.h>
24 #include <rte_ip.h>
25 #include <rte_tcp.h>
26 #include <rte_pause.h>
27
28 #include "ioat.h"
29 #include "main.h"
30
31 #ifndef MAX_QUEUES
32 #define MAX_QUEUES 128
33 #endif
34
35 /* the maximum number of external ports supported */
36 #define MAX_SUP_PORTS 1
37
38 #define MBUF_CACHE_SIZE 128
39 #define MBUF_DATA_SIZE  RTE_MBUF_DEFAULT_BUF_SIZE
40
41 #define BURST_TX_DRAIN_US 100   /* TX drain every ~100us */
42
43 #define BURST_RX_WAIT_US 15     /* Defines how long we wait between retries on RX */
44 #define BURST_RX_RETRIES 4              /* Number of retries on RX. */
45
46 #define JUMBO_FRAME_MAX_SIZE    0x2600
47
48 /* State of virtio device. */
49 #define DEVICE_MAC_LEARNING 0
50 #define DEVICE_RX                       1
51 #define DEVICE_SAFE_REMOVE      2
52
53 /* Configurable number of RX/TX ring descriptors */
54 #define RTE_TEST_RX_DESC_DEFAULT 1024
55 #define RTE_TEST_TX_DESC_DEFAULT 512
56
57 #define INVALID_PORT_ID 0xFF
58
59 /* mask of enabled ports */
60 static uint32_t enabled_port_mask = 0;
61
62 /* Promiscuous mode */
63 static uint32_t promiscuous;
64
65 /* number of devices/queues to support*/
66 static uint32_t num_queues = 0;
67 static uint32_t num_devices;
68
69 static struct rte_mempool *mbuf_pool;
70 static int mergeable;
71
72 /* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
73 typedef enum {
74         VM2VM_DISABLED = 0,
75         VM2VM_SOFTWARE = 1,
76         VM2VM_HARDWARE = 2,
77         VM2VM_LAST
78 } vm2vm_type;
79 static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;
80
81 /* Enable stats. */
82 static uint32_t enable_stats = 0;
83 /* Enable retries on RX. */
84 static uint32_t enable_retry = 1;
85
86 /* Disable TX checksum offload */
87 static uint32_t enable_tx_csum;
88
89 /* Disable TSO offload */
90 static uint32_t enable_tso;
91
92 static int client_mode;
93
94 static int builtin_net_driver;
95
96 static int async_vhost_driver;
97
98 static char *dma_type;
99
100 /* Specify the timeout (in microseconds) between retries on RX. */
101 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
102 /* Specify the number of retries on RX. */
103 static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
104
105 /* Socket file paths. Can be set by user */
106 static char *socket_files;
107 static int nb_sockets;
108
109 /* Empty VMDQ configuration structure. Filled in programmatically. */
110 static struct rte_eth_conf vmdq_conf_default = {
111         .rxmode = {
112                 .mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
113                 .split_hdr_size = 0,
114                 /*
115                  * VLAN strip is necessary for 1G NICs such as I350;
116                  * it fixes a bug where IPv4 forwarding in the guest
117                  * cannot forward packets from one virtio dev to another.
118                  */
119                 .offloads = DEV_RX_OFFLOAD_VLAN_STRIP,
120         },
121
122         .txmode = {
123                 .mq_mode = ETH_MQ_TX_NONE,
124                 .offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM |
125                              DEV_TX_OFFLOAD_TCP_CKSUM |
126                              DEV_TX_OFFLOAD_VLAN_INSERT |
127                              DEV_TX_OFFLOAD_MULTI_SEGS |
128                              DEV_TX_OFFLOAD_TCP_TSO),
129         },
130         .rx_adv_conf = {
131                 /*
132                  * should be overridden separately in code with
133                  * appropriate values
134                  */
135                 .vmdq_rx_conf = {
136                         .nb_queue_pools = ETH_8_POOLS,
137                         .enable_default_pool = 0,
138                         .default_pool = 0,
139                         .nb_pool_maps = 0,
140                         .pool_map = {{0, 0},},
141                 },
142         },
143 };
144
145
146 static unsigned lcore_ids[RTE_MAX_LCORE];
147 static uint16_t ports[RTE_MAX_ETHPORTS];
148 static unsigned num_ports = 0; /**< The number of ports specified on the command line */
149 static uint16_t num_pf_queues, num_vmdq_queues;
150 static uint16_t vmdq_pool_base, vmdq_queue_base;
151 static uint16_t queues_per_pool;
152
153 const uint16_t vlan_tags[] = {
154         1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
155         1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015,
156         1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
157         1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
158         1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
159         1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
160         1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
161         1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
162 };
163
164 /* ethernet addresses of ports */
165 static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
166
167 static struct vhost_dev_tailq_list vhost_dev_list =
168         TAILQ_HEAD_INITIALIZER(vhost_dev_list);
169
170 static struct lcore_info lcore_info[RTE_MAX_LCORE];
171
172 /* Used for queueing bursts of TX packets. */
173 struct mbuf_table {
174         unsigned len;
175         unsigned txq_id;
176         struct rte_mbuf *m_table[MAX_PKT_BURST];
177 };
178
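/*
 * Per-lcore, per-device TX buffer used to batch packets
 * before they are enqueued to a vhost device.
 */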
179 struct vhost_bufftable {
180         uint32_t len;
181         uint64_t pre_tsc;
182         struct rte_mbuf *m_table[MAX_PKT_BURST];
183 };
184
185 /* TX queue for each data core. */
186 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
187
188 /*
189  * Vhost TX buffer for each data core.
190  * Every data core maintains a TX buffer for every vhost device,
191  * which is used to batch packet enqueues for higher performance.
192  */
193 struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
194
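/* TX drain period (BURST_TX_DRAIN_US) expressed in TSC cycles. */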
195 #define MBUF_TABLE_DRAIN_TSC    ((rte_get_tsc_hz() + US_PER_S - 1) \
196                                  / US_PER_S * BURST_TX_DRAIN_US)
197 #define VLAN_HLEN       4
198
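/* Open the DMA channels given by the --dmas argument; only the "ioat" dma-type is supported. */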
199 static inline int
200 open_dma(const char *value)
201 {
202         if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0)
203                 return open_ioat(value);
204
205         return -1;
206 }
207
208 /*
209  * Builds up the correct configuration for VMDQ VLAN pool map
210  * according to the pool & queue limits.
211  */
212 static inline int
213 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
214 {
215         struct rte_eth_vmdq_rx_conf conf;
216         struct rte_eth_vmdq_rx_conf *def_conf =
217                 &vmdq_conf_default.rx_adv_conf.vmdq_rx_conf;
218         unsigned i;
219
220         memset(&conf, 0, sizeof(conf));
221         conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
222         conf.nb_pool_maps = num_devices;
223         conf.enable_loop_back = def_conf->enable_loop_back;
224         conf.rx_mode = def_conf->rx_mode;
225
226         for (i = 0; i < conf.nb_pool_maps; i++) {
227                 conf.pool_map[i].vlan_id = vlan_tags[ i ];
228                 conf.pool_map[i].pools = (1UL << i);
229         }
230
231         (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
232         (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
233                    sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
234         return 0;
235 }
236
237 /*
238  * Initialises a given port using global settings, with the Rx buffers
239  * coming from the global mbuf_pool.
240  */
241 static inline int
242 port_init(uint16_t port)
243 {
244         struct rte_eth_dev_info dev_info;
245         struct rte_eth_conf port_conf;
246         struct rte_eth_rxconf *rxconf;
247         struct rte_eth_txconf *txconf;
248         int16_t rx_rings, tx_rings;
249         uint16_t rx_ring_size, tx_ring_size;
250         int retval;
251         uint16_t q;
252
253         /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
254         retval = rte_eth_dev_info_get(port, &dev_info);
255         if (retval != 0) {
256                 RTE_LOG(ERR, VHOST_PORT,
257                         "Error during getting device (port %u) info: %s\n",
258                         port, strerror(-retval));
259
260                 return retval;
261         }
262
263         rxconf = &dev_info.default_rxconf;
264         txconf = &dev_info.default_txconf;
265         rxconf->rx_drop_en = 1;
266
267         /* Configure the number of supported virtio devices based on VMDQ limits. */
268         num_devices = dev_info.max_vmdq_pools;
269
270         rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
271         tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
272
273         tx_rings = (uint16_t)rte_lcore_count();
274
275         /* Get port configuration. */
276         retval = get_eth_conf(&port_conf, num_devices);
277         if (retval < 0)
278                 return retval;
279         /* NIC queues are divided into pf queues and vmdq queues.  */
280         num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
281         queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
282         num_vmdq_queues = num_devices * queues_per_pool;
283         num_queues = num_pf_queues + num_vmdq_queues;
284         vmdq_queue_base = dev_info.vmdq_queue_base;
285         vmdq_pool_base  = dev_info.vmdq_pool_base;
286         printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n",
287                 num_pf_queues, num_devices, queues_per_pool);
288
289         if (!rte_eth_dev_is_valid_port(port))
290                 return -1;
291
292         rx_rings = (uint16_t)dev_info.max_rx_queues;
293         if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
294                 port_conf.txmode.offloads |=
295                         DEV_TX_OFFLOAD_MBUF_FAST_FREE;
296         /* Configure ethernet device. */
297         retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
298         if (retval != 0) {
299                 RTE_LOG(ERR, VHOST_PORT, "Failed to configure port %u: %s.\n",
300                         port, strerror(-retval));
301                 return retval;
302         }
303
304         retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rx_ring_size,
305                 &tx_ring_size);
306         if (retval != 0) {
307                 RTE_LOG(ERR, VHOST_PORT, "Failed to adjust number of descriptors "
308                         "for port %u: %s.\n", port, strerror(-retval));
309                 return retval;
310         }
311         if (rx_ring_size > RTE_TEST_RX_DESC_DEFAULT) {
312                 RTE_LOG(ERR, VHOST_PORT, "Mbuf pool has an insufficient size "
313                         "for Rx queues on port %u.\n", port);
314                 return -1;
315         }
316
317         /* Setup the queues. */
318         rxconf->offloads = port_conf.rxmode.offloads;
319         for (q = 0; q < rx_rings; q ++) {
320                 retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
321                                                 rte_eth_dev_socket_id(port),
322                                                 rxconf,
323                                                 mbuf_pool);
324                 if (retval < 0) {
325                         RTE_LOG(ERR, VHOST_PORT,
326                                 "Failed to setup rx queue %u of port %u: %s.\n",
327                                 q, port, strerror(-retval));
328                         return retval;
329                 }
330         }
331         txconf->offloads = port_conf.txmode.offloads;
332         for (q = 0; q < tx_rings; q ++) {
333                 retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
334                                                 rte_eth_dev_socket_id(port),
335                                                 txconf);
336                 if (retval < 0) {
337                         RTE_LOG(ERR, VHOST_PORT,
338                                 "Failed to setup tx queue %u of port %u: %s.\n",
339                                 q, port, strerror(-retval));
340                         return retval;
341                 }
342         }
343
344         /* Start the device. */
345         retval  = rte_eth_dev_start(port);
346         if (retval < 0) {
347                 RTE_LOG(ERR, VHOST_PORT, "Failed to start port %u: %s\n",
348                         port, strerror(-retval));
349                 return retval;
350         }
351
352         if (promiscuous) {
353                 retval = rte_eth_promiscuous_enable(port);
354                 if (retval != 0) {
355                         RTE_LOG(ERR, VHOST_PORT,
356                                 "Failed to enable promiscuous mode on port %u: %s\n",
357                                 port, rte_strerror(-retval));
358                         return retval;
359                 }
360         }
361
362         retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
363         if (retval < 0) {
364                 RTE_LOG(ERR, VHOST_PORT,
365                         "Failed to get MAC address on port %u: %s\n",
366                         port, rte_strerror(-retval));
367                 return retval;
368         }
369
370         RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
371         RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
372                         " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
373                         port,
374                         vmdq_ports_eth_addr[port].addr_bytes[0],
375                         vmdq_ports_eth_addr[port].addr_bytes[1],
376                         vmdq_ports_eth_addr[port].addr_bytes[2],
377                         vmdq_ports_eth_addr[port].addr_bytes[3],
378                         vmdq_ports_eth_addr[port].addr_bytes[4],
379                         vmdq_ports_eth_addr[port].addr_bytes[5]);
380
381         return 0;
382 }
383
384 /*
385  * Set socket file path.
386  */
387 static int
388 us_vhost_parse_socket_path(const char *q_arg)
389 {
390         char *old;
391
392         /* parse number string */
393         if (strnlen(q_arg, PATH_MAX) == PATH_MAX)
394                 return -1;
395
396         old = socket_files;
397         socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1));
398         if (socket_files == NULL) {
399                 free(old);
400                 return -1;
401         }
402
403         strlcpy(socket_files + nb_sockets * PATH_MAX, q_arg, PATH_MAX);
404         nb_sockets++;
405
406         return 0;
407 }
408
409 /*
410  * Parse the portmask provided at run time.
411  */
412 static int
413 parse_portmask(const char *portmask)
414 {
415         char *end = NULL;
416         unsigned long pm;
417
418         errno = 0;
419
420         /* parse hexadecimal string */
421         pm = strtoul(portmask, &end, 16);
422         if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
423                 return 0;
424
425         return pm;
426
427 }
428
429 /*
430  * Parse num options at run time.
431  */
432 static int
433 parse_num_opt(const char *q_arg, uint32_t max_valid_value)
434 {
435         char *end = NULL;
436         unsigned long num;
437
438         errno = 0;
439
440         /* parse unsigned int string */
441         num = strtoul(q_arg, &end, 10);
442         if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
443                 return -1;
444
445         if (num > max_valid_value)
446                 return -1;
447
448         return num;
449
450 }
451
452 /*
453  * Display usage
454  */
455 static void
456 us_vhost_usage(const char *prgname)
457 {
458         RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
459         "               --vm2vm [0|1|2]\n"
460         "               --rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n"
461         "               --socket-file <path>\n"
462         "               --nb-devices ND\n"
463         "               -p PORTMASK: Set mask for ports to be used by application\n"
464         "               --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n"
465         "               --rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destintation queue is full\n"
466         "               --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\n"
467         "               --rx-retry-num [0-N]: the number of retries on rx. This makes effect only if retries on rx enabled\n"
468         "               --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
469         "               --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
470         "               --socket-file: The path of the socket file.\n"
471         "               --tx-csum [0|1] disable/enable TX checksum offload.\n"
472         "               --tso [0|1] disable/enable TCP segment offload.\n"
473         "               --client register a vhost-user socket as client mode.\n"
474         "               --dma-type register dma type for your vhost async driver. For example \"ioat\" for now.\n"
475         "               --dmas register dma channel for specific vhost device.\n",
476                prgname);
477 }
478
479 enum {
480 #define OPT_VM2VM               "vm2vm"
481         OPT_VM2VM_NUM = 256,
482 #define OPT_RX_RETRY            "rx-retry"
483         OPT_RX_RETRY_NUM,
484 #define OPT_RX_RETRY_DELAY      "rx-retry-delay"
485         OPT_RX_RETRY_DELAY_NUM,
486 #define OPT_RX_RETRY_NUMB       "rx-retry-num"
487         OPT_RX_RETRY_NUMB_NUM,
488 #define OPT_MERGEABLE           "mergeable"
489         OPT_MERGEABLE_NUM,
490 #define OPT_STATS               "stats"
491         OPT_STATS_NUM,
492 #define OPT_SOCKET_FILE         "socket-file"
493         OPT_SOCKET_FILE_NUM,
494 #define OPT_TX_CSUM             "tx-csum"
495         OPT_TX_CSUM_NUM,
496 #define OPT_TSO                 "tso"
497         OPT_TSO_NUM,
498 #define OPT_CLIENT              "client"
499         OPT_CLIENT_NUM,
500 #define OPT_BUILTIN_NET_DRIVER  "builtin-net-driver"
501         OPT_BUILTIN_NET_DRIVER_NUM,
502 #define OPT_DMA_TYPE            "dma-type"
503         OPT_DMA_TYPE_NUM,
504 #define OPT_DMAS                "dmas"
505         OPT_DMAS_NUM,
506 };
507
508 /*
509  * Parse the arguments given in the command line of the application.
510  */
511 static int
512 us_vhost_parse_args(int argc, char **argv)
513 {
514         int opt, ret;
515         int option_index;
516         unsigned i;
517         const char *prgname = argv[0];
518         static struct option long_option[] = {
519                 {OPT_VM2VM, required_argument,
520                                 NULL, OPT_VM2VM_NUM},
521                 {OPT_RX_RETRY, required_argument,
522                                 NULL, OPT_RX_RETRY_NUM},
523                 {OPT_RX_RETRY_DELAY, required_argument,
524                                 NULL, OPT_RX_RETRY_DELAY_NUM},
525                 {OPT_RX_RETRY_NUMB, required_argument,
526                                 NULL, OPT_RX_RETRY_NUMB_NUM},
527                 {OPT_MERGEABLE, required_argument,
528                                 NULL, OPT_MERGEABLE_NUM},
529                 {OPT_STATS, required_argument,
530                                 NULL, OPT_STATS_NUM},
531                 {OPT_SOCKET_FILE, required_argument,
532                                 NULL, OPT_SOCKET_FILE_NUM},
533                 {OPT_TX_CSUM, required_argument,
534                                 NULL, OPT_TX_CSUM_NUM},
535                 {OPT_TSO, required_argument,
536                                 NULL, OPT_TSO_NUM},
537                 {OPT_CLIENT, no_argument,
538                                 NULL, OPT_CLIENT_NUM},
539                 {OPT_BUILTIN_NET_DRIVER, no_argument,
540                                 NULL, OPT_BUILTIN_NET_DRIVER_NUM},
541                 {OPT_DMA_TYPE, required_argument,
542                                 NULL, OPT_DMA_TYPE_NUM},
543                 {OPT_DMAS, required_argument,
544                                 NULL, OPT_DMAS_NUM},
545                 {NULL, 0, 0, 0},
546         };
547
548         /* Parse command line */
549         while ((opt = getopt_long(argc, argv, "p:P",
550                         long_option, &option_index)) != EOF) {
551                 switch (opt) {
552                 /* Portmask */
553                 case 'p':
554                         enabled_port_mask = parse_portmask(optarg);
555                         if (enabled_port_mask == 0) {
556                                 RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
557                                 us_vhost_usage(prgname);
558                                 return -1;
559                         }
560                         break;
561
562                 case 'P':
563                         promiscuous = 1;
564                         vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
565                                 ETH_VMDQ_ACCEPT_BROADCAST |
566                                 ETH_VMDQ_ACCEPT_MULTICAST;
567                         break;
568
569                 case OPT_VM2VM_NUM:
570                         ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
571                         if (ret == -1) {
572                                 RTE_LOG(INFO, VHOST_CONFIG,
573                                         "Invalid argument for "
574                                         "vm2vm [0|1|2]\n");
575                                 us_vhost_usage(prgname);
576                                 return -1;
577                         }
578                         vm2vm_mode = (vm2vm_type)ret;
579                         break;
580
581                 case OPT_RX_RETRY_NUM:
582                         ret = parse_num_opt(optarg, 1);
583                         if (ret == -1) {
584                                 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
585                                 us_vhost_usage(prgname);
586                                 return -1;
587                         }
588                         enable_retry = ret;
589                         break;
590
591                 case OPT_TX_CSUM_NUM:
592                         ret = parse_num_opt(optarg, 1);
593                         if (ret == -1) {
594                                 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
595                                 us_vhost_usage(prgname);
596                                 return -1;
597                         }
598                         enable_tx_csum = ret;
599                         break;
600
601                 case OPT_TSO_NUM:
602                         ret = parse_num_opt(optarg, 1);
603                         if (ret == -1) {
604                                 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
605                                 us_vhost_usage(prgname);
606                                 return -1;
607                         }
608                         enable_tso = ret;
609                         break;
610
611                 case OPT_RX_RETRY_DELAY_NUM:
612                         ret = parse_num_opt(optarg, INT32_MAX);
613                         if (ret == -1) {
614                                 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
615                                 us_vhost_usage(prgname);
616                                 return -1;
617                         }
618                         burst_rx_delay_time = ret;
619                         break;
620
621                 case OPT_RX_RETRY_NUMB_NUM:
622                         ret = parse_num_opt(optarg, INT32_MAX);
623                         if (ret == -1) {
624                                 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
625                                 us_vhost_usage(prgname);
626                                 return -1;
627                         }
628                         burst_rx_retry_num = ret;
629                         break;
630
631                 case OPT_MERGEABLE_NUM:
632                         ret = parse_num_opt(optarg, 1);
633                         if (ret == -1) {
634                                 RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
635                                 us_vhost_usage(prgname);
636                                 return -1;
637                         }
638                         mergeable = !!ret;
639                         if (ret) {
640                                 vmdq_conf_default.rxmode.offloads |=
641                                         DEV_RX_OFFLOAD_JUMBO_FRAME;
642                                 vmdq_conf_default.rxmode.max_rx_pkt_len
643                                         = JUMBO_FRAME_MAX_SIZE;
644                         }
645                         break;
646
647                 case OPT_STATS_NUM:
648                         ret = parse_num_opt(optarg, INT32_MAX);
649                         if (ret == -1) {
650                                 RTE_LOG(INFO, VHOST_CONFIG,
651                                         "Invalid argument for stats [0..N]\n");
652                                 us_vhost_usage(prgname);
653                                 return -1;
654                         }
655                         enable_stats = ret;
656                         break;
657
658                 /* Set socket file path. */
659                 case OPT_SOCKET_FILE_NUM:
660                         if (us_vhost_parse_socket_path(optarg) == -1) {
661                                 RTE_LOG(INFO, VHOST_CONFIG,
662                                 "Invalid argument for socket name (Max %d characters)\n",
663                                 PATH_MAX);
664                                 us_vhost_usage(prgname);
665                                 return -1;
666                         }
667                         break;
668
669                 case OPT_DMA_TYPE_NUM:
670                         dma_type = optarg;
671                         break;
672
673                 case OPT_DMAS_NUM:
674                         if (open_dma(optarg) == -1) {
675                                 RTE_LOG(INFO, VHOST_CONFIG,
676                                         "Wrong DMA args\n");
677                                 us_vhost_usage(prgname);
678                                 return -1;
679                         }
680                         async_vhost_driver = 1;
681                         break;
682
683                 case OPT_CLIENT_NUM:
684                         client_mode = 1;
685                         break;
686
687                 case OPT_BUILTIN_NET_DRIVER_NUM:
688                         builtin_net_driver = 1;
689                         break;
690
691                 /* Invalid option - print options. */
692                 default:
693                         us_vhost_usage(prgname);
694                         return -1;
695                 }
696         }
697
698         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
699                 if (enabled_port_mask & (1 << i))
700                         ports[num_ports++] = i;
701         }
702
703         if ((num_ports ==  0) || (num_ports > MAX_SUP_PORTS)) {
704                 RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
705                         "but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
706                 return -1;
707         }
708
709         return 0;
710 }
711
712 /*
713  * Update the global variable num_ports and the ports array according to the
714  * number of system ports, and return the number of valid ports.
715  */
716 static unsigned check_ports_num(unsigned nb_ports)
717 {
718         unsigned valid_num_ports = num_ports;
719         unsigned portid;
720
721         if (num_ports > nb_ports) {
722                 RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
723                         num_ports, nb_ports);
724                 num_ports = nb_ports;
725         }
726
727         for (portid = 0; portid < num_ports; portid ++) {
728                 if (!rte_eth_dev_is_valid_port(ports[portid])) {
729                         RTE_LOG(INFO, VHOST_PORT,
730                                 "\nSpecified port ID(%u) is not valid\n",
731                                 ports[portid]);
732                         ports[portid] = INVALID_PORT_ID;
733                         valid_num_ports--;
734                 }
735         }
736         return valid_num_ports;
737 }
738
739 static __rte_always_inline struct vhost_dev *
740 find_vhost_dev(struct rte_ether_addr *mac)
741 {
742         struct vhost_dev *vdev;
743
744         TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
745                 if (vdev->ready == DEVICE_RX &&
746                     rte_is_same_ether_addr(mac, &vdev->mac_address))
747                         return vdev;
748         }
749
750         return NULL;
751 }
752
753 /*
754  * This function learns the MAC address of the device and registers it,
755  * along with a VLAN tag, with the VMDQ.
756  */
757 static int
758 link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
759 {
760         struct rte_ether_hdr *pkt_hdr;
761         int i, ret;
762
763         /* Learn MAC address of guest device from packet */
764         pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
765
766         if (find_vhost_dev(&pkt_hdr->s_addr)) {
767                 RTE_LOG(ERR, VHOST_DATA,
768                         "(%d) device is using a registered MAC!\n",
769                         vdev->vid);
770                 return -1;
771         }
772
773         for (i = 0; i < RTE_ETHER_ADDR_LEN; i++)
774                 vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];
775
776         /* vlan_tag currently uses the device_id. */
777         vdev->vlan_tag = vlan_tags[vdev->vid];
778
779         /* Print out VMDQ registration info. */
780         RTE_LOG(INFO, VHOST_DATA,
781                 "(%d) mac %02x:%02x:%02x:%02x:%02x:%02x and vlan %d registered\n",
782                 vdev->vid,
783                 vdev->mac_address.addr_bytes[0], vdev->mac_address.addr_bytes[1],
784                 vdev->mac_address.addr_bytes[2], vdev->mac_address.addr_bytes[3],
785                 vdev->mac_address.addr_bytes[4], vdev->mac_address.addr_bytes[5],
786                 vdev->vlan_tag);
787
788         /* Register the MAC address. */
789         ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
790                                 (uint32_t)vdev->vid + vmdq_pool_base);
791         if (ret)
792                 RTE_LOG(ERR, VHOST_DATA,
793                         "(%d) failed to add device MAC address to VMDQ\n",
794                         vdev->vid);
795
796         rte_eth_dev_set_vlan_strip_on_queue(ports[0], vdev->vmdq_rx_q, 1);
797
798         /* Set device as ready for RX. */
799         vdev->ready = DEVICE_RX;
800
801         return 0;
802 }
803
804 /*
805  * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX
806  * queue before disabling RX on the device.
807  */
808 static inline void
809 unlink_vmdq(struct vhost_dev *vdev)
810 {
811         unsigned i = 0;
812         unsigned rx_count;
813         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
814
815         if (vdev->ready == DEVICE_RX) {
816                 /*clear MAC and VLAN settings*/
817                 rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address);
818                 for (i = 0; i < 6; i++)
819                         vdev->mac_address.addr_bytes[i] = 0;
820
821                 vdev->vlan_tag = 0;
822
823                 /*Clear out the receive buffers*/
824                 rx_count = rte_eth_rx_burst(ports[0],
825                                         (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
826
827                 while (rx_count) {
828                         for (i = 0; i < rx_count; i++)
829                                 rte_pktmbuf_free(pkts_burst[i]);
830
831                         rx_count = rte_eth_rx_burst(ports[0],
832                                         (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
833                 }
834
835                 vdev->ready = DEVICE_MAC_LEARNING;
836         }
837 }
838
839 static inline void
840 free_pkts(struct rte_mbuf **pkts, uint16_t n)
841 {
842         while (n--)
843                 rte_pktmbuf_free(pkts[n]);
844 }
845
846 static __rte_always_inline void
847 complete_async_pkts(struct vhost_dev *vdev)
848 {
849         struct rte_mbuf *p_cpl[MAX_PKT_BURST];
850         uint16_t complete_count;
851
852         complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
853                                         VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
854         if (complete_count) {
855                 free_pkts(p_cpl, complete_count);
856                 __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, __ATOMIC_SEQ_CST);
857         }
858
859 }
860
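/*
 * Enqueue a single packet into the Rx ring of the destination vhost
 * device (VM2VM forwarding) and update statistics when enabled.
 */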
861 static __rte_always_inline void
862 sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
863             struct rte_mbuf *m)
864 {
865         uint16_t ret;
866
867         if (builtin_net_driver) {
868                 ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
869         } else {
870                 ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
871         }
872
873         if (enable_stats) {
874                 __atomic_add_fetch(&dst_vdev->stats.rx_total_atomic, 1,
875                                 __ATOMIC_SEQ_CST);
876                 __atomic_add_fetch(&dst_vdev->stats.rx_atomic, ret,
877                                 __ATOMIC_SEQ_CST);
878                 src_vdev->stats.tx_total++;
879                 src_vdev->stats.tx += ret;
880         }
881 }
882
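/*
 * Flush the per-lcore TX buffer of a vhost device into its Rx ring,
 * using the builtin, async or sync enqueue path.
 */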
883 static __rte_always_inline void
884 drain_vhost(struct vhost_dev *vdev)
885 {
886         uint16_t ret;
887         uint32_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
888         uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
889         struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
890
891         if (builtin_net_driver) {
892                 ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
893         } else if (async_vhost_driver) {
894                 uint32_t cpu_cpl_nr = 0;
895                 uint16_t enqueue_fail = 0;
896                 struct rte_mbuf *m_cpu_cpl[nr_xmit];
897
898                 complete_async_pkts(vdev);
899                 ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
900                                         m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
901                 __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, __ATOMIC_SEQ_CST);
902
903                 if (cpu_cpl_nr)
904                         free_pkts(m_cpu_cpl, cpu_cpl_nr);
905
906                 enqueue_fail = nr_xmit - ret;
907                 if (enqueue_fail)
908                         free_pkts(&m[ret], nr_xmit - ret);
909         } else {
910                 ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
911                                                 m, nr_xmit);
912         }
913
914         if (enable_stats) {
915                 __atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
916                                 __ATOMIC_SEQ_CST);
917                 __atomic_add_fetch(&vdev->stats.rx_atomic, ret,
918                                 __ATOMIC_SEQ_CST);
919         }
920
921         if (!async_vhost_driver)
922                 free_pkts(m, nr_xmit);
923 }
924
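/*
 * Drain every per-lcore vhost TX buffer that has not been flushed
 * within the last MBUF_TABLE_DRAIN_TSC cycles.
 */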
925 static __rte_always_inline void
926 drain_vhost_table(void)
927 {
928         uint16_t lcore_id = rte_lcore_id();
929         struct vhost_bufftable *vhost_txq;
930         struct vhost_dev *vdev;
931         uint64_t cur_tsc;
932
933         TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
934                 vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE
935                                                 + vdev->vid];
936
937                 cur_tsc = rte_rdtsc();
938                 if (unlikely(cur_tsc - vhost_txq->pre_tsc
939                                 > MBUF_TABLE_DRAIN_TSC)) {
940                         RTE_LOG_DP(DEBUG, VHOST_DATA,
941                                 "Vhost TX queue drained after timeout with burst size %u\n",
942                                 vhost_txq->len);
943                         drain_vhost(vdev);
944                         vhost_txq->len = 0;
945                         vhost_txq->pre_tsc = cur_tsc;
946                 }
947         }
948 }
949
950 /*
951  * Check if the packet destination MAC address is for a local device. If so then put
952  * the packet on that device's RX queue. If not then return.
953  */
954 static __rte_always_inline int
955 virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
956 {
957         struct rte_ether_hdr *pkt_hdr;
958         struct vhost_dev *dst_vdev;
959         struct vhost_bufftable *vhost_txq;
960         uint16_t lcore_id = rte_lcore_id();
961         pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
962
963         dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
964         if (!dst_vdev)
965                 return -1;
966
967         if (vdev->vid == dst_vdev->vid) {
968                 RTE_LOG_DP(DEBUG, VHOST_DATA,
969                         "(%d) TX: src and dst MAC is same. Dropping packet.\n",
970                         vdev->vid);
971                 return 0;
972         }
973
974         RTE_LOG_DP(DEBUG, VHOST_DATA,
975                 "(%d) TX: MAC address is local\n", dst_vdev->vid);
976
977         if (unlikely(dst_vdev->remove)) {
978                 RTE_LOG_DP(DEBUG, VHOST_DATA,
979                         "(%d) device is marked for removal\n", dst_vdev->vid);
980                 return 0;
981         }
982
983         vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev->vid];
984         vhost_txq->m_table[vhost_txq->len++] = m;
985
986         if (enable_stats) {
987                 vdev->stats.tx_total++;
988                 vdev->stats.tx++;
989         }
990
991         if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
992                 drain_vhost(dst_vdev);
993                 vhost_txq->len = 0;
994                 vhost_txq->pre_tsc = rte_rdtsc();
995         }
996         return 0;
997 }
998
999 /*
1000  * Check if the destination MAC of a packet belongs to a local VM,
1001  * and if so, get its VLAN tag and the length offset.
1002  */
1003 static __rte_always_inline int
1004 find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
1005         uint32_t *offset, uint16_t *vlan_tag)
1006 {
1007         struct vhost_dev *dst_vdev;
1008         struct rte_ether_hdr *pkt_hdr =
1009                 rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1010
1011         dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
1012         if (!dst_vdev)
1013                 return 0;
1014
1015         if (vdev->vid == dst_vdev->vid) {
1016                 RTE_LOG_DP(DEBUG, VHOST_DATA,
1017                         "(%d) TX: src and dst MAC is same. Dropping packet.\n",
1018                         vdev->vid);
1019                 return -1;
1020         }
1021
1022         /*
1023          * HW VLAN strip reduces the packet length by the
1024          * length of the VLAN tag, so the packet length needs
1025          * to be restored by adding it back.
1026          */
1027         *offset  = VLAN_HLEN;
1028         *vlan_tag = vlan_tags[vdev->vid];
1029
1030         RTE_LOG_DP(DEBUG, VHOST_DATA,
1031                 "(%d) TX: pkt to local VM device id: (%d), vlan tag: %u.\n",
1032                 vdev->vid, dst_vdev->vid, *vlan_tag);
1033
1034         return 0;
1035 }
1036
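/*
 * Prepare a packet for TSO: fill in the l2/l3/l4 header lengths,
 * set the segmentation and checksum offload flags and store the
 * pseudo-header checksum expected by the NIC.
 */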
1037 static void virtio_tx_offload(struct rte_mbuf *m)
1038 {
1039         struct rte_net_hdr_lens hdr_lens;
1040         struct rte_ipv4_hdr *ipv4_hdr;
1041         struct rte_tcp_hdr *tcp_hdr;
1042         uint32_t ptype;
1043         void *l3_hdr;
1044
1045         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1046         m->l2_len = hdr_lens.l2_len;
1047         m->l3_len = hdr_lens.l3_len;
1048         m->l4_len = hdr_lens.l4_len;
1049
1050         l3_hdr = rte_pktmbuf_mtod_offset(m, void *, m->l2_len);
1051         tcp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
1052                 m->l2_len + m->l3_len);
1053
1054         m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG;
1055         if ((ptype & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) {
1056                 m->ol_flags |= RTE_MBUF_F_TX_IPV4;
1057                 m->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
1058                 ipv4_hdr = l3_hdr;
1059                 ipv4_hdr->hdr_checksum = 0;
1060                 tcp_hdr->cksum = rte_ipv4_phdr_cksum(l3_hdr, m->ol_flags);
1061         } else { /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
1062                 m->ol_flags |= RTE_MBUF_F_TX_IPV6;
1063                 tcp_hdr->cksum = rte_ipv6_phdr_cksum(l3_hdr, m->ol_flags);
1064         }
1065 }
1066
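/*
 * Transmit the buffered packets on the physical port and free any
 * packets that could not be sent.
 */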
1067 static __rte_always_inline void
1068 do_drain_mbuf_table(struct mbuf_table *tx_q)
1069 {
1070         uint16_t count;
1071
1072         count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
1073                                  tx_q->m_table, tx_q->len);
1074         if (unlikely(count < tx_q->len))
1075                 free_pkts(&tx_q->m_table[count], tx_q->len - count);
1076
1077         tx_q->len = 0;
1078 }
1079
1080 /*
1081  * This function routes the TX packet to the correct interface. This
1082  * may be a local device or the physical port.
1083  */
1084 static __rte_always_inline void
1085 virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
1086 {
1087         struct mbuf_table *tx_q;
1088         unsigned offset = 0;
1089         const uint16_t lcore_id = rte_lcore_id();
1090         struct rte_ether_hdr *nh;
1091
1092
1093         nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1094         if (unlikely(rte_is_broadcast_ether_addr(&nh->d_addr))) {
1095                 struct vhost_dev *vdev2;
1096
1097                 TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
1098                         if (vdev2 != vdev)
1099                                 sync_virtio_xmit(vdev2, vdev, m);
1100                 }
1101                 goto queue2nic;
1102         }
1103
1104         /*check if destination is local VM*/
1105         if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
1106                 return;
1107
1108         if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
1109                 if (unlikely(find_local_dest(vdev, m, &offset,
1110                                              &vlan_tag) != 0)) {
1111                         rte_pktmbuf_free(m);
1112                         return;
1113                 }
1114         }
1115
1116         RTE_LOG_DP(DEBUG, VHOST_DATA,
1117                 "(%d) TX: MAC address is external\n", vdev->vid);
1118
1119 queue2nic:
1120
1121         /*Add packet to the port tx queue*/
1122         tx_q = &lcore_tx_queue[lcore_id];
1123
1124         nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1125         if (unlikely(nh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN))) {
1126                 /* Guest has inserted the vlan tag. */
1127                 struct rte_vlan_hdr *vh = (struct rte_vlan_hdr *) (nh + 1);
1128                 uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
1129                 if ((vm2vm_mode == VM2VM_HARDWARE) &&
1130                         (vh->vlan_tci != vlan_tag_be))
1131                         vh->vlan_tci = vlan_tag_be;
1132         } else {
1133                 m->ol_flags |= RTE_MBUF_F_TX_VLAN_PKT;
1134
1135                 /*
1136                  * Find the right segment to adjust the data length when the
1137                  * offset is bigger than the tailroom size.
1138                  */
1139                 if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
1140                         if (likely(offset <= rte_pktmbuf_tailroom(m)))
1141                                 m->data_len += offset;
1142                         else {
1143                                 struct rte_mbuf *seg = m;
1144
1145                                 while ((seg->next != NULL) &&
1146                                         (offset > rte_pktmbuf_tailroom(seg)))
1147                                         seg = seg->next;
1148
1149                                 seg->data_len += offset;
1150                         }
1151                         m->pkt_len += offset;
1152                 }
1153
1154                 m->vlan_tci = vlan_tag;
1155         }
1156
1157         if (m->ol_flags & RTE_MBUF_F_RX_LRO)
1158                 virtio_tx_offload(m);
1159
1160         tx_q->m_table[tx_q->len++] = m;
1161         if (enable_stats) {
1162                 vdev->stats.tx_total++;
1163                 vdev->stats.tx++;
1164         }
1165
1166         if (unlikely(tx_q->len == MAX_PKT_BURST))
1167                 do_drain_mbuf_table(tx_q);
1168 }
1169
1170
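/*
 * Flush the physical-port TX queue if it has not been drained within
 * the last MBUF_TABLE_DRAIN_TSC cycles.
 */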
1171 static __rte_always_inline void
1172 drain_mbuf_table(struct mbuf_table *tx_q)
1173 {
1174         static uint64_t prev_tsc;
1175         uint64_t cur_tsc;
1176
1177         if (tx_q->len == 0)
1178                 return;
1179
1180         cur_tsc = rte_rdtsc();
1181         if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
1182                 prev_tsc = cur_tsc;
1183
1184                 RTE_LOG_DP(DEBUG, VHOST_DATA,
1185                         "TX queue drained after timeout with burst size %u\n",
1186                         tx_q->len);
1187                 do_drain_mbuf_table(tx_q);
1188         }
1189 }
1190
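/*
 * Receive a burst from the VMDQ Rx queue of the physical port and
 * enqueue it into the guest Rx ring, optionally retrying while the
 * ring lacks free entries.
 */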
1191 static __rte_always_inline void
1192 drain_eth_rx(struct vhost_dev *vdev)
1193 {
1194         uint16_t rx_count, enqueue_count;
1195         struct rte_mbuf *pkts[MAX_PKT_BURST];
1196
1197         rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
1198                                     pkts, MAX_PKT_BURST);
1199
1200         if (!rx_count)
1201                 return;
1202
1203         /*
1204          * When "enable_retry" is set, wait and retry when there are
1205          * not enough free slots in the queue to hold @rx_count packets,
1206          * to reduce packet loss.
1207          */
1208         if (enable_retry &&
1209             unlikely(rx_count > rte_vhost_avail_entries(vdev->vid,
1210                         VIRTIO_RXQ))) {
1211                 uint32_t retry;
1212
1213                 for (retry = 0; retry < burst_rx_retry_num; retry++) {
1214                         rte_delay_us(burst_rx_delay_time);
1215                         if (rx_count <= rte_vhost_avail_entries(vdev->vid,
1216                                         VIRTIO_RXQ))
1217                                 break;
1218                 }
1219         }
1220
1221         if (builtin_net_driver) {
1222                 enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
1223                                                 pkts, rx_count);
1224         } else if (async_vhost_driver) {
1225                 uint32_t cpu_cpl_nr = 0;
1226                 uint16_t enqueue_fail = 0;
1227                 struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
1228
1229                 complete_async_pkts(vdev);
1230                 enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
1231                                         VIRTIO_RXQ, pkts, rx_count,
1232                                         m_cpu_cpl, &cpu_cpl_nr);
1233                 __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - cpu_cpl_nr,
1234                                         __ATOMIC_SEQ_CST);
1235
1236                 if (cpu_cpl_nr)
1237                         free_pkts(m_cpu_cpl, cpu_cpl_nr);
1238
1239                 enqueue_fail = rx_count - enqueue_count;
1240                 if (enqueue_fail)
1241                         free_pkts(&pkts[enqueue_count], enqueue_fail);
1242
1243         } else {
1244                 enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
1245                                                 pkts, rx_count);
1246         }
1247
1248         if (enable_stats) {
1249                 __atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count,
1250                                 __ATOMIC_SEQ_CST);
1251                 __atomic_add_fetch(&vdev->stats.rx_atomic, enqueue_count,
1252                                 __ATOMIC_SEQ_CST);
1253         }
1254
1255         if (!async_vhost_driver)
1256                 free_pkts(pkts, rx_count);
1257 }
1258
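/*
 * Dequeue a burst from the guest Tx ring, bind the device to its VMDQ
 * pool on the first packet if the MAC is not yet learned, and route
 * each packet to its destination.
 */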
1259 static __rte_always_inline void
1260 drain_virtio_tx(struct vhost_dev *vdev)
1261 {
1262         struct rte_mbuf *pkts[MAX_PKT_BURST];
1263         uint16_t count;
1264         uint16_t i;
1265
1266         if (builtin_net_driver) {
1267                 count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
1268                                         pkts, MAX_PKT_BURST);
1269         } else {
1270                 count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,
1271                                         mbuf_pool, pkts, MAX_PKT_BURST);
1272         }
1273
1274         /* setup VMDq for the first packet */
1275         if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
1276                 if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1)
1277                         free_pkts(pkts, count);
1278         }
1279
1280         for (i = 0; i < count; ++i)
1281                 virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]);
1282 }
1283
1284 /*
1285  * Main function of vhost-switch. It basically does:
1286  *
1287  * for each vhost device {
1288  *    - drain_eth_rx()
1289  *
1290  *      Which drains the host eth Rx queue linked to the vhost device,
1291  *      and delivers all of them to the guest virtio Rx ring associated with
1292  *      this vhost device.
1293  *
1294  *    - drain_virtio_tx()
1295  *
1296  *      Which drains the guest virtio Tx queue and delivers all of them
1297  *      to the target, which could be another vhost device, or the
1298  *      physical eth dev. The route is done in function "virtio_tx_route".
1299  * }
1300  */
1301 static int
1302 switch_worker(void *arg __rte_unused)
1303 {
1304         unsigned i;
1305         unsigned lcore_id = rte_lcore_id();
1306         struct vhost_dev *vdev;
1307         struct mbuf_table *tx_q;
1308
1309         RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id);
1310
1311         tx_q = &lcore_tx_queue[lcore_id];
1312         for (i = 0; i < rte_lcore_count(); i++) {
1313                 if (lcore_ids[i] == lcore_id) {
1314                         tx_q->txq_id = i;
1315                         break;
1316                 }
1317         }
1318
1319         while(1) {
1320                 drain_mbuf_table(tx_q);
1321                 drain_vhost_table();
1322                 /*
1323                  * Inform the configuration core that we have exited the
1324                  * linked list and that no devices are in use if requested.
1325                  */
1326                 if (lcore_info[lcore_id].dev_removal_flag == REQUEST_DEV_REMOVAL)
1327                         lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL;
1328
1329                 /*
1330                  * Process vhost devices
1331                  */
1332                 TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list,
1333                               lcore_vdev_entry) {
1334                         if (unlikely(vdev->remove)) {
1335                                 unlink_vmdq(vdev);
1336                                 vdev->ready = DEVICE_SAFE_REMOVE;
1337                                 continue;
1338                         }
1339
1340                         if (likely(vdev->ready == DEVICE_RX))
1341                                 drain_eth_rx(vdev);
1342
1343                         if (likely(!vdev->remove))
1344                                 drain_virtio_tx(vdev);
1345                 }
1346         }
1347
1348         return 0;
1349 }
1350
1351 /*
1352  * Remove a device from the specific data core linked list and from the
1353  * main linked list. Synchronization occurs through the use of the
1354  * lcore dev_removal_flag. Device is made volatile here to avoid re-ordering
1355  * of dev->remove=1 which can cause an infinite loop in the rte_pause loop.
1356  */
1357 static void
1358 destroy_device(int vid)
1359 {
1360         struct vhost_dev *vdev = NULL;
1361         int lcore;
1362         uint16_t i;
1363
1364         TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
1365                 if (vdev->vid == vid)
1366                         break;
1367         }
1368         if (!vdev)
1369                 return;
1370         /*set the remove flag. */
1371         vdev->remove = 1;
1372         while(vdev->ready != DEVICE_SAFE_REMOVE) {
1373                 rte_pause();
1374         }
1375
1376         for (i = 0; i < RTE_MAX_LCORE; i++)
1377                 rte_free(vhost_txbuff[i * MAX_VHOST_DEVICE + vid]);
1378
1379         if (builtin_net_driver)
1380                 vs_vhost_net_remove(vdev);
1381
1382         TAILQ_REMOVE(&lcore_info[vdev->coreid].vdev_list, vdev,
1383                      lcore_vdev_entry);
1384         TAILQ_REMOVE(&vhost_dev_list, vdev, global_vdev_entry);
1385
1386
1387         /* Set the dev_removal_flag on each lcore. */
1388         RTE_LCORE_FOREACH_WORKER(lcore)
1389                 lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;
1390
1391         /*
1392          * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL
1393          * we can be sure that they can no longer access the device removed
1394          * from the linked lists and that the devices are no longer in use.
1395          */
1396         RTE_LCORE_FOREACH_WORKER(lcore) {
1397                 while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)
1398                         rte_pause();
1399         }
1400
1401         lcore_info[vdev->coreid].device_num--;
1402
1403         RTE_LOG(INFO, VHOST_DATA,
1404                 "(%d) device has been removed from data core\n",
1405                 vdev->vid);
1406
1407         if (async_vhost_driver) {
1408                 uint16_t n_pkt = 0;
1409                 struct rte_mbuf *m_cpl[vdev->pkts_inflight];
1410
1411                 while (vdev->pkts_inflight) {
1412                         n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, VIRTIO_RXQ,
1413                                                 m_cpl, vdev->pkts_inflight);
1414                         free_pkts(m_cpl, n_pkt);
1415                         __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST);
1416                 }
1417
1418                 rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
1419         }
1420
1421         rte_free(vdev);
1422 }
1423
1424 /*
1425  * A new device is added to a data core. First the device is added to the main linked list
1426  * and then allocated to a specific data core.
1427  */
1428 static int
1429 new_device(int vid)
1430 {
1431         int lcore, core_add = 0;
1432         uint16_t i;
1433         uint32_t device_num_min = num_devices;
1434         struct vhost_dev *vdev;
1435         vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
1436         if (vdev == NULL) {
1437                 RTE_LOG(INFO, VHOST_DATA,
1438                         "(%d) couldn't allocate memory for vhost dev\n",
1439                         vid);
1440                 return -1;
1441         }
1442         vdev->vid = vid;
1443
1444         for (i = 0; i < RTE_MAX_LCORE; i++) {
1445                 vhost_txbuff[i * MAX_VHOST_DEVICE + vid]
1446                         = rte_zmalloc("vhost bufftable",
1447                                 sizeof(struct vhost_bufftable),
1448                                 RTE_CACHE_LINE_SIZE);
1449
1450                 if (vhost_txbuff[i * MAX_VHOST_DEVICE + vid] == NULL) {
1451                         RTE_LOG(INFO, VHOST_DATA,
1452                           "(%d) couldn't allocate memory for vhost TX\n", vid);
1453                         return -1;
1454                 }
1455         }
1456
1457         if (builtin_net_driver)
1458                 vs_vhost_net_setup(vdev);
1459
1460         TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry);
1461         vdev->vmdq_rx_q = vid * queues_per_pool + vmdq_queue_base;
1462
1463         /* Reset the ready flag. */
1464         vdev->ready = DEVICE_MAC_LEARNING;
1465         vdev->remove = 0;
1466
1467         /* Find a suitable lcore to add the device. */
1468         RTE_LCORE_FOREACH_WORKER(lcore) {
1469                 if (lcore_info[lcore].device_num < device_num_min) {
1470                         device_num_min = lcore_info[lcore].device_num;
1471                         core_add = lcore;
1472                 }
1473         }
1474         vdev->coreid = core_add;
1475
1476         TAILQ_INSERT_TAIL(&lcore_info[vdev->coreid].vdev_list, vdev,
1477                           lcore_vdev_entry);
1478         lcore_info[vdev->coreid].device_num++;
1479
1480         /* Disable guest notifications; the data cores poll the rings. */
1481         rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
1482         rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
1483
1484         RTE_LOG(INFO, VHOST_DATA,
1485                 "(%d) device has been added to data core %d\n",
1486                 vid, vdev->coreid);
1487
1488         if (async_vhost_driver) {
1489                 struct rte_vhost_async_config config = {0};
1490                 struct rte_vhost_async_channel_ops channel_ops;
1491
1492                 if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) {
1493                         channel_ops.transfer_data = ioat_transfer_data_cb;
1494                         channel_ops.check_completed_copies =
1495                                 ioat_check_completed_copies_cb;
1496
1497                         config.features = RTE_VHOST_ASYNC_INORDER;
1498                         config.async_threshold = 256;
1499
1500                         return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
1501                                 config, &channel_ops);
1502                 }
1503         }
1504
1505         return 0;
1506 }
1507
1508 static int
1509 vring_state_changed(int vid, uint16_t queue_id, int enable)
1510 {
1511         struct vhost_dev *vdev = NULL;
1512
1513         TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
1514                 if (vdev->vid == vid)
1515                         break;
1516         }
1517         if (!vdev)
1518                 return -1;
1519
1520         if (queue_id != VIRTIO_RXQ)
1521                 return 0;
1522
1523         if (async_vhost_driver) {
1524                 if (!enable) {
1525                         uint16_t n_pkt = 0;
1526                         struct rte_mbuf *m_cpl[vdev->pkts_inflight];
1527
1528                         while (vdev->pkts_inflight) {
1529                                 n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
1530                                                         m_cpl, vdev->pkts_inflight);
1531                                 free_pkts(m_cpl, n_pkt);
1532                                 __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST);
1533                         }
1534                 }
1535         }
1536
1537         return 0;
1538 }
1539
1540 /*
1541  * These callbacks allow devices to be added to the data core when
1542  * configuration has been fully completed.
1543  */
1544 static const struct vhost_device_ops virtio_net_device_ops =
1545 {
1546         .new_device = new_device,
1547         .destroy_device = destroy_device,
1548         .vring_state_changed = vring_state_changed,
1549 };
1550
1551 /*
1552  * This is a thread that wakes up periodically to print stats if the user
1553  * has enabled them.
1554  */
1555 static void *
1556 print_stats(__rte_unused void *arg)
1557 {
1558         struct vhost_dev *vdev;
1559         uint64_t tx_dropped, rx_dropped;
1560         uint64_t tx, tx_total, rx, rx_total;
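        /*
         * ANSI escape sequences: "ESC[2J" clears the screen and "ESC[1;1H"
         * moves the cursor to the top-left corner.
         */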
1561         const char clr[] = { 27, '[', '2', 'J', '\0' };
1562         const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' };
1563
1564         while (1) {
1565                 sleep(enable_stats);
1566
1567                 /* Clear screen and move to top left */
1568                 printf("%s%s\n", clr, top_left);
1569                 printf("Device statistics =================================\n");
1570
1571                 TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
1572                         tx_total   = vdev->stats.tx_total;
1573                         tx         = vdev->stats.tx;
1574                         tx_dropped = tx_total - tx;
1575
1576                         rx_total = __atomic_load_n(&vdev->stats.rx_total_atomic,
1577                                 __ATOMIC_SEQ_CST);
1578                         rx         = __atomic_load_n(&vdev->stats.rx_atomic,
1579                                 __ATOMIC_SEQ_CST);
1580                         rx_dropped = rx_total - rx;
1581
1582                         printf("Statistics for device %d\n"
1583                                 "-----------------------\n"
1584                                 "TX total:              %" PRIu64 "\n"
1585                                 "TX dropped:            %" PRIu64 "\n"
1586                                 "TX successful:         %" PRIu64 "\n"
1587                                 "RX total:              %" PRIu64 "\n"
1588                                 "RX dropped:            %" PRIu64 "\n"
1589                                 "RX successful:         %" PRIu64 "\n",
1590                                 vdev->vid,
1591                                 tx_total, tx_dropped, tx,
1592                                 rx_total, rx_dropped, rx);
1593                 }
1594
1595                 printf("===================================================\n");
1596
1597                 fflush(stdout);
1598         }
1599
1600         return NULL;
1601 }
1602
1603 static void
1604 unregister_drivers(int socket_num)
1605 {
1606         int i, ret;
1607
1608         for (i = 0; i < socket_num; i++) {
1609                 ret = rte_vhost_driver_unregister(socket_files + i * PATH_MAX);
1610                 if (ret != 0)
1611                         RTE_LOG(ERR, VHOST_CONFIG,
1612                                 "Failed to unregister vhost driver for %s.\n",
1613                                 socket_files + i * PATH_MAX);
1614         }
1615 }
1616
1617 /* When we receive a SIGINT signal, unregister the vhost driver. */
1618 static void
1619 sigint_handler(__rte_unused int signum)
1620 {
1621         /* Unregister vhost driver. */
1622         unregister_drivers(nb_sockets);
1623
1624         exit(0);
1625 }
1626
1627 /*
1628  * While creating an mbuf pool, one key thing is to figure out how
1629  * many mbuf entries are enough for our use. FYI, here are some
1630  * guidelines:
1631  *
1632  * - Each rx queue would reserve @nr_rx_desc mbufs at queue setup stage
1633  *
1634  * - For each switch core (a CPU core that does the packet switching),
1635  *   we also need to reserve some mbufs for receiving the packets from
1636  *   the virtio Tx queue. How many are enough depends on the usage. It's
1637  *   normally a simple calculation like the following:
1638  *
1639  *       MAX_PKT_BURST * max packet size / mbuf size
1640  *
1641  *   So, we definitely need to allocate more mbufs when TSO is enabled.
1642  *
1643  * - Similarly, for each switching core, we should reserve @nr_rx_desc
1644  *   mbufs for receiving the packets from the physical NIC device.
1645  *
1646  * - We also need to make sure that, for each switch core, we have
1647  *   allocated enough mbufs to fill up the mbuf cache.
1648  */
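/*
 * A rough worked example of the guideline above, assuming MAX_PKT_BURST is
 * 32 and MBUF_DATA_SIZE is the default 2048-byte data room plus the default
 * 128-byte RTE_PKTMBUF_HEADROOM (2176 bytes in total). With the standard
 * 1500-byte MTU, each switch core then needs about
 *
 *     (1500 + 2176) * 32 / (2176 - 128) ~= 57 mbufs
 *
 * for draining a virtio Tx queue, on top of the @nr_rx_desc mbufs reserved
 * for the physical RX queue. With TSO enabled, the 64KB "MTU" raises that
 * figure to roughly 1058 mbufs per core, which is why create_mbuf_pool()
 * below sizes the pool for a much larger MTU in that case.
 */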
1649 static void
1650 create_mbuf_pool(uint16_t nr_port, uint32_t nr_switch_core, uint32_t mbuf_size,
1651         uint32_t nr_queues, uint32_t nr_rx_desc, uint32_t nr_mbuf_cache)
1652 {
1653         uint32_t nr_mbufs;
1654         uint32_t nr_mbufs_per_core;
1655         uint32_t mtu = 1500;
1656
1657         if (mergeable)
1658                 mtu = 9000;
1659         if (enable_tso)
1660                 mtu = 64 * 1024;
1661
1662         nr_mbufs_per_core  = (mtu + mbuf_size) * MAX_PKT_BURST /
1663                         (mbuf_size - RTE_PKTMBUF_HEADROOM);
1664         nr_mbufs_per_core += nr_rx_desc;
1665         nr_mbufs_per_core  = RTE_MAX(nr_mbufs_per_core, nr_mbuf_cache);
1666
1667         nr_mbufs  = nr_queues * nr_rx_desc;
1668         nr_mbufs += nr_mbufs_per_core * nr_switch_core;
1669         nr_mbufs *= nr_port;
1670
1671         mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", nr_mbufs,
1672                                             nr_mbuf_cache, 0, mbuf_size,
1673                                             rte_socket_id());
1674         if (mbuf_pool == NULL)
1675                 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
1676 }
1677
1678 /*
1679  * Main function, does initialisation and calls the per-lcore functions.
1680  */
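/*
 * A typical invocation might look like the following (illustrative only:
 * the exact option names are parsed by us_vhost_parse_args() earlier in
 * this file and may differ between DPDK releases):
 *
 *     ./dpdk-vhost -l 1-3 -n 4 -- -p 0x1 --socket-file /tmp/sock0 \
 *             --client --stats 1
 */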
1681 int
1682 main(int argc, char *argv[])
1683 {
1684         unsigned lcore_id, core_id = 0;
1685         unsigned nb_ports, valid_num_ports;
1686         int ret, i;
1687         uint16_t portid;
1688         static pthread_t tid;
1689         uint64_t flags = RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
1690
1691         signal(SIGINT, sigint_handler);
1692
1693         /* init EAL */
1694         ret = rte_eal_init(argc, argv);
1695         if (ret < 0)
1696                 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
1697         argc -= ret;
1698         argv += ret;
1699
1700         /* parse app arguments */
1701         ret = us_vhost_parse_args(argc, argv);
1702         if (ret < 0)
1703                 rte_exit(EXIT_FAILURE, "Invalid argument\n");
1704
1705         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1706                 TAILQ_INIT(&lcore_info[lcore_id].vdev_list);
1707
1708                 if (rte_lcore_is_enabled(lcore_id))
1709                         lcore_ids[core_id++] = lcore_id;
1710         }
1711
1712         if (rte_lcore_count() > RTE_MAX_LCORE)
1713                 rte_exit(EXIT_FAILURE, "Not enough cores\n");
1714
1715         /* Get the number of physical ports. */
1716         nb_ports = rte_eth_dev_count_avail();
1717
1718         /*
1719          * Update the global var NUM_PORTS and the global array PORTS, and get
1720          * the value of VALID_NUM_PORTS according to the number of system ports.
1721          */
1722         valid_num_ports = check_ports_num(nb_ports);
1723
1724         if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) {
1725                 RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
1726                         "but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
1727                 return -1;
1728         }
1729
1730         /*
1731          * FIXME: here we are trying to allocate enough mbufs for
1732          * @MAX_QUEUES, but in truth we are never going to use that
1733          * many queues here. We should probably only allocate for
1734          * those queues we are actually going to use.
1735          */
1736         create_mbuf_pool(valid_num_ports, rte_lcore_count() - 1, MBUF_DATA_SIZE,
1737                          MAX_QUEUES, RTE_TEST_RX_DESC_DEFAULT, MBUF_CACHE_SIZE);
1738
1739         if (vm2vm_mode == VM2VM_HARDWARE) {
1740                 /* Enable VT loopback so the NIC's L2 switch handles VM2VM forwarding. */
1741                 vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
1742                 RTE_LOG(DEBUG, VHOST_CONFIG,
1743                         "Enable loop back for L2 switch in vmdq.\n");
1744         }
1745
1746         /* initialize all ports */
1747         RTE_ETH_FOREACH_DEV(portid) {
1748                 /* skip ports that are not enabled */
1749                 if ((enabled_port_mask & (1 << portid)) == 0) {
1750                         RTE_LOG(INFO, VHOST_PORT,
1751                                 "Skipping disabled port %d\n", portid);
1752                         continue;
1753                 }
1754                 if (port_init(portid) != 0)
1755                         rte_exit(EXIT_FAILURE,
1756                                 "Cannot initialize network ports\n");
1757         }
1758
1759         /* Enable stats if the user option is set. */
1760         if (enable_stats) {
1761                 ret = rte_ctrl_thread_create(&tid, "print-stats", NULL,
1762                                         print_stats, NULL);
1763                 if (ret < 0)
1764                         rte_exit(EXIT_FAILURE,
1765                                 "Cannot create print-stats thread\n");
1766         }
1767
1768         /* Launch all data cores. */
1769         RTE_LCORE_FOREACH_WORKER(lcore_id)
1770                 rte_eal_remote_launch(switch_worker, NULL, lcore_id);
1771
1772         if (client_mode)
1773                 flags |= RTE_VHOST_USER_CLIENT;
1774
1775         /* Register vhost user driver to handle vhost messages. */
1776         for (i = 0; i < nb_sockets; i++) {
1777                 char *file = socket_files + i * PATH_MAX;
1778
1779                 if (async_vhost_driver)
1780                         flags |= RTE_VHOST_USER_ASYNC_COPY;
1781
1782                 ret = rte_vhost_driver_register(file, flags);
1783                 if (ret != 0) {
1784                         unregister_drivers(i);
1785                         rte_exit(EXIT_FAILURE,
1786                                 "vhost driver register failure.\n");
1787                 }
1788
1789                 if (builtin_net_driver)
1790                         rte_vhost_driver_set_features(file, VIRTIO_NET_FEATURES);
1791
1792                 if (mergeable == 0) {
1793                         rte_vhost_driver_disable_features(file,
1794                                 1ULL << VIRTIO_NET_F_MRG_RXBUF);
1795                 }
1796
1797                 if (enable_tx_csum == 0) {
1798                         rte_vhost_driver_disable_features(file,
1799                                 1ULL << VIRTIO_NET_F_CSUM);
1800                 }
1801
1802                 if (enable_tso == 0) {
1803                         rte_vhost_driver_disable_features(file,
1804                                 1ULL << VIRTIO_NET_F_HOST_TSO4);
1805                         rte_vhost_driver_disable_features(file,
1806                                 1ULL << VIRTIO_NET_F_HOST_TSO6);
1807                         rte_vhost_driver_disable_features(file,
1808                                 1ULL << VIRTIO_NET_F_GUEST_TSO4);
1809                         rte_vhost_driver_disable_features(file,
1810                                 1ULL << VIRTIO_NET_F_GUEST_TSO6);
1811                 }
1812
1813                 if (promiscuous) {
1814                         rte_vhost_driver_enable_features(file,
1815                                 1ULL << VIRTIO_NET_F_CTRL_RX);
1816                 }
1817
1818                 ret = rte_vhost_driver_callback_register(file,
1819                         &virtio_net_device_ops);
1820                 if (ret != 0) {
1821                         rte_exit(EXIT_FAILURE,
1822                                 "failed to register vhost driver callbacks.\n");
1823                 }
1824
1825                 if (rte_vhost_driver_start(file) < 0) {
1826                         rte_exit(EXIT_FAILURE,
1827                                 "failed to start vhost driver.\n");
1828                 }
1829         }
1830
1831         RTE_LCORE_FOREACH_WORKER(lcore_id)
1832                 rte_eal_wait_lcore(lcore_id);
1833
1834         /* clean up the EAL */
1835         rte_eal_cleanup();
1836
1837         return 0;
1838 }