net: add rte prefix to ether structures
[dpdk.git] / drivers / net / vhost / rte_eth_vhost.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016 IGEL Co., Ltd.
3  * Copyright(c) 2016-2018 Intel Corporation
4  */
5 #include <unistd.h>
6 #include <pthread.h>
7 #include <stdbool.h>
8
9 #include <rte_mbuf.h>
10 #include <rte_ethdev_driver.h>
11 #include <rte_ethdev_vdev.h>
12 #include <rte_malloc.h>
13 #include <rte_memcpy.h>
14 #include <rte_bus_vdev.h>
15 #include <rte_kvargs.h>
16 #include <rte_vhost.h>
17 #include <rte_spinlock.h>
18
19 #include "rte_eth_vhost.h"
20
21 static int vhost_logtype;
22
23 #define VHOST_LOG(level, ...) \
24         rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)
25
26 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
27
28 #define ETH_VHOST_IFACE_ARG             "iface"
29 #define ETH_VHOST_QUEUES_ARG            "queues"
30 #define ETH_VHOST_CLIENT_ARG            "client"
31 #define ETH_VHOST_DEQUEUE_ZERO_COPY     "dequeue-zero-copy"
32 #define ETH_VHOST_IOMMU_SUPPORT         "iommu-support"
33 #define ETH_VHOST_POSTCOPY_SUPPORT      "postcopy-support"
34 #define VHOST_MAX_PKT_BURST 32
35
36 static const char *valid_arguments[] = {
37         ETH_VHOST_IFACE_ARG,
38         ETH_VHOST_QUEUES_ARG,
39         ETH_VHOST_CLIENT_ARG,
40         ETH_VHOST_DEQUEUE_ZERO_COPY,
41         ETH_VHOST_IOMMU_SUPPORT,
42         ETH_VHOST_POSTCOPY_SUPPORT,
43         NULL
44 };
45
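/*
 * Base MAC address for vhost ports: the first five bytes spell "VHOST" in
 * ASCII, and the last byte is overwritten with the port id when the device
 * is created in eth_dev_vhost_create().
 */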
46 static struct rte_ether_addr base_eth_addr = {
47         .addr_bytes = {
48                 0x56 /* V */,
49                 0x48 /* H */,
50                 0x4F /* O */,
51                 0x53 /* S */,
52                 0x54 /* T */,
53                 0x00
54         }
55 };
56
57 enum vhost_xstats_pkts {
58         VHOST_UNDERSIZE_PKT = 0,
59         VHOST_64_PKT,
60         VHOST_65_TO_127_PKT,
61         VHOST_128_TO_255_PKT,
62         VHOST_256_TO_511_PKT,
63         VHOST_512_TO_1023_PKT,
64         VHOST_1024_TO_1522_PKT,
65         VHOST_1523_TO_MAX_PKT,
66         VHOST_BROADCAST_PKT,
67         VHOST_MULTICAST_PKT,
68         VHOST_UNICAST_PKT,
69         VHOST_ERRORS_PKT,
70         VHOST_ERRORS_FRAGMENTED,
71         VHOST_ERRORS_JABBER,
72         VHOST_UNKNOWN_PROTOCOL,
73         VHOST_XSTATS_MAX,
74 };
75
76 struct vhost_stats {
77         uint64_t pkts;
78         uint64_t bytes;
79         uint64_t missed_pkts;
80         uint64_t xstats[VHOST_XSTATS_MAX];
81 };
82
83 struct vhost_queue {
84         int vid;
85         rte_atomic32_t allow_queuing;
86         rte_atomic32_t while_queuing;
87         struct pmd_internal *internal;
88         struct rte_mempool *mb_pool;
89         uint16_t port;
90         uint16_t virtqueue_id;
91         struct vhost_stats stats;
92 };
93
94 struct pmd_internal {
95         rte_atomic32_t dev_attached;
96         char *dev_name;
97         char *iface_name;
98         uint16_t max_queues;
99         int vid;
100         rte_atomic32_t started;
101         uint8_t vlan_strip;
102 };
103
104 struct internal_list {
105         TAILQ_ENTRY(internal_list) next;
106         struct rte_eth_dev *eth_dev;
107 };
108
109 TAILQ_HEAD(internal_list_head, internal_list);
110 static struct internal_list_head internal_list =
111         TAILQ_HEAD_INITIALIZER(internal_list);
112
113 static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
114
115 static struct rte_eth_link pmd_link = {
116                 .link_speed = 10000,
117                 .link_duplex = ETH_LINK_FULL_DUPLEX,
118                 .link_status = ETH_LINK_DOWN
119 };
120
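/*
 * Per-port vring state tracking. Each of the RTE_MAX_QUEUES_PER_PORT queue
 * pairs has an RX and a TX vring, hence the "* 2" array sizing. cur[] holds
 * the latest enable/disable state reported by the vhost library through
 * vring_state_changed(), while seen[] records what has already been returned
 * to the application via rte_eth_vhost_get_queue_event().
 */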
121 struct rte_vhost_vring_state {
122         rte_spinlock_t lock;
123
124         bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
125         bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
126         unsigned int index;
127         unsigned int max_vring;
128 };
129
130 static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];
131
132 #define VHOST_XSTATS_NAME_SIZE 64
133
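/*
 * Each xstat is described by a display name plus a byte offset into
 * struct vhost_queue; vhost_dev_xstats_get() reads the counter at that
 * offset for every queue and sums the per-queue values into one per-port
 * statistic.
 */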
134 struct vhost_xstats_name_off {
135         char name[VHOST_XSTATS_NAME_SIZE];
136         uint64_t offset;
137 };
138
139 /* [rx]_ is prepended to the name string here */
140 static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
141         {"good_packets",
142          offsetof(struct vhost_queue, stats.pkts)},
143         {"total_bytes",
144          offsetof(struct vhost_queue, stats.bytes)},
145         {"missed_pkts",
146          offsetof(struct vhost_queue, stats.missed_pkts)},
147         {"broadcast_packets",
148          offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
149         {"multicast_packets",
150          offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
151         {"unicast_packets",
152          offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
153         {"undersize_packets",
154          offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
155         {"size_64_packets",
156          offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
157         {"size_65_to_127_packets",
158          offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
159         {"size_128_to_255_packets",
160          offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
161         {"size_256_to_511_packets",
162          offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
163         {"size_512_to_1023_packets",
164          offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
165         {"size_1024_to_1522_packets",
166          offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
167         {"size_1523_to_max_packets",
168          offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
169         {"errors_with_bad_CRC",
170          offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
171         {"fragmented_errors",
172          offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])},
173         {"jabber_errors",
174          offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])},
175         {"unknown_protos_packets",
176          offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])},
177 };
178
179 /* [tx]_ is prepended to the name string here */
180 static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
181         {"good_packets",
182          offsetof(struct vhost_queue, stats.pkts)},
183         {"total_bytes",
184          offsetof(struct vhost_queue, stats.bytes)},
185         {"missed_pkts",
186          offsetof(struct vhost_queue, stats.missed_pkts)},
187         {"broadcast_packets",
188          offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
189         {"multicast_packets",
190          offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
191         {"unicast_packets",
192          offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
193         {"undersize_packets",
194          offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
195         {"size_64_packets",
196          offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
197         {"size_65_to_127_packets",
198          offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
199         {"size_128_to_255_packets",
200          offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
201         {"size_256_to_511_packets",
202          offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
203         {"size_512_to_1023_packets",
204          offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
205         {"size_1024_to_1522_packets",
206          offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
207         {"size_1523_to_max_packets",
208          offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
209         {"errors_with_bad_CRC",
210          offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
211 };
212
213 #define VHOST_NB_XSTATS_RXPORT (sizeof(vhost_rxport_stat_strings) / \
214                                 sizeof(vhost_rxport_stat_strings[0]))
215
216 #define VHOST_NB_XSTATS_TXPORT (sizeof(vhost_txport_stat_strings) / \
217                                 sizeof(vhost_txport_stat_strings[0]))
218
219 static void
220 vhost_dev_xstats_reset(struct rte_eth_dev *dev)
221 {
222         struct vhost_queue *vq = NULL;
223         unsigned int i = 0;
224
225         for (i = 0; i < dev->data->nb_rx_queues; i++) {
226                 vq = dev->data->rx_queues[i];
227                 if (!vq)
228                         continue;
229                 memset(&vq->stats, 0, sizeof(vq->stats));
230         }
231         for (i = 0; i < dev->data->nb_tx_queues; i++) {
232                 vq = dev->data->tx_queues[i];
233                 if (!vq)
234                         continue;
235                 memset(&vq->stats, 0, sizeof(vq->stats));
236         }
237 }
238
239 static int
240 vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
241                            struct rte_eth_xstat_name *xstats_names,
242                            unsigned int limit __rte_unused)
243 {
244         unsigned int t = 0;
245         int count = 0;
246         int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;
247
248         if (!xstats_names)
249                 return nstats;
250         for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
251                 snprintf(xstats_names[count].name,
252                          sizeof(xstats_names[count].name),
253                          "rx_%s", vhost_rxport_stat_strings[t].name);
254                 count++;
255         }
256         for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
257                 snprintf(xstats_names[count].name,
258                          sizeof(xstats_names[count].name),
259                          "tx_%s", vhost_txport_stat_strings[t].name);
260                 count++;
261         }
262         return count;
263 }
264
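/*
 * Unicast counters are not maintained on the fast path; they are derived
 * here as good packets minus broadcast and multicast. On the TX side the
 * missed packets are added back first, so that unicast + multicast +
 * broadcast still accounts for packets that could not be enqueued to the
 * guest.
 */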
265 static int
266 vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
267                      unsigned int n)
268 {
269         unsigned int i;
270         unsigned int t;
271         unsigned int count = 0;
272         struct vhost_queue *vq = NULL;
273         unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;
274
275         if (n < nxstats)
276                 return nxstats;
277
278         for (i = 0; i < dev->data->nb_rx_queues; i++) {
279                 vq = dev->data->rx_queues[i];
280                 if (!vq)
281                         continue;
282                 vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
283                                 - (vq->stats.xstats[VHOST_BROADCAST_PKT]
284                                 + vq->stats.xstats[VHOST_MULTICAST_PKT]);
285         }
286         for (i = 0; i < dev->data->nb_tx_queues; i++) {
287                 vq = dev->data->tx_queues[i];
288                 if (!vq)
289                         continue;
290                 vq->stats.xstats[VHOST_UNICAST_PKT] = vq->stats.pkts
291                                 + vq->stats.missed_pkts
292                                 - (vq->stats.xstats[VHOST_BROADCAST_PKT]
293                                 + vq->stats.xstats[VHOST_MULTICAST_PKT]);
294         }
295         for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
296                 xstats[count].value = 0;
297                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
298                         vq = dev->data->rx_queues[i];
299                         if (!vq)
300                                 continue;
301                         xstats[count].value +=
302                                 *(uint64_t *)(((char *)vq)
303                                 + vhost_rxport_stat_strings[t].offset);
304                 }
305                 xstats[count].id = count;
306                 count++;
307         }
308         for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
309                 xstats[count].value = 0;
310                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
311                         vq = dev->data->tx_queues[i];
312                         if (!vq)
313                                 continue;
314                         xstats[count].value +=
315                                 *(uint64_t *)(((char *)vq)
316                                 + vhost_txport_stat_strings[t].offset);
317                 }
318                 xstats[count].id = count;
319                 count++;
320         }
321         return count;
322 }
323
324 static inline void
325 vhost_count_multicast_broadcast(struct vhost_queue *vq,
326                                 struct rte_mbuf *mbuf)
327 {
328         struct rte_ether_addr *ea = NULL;
329         struct vhost_stats *pstats = &vq->stats;
330
331         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
332         if (is_multicast_ether_addr(ea)) {
333                 if (is_broadcast_ether_addr(ea))
334                         pstats->xstats[VHOST_BROADCAST_PKT]++;
335                 else
336                         pstats->xstats[VHOST_MULTICAST_PKT]++;
337         }
338 }
339
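/*
 * Update the per-size-bucket and multicast/broadcast counters for a burst.
 * For 64 < pkt_len < 1024 the bucket index is computed as
 * 32 - __builtin_clz(pkt_len) - 5, i.e. floor(log2(pkt_len)) - 4, which maps
 * 65..127 to VHOST_65_TO_127_PKT up through 512..1023 to
 * VHOST_512_TO_1023_PKT, relying on the ordering of enum vhost_xstats_pkts.
 */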
340 static void
341 vhost_update_packet_xstats(struct vhost_queue *vq,
342                            struct rte_mbuf **bufs,
343                            uint16_t count)
344 {
345         uint32_t pkt_len = 0;
346         uint64_t i = 0;
347         uint64_t index;
348         struct vhost_stats *pstats = &vq->stats;
349
350         for (i = 0; i < count; i++) {
351                 pkt_len = bufs[i]->pkt_len;
352                 if (pkt_len == 64) {
353                         pstats->xstats[VHOST_64_PKT]++;
354                 } else if (pkt_len > 64 && pkt_len < 1024) {
355                         index = (sizeof(pkt_len) * 8)
356                                 - __builtin_clz(pkt_len) - 5;
357                         pstats->xstats[index]++;
358                 } else {
359                         if (pkt_len < 64)
360                                 pstats->xstats[VHOST_UNDERSIZE_PKT]++;
361                         else if (pkt_len <= 1522)
362                                 pstats->xstats[VHOST_1024_TO_1522_PKT]++;
363                         else if (pkt_len > 1522)
364                                 pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
365                 }
366                 vhost_count_multicast_broadcast(vq, bufs[i]);
367         }
368 }
369
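/*
 * The allow_queuing/while_queuing pair implements a lock-free handshake with
 * update_queuing_status(): the burst functions re-check allow_queuing after
 * raising while_queuing, and the control path clears allow_queuing and then
 * spins until while_queuing drops, so no burst can touch a vhost device that
 * is being stopped or detached.
 */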
370 static uint16_t
371 eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
372 {
373         struct vhost_queue *r = q;
374         uint16_t i, nb_rx = 0;
375         uint16_t nb_receive = nb_bufs;
376
377         if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
378                 return 0;
379
380         rte_atomic32_set(&r->while_queuing, 1);
381
382         if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
383                 goto out;
384
385         /* Dequeue packets from guest TX queue */
386         while (nb_receive) {
387                 uint16_t nb_pkts;
388                 uint16_t num = (uint16_t)RTE_MIN(nb_receive,
389                                                  VHOST_MAX_PKT_BURST);
390
391                 nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
392                                                   r->mb_pool, &bufs[nb_rx],
393                                                   num);
394
395                 nb_rx += nb_pkts;
396                 nb_receive -= nb_pkts;
397                 if (nb_pkts < num)
398                         break;
399         }
400
401         r->stats.pkts += nb_rx;
402
403         for (i = 0; likely(i < nb_rx); i++) {
404                 bufs[i]->port = r->port;
405                 bufs[i]->vlan_tci = 0;
406
407                 if (r->internal->vlan_strip)
408                         rte_vlan_strip(bufs[i]);
409
410                 r->stats.bytes += bufs[i]->pkt_len;
411         }
412
413         vhost_update_packet_xstats(r, bufs, nb_rx);
414
415 out:
416         rte_atomic32_set(&r->while_queuing, 0);
417
418         return nb_rx;
419 }
420
421 static uint16_t
422 eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
423 {
424         struct vhost_queue *r = q;
425         uint16_t i, nb_tx = 0;
426         uint16_t nb_send = 0;
427
428         if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
429                 return 0;
430
431         rte_atomic32_set(&r->while_queuing, 1);
432
433         if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
434                 goto out;
435
436         for (i = 0; i < nb_bufs; i++) {
437                 struct rte_mbuf *m = bufs[i];
438
439                 /* Do VLAN tag insertion */
440                 if (m->ol_flags & PKT_TX_VLAN_PKT) {
441                         int error = rte_vlan_insert(&m);
442                         if (unlikely(error)) {
443                                 rte_pktmbuf_free(m);
444                                 continue;
445                         }
446                 }
447
448                 bufs[nb_send] = m;
449                 ++nb_send;
450         }
451
452         /* Enqueue packets to guest RX queue */
453         while (nb_send) {
454                 uint16_t nb_pkts;
455                 uint16_t num = (uint16_t)RTE_MIN(nb_send,
456                                                  VHOST_MAX_PKT_BURST);
457
458                 nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
459                                                   &bufs[nb_tx], num);
460
461                 nb_tx += nb_pkts;
462                 nb_send -= nb_pkts;
463                 if (nb_pkts < num)
464                         break;
465         }
466
467         r->stats.pkts += nb_tx;
468         r->stats.missed_pkts += nb_bufs - nb_tx;
469
470         for (i = 0; likely(i < nb_tx); i++)
471                 r->stats.bytes += bufs[i]->pkt_len;
472
473         vhost_update_packet_xstats(r, bufs, nb_tx);
474
475         /* According to RFC 2863 (page 42, ifHCOutMulticastPkts and
476          * ifHCOutBroadcastPkts), the "multicast" and "broadcast" counters
477          * also include packets that were not transmitted successfully.
478          */
479         for (i = nb_tx; i < nb_bufs; i++)
480                 vhost_count_multicast_broadcast(r, bufs[i]);
481
482         for (i = 0; likely(i < nb_tx); i++)
483                 rte_pktmbuf_free(bufs[i]);
484 out:
485         rte_atomic32_set(&r->while_queuing, 0);
486
487         return nb_tx;
488 }
489
490 static int
491 eth_dev_configure(struct rte_eth_dev *dev)
492 {
493         struct pmd_internal *internal = dev->data->dev_private;
494         const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
495
496         internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
497
498         return 0;
499 }
500
501 static inline struct internal_list *
502 find_internal_resource(char *ifname)
503 {
504         int found = 0;
505         struct internal_list *list;
506         struct pmd_internal *internal;
507
508         if (ifname == NULL)
509                 return NULL;
510
511         pthread_mutex_lock(&internal_list_lock);
512
513         TAILQ_FOREACH(list, &internal_list, next) {
514                 internal = list->eth_dev->data->dev_private;
515                 if (!strcmp(internal->iface_name, ifname)) {
516                         found = 1;
517                         break;
518                 }
519         }
520
521         pthread_mutex_unlock(&internal_list_lock);
522
523         if (!found)
524                 return NULL;
525
526         return list;
527 }
528
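/*
 * Rx queue "qid" of this port drains the guest's TX virtqueue, which has
 * vring index (qid << 1) + 1 (see eth_rx_queue_setup()), so interrupt
 * enable/disable toggles whether the guest kicks the backend for that vring.
 */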
529 static int
530 eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
531 {
532         struct rte_vhost_vring vring;
533         struct vhost_queue *vq;
534         int ret = 0;
535
536         vq = dev->data->rx_queues[qid];
537         if (!vq) {
538                 VHOST_LOG(ERR, "rxq%d is not set up yet\n", qid);
539                 return -1;
540         }
541
542         ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
543         if (ret < 0) {
544                 VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
545                 return ret;
546         }
547         VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
548         rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
549         rte_wmb();
550
551         return ret;
552 }
553
554 static int
555 eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
556 {
557         struct rte_vhost_vring vring;
558         struct vhost_queue *vq;
559         int ret = 0;
560
561         vq = dev->data->rx_queues[qid];
562         if (!vq) {
563                 VHOST_LOG(ERR, "rxq%d is not set up yet\n", qid);
564                 return -1;
565         }
566
567         ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
568         if (ret < 0) {
569                 VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
570                 return ret;
571         }
572         VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
573         rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
574         rte_wmb();
575
576         return 0;
577 }
578
579 static void
580 eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
581 {
582         struct rte_intr_handle *intr_handle = dev->intr_handle;
583
584         if (intr_handle) {
585                 if (intr_handle->intr_vec)
586                         free(intr_handle->intr_vec);
587                 free(intr_handle);
588         }
589
590         dev->intr_handle = NULL;
591 }
592
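/*
 * Build a VDEV interrupt handle that maps each Rx queue to the kickfd of
 * its guest TX vring, so that ethdev Rx interrupts are backed by the
 * eventfd the guest signals when it places packets on that vring.
 */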
593 static int
594 eth_vhost_install_intr(struct rte_eth_dev *dev)
595 {
596         struct rte_vhost_vring vring;
597         struct vhost_queue *vq;
598         int count = 0;
599         int nb_rxq = dev->data->nb_rx_queues;
600         int i;
601         int ret;
602
603         /* uninstall first if we are reconnecting */
604         if (dev->intr_handle)
605                 eth_vhost_uninstall_intr(dev);
606
607         dev->intr_handle = malloc(sizeof(*dev->intr_handle));
608         if (!dev->intr_handle) {
609                 VHOST_LOG(ERR, "Fail to allocate intr_handle\n");
610                 return -ENOMEM;
611         }
612         memset(dev->intr_handle, 0, sizeof(*dev->intr_handle));
613
614         dev->intr_handle->efd_counter_size = sizeof(uint64_t);
615
616         dev->intr_handle->intr_vec =
617                 malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0]));
618
619         if (!dev->intr_handle->intr_vec) {
620                 VHOST_LOG(ERR,
621                         "Failed to allocate memory for interrupt vector\n");
622                 free(dev->intr_handle);
623                 return -ENOMEM;
624         }
625
626         VHOST_LOG(INFO, "Prepare intr vec\n");
627         for (i = 0; i < nb_rxq; i++) {
628                 vq = dev->data->rx_queues[i];
629                 if (!vq) {
630                         VHOST_LOG(INFO, "rxq-%d not set up yet, skip!\n", i);
631                         continue;
632                 }
633
634                 ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
635                 if (ret < 0) {
636                         VHOST_LOG(INFO,
637                                 "Failed to get rxq-%d's vring, skip!\n", i);
638                         continue;
639                 }
640
641                 if (vring.kickfd < 0) {
642                         VHOST_LOG(INFO,
643                                 "rxq-%d's kickfd is invalid, skip!\n", i);
644                         continue;
645                 }
646                 dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
647                 dev->intr_handle->efds[i] = vring.kickfd;
648                 count++;
649                 VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
650         }
651
652         dev->intr_handle->nb_efd = count;
653         dev->intr_handle->max_intr = count + 1;
654         dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
655
656         return 0;
657 }
658
659 static void
660 update_queuing_status(struct rte_eth_dev *dev)
661 {
662         struct pmd_internal *internal = dev->data->dev_private;
663         struct vhost_queue *vq;
664         unsigned int i;
665         int allow_queuing = 1;
666
667         if (!dev->data->rx_queues || !dev->data->tx_queues)
668                 return;
669
670         if (rte_atomic32_read(&internal->started) == 0 ||
671             rte_atomic32_read(&internal->dev_attached) == 0)
672                 allow_queuing = 0;
673
674         /* Wait until rx/tx_pkt_burst stops accessing vhost device */
675         for (i = 0; i < dev->data->nb_rx_queues; i++) {
676                 vq = dev->data->rx_queues[i];
677                 if (vq == NULL)
678                         continue;
679                 rte_atomic32_set(&vq->allow_queuing, allow_queuing);
680                 while (rte_atomic32_read(&vq->while_queuing))
681                         rte_pause();
682         }
683
684         for (i = 0; i < dev->data->nb_tx_queues; i++) {
685                 vq = dev->data->tx_queues[i];
686                 if (vq == NULL)
687                         continue;
688                 rte_atomic32_set(&vq->allow_queuing, allow_queuing);
689                 while (rte_atomic32_read(&vq->while_queuing))
690                         rte_pause();
691         }
692 }
693
694 static void
695 queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
696 {
697         struct vhost_queue *vq;
698         int i;
699
700         for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
701                 vq = eth_dev->data->rx_queues[i];
702                 if (!vq)
703                         continue;
704                 vq->vid = internal->vid;
705                 vq->internal = internal;
706                 vq->port = eth_dev->data->port_id;
707         }
708         for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
709                 vq = eth_dev->data->tx_queues[i];
710                 if (!vq)
711                         continue;
712                 vq->vid = internal->vid;
713                 vq->internal = internal;
714                 vq->port = eth_dev->data->port_id;
715         }
716 }
717
718 static int
719 new_device(int vid)
720 {
721         struct rte_eth_dev *eth_dev;
722         struct internal_list *list;
723         struct pmd_internal *internal;
724         struct rte_eth_conf *dev_conf;
725         unsigned i;
726         char ifname[PATH_MAX];
727 #ifdef RTE_LIBRTE_VHOST_NUMA
728         int newnode;
729 #endif
730
731         rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
732         list = find_internal_resource(ifname);
733         if (list == NULL) {
734                 VHOST_LOG(INFO, "Invalid device name: %s\n", ifname);
735                 return -1;
736         }
737
738         eth_dev = list->eth_dev;
739         internal = eth_dev->data->dev_private;
740         dev_conf = &eth_dev->data->dev_conf;
741
742 #ifdef RTE_LIBRTE_VHOST_NUMA
743         newnode = rte_vhost_get_numa_node(vid);
744         if (newnode >= 0)
745                 eth_dev->data->numa_node = newnode;
746 #endif
747
748         internal->vid = vid;
749         if (rte_atomic32_read(&internal->started) == 1) {
750                 queue_setup(eth_dev, internal);
751
752                 if (dev_conf->intr_conf.rxq) {
753                         if (eth_vhost_install_intr(eth_dev) < 0) {
754                                 VHOST_LOG(INFO,
755                                         "Failed to install interrupt handler.\n");
756                                 return -1;
757                         }
758                 }
759         } else {
760                 VHOST_LOG(INFO, "RX/TX queues do not exist yet\n");
761         }
762
763         for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
764                 rte_vhost_enable_guest_notification(vid, i, 0);
765
766         rte_vhost_get_mtu(vid, &eth_dev->data->mtu);
767
768         eth_dev->data->dev_link.link_status = ETH_LINK_UP;
769
770         rte_atomic32_set(&internal->dev_attached, 1);
771         update_queuing_status(eth_dev);
772
773         VHOST_LOG(INFO, "Vhost device %d created\n", vid);
774
775         _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
776
777         return 0;
778 }
779
780 static void
781 destroy_device(int vid)
782 {
783         struct rte_eth_dev *eth_dev;
784         struct pmd_internal *internal;
785         struct vhost_queue *vq;
786         struct internal_list *list;
787         char ifname[PATH_MAX];
788         unsigned i;
789         struct rte_vhost_vring_state *state;
790
791         rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
792         list = find_internal_resource(ifname);
793         if (list == NULL) {
794                 VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
795                 return;
796         }
797         eth_dev = list->eth_dev;
798         internal = eth_dev->data->dev_private;
799
800         rte_atomic32_set(&internal->dev_attached, 0);
801         update_queuing_status(eth_dev);
802
803         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
804
805         if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
806                 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
807                         vq = eth_dev->data->rx_queues[i];
808                         if (!vq)
809                                 continue;
810                         vq->vid = -1;
811                 }
812                 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
813                         vq = eth_dev->data->tx_queues[i];
814                         if (!vq)
815                                 continue;
816                         vq->vid = -1;
817                 }
818         }
819
820         state = vring_states[eth_dev->data->port_id];
821         rte_spinlock_lock(&state->lock);
822         for (i = 0; i <= state->max_vring; i++) {
823                 state->cur[i] = false;
824                 state->seen[i] = false;
825         }
826         state->max_vring = 0;
827         rte_spinlock_unlock(&state->lock);
828
829         VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
830         eth_vhost_uninstall_intr(eth_dev);
831
832         _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
833 }
834
835 static int
836 vring_state_changed(int vid, uint16_t vring, int enable)
837 {
838         struct rte_vhost_vring_state *state;
839         struct rte_eth_dev *eth_dev;
840         struct internal_list *list;
841         char ifname[PATH_MAX];
842
843         rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
844         list = find_internal_resource(ifname);
845         if (list == NULL) {
846                 VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
847                 return -1;
848         }
849
850         eth_dev = list->eth_dev;
851         /* won't be NULL */
852         state = vring_states[eth_dev->data->port_id];
853         rte_spinlock_lock(&state->lock);
854         state->cur[vring] = enable;
855         state->max_vring = RTE_MAX(vring, state->max_vring);
856         rte_spinlock_unlock(&state->lock);
857
858         VHOST_LOG(INFO, "vring%u is %s\n",
859                         vring, enable ? "enabled" : "disabled");
860
861         _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);
862
863         return 0;
864 }
865
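/*
 * Callbacks invoked by the vhost library: new_device()/destroy_device() when
 * a frontend connects to or disconnects from the socket, and
 * vring_state_changed() when the frontend enables or disables an individual
 * vring.
 */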
866 static struct vhost_device_ops vhost_ops = {
867         .new_device          = new_device,
868         .destroy_device      = destroy_device,
869         .vring_state_changed = vring_state_changed,
870 };
871
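/*
 * Drain pending queue state changes for a port. A typical pattern
 * (illustrative only, handle_queue_state() being an application-defined
 * handler) is to call this in a loop from the RTE_ETH_EVENT_QUEUE_STATE
 * callback until it returns -1:
 *
 *     struct rte_eth_vhost_queue_event event;
 *
 *     while (rte_eth_vhost_get_queue_event(port_id, &event) == 0)
 *             handle_queue_state(port_id, &event);
 */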
872 int
873 rte_eth_vhost_get_queue_event(uint16_t port_id,
874                 struct rte_eth_vhost_queue_event *event)
875 {
876         struct rte_vhost_vring_state *state;
877         unsigned int i;
878         int idx;
879
880         if (port_id >= RTE_MAX_ETHPORTS) {
881                 VHOST_LOG(ERR, "Invalid port id\n");
882                 return -1;
883         }
884
885         state = vring_states[port_id];
886         if (!state) {
887                 VHOST_LOG(ERR, "Unused port\n");
888                 return -1;
889         }
890
891         rte_spinlock_lock(&state->lock);
892         for (i = 0; i <= state->max_vring; i++) {
893                 idx = state->index++ % (state->max_vring + 1);
894
895                 if (state->cur[idx] != state->seen[idx]) {
896                         state->seen[idx] = state->cur[idx];
897                         event->queue_id = idx / 2;
898                         event->rx = idx & 1;
899                         event->enable = state->cur[idx];
900                         rte_spinlock_unlock(&state->lock);
901                         return 0;
902                 }
903         }
904         rte_spinlock_unlock(&state->lock);
905
906         return -1;
907 }
908
909 int
910 rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
911 {
912         struct internal_list *list;
913         struct rte_eth_dev *eth_dev;
914         struct vhost_queue *vq;
915         int vid = -1;
916
917         if (!rte_eth_dev_is_valid_port(port_id))
918                 return -1;
919
920         pthread_mutex_lock(&internal_list_lock);
921
922         TAILQ_FOREACH(list, &internal_list, next) {
923                 eth_dev = list->eth_dev;
924                 if (eth_dev->data->port_id == port_id) {
925                         vq = eth_dev->data->rx_queues[0];
926                         if (vq) {
927                                 vid = vq->vid;
928                         }
929                         break;
930                 }
931         }
932
933         pthread_mutex_unlock(&internal_list_lock);
934
935         return vid;
936 }
937
938 static int
939 eth_dev_start(struct rte_eth_dev *eth_dev)
940 {
941         struct pmd_internal *internal = eth_dev->data->dev_private;
942         struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
943
944         queue_setup(eth_dev, internal);
945
946         if (rte_atomic32_read(&internal->dev_attached) == 1) {
947                 if (dev_conf->intr_conf.rxq) {
948                         if (eth_vhost_install_intr(eth_dev) < 0) {
949                                 VHOST_LOG(INFO,
950                                         "Failed to install interrupt handler.\n");
951                                 return -1;
952                         }
953                 }
954         }
955
956         rte_atomic32_set(&internal->started, 1);
957         update_queuing_status(eth_dev);
958
959         return 0;
960 }
961
962 static void
963 eth_dev_stop(struct rte_eth_dev *dev)
964 {
965         struct pmd_internal *internal = dev->data->dev_private;
966
967         rte_atomic32_set(&internal->started, 0);
968         update_queuing_status(dev);
969 }
970
971 static void
972 eth_dev_close(struct rte_eth_dev *dev)
973 {
974         struct pmd_internal *internal;
975         struct internal_list *list;
976         unsigned int i;
977
978         internal = dev->data->dev_private;
979         if (!internal)
980                 return;
981
982         eth_dev_stop(dev);
983
984         rte_vhost_driver_unregister(internal->iface_name);
985
986         list = find_internal_resource(internal->iface_name);
987         if (!list)
988                 return;
989
990         pthread_mutex_lock(&internal_list_lock);
991         TAILQ_REMOVE(&internal_list, list, next);
992         pthread_mutex_unlock(&internal_list_lock);
993         rte_free(list);
994
995         if (dev->data->rx_queues)
996                 for (i = 0; i < dev->data->nb_rx_queues; i++)
997                         rte_free(dev->data->rx_queues[i]);
998
999         if (dev->data->tx_queues)
1000                 for (i = 0; i < dev->data->nb_tx_queues; i++)
1001                         rte_free(dev->data->tx_queues[i]);
1002
1003         free(internal->dev_name);
1004         free(internal->iface_name);
1005         rte_free(internal);
1006
1007         dev->data->dev_private = NULL;
1008 }
1009
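/*
 * Virtqueue numbering follows virtio-net: queue pair N uses vring 2N for the
 * guest's RX side and vring 2N + 1 for the guest's TX side. The PMD's Rx
 * queue N therefore dequeues from vring 2N + 1 (VIRTIO_TXQ) and its Tx queue
 * N enqueues to vring 2N (VIRTIO_RXQ).
 */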
1010 static int
1011 eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1012                    uint16_t nb_rx_desc __rte_unused,
1013                    unsigned int socket_id,
1014                    const struct rte_eth_rxconf *rx_conf __rte_unused,
1015                    struct rte_mempool *mb_pool)
1016 {
1017         struct vhost_queue *vq;
1018
1019         vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
1020                         RTE_CACHE_LINE_SIZE, socket_id);
1021         if (vq == NULL) {
1022                 VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n");
1023                 return -ENOMEM;
1024         }
1025
1026         vq->mb_pool = mb_pool;
1027         vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
1028         dev->data->rx_queues[rx_queue_id] = vq;
1029
1030         return 0;
1031 }
1032
1033 static int
1034 eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1035                    uint16_t nb_tx_desc __rte_unused,
1036                    unsigned int socket_id,
1037                    const struct rte_eth_txconf *tx_conf __rte_unused)
1038 {
1039         struct vhost_queue *vq;
1040
1041         vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
1042                         RTE_CACHE_LINE_SIZE, socket_id);
1043         if (vq == NULL) {
1044                 VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n");
1045                 return -ENOMEM;
1046         }
1047
1048         vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
1049         dev->data->tx_queues[tx_queue_id] = vq;
1050
1051         return 0;
1052 }
1053
1054 static void
1055 eth_dev_info(struct rte_eth_dev *dev,
1056              struct rte_eth_dev_info *dev_info)
1057 {
1058         struct pmd_internal *internal;
1059
1060         internal = dev->data->dev_private;
1061         if (internal == NULL) {
1062                 VHOST_LOG(ERR, "Invalid device specified\n");
1063                 return;
1064         }
1065
1066         dev_info->max_mac_addrs = 1;
1067         dev_info->max_rx_pktlen = (uint32_t)-1;
1068         dev_info->max_rx_queues = internal->max_queues;
1069         dev_info->max_tx_queues = internal->max_queues;
1070         dev_info->min_rx_bufsize = 0;
1071
1072         dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
1073                                 DEV_TX_OFFLOAD_VLAN_INSERT;
1074         dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
1075 }
1076
1077 static int
1078 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1079 {
1080         unsigned i;
1081         unsigned long rx_total = 0, tx_total = 0, tx_missed_total = 0;
1082         unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
1083         struct vhost_queue *vq;
1084
1085         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
1086                         i < dev->data->nb_rx_queues; i++) {
1087                 if (dev->data->rx_queues[i] == NULL)
1088                         continue;
1089                 vq = dev->data->rx_queues[i];
1090                 stats->q_ipackets[i] = vq->stats.pkts;
1091                 rx_total += stats->q_ipackets[i];
1092
1093                 stats->q_ibytes[i] = vq->stats.bytes;
1094                 rx_total_bytes += stats->q_ibytes[i];
1095         }
1096
1097         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
1098                         i < dev->data->nb_tx_queues; i++) {
1099                 if (dev->data->tx_queues[i] == NULL)
1100                         continue;
1101                 vq = dev->data->tx_queues[i];
1102                 stats->q_opackets[i] = vq->stats.pkts;
1103                 tx_missed_total += vq->stats.missed_pkts;
1104                 tx_total += stats->q_opackets[i];
1105
1106                 stats->q_obytes[i] = vq->stats.bytes;
1107                 tx_total_bytes += stats->q_obytes[i];
1108         }
1109
1110         stats->ipackets = rx_total;
1111         stats->opackets = tx_total;
1112         stats->oerrors = tx_missed_total;
1113         stats->ibytes = rx_total_bytes;
1114         stats->obytes = tx_total_bytes;
1115
1116         return 0;
1117 }
1118
1119 static void
1120 eth_stats_reset(struct rte_eth_dev *dev)
1121 {
1122         struct vhost_queue *vq;
1123         unsigned i;
1124
1125         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1126                 if (dev->data->rx_queues[i] == NULL)
1127                         continue;
1128                 vq = dev->data->rx_queues[i];
1129                 vq->stats.pkts = 0;
1130                 vq->stats.bytes = 0;
1131         }
1132         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1133                 if (dev->data->tx_queues[i] == NULL)
1134                         continue;
1135                 vq = dev->data->tx_queues[i];
1136                 vq->stats.pkts = 0;
1137                 vq->stats.bytes = 0;
1138                 vq->stats.missed_pkts = 0;
1139         }
1140 }
1141
1142 static void
1143 eth_queue_release(void *q)
1144 {
1145         rte_free(q);
1146 }
1147
1148 static int
1149 eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused)
1150 {
1151         /*
1152          * vHost does not hold on to mbufs: eth_vhost_tx() copies the packet
1153          * data and releases the mbuf, so there is nothing to clean up.
1154          */
1155         return 0;
1156 }
1157
1158 static int
1159 eth_link_update(struct rte_eth_dev *dev __rte_unused,
1160                 int wait_to_complete __rte_unused)
1161 {
1162         return 0;
1163 }
1164
1165 static uint32_t
1166 eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1167 {
1168         struct vhost_queue *vq;
1169
1170         vq = dev->data->rx_queues[rx_queue_id];
1171         if (vq == NULL)
1172                 return 0;
1173
1174         return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
1175 }
1176
1177 static const struct eth_dev_ops ops = {
1178         .dev_start = eth_dev_start,
1179         .dev_stop = eth_dev_stop,
1180         .dev_close = eth_dev_close,
1181         .dev_configure = eth_dev_configure,
1182         .dev_infos_get = eth_dev_info,
1183         .rx_queue_setup = eth_rx_queue_setup,
1184         .tx_queue_setup = eth_tx_queue_setup,
1185         .rx_queue_release = eth_queue_release,
1186         .tx_queue_release = eth_queue_release,
1187         .tx_done_cleanup = eth_tx_done_cleanup,
1188         .rx_queue_count = eth_rx_queue_count,
1189         .link_update = eth_link_update,
1190         .stats_get = eth_stats_get,
1191         .stats_reset = eth_stats_reset,
1192         .xstats_reset = vhost_dev_xstats_reset,
1193         .xstats_get = vhost_dev_xstats_get,
1194         .xstats_get_names = vhost_dev_xstats_get_names,
1195         .rx_queue_intr_enable = eth_rxq_intr_enable,
1196         .rx_queue_intr_disable = eth_rxq_intr_disable,
1197 };
1198
1199 static struct rte_vdev_driver pmd_vhost_drv;
1200
1201 static int
1202 eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
1203         int16_t queues, const unsigned int numa_node, uint64_t flags)
1204 {
1205         const char *name = rte_vdev_device_name(dev);
1206         struct rte_eth_dev_data *data;
1207         struct pmd_internal *internal = NULL;
1208         struct rte_eth_dev *eth_dev = NULL;
1209         struct rte_ether_addr *eth_addr = NULL;
1210         struct rte_vhost_vring_state *vring_state = NULL;
1211         struct internal_list *list = NULL;
1212
1213         VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n",
1214                 numa_node);
1215
1216         list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
1217         if (list == NULL)
1218                 goto error;
1219
1220         /* reserve an ethdev entry */
1221         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
1222         if (eth_dev == NULL)
1223                 goto error;
1224         data = eth_dev->data;
1225
1226         eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
1227         if (eth_addr == NULL)
1228                 goto error;
1229         data->mac_addrs = eth_addr;
1230         *eth_addr = base_eth_addr;
1231         eth_addr->addr_bytes[5] = eth_dev->data->port_id;
1232
1233         vring_state = rte_zmalloc_socket(name,
1234                         sizeof(*vring_state), 0, numa_node);
1235         if (vring_state == NULL)
1236                 goto error;
1237
1238         /* now put it all together
1239          * - store the device and interface names in the private data,
1240          * - add the device to the internal list,
1241          * - and hook up the vring state, queue counts and rx/tx handlers
1242          */
1243         internal = eth_dev->data->dev_private;
1244         internal->dev_name = strdup(name);
1245         if (internal->dev_name == NULL)
1246                 goto error;
1247         internal->iface_name = strdup(iface_name);
1248         if (internal->iface_name == NULL)
1249                 goto error;
1250
1251         list->eth_dev = eth_dev;
1252         pthread_mutex_lock(&internal_list_lock);
1253         TAILQ_INSERT_TAIL(&internal_list, list, next);
1254         pthread_mutex_unlock(&internal_list_lock);
1255
1256         rte_spinlock_init(&vring_state->lock);
1257         vring_states[eth_dev->data->port_id] = vring_state;
1258
1259         data->nb_rx_queues = queues;
1260         data->nb_tx_queues = queues;
1261         internal->max_queues = queues;
1262         internal->vid = -1;
1263         data->dev_link = pmd_link;
1264         data->dev_flags = RTE_ETH_DEV_INTR_LSC;
1265
1266         eth_dev->dev_ops = &ops;
1267
1268         /* finally assign rx and tx ops */
1269         eth_dev->rx_pkt_burst = eth_vhost_rx;
1270         eth_dev->tx_pkt_burst = eth_vhost_tx;
1271
1272         if (rte_vhost_driver_register(iface_name, flags))
1273                 goto error;
1274
1275         if (rte_vhost_driver_callback_register(iface_name, &vhost_ops) < 0) {
1276                 VHOST_LOG(ERR, "Can't register callbacks\n");
1277                 goto error;
1278         }
1279
1280         if (rte_vhost_driver_start(iface_name) < 0) {
1281                 VHOST_LOG(ERR, "Failed to start driver for %s\n",
1282                         iface_name);
1283                 goto error;
1284         }
1285
1286         rte_eth_dev_probing_finish(eth_dev);
1287         return data->port_id;
1288
1289 error:
1290         if (internal) {
1291                 free(internal->iface_name);
1292                 free(internal->dev_name);
1293         }
1294         rte_free(vring_state);
1295         rte_eth_dev_release_port(eth_dev);
1296         rte_free(list);
1297
1298         return -1;
1299 }
1300
1301 static inline int
1302 open_iface(const char *key __rte_unused, const char *value, void *extra_args)
1303 {
1304         const char **iface_name = extra_args;
1305
1306         if (value == NULL)
1307                 return -1;
1308
1309         *iface_name = value;
1310
1311         return 0;
1312 }
1313
1314 static inline int
1315 open_int(const char *key __rte_unused, const char *value, void *extra_args)
1316 {
1317         uint16_t *n = extra_args;
1318
1319         if (value == NULL || extra_args == NULL)
1320                 return -EINVAL;
1321
1322         *n = (uint16_t)strtoul(value, NULL, 0);
1323         if (*n == USHRT_MAX && errno == ERANGE)
1324                 return -1;
1325
1326         return 0;
1327 }
1328
1329 static int
1330 rte_pmd_vhost_probe(struct rte_vdev_device *dev)
1331 {
1332         struct rte_kvargs *kvlist = NULL;
1333         int ret = 0;
1334         char *iface_name;
1335         uint16_t queues;
1336         uint64_t flags = 0;
1337         int client_mode = 0;
1338         int dequeue_zero_copy = 0;
1339         int iommu_support = 0;
1340         int postcopy_support = 0;
1341         struct rte_eth_dev *eth_dev;
1342         const char *name = rte_vdev_device_name(dev);
1343
1344         VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name);
1345
1346         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1347                 eth_dev = rte_eth_dev_attach_secondary(name);
1348                 if (!eth_dev) {
1349                         VHOST_LOG(ERR, "Failed to probe %s\n", name);
1350                         return -1;
1351                 }
1352                 /* TODO: request info from primary to set up Rx and Tx */
1353                 eth_dev->dev_ops = &ops;
1354                 eth_dev->device = &dev->device;
1355                 rte_eth_dev_probing_finish(eth_dev);
1356                 return 0;
1357         }
1358
1359         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
1360         if (kvlist == NULL)
1361                 return -1;
1362
1363         if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
1364                 ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
1365                                          &open_iface, &iface_name);
1366                 if (ret < 0)
1367                         goto out_free;
1368         } else {
1369                 ret = -1;
1370                 goto out_free;
1371         }
1372
1373         if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
1374                 ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
1375                                          &open_int, &queues);
1376                 if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
1377                         goto out_free;
1378
1379         } else
1380                 queues = 1;
1381
1382         if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
1383                 ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
1384                                          &open_int, &client_mode);
1385                 if (ret < 0)
1386                         goto out_free;
1387
1388                 if (client_mode)
1389                         flags |= RTE_VHOST_USER_CLIENT;
1390         }
1391
1392         if (rte_kvargs_count(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY) == 1) {
1393                 ret = rte_kvargs_process(kvlist, ETH_VHOST_DEQUEUE_ZERO_COPY,
1394                                          &open_int, &dequeue_zero_copy);
1395                 if (ret < 0)
1396                         goto out_free;
1397
1398                 if (dequeue_zero_copy)
1399                         flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
1400         }
1401
1402         if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
1403                 ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
1404                                          &open_int, &iommu_support);
1405                 if (ret < 0)
1406                         goto out_free;
1407
1408                 if (iommu_support)
1409                         flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
1410         }
1411
1412         if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
1413                 ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
1414                                          &open_int, &postcopy_support);
1415                 if (ret < 0)
1416                         goto out_free;
1417
1418                 if (postcopy_support)
1419                         flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
1420         }
1421
1422         if (dev->device.numa_node == SOCKET_ID_ANY)
1423                 dev->device.numa_node = rte_socket_id();
1424
1425         if (eth_dev_vhost_create(dev, iface_name, queues,
1426                 dev->device.numa_node, flags) < 0)
                ret = -1;
1427
1428 out_free:
1429         rte_kvargs_free(kvlist);
1430         return ret;
1431 }
1432
1433 static int
1434 rte_pmd_vhost_remove(struct rte_vdev_device *dev)
1435 {
1436         const char *name;
1437         struct rte_eth_dev *eth_dev = NULL;
1438
1439         name = rte_vdev_device_name(dev);
1440         VHOST_LOG(INFO, "Uninitializing pmd_vhost for %s\n", name);
1441
1442         /* find an ethdev entry */
1443         eth_dev = rte_eth_dev_allocated(name);
1444         if (eth_dev == NULL)
1445                 return -ENODEV;
1446
1447         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1448                 return rte_eth_dev_release_port(eth_dev);
1449
1450         eth_dev_close(eth_dev);
1451
1452         rte_free(vring_states[eth_dev->data->port_id]);
1453         vring_states[eth_dev->data->port_id] = NULL;
1454
1455         rte_eth_dev_release_port(eth_dev);
1456
1457         return 0;
1458 }
1459
1460 static struct rte_vdev_driver pmd_vhost_drv = {
1461         .probe = rte_pmd_vhost_probe,
1462         .remove = rte_pmd_vhost_remove,
1463 };
1464
1465 RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
1466 RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
1467 RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
1468         "iface=<ifc> "
1469         "queues=<int> "
1470         "client=<0|1> "
1471         "dequeue-zero-copy=<0|1> "
1472         "iommu-support=<0|1> "
1473         "postcopy-support=<0|1>");
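/*
 * Example (illustrative only; socket path and queue count are arbitrary):
 * a vhost-user port can be created from the EAL command line with
 *
 *     --vdev 'net_vhost0,iface=/tmp/vhost-user-0,queues=2,client=1'
 */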
1474
1475 RTE_INIT(vhost_init_log)
1476 {
1477         vhost_logtype = rte_log_register("pmd.net.vhost");
1478         if (vhost_logtype >= 0)
1479                 rte_log_set_level(vhost_logtype, RTE_LOG_NOTICE);
1480 }