net/virtio: enable packed virtqueues by default
[dpdk.git] / drivers / net / virtio / virtio_user / virtio_user_dev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <fcntl.h>
8 #include <string.h>
9 #include <errno.h>
10 #include <sys/mman.h>
11 #include <unistd.h>
12 #include <sys/eventfd.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15
16 #include <rte_eal_memconfig.h>
17
18 #include "vhost.h"
19 #include "virtio_user_dev.h"
20 #include "../virtio_ethdev.h"
21
/* Name used when registering/unregistering the memory event callback. */
#define VIRTIO_USER_MEM_EVENT_CLB_NAME "virtio_user_mem_event_clb"
23
24 static int
25 virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
26 {
27         /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come
28          * firstly because vhost depends on this msg to allocate virtqueue
29          * pair.
30          */
31         struct vhost_vring_file file;
32
33         file.index = queue_sel;
34         file.fd = dev->callfds[queue_sel];
35         dev->ops->send_request(dev, VHOST_USER_SET_VRING_CALL, &file);
36
37         return 0;
38 }
39
40 static int
41 virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
42 {
43         struct vhost_vring_file file;
44         struct vhost_vring_state state;
45         struct vring *vring = &dev->vrings[queue_sel];
46         struct vhost_vring_addr addr = {
47                 .index = queue_sel,
48                 .desc_user_addr = (uint64_t)(uintptr_t)vring->desc,
49                 .avail_user_addr = (uint64_t)(uintptr_t)vring->avail,
50                 .used_user_addr = (uint64_t)(uintptr_t)vring->used,
51                 .log_guest_addr = 0,
52                 .flags = 0, /* disable log */
53         };
54
55         state.index = queue_sel;
56         state.num = vring->num;
57         dev->ops->send_request(dev, VHOST_USER_SET_VRING_NUM, &state);
58
59         state.index = queue_sel;
60         state.num = 0; /* no reservation */
61         if (dev->features & (1ULL << VIRTIO_F_RING_PACKED))
62                 state.num |= (1 << 15);
63         dev->ops->send_request(dev, VHOST_USER_SET_VRING_BASE, &state);
64
65         dev->ops->send_request(dev, VHOST_USER_SET_VRING_ADDR, &addr);
66
67         /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes
68          * lastly because vhost depends on this msg to judge if
69          * virtio is ready.
70          */
71         file.index = queue_sel;
72         file.fd = dev->kickfds[queue_sel];
73         dev->ops->send_request(dev, VHOST_USER_SET_VRING_KICK, &file);
74
75         return 0;
76 }
77
78 static int
79 virtio_user_queue_setup(struct virtio_user_dev *dev,
80                         int (*fn)(struct virtio_user_dev *, uint32_t))
81 {
82         uint32_t i, queue_sel;
83
84         for (i = 0; i < dev->max_queue_pairs; ++i) {
85                 queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX;
86                 if (fn(dev, queue_sel) < 0) {
87                         PMD_DRV_LOG(INFO, "setup rx vq fails: %u", i);
88                         return -1;
89                 }
90         }
91         for (i = 0; i < dev->max_queue_pairs; ++i) {
92                 queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX;
93                 if (fn(dev, queue_sel) < 0) {
94                         PMD_DRV_LOG(INFO, "setup tx vq fails: %u", i);
95                         return -1;
96                 }
97         }
98
99         return 0;
100 }
101
/*
 * Tell whether @path names a socket.
 *
 * Returns 0 when stat() fails on @path, otherwise the value of
 * S_ISSOCK() for the file's mode.
 */
int
is_vhost_user_by_type(const char *path)
{
	struct stat st;

	if (stat(path, &st) != 0)
		return 0;

	return S_ISSOCK(st.st_mode);
}
112
113 int
114 virtio_user_start_device(struct virtio_user_dev *dev)
115 {
116         struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
117         uint64_t features;
118         int ret;
119
120         /*
121          * XXX workaround!
122          *
123          * We need to make sure that the locks will be
124          * taken in the correct order to avoid deadlocks.
125          *
126          * Before releasing this lock, this thread should
127          * not trigger any memory hotplug events.
128          *
129          * This is a temporary workaround, and should be
130          * replaced when we get proper supports from the
131          * memory subsystem in the future.
132          */
133         rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
134         pthread_mutex_lock(&dev->mutex);
135
136         if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
137                 goto error;
138
139         /* Step 0: tell vhost to create queues */
140         if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0)
141                 goto error;
142
143         /* Step 1: set features */
144         features = dev->features;
145         /* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */
146         features &= ~(1ull << VIRTIO_NET_F_MAC);
147         /* Strip VIRTIO_NET_F_CTRL_VQ, as devices do not really need to know */
148         features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
149         features &= ~(1ull << VIRTIO_NET_F_STATUS);
150         ret = dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features);
151         if (ret < 0)
152                 goto error;
153         PMD_DRV_LOG(INFO, "set features: %" PRIx64, features);
154
155         /* Step 2: share memory regions */
156         ret = dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL);
157         if (ret < 0)
158                 goto error;
159
160         /* Step 3: kick queues */
161         if (virtio_user_queue_setup(dev, virtio_user_kick_queue) < 0)
162                 goto error;
163
164         /* Step 4: enable queues
165          * we enable the 1st queue pair by default.
166          */
167         dev->ops->enable_qp(dev, 0, 1);
168
169         dev->started = true;
170         pthread_mutex_unlock(&dev->mutex);
171         rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
172
173         return 0;
174 error:
175         pthread_mutex_unlock(&dev->mutex);
176         rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
177         /* TODO: free resource here or caller to check */
178         return -1;
179 }
180
181 int virtio_user_stop_device(struct virtio_user_dev *dev)
182 {
183         struct vhost_vring_state state;
184         uint32_t i;
185         int error = 0;
186
187         pthread_mutex_lock(&dev->mutex);
188         if (!dev->started)
189                 goto out;
190
191         for (i = 0; i < dev->max_queue_pairs; ++i)
192                 dev->ops->enable_qp(dev, i, 0);
193
194         /* Stop the backend. */
195         for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
196                 state.index = i;
197                 if (dev->ops->send_request(dev, VHOST_USER_GET_VRING_BASE,
198                                            &state) < 0) {
199                         PMD_DRV_LOG(ERR, "get_vring_base failed, index=%u\n",
200                                     i);
201                         error = -1;
202                         goto out;
203                 }
204         }
205
206         dev->started = false;
207 out:
208         pthread_mutex_unlock(&dev->mutex);
209
210         return error;
211 }
212
213 static inline void
214 parse_mac(struct virtio_user_dev *dev, const char *mac)
215 {
216         int i, r;
217         uint32_t tmp[ETHER_ADDR_LEN];
218
219         if (!mac)
220                 return;
221
222         r = sscanf(mac, "%x:%x:%x:%x:%x:%x", &tmp[0],
223                         &tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5]);
224         if (r == ETHER_ADDR_LEN) {
225                 for (i = 0; i < ETHER_ADDR_LEN; ++i)
226                         dev->mac_addr[i] = (uint8_t)tmp[i];
227                 dev->mac_specified = 1;
228         } else {
229                 /* ignore the wrong mac, use random mac */
230                 PMD_DRV_LOG(ERR, "wrong format of mac: %s", mac);
231         }
232 }
233
234 static int
235 virtio_user_dev_init_notify(struct virtio_user_dev *dev)
236 {
237         uint32_t i, j;
238         int callfd;
239         int kickfd;
240
241         for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; ++i) {
242                 if (i >= dev->max_queue_pairs * 2) {
243                         dev->kickfds[i] = -1;
244                         dev->callfds[i] = -1;
245                         continue;
246                 }
247
248                 /* May use invalid flag, but some backend uses kickfd and
249                  * callfd as criteria to judge if dev is alive. so finally we
250                  * use real event_fd.
251                  */
252                 callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
253                 if (callfd < 0) {
254                         PMD_DRV_LOG(ERR, "callfd error, %s", strerror(errno));
255                         break;
256                 }
257                 kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
258                 if (kickfd < 0) {
259                         PMD_DRV_LOG(ERR, "kickfd error, %s", strerror(errno));
260                         break;
261                 }
262                 dev->callfds[i] = callfd;
263                 dev->kickfds[i] = kickfd;
264         }
265
266         if (i < VIRTIO_MAX_VIRTQUEUES) {
267                 for (j = 0; j <= i; ++j) {
268                         close(dev->callfds[j]);
269                         close(dev->kickfds[j]);
270                 }
271
272                 return -1;
273         }
274
275         return 0;
276 }
277
278 static int
279 virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
280 {
281         uint32_t i;
282         struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
283
284         if (!eth_dev->intr_handle) {
285                 eth_dev->intr_handle = malloc(sizeof(*eth_dev->intr_handle));
286                 if (!eth_dev->intr_handle) {
287                         PMD_DRV_LOG(ERR, "fail to allocate intr_handle");
288                         return -1;
289                 }
290                 memset(eth_dev->intr_handle, 0, sizeof(*eth_dev->intr_handle));
291         }
292
293         for (i = 0; i < dev->max_queue_pairs; ++i)
294                 eth_dev->intr_handle->efds[i] = dev->callfds[i];
295         eth_dev->intr_handle->nb_efd = dev->max_queue_pairs;
296         eth_dev->intr_handle->max_intr = dev->max_queue_pairs + 1;
297         eth_dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
298         /* For virtio vdev, no need to read counter for clean */
299         eth_dev->intr_handle->efd_counter_size = 0;
300         eth_dev->intr_handle->fd = -1;
301         if (dev->vhostfd >= 0)
302                 eth_dev->intr_handle->fd = dev->vhostfd;
303         else if (dev->is_server)
304                 eth_dev->intr_handle->fd = dev->listenfd;
305
306         return 0;
307 }
308
309 static void
310 virtio_user_mem_event_cb(enum rte_mem_event type __rte_unused,
311                                                  const void *addr __rte_unused,
312                                                  size_t len __rte_unused,
313                                                  void *arg)
314 {
315         struct virtio_user_dev *dev = arg;
316         struct rte_memseg_list *msl;
317         uint16_t i;
318
319         /* ignore externally allocated memory */
320         msl = rte_mem_virt2memseg_list(addr);
321         if (msl->external)
322                 return;
323
324         pthread_mutex_lock(&dev->mutex);
325
326         if (dev->started == false)
327                 goto exit;
328
329         /* Step 1: pause the active queues */
330         for (i = 0; i < dev->queue_pairs; i++)
331                 dev->ops->enable_qp(dev, i, 0);
332
333         /* Step 2: update memory regions */
334         dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL);
335
336         /* Step 3: resume the active queues */
337         for (i = 0; i < dev->queue_pairs; i++)
338                 dev->ops->enable_qp(dev, i, 1);
339
340 exit:
341         pthread_mutex_unlock(&dev->mutex);
342 }
343
344 static int
345 virtio_user_dev_setup(struct virtio_user_dev *dev)
346 {
347         uint32_t q;
348
349         dev->vhostfd = -1;
350         dev->vhostfds = NULL;
351         dev->tapfds = NULL;
352
353         if (dev->is_server) {
354                 if (access(dev->path, F_OK) == 0 &&
355                     !is_vhost_user_by_type(dev->path)) {
356                         PMD_DRV_LOG(ERR, "Server mode doesn't support vhost-kernel!");
357                         return -1;
358                 }
359                 dev->ops = &virtio_ops_user;
360         } else {
361                 if (is_vhost_user_by_type(dev->path)) {
362                         dev->ops = &virtio_ops_user;
363                 } else {
364                         dev->ops = &virtio_ops_kernel;
365
366                         dev->vhostfds = malloc(dev->max_queue_pairs *
367                                                sizeof(int));
368                         dev->tapfds = malloc(dev->max_queue_pairs *
369                                              sizeof(int));
370                         if (!dev->vhostfds || !dev->tapfds) {
371                                 PMD_INIT_LOG(ERR, "Failed to malloc");
372                                 return -1;
373                         }
374
375                         for (q = 0; q < dev->max_queue_pairs; ++q) {
376                                 dev->vhostfds[q] = -1;
377                                 dev->tapfds[q] = -1;
378                         }
379                 }
380         }
381
382         if (dev->ops->setup(dev) < 0)
383                 return -1;
384
385         if (virtio_user_dev_init_notify(dev) < 0)
386                 return -1;
387
388         if (virtio_user_fill_intr_handle(dev) < 0)
389                 return -1;
390
391         return 0;
392 }
393
/* Mask of the feature bits virtio-user accepts from the vhost backend;
 * its complement seeds dev->unsupported_features.
 */
#define VIRTIO_USER_SUPPORTED_FEATURES			\
	((1ULL << VIRTIO_NET_F_MAC)		|	\
	 (1ULL << VIRTIO_NET_F_STATUS)		|	\
	 (1ULL << VIRTIO_NET_F_MQ)		|	\
	 (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR)	|	\
	 (1ULL << VIRTIO_NET_F_CTRL_VQ)		|	\
	 (1ULL << VIRTIO_NET_F_CTRL_RX)		|	\
	 (1ULL << VIRTIO_NET_F_CTRL_VLAN)	|	\
	 (1ULL << VIRTIO_NET_F_CSUM)		|	\
	 (1ULL << VIRTIO_NET_F_HOST_TSO4)	|	\
	 (1ULL << VIRTIO_NET_F_HOST_TSO6)	|	\
	 (1ULL << VIRTIO_NET_F_MRG_RXBUF)	|	\
	 (1ULL << VIRTIO_RING_F_INDIRECT_DESC)	|	\
	 (1ULL << VIRTIO_NET_F_GUEST_CSUM)	|	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO4)	|	\
	 (1ULL << VIRTIO_NET_F_GUEST_TSO6)	|	\
	 (1ULL << VIRTIO_F_IN_ORDER)		|	\
	 (1ULL << VIRTIO_F_VERSION_1)		|	\
	 (1ULL << VIRTIO_F_RING_PACKED)		|	\
	 (1ULL << VIRTIO_RING_F_EVENT_IDX))
415
416 int
417 virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
418                      int cq, int queue_size, const char *mac, char **ifname,
419                      int mrg_rxbuf, int in_order, int packed_vq)
420 {
421         pthread_mutex_init(&dev->mutex, NULL);
422         snprintf(dev->path, PATH_MAX, "%s", path);
423         dev->started = 0;
424         dev->max_queue_pairs = queues;
425         dev->queue_pairs = 1; /* mq disabled by default */
426         dev->queue_size = queue_size;
427         dev->mac_specified = 0;
428         dev->frontend_features = 0;
429         dev->unsupported_features = ~VIRTIO_USER_SUPPORTED_FEATURES;
430         parse_mac(dev, mac);
431
432         if (*ifname) {
433                 dev->ifname = *ifname;
434                 *ifname = NULL;
435         }
436
437         if (virtio_user_dev_setup(dev) < 0) {
438                 PMD_INIT_LOG(ERR, "backend set up fails");
439                 return -1;
440         }
441
442         if (!dev->is_server) {
443                 if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER,
444                                            NULL) < 0) {
445                         PMD_INIT_LOG(ERR, "set_owner fails: %s",
446                                      strerror(errno));
447                         return -1;
448                 }
449
450                 if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES,
451                                            &dev->device_features) < 0) {
452                         PMD_INIT_LOG(ERR, "get_features failed: %s",
453                                      strerror(errno));
454                         return -1;
455                 }
456         } else {
457                 /* We just pretend vhost-user can support all these features.
458                  * Note that this could be problematic that if some feature is
459                  * negotiated but not supported by the vhost-user which comes
460                  * later.
461                  */
462                 dev->device_features = VIRTIO_USER_SUPPORTED_FEATURES;
463         }
464
465         if (!mrg_rxbuf)
466                 dev->unsupported_features |= (1ull << VIRTIO_NET_F_MRG_RXBUF);
467
468         if (!in_order)
469                 dev->unsupported_features |= (1ull << VIRTIO_F_IN_ORDER);
470
471         if (packed_vq) {
472                 if (cq) {
473                         PMD_INIT_LOG(ERR, "control vq not supported yet with "
474                                           "packed virtqueues\n");
475                         return -1;
476                 }
477                 dev->device_features |= (1ull << VIRTIO_F_RING_PACKED);
478         } else {
479                 dev->device_features &= ~(1ull << VIRTIO_F_RING_PACKED);
480         }
481
482         if (dev->mac_specified) {
483                 dev->device_features |= (1ull << VIRTIO_NET_F_MAC);
484         } else {
485                 dev->device_features &= ~(1ull << VIRTIO_NET_F_MAC);
486                 dev->unsupported_features |= (1ull << VIRTIO_NET_F_MAC);
487         }
488
489         if (cq) {
490                 /* device does not really need to know anything about CQ,
491                  * so if necessary, we just claim to support CQ
492                  */
493                 dev->frontend_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
494         } else {
495                 dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VQ);
496                 /* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */
497                 dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_RX);
498                 dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VLAN);
499                 dev->unsupported_features |=
500                         (1ull << VIRTIO_NET_F_GUEST_ANNOUNCE);
501                 dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
502                 dev->unsupported_features |=
503                         (1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
504         }
505
506         /* The backend will not report this feature, we add it explicitly */
507         if (is_vhost_user_by_type(dev->path))
508                 dev->frontend_features |= (1ull << VIRTIO_NET_F_STATUS);
509
510         /*
511          * Device features =
512          *     (frontend_features | backend_features) & ~unsupported_features;
513          */
514         dev->device_features |= dev->frontend_features;
515         dev->device_features &= ~dev->unsupported_features;
516
517         if (rte_mem_event_callback_register(VIRTIO_USER_MEM_EVENT_CLB_NAME,
518                                 virtio_user_mem_event_cb, dev)) {
519                 if (rte_errno != ENOTSUP) {
520                         PMD_INIT_LOG(ERR, "Failed to register mem event"
521                                         " callback\n");
522                         return -1;
523                 }
524         }
525
526         return 0;
527 }
528
529 void
530 virtio_user_dev_uninit(struct virtio_user_dev *dev)
531 {
532         uint32_t i;
533
534         virtio_user_stop_device(dev);
535
536         rte_mem_event_callback_unregister(VIRTIO_USER_MEM_EVENT_CLB_NAME, dev);
537
538         for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
539                 close(dev->callfds[i]);
540                 close(dev->kickfds[i]);
541         }
542
543         close(dev->vhostfd);
544
545         if (dev->is_server && dev->listenfd >= 0) {
546                 close(dev->listenfd);
547                 dev->listenfd = -1;
548         }
549
550         if (dev->vhostfds) {
551                 for (i = 0; i < dev->max_queue_pairs; ++i)
552                         close(dev->vhostfds[i]);
553                 free(dev->vhostfds);
554                 free(dev->tapfds);
555         }
556
557         free(dev->ifname);
558
559         if (dev->is_server)
560                 unlink(dev->path);
561 }
562
563 uint8_t
564 virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs)
565 {
566         uint16_t i;
567         uint8_t ret = 0;
568
569         if (q_pairs > dev->max_queue_pairs) {
570                 PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported",
571                              q_pairs, dev->max_queue_pairs);
572                 return -1;
573         }
574
575         /* Server mode can't enable queue pairs if vhostfd is invalid,
576          * always return 0 in this case.
577          */
578         if (!dev->is_server || dev->vhostfd >= 0) {
579                 for (i = 0; i < q_pairs; ++i)
580                         ret |= dev->ops->enable_qp(dev, i, 1);
581                 for (i = q_pairs; i < dev->max_queue_pairs; ++i)
582                         ret |= dev->ops->enable_qp(dev, i, 0);
583         }
584         dev->queue_pairs = q_pairs;
585
586         return ret;
587 }
588
589 static uint32_t
590 virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring,
591                             uint16_t idx_hdr)
592 {
593         struct virtio_net_ctrl_hdr *hdr;
594         virtio_net_ctrl_ack status = ~0;
595         uint16_t i, idx_data, idx_status;
596         uint32_t n_descs = 0;
597
598         /* locate desc for header, data, and status */
599         idx_data = vring->desc[idx_hdr].next;
600         n_descs++;
601
602         i = idx_data;
603         while (vring->desc[i].flags == VRING_DESC_F_NEXT) {
604                 i = vring->desc[i].next;
605                 n_descs++;
606         }
607
608         /* locate desc for status */
609         idx_status = i;
610         n_descs++;
611
612         hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
613         if (hdr->class == VIRTIO_NET_CTRL_MQ &&
614             hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
615                 uint16_t queues;
616
617                 queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr;
618                 status = virtio_user_handle_mq(dev, queues);
619         }
620
621         /* Update status */
622         *(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status;
623
624         return n_descs;
625 }
626
627 void
628 virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx)
629 {
630         uint16_t avail_idx, desc_idx;
631         struct vring_used_elem *uep;
632         uint32_t n_descs;
633         struct vring *vring = &dev->vrings[queue_idx];
634
635         /* Consume avail ring, using used ring idx as first one */
636         while (vring->used->idx != vring->avail->idx) {
637                 avail_idx = (vring->used->idx) & (vring->num - 1);
638                 desc_idx = vring->avail->ring[avail_idx];
639
640                 n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx);
641
642                 /* Update used ring */
643                 uep = &vring->used->ring[avail_idx];
644                 uep->id = avail_idx;
645                 uep->len = n_descs;
646
647                 vring->used->idx++;
648         }
649 }