ethdev: remove legacy Rx descriptor done API
[dpdk.git] / drivers / net / virtio / virtio_user / vhost_user.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <sys/socket.h>
6 #include <sys/types.h>
7 #include <sys/stat.h>
8 #include <unistd.h>
9 #include <fcntl.h>
10 #include <sys/un.h>
11 #include <string.h>
12 #include <errno.h>
13
14 #include <rte_alarm.h>
15 #include <rte_string_fns.h>
16 #include <rte_fbarray.h>
17
18 #include "vhost.h"
19 #include "virtio_user_dev.h"
20
/* Private state the vhost-user backend keeps for one virtio-user device. */
struct vhost_user_data {
        /* Socket carrying vhost-user messages; -1 while not connected. */
        int vhostfd;
        /* Listening socket, only assigned in server mode; -1 otherwise. */
        int listenfd;
        /* Protocol feature bits kept after negotiation with the backend. */
        uint64_t protocol_features;
};
26
/* Feature bit telling that the backend supports protocol features. */
#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#endif

/*
 * Protocol feature bit numbers. Each one is only defined here when no
 * earlier definition is in scope.
 */
#ifndef VHOST_USER_PROTOCOL_F_MQ
#define VHOST_USER_PROTOCOL_F_MQ 0
#endif

#ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK
#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
#endif

#ifndef VHOST_USER_PROTOCOL_F_STATUS
#define VHOST_USER_PROTOCOL_F_STATUS 16
#endif

/* Mask of the protocol features this driver keeps during negotiation. */
#define VHOST_USER_SUPPORTED_PROTOCOL_FEATURES                  \
        ((1ULL << VHOST_USER_PROTOCOL_F_MQ) |                   \
         (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) |            \
         (1ULL << VHOST_USER_PROTOCOL_F_STATUS))
48
/* Protocol version this driver speaks; placed in the flags of every request. */
#define VHOST_USER_VERSION 0x1
51
52 #define VHOST_MEMORY_MAX_NREGIONS 8
53 struct vhost_memory {
54         uint32_t nregions;
55         uint32_t padding;
56         struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS];
57 };
58
/*
 * Request codes placed in the header of a vhost-user message.
 * The numeric values are part of the wire protocol and must not change.
 */
enum vhost_user_request {
        VHOST_USER_NONE                  = 0,
        VHOST_USER_GET_FEATURES          = 1,
        VHOST_USER_SET_FEATURES          = 2,
        VHOST_USER_SET_OWNER             = 3,
        VHOST_USER_RESET_OWNER           = 4,
        VHOST_USER_SET_MEM_TABLE         = 5,
        VHOST_USER_SET_LOG_BASE          = 6,
        VHOST_USER_SET_LOG_FD            = 7,
        VHOST_USER_SET_VRING_NUM         = 8,
        VHOST_USER_SET_VRING_ADDR        = 9,
        VHOST_USER_SET_VRING_BASE        = 10,
        VHOST_USER_GET_VRING_BASE        = 11,
        VHOST_USER_SET_VRING_KICK        = 12,
        VHOST_USER_SET_VRING_CALL        = 13,
        VHOST_USER_SET_VRING_ERR         = 14,
        VHOST_USER_GET_PROTOCOL_FEATURES = 15,
        VHOST_USER_SET_PROTOCOL_FEATURES = 16,
        VHOST_USER_GET_QUEUE_NUM         = 17,
        VHOST_USER_SET_VRING_ENABLE      = 18,
        /* Codes 19..38 are not used by this driver. */
        VHOST_USER_SET_STATUS            = 39,
        VHOST_USER_GET_STATUS            = 40,
        /* One past the highest code listed above. */
        VHOST_USER_MAX
};
83
84 struct vhost_user_msg {
85         enum vhost_user_request request;
86
87 #define VHOST_USER_VERSION_MASK     0x3
88 #define VHOST_USER_REPLY_MASK       (0x1 << 2)
89 #define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
90         uint32_t flags;
91         uint32_t size; /* the following payload size */
92         union {
93 #define VHOST_USER_VRING_IDX_MASK   0xff
94 #define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
95                 uint64_t u64;
96                 struct vhost_vring_state state;
97                 struct vhost_vring_addr addr;
98                 struct vhost_memory memory;
99         } payload;
100         int fds[VHOST_MEMORY_MAX_NREGIONS];
101 } __rte_packed;
102
/* Number of bytes that precede the payload in struct vhost_user_msg. */
#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64)

/* Size of struct vhost_user_msg once the header bytes are subtracted. */
#define VHOST_USER_PAYLOAD_SIZE \
        (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE)
106
107 static int
108 vhost_user_write(int fd, struct vhost_user_msg *msg, int *fds, int fd_num)
109 {
110         int r;
111         struct msghdr msgh;
112         struct iovec iov;
113         size_t fd_size = fd_num * sizeof(int);
114         char control[CMSG_SPACE(fd_size)];
115         struct cmsghdr *cmsg;
116
117         memset(&msgh, 0, sizeof(msgh));
118         memset(control, 0, sizeof(control));
119
120         iov.iov_base = (uint8_t *)msg;
121         iov.iov_len = VHOST_USER_HDR_SIZE + msg->size;
122
123         msgh.msg_iov = &iov;
124         msgh.msg_iovlen = 1;
125         msgh.msg_control = control;
126         msgh.msg_controllen = sizeof(control);
127
128         cmsg = CMSG_FIRSTHDR(&msgh);
129         cmsg->cmsg_len = CMSG_LEN(fd_size);
130         cmsg->cmsg_level = SOL_SOCKET;
131         cmsg->cmsg_type = SCM_RIGHTS;
132         memcpy(CMSG_DATA(cmsg), fds, fd_size);
133
134         do {
135                 r = sendmsg(fd, &msgh, 0);
136         } while (r < 0 && errno == EINTR);
137
138         if (r < 0)
139                 PMD_DRV_LOG(ERR, "Failed to send msg: %s", strerror(errno));
140
141         return r;
142 }
143
144 static int
145 vhost_user_read(int fd, struct vhost_user_msg *msg)
146 {
147         uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
148         int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload;
149
150         ret = recv(fd, (void *)msg, sz_hdr, 0);
151         if (ret < 0) {
152                 PMD_DRV_LOG(ERR, "Failed to recv msg header: %s", strerror(errno));
153                 return -1;
154         } else if (ret < sz_hdr) {
155                 PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.",
156                             ret, sz_hdr);
157                 return -1;
158         }
159
160         /* validate msg flags */
161         if (msg->flags != (valid_flags)) {
162                 PMD_DRV_LOG(ERR, "Failed to recv msg: flags 0x%x instead of 0x%x.",
163                             msg->flags, valid_flags);
164                 return -1;
165         }
166
167         sz_payload = msg->size;
168
169         if ((size_t)sz_payload > sizeof(msg->payload)) {
170                 PMD_DRV_LOG(ERR, "Payload size overflow, header says %d but max %zu",
171                                 sz_payload, sizeof(msg->payload));
172                 return -1;
173         }
174
175         if (sz_payload) {
176                 ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0);
177                 if (ret < 0) {
178                         PMD_DRV_LOG(ERR, "Failed to recv msg payload: %s", strerror(errno));
179                         return -1;
180                 } else if (ret < sz_payload) {
181                         PMD_DRV_LOG(ERR, "Failed to recv msg payload: %d instead of %u.",
182                                 ret, msg->size);
183                         return -1;
184                 }
185         }
186
187         return 0;
188 }
189
190 static int
191 vhost_user_check_reply_ack(struct virtio_user_dev *dev, struct vhost_user_msg *msg)
192 {
193         struct vhost_user_data *data = dev->backend_data;
194         enum vhost_user_request req = msg->request;
195         int ret;
196
197         if (!(msg->flags & VHOST_USER_NEED_REPLY_MASK))
198                 return 0;
199
200         ret = vhost_user_read(data->vhostfd, msg);
201         if (ret < 0) {
202                 PMD_DRV_LOG(ERR, "Failed to read reply-ack");
203                 return -1;
204         }
205
206         if (req != msg->request) {
207                 PMD_DRV_LOG(ERR, "Unexpected reply-ack request type (%d)", msg->request);
208                 return -1;
209         }
210
211         if (msg->size != sizeof(msg->payload.u64)) {
212                 PMD_DRV_LOG(ERR, "Unexpected reply-ack payload size (%u)", msg->size);
213                 return -1;
214         }
215
216         if (msg->payload.u64) {
217                 PMD_DRV_LOG(ERR, "Slave replied NACK to request type (%d)", msg->request);
218                 return -1;
219         }
220
221         return 0;
222 }
223
224 static int
225 vhost_user_set_owner(struct virtio_user_dev *dev)
226 {
227         int ret;
228         struct vhost_user_data *data = dev->backend_data;
229         struct vhost_user_msg msg = {
230                 .request = VHOST_USER_SET_OWNER,
231                 .flags = VHOST_USER_VERSION,
232         };
233
234         ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
235         if (ret < 0) {
236                 PMD_DRV_LOG(ERR, "Failed to set owner");
237                 return -1;
238         }
239
240         return 0;
241 }
242
243 static int
244 vhost_user_get_protocol_features(struct virtio_user_dev *dev, uint64_t *features)
245 {
246         int ret;
247         struct vhost_user_data *data = dev->backend_data;
248         struct vhost_user_msg msg = {
249                 .request = VHOST_USER_GET_PROTOCOL_FEATURES,
250                 .flags = VHOST_USER_VERSION,
251         };
252
253         ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
254         if (ret < 0)
255                 goto err;
256
257         ret = vhost_user_read(data->vhostfd, &msg);
258         if (ret < 0)
259                 goto err;
260
261         if (msg.request != VHOST_USER_GET_PROTOCOL_FEATURES) {
262                 PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
263                 goto err;
264         }
265
266         if (msg.size != sizeof(*features)) {
267                 PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
268                 goto err;
269         }
270
271         *features = msg.payload.u64;
272
273         return 0;
274 err:
275         PMD_DRV_LOG(ERR, "Failed to get backend protocol features");
276
277         return -1;
278 }
279
280 static int
281 vhost_user_set_protocol_features(struct virtio_user_dev *dev, uint64_t features)
282 {
283         int ret;
284         struct vhost_user_data *data = dev->backend_data;
285         struct vhost_user_msg msg = {
286                 .request = VHOST_USER_SET_PROTOCOL_FEATURES,
287                 .flags = VHOST_USER_VERSION,
288                 .size = sizeof(features),
289                 .payload.u64 = features,
290         };
291
292         ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
293         if (ret < 0) {
294                 PMD_DRV_LOG(ERR, "Failed to set protocol features");
295                 return -1;
296         }
297
298         return 0;
299 }
300
301 static int
302 vhost_user_get_features(struct virtio_user_dev *dev, uint64_t *features)
303 {
304         int ret;
305         struct vhost_user_data *data = dev->backend_data;
306         struct vhost_user_msg msg = {
307                 .request = VHOST_USER_GET_FEATURES,
308                 .flags = VHOST_USER_VERSION,
309         };
310
311         ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
312         if (ret < 0)
313                 goto err;
314
315         ret = vhost_user_read(data->vhostfd, &msg);
316         if (ret < 0)
317                 goto err;
318
319         if (msg.request != VHOST_USER_GET_FEATURES) {
320                 PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
321                 goto err;
322         }
323
324         if (msg.size != sizeof(*features)) {
325                 PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
326                 goto err;
327         }
328
329         *features = msg.payload.u64;
330
331         if (!(*features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
332                 return 0;
333
334         /* Negotiate protocol features */
335         ret = vhost_user_get_protocol_features(dev, &data->protocol_features);
336         if (ret < 0)
337                 goto err;
338
339         data->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_FEATURES;
340
341         ret = vhost_user_set_protocol_features(dev, data->protocol_features);
342         if (ret < 0)
343                 goto err;
344
345         if (!(data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)))
346                 dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ);
347
348         return 0;
349 err:
350         PMD_DRV_LOG(ERR, "Failed to get backend features");
351
352         return -1;
353 }
354
355 static int
356 vhost_user_set_features(struct virtio_user_dev *dev, uint64_t features)
357 {
358         int ret;
359         struct vhost_user_data *data = dev->backend_data;
360         struct vhost_user_msg msg = {
361                 .request = VHOST_USER_SET_FEATURES,
362                 .flags = VHOST_USER_VERSION,
363                 .size = sizeof(features),
364                 .payload.u64 = features,
365         };
366
367         msg.payload.u64 |= dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
368
369         ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
370         if (ret < 0) {
371                 PMD_DRV_LOG(ERR, "Failed to set features");
372                 return -1;
373         }
374
375         return 0;
376 }
377
/* Context handed to update_memory_region() while walking the memory segments. */
struct walk_arg {
        /* Memory table being filled in. */
        struct vhost_memory *vm;
        /* File descriptor of each region, indexed like vm->regions[]. */
        int *fds;
        /* Number of regions (and fds) recorded so far. */
        int region_nr;
};
383
384 static int
385 update_memory_region(const struct rte_memseg_list *msl __rte_unused,
386                 const struct rte_memseg *ms, void *arg)
387 {
388         struct walk_arg *wa = arg;
389         struct vhost_memory_region *mr;
390         uint64_t start_addr, end_addr;
391         size_t offset;
392         int i, fd;
393
394         fd = rte_memseg_get_fd_thread_unsafe(ms);
395         if (fd < 0) {
396                 PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p rte_errno=%d",
397                         ms, rte_errno);
398                 return -1;
399         }
400
401         if (rte_memseg_get_fd_offset_thread_unsafe(ms, &offset) < 0) {
402                 PMD_DRV_LOG(ERR, "Failed to get offset, ms=%p rte_errno=%d",
403                         ms, rte_errno);
404                 return -1;
405         }
406
407         start_addr = (uint64_t)(uintptr_t)ms->addr;
408         end_addr = start_addr + ms->len;
409
410         for (i = 0; i < wa->region_nr; i++) {
411                 if (wa->fds[i] != fd)
412                         continue;
413
414                 mr = &wa->vm->regions[i];
415
416                 if (mr->userspace_addr + mr->memory_size < end_addr)
417                         mr->memory_size = end_addr - mr->userspace_addr;
418
419                 if (mr->userspace_addr > start_addr) {
420                         mr->userspace_addr = start_addr;
421                         mr->guest_phys_addr = start_addr;
422                 }
423
424                 if (mr->mmap_offset > offset)
425                         mr->mmap_offset = offset;
426
427                 PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
428                         " addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
429                         mr->mmap_offset, mr->userspace_addr,
430                         mr->memory_size);
431
432                 return 0;
433         }
434
435         if (i >= VHOST_MEMORY_MAX_NREGIONS) {
436                 PMD_DRV_LOG(ERR, "Too many memory regions");
437                 return -1;
438         }
439
440         mr = &wa->vm->regions[i];
441         wa->fds[i] = fd;
442
443         mr->guest_phys_addr = start_addr;
444         mr->userspace_addr = start_addr;
445         mr->memory_size = ms->len;
446         mr->mmap_offset = offset;
447
448         PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
449                 " addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
450                 mr->mmap_offset, mr->userspace_addr,
451                 mr->memory_size);
452
453         wa->region_nr++;
454
455         return 0;
456 }
457
458 static int
459 vhost_user_set_memory_table(struct virtio_user_dev *dev)
460 {
461         struct walk_arg wa;
462         int fds[VHOST_MEMORY_MAX_NREGIONS];
463         int ret, fd_num;
464         struct vhost_user_data *data = dev->backend_data;
465         struct vhost_user_msg msg = {
466                 .request = VHOST_USER_SET_MEM_TABLE,
467                 .flags = VHOST_USER_VERSION,
468         };
469
470         if (data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK))
471                 msg.flags |= VHOST_USER_NEED_REPLY_MASK;
472
473         wa.region_nr = 0;
474         wa.vm = &msg.payload.memory;
475         wa.fds = fds;
476
477         /*
478          * The memory lock has already been taken by memory subsystem
479          * or virtio_user_start_device().
480          */
481         ret = rte_memseg_walk_thread_unsafe(update_memory_region, &wa);
482         if (ret < 0)
483                 goto err;
484
485         fd_num = wa.region_nr;
486         msg.payload.memory.nregions = wa.region_nr;
487         msg.payload.memory.padding = 0;
488
489         msg.size = sizeof(msg.payload.memory.nregions);
490         msg.size += sizeof(msg.payload.memory.padding);
491         msg.size += fd_num * sizeof(struct vhost_memory_region);
492
493         ret = vhost_user_write(data->vhostfd, &msg, fds, fd_num);
494         if (ret < 0)
495                 goto err;
496
497         return vhost_user_check_reply_ack(dev, &msg);
498 err:
499         PMD_DRV_LOG(ERR, "Failed to set memory table");
500         return -1;
501 }
502
503 static int
504 vhost_user_set_vring(struct virtio_user_dev *dev, enum vhost_user_request req,
505                 struct vhost_vring_state *state)
506 {
507         int ret;
508         struct vhost_user_data *data = dev->backend_data;
509         struct vhost_user_msg msg = {
510                 .request = req,
511                 .flags = VHOST_USER_VERSION,
512                 .size = sizeof(*state),
513                 .payload.state = *state,
514         };
515
516         ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
517         if (ret < 0) {
518                 PMD_DRV_LOG(ERR, "Failed to set vring state (request %d)", req);
519                 return -1;
520         }
521
522         return 0;
523 }
524
525 static int
526 vhost_user_set_vring_enable(struct virtio_user_dev *dev, struct vhost_vring_state *state)
527 {
528         return vhost_user_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, state);
529 }
530
531 static int
532 vhost_user_set_vring_num(struct virtio_user_dev *dev, struct vhost_vring_state *state)
533 {
534         return vhost_user_set_vring(dev, VHOST_USER_SET_VRING_NUM, state);
535 }
536
537 static int
538 vhost_user_set_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
539 {
540         return vhost_user_set_vring(dev, VHOST_USER_SET_VRING_BASE, state);
541 }
542
543 static int
544 vhost_user_get_vring_base(struct virtio_user_dev *dev, struct vhost_vring_state *state)
545 {
546         int ret;
547         struct vhost_user_msg msg;
548         struct vhost_user_data *data = dev->backend_data;
549         unsigned int index = state->index;
550
551         ret = vhost_user_set_vring(dev, VHOST_USER_GET_VRING_BASE, state);
552         if (ret < 0) {
553                 PMD_DRV_LOG(ERR, "Failed to send request");
554                 goto err;
555         }
556
557         ret = vhost_user_read(data->vhostfd, &msg);
558         if (ret < 0) {
559                 PMD_DRV_LOG(ERR, "Failed to read reply");
560                 goto err;
561         }
562
563         if (msg.request != VHOST_USER_GET_VRING_BASE) {
564                 PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
565                 goto err;
566         }
567
568         if (msg.size != sizeof(*state)) {
569                 PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
570                 goto err;
571         }
572
573         if (msg.payload.state.index != index) {
574                 PMD_DRV_LOG(ERR, "Unexpected ring index (%u)", state->index);
575                 goto err;
576         }
577
578         *state = msg.payload.state;
579
580         return 0;
581 err:
582         PMD_DRV_LOG(ERR, "Failed to get vring base");
583         return -1;
584 }
585
586 static int
587 vhost_user_set_vring_file(struct virtio_user_dev *dev, enum vhost_user_request req,
588                 struct vhost_vring_file *file)
589 {
590         int ret;
591         int fd = file->fd;
592         int num_fd = 0;
593         struct vhost_user_data *data = dev->backend_data;
594         struct vhost_user_msg msg = {
595                 .request = req,
596                 .flags = VHOST_USER_VERSION,
597                 .size = sizeof(msg.payload.u64),
598                 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
599         };
600
601         if (fd >= 0)
602                 num_fd++;
603         else
604                 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
605
606         ret = vhost_user_write(data->vhostfd, &msg, &fd, num_fd);
607         if (ret < 0) {
608                 PMD_DRV_LOG(ERR, "Failed to set vring file (request %d)", req);
609                 return -1;
610         }
611
612         return 0;
613 }
614
615 static int
616 vhost_user_set_vring_call(struct virtio_user_dev *dev, struct vhost_vring_file *file)
617 {
618         return vhost_user_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
619 }
620
621 static int
622 vhost_user_set_vring_kick(struct virtio_user_dev *dev, struct vhost_vring_file *file)
623 {
624         return vhost_user_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
625 }
626
627
628 static int
629 vhost_user_set_vring_addr(struct virtio_user_dev *dev, struct vhost_vring_addr *addr)
630 {
631         int ret;
632         struct vhost_user_data *data = dev->backend_data;
633         struct vhost_user_msg msg = {
634                 .request = VHOST_USER_SET_VRING_ADDR,
635                 .flags = VHOST_USER_VERSION,
636                 .size = sizeof(*addr),
637                 .payload.addr = *addr,
638         };
639
640         ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
641         if (ret < 0) {
642                 PMD_DRV_LOG(ERR, "Failed to send vring addresses");
643                 return -1;
644         }
645
646         return 0;
647 }
648
649 static int
650 vhost_user_get_status(struct virtio_user_dev *dev, uint8_t *status)
651 {
652         int ret;
653         struct vhost_user_data *data = dev->backend_data;
654         struct vhost_user_msg msg = {
655                 .request = VHOST_USER_GET_STATUS,
656                 .flags = VHOST_USER_VERSION,
657         };
658
659         /*
660          * If features have not been negotiated, we don't know if the backend
661          * supports protocol features
662          */
663         if (!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK))
664                 return -ENOTSUP;
665
666         /* Status protocol feature requires protocol features support */
667         if (!(dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
668                 return -ENOTSUP;
669
670         if (!(data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_STATUS)))
671                 return -ENOTSUP;
672
673         ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
674         if (ret < 0) {
675                 PMD_DRV_LOG(ERR, "Failed to send request");
676                 goto err;
677         }
678
679         ret = vhost_user_read(data->vhostfd, &msg);
680         if (ret < 0) {
681                 PMD_DRV_LOG(ERR, "Failed to recv request");
682                 goto err;
683         }
684
685         if (msg.request != VHOST_USER_GET_STATUS) {
686                 PMD_DRV_LOG(ERR, "Unexpected request type (%d)", msg.request);
687                 goto err;
688         }
689
690         if (msg.size != sizeof(msg.payload.u64)) {
691                 PMD_DRV_LOG(ERR, "Unexpected payload size (%u)", msg.size);
692                 goto err;
693         }
694
695         *status = (uint8_t)msg.payload.u64;
696
697         return 0;
698 err:
699         PMD_DRV_LOG(ERR, "Failed to get device status");
700         return -1;
701 }
702
703 static int
704 vhost_user_set_status(struct virtio_user_dev *dev, uint8_t status)
705 {
706         int ret;
707         struct vhost_user_data *data = dev->backend_data;
708         struct vhost_user_msg msg = {
709                 .request = VHOST_USER_SET_STATUS,
710                 .flags = VHOST_USER_VERSION,
711                 .size = sizeof(msg.payload.u64),
712                 .payload.u64 = status,
713         };
714
715         /*
716          * If features have not been negotiated, we don't know if the backend
717          * supports protocol features
718          */
719         if (!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK))
720                 return -ENOTSUP;
721
722         /* Status protocol feature requires protocol features support */
723         if (!(dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
724                 return -ENOTSUP;
725
726         if (!(data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_STATUS)))
727                 return -ENOTSUP;
728
729         if (data->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK))
730                 msg.flags |= VHOST_USER_NEED_REPLY_MASK;
731
732         ret = vhost_user_write(data->vhostfd, &msg, NULL, 0);
733         if (ret < 0) {
734                 PMD_DRV_LOG(ERR, "Failed to send get status request");
735                 return -1;
736         }
737
738         return vhost_user_check_reply_ack(dev, &msg);
739 }
740
741 #define MAX_VIRTIO_USER_BACKLOG 1
742 static int
743 vhost_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
744 {
745         int ret;
746         int flag;
747         struct vhost_user_data *data = dev->backend_data;
748         int fd = data->listenfd;
749
750         ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
751         if (ret < 0) {
752                 PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again",
753                             dev->path, strerror(errno));
754                 return -1;
755         }
756         ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
757         if (ret < 0)
758                 return -1;
759
760         PMD_DRV_LOG(NOTICE, "(%s) waiting for client connection...", dev->path);
761         data->vhostfd = accept(fd, NULL, NULL);
762         if (data->vhostfd < 0) {
763                 PMD_DRV_LOG(ERR, "Failed to accept initial client connection (%s)",
764                                 strerror(errno));
765                 return -1;
766         }
767
768         flag = fcntl(fd, F_GETFL);
769         if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) {
770                 PMD_DRV_LOG(ERR, "fcntl failed, %s", strerror(errno));
771                 return -1;
772         }
773
774         return 0;
775 }
776
777 static int
778 vhost_user_server_disconnect(struct virtio_user_dev *dev)
779 {
780         struct vhost_user_data *data = dev->backend_data;
781
782         if (data->vhostfd < 0) {
783                 PMD_DRV_LOG(ERR, "(%s) Expected valid Vhost FD", dev->path);
784                 return -1;
785         }
786
787         close(data->vhostfd);
788         data->vhostfd = -1;
789
790         return 0;
791 }
792
793 static int
794 vhost_user_server_reconnect(struct virtio_user_dev *dev)
795 {
796         struct vhost_user_data *data = dev->backend_data;
797         int fd;
798
799         fd = accept(data->listenfd, NULL, NULL);
800         if (fd < 0)
801                 return -1;
802
803         data->vhostfd = fd;
804
805         return 0;
806 }
807
808 /**
809  * Set up environment to talk with a vhost user backend.
810  *
811  * @return
812  *   - (-1) if fail;
813  *   - (0) if succeed.
814  */
815 static int
816 vhost_user_setup(struct virtio_user_dev *dev)
817 {
818         int fd;
819         int flag;
820         struct sockaddr_un un;
821         struct vhost_user_data *data;
822
823         data = malloc(sizeof(*data));
824         if (!data) {
825                 PMD_DRV_LOG(ERR, "(%s) Failed to allocate Vhost-user data", dev->path);
826                 return -1;
827         }
828
829         memset(data, 0, sizeof(*data));
830
831         dev->backend_data = data;
832
833         data->vhostfd = -1;
834         data->listenfd = -1;
835
836         fd = socket(AF_UNIX, SOCK_STREAM, 0);
837         if (fd < 0) {
838                 PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno));
839                 goto err_data;
840         }
841
842         flag = fcntl(fd, F_GETFD);
843         if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0)
844                 PMD_DRV_LOG(WARNING, "fcntl failed, %s", strerror(errno));
845
846         memset(&un, 0, sizeof(un));
847         un.sun_family = AF_UNIX;
848         strlcpy(un.sun_path, dev->path, sizeof(un.sun_path));
849
850         if (dev->is_server) {
851                 data->listenfd = fd;
852                 if (vhost_user_start_server(dev, &un) < 0) {
853                         PMD_DRV_LOG(ERR, "virtio-user startup fails in server mode");
854                         goto err_socket;
855                 }
856         } else {
857                 if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
858                         PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
859                         goto err_socket;
860                 }
861                 data->vhostfd = fd;
862         }
863
864         return 0;
865
866 err_socket:
867         close(fd);
868 err_data:
869         free(data);
870         dev->backend_data = NULL;
871
872         return -1;
873 }
874
875 static int
876 vhost_user_destroy(struct virtio_user_dev *dev)
877 {
878         struct vhost_user_data *data = dev->backend_data;
879
880         if (!data)
881                 return 0;
882
883         if (data->vhostfd >= 0) {
884                 close(data->vhostfd);
885                 data->vhostfd = -1;
886         }
887
888         if (data->listenfd >= 0) {
889                 close(data->listenfd);
890                 data->listenfd = -1;
891         }
892
893         free(data);
894         dev->backend_data = NULL;
895
896         return 0;
897 }
898
899 static int
900 vhost_user_enable_queue_pair(struct virtio_user_dev *dev,
901                              uint16_t pair_idx,
902                              int enable)
903 {
904         struct vhost_user_data *data = dev->backend_data;
905         int i;
906
907         if (data->vhostfd < 0)
908                 return 0;
909
910         if (dev->qp_enabled[pair_idx] == enable)
911                 return 0;
912
913         for (i = 0; i < 2; ++i) {
914                 struct vhost_vring_state state = {
915                         .index = pair_idx * 2 + i,
916                         .num = enable,
917                 };
918
919                 if (vhost_user_set_vring_enable(dev, &state))
920                         return -1;
921         }
922
923         dev->qp_enabled[pair_idx] = enable;
924         return 0;
925 }
926
927 static int
928 vhost_user_get_backend_features(uint64_t *features)
929 {
930         *features = 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
931
932         return 0;
933 }
934
935 static int
936 vhost_user_update_link_state(struct virtio_user_dev *dev)
937 {
938         struct vhost_user_data *data = dev->backend_data;
939         char buf[128];
940
941         if (data->vhostfd >= 0) {
942                 int r;
943                 int flags;
944
945                 flags = fcntl(data->vhostfd, F_GETFL);
946                 if (fcntl(data->vhostfd, F_SETFL, flags | O_NONBLOCK) == -1) {
947                         PMD_DRV_LOG(ERR, "error setting O_NONBLOCK flag");
948                         return -1;
949                 }
950
951                 r = recv(data->vhostfd, buf, 128, MSG_PEEK);
952                 if (r == 0 || (r < 0 && errno != EAGAIN)) {
953                         dev->net_status &= (~VIRTIO_NET_S_LINK_UP);
954                         PMD_DRV_LOG(ERR, "virtio-user port %u is down", dev->hw.port_id);
955
956                         /* This function could be called in the process
957                          * of interrupt handling, callback cannot be
958                          * unregistered here, set an alarm to do it.
959                          */
960                         rte_eal_alarm_set(1,
961                                 virtio_user_dev_delayed_disconnect_handler,
962                                 (void *)dev);
963                 } else {
964                         dev->net_status |= VIRTIO_NET_S_LINK_UP;
965                 }
966
967                 if (fcntl(data->vhostfd, F_SETFL,
968                                         flags & ~O_NONBLOCK) == -1) {
969                         PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
970                         return -1;
971                 }
972         } else if (dev->is_server) {
973                 dev->net_status &= (~VIRTIO_NET_S_LINK_UP);
974                 if (virtio_user_dev_server_reconnect(dev) >= 0)
975                         dev->net_status |= VIRTIO_NET_S_LINK_UP;
976         }
977
978         return 0;
979 }
980
981 static int
982 vhost_user_get_intr_fd(struct virtio_user_dev *dev)
983 {
984         struct vhost_user_data *data = dev->backend_data;
985
986         if (dev->is_server && data->vhostfd == -1)
987                 return data->listenfd;
988
989         return data->vhostfd;
990 }
991
992 struct virtio_user_backend_ops virtio_ops_user = {
993         .setup = vhost_user_setup,
994         .destroy = vhost_user_destroy,
995         .get_backend_features = vhost_user_get_backend_features,
996         .set_owner = vhost_user_set_owner,
997         .get_features = vhost_user_get_features,
998         .set_features = vhost_user_set_features,
999         .set_memory_table = vhost_user_set_memory_table,
1000         .set_vring_num = vhost_user_set_vring_num,
1001         .set_vring_base = vhost_user_set_vring_base,
1002         .get_vring_base = vhost_user_get_vring_base,
1003         .set_vring_call = vhost_user_set_vring_call,
1004         .set_vring_kick = vhost_user_set_vring_kick,
1005         .set_vring_addr = vhost_user_set_vring_addr,
1006         .get_status = vhost_user_get_status,
1007         .set_status = vhost_user_set_status,
1008         .enable_qp = vhost_user_enable_queue_pair,
1009         .update_link_state = vhost_user_update_link_state,
1010         .server_disconnect = vhost_user_server_disconnect,
1011         .server_reconnect = vhost_user_server_reconnect,
1012         .get_intr_fd = vhost_user_get_intr_fd,
1013 };