[dpdk.git] / examples / vhost_blk / vhost_blk.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2019 Intel Corporation
 */

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <pthread.h>
#include <unistd.h>
#include <stdbool.h>
#include <signal.h>
#include <assert.h>
#include <semaphore.h>
#include <linux/virtio_blk.h>
#include <linux/virtio_ring.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_vhost.h>

#include "vhost_blk.h"
#include "blk_spec.h"

#define VIRTQ_DESC_F_NEXT       1
#define VIRTQ_DESC_F_AVAIL      (1 << 7)
#define VIRTQ_DESC_F_USED       (1 << 15)

#define MAX_TASK                12

#define VHOST_BLK_FEATURES ((1ULL << VIRTIO_F_RING_PACKED) | \
                            (1ULL << VIRTIO_F_VERSION_1) |\
                            (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
                            (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))
#define CTRLR_NAME              "vhost.socket"

enum CTRLR_WORKER_STATUS {
        WORKER_STATE_START = 0,
        WORKER_STATE_STOP,
};

struct vhost_blk_ctrlr *g_vhost_ctrlr;

/* Full path of the vhost-user socket file, built from the current
 * working directory and CTRLR_NAME.
 */
static char dev_pathname[PATH_MAX] = "";
static sem_t exit_sem;
static enum CTRLR_WORKER_STATUS worker_thread_status;

struct vhost_blk_ctrlr *
vhost_blk_ctrlr_find(const char *ctrlr_name)
{
        if (ctrlr_name == NULL)
                return NULL;

        /* currently we only support 1 socket file fd */
        return g_vhost_ctrlr;
}

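/* Translate a guest physical address to a host virtual address using the
 * memory table of this controller. *len is updated with the length that is
 * contiguously mapped starting at the returned address.
 */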
static uint64_t
gpa_to_vva(struct vhost_blk_ctrlr *ctrlr, uint64_t gpa, uint64_t *len)
{
        assert(ctrlr->mem != NULL);

        return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
}

static void
enqueue_task(struct vhost_blk_task *task)
{
        struct vhost_blk_queue *vq = task->vq;
        struct vring_used *used = vq->vring.used;

        rte_vhost_set_last_inflight_io_split(task->ctrlr->vid,
                vq->id, task->req_idx);

        /* Fill out the next entry in the "used" ring.  id = the
         * index of the descriptor that contained the blk request.
         * len = the total amount of data transferred for the blk
         * request. We must report the correct len, for variable
         * length blk CDBs, where we may return less data than
         * allocated by the guest VM.
         */
        used->ring[used->idx & (vq->vring.size - 1)].id = task->req_idx;
        used->ring[used->idx & (vq->vring.size - 1)].len = task->data_len;
        rte_smp_mb();
        used->idx++;
        rte_smp_mb();

        rte_vhost_clr_inflight_desc_split(task->ctrlr->vid,
                vq->id, used->idx, task->req_idx);

        /* Send an interrupt back to the guest VM so that it knows
         * a completion is ready to be processed.
         */
        rte_vhost_vring_call(task->ctrlr->vid, vq->id);
}

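/* Complete a request on a packed ring: write the buffer id back into the
 * descriptor at last_used_idx and toggle the AVAIL/USED flags according to
 * the used wrap counter, then advance last_used_idx past the whole
 * descriptor chain and notify the guest.
 */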
static void
enqueue_task_packed(struct vhost_blk_task *task)
{
        struct vhost_blk_queue *vq = task->vq;
        struct vring_packed_desc *desc;

        rte_vhost_set_last_inflight_io_packed(task->ctrlr->vid, vq->id,
                                            task->inflight_idx);

        desc = &vq->vring.desc_packed[vq->last_used_idx];
        desc->id = task->buffer_id;
        desc->addr = 0;

        rte_smp_mb();
        if (vq->used_wrap_counter)
                desc->flags |= VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED;
        else
                desc->flags &= ~(VIRTQ_DESC_F_AVAIL | VIRTQ_DESC_F_USED);
        rte_smp_mb();

        rte_vhost_clr_inflight_desc_packed(task->ctrlr->vid, vq->id,
                                           task->inflight_idx);

        vq->last_used_idx += task->chain_num;
        if (vq->last_used_idx >= vq->vring.size) {
                vq->last_used_idx -= vq->vring.size;
                vq->used_wrap_counter = !vq->used_wrap_counter;
        }

        /* Send an interrupt back to the guest VM so that it knows
         * a completion is ready to be processed.
         */
        rte_vhost_vring_call(task->ctrlr->vid, vq->id);
}

static bool
descriptor_has_next_packed(struct vring_packed_desc *cur_desc)
{
        return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
descriptor_has_next_split(struct vring_desc *cur_desc)
{
        return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

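/* Convert one guest buffer (guest physical address + length) into host iovecs.
 * A buffer that is contiguous in guest physical memory may map to several
 * host virtual regions, so keep translating until the whole payload is
 * covered or the iovec array is exhausted.
 */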
static int
desc_payload_to_iovs(struct vhost_blk_ctrlr *ctrlr, struct iovec *iovs,
                     uint32_t *iov_index, uintptr_t payload, uint64_t remaining)
{
        void *vva;
        uint64_t len;

        do {
                if (*iov_index >= VHOST_BLK_MAX_IOVS) {
                        fprintf(stderr, "VHOST_BLK_MAX_IOVS reached\n");
                        return -1;
                }
                len = remaining;
                vva = (void *)(uintptr_t)gpa_to_vva(ctrlr,
                                 payload, &len);
                if (!vva || !len) {
                        fprintf(stderr, "failed to translate desc address.\n");
                        return -1;
                }

                iovs[*iov_index].iov_base = vva;
                iovs[*iov_index].iov_len = len;
                payload += len;
                remaining -= len;
                (*iov_index)++;
        } while (remaining);

        return 0;
}

static struct vring_desc *
vring_get_next_desc(struct vhost_blk_queue *vq, struct vring_desc *desc)
{
        if (descriptor_has_next_split(desc))
                return &vq->vring.desc[desc->next];

        return NULL;
}

static struct vring_packed_desc *
vring_get_next_desc_packed(struct vhost_blk_queue *vq, uint16_t *req_idx)
{
        if (descriptor_has_next_packed(&vq->vring.desc_packed[*req_idx])) {
                *req_idx = (*req_idx + 1) % vq->vring.size;
                return &vq->vring.desc_packed[*req_idx];
        }

        return NULL;
}

static struct rte_vhost_inflight_desc_packed *
vring_get_next_inflight_desc(struct vhost_blk_queue *vq,
                        struct rte_vhost_inflight_desc_packed *desc)
{
        if (!!(desc->flags & VRING_DESC_F_NEXT))
                return &vq->inflight_ring.inflight_packed->desc[desc->next];

        return NULL;
}

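/* Walk a split-ring descriptor chain starting at req_idx, translating each
 * descriptor into host iovecs and accumulating the total payload length.
 */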
static int
setup_iovs_from_descs_split(struct vhost_blk_ctrlr *ctrlr,
                            struct vhost_blk_queue *vq, uint16_t req_idx,
                            struct iovec *iovs, uint32_t *iovs_idx,
                            uint32_t *payload)
{
        struct vring_desc *desc = &vq->vring.desc[req_idx];

        do {
                /* does not support indirect descriptors */
                assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);

                if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
                        fprintf(stderr, "Reached VHOST_BLK_MAX_IOVS\n");
                        return -1;
                }

                if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
                        desc->addr, desc->len) != 0) {
                        fprintf(stderr, "Failed to convert desc payload to iovs\n");
                        return -1;
                }

                *payload += desc->len;

                desc = vring_get_next_desc(vq, desc);
        } while (desc != NULL);

        return 0;
}

static int
setup_iovs_from_descs_packed(struct vhost_blk_ctrlr *ctrlr,
                             struct vhost_blk_queue *vq, uint16_t req_idx,
                             struct iovec *iovs, uint32_t *iovs_idx,
                             uint32_t *payload)
{
        struct vring_packed_desc *desc = &vq->vring.desc_packed[req_idx];

        do {
                /* does not support indirect descriptors */
                assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);

                if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
                        fprintf(stderr, "Reached VHOST_BLK_MAX_IOVS\n");
                        return -1;
                }

                if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
                        desc->addr, desc->len) != 0) {
                        fprintf(stderr, "Failed to convert desc payload to iovs\n");
                        return -1;
                }

                *payload += desc->len;

                desc = vring_get_next_desc_packed(vq, &req_idx);
        } while (desc != NULL);

        return 0;
}

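/* Same as the packed-ring walk above, but the chain is read from the
 * inflight descriptor table so that requests which were outstanding before
 * a reconnection can be rebuilt and resubmitted.
 */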
static int
setup_iovs_from_inflight_desc(struct vhost_blk_ctrlr *ctrlr,
                              struct vhost_blk_queue *vq, uint16_t req_idx,
                              struct iovec *iovs, uint32_t *iovs_idx,
                              uint32_t *payload)
{
        struct rte_vhost_ring_inflight *inflight_vq;
        struct rte_vhost_inflight_desc_packed *desc;

        inflight_vq = &vq->inflight_ring;
        desc = &inflight_vq->inflight_packed->desc[req_idx];

        do {
                /* does not support indirect descriptors */
                assert((desc->flags & VRING_DESC_F_INDIRECT) == 0);

                if (*iovs_idx >= VHOST_BLK_MAX_IOVS) {
                        fprintf(stderr, "Reached VHOST_BLK_MAX_IOVS\n");
                        return -1;
                }

                if (desc_payload_to_iovs(ctrlr, iovs, iovs_idx,
                        desc->addr, desc->len) != 0) {
                        fprintf(stderr, "Failed to convert desc payload to iovs\n");
                        return -1;
                }

                *payload += desc->len;

                desc = vring_get_next_inflight_desc(vq, desc);
        } while (desc != NULL);

        return 0;
}

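/* Handle one virtio-blk request. The descriptor chain is gathered into
 * iovecs: the first iovec holds the struct virtio_blk_outhdr request header,
 * the last iovec holds the one-byte status that is written back to the
 * guest, and everything in between is the data payload.
 */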
static void
process_blk_task(struct vhost_blk_task *task)
{
        uint32_t payload = 0;

        if (task->vq->packed_ring) {
                struct rte_vhost_ring_inflight *inflight_ring;
                struct rte_vhost_resubmit_info *resubmit_inflight;

                inflight_ring = &task->vq->inflight_ring;
                resubmit_inflight = inflight_ring->resubmit_inflight;

                if (resubmit_inflight != NULL &&
                    resubmit_inflight->resubmit_list != NULL) {
                        if (setup_iovs_from_inflight_desc(task->ctrlr, task->vq,
                                task->req_idx, task->iovs, &task->iovs_cnt,
                                &payload)) {
                                fprintf(stderr, "Failed to setup iovs\n");
                                return;
                        }
                } else {
                        if (setup_iovs_from_descs_packed(task->ctrlr, task->vq,
                                task->req_idx, task->iovs, &task->iovs_cnt,
                                &payload)) {
                                fprintf(stderr, "Failed to setup iovs\n");
                                return;
                        }
                }
        } else {
                if (setup_iovs_from_descs_split(task->ctrlr, task->vq,
                        task->req_idx, task->iovs, &task->iovs_cnt, &payload)) {
                        fprintf(stderr, "Failed to setup iovs\n");
                        return;
                }
        }

        /* First IOV must be the req head. */
        task->req = (struct virtio_blk_outhdr *)task->iovs[0].iov_base;
        assert(sizeof(*task->req) == task->iovs[0].iov_len);

        /* Last IOV must be the status tail. */
        task->status = (uint8_t *)task->iovs[task->iovs_cnt - 1].iov_base;
        assert(sizeof(*task->status) == task->iovs[task->iovs_cnt - 1].iov_len);

        /* Transport data len */
        task->data_len = payload - task->iovs[0].iov_len -
                task->iovs[task->iovs_cnt - 1].iov_len;

        if (vhost_bdev_process_blk_commands(task->ctrlr->bdev, task))
                /* command failed, report an I/O error */
                *task->status = VIRTIO_BLK_S_IOERR;
        else
                /* command succeeded */
                *task->status = VIRTIO_BLK_S_OK;

        if (task->vq->packed_ring)
                enqueue_task_packed(task);
        else
                enqueue_task(task);
}

static void
blk_task_init(struct vhost_blk_task *task)
{
        task->iovs_cnt = 0;
        task->data_len = 0;
        task->req = NULL;
        task->status = NULL;
}

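/* Resubmit requests that the inflight log marks as still outstanding, which
 * happens after the vhost backend reconnects. The resubmit list is consumed
 * from the tail and freed once every entry has been replayed.
 */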
static void
submit_inflight_vq(struct vhost_blk_queue *vq)
{
        struct rte_vhost_ring_inflight *inflight_ring;
        struct rte_vhost_resubmit_info *resubmit_inflight;
        struct vhost_blk_task *task;

        inflight_ring = &vq->inflight_ring;
        resubmit_inflight = inflight_ring->resubmit_inflight;

        if (resubmit_inflight == NULL ||
            resubmit_inflight->resubmit_num == 0)
                return;

        fprintf(stdout, "Resubmit inflight num is %d\n",
                resubmit_inflight->resubmit_num);

        while (resubmit_inflight->resubmit_num-- > 0) {
                uint16_t desc_idx;

                desc_idx = resubmit_inflight->resubmit_list[
                                        resubmit_inflight->resubmit_num].index;

                if (vq->packed_ring) {
                        uint16_t task_idx;
                        struct rte_vhost_inflight_desc_packed *desc;

                        desc = inflight_ring->inflight_packed->desc;
                        task_idx = desc[desc[desc_idx].last].id;
                        task = &vq->tasks[task_idx];

                        task->req_idx = desc_idx;
                        task->chain_num = desc[desc_idx].num;
                        task->buffer_id = task_idx;
                        task->inflight_idx = desc_idx;

                        vq->last_avail_idx += desc[desc_idx].num;
                        if (vq->last_avail_idx >= vq->vring.size) {
                                vq->last_avail_idx -= vq->vring.size;
                                vq->avail_wrap_counter =
                                        !vq->avail_wrap_counter;
                        }
                } else {
                        /* In the split ring, desc_idx is the req_idx that
                         * was initialized when the task pool was allocated.
                         */
                        task = &vq->tasks[desc_idx];
                }

                blk_task_init(task);
                process_blk_task(task);
        }

        free(resubmit_inflight->resubmit_list);
        resubmit_inflight->resubmit_list = NULL;
}

/* Use the buffer_id as the task_idx */
static uint16_t
vhost_blk_vq_get_desc_chain_buffer_id(struct vhost_blk_queue *vq,
                                      uint16_t *req_head, uint16_t *num)
{
        struct vring_packed_desc *desc = &vq->vring.desc_packed[
                                                vq->last_avail_idx];

        *req_head = vq->last_avail_idx;
        *num = 1;

        while (descriptor_has_next_packed(desc)) {
                vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
                desc = &vq->vring.desc_packed[vq->last_avail_idx];
                *num += 1;
        }

        /* Point to next desc */
        vq->last_avail_idx = (vq->last_avail_idx + 1) % vq->vring.size;
        if (vq->last_avail_idx < *req_head)
                vq->avail_wrap_counter = !vq->avail_wrap_counter;

        return desc->id;
}

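/* Pop the next available descriptor head index from the split avail ring. */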
static uint16_t
vq_get_desc_idx(struct vhost_blk_queue *vq)
{
        uint16_t desc_idx;
        uint16_t last_avail_idx;

        last_avail_idx = vq->last_avail_idx & (vq->vring.size - 1);
        desc_idx = vq->vring.avail->ring[last_avail_idx];
        vq->last_avail_idx++;

        return desc_idx;
}

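/* Check whether a new request is available. On a packed ring the descriptor
 * at last_avail_idx is available when its AVAIL flag matches the avail wrap
 * counter and its USED flag does not; on a split ring it is enough to
 * compare the avail index against last_avail_idx.
 */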
static int
vhost_blk_vq_is_avail(struct vhost_blk_queue *vq)
{
        if (vq->packed_ring) {
                uint16_t flags = vq->vring.desc_packed[
                                        vq->last_avail_idx].flags;
                bool avail_wrap_counter = vq->avail_wrap_counter;

                return (!!(flags & VIRTQ_DESC_F_AVAIL) == avail_wrap_counter &&
                        !!(flags & VIRTQ_DESC_F_USED) != avail_wrap_counter);
        } else {
                if (vq->vring.avail->idx != vq->last_avail_idx)
                        return 1;

                return 0;
        }
}

static void
process_vq(struct vhost_blk_queue *vq)
{
        struct vhost_blk_task *task;

        if (vq->packed_ring) {
                while (vhost_blk_vq_is_avail(vq)) {
                        uint16_t task_idx, req_idx, last_idx, chain_num;

                        task_idx = vhost_blk_vq_get_desc_chain_buffer_id(vq,
                                        &req_idx, &chain_num);
                        task = &vq->tasks[task_idx];

                        blk_task_init(task);
                        task->req_idx = req_idx;
                        task->chain_num = chain_num;
                        task->buffer_id = task_idx;
                        last_idx = (req_idx + chain_num - 1) % vq->vring.size;

                        rte_vhost_set_inflight_desc_packed(task->ctrlr->vid,
                                                           vq->id,
                                                           task->req_idx,
                                                           last_idx,
                                                           &task->inflight_idx);

                        process_blk_task(task);
                }
        } else {
                while (vhost_blk_vq_is_avail(vq)) {
                        uint16_t desc_idx;

                        desc_idx = vq_get_desc_idx(vq);
                        task = &vq->tasks[desc_idx];

                        blk_task_init(task);
                        rte_vhost_set_inflight_desc_split(task->ctrlr->vid,
                                                          vq->id,
                                                          task->req_idx);
                        process_blk_task(task);
                }
        }
}

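/* Polling thread: pin to core 0, replay any inflight requests left over
 * from a previous connection, then poll every queue until the device is
 * destroyed.
 */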
static void *
ctrlr_worker(void *arg)
{
        struct vhost_blk_ctrlr *ctrlr = (struct vhost_blk_ctrlr *)arg;
        cpu_set_t cpuset;
        pthread_t thread;
        int i;

        fprintf(stdout, "Ctrlr Worker Thread start\n");

        if (ctrlr == NULL || ctrlr->bdev == NULL) {
                fprintf(stderr,
                        "%s: Error, invalid argument passed to worker thread\n",
                        __func__);
                exit(1);
        }

        thread = pthread_self();
        CPU_ZERO(&cpuset);
        CPU_SET(0, &cpuset);
        pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);

        for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
                submit_inflight_vq(&ctrlr->queues[i]);

        while (worker_thread_status != WORKER_STATE_STOP)
                for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
                        process_vq(&ctrlr->queues[i]);

        fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
        sem_post(&exit_sem);
        return NULL;
}

static int
alloc_task_pool(struct vhost_blk_ctrlr *ctrlr)
{
        struct vhost_blk_queue *vq;
        int i, j;

        for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
                vq = &ctrlr->queues[i];

                vq->tasks = rte_zmalloc(NULL,
                        sizeof(struct vhost_blk_task) * vq->vring.size, 0);
                if (!vq->tasks) {
                        fprintf(stderr, "Failed to allocate task memory\n");
                        return -1;
                }

                for (j = 0; j < vq->vring.size; j++) {
                        vq->tasks[j].req_idx = j;
                        vq->tasks[j].ctrlr = ctrlr;
                        vq->tasks[j].vq = vq;
                }
        }

        return 0;
}

static void
free_task_pool(struct vhost_blk_ctrlr *ctrlr)
{
        int i;

        for (i = 0; i < NUM_OF_BLK_QUEUES; i++)
                rte_free(ctrlr->queues[i].tasks);
}

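/* vhost-user "device ready" callback. Look up the controller, read the
 * negotiated features, fetch each vring together with its inflight state,
 * and start the polling worker thread. For packed rings the wrap counters
 * are recovered from bit 15 of the saved avail/used indexes.
 */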
static int
new_device(int vid)
{
        struct vhost_blk_ctrlr *ctrlr;
        struct vhost_blk_queue *vq;
        char path[PATH_MAX];
        uint64_t features;
        pthread_t tid;
        int i, ret;
        bool packed_ring;

        ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
        if (ret) {
                fprintf(stderr, "Failed to get the socket path\n");
                return -1;
        }

        ctrlr = vhost_blk_ctrlr_find(path);
        if (!ctrlr) {
                fprintf(stderr, "Failed to find controller\n");
                return -1;
        }

        if (ctrlr->started)
                return 0;

        ctrlr->vid = vid;
        ret = rte_vhost_get_negotiated_features(vid, &features);
        if (ret) {
                fprintf(stderr, "Failed to get the negotiated features\n");
                return -1;
        }
        packed_ring = !!(features & (1ULL << VIRTIO_F_RING_PACKED));

        /* Disable Notifications and init last idx */
        for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
                vq = &ctrlr->queues[i];
                vq->id = i;

                assert(rte_vhost_get_vhost_vring(ctrlr->vid, i,
                                                 &vq->vring) == 0);
                assert(rte_vhost_get_vring_base(ctrlr->vid, i,
                                               &vq->last_avail_idx,
                                               &vq->last_used_idx) == 0);
                assert(rte_vhost_get_vhost_ring_inflight(ctrlr->vid, i,
                                                &vq->inflight_ring) == 0);

                if (packed_ring) {
                        /* for the reconnection */
                        assert(rte_vhost_get_vring_base_from_inflight(
                                ctrlr->vid, i,
                                &vq->last_avail_idx,
                                &vq->last_used_idx) == 0);

                        vq->avail_wrap_counter = vq->last_avail_idx &
                                (1 << 15);
                        vq->last_avail_idx = vq->last_avail_idx &
                                0x7fff;
                        vq->used_wrap_counter = vq->last_used_idx &
                                (1 << 15);
                        vq->last_used_idx = vq->last_used_idx &
                                0x7fff;
                }

                vq->packed_ring = packed_ring;
                rte_vhost_enable_guest_notification(vid, i, 0);
        }

        assert(rte_vhost_get_mem_table(vid, &ctrlr->mem) == 0);
        assert(ctrlr->mem != NULL);
        assert(alloc_task_pool(ctrlr) == 0);

        /* start polling vring */
        worker_thread_status = WORKER_STATE_START;
        fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
        if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) != 0) {
                fprintf(stderr, "Failed to start worker thread\n");
                return -1;
        }

        /* device has been started */
        ctrlr->started = 1;
        pthread_detach(tid);
        return 0;
}

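/* vhost-user teardown callback: stop the worker thread, wait for it to
 * exit, then save each queue's last avail/used index (with the wrap counter
 * folded back into bit 15 for packed rings) so the device can reconnect.
 */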
static void
destroy_device(int vid)
{
        char path[PATH_MAX];
        struct vhost_blk_ctrlr *ctrlr;
        struct vhost_blk_queue *vq;
        int i, ret;

        ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
        if (ret) {
                fprintf(stderr, "Destroy Ctrlr Failed\n");
                return;
        }

        fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
        ctrlr = vhost_blk_ctrlr_find(path);
        if (!ctrlr) {
                fprintf(stderr, "Destroy Ctrlr Failed\n");
                return;
        }

        if (!ctrlr->started)
                return;

        worker_thread_status = WORKER_STATE_STOP;
        sem_wait(&exit_sem);

        for (i = 0; i < NUM_OF_BLK_QUEUES; i++) {
                vq = &ctrlr->queues[i];
                if (vq->packed_ring) {
                        vq->last_avail_idx |= (vq->avail_wrap_counter <<
                                15);
                        vq->last_used_idx |= (vq->used_wrap_counter <<
                                15);
                }

                rte_vhost_set_vring_base(ctrlr->vid, i,
                                         vq->last_avail_idx,
                                         vq->last_used_idx);
        }

        free_task_pool(ctrlr);
        free(ctrlr->mem);

        ctrlr->started = 0;
}

static int
new_connection(int vid)
{
        /* extend the proper features for block device */
        vhost_session_install_rte_compat_hooks(vid);

        return 0;
}

struct vhost_device_ops vhost_blk_device_ops = {
        .new_device = new_device,
        .destroy_device = destroy_device,
        .new_connection = new_connection,
};

static struct vhost_block_dev *
vhost_blk_bdev_construct(const char *bdev_name,
        const char *bdev_serial, uint32_t blk_size, uint64_t blk_cnt,
        bool wce_enable)
{
        struct vhost_block_dev *bdev;

        bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
        if (!bdev)
                return NULL;

        strncpy(bdev->name, bdev_name, sizeof(bdev->name));
        strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
        bdev->blocklen = blk_size;
        bdev->blockcnt = blk_cnt;
        bdev->write_cache = wce_enable;

        fprintf(stdout, "Blocklen=%d, blockcnt=%"PRIx64"\n", bdev->blocklen,
                bdev->blockcnt);

        /* use memory as disk storage space */
        bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
        if (!bdev->data) {
                fprintf(stderr, "Not enough reserved huge memory for disk\n");
                rte_free(bdev);
                return NULL;
        }

        return bdev;
}

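/* Create the controller: register a vhost-user socket named CTRLR_NAME in
 * the current working directory, advertise VHOST_BLK_FEATURES, and back the
 * block device with a 128 MiB RAM disk (32768 blocks of 4096 bytes).
 */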
static struct vhost_blk_ctrlr *
vhost_blk_ctrlr_construct(const char *ctrlr_name)
{
        int ret;
        struct vhost_blk_ctrlr *ctrlr;
        char *path;
        char cwd[PATH_MAX];

        /* always use current directory */
        path = getcwd(cwd, PATH_MAX);
        if (!path) {
                fprintf(stderr, "Cannot get current working directory\n");
                return NULL;
        }
        snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);

        unlink(dev_pathname);

        if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
                fprintf(stderr, "Failed to register vhost driver on socket %s\n",
                        dev_pathname);
                return NULL;
        }

        ret = rte_vhost_driver_set_features(dev_pathname, VHOST_BLK_FEATURES);
        if (ret != 0) {
                fprintf(stderr, "Set vhost driver features failed\n");
                rte_vhost_driver_unregister(dev_pathname);
                return NULL;
        }

        /* set vhost user protocol features */
        vhost_dev_install_rte_compat_hooks(dev_pathname);

        ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
        if (!ctrlr) {
                rte_vhost_driver_unregister(dev_pathname);
                return NULL;
        }

        /* hardcoded block device information with 128MiB */
        ctrlr->bdev = vhost_blk_bdev_construct("malloc0", "vhost_blk_malloc0",
                                                4096, 32768, 0);
        if (!ctrlr->bdev) {
                rte_free(ctrlr);
                rte_vhost_driver_unregister(dev_pathname);
                return NULL;
        }

        rte_vhost_driver_callback_register(dev_pathname,
                                           &vhost_blk_device_ops);

        return ctrlr;
}

static void
vhost_blk_ctrlr_destroy(struct vhost_blk_ctrlr *ctrlr)
{
        if (ctrlr->bdev != NULL) {
                if (ctrlr->bdev->data != NULL)
                        rte_free(ctrlr->bdev->data);

                rte_free(ctrlr->bdev);
        }
        rte_free(ctrlr);

        rte_vhost_driver_unregister(dev_pathname);
}

static void
signal_handler(__rte_unused int signum)
{
        struct vhost_blk_ctrlr *ctrlr;

        ctrlr = vhost_blk_ctrlr_find(dev_pathname);
        if (ctrlr == NULL)
                return;

        if (ctrlr->started)
                destroy_device(ctrlr->vid);

        vhost_blk_ctrlr_destroy(ctrlr);
        exit(0);
}

int main(int argc, char *argv[])
{
        int ret;

        /* init EAL */
        ret = rte_eal_init(argc, argv);
        if (ret < 0)
                rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

        g_vhost_ctrlr = vhost_blk_ctrlr_construct(CTRLR_NAME);
        if (g_vhost_ctrlr == NULL) {
                fprintf(stderr, "Failed to construct vhost blk controller\n");
                return -1;
        }

        if (sem_init(&exit_sem, 0, 0) < 0) {
                fprintf(stderr, "Failed to initialize exit_sem\n");
                return -1;
        }

        signal(SIGINT, signal_handler);

        rte_vhost_driver_start(dev_pathname);

        /* Loop forever; SIGINT triggers cleanup and exit via signal_handler. */
        while (1)
                sleep(1);

        return 0;
}