38f5d8201ccd4869e5793c7ea467c999d4e1b08a
[dpdk.git] / examples / vhost_scsi / vhost_scsi.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <unistd.h>
7 #include <stdbool.h>
8 #include <signal.h>
9 #include <assert.h>
10 #include <semaphore.h>
11 #include <linux/virtio_scsi.h>
12 #include <linux/virtio_ring.h>
13
14 #include <rte_atomic.h>
15 #include <rte_cycles.h>
16 #include <rte_log.h>
17 #include <rte_malloc.h>
18 #include <rte_vhost.h>
19
20 #include "vhost_scsi.h"
21 #include "scsi_spec.h"
22
/* Virtio feature bits this example offers to the guest via
 * rte_vhost_driver_set_features().
 */
#define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\
			      (1 << VIRTIO_RING_F_EVENT_IDX) |\
			      (1 << VIRTIO_SCSI_F_INOUT) |\
			      (1 << VIRTIO_SCSI_F_CHANGE))

/* Path to folder where character device will be created. Can be set by user. */
static char dev_pathname[PATH_MAX] = "";

/* Single global controller: this example supports exactly one socket file. */
static struct vhost_scsi_ctrlr *g_vhost_ctrlr;
/* Set by destroy_device() to ask the worker thread to stop.
 * NOTE(review): plain int shared between threads with no atomics or
 * barriers — consider an atomic type; confirm against the worker loop.
 */
static int g_should_stop;
/* Posted by the worker thread on exit; destroy_device() waits on it. */
static sem_t exit_sem;
35 static struct vhost_scsi_ctrlr *
36 vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name)
37 {
38         /* currently we only support 1 socket file fd */
39         return g_vhost_ctrlr;
40 }
41
42 static uint64_t gpa_to_vva(int vid, uint64_t gpa)
43 {
44         char path[PATH_MAX];
45         struct vhost_scsi_ctrlr *ctrlr;
46         int ret = 0;
47
48         ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
49         if (ret) {
50                 fprintf(stderr, "Cannot get socket name\n");
51                 assert(ret != 0);
52         }
53
54         ctrlr = vhost_scsi_ctrlr_find(path);
55         if (!ctrlr) {
56                 fprintf(stderr, "Controller is not ready\n");
57                 assert(ctrlr != NULL);
58         }
59
60         assert(ctrlr->mem != NULL);
61
62         return rte_vhost_gpa_to_vva(ctrlr->mem, gpa);
63 }
64
/* Return the descriptor that "cur_desc" chains to, i.e. the entry of
 * the descriptor table "vq_desc" selected by cur_desc->next.
 */
static struct vring_desc *
descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc)
{
	uint16_t next_idx = cur_desc->next;

	return vq_desc + next_idx;
}
70
/* True when the descriptor is chained to a following one (NEXT flag). */
static bool
descriptor_has_next(struct vring_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_NEXT) != 0;
}
76
/* True when the guest marked this buffer device-writable (WRITE flag). */
static bool
descriptor_is_wr(struct vring_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}
82
/* Complete a finished task back to the guest: publish a used-ring entry
 * for the task's descriptor chain and kick the guest via the call eventfd.
 * Assumes vq->size is a power of two (used->idx is masked with size - 1).
 * NOTE(review): used->idx is incremented right after the entry is filled
 * with no intervening memory barrier visible here — confirm ordering is
 * guaranteed elsewhere before relying on this on weakly-ordered CPUs.
 */
static void
submit_completion(struct vhost_scsi_task *task)
{
	struct rte_vhost_vring *vq;
	struct vring_used *used;

	vq = task->vq;
	used = vq->used;
	/* Fill out the next entry in the "used" ring.  id = the
	 * index of the descriptor that contained the SCSI request.
	 * len = the total amount of data transferred for the SCSI
	 * request. We must report the correct len, for variable
	 * length SCSI CDBs, where we may return less data than
	 * allocated by the guest VM.
	 */
	used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
	used->ring[used->idx & (vq->size - 1)].len = task->data_len;
	used->idx++;

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	eventfd_write(vq->callfd, (eventfd_t)1);
}
107
108 static void
109 vhost_process_read_payload_chain(struct vhost_scsi_task *task)
110 {
111         void *data;
112
113         task->iovs_cnt = 0;
114         task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
115                                                    task->desc->addr);
116
117         while (descriptor_has_next(task->desc)) {
118                 task->desc = descriptor_get_next(task->vq->desc, task->desc);
119                 data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
120                                                      task->desc->addr);
121                 task->iovs[task->iovs_cnt].iov_base = data;
122                 task->iovs[task->iovs_cnt].iov_len = task->desc->len;
123                 task->data_len += task->desc->len;
124                 task->iovs_cnt++;
125         }
126 }
127
128 static void
129 vhost_process_write_payload_chain(struct vhost_scsi_task *task)
130 {
131         void *data;
132
133         task->iovs_cnt = 0;
134
135         do {
136                 data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
137                                                      task->desc->addr);
138                 task->iovs[task->iovs_cnt].iov_base = data;
139                 task->iovs[task->iovs_cnt].iov_len = task->desc->len;
140                 task->data_len += task->desc->len;
141                 task->iovs_cnt++;
142                 task->desc = descriptor_get_next(task->vq->desc, task->desc);
143         } while (descriptor_has_next(task->desc));
144
145         task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
146                                                    task->desc->addr);
147 }
148
149 static struct vhost_block_dev *
150 vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial,
151                           uint32_t blk_size, uint64_t blk_cnt,
152                           bool wce_enable)
153 {
154         struct vhost_block_dev *bdev;
155
156         bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
157         if (!bdev)
158                 return NULL;
159
160         strncpy(bdev->name, bdev_name, sizeof(bdev->name));
161         strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
162         bdev->blocklen = blk_size;
163         bdev->blockcnt = blk_cnt;
164         bdev->write_cache = wce_enable;
165
166         /* use memory as disk storage space */
167         bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
168         if (!bdev->data) {
169                 fprintf(stderr, "no enough reseverd huge memory for disk\n");
170                 return NULL;
171         }
172
173         return bdev;
174 }
175
/* Drain all pending requests on request queue q_idx of the controller.
 * For each available descriptor chain: map the request header, classify
 * the transfer direction from the chain layout, map data buffers into
 * iovecs, dispatch the SCSI command to the in-memory bdev, fill the
 * response, and complete the request back to the guest.
 * Assumes vq->size is a power of two (indices are masked with size - 1).
 */
static void
process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx)
{
	int ret;
	struct vhost_scsi_queue *scsi_vq;
	struct rte_vhost_vring *vq;

	scsi_vq = &ctrlr->bdev->queues[q_idx];
	vq = &scsi_vq->vq;
	/* Refresh the vring pointers for this queue before polling it. */
	ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq);
	assert(ret == 0);

	/* Process every entry the guest has made available since the last
	 * pass; last_used_idx tracks how far we have consumed.
	 */
	while (vq->avail->idx != scsi_vq->last_used_idx) {
		int req_idx;
		uint16_t last_idx;
		struct vhost_scsi_task *task;

		last_idx = scsi_vq->last_used_idx & (vq->size - 1);
		req_idx = vq->avail->ring[last_idx];

		/* One zeroed task per request; freed after completion.
		 * NOTE(review): allocation failure is only caught by assert.
		 */
		task = rte_zmalloc(NULL, sizeof(*task), 0);
		assert(task != NULL);

		task->ctrlr = ctrlr;
		task->bdev = ctrlr->bdev;
		task->vq = vq;
		task->req_idx = req_idx;
		task->desc = &task->vq->desc[task->req_idx];

		/* does not support indirect descriptors */
		assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0);
		scsi_vq->last_used_idx++;

		/* First descriptor: the virtio-scsi request header. */
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
							  task->desc->addr);

		/* Classify by chain shape: header-then-response means no
		 * data; a device-readable buffer next means guest->device
		 * (write); a device-writable buffer means device->guest
		 * (read).
		 */
		task->desc = descriptor_get_next(task->vq->desc, task->desc);
		if (!descriptor_has_next(task->desc)) {
			task->dxfer_dir = SCSI_DIR_NONE;
			task->resp = (void *)(uintptr_t)
					      gpa_to_vva(task->bdev->vid,
							 task->desc->addr);

		} else if (!descriptor_is_wr(task->desc)) {
			task->dxfer_dir = SCSI_DIR_TO_DEV;
			vhost_process_write_payload_chain(task);
		} else {
			task->dxfer_dir = SCSI_DIR_FROM_DEV;
			vhost_process_read_payload_chain(task);
		}

		ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			task->resp->response = VIRTIO_SCSI_S_BAD_TARGET;
		} else {
			/* successfully */
			task->resp->response = VIRTIO_SCSI_S_OK;
			task->resp->status = 0;
			task->resp->resid = 0;
		}
		submit_completion(task);
		rte_free(task);
	}
}
241
242 /* Main framework for processing IOs */
243 static void *
244 ctrlr_worker(void *arg)
245 {
246         uint32_t idx, num;
247         struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg;
248         cpu_set_t cpuset;
249         pthread_t thread;
250
251         thread = pthread_self();
252         CPU_ZERO(&cpuset);
253         CPU_SET(0, &cpuset);
254         pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
255
256         num =  rte_vhost_get_vring_num(ctrlr->bdev->vid);
257         fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num);
258
259         if (num != NUM_OF_SCSI_QUEUES) {
260                 fprintf(stderr, "Only 1 IO queue are supported\n");
261                 exit(0);
262         }
263
264         while (!g_should_stop && ctrlr->bdev != NULL) {
265                 /* At least 3 vrings, currently only can support 1 IO queue
266                  * Queue 2 for IO queue, does not support TMF and hotplug
267                  * for the example application now
268                  */
269                 for (idx = 2; idx < num; idx++)
270                         process_requestq(ctrlr, idx);
271         }
272
273         fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
274         sem_post(&exit_sem);
275         return NULL;
276 }
277
278 static int
279 new_device(int vid)
280 {
281         char path[PATH_MAX];
282         struct vhost_scsi_ctrlr *ctrlr;
283         struct vhost_scsi_queue *scsi_vq;
284         struct rte_vhost_vring *vq;
285         pthread_t tid;
286         int i, ret;
287
288         ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
289         if (ret) {
290                 fprintf(stderr, "Cannot get socket name\n");
291                 return -1;
292         }
293
294         ctrlr = vhost_scsi_ctrlr_find(path);
295         if (!ctrlr) {
296                 fprintf(stderr, "Controller is not ready\n");
297                 return -1;
298         }
299
300         ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
301         if (ret) {
302                 fprintf(stderr, "Get Controller memory region failed\n");
303                 return -1;
304         }
305         assert(ctrlr->mem != NULL);
306
307         /* hardcoded block device information with 128MiB */
308         ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
309                                                 4096, 32768, 0);
310         if (!ctrlr->bdev)
311                 return -1;
312
313         ctrlr->bdev->vid = vid;
314
315         /* Disable Notifications */
316         for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) {
317                 rte_vhost_enable_guest_notification(vid, i, 0);
318                 /* restore used index */
319                 scsi_vq = &ctrlr->bdev->queues[i];
320                 vq = &scsi_vq->vq;
321                 ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
322                 assert(ret == 0);
323                 scsi_vq->last_used_idx = vq->used->idx;
324                 scsi_vq->last_avail_idx = vq->used->idx;
325         }
326
327         g_should_stop = 0;
328         fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
329         if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
330                 fprintf(stderr, "Worker Thread Started Failed\n");
331                 return -1;
332         }
333         pthread_detach(tid);
334         return 0;
335 }
336
337 static void
338 destroy_device(int vid)
339 {
340         char path[PATH_MAX];
341         struct vhost_scsi_ctrlr *ctrlr;
342
343         rte_vhost_get_ifname(vid, path, PATH_MAX);
344         fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
345         ctrlr = vhost_scsi_ctrlr_find(path);
346         if (!ctrlr) {
347                 fprintf(stderr, "Destroy Ctrlr Failed\n");
348                 return;
349         }
350         ctrlr->bdev = NULL;
351         g_should_stop = 1;
352
353         sem_wait(&exit_sem);
354 }
355
/* Callbacks the vhost library invokes on guest connect/disconnect. */
static const struct vhost_device_ops vhost_scsi_device_ops = {
	.new_device =  new_device,
	.destroy_device = destroy_device,
};
360
361 static struct vhost_scsi_ctrlr *
362 vhost_scsi_ctrlr_construct(const char *ctrlr_name)
363 {
364         int ret;
365         struct vhost_scsi_ctrlr *ctrlr;
366         char *path;
367         char cwd[PATH_MAX];
368
369         /* always use current directory */
370         path = getcwd(cwd, PATH_MAX);
371         if (!path) {
372                 fprintf(stderr, "Cannot get current working directory\n");
373                 return NULL;
374         }
375         snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);
376
377         if (access(dev_pathname, F_OK) != -1) {
378                 if (unlink(dev_pathname) != 0)
379                         rte_exit(EXIT_FAILURE, "Cannot remove %s.\n",
380                                  dev_pathname);
381         }
382
383         if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
384                 fprintf(stderr, "socket %s already exists\n", dev_pathname);
385                 return NULL;
386         }
387
388         fprintf(stdout, "socket file: %s created\n", dev_pathname);
389
390         ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES);
391         if (ret != 0) {
392                 fprintf(stderr, "Set vhost driver features failed\n");
393                 return NULL;
394         }
395
396         ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
397         if (!ctrlr)
398                 return NULL;
399
400         rte_vhost_driver_callback_register(dev_pathname,
401                                            &vhost_scsi_device_ops);
402
403         return ctrlr;
404 }
405
/* SIGINT handler: remove the socket file (if present) and terminate.
 * NOTE(review): runs in signal context — access()/unlink()/_exit() are
 * async-signal-safe, but exit() is not; confirm this is acceptable for
 * an example application.
 */
static void
signal_handler(__rte_unused int signum)
{

	if (access(dev_pathname, F_OK) == 0)
		unlink(dev_pathname);
	exit(0);
}
414
415 int main(int argc, char *argv[])
416 {
417         int ret;
418
419         signal(SIGINT, signal_handler);
420
421         /* init EAL */
422         ret = rte_eal_init(argc, argv);
423         if (ret < 0)
424                 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
425
426         g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket");
427         if (g_vhost_ctrlr == NULL) {
428                 fprintf(stderr, "Construct vhost scsi controller failed\n");
429                 return 0;
430         }
431
432         if (sem_init(&exit_sem, 0, 0) < 0) {
433                 fprintf(stderr, "Error init exit_sem\n");
434                 return -1;
435         }
436
437         rte_vhost_driver_start(dev_pathname);
438
439         /* loop for exit the application */
440         while (1)
441                 sleep(1);
442
443         return 0;
444 }
445