examples/vhost_scsi: introduce a new sample app
authorChangpeng Liu <changpeng.liu@intel.com>
Thu, 20 Jul 2017 09:16:29 +0000 (17:16 +0800)
committerYuanhan Liu <yliu@fridaylinux.org>
Wed, 19 Jul 2017 19:49:47 +0000 (22:49 +0300)
vhost-user protocol is common to many virtio devices, such as
virtio_net/virtio_scsi/virtio_blk. Since DPDK vhost library
removed the NET specific data structures, the vhost library
is common to other virtio devices, such as virtio-scsi.

Here we introduce a simple memory based block device that
can be presented to Guest VM through vhost-user-scsi-pci
controller. Similar with vhost-net, the sample application
will process the I/Os sent via virt rings.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
MAINTAINERS
doc/guides/sample_app_ug/index.rst
doc/guides/sample_app_ug/vhost_scsi.rst [new file with mode: 0644]
examples/Makefile
examples/vhost_scsi/Makefile [new file with mode: 0644]
examples/vhost_scsi/scsi.c [new file with mode: 0644]
examples/vhost_scsi/scsi_spec.h [new file with mode: 0644]
examples/vhost_scsi/vhost_scsi.c [new file with mode: 0644]
examples/vhost_scsi/vhost_scsi.h [new file with mode: 0644]

index 875bee0..c6ab9a7 100644 (file)
@@ -447,6 +447,8 @@ F: lib/librte_vhost/
 F: doc/guides/prog_guide/vhost_lib.rst
 F: examples/vhost/
 F: doc/guides/sample_app_ug/vhost.rst
+F: examples/vhost_scsi/
+F: doc/guides/sample_app_ug/vhost_scsi.rst
 
 Vhost PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
index f9239e3..069d4f1 100644 (file)
@@ -66,6 +66,7 @@ Sample Applications User Guides
     packet_ordering
     vmdq_dcb_forwarding
     vhost
+    vhost_scsi
     netmap_compatibility
     ip_pipeline
     test_pipeline
diff --git a/doc/guides/sample_app_ug/vhost_scsi.rst b/doc/guides/sample_app_ug/vhost_scsi.rst
new file mode 100644 (file)
index 0000000..8be069e
--- /dev/null
@@ -0,0 +1,115 @@
+
+..  BSD LICENSE
+    Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+Vhost_scsi Sample Application
+=============================
+
+The vhost_scsi sample application implements a simple SCSI block device,
+which is used as the backend of a Qemu vhost-user-scsi device. Users can
+extend the existing example to use other types of block device (e.g. AIO)
+besides the memory based block device. Similar to the vhost-user-net device,
+the sample application uses a domain socket to communicate with Qemu, and
+the virtio ring is processed by the vhost_scsi sample application.
+
+The sample application reuses a lot of code from the SPDK (Storage
+Performance Development Kit, https://github.com/spdk/spdk) vhost-user-scsi
+target. Users applying the DPDK vhost library in the storage area can take
+SPDK as a reference as well.
+
+Testing steps
+-------------
+
+This section shows the steps to start a VM with the block device as a
+fast data path for critical applications.
+
+Build
+~~~~~
+
+Follow the *Getting Started Guide for Linux* for generic information about
+environment setup and building DPDK from source.
+
+In this example, you need to build DPDK both on the host and inside the
+guest. You also need to build this example.
+
+.. code-block:: console
+
+    export RTE_SDK=/path/to/dpdk_source
+    export RTE_TARGET=x86_64-native-linuxapp-gcc
+
+    cd ${RTE_SDK}/examples/vhost_scsi
+    make
+
+
+Start the vhost_scsi example
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: console
+
+        ./vhost-scsi -m 1024
+
+.. _vhost_scsi_app_run_vm:
+
+Start the VM
+~~~~~~~~~~~~
+
+.. code-block:: console
+
+    qemu-system-x86_64 -machine accel=kvm \
+        -m $mem -object memory-backend-file,id=mem,size=$mem,\
+        mem-path=/dev/hugepages,share=on -numa node,memdev=mem \
+        -drive file=os.img,if=none,id=disk \
+        -device ide-hd,drive=disk,bootindex=0 \
+        -chardev socket,id=char0,path=/tmp/vhost.socket \
+        -device vhost-user-scsi-pci,chardev=char0,bootindex=2 \
+        ...
+
+.. note::
+    Check whether your Qemu supports "vhost-user-scsi";
+    Qemu v2.10 or newer is required.
+
+Vhost_scsi Common Issues
+------------------------
+
+* vhost_scsi can not start with block size 512 Bytes:
+
+  Currently the DPDK vhost library is designed for NET devices (although the
+  APIs are generic now). For a block device with 512 Bytes sectors, the Qemu
+  BIOS (x86 BIOS Enhanced Disk Device) will enumerate all block devices and
+  issue some I/Os to them. The DPDK vhost library can not process such
+  scenarios (both BIOS and OS will enumerate the block device), so as a
+  workaround, the vhost_scsi example application hardcodes the block size
+  to 4096 Bytes.
+
+* vhost_scsi can only support the block device as a fast data disk (non OS image):
+
+  Make sure the ``bootindex=2`` Qemu option is given to the vhost-user-scsi-pci device.
+
index 97f12ad..28354ff 100644 (file)
@@ -88,7 +88,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_HASH),y)
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += tep_termination
 endif
 DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += timer
-DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost vhost_scsi
 DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += vhost_xen
 DIRS-y += vmdq
 DIRS-y += vmdq_dcb
diff --git a/examples/vhost_scsi/Makefile b/examples/vhost_scsi/Makefile
new file mode 100644 (file)
index 0000000..0306a6a
--- /dev/null
@@ -0,0 +1,59 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# The DPDK build fragments included below are located through RTE_SDK,
+# so stop immediately when it is not set.
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# Outside a linuxapp environment only print a notice and provide an empty
+# "all" target; the application itself is not built.
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = vhost-scsi
+
+# all sources are listed in SRCS-y
+SRCS-y := scsi.c vhost_scsi.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/vhost_scsi/scsi.c b/examples/vhost_scsi/scsi.c
new file mode 100644 (file)
index 0000000..54d3104
--- /dev/null
@@ -0,0 +1,539 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * This work is largely based on the "vhost-user-scsi" implementation by
+ * SPDK(https://github.com/spdk/spdk).
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <assert.h>
+#include <ctype.h>
+#include <string.h>
+#include <stddef.h>
+
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_byteorder.h>
+
+#include "vhost_scsi.h"
+#include "scsi_spec.h"
+
+/*
+ * Offset of the first byte that follows @field inside
+ * struct scsi_cdb_inquiry_data, i.e. offsetof(field) + sizeof(field).
+ */
+#define INQ_OFFSET(field) (offsetof(struct scsi_cdb_inquiry_data, field) + \
+                         sizeof(((struct scsi_cdb_inquiry_data *)0x0)->field))
+
+/*
+ * Copy @src into the fixed-width field @dst of @size bytes. A source that is
+ * shorter than the field is followed by @pad bytes up to @size; a longer one
+ * is truncated to @size bytes. No terminating NUL is appended.
+ */
+static void
+vhost_strcpy_pad(void *dst, const char *src, size_t size, int pad)
+{
+       const size_t src_len = strlen(src);
+
+       if (src_len >= size) {
+               /* Source fills (or overflows) the field: truncate. */
+               memcpy(dst, src, size);
+               return;
+       }
+
+       memcpy(dst, src, src_len);
+       memset((char *)dst + src_len, pad, size - src_len);
+}
+
+/*
+ * Convert one hexadecimal digit to its numeric value (0-15).
+ *
+ * A character that is not a hex digit is not rejected: its lower-cased
+ * character code is returned as is, so callers that need a value in the
+ * 0-15 range must pass valid digits.
+ */
+static int
+vhost_hex2bin(char ch)
+{
+       if ((ch >= '0') && (ch <= '9'))
+               return ch - '0';
+       /* <ctype.h> functions need a value representable as unsigned char */
+       ch = (char)tolower((unsigned char)ch);
+       if ((ch >= 'a') && (ch <= 'f'))
+               return ch - 'a' + 10;
+       return (int)ch;
+}
+
+/*
+ * Build an 8-byte NAA IEEE Extended designator in @buf from @name.
+ *
+ * Up to 16 characters of @name are converted with vhost_hex2bin() and
+ * packed two per byte (even index in the low nibble, odd index in the high
+ * nibble). The 8 bytes are then read as a host-order 64-bit value, masked,
+ * stamped with the NAA type and IEEE company id, and written back in
+ * big-endian order.
+ *
+ * @buf must provide at least 8 writable bytes; it does not need to be
+ * aligned.
+ */
+static void
+vhost_bdev_scsi_set_naa_ieee_extended(const char *name, uint8_t *buf)
+{
+       int i, value, count = 0;
+       uint64_t local_value;
+
+       /* A name shorter than 16 characters must not leave stale bytes */
+       memset(buf, 0, sizeof(local_value));
+
+       for (i = 0; (i < 16) && (name[i] != '\0'); i++) {
+               value = vhost_hex2bin(name[i]);
+               if (i % 2)
+                       buf[count++] |= value << 4;
+               else
+                       buf[count] = value;
+       }
+
+       /* memcpy instead of a pointer cast: buf may be unaligned */
+       memcpy(&local_value, buf, sizeof(local_value));
+       /*
+        * see spc3r23 7.6.3.6.2,
+        *  NAA IEEE Extended identifier format
+        */
+       local_value &= 0x0fff000000ffffffull;
+       /* NAA 02, and 00 03 47 for IEEE Intel */
+       local_value |= 0x2000000347000000ull;
+
+       local_value = rte_cpu_to_be_64(local_value);
+       memcpy(buf, &local_value, sizeof(local_value));
+}
+
+/*
+ * Fill task->resp->sense with 18 bytes of fixed-format, current-error sense
+ * data carrying sense key @sk, additional sense code @asc and qualifier
+ * @ascq, and record that length in task->resp->sense_len.
+ */
+static void
+scsi_task_build_sense_data(struct vhost_scsi_task *task, int sk,
+                          int asc, int ascq)
+{
+       const int sense_bytes = 18;
+       uint8_t *sense = (uint8_t *)task->resp->sense;
+
+       /*
+        * Clear everything first: Obsolete, INFORMATION, COMMAND-SPECIFIC
+        * INFORMATION, FIELD REPLACEABLE UNIT CODE and SENSE KEY SPECIFIC
+        * all stay zero.
+        */
+       memset(sense, 0, sense_bytes);
+
+       /* VALID(7) RESPONSE CODE(6-0): 0x70 = Current + Fixed format */
+       sense[0] = 0x80 | 0x70;
+       /* FILEMARK(7) EOM(6) ILI(5) SENSE KEY(3-0) */
+       sense[2] = sk & 0xf;
+       /* ADDITIONAL SENSE LENGTH */
+       sense[7] = 10;
+       /* ADDITIONAL SENSE CODE */
+       sense[12] = asc;
+       /* ADDITIONAL SENSE CODE QUALIFIER */
+       sense[13] = ascq;
+
+       /* SenseLength */
+       task->resp->sense_len = sense_bytes;
+}
+
+/*
+ * Record SCSI status @sc in the task response. For CHECK CONDITION the
+ * sense buffer is populated from @sk/@asc/@ascq first; for any other
+ * status those three arguments are ignored.
+ */
+static void
+scsi_task_set_status(struct vhost_scsi_task *task, int sc, int sk,
+                    int asc, int ascq)
+{
+       const int wants_sense = (sc == SCSI_STATUS_CHECK_CONDITION);
+
+       if (wants_sense)
+               scsi_task_build_sense_data(task, sk, asc, ascq);
+
+       task->resp->status = sc;
+}
+
+/*
+ * Handle the INQUIRY command for @bdev and fill the first data buffer
+ * (task->iovs[0]) with either a VPD page (EVPD bit set in the CDB) or the
+ * standard inquiry data.
+ *
+ * Supported VPD pages: supported-pages list, unit serial number and
+ * device identification. Any other page code, a page code without EVPD,
+ * or task->data_len below 36 bytes completes the task with CHECK
+ * CONDITION / ILLEGAL REQUEST / INVALID FIELD IN CDB.
+ *
+ * Returns the number of response bytes produced (hlen + len) on success,
+ * or 0 after the error status has been recorded.
+ */
+static int
+vhost_bdev_scsi_inquiry_command(struct vhost_block_dev *bdev,
+                               struct vhost_scsi_task *task)
+{
+       int hlen = 0;
+       /*
+        * NOTE(review): alloc_len is never assigned after this
+        * initialisation, so every "alloc_len >= ..." branch in the
+        * standard-inquiry path below is skipped - presumably it was meant
+        * to be loaded from the CDB allocation length; confirm.
+        */
+       uint32_t alloc_len = 0;
+       uint16_t len = 0;
+       uint16_t *temp16;
+       int pc;
+       int pd;
+       int evpd;
+       int i;
+       uint8_t *buf;
+       struct scsi_cdb_inquiry *inq;
+
+       inq = (struct scsi_cdb_inquiry *)task->req->cdb;
+
+       assert(task->iovs_cnt == 1);
+
+       /* At least 36Bytes for inquiry command */
+       if (task->data_len < 0x24)
+               goto inq_error;
+
+       pd = SPC_PERIPHERAL_DEVICE_TYPE_DISK;
+       pc = inq->page_code;
+       evpd = inq->evpd & 0x1;
+
+       /* A page code is only meaningful together with EVPD */
+       if (!evpd && pc)
+               goto inq_error;
+
+       if (evpd) {
+               struct scsi_vpd_page *vpage = (struct scsi_vpd_page *)
+                                             task->iovs[0].iov_base;
+
+               /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */
+               vpage->peripheral = pd;
+               /* PAGE CODE */
+               vpage->page_code = pc;
+
+               switch (pc) {
+               case SPC_VPD_SUPPORTED_VPD_PAGES:
+                       hlen = 4;
+                       vpage->params[0] = SPC_VPD_SUPPORTED_VPD_PAGES;
+                       vpage->params[1] = SPC_VPD_UNIT_SERIAL_NUMBER;
+                       vpage->params[2] = SPC_VPD_DEVICE_IDENTIFICATION;
+                       len = 3;
+                       /* PAGE LENGTH */
+                       vpage->alloc_len = rte_cpu_to_be_16(len);
+                       break;
+               case SPC_VPD_UNIT_SERIAL_NUMBER:
+                       hlen = 4;
+                       /*
+                        * NOTE(review): len stays 0 in this case, so the
+                        * returned length (hlen + len) does not include the
+                        * 32 serial-number bytes - confirm this is intended.
+                        */
+                       strncpy((char *)vpage->params, bdev->name, 32);
+                       vpage->alloc_len = rte_cpu_to_be_16(32);
+                       break;
+               case SPC_VPD_DEVICE_IDENTIFICATION:
+                       buf = vpage->params;
+                       struct scsi_desig_desc *desig;
+
+                       hlen = 4;
+                       /* NAA designator */
+                       desig = (struct scsi_desig_desc *)buf;
+                       desig->code_set = SPC_VPD_CODE_SET_BINARY;
+                       desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI;
+                       desig->type = SPC_VPD_IDENTIFIER_TYPE_NAA;
+                       desig->association = SPC_VPD_ASSOCIATION_LOGICAL_UNIT;
+                       desig->reserved0 = 0;
+                       desig->piv = 1;
+                       desig->reserved1 = 0;
+                       desig->len = 8;
+                       vhost_bdev_scsi_set_naa_ieee_extended(bdev->name,
+                                                             desig->desig);
+                       len = sizeof(struct scsi_desig_desc) + 8;
+
+                       buf += sizeof(struct scsi_desig_desc) + desig->len;
+
+                       /* T10 Vendor ID designator */
+                       desig = (struct scsi_desig_desc *)buf;
+                       desig->code_set = SPC_VPD_CODE_SET_ASCII;
+                       desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI;
+                       desig->type = SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID;
+                       desig->association = SPC_VPD_ASSOCIATION_LOGICAL_UNIT;
+                       desig->reserved0 = 0;
+                       desig->piv = 1;
+                       desig->reserved1 = 0;
+                       /* 8 bytes vendor + 16 bytes product + 32 bytes name */
+                       desig->len = 8 + 16 + 32;
+                       strncpy((char *)desig->desig, "INTEL", 8);
+                       vhost_strcpy_pad((char *)&desig->desig[8],
+                                        bdev->product_name, 16, ' ');
+                       strncpy((char *)&desig->desig[24], bdev->name, 32);
+                       len += sizeof(struct scsi_desig_desc) + 8 + 16 + 32;
+
+                       buf += sizeof(struct scsi_desig_desc) + desig->len;
+
+                       /* SCSI Device Name designator */
+                       desig = (struct scsi_desig_desc *)buf;
+                       desig->code_set = SPC_VPD_CODE_SET_UTF8;
+                       desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI;
+                       desig->type = SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME;
+                       desig->association = SPC_VPD_ASSOCIATION_TARGET_DEVICE;
+                       desig->reserved0 = 0;
+                       desig->piv = 1;
+                       desig->reserved1 = 0;
+                       desig->len = snprintf((char *)desig->desig,
+                                             255, "%s", bdev->name);
+                       len += sizeof(struct scsi_desig_desc) + desig->len;
+
+                       /* NOTE(review): buf is not read again after this */
+                       buf += sizeof(struct scsi_desig_desc) + desig->len;
+                       vpage->alloc_len = rte_cpu_to_be_16(len);
+                       break;
+               default:
+                       goto inq_error;
+               }
+
+       } else {
+               struct scsi_cdb_inquiry_data *inqdata =
+                       (struct scsi_cdb_inquiry_data *)task->iovs[0].iov_base;
+               /* Standard INQUIRY data */
+               /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */
+               inqdata->peripheral = pd;
+
+               /* RMB(7) */
+               inqdata->rmb = 0;
+
+               /* VERSION */
+               /* See SPC3/SBC2/MMC4/SAM2 for more details */
+               inqdata->version = SPC_VERSION_SPC3;
+
+               /* NORMACA(5) HISUP(4) RESPONSE DATA FORMAT(3-0) */
+               /* format 2 */ /* hierarchical support */
+               inqdata->response = 2 | 1 << 4;
+
+               hlen = 5;
+
+               /* SCCS(7) ACC(6) TPGS(5-4) 3PC(3) PROTECT(0) */
+               /* Not support TPGS */
+               inqdata->flags = 0;
+
+               /* MULTIP */
+               inqdata->flags2 = 0x10;
+
+               /* WBUS16(5) SYNC(4) LINKED(3) CMDQUE(1) VS(0) */
+               /* CMDQUE */
+               inqdata->flags3 = 0x2;
+
+               /* T10 VENDOR IDENTIFICATION */
+               strncpy((char *)inqdata->t10_vendor_id, "INTEL", 8);
+
+               /* PRODUCT IDENTIFICATION */
+               strncpy((char *)inqdata->product_id, bdev->product_name, 16);
+
+               /* PRODUCT REVISION LEVEL */
+               strncpy((char *)inqdata->product_rev, "0001", 4);
+
+               /* Standard inquiry data ends here. Only populate
+                * remaining fields if alloc_len indicates enough
+                * space to hold it.
+                */
+               len = INQ_OFFSET(product_rev) - 5;
+
+               if (alloc_len >= INQ_OFFSET(vendor)) {
+                       /* Vendor specific */
+                       memset(inqdata->vendor, 0x20, 20);
+                       len += sizeof(inqdata->vendor);
+               }
+
+               if (alloc_len >= INQ_OFFSET(ius)) {
+                       /* CLOCKING(3-2) QAS(1) IUS(0) */
+                       inqdata->ius = 0;
+                       len += sizeof(inqdata->ius);
+               }
+
+               if (alloc_len >= INQ_OFFSET(reserved)) {
+                       /* Reserved */
+                       inqdata->reserved = 0;
+                       len += sizeof(inqdata->reserved);
+               }
+
+               /* VERSION DESCRIPTOR 1-8 */
+               if (alloc_len >= INQ_OFFSET(reserved) + 2) {
+                       temp16 = (uint16_t *)&inqdata->desc[0];
+                       *temp16 = rte_cpu_to_be_16(0x0960);
+                       len += 2;
+               }
+
+               if (alloc_len >= INQ_OFFSET(reserved) + 4) {
+                       /* SPC-3 (no version claimed) */
+                       temp16 = (uint16_t *)&inqdata->desc[2];
+                       *temp16 = rte_cpu_to_be_16(0x0300);
+                       len += 2;
+               }
+
+               if (alloc_len >= INQ_OFFSET(reserved) + 6) {
+                       /* SBC-2 (no version claimed) */
+                       temp16 = (uint16_t *)&inqdata->desc[4];
+                       *temp16 = rte_cpu_to_be_16(0x0320);
+                       len += 2;
+               }
+
+               if (alloc_len >= INQ_OFFSET(reserved) + 8) {
+                       /* SAM-2 (no version claimed) */
+                       temp16 = (uint16_t *)&inqdata->desc[6];
+                       *temp16 = rte_cpu_to_be_16(0x0040);
+                       len += 2;
+               }
+
+               /* Zero at most 30 further descriptor bytes */
+               if (alloc_len > INQ_OFFSET(reserved) + 8) {
+                       i = alloc_len - (INQ_OFFSET(reserved) + 8);
+                       if (i > 30)
+                               i = 30;
+                       memset(&inqdata->desc[8], 0, i);
+                       len += i;
+               }
+
+               /* ADDITIONAL LENGTH */
+               inqdata->add_len = len;
+       }
+
+       /* STATUS GOOD */
+       scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0);
+       return hlen + len;
+
+inq_error:
+       scsi_task_set_status(task, SCSI_STATUS_CHECK_CONDITION,
+                                    SCSI_SENSE_ILLEGAL_REQUEST,
+                                    SCSI_ASC_INVALID_FIELD_IN_CDB,
+                                    SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+       return 0;
+}
+
+/*
+ * Copy data between the task's iovecs and the memory backing @bdev,
+ * starting at logical block @lba. The direction comes from task->dxfer_dir
+ * (SCSI_DIR_TO_DEV copies into the device, anything else copies out of it)
+ * and the amount moved is the sum of the iovec lengths; @xfer_len is unused.
+ *
+ * @lba and the iovec lengths originate from the guest, so a request that
+ * would reach past the advertised capacity (bdev->blockcnt blocks of
+ * bdev->blocklen bytes) is rejected with CHECK CONDITION / ILLEGAL REQUEST.
+ * NOTE(review): this presumes bdev->data spans exactly that capacity -
+ * confirm against the allocation of the backing memory.
+ *
+ * Returns the number of bytes copied, or 0 if the request was rejected.
+ */
+static int
+vhost_bdev_scsi_readwrite(struct vhost_block_dev *bdev,
+                         struct vhost_scsi_task *task,
+                         uint64_t lba, __rte_unused uint32_t xfer_len)
+{
+       uint32_t i;
+       uint64_t offset;
+       uint64_t total = 0;
+       uint32_t nbytes = 0;
+
+       for (i = 0; i < task->iovs_cnt; i++)
+               total += task->iovs[i].iov_len;
+
+       /* Ordered so that the subtraction below can not wrap around */
+       if (lba >= (uint64_t)bdev->blockcnt ||
+           total > ((uint64_t)bdev->blockcnt - lba) * bdev->blocklen) {
+               scsi_task_set_status(task, SCSI_STATUS_CHECK_CONDITION,
+                                    SCSI_SENSE_ILLEGAL_REQUEST,
+                                    SCSI_ASC_INVALID_FIELD_IN_CDB,
+                                    SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+               return 0;
+       }
+
+       offset = lba * bdev->blocklen;
+
+       for (i = 0; i < task->iovs_cnt; i++) {
+               if (task->dxfer_dir == SCSI_DIR_TO_DEV)
+                       memcpy(bdev->data + offset, task->iovs[i].iov_base,
+                              task->iovs[i].iov_len);
+               else
+                       memcpy(task->iovs[i].iov_base, bdev->data + offset,
+                              task->iovs[i].iov_len);
+               offset += task->iovs[i].iov_len;
+               nbytes += task->iovs[i].iov_len;
+       }
+
+       return nbytes;
+}
+
+/*
+ * Decode and execute the block commands found in task->req->cdb:
+ * READ/WRITE (6/10/12/16), READ CAPACITY (10) and SYNCHRONIZE CACHE
+ * (10/16). Any other opcode completes the task with CHECK CONDITION /
+ * ILLEGAL REQUEST / INVALID FIELD IN CDB.
+ *
+ * Multi-byte CDB fields are big-endian and sit at arbitrary byte offsets,
+ * so they are copied out with memcpy() rather than read through a cast
+ * pointer.
+ *
+ * Returns the number of data bytes transferred or produced (0 for
+ * SYNCHRONIZE CACHE and for rejected commands).
+ */
+static int
+vhost_bdev_scsi_process_block(struct vhost_block_dev *bdev,
+                             struct vhost_scsi_task *task)
+{
+       uint64_t lba, be64;
+       uint32_t xfer_len, be32;
+       uint16_t be16;
+       uint8_t *cdb = (uint8_t *)task->req->cdb;
+
+       switch (cdb[0]) {
+       case SBC_READ_6:
+       case SBC_WRITE_6:
+               lba = (uint64_t)cdb[1] << 16;
+               lba |= (uint64_t)cdb[2] << 8;
+               lba |= (uint64_t)cdb[3];
+               xfer_len = cdb[4];
+               /* a transfer length of 0 is treated as 256 blocks */
+               if (xfer_len == 0)
+                       xfer_len = 256;
+               return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len);
+
+       case SBC_READ_10:
+       case SBC_WRITE_10:
+               memcpy(&be32, &cdb[2], sizeof(be32));
+               lba = rte_be_to_cpu_32(be32);
+               memcpy(&be16, &cdb[7], sizeof(be16));
+               xfer_len = rte_be_to_cpu_16(be16);
+               return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len);
+
+       case SBC_READ_12:
+       case SBC_WRITE_12:
+               memcpy(&be32, &cdb[2], sizeof(be32));
+               lba = rte_be_to_cpu_32(be32);
+               memcpy(&be32, &cdb[6], sizeof(be32));
+               xfer_len = rte_be_to_cpu_32(be32);
+               return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len);
+
+       case SBC_READ_16:
+       case SBC_WRITE_16:
+               memcpy(&be64, &cdb[2], sizeof(be64));
+               lba = rte_be_to_cpu_64(be64);
+               memcpy(&be32, &cdb[10], sizeof(be32));
+               xfer_len = rte_be_to_cpu_32(be32);
+               return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len);
+
+       case SBC_READ_CAPACITY_10: {
+               uint8_t buffer[8];
+
+               /* bytes 0-3: last LBA, saturated to 0xffffffff */
+               if (bdev->blockcnt - 1 > 0xffffffffULL) {
+                       memset(buffer, 0xff, 4);
+               } else {
+                       be32 = rte_cpu_to_be_32(bdev->blockcnt - 1);
+                       memcpy(buffer, &be32, sizeof(be32));
+               }
+               /* bytes 4-7: block length in bytes */
+               be32 = rte_cpu_to_be_32(bdev->blocklen);
+               memcpy(&buffer[4], &be32, sizeof(be32));
+               memcpy(task->iovs[0].iov_base, buffer, sizeof(buffer));
+               task->resp->status = SCSI_STATUS_GOOD;
+               return sizeof(buffer);
+       }
+
+       case SBC_SYNCHRONIZE_CACHE_10:
+       case SBC_SYNCHRONIZE_CACHE_16:
+               task->resp->status = SCSI_STATUS_GOOD;
+               return 0;
+
+       default:
+               /* unsupported opcode: reported as an error below */
+               break;
+       }
+
+       scsi_task_set_status(task, SCSI_STATUS_CHECK_CONDITION,
+                            SCSI_SENSE_ILLEGAL_REQUEST,
+                            SCSI_ASC_INVALID_FIELD_IN_CDB,
+                            SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+       return 0;
+}
+
+/*
+ * Entry point for one SCSI request: dispatch on the opcode in
+ * task->req->cdb[0].
+ *
+ * INQUIRY and REPORT LUNS are answered here, MODE SELECT/SENSE and TEST
+ * UNIT READY are completed with GOOD status without producing data, and
+ * every other opcode is passed to vhost_bdev_scsi_process_block().
+ * task->data_len is updated only for INQUIRY, REPORT LUNS and the
+ * block-command path.
+ *
+ * Returns -1 when the request's LUN field does not start with the bytes
+ * 1, 0 (only one LUN is supported), otherwise 0; the SCSI status is
+ * reported through task->resp.
+ */
+int
+vhost_bdev_process_scsi_commands(struct vhost_block_dev *bdev,
+                                struct vhost_scsi_task *task)
+{
+       int len;
+       uint8_t *data;
+       uint64_t fmt_lun = 0;
+       uint32_t lun_list_len;
+       const uint8_t *lun;
+       uint8_t *cdb = (uint8_t *)task->req->cdb;
+
+       lun = (const uint8_t *)task->req->lun;
+       /* only 1 LUN supported */
+       if (lun[0] != 1 || lun[1] >= 1)
+               return -1;
+
+       switch (cdb[0]) {
+       case SPC_INQUIRY:
+               len = vhost_bdev_scsi_inquiry_command(bdev, task);
+               task->data_len = len;
+               break;
+       case SPC_REPORT_LUNS:
+               data = (uint8_t *)task->iovs[0].iov_base;
+               /* clear the 16-byte response so reserved bytes 4-7 are 0 */
+               memset(data, 0, 16);
+               /* LUN LIST LENGTH: a single 8-byte entry */
+               lun_list_len = rte_cpu_to_be_32(8);
+               memcpy(data, &lun_list_len, sizeof(lun_list_len));
+               /* the only entry is LUN 0 */
+               fmt_lun |= (0x0ULL & 0x00ffULL) << 48;
+               fmt_lun = rte_cpu_to_be_64(fmt_lun);
+               memcpy(&data[8], &fmt_lun, sizeof(fmt_lun));
+               task->data_len = 16;
+               scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0);
+               break;
+       case SPC_MODE_SELECT_6:
+       case SPC_MODE_SELECT_10:
+               /* don't support it now */
+               scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0);
+               break;
+       case SPC_MODE_SENSE_6:
+       case SPC_MODE_SENSE_10:
+               /* don't support it now */
+               scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0);
+               break;
+       case SPC_TEST_UNIT_READY:
+               scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0);
+               break;
+       default:
+               len = vhost_bdev_scsi_process_block(bdev, task);
+               task->data_len = len;
+       }
+
+       return 0;
+}
diff --git a/examples/vhost_scsi/scsi_spec.h b/examples/vhost_scsi/scsi_spec.h
new file mode 100644 (file)
index 0000000..60d761c
--- /dev/null
@@ -0,0 +1,493 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * SCSI specification definitions.
+ * Refer to http://www.t10.org/drafts.htm#SPC_Family for SPC-3 and SBC-3.
+ */
+
+#ifndef _SCSI_SPEC_H
+#define _SCSI_SPEC_H
+
+#include <stdint.h>
+
+/*
+ * Command group codes.  Every value lies within SCSI_GROUP_MASK (0xe0),
+ * i.e. the upper three bits of an operation code; the enumerator names
+ * give the CDB length associated with each group.
+ */
+enum scsi_group_code {
+       SCSI_6BYTE_CMD = 0x00,
+       SCSI_10BYTE_CMD = 0x20,
+       SCSI_10BYTE_CMD2 = 0x40,
+       SCSI_16BYTE_CMD = 0x80,
+       SCSI_12BYTE_CMD = 0xa0,
+};
+
+/*
+ * Bit masks splitting an operation code byte: the upper three bits
+ * (group code) and the lower five bits.
+ */
+#define SCSI_GROUP_MASK        0xe0
+#define SCSI_OPCODE_MASK       0x1f
+
+/* SCSI status codes returned on completion of a command. */
+enum scsi_status {
+       SCSI_STATUS_GOOD = 0x00,
+       SCSI_STATUS_CHECK_CONDITION = 0x02,
+       SCSI_STATUS_CONDITION_MET = 0x04,
+       SCSI_STATUS_BUSY = 0x08,
+       SCSI_STATUS_INTERMEDIATE = 0x10,
+       SCSI_STATUS_INTERMEDIATE_CONDITION_MET = 0x14,
+       SCSI_STATUS_RESERVATION_CONFLICT = 0x18,
+       SCSI_STATUS_Obsolete = 0x22,
+       SCSI_STATUS_TASK_SET_FULL = 0x28,
+       SCSI_STATUS_ACA_ACTIVE = 0x30,
+       SCSI_STATUS_TASK_ABORTED = 0x40,
+};
+
+/*
+ * SCSI sense key values.  Note that the list is not contiguous: no
+ * enumerator is defined for 0x0c.
+ */
+enum scsi_sense {
+       SCSI_SENSE_NO_SENSE = 0x00,
+       SCSI_SENSE_RECOVERED_ERROR = 0x01,
+       SCSI_SENSE_NOT_READY = 0x02,
+       SCSI_SENSE_MEDIUM_ERROR = 0x03,
+       SCSI_SENSE_HARDWARE_ERROR = 0x04,
+       SCSI_SENSE_ILLEGAL_REQUEST = 0x05,
+       SCSI_SENSE_UNIT_ATTENTION = 0x06,
+       SCSI_SENSE_DATA_PROTECT = 0x07,
+       SCSI_SENSE_BLANK_CHECK = 0x08,
+       SCSI_SENSE_VENDOR_SPECIFIC = 0x09,
+       SCSI_SENSE_COPY_ABORTED = 0x0a,
+       SCSI_SENSE_ABORTED_COMMAND = 0x0b,
+       SCSI_SENSE_VOLUME_OVERFLOW = 0x0d,
+       SCSI_SENSE_MISCOMPARE = 0x0e,
+};
+
+/*
+ * Additional sense code (ASC) values.  Several enumerators intentionally
+ * share one value (0x10 and 0x20); such entries can only be told apart by
+ * the qualifier that accompanies them (see enum scsi_ascq).
+ */
+enum scsi_asc {
+       SCSI_ASC_NO_ADDITIONAL_SENSE = 0x00,
+       SCSI_ASC_PERIPHERAL_DEVICE_WRITE_FAULT = 0x03,
+       SCSI_ASC_LOGICAL_UNIT_NOT_READY = 0x04,
+       SCSI_ASC_WARNING = 0x0b,
+       SCSI_ASC_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x10,
+       SCSI_ASC_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x10,
+       SCSI_ASC_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x10,
+       SCSI_ASC_UNRECOVERED_READ_ERROR = 0x11,
+       SCSI_ASC_MISCOMPARE_DURING_VERIFY_OPERATION = 0x1d,
+       SCSI_ASC_INVALID_COMMAND_OPERATION_CODE = 0x20,
+       SCSI_ASC_ACCESS_DENIED = 0x20,
+       SCSI_ASC_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE = 0x21,
+       SCSI_ASC_INVALID_FIELD_IN_CDB = 0x24,
+       SCSI_ASC_LOGICAL_UNIT_NOT_SUPPORTED = 0x25,
+       SCSI_ASC_WRITE_PROTECTED = 0x27,
+       SCSI_ASC_FORMAT_COMMAND_FAILED = 0x31,
+       SCSI_ASC_INTERNAL_TARGET_FAILURE = 0x44,
+};
+
+/*
+ * Additional sense code qualifier (ASCQ) values.  A qualifier is only
+ * meaningful together with an additional sense code, which is why several
+ * enumerators share the same numeric value.
+ */
+enum scsi_ascq {
+       SCSI_ASCQ_CAUSE_NOT_REPORTABLE = 0x00,
+       SCSI_ASCQ_BECOMING_READY = 0x01,
+       SCSI_ASCQ_FORMAT_COMMAND_FAILED = 0x01,
+       SCSI_ASCQ_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x01,
+       SCSI_ASCQ_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x02,
+       SCSI_ASCQ_NO_ACCESS_RIGHTS = 0x02,
+       SCSI_ASCQ_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x03,
+       SCSI_ASCQ_POWER_LOSS_EXPECTED = 0x08,
+       SCSI_ASCQ_INVALID_LU_IDENTIFIER = 0x09,
+};
+
+/*
+ * Operation codes of the SCSI Primary Commands (SPC) set.
+ *
+ * The SPC_MO_* and SPC_MI_* entries reuse small values that also appear
+ * among each other, so they are not first-byte opcodes.
+ * NOTE(review): presumably these are service action codes of the
+ * MAINTENANCE OUT / MAINTENANCE IN commands -- confirm against SPC.
+ */
+enum spc_opcode {
+       /* SPC3 related */
+       SPC_ACCESS_CONTROL_IN = 0x86,
+       SPC_ACCESS_CONTROL_OUT = 0x87,
+       SPC_EXTENDED_COPY = 0x83,
+       SPC_INQUIRY = 0x12,
+       SPC_LOG_SELECT = 0x4c,
+       SPC_LOG_SENSE = 0x4d,
+       SPC_MODE_SELECT_6 = 0x15,
+       SPC_MODE_SELECT_10 = 0x55,
+       SPC_MODE_SENSE_6 = 0x1a,
+       SPC_MODE_SENSE_10 = 0x5a,
+       SPC_PERSISTENT_RESERVE_IN = 0x5e,
+       SPC_PERSISTENT_RESERVE_OUT = 0x5f,
+       SPC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e,
+       SPC_READ_ATTRIBUTE = 0x8c,
+       SPC_READ_BUFFER = 0x3c,
+       SPC_RECEIVE_COPY_RESULTS = 0x84,
+       SPC_RECEIVE_DIAGNOSTIC_RESULTS = 0x1c,
+       SPC_REPORT_LUNS = 0xa0,
+       SPC_REQUEST_SENSE = 0x03,
+       SPC_SEND_DIAGNOSTIC = 0x1d,
+       SPC_TEST_UNIT_READY = 0x00,
+       SPC_WRITE_ATTRIBUTE = 0x8d,
+       SPC_WRITE_BUFFER = 0x3b,
+
+       SPC_SERVICE_ACTION_IN_12 = 0xab,
+       SPC_SERVICE_ACTION_OUT_12 = 0xa9,
+       SPC_SERVICE_ACTION_IN_16 = 0x9e,
+       SPC_SERVICE_ACTION_OUT_16 = 0x9f,
+
+       SPC_VARIABLE_LENGTH = 0x7f,
+
+       SPC_MO_CHANGE_ALIASES = 0x0b,
+       SPC_MO_SET_DEVICE_IDENTIFIER = 0x06,
+       SPC_MO_SET_PRIORITY = 0x0e,
+       SPC_MO_SET_TARGET_PORT_GROUPS = 0x0a,
+       SPC_MO_SET_TIMESTAMP = 0x0f,
+       SPC_MI_REPORT_ALIASES = 0x0b,
+       SPC_MI_REPORT_DEVICE_IDENTIFIER = 0x05,
+       SPC_MI_REPORT_PRIORITY = 0x0e,
+       SPC_MI_REPORT_SUPPORTED_OPERATION_CODES = 0x0c,
+       SPC_MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS = 0x0d,
+       SPC_MI_REPORT_TARGET_PORT_GROUPS = 0x0a,
+       SPC_MI_REPORT_TIMESTAMP = 0x0f,
+
+       /* SPC2 related (Obsolete) */
+       SPC2_RELEASE_6 = 0x17,
+       SPC2_RELEASE_10 = 0x57,
+       SPC2_RESERVE_6 = 0x16,
+       SPC2_RESERVE_10 = 0x56,
+};
+
+/* Operation codes of the MAINTENANCE IN / MAINTENANCE OUT commands. */
+enum scc_opcode {
+       SCC_MAINTENANCE_IN = 0xa3,
+       SCC_MAINTENANCE_OUT = 0xa4,
+};
+
+/*
+ * Operation codes of the SCSI Block Commands (SBC) set.
+ *
+ * Not every enumerator is a one-byte opcode: SBC_GET_LBA_STATUS is wider
+ * than a byte, and the SBC_SAI_*, SBC_SAO_* and SBC_VL_* groups at the end
+ * use small values that collide with ordinary opcodes.
+ * NOTE(review): presumably those are service action codes (service action
+ * in/out, variable length CDB) and SBC_GET_LBA_STATUS packs a service
+ * action together with opcode 0x9e -- confirm against SBC before use.
+ */
+enum sbc_opcode {
+       SBC_COMPARE_AND_WRITE = 0x89,
+       SBC_FORMAT_UNIT = 0x04,
+       SBC_GET_LBA_STATUS = 0x0012009e,
+       SBC_ORWRITE_16 = 0x8b,
+       SBC_PRE_FETCH_10 = 0x34,
+       SBC_PRE_FETCH_16 = 0x90,
+       SBC_READ_6 = 0x08,
+       SBC_READ_10 = 0x28,
+       SBC_READ_12 = 0xa8,
+       SBC_READ_16 = 0x88,
+       SBC_READ_ATTRIBUTE = 0x8c,
+       SBC_READ_BUFFER = 0x3c,
+       SBC_READ_CAPACITY_10 = 0x25,
+       SBC_READ_DEFECT_DATA_10 = 0x37,
+       SBC_READ_DEFECT_DATA_12 = 0xb7,
+       SBC_READ_LONG_10 = 0x3e,
+       SBC_REASSIGN_BLOCKS = 0x07,
+       SBC_SANITIZE = 0x48,
+       SBC_START_STOP_UNIT = 0x1b,
+       SBC_SYNCHRONIZE_CACHE_10 = 0x35,
+       SBC_SYNCHRONIZE_CACHE_16 = 0x91,
+       SBC_UNMAP = 0x42,
+       SBC_VERIFY_10 = 0x2f,
+       SBC_VERIFY_12 = 0xaf,
+       SBC_VERIFY_16 = 0x8f,
+       SBC_WRITE_6 = 0x0a,
+       SBC_WRITE_10 = 0x2a,
+       SBC_WRITE_12 = 0xaa,
+       SBC_WRITE_16 = 0x8a,
+       SBC_WRITE_AND_VERIFY_10 = 0x2e,
+       SBC_WRITE_AND_VERIFY_12 = 0xae,
+       SBC_WRITE_AND_VERIFY_16 = 0x8e,
+       SBC_WRITE_LONG_10 = 0x3f,
+       SBC_WRITE_SAME_10 = 0x41,
+       SBC_WRITE_SAME_16 = 0x93,
+       SBC_XDREAD_10 = 0x52,
+       SBC_XDWRITE_10 = 0x50,
+       SBC_XDWRITEREAD_10 = 0x53,
+       SBC_XPWRITE_10 = 0x51,
+
+       SBC_SAI_READ_CAPACITY_16 = 0x10,
+       SBC_SAI_READ_LONG_16 = 0x11,
+       SBC_SAO_WRITE_LONG_16 = 0x11,
+
+       SBC_VL_READ_32 = 0x0009,
+       SBC_VL_VERIFY_32 = 0x000a,
+       SBC_VL_WRITE_32 = 0x000b,
+       SBC_VL_WRITE_AND_VERIFY_32 = 0x000c,
+       SBC_VL_WRITE_SAME_32 = 0x000d,
+       SBC_VL_XDREAD_32 = 0x0003,
+       SBC_VL_XDWRITE_32 = 0x0004,
+       SBC_VL_XDWRITEREAD_32 = 0x0007,
+       SBC_VL_XPWRITE_32 = 0x0006,
+};
+
+/*
+ * Operation codes of the SCSI Multi-Media Commands (MMC) set.
+ *
+ * Commands shared with the block command set carry the same value as
+ * their SBC_* counterpart (e.g. MMC_READ_10/SBC_READ_10 and
+ * MMC_WRITE_10/SBC_WRITE_10).
+ */
+enum mmc_opcode {
+       /* MMC6 */
+       MMC_READ_DISC_STRUCTURE = 0xad,
+
+       /* MMC4 */
+       MMC_BLANK = 0xa1,
+       MMC_CLOSE_TRACK_SESSION = 0x5b,
+       MMC_ERASE_10 = 0x2c,
+       MMC_FORMAT_UNIT = 0x04,
+       MMC_GET_CONFIGURATION = 0x46,
+       MMC_GET_EVENT_STATUS_NOTIFICATION = 0x4a,
+       MMC_GET_PERFORMANCE = 0xac,
+       MMC_INQUIRY = 0x12,
+       MMC_LOAD_UNLOAD_MEDIUM = 0xa6,
+       MMC_MECHANISM_STATUS = 0xbd,
+       MMC_MODE_SELECT_10 = 0x55,
+       MMC_MODE_SENSE_10 = 0x5a,
+       MMC_PAUSE_RESUME = 0x4b,
+       MMC_PLAY_AUDIO_10 = 0x45,
+       MMC_PLAY_AUDIO_12 = 0xa5,
+       MMC_PLAY_AUDIO_MSF = 0x47,
+       MMC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e,
+       MMC_READ_10 = 0x28,
+       MMC_READ_12 = 0xa8,
+       MMC_READ_BUFFER = 0x3c,
+       MMC_READ_BUFFER_CAPACITY = 0x5c,
+       MMC_READ_CAPACITY = 0x25,
+       MMC_READ_CD = 0xbe,
+       MMC_READ_CD_MSF = 0xb9,
+       MMC_READ_DISC_INFORMATION = 0x51,
+       MMC_READ_DVD_STRUCTURE = 0xad,
+       MMC_READ_FORMAT_CAPACITIES = 0x23,
+       MMC_READ_SUB_CHANNEL = 0x42,
+       MMC_READ_TOC_PMA_ATIP = 0x43,
+       MMC_READ_TRACK_INFORMATION = 0x52,
+       MMC_REPAIR_TRACK = 0x58,
+       MMC_REPORT_KEY = 0xa4,
+       MMC_REQUEST_SENSE = 0x03,
+       MMC_RESERVE_TRACK = 0x53,
+       MMC_SCAN = 0xba,
+       MMC_SEEK_10 = 0x2b,
+       MMC_SEND_CUE_SHEET = 0x5d,
+       MMC_SEND_DVD_STRUCTURE = 0xbf,
+       MMC_SEND_KEY = 0xa3,
+       MMC_SEND_OPC_INFORMATION = 0x54,
+       MMC_SET_CD_SPEED = 0xbb,
+       MMC_SET_READ_AHEAD = 0xa7,
+       MMC_SET_STREAMING = 0xb6,
+       MMC_START_STOP_UNIT = 0x1b,
+       MMC_STOP_PLAY_SCAN = 0x4e,
+       MMC_SYNCHRONIZE_CACHE = 0x35,
+       MMC_TEST_UNIT_READY = 0x00,
+       MMC_VERIFY_10 = 0x2f,
+       /* WRITE(10) is opcode 0x2a, the digits used to be transposed */
+       MMC_WRITE_10 = 0x2a,
+       MMC_WRITE_12 = 0xaa,
+       MMC_WRITE_AND_VERIFY_10 = 0x2e,
+       MMC_WRITE_BUFFER = 0x3b,
+};
+
+/* Operation codes of the SCSI Stream Commands (SSC) set. */
+enum ssc_opcode {
+       SSC_ERASE_6 = 0x19,
+       SSC_FORMAT_MEDIUM = 0x04,
+       SSC_LOAD_UNLOAD = 0x1b,
+       SSC_LOCATE_10 = 0x2b,
+       SSC_LOCATE_16 = 0x92,
+       SSC_MOVE_MEDIUM_ATTACHED = 0xa7,
+       SSC_READ_6 = 0x08,
+       SSC_READ_BLOCK_LIMITS = 0x05,
+       SSC_READ_ELEMENT_STATUS_ATTACHED = 0xb4,
+       SSC_READ_POSITION = 0x34,
+       SSC_READ_REVERSE_6 = 0x0f,
+       SSC_RECOVER_BUFFERED_DATA = 0x14,
+       SSC_REPORT_DENSITY_SUPPORT = 0x44,
+       SSC_REWIND = 0x01,
+       SSC_SET_CAPACITY = 0x0b,
+       SSC_SPACE_6 = 0x11,
+       SSC_SPACE_16 = 0x91,
+       SSC_VERIFY_6 = 0x13,
+       SSC_WRITE_6 = 0x0a,
+       SSC_WRITE_FILEMARKS_6 = 0x10,
+};
+
+/* Vital Product Data (VPD) page codes. */
+enum spc_vpd {
+       SPC_VPD_DEVICE_IDENTIFICATION = 0x83,
+       SPC_VPD_EXTENDED_INQUIRY_DATA = 0x86,
+       SPC_VPD_MANAGEMENT_NETWORK_ADDRESSES = 0x85,
+       SPC_VPD_MODE_PAGE_POLICY = 0x87,
+       SPC_VPD_SCSI_PORTS = 0x88,
+       SPC_VPD_SOFTWARE_INTERFACE_IDENTIFICATION = 0x84,
+       SPC_VPD_SUPPORTED_VPD_PAGES = 0x00,
+       SPC_VPD_UNIT_SERIAL_NUMBER = 0x80,
+       SPC_VPD_BLOCK_LIMITS = 0xb0,
+       SPC_VPD_BLOCK_DEV_CHARS = 0xb1,
+       SPC_VPD_BLOCK_THIN_PROVISION = 0xb2,
+};
+
+/*
+ * Miscellaneous SPC constants.  The enumerators form independent groups
+ * (separated by blank lines) whose numeric ranges overlap, so a value is
+ * only meaningful for the field its name prefix refers to.
+ */
+enum {
+       /* peripheral device types */
+       SPC_PERIPHERAL_DEVICE_TYPE_DISK = 0x00,
+       SPC_PERIPHERAL_DEVICE_TYPE_TAPE = 0x01,
+       SPC_PERIPHERAL_DEVICE_TYPE_DVD = 0x05,
+       SPC_PERIPHERAL_DEVICE_TYPE_CHANGER = 0x08,
+
+       /* SPC version identifiers */
+       SPC_VERSION_NONE = 0x00,
+       SPC_VERSION_SPC = 0x03,
+       SPC_VERSION_SPC2 = 0x04,
+       SPC_VERSION_SPC3 = 0x05,
+       SPC_VERSION_SPC4 = 0x06,
+
+       /* transport protocol identifiers */
+       SPC_PROTOCOL_IDENTIFIER_FC = 0x00,
+       SPC_PROTOCOL_IDENTIFIER_PSCSI = 0x01,
+       SPC_PROTOCOL_IDENTIFIER_SSA = 0x02,
+       SPC_PROTOCOL_IDENTIFIER_IEEE1394 = 0x03,
+       SPC_PROTOCOL_IDENTIFIER_RDMA = 0x04,
+       SPC_PROTOCOL_IDENTIFIER_ISCSI = 0x05,
+       SPC_PROTOCOL_IDENTIFIER_SAS = 0x06,
+       SPC_PROTOCOL_IDENTIFIER_ADT = 0x07,
+       SPC_PROTOCOL_IDENTIFIER_ATA = 0x08,
+
+       /* VPD designator code sets */
+       SPC_VPD_CODE_SET_BINARY = 0x01,
+       SPC_VPD_CODE_SET_ASCII = 0x02,
+       SPC_VPD_CODE_SET_UTF8 = 0x03,
+
+       /* VPD designator associations */
+       SPC_VPD_ASSOCIATION_LOGICAL_UNIT = 0x00,
+       SPC_VPD_ASSOCIATION_TARGET_PORT = 0x01,
+       SPC_VPD_ASSOCIATION_TARGET_DEVICE = 0x02,
+
+       /* VPD designator (identifier) types */
+       SPC_VPD_IDENTIFIER_TYPE_VENDOR_SPECIFIC = 0x00,
+       SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID = 0x01,
+       SPC_VPD_IDENTIFIER_TYPE_EUI64 = 0x02,
+       SPC_VPD_IDENTIFIER_TYPE_NAA = 0x03,
+       SPC_VPD_IDENTIFIER_TYPE_RELATIVE_TARGET_PORT = 0x04,
+       SPC_VPD_IDENTIFIER_TYPE_TARGET_PORT_GROUP = 0x05,
+       SPC_VPD_IDENTIFIER_TYPE_LOGICAL_UNIT_GROUP = 0x06,
+       SPC_VPD_IDENTIFIER_TYPE_MD5_LOGICAL_UNIT = 0x07,
+       SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME = 0x08,
+};
+
+/*
+ * Fields of an INQUIRY command descriptor block.
+ *
+ * NOTE(review): the struct is not declared packed and alloc_len is a
+ * uint16_t that follows three single-byte members, so compilers will
+ * normally insert a padding byte in front of it.  If this struct is
+ * overlaid on a raw 6 byte CDB, alloc_len would not line up with CDB
+ * bytes 3-4 -- confirm how callers access the allocation length.
+ */
+struct scsi_cdb_inquiry {
+       uint8_t opcode;
+       uint8_t evpd;
+       uint8_t page_code;
+       uint16_t alloc_len;
+       uint8_t control;
+};
+
+/*
+ * Layout of the standard INQUIRY response data.  All fixed members are
+ * byte sized (58 bytes in total), so no padding is introduced; desc[] is
+ * a flexible array member for data that follows the fixed part.
+ */
+struct scsi_cdb_inquiry_data {
+       uint8_t peripheral;
+       uint8_t rmb;
+       uint8_t version;
+       uint8_t response;
+       uint8_t add_len;
+       uint8_t flags;
+       uint8_t flags2;
+       uint8_t flags3;
+       uint8_t t10_vendor_id[8];
+       uint8_t product_id[16];
+       uint8_t product_rev[4];
+       uint8_t vendor[20];
+       uint8_t ius;
+       uint8_t reserved;
+       uint8_t desc[];
+};
+
+/*
+ * Common header of a VPD page: peripheral byte, page code and a 16-bit
+ * length, followed by the page specific parameters in params[].
+ * NOTE(review): alloc_len is presumably big-endian on the wire -- callers
+ * must convert it explicitly; confirm at the points of use.
+ */
+struct scsi_vpd_page {
+       uint8_t peripheral;
+       uint8_t page_code;
+       uint16_t alloc_len;
+       uint8_t params[];
+};
+
+/*
+ * Flag bits for the Extended INQUIRY Data VPD page.  The values restart
+ * at 0x01 three times, so the macros belong to three different bytes.
+ * NOTE(review): presumably these map, in order, onto the check, sup and
+ * sup2 members of struct scsi_vpd_ext_inquiry -- confirm before combining
+ * flags from different groups.
+ */
+#define SCSI_VEXT_REF_CHK              0x01
+#define SCSI_VEXT_APP_CHK              0x02
+#define SCSI_VEXT_GRD_CHK              0x04
+#define SCSI_VEXT_SIMPSUP              0x01
+#define SCSI_VEXT_ORDSUP               0x02
+#define SCSI_VEXT_HEADSUP              0x04
+#define SCSI_VEXT_PRIOR_SUP    0x08
+#define SCSI_VEXT_GROUP_SUP    0x10
+#define SCSI_VEXT_UASK_SUP             0x20
+#define SCSI_VEXT_V_SUP                0x01
+#define SCSI_VEXT_NV_SUP               0x02
+#define SCSI_VEXT_CRD_SUP              0x04
+#define SCSI_VEXT_WU_SUP               0x08
+
+/*
+ * Extended INQUIRY Data VPD page.  The members add up to 64 bytes
+ * (10 bytes of fields plus 54 reserved bytes) without padding.
+ */
+struct scsi_vpd_ext_inquiry {
+       uint8_t peripheral;
+       uint8_t page_code;
+       uint16_t alloc_len;
+       uint8_t check;
+       uint8_t sup;
+       uint8_t sup2;
+       uint8_t luiclr;
+       uint8_t cbcs;
+       uint8_t micro_dl;
+       uint8_t reserved[54];
+};
+
+/* Mask of the PIV bit (bit 7) within a designation descriptor byte. */
+#define SPC_VPD_DESIG_PIV      0x80
+
+/*
+ * designation descriptor
+ *
+ * The first two bytes are described with bit-fields.  The order in which
+ * a compiler allocates bit-fields inside a byte is implementation-defined.
+ * NOTE(review): the declaration order (low nibble first) matches the wire
+ * format only on ABIs that allocate bit-fields starting from the least
+ * significant bit -- confirm for every supported target.
+ */
+struct scsi_desig_desc {
+       uint8_t code_set        : 4;
+       uint8_t protocol_id     : 4;
+       uint8_t type            : 4;
+       uint8_t association     : 2;
+       uint8_t reserved0       : 1;
+       uint8_t piv             : 1;
+       uint8_t reserved1;
+       uint8_t len;
+       uint8_t desig[];
+};
+
+/*
+ * mode page policy descriptor
+ *
+ * Four single-byte members: the page and sub-page the entry refers to,
+ * the policy byte and one reserved byte.
+ */
+struct scsi_mpage_policy_desc {
+       uint8_t page_code;
+       uint8_t sub_page_code;
+       uint8_t policy;
+       uint8_t reserved;
+};
+
+/*
+ * target port descriptor
+ *
+ * Four byte header followed by the designator itself; len is the header
+ * member that sits directly in front of the variable-length designator[].
+ */
+struct scsi_tgt_port_desc {
+       uint8_t code_set;
+       uint8_t desig_type;
+       uint8_t reserved;
+       uint8_t len;
+       uint8_t designator[];
+};
+
+/*
+ * SCSI port designation descriptor
+ *
+ * Seven 16-bit members (14 bytes) followed by the variable-length target
+ * port descriptors in tgt_desc[].
+ * NOTE(review): the 16-bit members are presumably big-endian on the wire;
+ * confirm that users convert them explicitly.
+ */
+struct scsi_port_desc {
+       uint16_t reserved;
+       uint16_t rel_port_id;
+       uint16_t reserved2;
+       uint16_t init_port_len;
+       uint16_t init_port_id;
+       uint16_t reserved3;
+       uint16_t tgt_desc_len;
+       uint8_t tgt_desc[];
+};
+
+/*
+ * SCSI UNMAP block descriptor
+ *
+ * One 16 byte entry naming a range of logical blocks: starting address,
+ * number of blocks and a reserved word.
+ * NOTE(review): multi-byte members are presumably big-endian on the wire;
+ * confirm that users convert them explicitly.
+ */
+struct scsi_unmap_bdesc {
+       /* UNMAP LOGICAL BLOCK ADDRESS */
+       uint64_t lba;
+
+       /* NUMBER OF LOGICAL BLOCKS */
+       uint32_t block_count;
+
+       /* RESERVED */
+       uint32_t reserved;
+};
+
+/*
+ * Logical block provisioning flags: single-bit masks in bits 7, 6 and 5
+ * of one byte.
+ */
+#define SCSI_UNMAP_LBPU                                (1 << 7)
+#define SCSI_UNMAP_LBPWS                       (1 << 6)
+#define SCSI_UNMAP_LBPWS10                     (1 << 5)
+
+/* Provisioning type values (full, resource or thin provisioning). */
+#define SCSI_UNMAP_FULL_PROVISIONING   0x00
+#define SCSI_UNMAP_RESOURCE_PROVISIONING       0x01
+#define SCSI_UNMAP_THIN_PROVISIONING   0x02
+
+#endif /* _SCSI_SPEC_H */
diff --git a/examples/vhost_scsi/vhost_scsi.c b/examples/vhost_scsi/vhost_scsi.c
new file mode 100644 (file)
index 0000000..b4f1f8d
--- /dev/null
@@ -0,0 +1,474 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <semaphore.h>
+#include <linux/virtio_scsi.h>
+#include <linux/virtio_ring.h>
+
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_vhost.h>
+
+#include "vhost_scsi.h"
+#include "scsi_spec.h"
+
+/*
+ * Virtio feature bits offered to the guest.  The mask is built with 64-bit
+ * shifts because virtio feature masks are 64 bits wide; an int shift would
+ * silently break as soon as a feature bit above 30 is added.
+ */
+#define VIRTIO_SCSI_FEATURES ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |\
+                             (1ULL << VIRTIO_RING_F_EVENT_IDX) |\
+                             (1ULL << VIRTIO_SCSI_F_INOUT) |\
+                             (1ULL << VIRTIO_SCSI_F_CHANGE))
+
+/* Full path of the vhost-user socket file; filled in by
+ * vhost_scsi_ctrlr_construct() and removed again by signal_handler().
+ */
+static char dev_pathname[PATH_MAX] = "";
+
+/* the one and only controller, created in main() */
+static struct vhost_scsi_ctrlr *g_vhost_ctrlr;
+/* set by destroy_device() to ask the worker thread to leave its loop */
+static int g_should_stop;
+/* posted by the worker thread right before it returns */
+static sem_t exit_sem;
+
+/*
+ * Look up the controller that serves a socket.  Only one socket is
+ * supported for now, so the name is ignored and the global controller
+ * (possibly NULL) is handed back.
+ */
+static struct vhost_scsi_ctrlr *
+vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name)
+{
+       struct vhost_scsi_ctrlr *only_ctrlr = g_vhost_ctrlr;
+
+       return only_ctrlr;
+}
+
+/*
+ * Translate a guest physical address into a host virtual address using
+ * the memory table of the controller that owns vhost device 'vid'.
+ *
+ * A failing socket-name lookup, a missing controller or a missing memory
+ * table are programming errors and trip an assertion.
+ *
+ * Returns whatever rte_vhost_gpa_to_vva() reports for 'gpa'.
+ * NOTE(review): callers do not check the result; presumably 0 means that
+ * the address is not mapped -- confirm against the vhost API.
+ */
+static uint64_t gpa_to_vva(int vid, uint64_t gpa)
+{
+       char path[PATH_MAX];
+       struct vhost_scsi_ctrlr *ctrlr;
+       int ret = 0;
+
+       ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
+       if (ret) {
+               fprintf(stderr, "Cannot get socket name\n");
+               /* the check used to be inverted and could never fire */
+               assert(ret == 0);
+       }
+
+       ctrlr = vhost_scsi_ctrlr_find(path);
+       if (!ctrlr) {
+               fprintf(stderr, "Controller is not ready\n");
+               assert(ctrlr != NULL);
+       }
+
+       assert(ctrlr->mem != NULL);
+
+       return rte_vhost_gpa_to_vva(ctrlr->mem, gpa);
+}
+
+/* Follow the 'next' link of cur_desc inside the descriptor table vq_desc. */
+static struct vring_desc *
+descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc)
+{
+       return vq_desc + cur_desc->next;
+}
+
+/* Tell whether cur_desc is chained to a further descriptor. */
+static bool
+descriptor_has_next(struct vring_desc *cur_desc)
+{
+       return (cur_desc->flags & VRING_DESC_F_NEXT) != 0;
+}
+
+/* Tell whether cur_desc carries the VRING_DESC_F_WRITE flag. */
+static bool
+descriptor_is_wr(struct vring_desc *cur_desc)
+{
+       return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
+}
+
+/*
+ * Report a finished task to the guest: publish one element on the used
+ * ring of the task's virtqueue and signal the queue's call eventfd.
+ */
+static void
+submit_completion(struct vhost_scsi_task *task)
+{
+       struct rte_vhost_vring *vq;
+       struct vring_used *used;
+       uint16_t slot;
+
+       vq = task->vq;
+       used = vq->used;
+       slot = used->idx & (vq->size - 1);
+       /* Fill out the next entry in the "used" ring.  id = the
+        * index of the descriptor that contained the SCSI request.
+        * len = the total amount of data transferred for the SCSI
+        * request. We must report the correct len, for variable
+        * length SCSI CDBs, where we may return less data than
+        * allocated by the guest VM.
+        */
+       used->ring[slot].id = task->req_idx;
+       used->ring[slot].len = task->data_len;
+
+       /* The element must be visible to the guest before the index that
+        * publishes it, otherwise the guest may read a stale entry.
+        */
+       rte_smp_wmb();
+       used->idx++;
+       /* make the new index visible before the guest is notified */
+       rte_smp_mb();
+
+       /* Send an interrupt back to the guest VM so that it knows
+        * a completion is ready to be processed.
+        */
+       eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
+/*
+ * Set up a task whose data flows from the device to the guest.
+ *
+ * The descriptor the task currently points at holds the response; every
+ * descriptor chained behind it is recorded as one data iovec and its
+ * length is added to data_len.
+ */
+static void
+vhost_process_read_payload_chain(struct vhost_scsi_task *task)
+{
+       uint64_t vva;
+
+       task->iovs_cnt = 0;
+       vva = gpa_to_vva(task->bdev->vid, task->desc->addr);
+       task->resp = (void *)(uintptr_t)vva;
+
+       for (;;) {
+               if (!descriptor_has_next(task->desc))
+                       break;
+
+               task->desc = descriptor_get_next(task->vq->desc, task->desc);
+               vva = gpa_to_vva(task->bdev->vid, task->desc->addr);
+               task->iovs[task->iovs_cnt].iov_base = (void *)(uintptr_t)vva;
+               task->iovs[task->iovs_cnt].iov_len = task->desc->len;
+               task->data_len += task->desc->len;
+               task->iovs_cnt++;
+       }
+}
+
+/*
+ * Set up a task whose data flows from the guest to the device.
+ *
+ * Starting with the descriptor the task currently points at, descriptors
+ * are recorded as data iovecs until one without a successor is reached;
+ * that last descriptor is used for the response.
+ */
+static void
+vhost_process_write_payload_chain(struct vhost_scsi_task *task)
+{
+       uint64_t vva;
+
+       task->iovs_cnt = 0;
+
+       for (;;) {
+               vva = gpa_to_vva(task->bdev->vid, task->desc->addr);
+               task->iovs[task->iovs_cnt].iov_base = (void *)(uintptr_t)vva;
+               task->iovs[task->iovs_cnt].iov_len = task->desc->len;
+               task->data_len += task->desc->len;
+               task->iovs_cnt++;
+
+               task->desc = descriptor_get_next(task->vq->desc, task->desc);
+               if (!descriptor_has_next(task->desc))
+                       break;
+       }
+
+       vva = gpa_to_vva(task->bdev->vid, task->desc->addr);
+       task->resp = (void *)(uintptr_t)vva;
+}
+
+/*
+ * Create a memory backed block device.
+ *
+ * bdev_name:   device name, truncated to fit bdev->name
+ * bdev_serial: stored as product name, truncated to fit
+ * blk_size:    size of one block in bytes
+ * blk_cnt:     number of blocks
+ * wce_enable:  value stored in bdev->write_cache
+ *
+ * Returns the new device, or NULL if the requested size overflows or
+ * memory cannot be allocated (nothing is leaked in that case).
+ */
+static struct vhost_block_dev *
+vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial,
+                         uint32_t blk_size, uint64_t blk_cnt,
+                         bool wce_enable)
+{
+       struct vhost_block_dev *bdev;
+
+       /* refuse sizes whose byte count cannot be represented */
+       if (blk_size != 0 && blk_cnt > SIZE_MAX / blk_size)
+               return NULL;
+
+       bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
+       if (!bdev)
+               return NULL;
+
+       /* snprintf always terminates the string, strncpy does not */
+       snprintf(bdev->name, sizeof(bdev->name), "%s", bdev_name);
+       snprintf(bdev->product_name, sizeof(bdev->product_name), "%s",
+                bdev_serial);
+       bdev->blocklen = blk_size;
+       bdev->blockcnt = blk_cnt;
+       bdev->write_cache = wce_enable;
+
+       /* use memory as disk storage space */
+       bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
+       if (!bdev->data) {
+               fprintf(stderr, "no enough reseverd huge memory for disk\n");
+               rte_free(bdev);
+               return NULL;
+       }
+
+       return bdev;
+}
+
+/*
+ * Serve every request that is pending on virtqueue q_idx.
+ *
+ * For each new entry of the available ring a task is allocated, the
+ * request, payload and response descriptors are mapped, the SCSI command
+ * is executed and the completion is pushed back to the guest.
+ */
+static void
+process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx)
+{
+       struct vhost_scsi_queue *queue = &ctrlr->bdev->queues[q_idx];
+       struct rte_vhost_vring *ring = &queue->vq;
+       int rc;
+
+       rc = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, ring);
+       assert(rc == 0);
+
+       while (ring->avail->idx != queue->last_used_idx) {
+               struct vhost_scsi_task *task;
+               uint16_t slot = queue->last_used_idx & (ring->size - 1);
+               int head = ring->avail->ring[slot];
+
+               task = rte_zmalloc(NULL, sizeof(*task), 0);
+               assert(task != NULL);
+
+               task->ctrlr = ctrlr;
+               task->bdev = ctrlr->bdev;
+               task->vq = ring;
+               task->req_idx = head;
+               task->desc = &task->vq->desc[task->req_idx];
+
+               /* does not support indirect descriptors */
+               assert(!(task->desc->flags & VRING_DESC_F_INDIRECT));
+               queue->last_used_idx++;
+
+               /* first descriptor: the request header */
+               task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
+                                                         task->desc->addr);
+
+               /* the second descriptor decides the transfer direction */
+               task->desc = descriptor_get_next(task->vq->desc, task->desc);
+               if (descriptor_has_next(task->desc)) {
+                       if (descriptor_is_wr(task->desc)) {
+                               task->dxfer_dir = SCSI_DIR_FROM_DEV;
+                               vhost_process_read_payload_chain(task);
+                       } else {
+                               task->dxfer_dir = SCSI_DIR_TO_DEV;
+                               vhost_process_write_payload_chain(task);
+                       }
+               } else {
+                       /* no payload, only the response follows */
+                       task->dxfer_dir = SCSI_DIR_NONE;
+                       task->resp = (void *)(uintptr_t)
+                                             gpa_to_vva(task->bdev->vid,
+                                                        task->desc->addr);
+               }
+
+               rc = vhost_bdev_process_scsi_commands(ctrlr->bdev, task);
+               if (rc == 0) {
+                       /* successfully */
+                       task->resp->response = VIRTIO_SCSI_S_OK;
+                       task->resp->status = 0;
+                       task->resp->resid = 0;
+               } else {
+                       /* invalid response */
+                       task->resp->response = VIRTIO_SCSI_S_BAD_TARGET;
+               }
+
+               submit_completion(task);
+               rte_free(task);
+       }
+}
+
+/*
+ * Main framework for processing IOs.
+ *
+ * Thread body started by new_device().  It tries to pin itself to core 0
+ * and then polls the IO queue until destroy_device() asks it to stop.
+ * exit_sem is posted right before the thread returns.
+ *
+ * arg: the struct vhost_scsi_ctrlr to serve.  Always returns NULL.
+ */
+static void *
+ctrlr_worker(void *arg)
+{
+       uint32_t idx, num;
+       struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg;
+       cpu_set_t cpuset;
+       pthread_t thread;
+
+       thread = pthread_self();
+       CPU_ZERO(&cpuset);
+       CPU_SET(0, &cpuset);
+       /* pinning is best effort, keep going unpinned if it fails */
+       if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset) != 0)
+               fprintf(stderr, "Cannot pin worker thread to core 0\n");
+
+       num =  rte_vhost_get_vring_num(ctrlr->bdev->vid);
+       fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num);
+
+       if (num != NUM_OF_SCSI_QUEUES) {
+               fprintf(stderr, "Only 1 IO queue are supported\n");
+               /* this is an error, do not report success to the caller */
+               exit(EXIT_FAILURE);
+       }
+
+       while (!g_should_stop && ctrlr->bdev != NULL) {
+               /* At least 3 vrings, currently only can support 1 IO queue
+                * Queue 2 for IO queue, does not support TMF and hotplug
+                * for the example application now
+                */
+               for (idx = 2; idx < num; idx++)
+                       process_requestq(ctrlr, idx);
+       }
+
+       fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
+       sem_post(&exit_sem);
+       return NULL;
+}
+
+/*
+ * vhost callback: a guest driver finished setting up device 'vid'.
+ *
+ * Fetches the guest memory table, creates the 128MiB memory backed disk,
+ * records the current ring indexes and starts the worker thread.
+ *
+ * Returns 0 on success and -1 on failure; on failure everything acquired
+ * here is released again.
+ */
+static int
+new_device(int vid)
+{
+       char path[PATH_MAX];
+       struct vhost_scsi_ctrlr *ctrlr;
+       struct vhost_scsi_queue *scsi_vq;
+       struct rte_vhost_vring *vq;
+       pthread_t tid;
+       int i, ret;
+
+       ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
+       if (ret) {
+               fprintf(stderr, "Cannot get socket name\n");
+               return -1;
+       }
+
+       ctrlr = vhost_scsi_ctrlr_find(path);
+       if (!ctrlr) {
+               fprintf(stderr, "Controller is not ready\n");
+               return -1;
+       }
+
+       ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
+       if (ret) {
+               fprintf(stderr, "Get Controller memory region failed\n");
+               return -1;
+       }
+       assert(ctrlr->mem != NULL);
+
+       /* hardcoded block device information with 128MiB */
+       ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
+                                               4096, 32768, 0);
+       if (!ctrlr->bdev)
+               goto err_free_mem;
+
+       ctrlr->bdev->vid = vid;
+
+       /* Disable Notifications */
+       for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) {
+               rte_vhost_enable_guest_notification(vid, i, 0);
+               /* restore used index */
+               scsi_vq = &ctrlr->bdev->queues[i];
+               vq = &scsi_vq->vq;
+               ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
+               assert(ret == 0);
+               scsi_vq->last_used_idx = vq->used->idx;
+               scsi_vq->last_avail_idx = vq->used->idx;
+       }
+
+       g_should_stop = 0;
+       fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
+       /* pthread_create() reports failure with a positive error number,
+        * never with a negative value
+        */
+       ret = pthread_create(&tid, NULL, &ctrlr_worker, ctrlr);
+       if (ret != 0) {
+               fprintf(stderr, "Worker Thread Started Failed\n");
+               goto err_free_bdev;
+       }
+       pthread_detach(tid);
+       return 0;
+
+err_free_bdev:
+       rte_free(ctrlr->bdev->data);
+       rte_free(ctrlr->bdev);
+       ctrlr->bdev = NULL;
+err_free_mem:
+       /* the memory table is allocated by the vhost library for us */
+       free(ctrlr->mem);
+       ctrlr->mem = NULL;
+       return -1;
+}
+
+/*
+ * vhost callback: device 'vid' goes away.
+ *
+ * Asks the worker thread to stop, waits until it has left its polling
+ * loop and only then releases the block device and the guest memory
+ * table.  Releasing them earlier would pull them from under the worker.
+ */
+static void
+destroy_device(int vid)
+{
+       char path[PATH_MAX];
+       struct vhost_scsi_ctrlr *ctrlr;
+       struct vhost_block_dev *bdev;
+
+       /* never print an uninitialised buffer if the lookup fails */
+       if (rte_vhost_get_ifname(vid, path, PATH_MAX) != 0)
+               path[0] = '\0';
+       fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
+       ctrlr = vhost_scsi_ctrlr_find(path);
+       if (!ctrlr) {
+               fprintf(stderr, "Destroy Ctrlr Failed\n");
+               return;
+       }
+
+       /* stop the worker first, it still dereferences ctrlr->bdev */
+       g_should_stop = 1;
+       sem_wait(&exit_sem);
+
+       bdev = ctrlr->bdev;
+       ctrlr->bdev = NULL;
+       if (bdev != NULL) {
+               rte_free(bdev->data);
+               rte_free(bdev);
+       }
+
+       /* the memory table was allocated by rte_vhost_get_mem_table() */
+       free(ctrlr->mem);
+       ctrlr->mem = NULL;
+}
+
+/*
+ * Callbacks handed to the vhost library by vhost_scsi_ctrlr_construct():
+ * new_device() and destroy_device() for device creation and removal.
+ */
+static const struct vhost_device_ops vhost_scsi_device_ops = {
+       .new_device =  new_device,
+       .destroy_device = destroy_device,
+};
+
+/*
+ * Create the vhost SCSI controller and register its vhost-user socket.
+ *
+ * The socket is created as <current working directory>/<ctrlr_name>; a
+ * stale file of that name is removed first.  The resulting path is kept
+ * in dev_pathname.
+ *
+ * Returns the new controller, or NULL on failure.  When a step fails
+ * after the socket has been registered, the registration is undone.
+ */
+static struct vhost_scsi_ctrlr *
+vhost_scsi_ctrlr_construct(const char *ctrlr_name)
+{
+       int ret;
+       struct vhost_scsi_ctrlr *ctrlr;
+       char *path;
+       char cwd[PATH_MAX];
+
+       /* always use current directory */
+       path = getcwd(cwd, PATH_MAX);
+       if (!path) {
+               fprintf(stderr, "Cannot get current working directory\n");
+               return NULL;
+       }
+       ret = snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path,
+                      ctrlr_name);
+       if (ret < 0 || (size_t)ret >= sizeof(dev_pathname)) {
+               /* do not keep a truncated path that names another file */
+               dev_pathname[0] = '\0';
+               fprintf(stderr, "Socket path is too long\n");
+               return NULL;
+       }
+
+       if (access(dev_pathname, F_OK) != -1) {
+               if (unlink(dev_pathname) != 0)
+                       rte_exit(EXIT_FAILURE, "Cannot remove %s.\n",
+                                dev_pathname);
+       }
+
+       if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
+               fprintf(stderr, "socket %s already exists\n", dev_pathname);
+               return NULL;
+       }
+
+       fprintf(stdout, "socket file: %s created\n", dev_pathname);
+
+       ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES);
+       if (ret != 0) {
+               fprintf(stderr, "Set vhost driver features failed\n");
+               goto err_unregister;
+       }
+
+       ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
+       if (!ctrlr)
+               goto err_unregister;
+
+       ret = rte_vhost_driver_callback_register(dev_pathname,
+                                                &vhost_scsi_device_ops);
+       if (ret != 0) {
+               fprintf(stderr, "Register vhost driver callbacks failed\n");
+               rte_free(ctrlr);
+               goto err_unregister;
+       }
+
+       return ctrlr;
+
+err_unregister:
+       rte_vhost_driver_unregister(dev_pathname);
+       return NULL;
+}
+
+/*
+ * SIGINT handler: remove the socket file if it is still present and
+ * terminate the process with status 0.
+ */
+static void
+signal_handler(__rte_unused int signum)
+{
+       int socket_present = (access(dev_pathname, F_OK) == 0);
+
+       if (socket_present)
+               unlink(dev_pathname);
+
+       exit(0);
+}
+
+/*
+ * Entry point: initialise the EAL, create the controller on the socket
+ * "vhost.socket" in the current directory, start the vhost driver and
+ * then idle until the process is interrupted.
+ *
+ * Returns non-zero when the set-up fails.
+ */
+int main(int argc, char *argv[])
+{
+       int ret;
+
+       signal(SIGINT, signal_handler);
+
+       /* init EAL */
+       ret = rte_eal_init(argc, argv);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+
+       g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket");
+       if (g_vhost_ctrlr == NULL) {
+               fprintf(stderr, "Construct vhost scsi controller failed\n");
+               /* a failed start-up must not look like success */
+               return -1;
+       }
+
+       if (sem_init(&exit_sem, 0, 0) < 0) {
+               fprintf(stderr, "Error init exit_sem\n");
+               return -1;
+       }
+
+       if (rte_vhost_driver_start(dev_pathname) < 0) {
+               fprintf(stderr, "Failed to start vhost driver\n");
+               return -1;
+       }
+
+       /* loop for exit the application */
+       while (1)
+               sleep(1);
+
+       return 0;
+}
+
diff --git a/examples/vhost_scsi/vhost_scsi.h b/examples/vhost_scsi/vhost_scsi.h
new file mode 100644 (file)
index 0000000..edb416d
--- /dev/null
@@ -0,0 +1,108 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_SCSI_H_
+#define _VHOST_SCSI_H_
+
+#include <sys/uio.h>
+#include <stdint.h>
+#include <linux/virtio_scsi.h>
+#include <linux/virtio_ring.h>
+
+#include <rte_vhost.h>
+
+/**
+ * Per-virtqueue state kept by the sample: the vring description from the
+ * vhost library plus two 16-bit ring indexes.
+ */
+struct vhost_scsi_queue {
+       /** Vring description, type provided by rte_vhost.h. */
+       struct rte_vhost_vring vq;
+       /**
+        * NOTE(review): presumably the next avail-ring index to consume;
+        * confirm against the users in vhost_scsi.c.
+        */
+       uint16_t last_avail_idx;
+       /**
+        * NOTE(review): presumably the next used-ring index to fill;
+        * confirm against the users in vhost_scsi.c.
+        */
+       uint16_t last_used_idx;
+};
+
+/**
+ * Number of entries in vhost_block_dev::queues.
+ * NOTE(review): presumably control + event + one request queue, following
+ * the virtio-scsi queue layout; confirm.
+ */
+#define NUM_OF_SCSI_QUEUES 3
+
+/**
+ * Memory-backed block device exposed by the sample, together with the
+ * vhost device ID and the virtqueue state attached to it.
+ */
+struct vhost_block_dev {
+       /** ID for vhost library. */
+       int vid;
+       /** Queues for the block device */
+       struct vhost_scsi_queue queues[NUM_OF_SCSI_QUEUES];
+       /** Unique name for this block device. */
+       char name[64];
+
+       /** Unique product name for this kind of block device. */
+       char product_name[256];
+
+       /** Size in bytes of a logical block for the backend */
+       uint32_t blocklen;
+
+       /** Number of blocks */
+       uint64_t blockcnt;
+
+       /** write cache enabled, not used at the moment */
+       int write_cache;
+
+       /**
+        * use memory as disk storage space
+        * NOTE(review): presumably blocklen * blockcnt bytes long; confirm
+        * against the allocation site.
+        */
+       uint8_t *data;
+};
+
+/**
+ * Controller state of the sample: one block device and a pointer to the
+ * guest memory description. The type is declared cache-line aligned.
+ */
+struct vhost_scsi_ctrlr {
+       /** Only support 1 LUN for the example */
+       struct vhost_block_dev *bdev;
+       /** VM memory region */
+       struct rte_vhost_memory *mem;
+} __rte_cache_aligned;
+
+/** Capacity of the vhost_scsi_task::iovs array. */
+#define VHOST_SCSI_MAX_IOVS 128
+
+/**
+ * Data transfer direction of a SCSI command.
+ * NOTE(review): presumably the values stored in vhost_scsi_task::dxfer_dir;
+ * confirm against scsi.c.
+ */
+enum scsi_data_dir {
+       /** No data phase. */
+       SCSI_DIR_NONE = 0,
+       /** Data moves towards the device. */
+       SCSI_DIR_TO_DEV = 1,
+       /** Data moves from the device. */
+       SCSI_DIR_FROM_DEV = 2,
+};
+
+/**
+ * Context of one SCSI request: the virtio-scsi request/response buffers,
+ * the data buffers as an iovec array, and back-pointers to the vring,
+ * block device and controller the request belongs to.
+ */
+struct vhost_scsi_task {
+       /**
+        * NOTE(review): presumably the head descriptor index of the request
+        * in the vring; confirm against vhost_scsi.c.
+        */
+       int req_idx;
+       /**
+        * Transfer direction.
+        * NOTE(review): presumably holds an enum scsi_data_dir value even
+        * though it is declared uint32_t; confirm.
+        */
+       uint32_t dxfer_dir;
+       /** Data length. NOTE(review): presumably in bytes; confirm. */
+       uint32_t data_len;
+       /** virtio-scsi command request header. */
+       struct virtio_scsi_cmd_req *req;
+       /** virtio-scsi command response. */
+       struct virtio_scsi_cmd_resp *resp;
+       /** Data buffers; at most VHOST_SCSI_MAX_IOVS entries. */
+       struct iovec iovs[VHOST_SCSI_MAX_IOVS];
+       /** NOTE(review): presumably the number of valid entries in iovs. */
+       uint32_t iovs_cnt;
+       /** Vring descriptor associated with the task. */
+       struct vring_desc *desc;
+       /** Vring the task belongs to. */
+       struct rte_vhost_vring *vq;
+       /** Block device the task targets. */
+       struct vhost_block_dev *bdev;
+       /** Owning controller. */
+       struct vhost_scsi_ctrlr *ctrlr;
+};
+
+/**
+ * Process the SCSI command described by a task against a block device.
+ *
+ * @param bdev
+ *   Block device the command is addressed to.
+ * @param task
+ *   Task describing the request.
+ * @return
+ *   NOTE(review): return-value convention is not visible in this header;
+ *   see the definition (presumably in scsi.c) before relying on it.
+ */
+int vhost_bdev_process_scsi_commands(struct vhost_block_dev *bdev,
+                                    struct vhost_scsi_task *task);
+
+#endif /* _VHOST_SCSI_H_ */