examples/vhost: register with lib
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index c81b8f5..fdd1c81 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
 #include <rte_log.h>
 #include <rte_string_fns.h>
 #include <rte_malloc.h>
+#include <rte_virtio_net.h>
 
 #include "main.h"
-#include "virtio-net.h"
-#include "vhost-net-cdev.h"
 
 #define MAX_QUEUES 128
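
Editor's note: the point of this commit is that the vhost control path (character device setup, message handling, the data-path helpers deleted below) moves into librte_vhost, which the newly included rte_virtio_net.h exposes. The registration code itself sits outside these hunks; the following is a minimal sketch of how main() would hook into the library, assuming the contemporary entry points rte_vhost_driver_register(), rte_vhost_driver_callback_register() and rte_vhost_driver_session_start(), and an ops struct named virtio_net_device_ops — treat all of these names as assumptions.

/* Sketch only: registering the example with librte_vhost (assumed API). */
static int new_device(struct virtio_net *dev);               /* hypothetical */
static void destroy_device(volatile struct virtio_net *dev); /* hypothetical */

static const struct virtio_net_device_ops virtio_net_device_ops = {
        .new_device     = new_device,
        .destroy_device = destroy_device,
};

static int
register_with_vhost_lib(void)
{
        /* The library builds the character device from dev_basename. */
        if (rte_vhost_driver_register(dev_basename) != 0)
                return -1;

        rte_vhost_driver_callback_register(&virtio_net_device_ops);

        /* Blocks while servicing vhost messages; run it on its own lcore. */
        rte_vhost_driver_session_start();
        return 0;
}
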
 
@@ -218,8 +217,6 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
 /* Character device basename. Can be set by user. */
 static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";
 
-/* Charater device index. Can be set by user. */
-static uint32_t dev_index = 0;
 
 /* This can be set by the user so it is made available here. */
 extern uint64_t VHOST_FEATURES;
@@ -560,7 +557,7 @@ us_vhost_usage(const char *prgname)
        RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
        "               --vm2vm [0|1|2]\n"
        "               --rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n"
-       "               --dev-basename <name> --dev-index [0-N]\n"
+       "               --dev-basename <name>\n"
        "               --nb-devices ND\n"
        "               -p PORTMASK: Set mask for ports to be used by application\n"
        "               --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n"
@@ -570,7 +567,6 @@ us_vhost_usage(const char *prgname)
        "               --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
        "               --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
        "               --dev-basename: The basename to be used for the character device.\n"
-       "               --dev-index [0-N]: Defaults to zero if not used. Index is appended to basename.\n"
        "               --zero-copy [0|1]: disable(default)/enable rx/tx "
                        "zero copy\n"
        "               --rx-desc-num [0-N]: the number of descriptors on rx, "
@@ -598,7 +594,6 @@ us_vhost_parse_args(int argc, char **argv)
                {"mergeable", required_argument, NULL, 0},
                {"stats", required_argument, NULL, 0},
                {"dev-basename", required_argument, NULL, 0},
-               {"dev-index", required_argument, NULL, 0},
                {"zero-copy", required_argument, NULL, 0},
                {"rx-desc-num", required_argument, NULL, 0},
                {"tx-desc-num", required_argument, NULL, 0},
@@ -708,17 +703,6 @@ us_vhost_parse_args(int argc, char **argv)
                                }
                        }
 
-                       /* Set character device index. */
-                       if (!strncmp(long_option[option_index].name, "dev-index", MAX_LONG_OPT_SZ)) {
-                               ret = parse_num_opt(optarg, INT32_MAX);
-                               if (ret == -1) {
-                                       RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for character device index [0..N]\n");
-                                       us_vhost_usage(prgname);
-                                       return -1;
-                               } else
-                                       dev_index = ret;
-                       }
-
                        /* Enable/disable rx/tx zero copy. */
                        if (!strncmp(long_option[option_index].name,
                                "zero-copy", MAX_LONG_OPT_SZ)) {
@@ -866,37 +850,12 @@ static unsigned check_ports_num(unsigned nb_ports)
 #define PRINT_PACKET(device, addr, size, header) do{} while(0)
 #endif
 
-/*
- * Function to convert guest physical addresses to vhost virtual addresses. This
- * is used to convert virtio buffer addresses.
- */
-static inline uint64_t __attribute__((always_inline))
-gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
-{
-       struct virtio_memory_regions *region;
-       uint32_t regionidx;
-       uint64_t vhost_va = 0;
-
-       for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
-               region = &dev->mem->regions[regionidx];
-               if ((guest_pa >= region->guest_phys_address) &&
-                       (guest_pa <= region->guest_phys_address_end)) {
-                       vhost_va = region->address_offset + guest_pa;
-                       break;
-               }
-       }
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") GPA %p| VVA %p\n",
-               dev->device_fh, (void*)(uintptr_t)guest_pa, (void*)(uintptr_t)vhost_va);
-
-       return vhost_va;
-}
-
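
Editor's note: gpa_to_vva() is dropped from the example but is still called further down (e.g. in attach_rxmbuf_zcp), so the expectation is that rte_virtio_net.h now ships an equivalent inline. A sketch of that helper, assuming the library kept the same region walk as the deleted code:

/* Sketch of the assumed library-side inline (rte_virtio_net.h). */
static inline uint64_t
gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
{
        struct virtio_memory_regions *region;
        uint32_t i;

        for (i = 0; i < dev->mem->nregions; i++) {
                region = &dev->mem->regions[i];
                if (guest_pa >= region->guest_phys_address &&
                    guest_pa <= region->guest_phys_address_end)
                        return guest_pa + region->address_offset;
        }
        return 0; /* no matching region: invalid guest address */
}
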
 /*
  * Function to convert guest physical addresses to vhost physical addresses.
  * This is used to convert virtio buffer addresses.
  */
 static inline uint64_t __attribute__((always_inline))
-gpa_to_hpa(struct virtio_net *dev, uint64_t guest_pa,
+gpa_to_hpa(struct vhost_dev *vdev, uint64_t guest_pa,
        uint32_t buf_len, hpa_type *addr_type)
 {
        struct virtio_memory_regions_hpa *region;
@@ -905,8 +864,8 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t guest_pa,
 
        *addr_type = PHYS_ADDR_INVALID;
 
-       for (regionidx = 0; regionidx < dev->mem->nregions_hpa; regionidx++) {
-               region = &dev->mem->regions_hpa[regionidx];
+       for (regionidx = 0; regionidx < vdev->nregions_hpa; regionidx++) {
+               region = &vdev->regions_hpa[regionidx];
                if ((guest_pa >= region->guest_phys_address) &&
                        (guest_pa <= region->guest_phys_address_end)) {
                        vhost_pa = region->host_phys_addr_offset + guest_pa;
@@ -920,503 +879,12 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t guest_pa,
        }
 
        LOG_DEBUG(VHOST_DATA, "(%"PRIu64") GPA %p| HPA %p\n",
-               dev->device_fh, (void *)(uintptr_t)guest_pa,
+               vdev->dev->device_fh, (void *)(uintptr_t)guest_pa,
                (void *)(uintptr_t)vhost_pa);
 
        return vhost_pa;
 }
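
Editor's note: for the zero-copy path the translation must also be physically contiguous, which is what the addr_type out-parameter reports. A usage sketch — PHYS_ADDR_CONTINUOUS is assumed from main.h's hpa_type enum (only PHYS_ADDR_INVALID is visible in this hunk):

/* Usage sketch: zero-copy callers need a physically contiguous
 * translation back; anything else is unusable for NIC DMA. */
static int
buffer_usable_for_zcp(struct vhost_dev *vdev, uint64_t gpa, uint32_t len)
{
        hpa_type addr_type;

        (void)gpa_to_hpa(vdev, gpa, len, &addr_type);
        return addr_type == PHYS_ADDR_CONTINUOUS; /* assumed enum value */
}
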
 
-/*
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that were succesfully
- * added to the RX queue. This function works when mergeable is disabled.
- */
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
-{
-       struct vhost_virtqueue *vq;
-       struct vring_desc *desc;
-       struct rte_mbuf *buff;
-       /* The virtio_hdr is initialised to 0. */
-       struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0,0,0,0,0,0},0};
-       uint64_t buff_addr = 0;
-       uint64_t buff_hdr_addr = 0;
-       uint32_t head[MAX_PKT_BURST], packet_len = 0;
-       uint32_t head_idx, packet_success = 0;
-       uint32_t retry = 0;
-       uint16_t avail_idx, res_cur_idx;
-       uint16_t res_base_idx, res_end_idx;
-       uint16_t free_entries;
-       uint8_t success = 0;
-
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
-       vq = dev->virtqueue[VIRTIO_RXQ];
-       count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
-
-       /* As many data cores may want access to available buffers, they need to be reserved. */
-       do {
-               res_base_idx = vq->last_used_idx_res;
-               avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-
-               free_entries = (avail_idx - res_base_idx);
-               /* If retry is enabled and the queue is full then we wait and retry to avoid packet loss. */
-               if (enable_retry && unlikely(count > free_entries)) {
-                       for (retry = 0; retry < burst_rx_retry_num; retry++) {
-                               rte_delay_us(burst_rx_delay_time);
-                               avail_idx =
-                                       *((volatile uint16_t *)&vq->avail->idx);
-                               free_entries = (avail_idx - res_base_idx);
-                               if (count <= free_entries)
-                                       break;
-                       }
-               }
-
-               /*check that we have enough buffers*/
-               if (unlikely(count > free_entries))
-                       count = free_entries;
-
-               if (count == 0)
-                       return 0;
-
-               res_end_idx = res_base_idx + count;
-               /* vq->last_used_idx_res is atomically updated. */
-               success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx,
-                                                                       res_end_idx);
-       } while (unlikely(success == 0));
-       res_cur_idx = res_base_idx;
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx);
-
-       /* Prefetch available ring to retrieve indexes. */
-       rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
-
-       /* Retrieve all of the head indexes first to avoid caching issues. */
-       for (head_idx = 0; head_idx < count; head_idx++)
-               head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
-
-       /*Prefetch descriptor index. */
-       rte_prefetch0(&vq->desc[head[packet_success]]);
-
-       while (res_cur_idx != res_end_idx) {
-               /* Get descriptor from available ring */
-               desc = &vq->desc[head[packet_success]];
-
-               buff = pkts[packet_success];
-
-               /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
-               buff_addr = gpa_to_vva(dev, desc->addr);
-               /* Prefetch buffer address. */
-               rte_prefetch0((void*)(uintptr_t)buff_addr);
-
-               /* Copy virtio_hdr to packet and increment buffer address */
-               buff_hdr_addr = buff_addr;
-               packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
-
-               /*
-                * If the descriptors are chained the header and data are
-                * placed in separate buffers.
-                */
-               if (desc->flags & VRING_DESC_F_NEXT) {
-                       desc->len = vq->vhost_hlen;
-                       desc = &vq->desc[desc->next];
-                       /* Buffer address translation. */
-                       buff_addr = gpa_to_vva(dev, desc->addr);
-                       desc->len = rte_pktmbuf_data_len(buff);
-               } else {
-                       buff_addr += vq->vhost_hlen;
-                       desc->len = packet_len;
-               }
-
-               /* Update used ring with desc information */
-               vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
-               vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
-
-               /* Copy mbuf data to buffer */
-               rte_memcpy((void *)(uintptr_t)buff_addr,
-                       rte_pktmbuf_mtod(buff, const void *),
-                       rte_pktmbuf_data_len(buff));
-               PRINT_PACKET(dev, (uintptr_t)buff_addr,
-                       rte_pktmbuf_data_len(buff), 0);
-
-               res_cur_idx++;
-               packet_success++;
-
-               rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
-                       (const void *)&virtio_hdr, vq->vhost_hlen);
-
-               PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
-
-               if (res_cur_idx < res_end_idx) {
-                       /* Prefetch descriptor index. */
-                       rte_prefetch0(&vq->desc[head[packet_success]]);
-               }
-       }
-
-       rte_compiler_barrier();
-
-       /* Wait until it's our turn to add our buffer to the used ring. */
-       while (unlikely(vq->last_used_idx != res_base_idx))
-               rte_pause();
-
-       *(volatile uint16_t *)&vq->used->idx += count;
-       vq->last_used_idx = res_end_idx;
-
-       /* Kick the guest if necessary. */
-       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-               eventfd_write((int)vq->kickfd, 1);
-       return count;
-}
-
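
Editor's note: virtio_dev_rx() (and, below, its mergeable siblings) move into the library wholesale. Call sites would be rewritten along these lines — rte_vhost_enqueue_burst() and its signature are taken from the contemporary rte_virtio_net.h and should be read as an assumption:

/* Sketch: RX enqueue toward the guest now goes through the library. */
static void
drain_eth_rx_sketch(struct virtio_net *dev, struct rte_mbuf **pkts,
                uint16_t rx_count)
{
        /* The lib reserves ring entries, copies the mbufs in, updates the
         * used ring and kicks the guest — everything the deleted loop did. */
        uint16_t enqueued = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ,
                        pkts, rx_count);

        /* Data was copied, so the host mbufs can be freed unconditionally. */
        while (rx_count)
                rte_pktmbuf_free(pkts[--rx_count]);
        (void)enqueued; /* a real caller would feed this into the stats */
}
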
-static inline uint32_t __attribute__((always_inline))
-copy_from_mbuf_to_vring(struct virtio_net *dev,
-       uint16_t res_base_idx, uint16_t res_end_idx,
-       struct rte_mbuf *pkt)
-{
-       uint32_t vec_idx = 0;
-       uint32_t entry_success = 0;
-       struct vhost_virtqueue *vq;
-       /* The virtio_hdr is initialised to 0. */
-       struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
-               {0, 0, 0, 0, 0, 0}, 0};
-       uint16_t cur_idx = res_base_idx;
-       uint64_t vb_addr = 0;
-       uint64_t vb_hdr_addr = 0;
-       uint32_t seg_offset = 0;
-       uint32_t vb_offset = 0;
-       uint32_t seg_avail;
-       uint32_t vb_avail;
-       uint32_t cpy_len, entry_len;
-
-       if (pkt == NULL)
-               return 0;
-
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
-               "End Index %d\n",
-               dev->device_fh, cur_idx, res_end_idx);
-
-       /*
-        * Convert from gpa to vva
-        * (guest physical addr -> vhost virtual addr)
-        */
-       vq = dev->virtqueue[VIRTIO_RXQ];
-       vb_addr =
-               gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
-       vb_hdr_addr = vb_addr;
-
-       /* Prefetch buffer address. */
-       rte_prefetch0((void *)(uintptr_t)vb_addr);
-
-       virtio_hdr.num_buffers = res_end_idx - res_base_idx;
-
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
-               dev->device_fh, virtio_hdr.num_buffers);
-
-       rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
-               (const void *)&virtio_hdr, vq->vhost_hlen);
-
-       PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);
-
-       seg_avail = rte_pktmbuf_data_len(pkt);
-       vb_offset = vq->vhost_hlen;
-       vb_avail =
-               vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
-
-       entry_len = vq->vhost_hlen;
-
-       if (vb_avail == 0) {
-               uint32_t desc_idx =
-                       vq->buf_vec[vec_idx].desc_idx;
-               vq->desc[desc_idx].len = vq->vhost_hlen;
-
-               if ((vq->desc[desc_idx].flags
-                       & VRING_DESC_F_NEXT) == 0) {
-                       /* Update used ring with desc information */
-                       vq->used->ring[cur_idx & (vq->size - 1)].id
-                               = vq->buf_vec[vec_idx].desc_idx;
-                       vq->used->ring[cur_idx & (vq->size - 1)].len
-                               = entry_len;
-
-                       entry_len = 0;
-                       cur_idx++;
-                       entry_success++;
-               }
-
-               vec_idx++;
-               vb_addr =
-                       gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
-
-               /* Prefetch buffer address. */
-               rte_prefetch0((void *)(uintptr_t)vb_addr);
-               vb_offset = 0;
-               vb_avail = vq->buf_vec[vec_idx].buf_len;
-       }
-
-       cpy_len = RTE_MIN(vb_avail, seg_avail);
-
-       while (cpy_len > 0) {
-               /* Copy mbuf data to vring buffer */
-               rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
-                       (const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
-                       cpy_len);
-
-               PRINT_PACKET(dev,
-                       (uintptr_t)(vb_addr + vb_offset),
-                       cpy_len, 0);
-
-               seg_offset += cpy_len;
-               vb_offset += cpy_len;
-               seg_avail -= cpy_len;
-               vb_avail -= cpy_len;
-               entry_len += cpy_len;
-
-               if (seg_avail != 0) {
-                       /*
-                        * The virtio buffer in this vring
-                        * entry reach to its end.
-                        * But the segment doesn't complete.
-                        */
-                       if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
-                               VRING_DESC_F_NEXT) == 0) {
-                               /* Update used ring with desc information */
-                               vq->used->ring[cur_idx & (vq->size - 1)].id
-                                       = vq->buf_vec[vec_idx].desc_idx;
-                               vq->used->ring[cur_idx & (vq->size - 1)].len
-                                       = entry_len;
-                               entry_len = 0;
-                               cur_idx++;
-                               entry_success++;
-                       }
-
-                       vec_idx++;
-                       vb_addr = gpa_to_vva(dev,
-                               vq->buf_vec[vec_idx].buf_addr);
-                       vb_offset = 0;
-                       vb_avail = vq->buf_vec[vec_idx].buf_len;
-                       cpy_len = RTE_MIN(vb_avail, seg_avail);
-               } else {
-                       /*
-                        * This current segment complete, need continue to
-                        * check if the whole packet complete or not.
-                        */
-                       pkt = pkt->next;
-                       if (pkt != NULL) {
-                               /*
-                                * There are more segments.
-                                */
-                               if (vb_avail == 0) {
-                                       /*
-                                        * This current buffer from vring is
-                                        * used up, need fetch next buffer
-                                        * from buf_vec.
-                                        */
-                                       uint32_t desc_idx =
-                                               vq->buf_vec[vec_idx].desc_idx;
-                                       vq->desc[desc_idx].len = vb_offset;
-
-                                       if ((vq->desc[desc_idx].flags &
-                                               VRING_DESC_F_NEXT) == 0) {
-                                               uint16_t wrapped_idx =
-                                                       cur_idx & (vq->size - 1);
-                                               /*
-                                                * Update used ring with the
-                                                * descriptor information
-                                                */
-                                               vq->used->ring[wrapped_idx].id
-                                                       = desc_idx;
-                                               vq->used->ring[wrapped_idx].len
-                                                       = entry_len;
-                                               entry_success++;
-                                               entry_len = 0;
-                                               cur_idx++;
-                                       }
-
-                                       /* Get next buffer from buf_vec. */
-                                       vec_idx++;
-                                       vb_addr = gpa_to_vva(dev,
-                                               vq->buf_vec[vec_idx].buf_addr);
-                                       vb_avail =
-                                               vq->buf_vec[vec_idx].buf_len;
-                                       vb_offset = 0;
-                               }
-
-                               seg_offset = 0;
-                               seg_avail = rte_pktmbuf_data_len(pkt);
-                               cpy_len = RTE_MIN(vb_avail, seg_avail);
-                       } else {
-                               /*
-                                * This whole packet completes.
-                                */
-                               uint32_t desc_idx =
-                                       vq->buf_vec[vec_idx].desc_idx;
-                               vq->desc[desc_idx].len = vb_offset;
-
-                               while (vq->desc[desc_idx].flags &
-                                       VRING_DESC_F_NEXT) {
-                                       desc_idx = vq->desc[desc_idx].next;
-                                        vq->desc[desc_idx].len = 0;
-                               }
-
-                               /* Update used ring with desc information */
-                               vq->used->ring[cur_idx & (vq->size - 1)].id
-                                       = vq->buf_vec[vec_idx].desc_idx;
-                               vq->used->ring[cur_idx & (vq->size - 1)].len
-                                       = entry_len;
-                               entry_len = 0;
-                               cur_idx++;
-                               entry_success++;
-                               seg_avail = 0;
-                               cpy_len = RTE_MIN(vb_avail, seg_avail);
-                       }
-               }
-       }
-
-       return entry_success;
-}
-
-/*
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that were succesfully
- * added to the RX queue. This function works for mergeable RX.
- */
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_merge_rx(struct virtio_net *dev, struct rte_mbuf **pkts,
-       uint32_t count)
-{
-       struct vhost_virtqueue *vq;
-       uint32_t pkt_idx = 0, entry_success = 0;
-       uint32_t retry = 0;
-       uint16_t avail_idx, res_cur_idx;
-       uint16_t res_base_idx, res_end_idx;
-       uint8_t success = 0;
-
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
-               dev->device_fh);
-       vq = dev->virtqueue[VIRTIO_RXQ];
-       count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
-
-       if (count == 0)
-               return 0;
-
-       for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-               uint32_t secure_len = 0;
-               uint16_t need_cnt;
-               uint32_t vec_idx = 0;
-               uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
-               uint16_t i, id;
-
-               do {
-                       /*
-                        * As many data cores may want access to available
-                        * buffers, they need to be reserved.
-                        */
-                       res_base_idx = vq->last_used_idx_res;
-                       res_cur_idx = res_base_idx;
-
-                       do {
-                               avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-                               if (unlikely(res_cur_idx == avail_idx)) {
-                                       /*
-                                        * If retry is enabled and the queue is
-                                        * full then we wait and retry to avoid
-                                        * packet loss.
-                                        */
-                                       if (enable_retry) {
-                                               uint8_t cont = 0;
-                                               for (retry = 0; retry < burst_rx_retry_num; retry++) {
-                                                       rte_delay_us(burst_rx_delay_time);
-                                                       avail_idx =
-                                                               *((volatile uint16_t *)&vq->avail->idx);
-                                                       if (likely(res_cur_idx != avail_idx)) {
-                                                               cont = 1;
-                                                               break;
-                                                       }
-                                               }
-                                               if (cont == 1)
-                                                       continue;
-                                       }
-
-                                       LOG_DEBUG(VHOST_DATA,
-                                               "(%"PRIu64") Failed "
-                                               "to get enough desc from "
-                                               "vring\n",
-                                               dev->device_fh);
-                                       return pkt_idx;
-                               } else {
-                                       uint16_t wrapped_idx =
-                                               (res_cur_idx) & (vq->size - 1);
-                                       uint32_t idx =
-                                               vq->avail->ring[wrapped_idx];
-                                       uint8_t next_desc;
-
-                                       do {
-                                               next_desc = 0;
-                                               secure_len += vq->desc[idx].len;
-                                               if (vq->desc[idx].flags &
-                                                       VRING_DESC_F_NEXT) {
-                                                       idx = vq->desc[idx].next;
-                                                       next_desc = 1;
-                                               }
-                                       } while (next_desc);
-
-                                       res_cur_idx++;
-                               }
-                       } while (pkt_len > secure_len);
-
-                       /* vq->last_used_idx_res is atomically updated. */
-                       success = rte_atomic16_cmpset(&vq->last_used_idx_res,
-                                                       res_base_idx,
-                                                       res_cur_idx);
-               } while (success == 0);
-
-               id = res_base_idx;
-               need_cnt = res_cur_idx - res_base_idx;
-
-               for (i = 0; i < need_cnt; i++, id++) {
-                       uint16_t wrapped_idx = id & (vq->size - 1);
-                       uint32_t idx = vq->avail->ring[wrapped_idx];
-                       uint8_t next_desc;
-                       do {
-                               next_desc = 0;
-                               vq->buf_vec[vec_idx].buf_addr =
-                                       vq->desc[idx].addr;
-                               vq->buf_vec[vec_idx].buf_len =
-                                       vq->desc[idx].len;
-                               vq->buf_vec[vec_idx].desc_idx = idx;
-                               vec_idx++;
-
-                               if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
-                                       idx = vq->desc[idx].next;
-                                       next_desc = 1;
-                               }
-                       } while (next_desc);
-               }
-
-               res_end_idx = res_cur_idx;
-
-               entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
-                       res_end_idx, pkts[pkt_idx]);
-
-               rte_compiler_barrier();
-
-               /*
-                * Wait until it's our turn to add our buffer
-                * to the used ring.
-                */
-               while (unlikely(vq->last_used_idx != res_base_idx))
-                       rte_pause();
-
-               *(volatile uint16_t *)&vq->used->idx += entry_success;
-               vq->last_used_idx = res_end_idx;
-
-               /* Kick the guest if necessary. */
-               if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-                       eventfd_write((int)vq->kickfd, 1);
-       }
-
-       return count;
-}
-
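
Editor's note: copy_from_mbuf_to_vring() and virtio_dev_merge_rx() go the same way; the library entry point is expected to pick the mergeable layout by checking VIRTIO_NET_F_MRG_RXBUF itself (an assumption), so the application no longer branches on the feature bit when enqueueing. Illustrative fragment from the data-core loop:

/* Before: the app picked the RX variant itself. */
if (likely(mergeable == 0))
        ret_count = virtio_dev_rx(dev, pkts_burst, rx_count);
else
        ret_count = virtio_dev_merge_rx(dev, pkts_burst, rx_count);

/* After (sketch): the library resolves the descriptor layout internally. */
ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_burst, rx_count);
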
 /*
  * Compares a packet destination MAC address to a device MAC address.
  */
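
Editor's note: the body of ether_addr_cmp() sits outside the hunk; a plausible minimal form, assuming a plain byte comparison (the real implementation may well use a masked 64-bit compare instead):

/* Sketch: MAC equality over the six address bytes (assumed body). */
static inline int
ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb)
{
        return memcmp(ea, eb, ETHER_ADDR_LEN) == 0;
}
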
@@ -1431,10 +899,11 @@ ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb)
  * vlan tag to a VMDQ.
  */
 static int
-link_vmdq(struct virtio_net *dev, struct rte_mbuf *m)
+link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
        struct ether_hdr *pkt_hdr;
        struct virtio_net_data_ll *dev_ll;
+       struct virtio_net *dev = vdev->dev;
        int i, ret;
 
        /* Learn MAC address of guest device from packet */
@@ -1443,7 +912,7 @@ link_vmdq(struct virtio_net *dev, struct rte_mbuf *m)
        dev_ll = ll_root_used;
 
        while (dev_ll != NULL) {
-               if (ether_addr_cmp(&(pkt_hdr->s_addr), &dev_ll->dev->mac_address)) {
+               if (ether_addr_cmp(&(pkt_hdr->s_addr), &dev_ll->vdev->mac_address)) {
                        RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: This device is using an existing MAC address and has not been registered.\n", dev->device_fh);
                        return -1;
                }
@@ -1451,30 +920,30 @@ link_vmdq(struct virtio_net *dev, struct rte_mbuf *m)
        }
 
        for (i = 0; i < ETHER_ADDR_LEN; i++)
-               dev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];
+               vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];
 
        /* vlan_tag currently uses the device_id. */
-       dev->vlan_tag = vlan_tags[dev->device_fh];
+       vdev->vlan_tag = vlan_tags[dev->device_fh];
 
        /* Print out VMDQ registration info. */
        RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\n",
                dev->device_fh,
-               dev->mac_address.addr_bytes[0], dev->mac_address.addr_bytes[1],
-               dev->mac_address.addr_bytes[2], dev->mac_address.addr_bytes[3],
-               dev->mac_address.addr_bytes[4], dev->mac_address.addr_bytes[5],
-               dev->vlan_tag);
+               vdev->mac_address.addr_bytes[0], vdev->mac_address.addr_bytes[1],
+               vdev->mac_address.addr_bytes[2], vdev->mac_address.addr_bytes[3],
+               vdev->mac_address.addr_bytes[4], vdev->mac_address.addr_bytes[5],
+               vdev->vlan_tag);
 
        /* Register the MAC address. */
-       ret = rte_eth_dev_mac_addr_add(ports[0], &dev->mac_address, (uint32_t)dev->device_fh);
+       ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address, (uint32_t)dev->device_fh);
        if (ret)
                RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n",
                                        dev->device_fh);
 
        /* Enable stripping of the vlan tag as we handle routing. */
-       rte_eth_dev_set_vlan_strip_on_queue(ports[0], (uint16_t)dev->vmdq_rx_q, 1);
+       rte_eth_dev_set_vlan_strip_on_queue(ports[0], (uint16_t)vdev->vmdq_rx_q, 1);
 
        /* Set device as ready for RX. */
-       dev->ready = DEVICE_RX;
+       vdev->ready = DEVICE_RX;
 
        return 0;
 }
@@ -1484,33 +953,33 @@ link_vmdq(struct virtio_net *dev, struct rte_mbuf *m)
  * queue before disabling RX on the device.
  */
 static inline void
-unlink_vmdq(struct virtio_net *dev)
+unlink_vmdq(struct vhost_dev *vdev)
 {
        unsigned i = 0;
        unsigned rx_count;
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
 
-       if (dev->ready == DEVICE_RX) {
+       if (vdev->ready == DEVICE_RX) {
                /*clear MAC and VLAN settings*/
-               rte_eth_dev_mac_addr_remove(ports[0], &dev->mac_address);
+               rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address);
                for (i = 0; i < 6; i++)
-                       dev->mac_address.addr_bytes[i] = 0;
+                       vdev->mac_address.addr_bytes[i] = 0;
 
-               dev->vlan_tag = 0;
+               vdev->vlan_tag = 0;
 
                /*Clear out the receive buffers*/
                rx_count = rte_eth_rx_burst(ports[0],
-                                       (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
+                                       (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
 
                while (rx_count) {
                        for (i = 0; i < rx_count; i++)
                                rte_pktmbuf_free(pkts_burst[i]);
 
                        rx_count = rte_eth_rx_burst(ports[0],
-                                       (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
+                                       (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
                }
 
-               dev->ready = DEVICE_MAC_LEARNING;
+               vdev->ready = DEVICE_MAC_LEARNING;
        }
 }
 
@@ -1519,11 +988,13 @@ unlink_vmdq(struct virtio_net *dev)
  * the packet on that devices RX queue. If not then return.
  */
 static inline unsigned __attribute__((always_inline))
-virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m)
+virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
        struct virtio_net_data_ll *dev_ll;
        struct ether_hdr *pkt_hdr;
        uint64_t ret = 0;
+       struct virtio_net *dev = vdev->dev;
+       struct virtio_net *tdev; /* destination virtio device */
 
        pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
 
@@ -1531,22 +1002,23 @@ virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m)
        dev_ll = ll_root_used;
 
        while (dev_ll != NULL) {
-               if ((dev_ll->dev->ready == DEVICE_RX) && ether_addr_cmp(&(pkt_hdr->d_addr),
-                                         &dev_ll->dev->mac_address)) {
+               if ((dev_ll->vdev->ready == DEVICE_RX) && ether_addr_cmp(&(pkt_hdr->d_addr),
+                                         &dev_ll->vdev->mac_address)) {
 
                        /* Drop the packet if the TX packet is destined for the TX device. */
-                       if (dev_ll->dev->device_fh == dev->device_fh) {
+                       if (dev_ll->vdev->dev->device_fh == dev->device_fh) {
                                LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: Source and destination MAC addresses are the same. Dropping packet.\n",
-                                                       dev_ll->dev->device_fh);
+                                                       dev->device_fh);
                                return 0;
                        }
+                       tdev = dev_ll->vdev->dev;
 
 
-                       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is local\n", dev_ll->dev->device_fh);
+                       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is local\n", tdev->device_fh);
 
-                       if (dev_ll->dev->remove) {
+                       if (dev_ll->vdev->remove) {
                                /*drop the packet if the device is marked for removal*/
-                               LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Device is marked for removal\n", dev_ll->dev->device_fh);
+                               LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Device is marked for removal\n", tdev->device_fh);
                        } else {
-                               uint32_t mergeable =
-                                       dev_ll->dev->features &
+                               uint32_t mergeable =
+                                       tdev->features &
@@ -1561,13 +1033,13 @@ virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m)
 
                                if (enable_stats) {
                                        rte_atomic64_add(
-                                       &dev_statistics[dev_ll->dev->device_fh].rx_total_atomic,
+                                       &dev_statistics[tdev->device_fh].rx_total_atomic,
                                        1);
                                        rte_atomic64_add(
-                                       &dev_statistics[dev_ll->dev->device_fh].rx_atomic,
+                                       &dev_statistics[tdev->device_fh].rx_atomic,
                                        ret);
                                        dev_statistics[dev->device_fh].tx_total++;
                                        dev_statistics[dev->device_fh].tx += ret;
                                }
                        }
 
@@ -1584,7 +1056,7 @@ virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m)
  * or the physical port.
  */
 static inline void __attribute__((always_inline))
-virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *mbuf_pool, uint16_t vlan_tag)
+virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, struct rte_mempool *mbuf_pool, uint16_t vlan_tag)
 {
        struct mbuf_table *tx_q;
        struct vlan_ethhdr *vlan_hdr;
@@ -1594,37 +1066,38 @@ virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *
        const uint16_t lcore_id = rte_lcore_id();
        struct virtio_net_data_ll *dev_ll = ll_root_used;
        struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+       struct virtio_net *dev = vdev->dev;
 
        /*check if destination is local VM*/
-       if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(dev, m) == 0))
+       if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
                return;
 
        if (vm2vm_mode == VM2VM_HARDWARE) {
                while (dev_ll != NULL) {
-                       if ((dev_ll->dev->ready == DEVICE_RX)
+                       if ((dev_ll->vdev->ready == DEVICE_RX)
                                && ether_addr_cmp(&(pkt_hdr->d_addr),
-                               &dev_ll->dev->mac_address)) {
+                               &dev_ll->vdev->mac_address)) {
                                /*
                                 * Drop the packet if the TX packet is
                                 * destined for the TX device.
                                 */
-                               if (dev_ll->dev->device_fh == dev->device_fh) {
+                               if (dev_ll->vdev->dev->device_fh == dev->device_fh) {
                                        LOG_DEBUG(VHOST_DATA,
                                        "(%"PRIu64") TX: Source and destination"
                                        " MAC addresses are the same. Dropping "
                                        "packet.\n",
-                                       dev_ll->dev->device_fh);
+                                       dev_ll->vdev->dev->device_fh);
                                        return;
                                }
                                offset = 4;
                                vlan_tag =
                                (uint16_t)
-                               vlan_tags[(uint16_t)dev_ll->dev->device_fh];
+                               vlan_tags[(uint16_t)dev_ll->vdev->dev->device_fh];
 
                                LOG_DEBUG(VHOST_DATA,
                                "(%"PRIu64") TX: pkt to local VM device id:"
                                "(%"PRIu64") vlan tag: %d.\n",
-                               dev->device_fh, dev_ll->dev->device_fh,
+                               dev->device_fh, dev_ll->vdev->dev->device_fh,
                                vlan_tag);
 
                                break;
@@ -1713,321 +1186,6 @@ virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *
        tx_q->len = len;
        return;
 }
-
-static inline void __attribute__((always_inline))
-virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool)
-{
-       struct rte_mbuf m;
-       struct vhost_virtqueue *vq;
-       struct vring_desc *desc;
-       uint64_t buff_addr = 0;
-       uint32_t head[MAX_PKT_BURST];
-       uint32_t used_idx;
-       uint32_t i;
-       uint16_t free_entries, packet_success = 0;
-       uint16_t avail_idx;
-
-       vq = dev->virtqueue[VIRTIO_TXQ];
-       avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
-
-       /* If there are no available buffers then return. */
-       if (vq->last_used_idx == avail_idx)
-               return;
-
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh);
-
-       /* Prefetch available ring to retrieve head indexes. */
-       rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
-
-       /*get the number of free entries in the ring*/
-       free_entries = (avail_idx - vq->last_used_idx);
-
-       /* Limit to MAX_PKT_BURST. */
-       if (free_entries > MAX_PKT_BURST)
-               free_entries = MAX_PKT_BURST;
-
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
-       /* Retrieve all of the head indexes first to avoid caching issues. */
-       for (i = 0; i < free_entries; i++)
-               head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
-
-       /* Prefetch descriptor index. */
-       rte_prefetch0(&vq->desc[head[packet_success]]);
-       rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
-
-       while (packet_success < free_entries) {
-               desc = &vq->desc[head[packet_success]];
-
-               /* Discard first buffer as it is the virtio header */
-               desc = &vq->desc[desc->next];
-
-               /* Buffer address translation. */
-               buff_addr = gpa_to_vva(dev, desc->addr);
-               /* Prefetch buffer address. */
-               rte_prefetch0((void*)(uintptr_t)buff_addr);
-
-               used_idx = vq->last_used_idx & (vq->size - 1);
-
-               if (packet_success < (free_entries - 1)) {
-                       /* Prefetch descriptor index. */
-                       rte_prefetch0(&vq->desc[head[packet_success+1]]);
-                       rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
-               }
-
-               /* Update used index buffer information. */
-               vq->used->ring[used_idx].id = head[packet_success];
-               vq->used->ring[used_idx].len = 0;
-
-               /* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */
-               m.data_len = desc->len;
-               m.pkt_len = desc->len;
-               m.data_off = 0;
-
-               PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
-
-               /* If this is the first received packet we need to learn the MAC and setup VMDQ */
-               if (dev->ready == DEVICE_MAC_LEARNING) {
-                       if (dev->remove || (link_vmdq(dev, &m) == -1)) {
-                               /*discard frame if device is scheduled for removal or a duplicate MAC address is found. */
-                               packet_success += free_entries;
-                               vq->last_used_idx += packet_success;
-                               break;
-                       }
-               }
-               virtio_tx_route(dev, &m, mbuf_pool, (uint16_t)dev->device_fh);
-
-               vq->last_used_idx++;
-               packet_success++;
-       }
-
-       rte_compiler_barrier();
-       vq->used->idx += packet_success;
-       /* Kick guest if required. */
-       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-               eventfd_write((int)vq->kickfd, 1);
-}
-
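
Editor's note: virtio_dev_tx() likewise collapses into the library; a data core would drain the guest TX ring with rte_vhost_dequeue_burst() (name and signature per the contemporary header — an assumption) and keep only MAC learning and routing in the application:

/* Sketch: drain guest TX through the library, then route locally. */
static void
drain_virtio_tx_sketch(struct vhost_dev *vdev, struct rte_mempool *mbuf_pool)
{
        struct rte_mbuf *pkts[MAX_PKT_BURST];
        uint16_t i, count;

        /* The lib walks the TX ring, allocates mbufs from mbuf_pool and
         * copies the guest buffers out — replacing the deleted loop. */
        count = rte_vhost_dequeue_burst(vdev->dev, VIRTIO_TXQ, mbuf_pool,
                        pkts, MAX_PKT_BURST);

        for (i = 0; i < count; i++) {
                /* First traffic from a guest triggers MAC learning and the
                 * VMDQ binding, exactly as the deleted loop did. */
                if (vdev->ready == DEVICE_MAC_LEARNING &&
                    (vdev->remove || link_vmdq(vdev, pkts[i]) == -1)) {
                        rte_pktmbuf_free(pkts[i]);
                        continue;
                }
                virtio_tx_route(vdev, pkts[i], mbuf_pool,
                                (uint16_t)vdev->dev->device_fh);
        }
}
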
-/* This function works for TX packets with mergeable feature enabled. */
-static inline void __attribute__((always_inline))
-virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool)
-{
-       struct rte_mbuf *m, *prev;
-       struct vhost_virtqueue *vq;
-       struct vring_desc *desc;
-       uint64_t vb_addr = 0;
-       uint32_t head[MAX_PKT_BURST];
-       uint32_t used_idx;
-       uint32_t i;
-       uint16_t free_entries, entry_success = 0;
-       uint16_t avail_idx;
-       uint32_t buf_size = MBUF_SIZE - (sizeof(struct rte_mbuf)
-                       + RTE_PKTMBUF_HEADROOM);
-
-       vq = dev->virtqueue[VIRTIO_TXQ];
-       avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
-
-       /* If there are no available buffers then return. */
-       if (vq->last_used_idx == avail_idx)
-               return;
-
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_tx()\n",
-               dev->device_fh);
-
-       /* Prefetch available ring to retrieve head indexes. */
-       rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
-
-       /*get the number of free entries in the ring*/
-       free_entries = (avail_idx - vq->last_used_idx);
-
-       /* Limit to MAX_PKT_BURST. */
-       free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
-
-       LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
-               dev->device_fh, free_entries);
-       /* Retrieve all of the head indexes first to avoid caching issues. */
-       for (i = 0; i < free_entries; i++)
-               head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
-
-       /* Prefetch descriptor index. */
-       rte_prefetch0(&vq->desc[head[entry_success]]);
-       rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
-
-       while (entry_success < free_entries) {
-               uint32_t vb_avail, vb_offset;
-               uint32_t seg_avail, seg_offset;
-               uint32_t cpy_len;
-               uint32_t seg_num = 0;
-               struct rte_mbuf *cur;
-               uint8_t alloc_err = 0;
-
-               desc = &vq->desc[head[entry_success]];
-
-               /* Discard first buffer as it is the virtio header */
-               desc = &vq->desc[desc->next];
-
-               /* Buffer address translation. */
-               vb_addr = gpa_to_vva(dev, desc->addr);
-               /* Prefetch buffer address. */
-               rte_prefetch0((void *)(uintptr_t)vb_addr);
-
-               used_idx = vq->last_used_idx & (vq->size - 1);
-
-               if (entry_success < (free_entries - 1)) {
-                       /* Prefetch descriptor index. */
-                       rte_prefetch0(&vq->desc[head[entry_success+1]]);
-                       rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
-               }
-
-               /* Update used index buffer information. */
-               vq->used->ring[used_idx].id = head[entry_success];
-               vq->used->ring[used_idx].len = 0;
-
-               vb_offset = 0;
-               vb_avail = desc->len;
-               seg_offset = 0;
-               seg_avail = buf_size;
-               cpy_len = RTE_MIN(vb_avail, seg_avail);
-
-               PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
-
-               /* Allocate an mbuf and populate the structure. */
-               m = rte_pktmbuf_alloc(mbuf_pool);
-               if (unlikely(m == NULL)) {
-                       RTE_LOG(ERR, VHOST_DATA,
-                               "Failed to allocate memory for mbuf.\n");
-                       return;
-               }
-
-               seg_num++;
-               cur = m;
-               prev = m;
-               while (cpy_len != 0) {
-                       rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
-                               (void *)((uintptr_t)(vb_addr + vb_offset)),
-                               cpy_len);
-
-                       seg_offset += cpy_len;
-                       vb_offset += cpy_len;
-                       vb_avail -= cpy_len;
-                       seg_avail -= cpy_len;
-
-                       if (vb_avail != 0) {
-                               /*
-                                * The segment reachs to its end,
-                                * while the virtio buffer in TX vring has
-                                * more data to be copied.
-                                */
-                               cur->data_len = seg_offset;
-                               m->pkt_len += seg_offset;
-                               /* Allocate mbuf and populate the structure. */
-                               cur = rte_pktmbuf_alloc(mbuf_pool);
-                               if (unlikely(cur == NULL)) {
-                                       RTE_LOG(ERR, VHOST_DATA, "Failed to "
-                                               "allocate memory for mbuf.\n");
-                                       rte_pktmbuf_free(m);
-                                       alloc_err = 1;
-                                       break;
-                               }
-
-                               seg_num++;
-                               prev->next = cur;
-                               prev = cur;
-                               seg_offset = 0;
-                               seg_avail = buf_size;
-                       } else {
-                               if (desc->flags & VRING_DESC_F_NEXT) {
-                                       /*
-                                        * There are more virtio buffers in
-                                        * same vring entry need to be copied.
-                                        */
-                                       if (seg_avail == 0) {
-                                               /*
-                                                * The current segment hasn't
-                                                * room to accomodate more
-                                                * data.
-                                                */
-                                               cur->data_len = seg_offset;
-                                               m->pkt_len += seg_offset;
-                                               /*
-                                                * Allocate an mbuf and
-                                                * populate the structure.
-                                                */
-                                               cur = rte_pktmbuf_alloc(mbuf_pool);
-                                               if (unlikely(cur == NULL)) {
-                                                       RTE_LOG(ERR,
-                                                               VHOST_DATA,
-                                                               "Failed to "
-                                                               "allocate memory "
-                                                               "for mbuf\n");
-                                                       rte_pktmbuf_free(m);
-                                                       alloc_err = 1;
-                                                       break;
-                                               }
-                                               seg_num++;
-                                               prev->next = cur;
-                                               prev = cur;
-                                               seg_offset = 0;
-                                               seg_avail = buf_size;
-                                       }
-
-                                       desc = &vq->desc[desc->next];
-
-                                       /* Buffer address translation. */
-                                       vb_addr = gpa_to_vva(dev, desc->addr);
-                                       /* Prefetch buffer address. */
-                                       rte_prefetch0((void *)(uintptr_t)vb_addr);
-                                       vb_offset = 0;
-                                       vb_avail = desc->len;
-
-                                       PRINT_PACKET(dev, (uintptr_t)vb_addr,
-                                               desc->len, 0);
-                               } else {
-                                       /* The whole packet completes. */
-                                       cur->data_len = seg_offset;
-                                       m->pkt_len += seg_offset;
-                                       vb_avail = 0;
-                               }
-                       }
-
-                       cpy_len = RTE_MIN(vb_avail, seg_avail);
-               }
-
-               if (unlikely(alloc_err == 1))
-                       break;
-
-               m->nb_segs = seg_num;
-
-               /*
-                * If this is the first received packet we need to learn
-                * the MAC and setup VMDQ
-                */
-               if (dev->ready == DEVICE_MAC_LEARNING) {
-                       if (dev->remove || (link_vmdq(dev, m) == -1)) {
-                               /*
-                                * Discard frame if device is scheduled for
-                                * removal or a duplicate MAC address is found.
-                                */
-                               entry_success = free_entries;
-                               vq->last_used_idx += entry_success;
-                               rte_pktmbuf_free(m);
-                               break;
-                       }
-               }
-
-               virtio_tx_route(dev, m, mbuf_pool, (uint16_t)dev->device_fh);
-               vq->last_used_idx++;
-               entry_success++;
-               rte_pktmbuf_free(m);
-       }
-
-       rte_compiler_barrier();
-       vq->used->idx += entry_success;
-       /* Kick guest if required. */
-       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-               eventfd_write((int)vq->kickfd, 1);
-
-}
-
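
Editor's note: the mergeable TX variant disappears for the same reason — the library dequeue is expected to reassemble chained descriptors into multi-segment mbufs on its own (an assumption), so the TX-side feature branch reduces to a single call:

/* Before: two app-side TX drains, chosen by the mergeable feature bit. */
if (likely(mergeable == 0))
        virtio_dev_tx(dev, mbuf_pool);
else
        virtio_dev_merge_tx(dev, mbuf_pool);

/* After (sketch): one library call covers both descriptor layouts. */
count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool,
                pkts_burst, MAX_PKT_BURST);
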
 /*
  * This function is called by each data core. It handles all RX/TX registered with the
  * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
@@ -2038,6 +1196,7 @@ switch_worker(__attribute__((unused)) void *arg)
 {
        struct rte_mempool *mbuf_pool = arg;
        struct virtio_net *dev = NULL;
+       struct vhost_dev *vdev = NULL;
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
        struct virtio_net_data_ll *dev_ll;
        struct mbuf_table *tx_q;
@@ -2105,20 +1264,21 @@ switch_worker(__attribute__((unused)) void *arg)
 
                while (dev_ll != NULL) {
                        /*get virtio device ID*/
-                       dev = dev_ll->dev;
+                       vdev = dev_ll->vdev;
+                       dev = vdev->dev;
                        mergeable =
                                dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);
 
-                       if (dev->remove) {
+                       if (vdev->remove) {
                                dev_ll = dev_ll->next;
-                               unlink_vmdq(dev);
-                               dev->ready = DEVICE_SAFE_REMOVE;
+                               unlink_vmdq(vdev);
+                               vdev->ready = DEVICE_SAFE_REMOVE;
                                continue;
                        }
-                       if (likely(dev->ready == DEVICE_RX)) {
+                       if (likely(vdev->ready == DEVICE_RX)) {
                                /*Handle guest RX*/
                                rx_count = rte_eth_rx_burst(ports[0],
-                                       (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
+                                       vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
 
                                if (rx_count) {
                                        if (likely(mergeable == 0))
@@ -2132,10 +1292,10 @@ switch_worker(__attribute__((unused)) void *arg)
 
                                        if (enable_stats) {
                                                rte_atomic64_add(
-                                               &dev_statistics[dev_ll->dev->device_fh].rx_total_atomic,
+                                               &dev_statistics[dev_ll->vdev->dev->device_fh].rx_total_atomic,
                                                rx_count);
                                                rte_atomic64_add(
-                                               &dev_statistics[dev_ll->dev->device_fh].rx_atomic, ret_count);
+                                               &dev_statistics[dev_ll->vdev->dev->device_fh].rx_atomic, ret_count);
                                        }
                                        while (likely(rx_count)) {
                                                rx_count--;
@@ -2145,7 +1305,7 @@ switch_worker(__attribute__((unused)) void *arg)
                                }
                        }
 
-                       if (!dev->remove) {
+                       if (!vdev->remove) {
                                /*Handle guest TX*/
                                if (likely(mergeable == 0))
                                        virtio_dev_tx(dev, mbuf_pool);
@@ -2267,12 +1427,13 @@ attach_rxmbuf_zcp(struct virtio_net *dev)
        struct rte_mbuf *mbuf = NULL;
        struct vpool *vpool;
        hpa_type addr_type;
+       struct vhost_dev *vdev = (struct vhost_dev *)dev->priv;
 
-       vpool = &vpool_array[dev->vmdq_rx_q];
+       vpool = &vpool_array[vdev->vmdq_rx_q];
        vq = dev->virtqueue[VIRTIO_RXQ];
 
        do {
-               if (unlikely(get_available_ring_index_zcp(dev, &res_base_idx,
+               if (unlikely(get_available_ring_index_zcp(vdev->dev, &res_base_idx,
                                1) != 1))
                        return;
                desc_idx = vq->avail->ring[(res_base_idx) & (vq->size - 1)];
@@ -2281,12 +1442,12 @@ attach_rxmbuf_zcp(struct virtio_net *dev)
                if (desc->flags & VRING_DESC_F_NEXT) {
                        desc = &vq->desc[desc->next];
                        buff_addr = gpa_to_vva(dev, desc->addr);
-                       phys_addr = gpa_to_hpa(dev, desc->addr, desc->len,
+                       phys_addr = gpa_to_hpa(vdev, desc->addr, desc->len,
                                        &addr_type);
                } else {
                        buff_addr = gpa_to_vva(dev,
                                        desc->addr + vq->vhost_hlen);
-                       phys_addr = gpa_to_hpa(dev,
+                       phys_addr = gpa_to_hpa(vdev,
                                        desc->addr + vq->vhost_hlen,
                                        desc->len, &addr_type);
                }
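
A recurring move in these hunks: the library's struct virtio_net now carries an opaque priv pointer back to the application's struct vhost_dev, and the zero-copy paths recover it with a cast, as attach_rxmbuf_zcp() does above. A minimal, self-contained sketch of that backpointer pattern (struct layouts abridged to what the sketch needs, not the library's full definitions):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct virtio_net { void *priv; uint64_t device_fh; };   /* abridged */

struct vhost_dev {
	struct virtio_net *dev;   /* backpointer to the library device */
	uint16_t vmdq_rx_q;       /* per-device state owned by the app */
};

int main(void)
{
	struct virtio_net dev = { .device_fh = 7 };
	struct vhost_dev *vdev = calloc(1, sizeof(*vdev));

	if (vdev == NULL)
		return 1;

	/* new_device(): link in both directions. */
	vdev->dev = &dev;
	dev.priv = vdev;
	vdev->vmdq_rx_q = 3;

	/* Data path: recover the app state from the library device. */
	struct vhost_dev *v = (struct vhost_dev *)dev.priv;
	printf("device %llu uses rx queue %u\n",
		(unsigned long long)v->dev->device_fh, v->vmdq_rx_q);

	free(vdev);
	return 0;
}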
@@ -2609,13 +1770,14 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,
        struct virtio_net_data_ll *dev_ll = ll_root_used;
        struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
        uint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh];
+       uint16_t vmdq_rx_q = ((struct vhost_dev *)dev->priv)->vmdq_rx_q;
 
        /*Add packet to the port tx queue*/
-       tx_q = &tx_queue_zcp[(uint16_t)dev->vmdq_rx_q];
+       tx_q = &tx_queue_zcp[vmdq_rx_q];
        len = tx_q->len;
 
        /* Allocate an mbuf and populate the structure. */
-       vpool = &vpool_array[MAX_QUEUES + (uint16_t)dev->vmdq_rx_q];
+       vpool = &vpool_array[MAX_QUEUES + vmdq_rx_q];
        rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf);
        if (unlikely(mbuf == NULL)) {
                struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];
@@ -2636,21 +1798,21 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,
                 */
                vlan_tag = external_pkt_default_vlan_tag;
                while (dev_ll != NULL) {
-                       if (likely(dev_ll->dev->ready == DEVICE_RX) &&
+                       if (likely(dev_ll->vdev->ready == DEVICE_RX) &&
                                ether_addr_cmp(&(pkt_hdr->d_addr),
-                               &dev_ll->dev->mac_address)) {
+                               &dev_ll->vdev->mac_address)) {
 
                                /*
                                 * Drop the packet if the TX packet is destined
                                 * for the TX device.
                                 */
-                               if (unlikely(dev_ll->dev->device_fh
+                               if (unlikely(dev_ll->vdev->dev->device_fh
                                        == dev->device_fh)) {
                                        LOG_DEBUG(VHOST_DATA,
                                        "(%"PRIu64") TX: Source and destination"
                                        "MAC addresses are the same. Dropping "
                                        "packet.\n",
-                                       dev_ll->dev->device_fh);
+                                       dev_ll->vdev->dev->device_fh);
                                        MBUF_HEADROOM_UINT32(mbuf)
                                                = (uint32_t)desc_idx;
                                        __rte_mbuf_raw_free(mbuf);
@@ -2664,12 +1826,12 @@ virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,
                                offset = 4;
                                vlan_tag =
                                (uint16_t)
-                               vlan_tags[(uint16_t)dev_ll->dev->device_fh];
+                               vlan_tags[(uint16_t)dev_ll->vdev->dev->device_fh];
 
                                LOG_DEBUG(VHOST_DATA,
                                "(%"PRIu64") TX: pkt to local VM device id:"
                                "(%"PRIu64") vlan tag: %d.\n",
-                               dev->device_fh, dev_ll->dev->device_fh,
+                               dev->device_fh, dev_ll->vdev->dev->device_fh,
                                vlan_tag);
 
                                break;
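
The loop above is the whole vm2vm software switch: walk the device list, compare the destination MAC, drop hairpin traffic, and retag with the destination's VLAN. A standalone sketch of that lookup (the MAC values and two-device table are hypothetical):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct dev_entry { uint8_t mac[6]; uint16_t vlan_tag; };

/* Hypothetical table of known local VM devices. */
static const struct dev_entry devs[] = {
	{ { 0x52, 0x54, 0x00, 0x00, 0x00, 0x01 }, 1001 },
	{ { 0x52, 0x54, 0x00, 0x00, 0x00, 0x02 }, 1002 },
};

/* Index of a local destination, -1 for the external port, -2 to drop. */
static int lookup_local(const uint8_t dst[6], int src_idx)
{
	int i;

	for (i = 0; i < (int)(sizeof(devs) / sizeof(devs[0])); i++) {
		if (memcmp(dst, devs[i].mac, 6) != 0)
			continue;
		return (i == src_idx) ? -2 /* hairpin: drop */ : i;
	}
	return -1;
}

int main(void)
{
	const uint8_t dst[6] = { 0x52, 0x54, 0x00, 0x00, 0x00, 0x02 };
	int idx = lookup_local(dst, 0);

	if (idx >= 0)
		printf("vm2vm: deliver locally with vlan %u\n",
			devs[idx].vlan_tag);
	return 0;
}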
@@ -2754,6 +1916,7 @@ virtio_dev_tx_zcp(struct virtio_net *dev)
        uint16_t avail_idx;
        uint8_t need_copy = 0;
        hpa_type addr_type;
+       struct vhost_dev *vdev = (struct vhost_dev *)dev->priv;
 
        vq = dev->virtqueue[VIRTIO_TXQ];
        avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
@@ -2797,7 +1960,7 @@ virtio_dev_tx_zcp(struct virtio_net *dev)
 
                /* Buffer address translation. */
                buff_addr = gpa_to_vva(dev, desc->addr);
-               phys_addr = gpa_to_hpa(dev, desc->addr, desc->len, &addr_type);
+               phys_addr = gpa_to_hpa(vdev, desc->addr, desc->len, &addr_type);
 
                if (likely(packet_success < (free_entries - 1)))
                        /* Prefetch descriptor index. */
@@ -2846,8 +2009,8 @@ virtio_dev_tx_zcp(struct virtio_net *dev)
                 * If this is the first received packet we need to learn
                 * the MAC and setup VMDQ
                 */
-               if (unlikely(dev->ready == DEVICE_MAC_LEARNING)) {
-                       if (dev->remove || (link_vmdq(dev, &m) == -1)) {
+               if (unlikely(vdev->ready == DEVICE_MAC_LEARNING)) {
+                       if (vdev->remove || (link_vmdq(vdev, &m) == -1)) {
                                /*
                                 * Discard frame if device is scheduled for
                                 * removal or a duplicate MAC address is found.
@@ -2872,6 +2035,7 @@ static int
 switch_worker_zcp(__attribute__((unused)) void *arg)
 {
        struct virtio_net *dev = NULL;
+       struct vhost_dev  *vdev = NULL;
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
        struct virtio_net_data_ll *dev_ll;
        struct mbuf_table *tx_q;
@@ -2900,12 +2064,13 @@ switch_worker_zcp(__attribute__((unused)) void *arg)
                         * put back into vpool.ring.
                         */
                        dev_ll = lcore_ll->ll_root_used;
-                       while ((dev_ll != NULL) && (dev_ll->dev != NULL)) {
+                       while ((dev_ll != NULL) && (dev_ll->vdev != NULL)) {
                                /* Get virtio device ID */
-                               dev = dev_ll->dev;
+                               vdev = dev_ll->vdev;
+                               dev = vdev->dev;
 
-                               if (likely(!dev->remove)) {
-                                       tx_q = &tx_queue_zcp[(uint16_t)dev->vmdq_rx_q];
+                               if (likely(!vdev->remove)) {
+                                       tx_q = &tx_queue_zcp[(uint16_t)vdev->vmdq_rx_q];
                                        if (tx_q->len) {
                                                LOG_DEBUG(VHOST_DATA,
                                                "TX queue drained after timeout"
@@ -2930,7 +2095,7 @@ switch_worker_zcp(__attribute__((unused)) void *arg)
                                                tx_q->len = 0;
 
                                                txmbuf_clean_zcp(dev,
-                                                       &vpool_array[MAX_QUEUES+dev->vmdq_rx_q]);
+                                                       &vpool_array[MAX_QUEUES+vdev->vmdq_rx_q]);
                                        }
                                }
                                dev_ll = dev_ll->next;
@@ -2950,17 +2115,18 @@ switch_worker_zcp(__attribute__((unused)) void *arg)
                /* Process devices */
                dev_ll = lcore_ll->ll_root_used;
 
-               while ((dev_ll != NULL) && (dev_ll->dev != NULL)) {
-                       dev = dev_ll->dev;
-                       if (unlikely(dev->remove)) {
+               while ((dev_ll != NULL) && (dev_ll->vdev != NULL)) {
+                       vdev = dev_ll->vdev;
+                       dev  = vdev->dev;
+                       if (unlikely(vdev->remove)) {
                                dev_ll = dev_ll->next;
-                               unlink_vmdq(dev);
-                               dev->ready = DEVICE_SAFE_REMOVE;
+                               unlink_vmdq(vdev);
+                               vdev->ready = DEVICE_SAFE_REMOVE;
                                continue;
                        }
 
-                       if (likely(dev->ready == DEVICE_RX)) {
-                               uint32_t index = dev->vmdq_rx_q;
+                       if (likely(vdev->ready == DEVICE_RX)) {
+                               uint32_t index = vdev->vmdq_rx_q;
                                uint16_t i;
                                count_in_ring
                                = rte_ring_count(vpool_array[index].ring);
@@ -2979,7 +2145,7 @@ switch_worker_zcp(__attribute__((unused)) void *arg)
 
                                /* Handle guest RX */
                                rx_count = rte_eth_rx_burst(ports[0],
-                                       (uint16_t)dev->vmdq_rx_q, pkts_burst,
+                                       vdev->vmdq_rx_q, pkts_burst,
                                        MAX_PKT_BURST);
 
                                if (rx_count) {
@@ -3002,7 +2168,7 @@ switch_worker_zcp(__attribute__((unused)) void *arg)
                                }
                        }
 
-                       if (likely(!dev->remove))
+                       if (likely(!vdev->remove))
                                /* Handle guest TX */
                                virtio_dev_tx_zcp(dev);
 
@@ -3115,7 +2281,7 @@ alloc_data_ll(uint32_t size)
        }
 
        for (i = 0; i < size - 1; i++) {
-               ll_new[i].dev = NULL;
+               ll_new[i].vdev = NULL;
                ll_new[i].next = &ll_new[i+1];
        }
        ll_new[i].next = NULL;
@@ -3176,21 +2342,22 @@ destroy_device (volatile struct virtio_net *dev)
        struct virtio_net_data_ll *ll_main_dev_cur;
        struct virtio_net_data_ll *ll_lcore_dev_last = NULL;
        struct virtio_net_data_ll *ll_main_dev_last = NULL;
+       struct vhost_dev *vdev;
        int lcore;
 
        dev->flags &= ~VIRTIO_DEV_RUNNING;
 
+       vdev = (struct vhost_dev *)dev->priv;
        /*set the remove flag. */
-       dev->remove = 1;
-
-       while(dev->ready != DEVICE_SAFE_REMOVE) {
+       vdev->remove = 1;
+       while (vdev->ready != DEVICE_SAFE_REMOVE) {
                rte_pause();
        }
 
        /* Search for entry to be removed from lcore ll */
-       ll_lcore_dev_cur = lcore_info[dev->coreid].lcore_ll->ll_root_used;
+       ll_lcore_dev_cur = lcore_info[vdev->coreid].lcore_ll->ll_root_used;
        while (ll_lcore_dev_cur != NULL) {
-               if (ll_lcore_dev_cur->dev == dev) {
+               if (ll_lcore_dev_cur->vdev == vdev) {
                        break;
                } else {
                        ll_lcore_dev_last = ll_lcore_dev_cur;
@@ -3209,7 +2376,7 @@ destroy_device (volatile struct virtio_net *dev)
        ll_main_dev_cur = ll_root_used;
        ll_main_dev_last = NULL;
        while (ll_main_dev_cur != NULL) {
-               if (ll_main_dev_cur->dev == dev) {
+               if (ll_main_dev_cur->vdev == vdev) {
                        break;
                } else {
                        ll_main_dev_last = ll_main_dev_cur;
@@ -3218,7 +2385,7 @@ destroy_device (volatile struct virtio_net *dev)
        }
 
        /* Remove entries from the lcore and main ll. */
-       rm_data_ll_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last);
+       rm_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last);
        rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);
 
        /* Set the dev_removal_flag on each lcore. */
@@ -3238,54 +2405,203 @@ destroy_device (volatile struct virtio_net *dev)
        }
 
        /* Add the entries back to the lcore and main free ll.*/
-       put_data_ll_free_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur);
+       put_data_ll_free_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur);
        put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);
 
        /* Decrement number of device on the lcore. */
-       lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->device_num--;
+       lcore_info[vdev->coreid].lcore_ll->device_num--;
 
        RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed from data core\n", dev->device_fh);
 
        if (zero_copy) {
-               struct vpool *vpool = &vpool_array[dev->vmdq_rx_q];
+               struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q];
 
                /* Stop the RX queue. */
-               if (rte_eth_dev_rx_queue_stop(ports[0], dev->vmdq_rx_q) != 0) {
+               if (rte_eth_dev_rx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) {
                        LOG_DEBUG(VHOST_CONFIG,
                                "(%"PRIu64") In destroy_device: Failed to stop "
                                "rx queue:%d\n",
                                dev->device_fh,
-                               dev->vmdq_rx_q);
+                               vdev->vmdq_rx_q);
                }
 
                LOG_DEBUG(VHOST_CONFIG,
                        "(%"PRIu64") in destroy_device: Start put mbuf in "
                        "mempool back to ring for RX queue: %d\n",
-                       dev->device_fh, dev->vmdq_rx_q);
+                       dev->device_fh, vdev->vmdq_rx_q);
 
                mbuf_destroy_zcp(vpool);
 
                /* Stop the TX queue. */
-               if (rte_eth_dev_tx_queue_stop(ports[0], dev->vmdq_rx_q) != 0) {
+               if (rte_eth_dev_tx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) {
                        LOG_DEBUG(VHOST_CONFIG,
                                "(%"PRIu64") In destroy_device: Failed to "
                                "stop tx queue:%d\n",
-                               dev->device_fh, dev->vmdq_rx_q);
+                               dev->device_fh, vdev->vmdq_rx_q);
                }
 
-               vpool = &vpool_array[dev->vmdq_rx_q + MAX_QUEUES];
+               vpool = &vpool_array[vdev->vmdq_rx_q + MAX_QUEUES];
 
                LOG_DEBUG(VHOST_CONFIG,
                        "(%"PRIu64") destroy_device: Start put mbuf in mempool "
                        "back to ring for TX queue: %d, dev:(%"PRIu64")\n",
-                       dev->device_fh, (dev->vmdq_rx_q + MAX_QUEUES),
+                       dev->device_fh, (vdev->vmdq_rx_q + MAX_QUEUES),
                        dev->device_fh);
 
                mbuf_destroy_zcp(vpool);
+               rte_free(vdev->regions_hpa);
        }
+       rte_free(vdev);
 
 }
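
destroy_device() above and the switch_worker loops cooperate through a two-flag handshake: the control path sets remove and spins until a data core unlinks the device and acknowledges with ready = DEVICE_SAFE_REMOVE. A minimal standalone sketch of the same handshake, with C11 atomics and a pthread standing in for the DPDK lcore, rte_pause() and the real flags:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define DEVICE_RX          1
#define DEVICE_SAFE_REMOVE 2

static atomic_int remove_flag;               /* set by the control path */
static atomic_int ready = DEVICE_RX;         /* acked by the data core  */

static void *data_core(void *arg)
{
	(void)arg;
	for (;;) {
		if (atomic_load(&remove_flag)) {
			/* unlink_vmdq(vdev) would run here */
			atomic_store(&ready, DEVICE_SAFE_REMOVE);
			return NULL;
		}
		/* normal RX/TX switching work */
	}
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, data_core, NULL);
	atomic_store(&remove_flag, 1);           /* destroy_device() side */
	while (atomic_load(&ready) != DEVICE_SAFE_REMOVE)
		;                                /* rte_pause() in the app */
	pthread_join(t, NULL);
	puts("device safely removed");
	return 0;
}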
 
+/*
+ * Count the physical-address breaks (each break adds one sub-region) inside
+ * one particular region whose vhost virtual address range is contiguous.
+ * The region starts at vva_start and is 'size' bytes long.
+ */
+static uint32_t
+check_hpa_regions(uint64_t vva_start, uint64_t size)
+{
+       uint32_t i, nregions = 0, page_size = getpagesize();
+       uint64_t cur_phys_addr = 0, next_phys_addr = 0;
+       if (vva_start % page_size) {
+               LOG_DEBUG(VHOST_CONFIG,
+                       "in check_hpa_regions: vva start(%p) mod page_size(%d) "
+                       "has remainder\n",
+                       (void *)(uintptr_t)vva_start, page_size);
+               return 0;
+       }
+       if (size % page_size) {
+               LOG_DEBUG(VHOST_CONFIG,
+                       "in check_hpa_regions: "
+                       "size(%"PRIu64") mod page_size(%d) has remainder\n",
+                       size, page_size);
+               return 0;
+       }
+       for (i = 0; i < size - page_size; i = i + page_size) {
+               cur_phys_addr
+                       = rte_mem_virt2phy((void *)(uintptr_t)(vva_start + i));
+               next_phys_addr = rte_mem_virt2phy(
+                       (void *)(uintptr_t)(vva_start + i + page_size));
+               if ((cur_phys_addr + page_size) != next_phys_addr) {
+                       ++nregions;
+                       LOG_DEBUG(VHOST_CONFIG,
+                               "in check_hpa_regions: hva addr:(%p) is not "
+                               "continuous with hva addr:(%p), diff:%d\n",
+                               (void *)(uintptr_t)(vva_start + (uint64_t)i),
+                               (void *)(uintptr_t)(vva_start + (uint64_t)i
+                               + page_size), page_size);
+                       LOG_DEBUG(VHOST_CONFIG,
+                               "in check_hpa_regions: hpa addr:(%p) is not "
+                               "continuous with hpa addr:(%p), "
+                               "diff:(%"PRIu64")\n",
+                               (void *)(uintptr_t)cur_phys_addr,
+                               (void *)(uintptr_t)next_phys_addr,
+                               (next_phys_addr-cur_phys_addr));
+               }
+       }
+       return nregions;
+}
+
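
What the walk above counts: one extra sub-region per physical break between consecutive pages. A standalone sketch with a hypothetical page table standing in for rte_mem_virt2phy() (new_device() then sizes the HPA table as nregions plus the sum of these counts):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

/* Hypothetical translation: virtual page i -> host physical address. */
static const uint64_t phys[] = {
	0x10000, 0x11000, 0x12000,   /* pages 0-2 physically contiguous */
	0x40000, 0x41000             /* page 3 jumps: one break         */
};

static uint32_t count_breaks(uint32_t npages)
{
	uint32_t i, nregions = 0;

	for (i = 0; i + 1 < npages; i++)
		if (phys[i] + PAGE_SIZE != phys[i + 1])
			++nregions;
	return nregions;
}

int main(void)
{
	/* One break => the 5-page range splits into 1 + 1 = 2 sub-regions. */
	printf("breaks: %u\n", count_breaks(5));   /* prints 1 */
	return 0;
}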
+/*
+ * Divide each region whose vhost virtual address range is contiguous into
+ * sub-regions such that the physical addresses within each sub-region are
+ * contiguous, and fill the offset (relative to the GPA), size and other
+ * information of each sub-region into regions_hpa.
+ */
+static uint32_t
+fill_hpa_memory_regions(struct virtio_memory_regions_hpa *mem_region_hpa, struct virtio_memory *virtio_memory)
+{
+       uint32_t regionidx, regionidx_hpa = 0, i, k, page_size = getpagesize();
+       uint64_t cur_phys_addr = 0, next_phys_addr = 0, vva_start;
+
+       if (mem_region_hpa == NULL)
+               return 0;
+
+       for (regionidx = 0; regionidx < virtio_memory->nregions; regionidx++) {
+               vva_start = virtio_memory->regions[regionidx].guest_phys_address +
+                       virtio_memory->regions[regionidx].address_offset;
+               mem_region_hpa[regionidx_hpa].guest_phys_address
+                       = virtio_memory->regions[regionidx].guest_phys_address;
+               mem_region_hpa[regionidx_hpa].host_phys_addr_offset =
+                       rte_mem_virt2phy((void *)(uintptr_t)(vva_start)) -
+                       mem_region_hpa[regionidx_hpa].guest_phys_address;
+               LOG_DEBUG(VHOST_CONFIG,
+                       "in fill_hpa_regions: guest phys addr start[%d]:(%p)\n",
+                       regionidx_hpa,
+                       (void *)(uintptr_t)
+                       (mem_region_hpa[regionidx_hpa].guest_phys_address));
+               LOG_DEBUG(VHOST_CONFIG,
+                       "in fill_hpa_regions: host  phys addr start[%d]:(%p)\n",
+                       regionidx_hpa,
+                       (void *)(uintptr_t)
+                       (mem_region_hpa[regionidx_hpa].host_phys_addr_offset));
+               for (i = 0, k = 0;
+                       i < virtio_memory->regions[regionidx].memory_size -
+                               page_size;
+                       i += page_size) {
+                       cur_phys_addr = rte_mem_virt2phy(
+                                       (void *)(uintptr_t)(vva_start + i));
+                       next_phys_addr = rte_mem_virt2phy(
+                                       (void *)(uintptr_t)(vva_start +
+                                       i + page_size));
+                       if ((cur_phys_addr + page_size) != next_phys_addr) {
+                               mem_region_hpa[regionidx_hpa].guest_phys_address_end =
+                                       mem_region_hpa[regionidx_hpa].guest_phys_address +
+                                       k + page_size;
+                               mem_region_hpa[regionidx_hpa].memory_size
+                                       = k + page_size;
+                               LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest "
+                                       "phys addr end  [%d]:(%p)\n",
+                                       regionidx_hpa,
+                                       (void *)(uintptr_t)
+                                       (mem_region_hpa[regionidx_hpa].guest_phys_address_end));
+                               LOG_DEBUG(VHOST_CONFIG,
+                                       "in fill_hpa_regions: guest phys addr "
+                                       "size [%d]:(%p)\n",
+                                       regionidx_hpa,
+                                       (void *)(uintptr_t)
+                                       (mem_region_hpa[regionidx_hpa].memory_size));
+                               mem_region_hpa[regionidx_hpa + 1].guest_phys_address
+                                       = mem_region_hpa[regionidx_hpa].guest_phys_address_end;
+                               ++regionidx_hpa;
+                               mem_region_hpa[regionidx_hpa].host_phys_addr_offset =
+                                       next_phys_addr -
+                                       mem_region_hpa[regionidx_hpa].guest_phys_address;
+                               LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest"
+                                       " phys addr start[%d]:(%p)\n",
+                                       regionidx_hpa,
+                                       (void *)(uintptr_t)
+                                       (mem_region_hpa[regionidx_hpa].guest_phys_address));
+                               LOG_DEBUG(VHOST_CONFIG,
+                                       "in fill_hpa_regions: host  phys addr "
+                                       "start[%d]:(%p)\n",
+                                       regionidx_hpa,
+                                       (void *)(uintptr_t)
+                                       (mem_region_hpa[regionidx_hpa].host_phys_addr_offset));
+                               k = 0;
+                       } else {
+                               k += page_size;
+                       }
+               }
+               mem_region_hpa[regionidx_hpa].guest_phys_address_end
+                       = mem_region_hpa[regionidx_hpa].guest_phys_address
+                       + k + page_size;
+               mem_region_hpa[regionidx_hpa].memory_size = k + page_size;
+               LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest phys addr end  "
+                       "[%d]:(%p)\n", regionidx_hpa,
+                       (void *)(uintptr_t)
+                       (mem_region_hpa[regionidx_hpa].guest_phys_address_end));
+               LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest phys addr size "
+                       "[%d]:(%p)\n", regionidx_hpa,
+                       (void *)(uintptr_t)
+                       (mem_region_hpa[regionidx_hpa].memory_size));
+               ++regionidx_hpa;
+       }
+       return regionidx_hpa;
+}
+
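
Once the table is filled, gpa_to_hpa() reduces to a range lookup plus an offset add: hpa = gpa + host_phys_addr_offset for the sub-region whose [guest_phys_address, guest_phys_address_end) contains the GPA. A self-contained sketch with a hypothetical two-entry table, the kind one physical break would produce:

#include <stdint.h>
#include <stdio.h>

/* Abridged to the fields filled in by fill_hpa_memory_regions(). */
struct region_hpa {
	uint64_t guest_phys_address;      /* GPA range start (inclusive) */
	uint64_t guest_phys_address_end;  /* GPA range end (exclusive)   */
	uint64_t host_phys_addr_offset;   /* HPA - GPA within the range  */
};

/* Hypothetical result for one region with a single physical break. */
static const struct region_hpa table[] = {
	{ 0x0000, 0x3000, 0x10000 },  /* GPA 0x0000-0x2fff -> HPA 0x10000+ */
	{ 0x3000, 0x5000, 0x3d000 },  /* GPA 0x3000-0x4fff -> HPA 0x40000+ */
};

static uint64_t gpa_to_hpa_sketch(uint64_t gpa)
{
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (gpa >= table[i].guest_phys_address &&
		    gpa < table[i].guest_phys_address_end)
			return gpa + table[i].host_phys_addr_offset;
	return 0; /* not mapped */
}

int main(void)
{
	printf("0x1000 -> 0x%llx\n",
		(unsigned long long)gpa_to_hpa_sketch(0x1000)); /* 0x11000 */
	printf("0x3000 -> 0x%llx\n",
		(unsigned long long)gpa_to_hpa_sketch(0x3000)); /* 0x40000 */
	return 0;
}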
 /*
  * A new device is added to a data core. First the device is added to the main linked list
  * and the allocated to a specific data core.
@@ -3296,6 +2612,52 @@ new_device (struct virtio_net *dev)
        struct virtio_net_data_ll *ll_dev;
        int lcore, core_add = 0;
        uint32_t device_num_min = num_devices;
+       struct vhost_dev *vdev;
+       uint32_t regionidx;
+
+       vdev = rte_zmalloc("vhost device", sizeof(*vdev), CACHE_LINE_SIZE);
+       if (vdev == NULL) {
+               RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Couldn't allocate memory for vhost dev\n",
+                       dev->device_fh);
+               return -1;
+       }
+       vdev->dev = dev;
+       dev->priv = vdev;
+
+       if (zero_copy) {
+               vdev->nregions_hpa = dev->mem->nregions;
+               for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
+                       vdev->nregions_hpa
+                               += check_hpa_regions(
+                                       dev->mem->regions[regionidx].guest_phys_address
+                                       + dev->mem->regions[regionidx].address_offset,
+                                       dev->mem->regions[regionidx].memory_size);
+
+               }
+
+               vdev->regions_hpa = (struct virtio_memory_regions_hpa *) rte_zmalloc("vhost hpa region",
+                       sizeof(struct virtio_memory_regions_hpa) * vdev->nregions_hpa,
+                       CACHE_LINE_SIZE);
+               if (vdev->regions_hpa == NULL) {
+                       RTE_LOG(ERR, VHOST_CONFIG, "Cannot allocate memory for hpa region\n");
+                       rte_free(vdev);
+                       return -1;
+               }
+
+               if (fill_hpa_memory_regions(
+                       vdev->regions_hpa, dev->mem
+                       ) != vdev->nregions_hpa) {
+
+                       RTE_LOG(ERR, VHOST_CONFIG,
+                               "hpa memory regions number mismatch: "
+                               "[%d]\n", vdev->nregions_hpa);
+                       rte_free(vdev->regions_hpa);
+                       rte_free(vdev);
+                       return -1;
+               }
+       }
+
 
        /* Add device to main ll */
        ll_dev = get_data_ll_free_entry(&ll_root_free);
@@ -3303,15 +2665,18 @@ new_device (struct virtio_net *dev)
                RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in linked list. Device limit "
                        "of %d devices per core has been reached\n",
                        dev->device_fh, num_devices);
+               if (vdev->regions_hpa)
+                       rte_free(vdev->regions_hpa);
+               rte_free(vdev);
                return -1;
        }
-       ll_dev->dev = dev;
+       ll_dev->vdev = vdev;
        add_data_ll_entry(&ll_root_used, ll_dev);
-       ll_dev->dev->vmdq_rx_q
-               = ll_dev->dev->device_fh * (num_queues / num_devices);
+       vdev->vmdq_rx_q
+               = dev->device_fh * (num_queues / num_devices);
 
        if (zero_copy) {
-               uint32_t index = ll_dev->dev->vmdq_rx_q;
+               uint32_t index = vdev->vmdq_rx_q;
                uint32_t count_in_ring, i;
                struct mbuf_table *tx_q;
 
@@ -3342,47 +2707,51 @@ new_device (struct virtio_net *dev)
                        dev->device_fh,
                        rte_ring_count(vpool_array[index].ring));
 
-               tx_q = &tx_queue_zcp[(uint16_t)dev->vmdq_rx_q];
-               tx_q->txq_id = dev->vmdq_rx_q;
+               tx_q = &tx_queue_zcp[(uint16_t)vdev->vmdq_rx_q];
+               tx_q->txq_id = vdev->vmdq_rx_q;
 
-               if (rte_eth_dev_tx_queue_start(ports[0], dev->vmdq_rx_q) != 0) {
-                       struct vpool *vpool = &vpool_array[dev->vmdq_rx_q];
+               if (rte_eth_dev_tx_queue_start(ports[0], vdev->vmdq_rx_q) != 0) {
+                       struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q];
 
                        LOG_DEBUG(VHOST_CONFIG,
                                "(%"PRIu64") In new_device: Failed to start "
                                "tx queue:%d\n",
-                               dev->device_fh, dev->vmdq_rx_q);
+                               dev->device_fh, vdev->vmdq_rx_q);
 
                        mbuf_destroy_zcp(vpool);
+                       rte_free(vdev->regions_hpa);
+                       rte_free(vdev);
                        return -1;
                }
 
-               if (rte_eth_dev_rx_queue_start(ports[0], dev->vmdq_rx_q) != 0) {
-                       struct vpool *vpool = &vpool_array[dev->vmdq_rx_q];
+               if (rte_eth_dev_rx_queue_start(ports[0], vdev->vmdq_rx_q) != 0) {
+                       struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q];
 
                        LOG_DEBUG(VHOST_CONFIG,
                                "(%"PRIu64") In new_device: Failed to start "
                                "rx queue:%d\n",
-                               dev->device_fh, dev->vmdq_rx_q);
+                               dev->device_fh, vdev->vmdq_rx_q);
 
                        /* Stop the TX queue. */
                        if (rte_eth_dev_tx_queue_stop(ports[0],
-                               dev->vmdq_rx_q) != 0) {
+                               vdev->vmdq_rx_q) != 0) {
                                LOG_DEBUG(VHOST_CONFIG,
                                        "(%"PRIu64") In new_device: Failed to "
                                        "stop tx queue:%d\n",
-                                       dev->device_fh, dev->vmdq_rx_q);
+                                       dev->device_fh, vdev->vmdq_rx_q);
                        }
 
                        mbuf_destroy_zcp(vpool);
+                       rte_free(vdev->regions_hpa);
+                       rte_free(vdev);
                        return -1;
                }
 
        }
 
        /*reset ready flag*/
-       dev->ready = DEVICE_MAC_LEARNING;
-       dev->remove = 0;
+       vdev->ready = DEVICE_MAC_LEARNING;
+       vdev->remove = 0;
 
        /* Find a suitable lcore to add the device. */
        RTE_LCORE_FOREACH_SLAVE(lcore) {
@@ -3396,11 +2765,16 @@ new_device (struct virtio_net *dev)
-       ll_dev = get_data_ll_free_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_free);
+       ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free);
        if (ll_dev == NULL) {
                RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh);
-               dev->ready = DEVICE_SAFE_REMOVE;
+               vdev->ready = DEVICE_SAFE_REMOVE;
                destroy_device(dev);
+               /* destroy_device() already frees vdev and its HPA region table. */
                return -1;
        }
-       ll_dev->dev = dev;
+       ll_dev->vdev = vdev;
+       vdev->coreid = core_add;
+
-       add_data_ll_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_used, ll_dev);
+       add_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_dev);
 
        /* Initialize device stats */
@@ -3408,10 +2782,10 @@ new_device (struct virtio_net *dev)
 
        /* Disable notifications. */
        set_irq_status(dev);
-       lcore_info[ll_dev->dev->coreid].lcore_ll->device_num++;
+       lcore_info[vdev->coreid].lcore_ll->device_num++;
        dev->flags |= VIRTIO_DEV_RUNNING;
 
-       RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, dev->coreid);
+       RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, vdev->coreid);
 
        return 0;
 }
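
Placement on a data core is a plain minimum search: the elided RTE_LCORE_FOREACH_SLAVE loop above picks the lcore with the smallest device_num, which vdev->coreid then records. A sketch of the same selection, with a plain array standing in for lcore_info:

#include <stdio.h>

#define NUM_WORKERS 4

int main(void)
{
	unsigned int device_num[NUM_WORKERS] = { 2, 0, 3, 1 };
	unsigned int lcore, core_add = 0, device_num_min = (unsigned int)-1;

	/* Pick the worker currently serving the fewest devices. */
	for (lcore = 0; lcore < NUM_WORKERS; lcore++) {
		if (device_num[lcore] < device_num_min) {
			device_num_min = device_num[lcore];
			core_add = lcore;
		}
	}
	printf("new device goes to core %u\n", core_add); /* core 1 */
	device_num[core_add]++;
	return 0;
}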
@@ -3450,7 +2824,7 @@ print_stats(void)
 
                dev_ll = ll_root_used;
                while (dev_ll != NULL) {
-                       device_fh = (uint32_t)dev_ll->dev->device_fh;
+                       device_fh = (uint32_t)dev_ll->vdev->dev->device_fh;
                        tx_total = dev_statistics[device_fh].tx_total;
                        tx = dev_statistics[device_fh].tx;
                        tx_dropped = tx_total - tx;
@@ -3604,9 +2978,14 @@ MAIN(int argc, char *argv[])
                char pool_name[RTE_MEMPOOL_NAMESIZE];
                char ring_name[RTE_MEMPOOL_NAMESIZE];
 
-               rx_conf_default.start_rx_per_q = (uint8_t)zero_copy;
+               /*
+                * Zero copy defers queue RX/TX start until the guest
+                * finishes its startup and packet buffers from that guest
+                * are available.
+                */
+               rx_conf_default.rx_deferred_start = (uint8_t)zero_copy;
                rx_conf_default.rx_drop_en = 0;
-               tx_conf_default.start_tx_per_q = (uint8_t)zero_copy;
+               tx_conf_default.tx_deferred_start = (uint8_t)zero_copy;
                nb_mbuf = num_rx_descriptor
                        + num_switching_cores * MBUF_CACHE_SIZE_ZCP
                        + num_switching_cores * MAX_PKT_BURST;
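
The deferred-start flags only arm this behavior; with zero copy the queues are started explicitly later, from new_device(), via rte_eth_dev_rx_queue_start()/rte_eth_dev_tx_queue_start() once the guest's buffers exist. A toy model of the semantics (a sketch, not the ethdev API):

#include <stdbool.h>
#include <stdio.h>

struct queue { bool deferred_start; bool started; };

static void dev_start(struct queue *q)
{
	/* Device start skips queues marked for deferred start. */
	if (!q->deferred_start)
		q->started = true;
}

static void queue_start(struct queue *q)
{
	/* The explicit per-queue start, e.g. from new_device(). */
	q->started = true;
}

int main(void)
{
	struct queue rxq = { .deferred_start = true, .started = false };

	dev_start(&rxq);
	printf("after dev_start:   %s\n", rxq.started ? "running" : "idle");
	queue_start(&rxq);   /* guest is up, buffers available */
	printf("after queue_start: %s\n", rxq.started ? "running" : "idle");
	return 0;
}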
@@ -3708,14 +3087,14 @@ MAIN(int argc, char *argv[])
        }
 
-       /* Register CUSE device to handle IOCTLs. */
-       ret = register_cuse_device((char*)&dev_basename, dev_index, get_virtio_net_callbacks());
+       /* Register the vhost driver to handle vhost messages. */
+       ret = rte_vhost_driver_register((char *)&dev_basename);
        if (ret != 0)
-               rte_exit(EXIT_FAILURE,"CUSE device setup failure.\n");
+               rte_exit(EXIT_FAILURE, "vhost driver registration failure.\n");
 
-       init_virtio_net(&virtio_net_device_ops);
+       rte_vhost_driver_callback_register(&virtio_net_device_ops);
 
-       /* Start CUSE session. */
-       start_cuse_session_loop();
+       /* Start the vhost driver session loop. */
+       rte_vhost_driver_session_start();
        return 0;
 
 }