diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index d75ae026f0..5d9cc91de8 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -31,48 +31,48 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <arpa/inet.h>
-#include <getopt.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <linux/virtio_net.h>
-#include <linux/virtio_ring.h>
-#include <signal.h>
 #include <stdint.h>
-#include <sys/eventfd.h>
-#include <sys/param.h>
-#include <unistd.h>
+#include <stdbool.h>
+#include <linux/virtio_net.h>
 
-#include <rte_atomic.h>
-#include <rte_cycles.h>
-#include <rte_ethdev.h>
-#include <rte_log.h>
-#include <rte_prefetch.h>
-#include <rte_string_fns.h>
+#include <rte_mbuf.h>
+#include <rte_memcpy.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_virtio_net.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_sctp.h>
 
-#include "main.h"
-#include "virtio-net.h"
-#include "vhost-net-cdev.h"
+#include "vhost-net.h"
 
-#define MAX_PKT_BURST 32	/* Max burst size for RX/TX */
+#define MAX_PKT_BURST 32
 
-/*
+static bool
+is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
+{
+	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
+}
+
+/**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that were succesfully
- * added to the RX queue. This function works when mergeable is disabled.
+ * count is returned to indicate the number of packets that are successfully
+ * added to the RX queue. This function works when the mbuf is scattered, but
+ * it doesn't support the mergeable feature.
  */
 static inline uint32_t __attribute__((always_inline))
-virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
+virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint32_t count)
 {
 	struct vhost_virtqueue *vq;
 	struct vring_desc *desc;
 	struct rte_mbuf *buff;
 	/* The virtio_hdr is initialised to 0. */
-	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0,0,0,0,0,0},0};
+	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint64_t buff_addr = 0;
 	uint64_t buff_hdr_addr = 0;
-	uint32_t head[MAX_PKT_BURST], packet_len = 0;
+	uint32_t head[MAX_PKT_BURST];
 	uint32_t head_idx, packet_success = 0;
 	uint16_t avail_idx, res_cur_idx;
 	uint16_t res_base_idx, res_end_idx;
@@ -80,10 +80,23 @@ virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
 	uint8_t success = 0;
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
-	vq = dev->virtqueue[VIRTIO_RXQ];
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+			__func__, dev->device_fh, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(vq->enabled == 0))
+		return 0;
+
 	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
 
-	/* As many data cores may want access to available buffers, they need to be reserved. */
+	/*
+	 * As many data cores may want access to available buffers,
+	 * they need to be reserved.
+	 */
 	do {
 		res_base_idx = vq->last_used_idx_res;
 		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
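The is_valid_virt_queue_idx() helper introduced above encodes the virtio multiqueue layout: every queue pair exposes an even RX ring and an odd TX ring, so a valid index must have the parity that matches the requested direction and must stay below qp_nb * VIRTIO_QNUM. A minimal standalone sketch of that convention (VIRTIO_QNUM is 2 in the virtio headers; the main() driver is illustrative only):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VIRTIO_QNUM 2	/* one RX ring + one TX ring per queue pair */

static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
{
	/* RX rings sit at even indexes, TX rings at odd indexes. */
	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
}

int
main(void)
{
	printf("%d\n", is_valid_virt_queue_idx(2, 0, 2)); /* 1: RX ring of pair 1 */
	printf("%d\n", is_valid_virt_queue_idx(2, 1, 2)); /* 0: even index is never TX */
	printf("%d\n", is_valid_virt_queue_idx(4, 0, 2)); /* 0: beyond two queue pairs */
	return 0;
}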
@@ -98,23 +111,30 @@ virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
 		res_end_idx = res_base_idx + count;
 		/* vq->last_used_idx_res is atomically updated. */
-		success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx,
-						res_end_idx);
+		/* TODO: Allow to disable cmpset if no concurrency in application. */
+		success = rte_atomic16_cmpset(&vq->last_used_idx_res,
+						res_base_idx, res_end_idx);
 	} while (unlikely(success == 0));
 	res_cur_idx = res_base_idx;
 
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx);
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
+		dev->device_fh, res_cur_idx, res_end_idx);
 
 	/* Prefetch available ring to retrieve indexes. */
 	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
 
 	/* Retrieve all of the head indexes first to avoid caching issues. */
 	for (head_idx = 0; head_idx < count; head_idx++)
-		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
+		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
+					(vq->size - 1)];
 
 	/*Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[head[packet_success]]);
 
 	while (res_cur_idx != res_end_idx) {
+		uint32_t offset = 0, vb_offset = 0;
+		uint32_t pkt_len, len_to_cpy, data_len, total_copied = 0;
+		uint8_t hdr = 0, uncompleted_pkt = 0;
+
 		/* Get descriptor from available ring */
 		desc = &vq->desc[head[packet_success]];
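Several data cores may enqueue into the same virtqueue, which is why the code above claims a window [res_base_idx, res_end_idx) of the ring with a 16-bit compare-and-set before touching any descriptor. A sketch of the same reservation pattern using C11 atomics in place of rte_atomic16_cmpset(); the helper name reserve_slots() is made up for illustration:

#include <stdatomic.h>
#include <stdint.h>

/* Stand-in for vq->last_used_idx_res in struct vhost_virtqueue. */
static _Atomic uint16_t last_used_idx_res;

/* Claim `count` consecutive ring slots and return the base index.
 * The caller has already clamped `count` against avail->idx. */
static uint16_t
reserve_slots(uint16_t count)
{
	uint16_t base, end;

	do {
		base = atomic_load(&last_used_idx_res);
		end = base + count;	/* free-running index, wraps mod 2^16 */
	} while (!atomic_compare_exchange_weak(&last_used_idx_res,
						&base, end));
	return base;
}

Losers of the race simply retry from the new base, so no lock is held while the ring is carved up.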
@@ -123,41 +143,85 @@
 		/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
 		buff_addr = gpa_to_vva(dev, desc->addr);
 		/* Prefetch buffer address. */
-		rte_prefetch0((void*)(uintptr_t)buff_addr);
+		rte_prefetch0((void *)(uintptr_t)buff_addr);
 
 		/* Copy virtio_hdr to packet and increment buffer address */
 		buff_hdr_addr = buff_addr;
-		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
 		 * If the descriptors are chained the header and data are
 		 * placed in separate buffers.
 		 */
-		if (desc->flags & VRING_DESC_F_NEXT) {
-			desc->len = vq->vhost_hlen;
+		if ((desc->flags & VRING_DESC_F_NEXT) &&
+			(desc->len == vq->vhost_hlen)) {
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
 			buff_addr = gpa_to_vva(dev, desc->addr);
-			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
-			buff_addr += vq->vhost_hlen;
-			desc->len = packet_len;
+			vb_offset += vq->vhost_hlen;
+			hdr = 1;
+		}
+
+		pkt_len = rte_pktmbuf_pkt_len(buff);
+		data_len = rte_pktmbuf_data_len(buff);
+		len_to_cpy = RTE_MIN(data_len,
+			hdr ? desc->len - vq->vhost_hlen : desc->len);
+		while (total_copied < pkt_len) {
+			/* Copy mbuf data to buffer */
+			rte_memcpy((void *)(uintptr_t)(buff_addr + vb_offset),
+				rte_pktmbuf_mtod_offset(buff, const void *, offset),
+				len_to_cpy);
+			PRINT_PACKET(dev, (uintptr_t)(buff_addr + vb_offset),
+				len_to_cpy, 0);
+
+			offset += len_to_cpy;
+			vb_offset += len_to_cpy;
+			total_copied += len_to_cpy;
+
+			/* The whole packet completes */
+			if (total_copied == pkt_len)
+				break;
+
+			/* The current segment completes */
+			if (offset == data_len) {
+				buff = buff->next;
+				offset = 0;
+				data_len = rte_pktmbuf_data_len(buff);
+			}
+
+			/* The current vring descriptor done */
+			if (vb_offset == desc->len) {
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc = &vq->desc[desc->next];
+					buff_addr = gpa_to_vva(dev, desc->addr);
+					vb_offset = 0;
+				} else {
+					/* Room in vring buffer is not enough */
+					uncompleted_pkt = 1;
+					break;
+				}
+			}
+			len_to_cpy = RTE_MIN(data_len - offset, desc->len - vb_offset);
 		}
 
 		/* Update used ring with desc information */
-		vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
-		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
+		vq->used->ring[res_cur_idx & (vq->size - 1)].id =
+							head[packet_success];
 
-		/* Copy mbuf data to buffer */
-		rte_memcpy((void *)(uintptr_t)buff_addr,
-			rte_pktmbuf_mtod(buff, const void *),
-			rte_pktmbuf_data_len(buff));
-		PRINT_PACKET(dev, (uintptr_t)buff_addr,
-			rte_pktmbuf_data_len(buff), 0);
+		/* Drop the packet if it is uncompleted */
+		if (unlikely(uncompleted_pkt == 1))
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+							vq->vhost_hlen;
+		else
+			vq->used->ring[res_cur_idx & (vq->size - 1)].len =
+						pkt_len + vq->vhost_hlen;
 
 		res_cur_idx++;
 		packet_success++;
 
+		if (unlikely(uncompleted_pkt == 1))
+			continue;
+
 		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
 			(const void *)&virtio_hdr, vq->vhost_hlen);
 
@@ -178,16 +242,19 @@
 	*(volatile uint16_t *)&vq->used->idx += count;
 	vq->last_used_idx = res_end_idx;
 
+	/* flush used->idx update before we read avail->flags. */
+	rte_mb();
+
 	/* Kick the guest if necessary. */
 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-		eventfd_write((int)vq->kickfd, 1);
+		eventfd_write(vq->callfd, (eventfd_t)1);
 	return count;
 }
 
 static inline uint32_t __attribute__((always_inline))
-copy_from_mbuf_to_vring(struct virtio_net *dev,
-	uint16_t res_base_idx, uint16_t res_end_idx,
-	struct rte_mbuf *pkt)
+copy_from_mbuf_to_vring(struct virtio_net *dev, uint32_t queue_id,
+	uint16_t res_base_idx, uint16_t res_end_idx,
+	struct rte_mbuf *pkt)
 {
 	uint32_t vec_idx = 0;
 	uint32_t entry_success = 0;
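The new copy loop advances two cursors at once: offset inside the current mbuf segment and vb_offset inside the current guest descriptor, moving RTE_MIN(data_len - offset, desc->len - vb_offset) bytes per round and stepping whichever side ran out. A self-contained sketch of the same two-cursor walk over plain chains (struct seg and struct buf are simplified stand-ins, not DPDK types):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct seg { const uint8_t *data; size_t len; struct seg *next; };	/* mbuf-like */
struct buf { uint8_t *data; size_t len; struct buf *next; };	/* descriptor-like */

/* Copy pkt_len bytes from a segment chain into a buffer chain.
 * Returns bytes copied; a short count means the buffers ran out,
 * which mirrors the uncompleted_pkt case above. */
static size_t
scatter_copy(const struct seg *s, struct buf *b, size_t pkt_len)
{
	size_t off = 0, boff = 0, total = 0;

	while (total < pkt_len) {
		size_t n = s->len - off < b->len - boff ?
			s->len - off : b->len - boff;

		memcpy(b->data + boff, s->data + off, n);
		off += n; boff += n; total += n;

		if (total == pkt_len)
			break;
		if (off == s->len) {	/* mbuf segment exhausted */
			s = s->next;
			off = 0;
		}
		if (boff == b->len) {	/* vring descriptor exhausted */
			if (b->next == NULL)
				break;	/* not enough room: short copy */
			b = b->next;
			boff = 0;
		}
	}
	return total;
}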
@@ -215,9 +282,9 @@ copy_from_mbuf_to_vring(struct virtio_net *dev,
 	 * Convert from gpa to vva
 	 * (guest physical addr -> vhost virtual addr)
 	 */
-	vq = dev->virtqueue[VIRTIO_RXQ];
-	vb_addr =
-		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	vq = dev->virtqueue[queue_id];
+
+	vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 	vb_hdr_addr = vb_addr;
 
 	/* Prefetch buffer address. */
@@ -235,15 +302,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev,
 	seg_avail = rte_pktmbuf_data_len(pkt);
 	vb_offset = vq->vhost_hlen;
 
-	vb_avail =
-		vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+	vb_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
 
 	entry_len = vq->vhost_hlen;
 
 	if (vb_avail == 0) {
 		uint32_t desc_idx = vq->buf_vec[vec_idx].desc_idx;
 
-		vq->desc[desc_idx].len = vq->vhost_hlen;
-
 		if ((vq->desc[desc_idx].flags &
 			VRING_DESC_F_NEXT) == 0) {
@@ -259,8 +324,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev,
 		}
 
 		vec_idx++;
-		vb_addr =
-			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+		vb_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)vb_addr);
@@ -273,7 +337,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev,
 	while (cpy_len > 0) {
 		/* Copy mbuf data to vring buffer */
 		rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
-			(const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
+			rte_pktmbuf_mtod_offset(pkt, const void *, seg_offset),
 			cpy_len);
 
 		PRINT_PACKET(dev,
@@ -328,7 +392,6 @@ copy_from_mbuf_to_vring(struct virtio_net *dev,
 			 */
 			uint32_t desc_idx = vq->buf_vec[vec_idx].desc_idx;
-			vq->desc[desc_idx].len = vb_offset;
 
 			if ((vq->desc[desc_idx].flags &
 				VRING_DESC_F_NEXT) == 0) {
@@ -363,26 +426,13 @@ copy_from_mbuf_to_vring(struct virtio_net *dev,
 				/*
 				 * This whole packet completes.
 				 */
-				uint32_t desc_idx =
-					vq->buf_vec[vec_idx].desc_idx;
-				vq->desc[desc_idx].len = vb_offset;
-
-				while (vq->desc[desc_idx].flags &
-					VRING_DESC_F_NEXT) {
-					desc_idx = vq->desc[desc_idx].next;
-					vq->desc[desc_idx].len = 0;
-				}
-
 				/* Update used ring with desc information */
 				vq->used->ring[cur_idx & (vq->size - 1)].id =
 					vq->buf_vec[vec_idx].desc_idx;
 				vq->used->ring[cur_idx & (vq->size - 1)].len =
 					entry_len;
-				entry_len = 0;
-
 				cur_idx++;
 				entry_success++;
-				seg_avail = 0;
-				cpy_len = RTE_MIN(vb_avail, seg_avail);
+				break;
 			}
 		}
 	}
@@ -390,73 +440,87 @@
 	return entry_success;
 }
 
+static inline void __attribute__((always_inline))
+update_secure_len(struct vhost_virtqueue *vq, uint32_t id,
+	uint32_t *secure_len, uint32_t *vec_idx)
+{
+	uint16_t wrapped_idx = id & (vq->size - 1);
+	uint32_t idx = vq->avail->ring[wrapped_idx];
+	uint8_t next_desc;
+	uint32_t len = *secure_len;
+	uint32_t vec_id = *vec_idx;
+
+	do {
+		next_desc = 0;
+		len += vq->desc[idx].len;
+		vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
+		vq->buf_vec[vec_id].buf_len = vq->desc[idx].len;
+		vq->buf_vec[vec_id].desc_idx = idx;
+		vec_id++;
+
+		if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
+			idx = vq->desc[idx].next;
+			next_desc = 1;
+		}
+	} while (next_desc);
+
+	*secure_len = len;
+	*vec_idx = vec_id;
+}
+
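update_secure_len() walks one descriptor chain taken from the avail ring, recording each link in vq->buf_vec and accumulating the chain's byte capacity so the caller can tell when enough guest space has been reserved. A reduced sketch of that walk (struct desc and struct vec are simplified stand-ins for the vring structures):

#include <stdint.h>

#define F_NEXT 0x1	/* stands in for VRING_DESC_F_NEXT */

struct desc { uint64_t addr; uint32_t len; uint16_t flags; uint16_t next; };
struct vec { uint64_t buf_addr; uint32_t buf_len; uint32_t desc_idx; };

/* Append the chain starting at `idx` to `v` and return the accumulated
 * capacity; `*vec_id` advances past the entries written. */
static uint32_t
gather_chain(const struct desc *table, uint32_t idx,
	struct vec *v, uint32_t *vec_id, uint32_t len)
{
	uint8_t next;

	do {
		next = 0;
		len += table[idx].len;
		v[*vec_id].buf_addr = table[idx].addr;
		v[*vec_id].buf_len = table[idx].len;
		v[*vec_id].desc_idx = idx;
		(*vec_id)++;

		if (table[idx].flags & F_NEXT) {
			idx = table[idx].next;
			next = 1;
		}
	} while (next);

	return len;
}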
 /*
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that were succesfully
- * added to the RX queue. This function works for mergeable RX.
+ * This function works for mergeable RX.
  */
 static inline uint32_t __attribute__((always_inline))
-virtio_dev_merge_rx(struct virtio_net *dev, struct rte_mbuf **pkts,
-	uint32_t count)
+virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint32_t count)
 {
 	struct vhost_virtqueue *vq;
 	uint32_t pkt_idx = 0, entry_success = 0;
-	uint16_t avail_idx, res_cur_idx;
-	uint16_t res_base_idx, res_end_idx;
+	uint16_t avail_idx;
+	uint16_t res_base_idx, res_cur_idx;
 	uint8_t success = 0;
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
 		dev->device_fh);
-	vq = dev->virtqueue[VIRTIO_RXQ];
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+			__func__, dev->device_fh, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(vq->enabled == 0))
+		return 0;
+
 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 
 	if (count == 0)
 		return 0;
 
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-		uint32_t secure_len = 0;
-		uint16_t need_cnt;
-		uint32_t vec_idx = 0;
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
-		uint16_t i, id;
 
 		do {
 			/*
 			 * As many data cores may want access to available
 			 * buffers, they need to be reserved.
 			 */
+			uint32_t secure_len = 0;
+			uint32_t vec_idx = 0;
+
 			res_base_idx = vq->last_used_idx_res;
 			res_cur_idx = res_base_idx;
 
 			do {
 				avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-				if (unlikely(res_cur_idx == avail_idx)) {
-					LOG_DEBUG(VHOST_DATA,
-						"(%"PRIu64") Failed "
-						"to get enough desc from "
-						"vring\n",
-						dev->device_fh);
-					return pkt_idx;
-				} else {
-					uint16_t wrapped_idx =
-						(res_cur_idx) & (vq->size - 1);
-					uint32_t idx =
-						vq->avail->ring[wrapped_idx];
-					uint8_t next_desc;
-
-					do {
-						next_desc = 0;
-						secure_len += vq->desc[idx].len;
-						if (vq->desc[idx].flags &
-							VRING_DESC_F_NEXT) {
-							idx = vq->desc[idx].next;
-							next_desc = 1;
-						}
-					} while (next_desc);
+				if (unlikely(res_cur_idx == avail_idx))
+					goto merge_rx_exit;
 
-					res_cur_idx++;
-				}
+				update_secure_len(vq, res_cur_idx,
+					&secure_len, &vec_idx);
+				res_cur_idx++;
 			} while (pkt_len > secure_len);
 
 			/* vq->last_used_idx_res is atomically updated. */
@@ -465,33 +529,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, struct rte_mbuf **pkts,
 						res_cur_idx);
 		} while (success == 0);
 
-		id = res_base_idx;
-		need_cnt = res_cur_idx - res_base_idx;
-
-		for (i = 0; i < need_cnt; i++, id++) {
-			uint16_t wrapped_idx = id & (vq->size - 1);
-			uint32_t idx = vq->avail->ring[wrapped_idx];
-			uint8_t next_desc;
-			do {
-				next_desc = 0;
-				vq->buf_vec[vec_idx].buf_addr =
-					vq->desc[idx].addr;
-				vq->buf_vec[vec_idx].buf_len =
-					vq->desc[idx].len;
-				vq->buf_vec[vec_idx].desc_idx = idx;
-				vec_idx++;
-
-				if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
-					idx = vq->desc[idx].next;
-					next_desc = 1;
-				}
-			} while (next_desc);
-		}
-
-		res_end_idx = res_cur_idx;
-
-		entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
-			res_end_idx, pkts[pkt_idx]);
+		entry_success = copy_from_mbuf_to_vring(dev, queue_id,
+			res_base_idx, res_cur_idx, pkts[pkt_idx]);
 
 		rte_compiler_barrier();
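For mergeable RX the reservation is sized by the packet rather than one descriptor per packet: the inner loop keeps consuming avail-ring entries, each possibly a whole chain, until the accumulated capacity covers pkt_len, and only then is the window committed with the compare-and-set. A condensed sketch of that sizing loop; chain_capacity() is a dummy stand-in for what update_secure_len() computes:

#include <stdbool.h>
#include <stdint.h>

/* Dummy: byte capacity of the chain behind one avail-ring entry.
 * The real code walks vq->desc and fills vq->buf_vec as it goes. */
static uint32_t
chain_capacity(uint16_t slot)
{
	(void)slot;
	return 2048;
}

/* Grow the window [base, *end) until it can hold pkt_len bytes;
 * returns false when the avail ring runs dry first (the
 * goto merge_rx_exit case above). */
static bool
size_window(uint16_t base, uint16_t avail_idx, uint32_t pkt_len,
	uint16_t *end)
{
	uint32_t secure_len = 0;
	uint16_t cur = base;

	do {
		if (cur == avail_idx)
			return false;
		secure_len += chain_capacity(cur);
		cur++;
	} while (pkt_len > secure_len);

	*end = cur;
	return true;
}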
@@ -503,38 +542,157 @@ virtio_dev_merge_rx(struct virtio_net *dev, struct rte_mbuf **pkts,
 			rte_pause();
 
 		*(volatile uint16_t *)&vq->used->idx += entry_success;
-		vq->last_used_idx = res_end_idx;
+		vq->last_used_idx = res_cur_idx;
+	}
+
+merge_rx_exit:
+	if (likely(pkt_idx)) {
+		/* flush used->idx update before we read avail->flags. */
+		rte_mb();
 
 		/* Kick the guest if necessary. */
 		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-			eventfd_write((int)vq->kickfd, 1);
+			eventfd_write(vq->callfd, (eventfd_t)1);
 	}
 
-	return count;
+	return pkt_idx;
+}
+
+uint16_t
+rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint16_t count)
+{
+	if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
+		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
+	else
+		return virtio_dev_rx(dev, queue_id, pkts, count);
+}
+
+static void
+parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	void *l3_hdr = NULL;
+	struct ether_hdr *eth_hdr;
+	uint16_t ethertype;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+	m->l2_len = sizeof(struct ether_hdr);
+	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+	if (ethertype == ETHER_TYPE_VLAN) {
+		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+
+		m->l2_len += sizeof(struct vlan_hdr);
+		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
+	}
+
+	l3_hdr = (char *)eth_hdr + m->l2_len;
+
+	switch (ethertype) {
+	case ETHER_TYPE_IPv4:
+		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+		*l4_proto = ipv4_hdr->next_proto_id;
+		m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV4;
+		break;
+	case ETHER_TYPE_IPv6:
+		ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+		*l4_proto = ipv6_hdr->proto;
+		m->l3_len = sizeof(struct ipv6_hdr);
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV6;
+		break;
+	default:
+		m->l3_len = 0;
+		*l4_proto = 0;
+		break;
+	}
+}
 
-/* This function works for TX packets with mergeable feature enabled. */
-static inline uint16_t __attribute__((always_inline))
-virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+static inline void __attribute__((always_inline))
+vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
+{
+	uint16_t l4_proto = 0;
+	void *l4_hdr = NULL;
+	struct tcp_hdr *tcp_hdr = NULL;
+
+	parse_ethernet(m, &l4_proto, &l4_hdr);
+	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
+			switch (hdr->csum_offset) {
+			case (offsetof(struct tcp_hdr, cksum)):
+				if (l4_proto == IPPROTO_TCP)
+					m->ol_flags |= PKT_TX_TCP_CKSUM;
+				break;
+			case (offsetof(struct udp_hdr, dgram_cksum)):
+				if (l4_proto == IPPROTO_UDP)
+					m->ol_flags |= PKT_TX_UDP_CKSUM;
+				break;
+			case (offsetof(struct sctp_hdr, cksum)):
+				if (l4_proto == IPPROTO_SCTP)
+					m->ol_flags |= PKT_TX_SCTP_CKSUM;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+		case VIRTIO_NET_HDR_GSO_TCPV4:
+		case VIRTIO_NET_HDR_GSO_TCPV6:
+			tcp_hdr = (struct tcp_hdr *)l4_hdr;
+			m->ol_flags |= PKT_TX_TCP_SEG;
+			m->tso_segsz = hdr->gso_size;
+			m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+			break;
+		default:
+			RTE_LOG(WARNING, VHOST_DATA,
+				"unsupported gso type %u.\n", hdr->gso_type);
+			break;
+		}
+	}
+}
+
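vhost_dequeue_offload() above trusts exactly two fields of the guest's virtio_net_hdr: csum_start must equal l2_len + l3_len, and csum_offset then selects which L4 checksum flag to set. That dispatch can be read as a small offset-to-flag table; a sketch with abbreviated structs (only the checksum fields are laid out, so these are not the real protocol headers):

#include <stddef.h>
#include <stdint.h>

/* Abbreviated L4 headers: the padding just places the checksum
 * field at its real offset (16 for TCP, 6 for UDP). */
struct tcp_h { uint8_t pad[16]; uint16_t cksum; };
struct udp_h { uint8_t pad[6]; uint16_t dgram_cksum; };

enum { TX_TCP_CKSUM = 1 << 0, TX_UDP_CKSUM = 1 << 1 };

/* Map a guest-provided csum_offset to an mbuf-style TX flag,
 * mirroring the switch in vhost_dequeue_offload(). */
static uint64_t
csum_offset_to_flag(uint16_t csum_offset)
{
	switch (csum_offset) {
	case offsetof(struct tcp_h, cksum):
		return TX_TCP_CKSUM;
	case offsetof(struct udp_h, dgram_cksum):
		return TX_UDP_CKSUM;
	default:
		return 0;	/* unknown offset: leave it to software */
	}
}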
+uint16_t
+rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
 {
 	struct rte_mbuf *m, *prev;
 	struct vhost_virtqueue *vq;
 	struct vring_desc *desc;
 	uint64_t vb_addr = 0;
+	uint64_t vb_net_hdr_addr = 0;
 	uint32_t head[MAX_PKT_BURST];
 	uint32_t used_idx;
 	uint32_t i;
 	uint16_t free_entries, entry_success = 0;
 	uint16_t avail_idx;
+	struct virtio_net_hdr *hdr = NULL;
+
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+			__func__, dev->device_fh, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(vq->enabled == 0))
+		return 0;
 
-	vq = dev->virtqueue[VIRTIO_TXQ];
 	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
 
 	/* If there are no available buffers then return. */
 	if (vq->last_used_idx == avail_idx)
 		return 0;
 
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_tx()\n",
+	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
 		dev->device_fh);
 
 	/* Prefetch available ring to retrieve head indexes. */
@@ -548,7 +706,7 @@ virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool, struc
 	free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
-		dev->device_fh, free_entries);
+			dev->device_fh, free_entries);
 	/* Retrieve all of the head indexes first to avoid caching issues. */
 	for (i = 0; i < free_entries; i++)
 		head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
@@ -567,8 +725,18 @@ virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool, struc
 
 		desc = &vq->desc[head[entry_success]];
 
+		vb_net_hdr_addr = gpa_to_vva(dev, desc->addr);
+		hdr = (struct virtio_net_hdr *)((uintptr_t)vb_net_hdr_addr);
+
 		/* Discard first buffer as it is the virtio header */
-		desc = &vq->desc[desc->next];
+		if (desc->flags & VRING_DESC_F_NEXT) {
+			desc = &vq->desc[desc->next];
+			vb_offset = 0;
+			vb_avail = desc->len;
+		} else {
+			vb_offset = vq->vhost_hlen;
+			vb_avail = desc->len - vb_offset;
+		}
 
 		/* Buffer address translation. */
 		vb_addr = gpa_to_vva(dev, desc->addr);
@@ -587,14 +755,12 @@ virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool, struc
 		vq->used->ring[used_idx].id = head[entry_success];
 		vq->used->ring[used_idx].len = 0;
 
-		vb_offset = 0;
-		vb_avail = desc->len;
-
 		/* Allocate an mbuf and populate the structure. */
 		m = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(m == NULL)) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
-			return entry_success;
+			break;
 		}
 		seg_offset = 0;
 		seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
@@ -606,7 +772,7 @@ virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool, struc
 		cur = m;
 		prev = m;
 		while (cpy_len != 0) {
-			rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
+			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, seg_offset),
 				(void *)((uintptr_t)(vb_addr + vb_offset)),
 				cpy_len);
@@ -700,6 +866,8 @@ virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool, struc
 			break;
 
 		m->nb_segs = seg_num;
+		if ((hdr->flags != 0) || (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE))
+			vhost_dequeue_offload(hdr, m);
 
 		pkts[entry_success] = m;
 		vq->last_used_idx++;
@@ -710,7 +878,6 @@ virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool, struc
 	vq->used->idx += entry_success;
 	/* Kick guest if required. */
 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
-		eventfd_write((int)vq->kickfd, 1);
+		eventfd_write(vq->callfd, (eventfd_t)1);
 	return entry_success;
-
 }
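With this patch the burst APIs are queue-addressed: callers pass the flat virtqueue index, where rings 0 and 1 of queue pair 0 are the classic VIRTIO_RXQ/VIRTIO_TXQ. A hedged usage sketch of one polling iteration; `dev` and `mp` are assumed to come from the application's vhost new_device() callback and mempool setup, and vhost_qp0_poll() is an invented name:

#include <rte_mbuf.h>
#include <rte_virtio_net.h>

#define BURST 32

/* Drain the guest's TX ring and feed its RX ring for queue pair 0. */
static void
vhost_qp0_poll(struct virtio_net *dev, struct rte_mempool *mp,
	struct rte_mbuf **fwd, uint16_t nb_fwd)
{
	struct rte_mbuf *pkts[BURST];
	uint16_t n;

	/* Guest -> host: VIRTIO_TXQ is ring index 1 of queue pair 0. */
	n = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mp, pkts, BURST);
	/* ... forward the n dequeued packets, then transmit or free them ... */

	/* Host -> guest: VIRTIO_RXQ is ring index 0 of queue pair 0. */
	n = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, fwd, nb_fwd);
	/* Fewer than nb_fwd may be accepted; the caller frees the rest. */
	(void)n;
}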