#include <rte_memcpy.h>
#include <rte_ether.h>
#include <rte_ip.h>
-#include <rte_virtio_net.h>
+#include <rte_vhost.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_sctp.h>
#include <rte_arp.h>
+#include "iotlb.h"
#include "vhost.h"
#define MAX_PKT_BURST 32
-#define VHOST_LOG_PAGE 4096
-static inline void __attribute__((always_inline))
-vhost_log_page(uint8_t *log_base, uint64_t page)
-{
- log_base[page / 8] |= 1 << (page % 8);
-}
-
-static inline void __attribute__((always_inline))
-vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
-{
- uint64_t page;
-
- if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
- !dev->log_base || !len))
- return;
-
- if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
- return;
-
- /* To make sure guest memory updates are committed before logging */
- rte_smp_wmb();
-
- page = addr / VHOST_LOG_PAGE;
- while (page * VHOST_LOG_PAGE < addr + len) {
- vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
- page += 1;
- }
-}
-
-static inline void __attribute__((always_inline))
-vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint64_t offset, uint64_t len)
-{
- vhost_log_write(dev, vq->log_guest_addr + offset, len);
-}
+#define MAX_BATCH_LEN 256
static bool
-is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
+is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
{
- return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
+ return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint16_t to, uint16_t from, uint16_t size)
{
size * sizeof(struct vring_used_elem));
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
sizeof(vq->used->idx));
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
update_shadow_used_ring(struct vhost_virtqueue *vq,
uint16_t desc_idx, uint16_t len)
{
vq->shadow_used_ring[i].len = len;
}
+static inline void
+do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ struct batch_copy_elem *elem = vq->batch_copy_elems;
+ uint16_t count = vq->batch_copy_nb_elems;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
+ vhost_log_write(dev, elem[i].log_addr, elem[i].len);
+ PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
+ }
+}
+
+static inline void
+do_data_copy_dequeue(struct vhost_virtqueue *vq)
+{
+ struct batch_copy_elem *elem = vq->batch_copy_elems;
+ uint16_t count = vq->batch_copy_nb_elems;
+ int i;
+
+ for (i = 0; i < count; i++)
+ rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
+}
+
+/* avoid write operation when necessary, to lessen cache issues */
+#define ASSIGN_UNLESS_EQUAL(var, val) do { \
+ if ((var) != (val)) \
+ (var) = (val); \
+} while (0)
+
static void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
{
- if (m_buf->ol_flags & PKT_TX_L4_MASK) {
+ uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;
+
+ if (m_buf->ol_flags & PKT_TX_TCP_SEG)
+ csum_l4 |= PKT_TX_TCP_CKSUM;
+
+ if (csum_l4) {
net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
- switch (m_buf->ol_flags & PKT_TX_L4_MASK) {
+ switch (csum_l4) {
case PKT_TX_TCP_CKSUM:
net_hdr->csum_offset = (offsetof(struct tcp_hdr,
cksum));
cksum));
break;
}
+ } else {
+ ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0);
+ ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0);
+ ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0);
+ }
+
+ /* IP cksum verification cannot be bypassed, then calculate here */
+ if (m_buf->ol_flags & PKT_TX_IP_CKSUM) {
+ struct ipv4_hdr *ipv4_hdr;
+
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct ipv4_hdr *,
+ m_buf->l2_len);
+ ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
}
if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
net_hdr->gso_size = m_buf->tso_segsz;
net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
+ m_buf->l4_len;
+ } else {
+ ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0);
+ ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0);
+ ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0);
}
}
-static inline void
-copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
- struct virtio_net_hdr_mrg_rxbuf hdr)
-{
- if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
- *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
- else
- *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
-}
-
-static inline int __attribute__((always_inline))
-copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
- struct rte_mbuf *m, uint16_t desc_idx, uint32_t size)
+static __rte_always_inline int
+copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct vring_desc *descs, struct rte_mbuf *m,
+ uint16_t desc_idx, uint32_t size)
{
uint32_t desc_avail, desc_offset;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
struct vring_desc *desc;
uint64_t desc_addr;
- struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
/* A counter to avoid desc dead loop chain */
uint16_t nr_desc = 1;
+ struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
+ uint16_t copy_nb = vq->batch_copy_nb_elems;
+ int error = 0;
desc = &descs[desc_idx];
- desc_addr = gpa_to_vva(dev, desc->addr);
+ desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ desc->len, VHOST_ACCESS_RW);
/*
* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
* performance issue with some versions of gcc (4.8.4 and 5.3.0) which
* otherwise stores offset on the stack instead of in a register.
*/
- if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
- return -1;
+ if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) {
+ error = -1;
+ goto out;
+ }
rte_prefetch0((void *)(uintptr_t)desc_addr);
- virtio_enqueue_offload(m, &virtio_hdr.hdr);
- copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+ virtio_enqueue_offload(m, (struct virtio_net_hdr *)(uintptr_t)desc_addr);
vhost_log_write(dev, desc->addr, dev->vhost_hlen);
PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
if (desc_avail == 0) {
if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
/* Room in vring buffer is not enough */
- return -1;
+ error = -1;
+ goto out;
+ }
+ if (unlikely(desc->next >= size || ++nr_desc > size)) {
+ error = -1;
+ goto out;
}
- if (unlikely(desc->next >= size || ++nr_desc > size))
- return -1;
desc = &descs[desc->next];
- desc_addr = gpa_to_vva(dev, desc->addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ desc->len,
+ VHOST_ACCESS_RW);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
desc_offset = 0;
desc_avail = desc->len;
}
cpy_len = RTE_MIN(desc_avail, mbuf_avail);
- rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
- cpy_len);
- vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
- cpy_len, 0);
+ if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
+ rte_memcpy((void *)((uintptr_t)(desc_addr +
+ desc_offset)),
+ rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
+ cpy_len);
+ vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+ cpy_len, 0);
+ } else {
+ batch_copy[copy_nb].dst =
+ (void *)((uintptr_t)(desc_addr + desc_offset));
+ batch_copy[copy_nb].src =
+ rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
+ batch_copy[copy_nb].log_addr = desc->addr + desc_offset;
+ batch_copy[copy_nb].len = cpy_len;
+ copy_nb++;
+ }
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
desc_offset += cpy_len;
}
- return 0;
+out:
+ vq->batch_copy_nb_elems = copy_nb;
+
+ return error;
}
/**
* This function adds buffers to the virtio devices RX virtqueue. Buffers can
* be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are succesfully
+ * count is returned to indicate the number of packets that are successfully
* added to the RX queue. This function works when the mbuf is scattered, but
* it doesn't support the mergeable feature.
*/
-static inline uint32_t __attribute__((always_inline))
+static __rte_always_inline uint32_t
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint32_t count)
{
uint32_t i, sz;
LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
dev->vid, __func__, queue_id);
return 0;
if (unlikely(vq->enabled == 0))
return 0;
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0)) {
+ if (unlikely(vring_translate(dev, vq) < 0)) {
+ count = 0;
+ goto out;
+ }
+ }
+
avail_idx = *((volatile uint16_t *)&vq->avail->idx);
start_idx = vq->last_used_idx;
free_entries = avail_idx - start_idx;
count = RTE_MIN(count, free_entries);
count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
if (count == 0)
- return 0;
+ goto out;
LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
dev->vid, start_idx, start_idx + count);
+ vq->batch_copy_nb_elems = 0;
+
/* Retrieve all of the desc indexes first to avoid caching issues. */
rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
for (i = 0; i < count; i++) {
int err;
if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
- descs = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
- vq->desc[desc_idx].addr);
+ descs = (struct vring_desc *)(uintptr_t)
+ vhost_iova_to_vva(dev,
+ vq, vq->desc[desc_idx].addr,
+ vq->desc[desc_idx].len,
+ VHOST_ACCESS_RO);
if (unlikely(!descs)) {
count = i;
break;
sz = vq->size;
}
- err = copy_mbuf_to_desc(dev, descs, pkts[i], desc_idx, sz);
+ err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz);
if (unlikely(err)) {
- used_idx = (start_idx + i) & (vq->size - 1);
- vq->used->ring[used_idx].len = dev->vhost_hlen;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
+ count = i;
+ break;
}
if (i + 1 < count)
rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
}
+ do_data_copy_enqueue(dev, vq);
+
rte_smp_wmb();
*(volatile uint16_t *)&vq->used->idx += count;
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
&& (vq->callfd >= 0))
eventfd_write(vq->callfd, (eventfd_t)1);
+out:
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_unlock(vq);
+
return count;
}
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t avail_idx, uint32_t *vec_idx,
struct buf_vector *buf_vec, uint16_t *desc_chain_head,
if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
descs = (struct vring_desc *)(uintptr_t)
- gpa_to_vva(dev, vq->desc[idx].addr);
+ vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
+ vq->desc[idx].len,
+ VHOST_ACCESS_RO);
if (unlikely(!descs))
return -1;
return 0;
}
-static inline int __attribute__((always_inline))
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
- struct buf_vector *buf_vec, uint16_t num_buffers)
+static __rte_always_inline int
+copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct rte_mbuf *m, struct buf_vector *buf_vec,
+ uint16_t num_buffers)
{
- struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
uint32_t vec_idx = 0;
uint64_t desc_addr;
uint32_t mbuf_offset, mbuf_avail;
uint32_t cpy_len;
uint64_t hdr_addr, hdr_phys_addr;
struct rte_mbuf *hdr_mbuf;
+ struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
+ uint16_t copy_nb = vq->batch_copy_nb_elems;
+ int error = 0;
- if (unlikely(m == NULL))
- return -1;
+ if (unlikely(m == NULL)) {
+ error = -1;
+ goto out;
+ }
- desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
- if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr)
- return -1;
+ desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr,
+ buf_vec[vec_idx].buf_len,
+ VHOST_ACCESS_RW);
+ if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) {
+ error = -1;
+ goto out;
+ }
hdr_mbuf = m;
hdr_addr = desc_addr;
hdr_phys_addr = buf_vec[vec_idx].buf_addr;
rte_prefetch0((void *)(uintptr_t)hdr_addr);
- virtio_hdr.num_buffers = num_buffers;
LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
dev->vid, num_buffers);
/* done with current desc buf, get the next one */
if (desc_avail == 0) {
vec_idx++;
- desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr =
+ vhost_iova_to_vva(dev, vq,
+ buf_vec[vec_idx].buf_addr,
+ buf_vec[vec_idx].buf_len,
+ VHOST_ACCESS_RW);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
/* Prefetch buffer address. */
rte_prefetch0((void *)(uintptr_t)desc_addr);
}
if (hdr_addr) {
- virtio_enqueue_offload(hdr_mbuf, &virtio_hdr.hdr);
- copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr);
+ struct virtio_net_hdr_mrg_rxbuf *hdr;
+
+ hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)
+ hdr_addr;
+ virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
+ ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers);
+
vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen);
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
}
cpy_len = RTE_MIN(desc_avail, mbuf_avail);
- rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
- cpy_len);
- vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset,
- cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
- cpy_len, 0);
+
+ if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
+ rte_memcpy((void *)((uintptr_t)(desc_addr +
+ desc_offset)),
+ rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
+ cpy_len);
+ vhost_log_write(dev,
+ buf_vec[vec_idx].buf_addr + desc_offset,
+ cpy_len);
+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+ cpy_len, 0);
+ } else {
+ batch_copy[copy_nb].dst =
+ (void *)((uintptr_t)(desc_addr + desc_offset));
+ batch_copy[copy_nb].src =
+ rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
+ batch_copy[copy_nb].log_addr =
+ buf_vec[vec_idx].buf_addr + desc_offset;
+ batch_copy[copy_nb].len = cpy_len;
+ copy_nb++;
+ }
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
desc_offset += cpy_len;
}
- return 0;
+out:
+ vq->batch_copy_nb_elems = copy_nb;
+
+ return error;
}
-static inline uint32_t __attribute__((always_inline))
+static __rte_always_inline uint32_t
virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint32_t count)
{
uint16_t avail_head;
LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
dev->vid, __func__, queue_id);
return 0;
if (unlikely(vq->enabled == 0))
return 0;
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0))
+ goto out;
+
count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
if (count == 0)
- return 0;
+ goto out;
+
+ vq->batch_copy_nb_elems = 0;
rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);
- if (copy_mbuf_to_desc_mergeable(dev, pkts[pkt_idx],
+ if (copy_mbuf_to_desc_mergeable(dev, vq, pkts[pkt_idx],
buf_vec, num_buffers) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
vq->last_avail_idx += num_buffers;
}
+ do_data_copy_enqueue(dev, vq);
+
if (likely(vq->shadow_used_idx)) {
flush_shadow_used_ring(dev, vq);
eventfd_write(vq->callfd, (eventfd_t)1);
}
+out:
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_unlock(vq);
+
return pkt_idx;
}
virtio_net_with_host_offload(struct virtio_net *dev)
{
if (dev->features &
- (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_ECN |
- VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
- VIRTIO_NET_F_HOST_UFO))
+ ((1ULL << VIRTIO_NET_F_CSUM) |
+ (1ULL << VIRTIO_NET_F_HOST_ECN) |
+ (1ULL << VIRTIO_NET_F_HOST_TSO4) |
+ (1ULL << VIRTIO_NET_F_HOST_TSO6) |
+ (1ULL << VIRTIO_NET_F_HOST_UFO)))
return true;
return false;
switch (ethertype) {
case ETHER_TYPE_IPv4:
- ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+ ipv4_hdr = l3_hdr;
*l4_proto = ipv4_hdr->next_proto_id;
m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
*l4_hdr = (char *)l3_hdr + m->l3_len;
m->ol_flags |= PKT_TX_IPV4;
break;
case ETHER_TYPE_IPv6:
- ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+ ipv6_hdr = l3_hdr;
*l4_proto = ipv6_hdr->proto;
m->l3_len = sizeof(struct ipv6_hdr);
*l4_hdr = (char *)l3_hdr + m->l3_len;
}
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
{
uint16_t l4_proto = 0;
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4:
case VIRTIO_NET_HDR_GSO_TCPV6:
- tcp_hdr = (struct tcp_hdr *)l4_hdr;
+ tcp_hdr = l4_hdr;
m->ol_flags |= PKT_TX_TCP_SEG;
m->tso_segsz = hdr->gso_size;
m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
return 0;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
put_zmbuf(struct zcopy_mbuf *zmbuf)
{
zmbuf->in_use = 0;
}
-static inline int __attribute__((always_inline))
-copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
- uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
+static __rte_always_inline int
+copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct vring_desc *descs, uint16_t max_desc,
+ struct rte_mbuf *m, uint16_t desc_idx,
struct rte_mempool *mbuf_pool)
{
struct vring_desc *desc;
struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
uint32_t nr_desc = 1;
+ struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
+ uint16_t copy_nb = vq->batch_copy_nb_elems;
+ int error = 0;
desc = &descs[desc_idx];
if (unlikely((desc->len < dev->vhost_hlen)) ||
- (desc->flags & VRING_DESC_F_INDIRECT))
- return -1;
+ (desc->flags & VRING_DESC_F_INDIRECT)) {
+ error = -1;
+ goto out;
+ }
- desc_addr = gpa_to_vva(dev, desc->addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr = vhost_iova_to_vva(dev,
+ vq, desc->addr,
+ desc->len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
if (virtio_net_with_host_offload(dev)) {
hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
if (likely((desc->len == dev->vhost_hlen) &&
(desc->flags & VRING_DESC_F_NEXT) != 0)) {
desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
- return -1;
+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+ error = -1;
+ goto out;
+ }
- desc_addr = gpa_to_vva(dev, desc->addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr = vhost_iova_to_vva(dev,
+ vq, desc->addr,
+ desc->len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
desc_offset = 0;
desc_avail = desc->len;
cur->data_len = cpy_len;
cur->data_off = 0;
cur->buf_addr = (void *)(uintptr_t)desc_addr;
- cur->buf_physaddr = hpa;
+ cur->buf_iova = hpa;
/*
* In zero copy mode, one mbuf can only reference data
*/
mbuf_avail = cpy_len;
} else {
- rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
- mbuf_offset),
- (void *)((uintptr_t)(desc_addr + desc_offset)),
- cpy_len);
+ if (likely(cpy_len > MAX_BATCH_LEN ||
+ copy_nb >= vq->size ||
+ (hdr && cur == m))) {
+ rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+ mbuf_offset),
+ (void *)((uintptr_t)(desc_addr +
+ desc_offset)),
+ cpy_len);
+ } else {
+ batch_copy[copy_nb].dst =
+ rte_pktmbuf_mtod_offset(cur, void *,
+ mbuf_offset);
+ batch_copy[copy_nb].src =
+ (void *)((uintptr_t)(desc_addr +
+ desc_offset));
+ batch_copy[copy_nb].len = cpy_len;
+ copy_nb++;
+ }
}
mbuf_avail -= cpy_len;
break;
if (unlikely(desc->next >= max_desc ||
- ++nr_desc > max_desc))
- return -1;
+ ++nr_desc > max_desc)) {
+ error = -1;
+ goto out;
+ }
desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
- return -1;
+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+ error = -1;
+ goto out;
+ }
- desc_addr = gpa_to_vva(dev, desc->addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr = vhost_iova_to_vva(dev,
+ vq, desc->addr,
+ desc->len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
rte_prefetch0((void *)(uintptr_t)desc_addr);
if (unlikely(cur == NULL)) {
RTE_LOG(ERR, VHOST_DATA, "Failed to "
"allocate memory for mbuf.\n");
- return -1;
+ error = -1;
+ goto out;
}
+ if (unlikely(dev->dequeue_zero_copy))
+ rte_mbuf_refcnt_update(cur, 1);
prev->next = cur;
prev->data_len = mbuf_offset;
if (hdr)
vhost_dequeue_offload(hdr, m);
- return 0;
+out:
+ vq->batch_copy_nb_elems = copy_nb;
+
+ return error;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t used_idx, uint32_t desc_idx)
{
sizeof(vq->used->ring[used_idx]));
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t count)
{
eventfd_write(vq->callfd, (eventfd_t)1);
}
-static inline struct zcopy_mbuf *__attribute__((always_inline))
+static __rte_always_inline struct zcopy_mbuf *
get_zmbuf(struct vhost_virtqueue *vq)
{
uint16_t i;
return NULL;
}
-static inline bool __attribute__((always_inline))
+static __rte_always_inline bool
mbuf_is_consumed(struct rte_mbuf *m)
{
while (m) {
if (!dev)
return 0;
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
dev->vid, __func__, queue_id);
return 0;
if (unlikely(vq->enabled == 0))
return 0;
+ vq->batch_copy_nb_elems = 0;
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0))
+ goto out;
+
if (unlikely(dev->dequeue_zero_copy)) {
struct zcopy_mbuf *zmbuf, *next;
int nr_updated = 0;
rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
- desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
- vq->desc[desc_indexes[i]].addr);
+ desc = (struct vring_desc *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq,
+ vq->desc[desc_indexes[i]].addr,
+ sizeof(*desc),
+ VHOST_ACCESS_RO);
if (unlikely(!desc))
break;
break;
}
- err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
+ err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx,
+ mbuf_pool);
if (unlikely(err)) {
rte_pktmbuf_free(pkts[i]);
break;
vq->last_avail_idx += i;
if (likely(dev->dequeue_zero_copy == 0)) {
+ do_data_copy_dequeue(vq);
vq->last_used_idx += i;
update_used_idx(dev, vq, i);
}
out:
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_unlock(vq);
+
if (unlikely(rarp_mbuf != NULL)) {
/*
* Inject it to the head of "pkts" array, so that switch's mac