X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_vhost%2Fvirtio_net.c;h=751c1f37337d6a47008f20e947dc921f1010532b;hb=8d44fc3a81ecdeb7128bb3edea5c88c5d880f145;hp=50aeaa03016919ae2bedf060f0d75ccd125c49ec;hpb=084fac96ca0331f2646002c302f389415fef69e2;p=dpdk.git diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 50aeaa0301..751c1f3733 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -31,12 +31,48 @@ rxvq_is_mergeable(struct virtio_net *dev) return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); } +static __rte_always_inline bool +virtio_net_is_inorder(struct virtio_net *dev) +{ + return dev->features & (1ULL << VIRTIO_F_IN_ORDER); +} + static bool is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) { return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; } +static inline void +do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + struct batch_copy_elem *elem = vq->batch_copy_elems; + uint16_t count = vq->batch_copy_nb_elems; + int i; + + for (i = 0; i < count; i++) { + rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); + vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, + elem[i].len); + PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); + } + + vq->batch_copy_nb_elems = 0; +} + +static inline void +do_data_copy_dequeue(struct vhost_virtqueue *vq) +{ + struct batch_copy_elem *elem = vq->batch_copy_elems; + uint16_t count = vq->batch_copy_nb_elems; + int i; + + for (i = 0; i < count; i++) + rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); + + vq->batch_copy_nb_elems = 0; +} + static __rte_always_inline void do_flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq, @@ -71,11 +107,10 @@ flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) } vq->last_used_idx += vq->shadow_used_idx; - rte_smp_wmb(); - vhost_log_cache_sync(dev, vq); - *(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx; + __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx, + __ATOMIC_RELEASE); vq->shadow_used_idx = 0; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); @@ -92,8 +127,8 @@ update_shadow_used_ring_split(struct vhost_virtqueue *vq, } static __rte_always_inline void -flush_shadow_used_ring_packed(struct virtio_net *dev, - struct vhost_virtqueue *vq) +vhost_flush_enqueue_shadow_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq) { int i; uint16_t used_idx = vq->last_used_idx; @@ -140,11 +175,7 @@ flush_shadow_used_ring_packed(struct virtio_net *dev, head_flags = flags; } - vq->last_used_idx += vq->shadow_used_packed[i].count; - if (vq->last_used_idx >= vq->size) { - vq->used_wrap_counter ^= 1; - vq->last_used_idx -= vq->size; - } + vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count); } vq->desc_packed[head_idx].flags = head_flags; @@ -159,43 +190,200 @@ flush_shadow_used_ring_packed(struct virtio_net *dev, } static __rte_always_inline void -update_shadow_used_ring_packed(struct vhost_virtqueue *vq, - uint16_t desc_idx, uint32_t len, uint16_t count) +vhost_flush_dequeue_shadow_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq) { - uint16_t i = vq->shadow_used_idx++; + struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0]; - vq->shadow_used_packed[i].id = desc_idx; - vq->shadow_used_packed[i].len = len; - vq->shadow_used_packed[i].count = count; + vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id; + rte_smp_wmb(); + vq->desc_packed[vq->shadow_last_used_idx].flags = used_elem->flags; + + vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx * + sizeof(struct vring_packed_desc), + sizeof(struct vring_packed_desc)); + vq->shadow_used_idx = 0; + vhost_log_cache_sync(dev, vq); } -static inline void -do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) +static __rte_always_inline void +vhost_flush_enqueue_batch_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + uint64_t *lens, + uint16_t *ids) { - struct batch_copy_elem *elem = vq->batch_copy_elems; - uint16_t count = vq->batch_copy_nb_elems; - int i; + uint16_t i; + uint16_t flags; - for (i = 0; i < count; i++) { - rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); - vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len); - PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); + if (vq->shadow_used_idx) { + do_data_copy_enqueue(dev, vq); + vhost_flush_enqueue_shadow_packed(dev, vq); } - vq->batch_copy_nb_elems = 0; + flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + vq->desc_packed[vq->last_used_idx + i].id = ids[i]; + vq->desc_packed[vq->last_used_idx + i].len = lens[i]; + } + + rte_smp_wmb(); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + vq->desc_packed[vq->last_used_idx + i].flags = flags; + + vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * + sizeof(struct vring_packed_desc), + sizeof(struct vring_packed_desc) * + PACKED_BATCH_SIZE); + vhost_log_cache_sync(dev, vq); + + vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); } -static inline void -do_data_copy_dequeue(struct vhost_virtqueue *vq) +static __rte_always_inline void +vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq, + uint16_t id) { - struct batch_copy_elem *elem = vq->batch_copy_elems; - uint16_t count = vq->batch_copy_nb_elems; - int i; + vq->shadow_used_packed[0].id = id; + + if (!vq->shadow_used_idx) { + vq->shadow_last_used_idx = vq->last_used_idx; + vq->shadow_used_packed[0].flags = + PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); + vq->shadow_used_packed[0].len = 0; + vq->shadow_used_packed[0].count = 1; + vq->shadow_used_idx++; + } - for (i = 0; i < count; i++) - rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); + vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); +} - vq->batch_copy_nb_elems = 0; +static __rte_always_inline void +vhost_shadow_dequeue_batch_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + uint16_t *ids) +{ + uint16_t flags; + uint16_t i; + uint16_t begin; + + flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter); + + if (!vq->shadow_used_idx) { + vq->shadow_last_used_idx = vq->last_used_idx; + vq->shadow_used_packed[0].id = ids[0]; + vq->shadow_used_packed[0].len = 0; + vq->shadow_used_packed[0].count = 1; + vq->shadow_used_packed[0].flags = flags; + vq->shadow_used_idx++; + begin = 1; + } else + begin = 0; + + vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) { + vq->desc_packed[vq->last_used_idx + i].id = ids[i]; + vq->desc_packed[vq->last_used_idx + i].len = 0; + } + + rte_smp_wmb(); + vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) + vq->desc_packed[vq->last_used_idx + i].flags = flags; + + vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * + sizeof(struct vring_packed_desc), + sizeof(struct vring_packed_desc) * + PACKED_BATCH_SIZE); + vhost_log_cache_sync(dev, vq); + + vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE); +} + +static __rte_always_inline void +vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, + uint16_t buf_id, + uint16_t count) +{ + uint16_t flags; + + flags = vq->desc_packed[vq->last_used_idx].flags; + if (vq->used_wrap_counter) { + flags |= VRING_DESC_F_USED; + flags |= VRING_DESC_F_AVAIL; + } else { + flags &= ~VRING_DESC_F_USED; + flags &= ~VRING_DESC_F_AVAIL; + } + + if (!vq->shadow_used_idx) { + vq->shadow_last_used_idx = vq->last_used_idx; + + vq->shadow_used_packed[0].id = buf_id; + vq->shadow_used_packed[0].len = 0; + vq->shadow_used_packed[0].flags = flags; + vq->shadow_used_idx++; + } else { + vq->desc_packed[vq->last_used_idx].id = buf_id; + vq->desc_packed[vq->last_used_idx].len = 0; + vq->desc_packed[vq->last_used_idx].flags = flags; + } + + vq_inc_last_used_packed(vq, count); +} + +static __rte_always_inline void +vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, + uint16_t buf_id, + uint16_t count) +{ + uint16_t flags; + + vq->shadow_used_packed[0].id = buf_id; + + flags = vq->desc_packed[vq->last_used_idx].flags; + if (vq->used_wrap_counter) { + flags |= VRING_DESC_F_USED; + flags |= VRING_DESC_F_AVAIL; + } else { + flags &= ~VRING_DESC_F_USED; + flags &= ~VRING_DESC_F_AVAIL; + } + + if (!vq->shadow_used_idx) { + vq->shadow_last_used_idx = vq->last_used_idx; + vq->shadow_used_packed[0].len = 0; + vq->shadow_used_packed[0].flags = flags; + vq->shadow_used_idx++; + } + + vq_inc_last_used_packed(vq, count); +} + +static __rte_always_inline void +vhost_shadow_enqueue_single_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + uint32_t len[], + uint16_t id[], + uint16_t count[], + uint16_t num_buffers) +{ + uint16_t i; + for (i = 0; i < num_buffers; i++) { + /* enqueue shadow flush action aligned with batch num */ + if (!vq->shadow_used_idx) + vq->shadow_aligned_idx = vq->last_used_idx & + PACKED_BATCH_MASK; + vq->shadow_used_packed[vq->shadow_used_idx].id = id[i]; + vq->shadow_used_packed[vq->shadow_used_idx].len = len[i]; + vq->shadow_used_packed[vq->shadow_used_idx].count = count[i]; + vq->shadow_aligned_idx += count[i]; + vq->shadow_used_idx++; + } + + if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) { + do_data_copy_enqueue(dev, vq); + vhost_flush_enqueue_shadow_packed(dev, vq); + } } /* avoid write operation when necessary, to lessen cache issues */ @@ -242,6 +430,7 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *, m_buf->l2_len); + ipv4_hdr->hdr_checksum = 0; ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); } @@ -286,6 +475,8 @@ map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, if (unlikely(!desc_addr)) return -1; + rte_prefetch0((void *)(uintptr_t)desc_addr); + buf_vec[vec_id].buf_iova = desc_iova; buf_vec[vec_id].buf_addr = desc_addr; buf_vec[vec_id].buf_len = desc_chunck_len; @@ -501,14 +692,13 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, if (avail_idx < vq->last_avail_idx) wrap_counter ^= 1; - if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) - return -1; - /* - * The ordering between desc flags and desc - * content reads need to be enforced. + * Perform a load-acquire barrier in desc_is_avail to + * enforce the ordering between desc flags and desc + * content. */ - rte_smp_rmb(); + if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) + return -1; *desc_count = 0; *len = 0; @@ -553,64 +743,6 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, return 0; } -/* - * Returns -1 on fail, 0 on success - */ -static inline int -reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint32_t size, struct buf_vector *buf_vec, - uint16_t *nr_vec, uint16_t *num_buffers, - uint16_t *nr_descs) -{ - uint16_t avail_idx; - uint16_t vec_idx = 0; - uint16_t max_tries, tries = 0; - - uint16_t buf_id = 0; - uint32_t len = 0; - uint16_t desc_count; - - *num_buffers = 0; - avail_idx = vq->last_avail_idx; - - if (rxvq_is_mergeable(dev)) - max_tries = vq->size - 1; - else - max_tries = 1; - - while (size > 0) { - /* - * if we tried all available ring items, and still - * can't get enough buf, it means something abnormal - * happened. - */ - if (unlikely(++tries > max_tries)) - return -1; - - if (unlikely(fill_vec_buf_packed(dev, vq, - avail_idx, &desc_count, - buf_vec, &vec_idx, - &buf_id, &len, - VHOST_ACCESS_RW) < 0)) - return -1; - - len = RTE_MIN(len, size); - update_shadow_used_ring_packed(vq, buf_id, len, desc_count); - size -= len; - - avail_idx += desc_count; - if (avail_idx >= vq->size) - avail_idx -= vq->size; - - *nr_descs += desc_count; - *num_buffers += 1; - } - - *nr_vec = vec_idx; - - return 0; -} - static __rte_noinline void copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, struct buf_vector *buf_vec, @@ -631,7 +763,7 @@ copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0); - vhost_log_cache_write(dev, vq, + vhost_log_cache_write_iova(dev, vq, iova, len); remain -= len; @@ -666,9 +798,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, buf_iova = buf_vec[vec_idx].buf_iova; buf_len = buf_vec[vec_idx].buf_len; - if (nr_vec > 1) - rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr); - if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { error = -1; goto out; @@ -681,7 +810,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, else hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", + VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", dev->vid, num_buffers); if (unlikely(buf_len < dev->vhost_hlen)) { @@ -711,10 +840,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, buf_iova = buf_vec[vec_idx].buf_iova; buf_len = buf_vec[vec_idx].buf_len; - /* Prefetch next buffer address. */ - if (vec_idx + 1 < nr_vec) - rte_prefetch0((void *)(uintptr_t) - buf_vec[vec_idx + 1].buf_addr); buf_offset = 0; buf_avail = buf_len; } @@ -738,7 +863,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, } else { PRINT_PACKET(dev, (uintptr_t)hdr_addr, dev->vhost_hlen, 0); - vhost_log_cache_write(dev, vq, + vhost_log_cache_write_iova(dev, vq, buf_vec[0].buf_iova, dev->vhost_hlen); } @@ -753,8 +878,9 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)), rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), cpy_len); - vhost_log_cache_write(dev, vq, buf_iova + buf_offset, - cpy_len); + vhost_log_cache_write_iova(dev, vq, + buf_iova + buf_offset, + cpy_len); PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset), cpy_len, 0); } else { @@ -779,6 +905,69 @@ out: return error; } +static __rte_always_inline int +vhost_enqueue_single_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mbuf *pkt, + struct buf_vector *buf_vec, + uint16_t *nr_descs) +{ + uint16_t nr_vec = 0; + uint16_t avail_idx = vq->last_avail_idx; + uint16_t max_tries, tries = 0; + uint16_t buf_id = 0; + uint32_t len = 0; + uint16_t desc_count; + uint32_t size = pkt->pkt_len + dev->vhost_hlen; + uint16_t num_buffers = 0; + uint32_t buffer_len[vq->size]; + uint16_t buffer_buf_id[vq->size]; + uint16_t buffer_desc_count[vq->size]; + + if (rxvq_is_mergeable(dev)) + max_tries = vq->size - 1; + else + max_tries = 1; + + while (size > 0) { + /* + * if we tried all available ring items, and still + * can't get enough buf, it means something abnormal + * happened. + */ + if (unlikely(++tries > max_tries)) + return -1; + + if (unlikely(fill_vec_buf_packed(dev, vq, + avail_idx, &desc_count, + buf_vec, &nr_vec, + &buf_id, &len, + VHOST_ACCESS_RW) < 0)) + return -1; + + len = RTE_MIN(len, size); + size -= len; + + buffer_len[num_buffers] = len; + buffer_buf_id[num_buffers] = buf_id; + buffer_desc_count[num_buffers] = desc_count; + num_buffers += 1; + + *nr_descs += desc_count; + avail_idx += desc_count; + if (avail_idx >= vq->size) + avail_idx -= vq->size; + } + + if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0) + return -1; + + vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id, + buffer_desc_count, num_buffers); + + return 0; +} + static __rte_noinline uint32_t virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count) @@ -788,13 +977,11 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct buf_vector buf_vec[BUF_VECTOR_MAX]; uint16_t avail_head; - avail_head = *((volatile uint16_t *)&vq->avail->idx); - /* * The ordering between avail index and * desc reads needs to be enforced. */ - rte_smp_rmb(); + avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE); rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); @@ -805,16 +992,14 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec, &num_buffers, avail_head, &nr_vec) < 0)) { - VHOST_LOG_DEBUG(VHOST_DATA, + VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid); vq->shadow_used_idx -= num_buffers; break; } - rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); - - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", dev->vid, vq->last_avail_idx, vq->last_avail_idx + num_buffers); @@ -838,58 +1023,152 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, return pkt_idx; } -static __rte_noinline uint32_t -virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct rte_mbuf **pkts, uint32_t count) +static __rte_always_inline int +virtio_dev_rx_batch_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mbuf **pkts) { - uint32_t pkt_idx = 0; - uint16_t num_buffers; - struct buf_vector buf_vec[BUF_VECTOR_MAX]; + bool wrap_counter = vq->avail_wrap_counter; + struct vring_packed_desc *descs = vq->desc_packed; + uint16_t avail_idx = vq->last_avail_idx; + uint64_t desc_addrs[PACKED_BATCH_SIZE]; + struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE]; + uint32_t buf_offset = dev->vhost_hlen; + uint64_t lens[PACKED_BATCH_SIZE]; + uint16_t ids[PACKED_BATCH_SIZE]; + uint16_t i; - for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { - uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; - uint16_t nr_vec = 0; - uint16_t nr_descs = 0; + if (unlikely(avail_idx & PACKED_BATCH_MASK)) + return -1; - if (unlikely(reserve_avail_buf_packed(dev, vq, - pkt_len, buf_vec, &nr_vec, - &num_buffers, &nr_descs) < 0)) { - VHOST_LOG_DEBUG(VHOST_DATA, - "(%d) failed to get enough desc from vring\n", - dev->vid); - vq->shadow_used_idx -= num_buffers; - break; - } + if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) + return -1; - rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(pkts[i]->next != NULL)) + return -1; + if (unlikely(!desc_is_avail(&descs[avail_idx + i], + wrap_counter))) + return -1; + } - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", - dev->vid, vq->last_avail_idx, - vq->last_avail_idx + num_buffers); + rte_smp_rmb(); - if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], - buf_vec, nr_vec, - num_buffers) < 0) { - vq->shadow_used_idx -= num_buffers; - break; - } + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + lens[i] = descs[avail_idx + i].len; - vq->last_avail_idx += nr_descs; - if (vq->last_avail_idx >= vq->size) { - vq->last_avail_idx -= vq->size; - vq->avail_wrap_counter ^= 1; - } + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset))) + return -1; } - do_data_copy_enqueue(dev, vq); + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + desc_addrs[i] = vhost_iova_to_vva(dev, vq, + descs[avail_idx + i].addr, + &lens[i], + VHOST_ACCESS_RW); - if (likely(vq->shadow_used_idx)) { - flush_shadow_used_ring_packed(dev, vq); - vhost_vring_call_packed(dev, vq); + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(!desc_addrs[i])) + return -1; + if (unlikely(lens[i] != descs[avail_idx + i].len)) + return -1; } - return pkt_idx; -} + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); + hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *) + (uintptr_t)desc_addrs[i]; + lens[i] = pkts[i]->pkt_len + dev->vhost_hlen; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr); + + vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset), + rte_pktmbuf_mtod_offset(pkts[i], void *, 0), + pkts[i]->pkt_len); + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr, + lens[i]); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + ids[i] = descs[avail_idx + i].id; + + vhost_flush_enqueue_batch_packed(dev, vq, lens, ids); + + return 0; +} + +static __rte_always_inline int16_t +virtio_dev_rx_single_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mbuf *pkt) +{ + struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint16_t nr_descs = 0; + + rte_smp_rmb(); + if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec, + &nr_descs) < 0)) { + VHOST_LOG_DATA(DEBUG, + "(%d) failed to get enough desc from vring\n", + dev->vid); + return -1; + } + + VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n", + dev->vid, vq->last_avail_idx, + vq->last_avail_idx + nr_descs); + + vq_inc_last_avail_packed(vq, nr_descs); + + return 0; +} + +static __rte_noinline uint32_t +virtio_dev_rx_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, + uint32_t count) +{ + uint32_t pkt_idx = 0; + uint32_t remained = count; + + do { + rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); + + if (remained >= PACKED_BATCH_SIZE) { + if (!virtio_dev_rx_batch_packed(dev, vq, + &pkts[pkt_idx])) { + pkt_idx += PACKED_BATCH_SIZE; + remained -= PACKED_BATCH_SIZE; + continue; + } + } + + if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx])) + break; + pkt_idx++; + remained--; + + } while (pkt_idx < count); + + if (vq->shadow_used_idx) { + do_data_copy_enqueue(dev, vq); + vhost_flush_enqueue_shadow_packed(dev, vq); + } + + if (pkt_idx) + vhost_vring_call_packed(dev, vq); + + return pkt_idx; +} static __rte_always_inline uint32_t virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, @@ -898,9 +1177,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, struct vhost_virtqueue *vq; uint32_t nb_tx = 0; - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); return 0; } @@ -948,7 +1227,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id, return 0; if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { - RTE_LOG(ERR, VHOST_DATA, + VHOST_LOG_DATA(ERR, "(%d) %s: built-in vhost net backend is disabled.\n", dev->vid, __func__); return 0; @@ -1065,7 +1344,7 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) m->l4_len = sizeof(struct rte_udp_hdr); break; default: - RTE_LOG(WARNING, VHOST_DATA, + VHOST_LOG_DATA(WARNING, "unsupported gso type %u.\n", hdr->gso_type); break; } @@ -1119,9 +1398,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, goto out; } - if (likely(nr_vec > 1)) - rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr); - if (virtio_net_with_host_offload(dev)) { if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { /* @@ -1132,7 +1408,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, hdr = &tmp_hdr; } else { hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); - rte_prefetch0(hdr); } } @@ -1162,9 +1437,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; } - rte_prefetch0((void *)(uintptr_t) - (buf_addr + buf_offset)); - PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset), (uint32_t)buf_avail, 0); @@ -1230,14 +1502,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, buf_iova = buf_vec[vec_idx].buf_iova; buf_len = buf_vec[vec_idx].buf_len; - /* - * Prefecth desc n + 1 buffer while - * desc n buffer is processed. - */ - if (vec_idx + 1 < nr_vec) - rte_prefetch0((void *)(uintptr_t) - buf_vec[vec_idx + 1].buf_addr); - buf_offset = 0; buf_avail = buf_len; @@ -1252,7 +1516,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, if (mbuf_avail == 0) { cur = rte_pktmbuf_alloc(mbuf_pool); if (unlikely(cur == NULL)) { - RTE_LOG(ERR, VHOST_DATA, "Failed to " + VHOST_LOG_DATA(ERR, "Failed to " "allocate memory for mbuf.\n"); error = -1; goto out; @@ -1313,12 +1577,104 @@ again: return NULL; } +static void +virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque) +{ + rte_free(opaque); +} + +static int +virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size) +{ + struct rte_mbuf_ext_shared_info *shinfo = NULL; + uint32_t total_len = RTE_PKTMBUF_HEADROOM + size; + uint16_t buf_len; + rte_iova_t iova; + void *buf; + + /* Try to use pkt buffer to store shinfo to reduce the amount of memory + * required, otherwise store shinfo in the new buffer. + */ + if (rte_pktmbuf_tailroom(pkt) >= sizeof(*shinfo)) + shinfo = rte_pktmbuf_mtod(pkt, + struct rte_mbuf_ext_shared_info *); + else { + total_len += sizeof(*shinfo) + sizeof(uintptr_t); + total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); + } + + if (unlikely(total_len > UINT16_MAX)) + return -ENOSPC; + + buf_len = total_len; + buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); + if (unlikely(buf == NULL)) + return -ENOMEM; + + /* Initialize shinfo */ + if (shinfo) { + shinfo->free_cb = virtio_dev_extbuf_free; + shinfo->fcb_opaque = buf; + rte_mbuf_ext_refcnt_set(shinfo, 1); + } else { + shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, + virtio_dev_extbuf_free, buf); + if (unlikely(shinfo == NULL)) { + rte_free(buf); + VHOST_LOG_DATA(ERR, "Failed to init shinfo\n"); + return -1; + } + } + + iova = rte_malloc_virt2iova(buf); + rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo); + rte_pktmbuf_reset_headroom(pkt); + + return 0; +} + +/* + * Allocate a host supported pktmbuf. + */ +static __rte_always_inline struct rte_mbuf * +virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp, + uint32_t data_len) +{ + struct rte_mbuf *pkt = rte_pktmbuf_alloc(mp); + + if (unlikely(pkt == NULL)) { + VHOST_LOG_DATA(ERR, + "Failed to allocate memory for mbuf.\n"); + return NULL; + } + + if (rte_pktmbuf_tailroom(pkt) >= data_len) + return pkt; + + /* attach an external buffer if supported */ + if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len)) + return pkt; + + /* check if chained buffers are allowed */ + if (!dev->linearbuf) + return pkt; + + /* Data doesn't fit into the buffer and the host supports + * only linear buffers + */ + rte_pktmbuf_free(pkt); + + return NULL; +} + static __rte_noinline uint16_t virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) { uint16_t i; uint16_t free_entries; + uint16_t dropped = 0; + static bool allocerr_warned; if (unlikely(dev->dequeue_zero_copy)) { struct zcopy_mbuf *zmbuf, *next; @@ -1344,49 +1700,56 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, } } - free_entries = *((volatile uint16_t *)&vq->avail->idx) - - vq->last_avail_idx; - if (free_entries == 0) - return 0; - /* * The ordering between avail index and * desc reads needs to be enforced. */ - rte_smp_rmb(); + free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - + vq->last_avail_idx; + if (free_entries == 0) + return 0; rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__); count = RTE_MIN(count, MAX_PKT_BURST); count = RTE_MIN(count, free_entries); - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n", dev->vid, count); for (i = 0; i < count; i++) { struct buf_vector buf_vec[BUF_VECTOR_MAX]; uint16_t head_idx; - uint32_t dummy_len; + uint32_t buf_len; uint16_t nr_vec = 0; int err; if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx + i, &nr_vec, buf_vec, - &head_idx, &dummy_len, + &head_idx, &buf_len, VHOST_ACCESS_RO) < 0)) break; if (likely(dev->dequeue_zero_copy == 0)) update_shadow_used_ring_split(vq, head_idx, 0); - rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); - - pkts[i] = rte_pktmbuf_alloc(mbuf_pool); + pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len); if (unlikely(pkts[i] == NULL)) { - RTE_LOG(ERR, VHOST_DATA, - "Failed to allocate memory for mbuf.\n"); + /* + * mbuf allocation fails for jumbo packets when external + * buffer allocation is not allowed and linear buffer + * is required. Drop this packet. + */ + if (!allocerr_warned) { + VHOST_LOG_DATA(ERR, + "Failed mbuf alloc of size %d from %s on %s.\n", + buf_len, mbuf_pool->name, dev->ifname); + allocerr_warned = true; + } + dropped += 1; + i++; break; } @@ -1394,6 +1757,14 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, mbuf_pool); if (unlikely(err)) { rte_pktmbuf_free(pkts[i]); + if (!allocerr_warned) { + VHOST_LOG_DATA(ERR, + "Failed to copy desc to mbuf on %s.\n", + dev->ifname); + allocerr_warned = true; + } + dropped += 1; + i++; break; } @@ -1403,6 +1774,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, zmbuf = get_zmbuf(vq); if (!zmbuf) { rte_pktmbuf_free(pkts[i]); + dropped += 1; + i++; break; } zmbuf->mbuf = pkts[i]; @@ -1432,124 +1805,397 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, } } - return i; + return (i - dropped); } -static __rte_noinline uint16_t -virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) +static __rte_always_inline int +vhost_reserve_avail_batch_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts, + uint16_t avail_idx, + uintptr_t *desc_addrs, + uint16_t *ids) { - uint16_t i; + bool wrap = vq->avail_wrap_counter; + struct vring_packed_desc *descs = vq->desc_packed; + struct virtio_net_hdr *hdr; + uint64_t lens[PACKED_BATCH_SIZE]; + uint64_t buf_lens[PACKED_BATCH_SIZE]; + uint32_t buf_offset = dev->vhost_hlen; + uint16_t flags, i; - if (unlikely(dev->dequeue_zero_copy)) { - struct zcopy_mbuf *zmbuf, *next; + if (unlikely(avail_idx & PACKED_BATCH_MASK)) + return -1; + if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size)) + return -1; - for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); - zmbuf != NULL; zmbuf = next) { - next = TAILQ_NEXT(zmbuf, next); + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + flags = descs[avail_idx + i].flags; + if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) || + (wrap == !!(flags & VRING_DESC_F_USED)) || + (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG))) + return -1; + } - if (mbuf_is_consumed(zmbuf->mbuf)) { - update_shadow_used_ring_packed(vq, - zmbuf->desc_idx, - 0, - zmbuf->desc_count); + rte_smp_rmb(); - TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); - restore_mbuf(zmbuf->mbuf); - rte_pktmbuf_free(zmbuf->mbuf); - put_zmbuf(zmbuf); - vq->nr_zmbuf -= 1; - } - } + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + lens[i] = descs[avail_idx + i].len; - if (likely(vq->shadow_used_idx)) { - flush_shadow_used_ring_packed(dev, vq); - vhost_vring_call_packed(dev, vq); + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + desc_addrs[i] = vhost_iova_to_vva(dev, vq, + descs[avail_idx + i].addr, + &lens[i], VHOST_ACCESS_RW); + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(!desc_addrs[i])) + return -1; + if (unlikely((lens[i] != descs[avail_idx + i].len))) + return -1; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, lens[i]); + if (!pkts[i]) + goto free_buf; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (unlikely(buf_lens[i] < (lens[i] - buf_offset))) + goto free_buf; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + pkts[i]->pkt_len = descs[avail_idx + i].len - buf_offset; + pkts[i]->data_len = pkts[i]->pkt_len; + ids[i] = descs[avail_idx + i].id; + } + + if (virtio_net_with_host_offload(dev)) { + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + hdr = (struct virtio_net_hdr *)(desc_addrs[i]); + vhost_dequeue_offload(hdr, pkts[i]); } } - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + return 0; - count = RTE_MIN(count, MAX_PKT_BURST); - VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", - dev->vid, count); +free_buf: + for (i = 0; i < PACKED_BATCH_SIZE; i++) + rte_pktmbuf_free(pkts[i]); - for (i = 0; i < count; i++) { - struct buf_vector buf_vec[BUF_VECTOR_MAX]; - uint16_t buf_id; - uint32_t dummy_len; - uint16_t desc_count, nr_vec = 0; - int err; + return -1; +} - if (unlikely(fill_vec_buf_packed(dev, vq, - vq->last_avail_idx, &desc_count, - buf_vec, &nr_vec, - &buf_id, &dummy_len, - VHOST_ACCESS_RO) < 0)) - break; +static __rte_always_inline int +virtio_dev_tx_batch_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts) +{ + uint16_t avail_idx = vq->last_avail_idx; + uint32_t buf_offset = dev->vhost_hlen; + uintptr_t desc_addrs[PACKED_BATCH_SIZE]; + uint16_t ids[PACKED_BATCH_SIZE]; + uint16_t i; - if (likely(dev->dequeue_zero_copy == 0)) - update_shadow_used_ring_packed(vq, buf_id, 0, - desc_count); + if (vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts, + avail_idx, desc_addrs, ids)) + return -1; - rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + rte_prefetch0((void *)(uintptr_t)desc_addrs[i]); - pkts[i] = rte_pktmbuf_alloc(mbuf_pool); - if (unlikely(pkts[i] == NULL)) { - RTE_LOG(ERR, VHOST_DATA, - "Failed to allocate memory for mbuf.\n"); - break; + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0), + (void *)(uintptr_t)(desc_addrs[i] + buf_offset), + pkts[i]->pkt_len); + + if (virtio_net_is_inorder(dev)) + vhost_shadow_dequeue_batch_packed_inorder(vq, + ids[PACKED_BATCH_SIZE - 1]); + else + vhost_shadow_dequeue_batch_packed(dev, vq, ids); + + vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); + + return 0; +} + +static __rte_always_inline int +vhost_dequeue_single_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts, + uint16_t *buf_id, + uint16_t *desc_count) +{ + struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint32_t buf_len; + uint16_t nr_vec = 0; + int err; + static bool allocerr_warned; + + if (unlikely(fill_vec_buf_packed(dev, vq, + vq->last_avail_idx, desc_count, + buf_vec, &nr_vec, + buf_id, &buf_len, + VHOST_ACCESS_RO) < 0)) + return -1; + + *pkts = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len); + if (unlikely(*pkts == NULL)) { + if (!allocerr_warned) { + VHOST_LOG_DATA(ERR, + "Failed mbuf alloc of size %d from %s on %s.\n", + buf_len, mbuf_pool->name, dev->ifname); + allocerr_warned = true; } + return -1; + } - err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], + err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, *pkts, mbuf_pool); - if (unlikely(err)) { - rte_pktmbuf_free(pkts[i]); - break; + if (unlikely(err)) { + if (!allocerr_warned) { + VHOST_LOG_DATA(ERR, + "Failed to copy desc to mbuf on %s.\n", + dev->ifname); + allocerr_warned = true; } + rte_pktmbuf_free(*pkts); + return -1; + } - if (unlikely(dev->dequeue_zero_copy)) { - struct zcopy_mbuf *zmbuf; + return 0; +} - zmbuf = get_zmbuf(vq); - if (!zmbuf) { - rte_pktmbuf_free(pkts[i]); - break; +static __rte_always_inline int +virtio_dev_tx_single_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts) +{ + + uint16_t buf_id, desc_count = 0; + int ret; + + ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, + &desc_count); + + if (likely(desc_count > 0)) { + if (virtio_net_is_inorder(dev)) + vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, + desc_count); + else + vhost_shadow_dequeue_single_packed(vq, buf_id, + desc_count); + + vq_inc_last_avail_packed(vq, desc_count); + } + + return ret; +} + +static __rte_always_inline int +virtio_dev_tx_batch_packed_zmbuf(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts) +{ + struct zcopy_mbuf *zmbufs[PACKED_BATCH_SIZE]; + uintptr_t desc_addrs[PACKED_BATCH_SIZE]; + uint16_t ids[PACKED_BATCH_SIZE]; + uint16_t i; + + uint16_t avail_idx = vq->last_avail_idx; + + if (vhost_reserve_avail_batch_packed(dev, vq, mbuf_pool, pkts, + avail_idx, desc_addrs, ids)) + return -1; + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + zmbufs[i] = get_zmbuf(vq); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + if (!zmbufs[i]) + goto free_pkt; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + zmbufs[i]->mbuf = pkts[i]; + zmbufs[i]->desc_idx = ids[i]; + zmbufs[i]->desc_count = 1; + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + rte_mbuf_refcnt_update(pkts[i], 1); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbufs[i], next); + + vq->nr_zmbuf += PACKED_BATCH_SIZE; + vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE); + + return 0; + +free_pkt: + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) + rte_pktmbuf_free(pkts[i]); + + return -1; +} + +static __rte_always_inline int +virtio_dev_tx_single_packed_zmbuf(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts) +{ + uint16_t buf_id, desc_count; + struct zcopy_mbuf *zmbuf; + + if (vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, + &desc_count)) + return -1; + + zmbuf = get_zmbuf(vq); + if (!zmbuf) { + rte_pktmbuf_free(*pkts); + return -1; + } + zmbuf->mbuf = *pkts; + zmbuf->desc_idx = buf_id; + zmbuf->desc_count = desc_count; + + rte_mbuf_refcnt_update(*pkts, 1); + + vq->nr_zmbuf += 1; + TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); + + vq_inc_last_avail_packed(vq, desc_count); + return 0; +} + +static __rte_always_inline void +free_zmbuf(struct vhost_virtqueue *vq) +{ + struct zcopy_mbuf *next = NULL; + struct zcopy_mbuf *zmbuf; + + for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); + zmbuf != NULL; zmbuf = next) { + next = TAILQ_NEXT(zmbuf, next); + + uint16_t last_used_idx = vq->last_used_idx; + + if (mbuf_is_consumed(zmbuf->mbuf)) { + uint16_t flags; + flags = vq->desc_packed[last_used_idx].flags; + if (vq->used_wrap_counter) { + flags |= VRING_DESC_F_USED; + flags |= VRING_DESC_F_AVAIL; + } else { + flags &= ~VRING_DESC_F_USED; + flags &= ~VRING_DESC_F_AVAIL; } - zmbuf->mbuf = pkts[i]; - zmbuf->desc_idx = buf_id; - zmbuf->desc_count = desc_count; - /* - * Pin lock the mbuf; we will check later to see - * whether the mbuf is freed (when we are the last - * user) or not. If that's the case, we then could - * update the used ring safely. - */ - rte_mbuf_refcnt_update(pkts[i], 1); + vq->desc_packed[last_used_idx].id = zmbuf->desc_idx; + vq->desc_packed[last_used_idx].len = 0; - vq->nr_zmbuf += 1; - TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); - } + rte_smp_wmb(); + vq->desc_packed[last_used_idx].flags = flags; + + vq_inc_last_used_packed(vq, zmbuf->desc_count); - vq->last_avail_idx += desc_count; - if (vq->last_avail_idx >= vq->size) { - vq->last_avail_idx -= vq->size; - vq->avail_wrap_counter ^= 1; + TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); + restore_mbuf(zmbuf->mbuf); + rte_pktmbuf_free(zmbuf->mbuf); + put_zmbuf(zmbuf); + vq->nr_zmbuf -= 1; } } +} - if (likely(dev->dequeue_zero_copy == 0)) { - do_data_copy_dequeue(vq); - if (unlikely(i < count)) - vq->shadow_used_idx = i; - if (likely(vq->shadow_used_idx)) { - flush_shadow_used_ring_packed(dev, vq); - vhost_vring_call_packed(dev, vq); +static __rte_noinline uint16_t +virtio_dev_tx_packed_zmbuf(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts, + uint32_t count) +{ + uint32_t pkt_idx = 0; + uint32_t remained = count; + + free_zmbuf(vq); + + do { + if (remained >= PACKED_BATCH_SIZE) { + if (!virtio_dev_tx_batch_packed_zmbuf(dev, vq, + mbuf_pool, &pkts[pkt_idx])) { + pkt_idx += PACKED_BATCH_SIZE; + remained -= PACKED_BATCH_SIZE; + continue; + } } + + if (virtio_dev_tx_single_packed_zmbuf(dev, vq, mbuf_pool, + &pkts[pkt_idx])) + break; + pkt_idx++; + remained--; + + } while (remained); + + if (pkt_idx) + vhost_vring_call_packed(dev, vq); + + return pkt_idx; +} + +static __rte_noinline uint16_t +virtio_dev_tx_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts, + uint32_t count) +{ + uint32_t pkt_idx = 0; + uint32_t remained = count; + + do { + rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); + + if (remained >= PACKED_BATCH_SIZE) { + if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool, + &pkts[pkt_idx])) { + pkt_idx += PACKED_BATCH_SIZE; + remained -= PACKED_BATCH_SIZE; + continue; + } + } + + if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, + &pkts[pkt_idx])) + break; + pkt_idx++; + remained--; + + } while (remained); + + if (vq->shadow_used_idx) { + do_data_copy_dequeue(vq); + + vhost_flush_dequeue_shadow_packed(dev, vq); + vhost_vring_call_packed(dev, vq); } - return i; + return pkt_idx; } uint16_t @@ -1559,20 +2205,22 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, struct virtio_net *dev; struct rte_mbuf *rarp_mbuf = NULL; struct vhost_virtqueue *vq; + int16_t success = 1; dev = get_device(vid); if (!dev) return 0; if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { - RTE_LOG(ERR, VHOST_DATA, + VHOST_LOG_DATA(ERR, "(%d) %s: built-in vhost net backend is disabled.\n", dev->vid, __func__); return 0; } if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + VHOST_LOG_DATA(ERR, + "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); return 0; } @@ -1604,30 +2252,35 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, * * broadcast_rarp shares a cacheline in the virtio_net structure * with some fields that are accessed during enqueue and - * rte_atomic16_cmpset() causes a write if using cmpxchg. This could - * result in false sharing between enqueue and dequeue. + * __atomic_compare_exchange_n causes a write if performed compare + * and exchange. This could result in false sharing between enqueue + * and dequeue. * * Prevent unnecessary false sharing by reading broadcast_rarp first - * and only performing cmpset if the read indicates it is likely to - * be set. + * and only performing compare and exchange if the read indicates it + * is likely to be set. */ - if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) && - rte_atomic16_cmpset((volatile uint16_t *) - &dev->broadcast_rarp.cnt, 1, 0))) { + if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) && + __atomic_compare_exchange_n(&dev->broadcast_rarp, + &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) { rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); if (rarp_mbuf == NULL) { - RTE_LOG(ERR, VHOST_DATA, - "Failed to make RARP packet.\n"); + VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n"); count = 0; goto out; } count -= 1; } - if (vq_is_packed(dev)) - count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count); - else + if (vq_is_packed(dev)) { + if (unlikely(dev->dequeue_zero_copy)) + count = virtio_dev_tx_packed_zmbuf(dev, vq, mbuf_pool, + pkts, count); + else + count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, + count); + } else count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count); out: