1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
7 #include <linux/virtio_net.h>
10 #include <rte_memcpy.h>
12 #include <rte_ether.h>
14 #include <rte_vhost.h>
19 #include <rte_spinlock.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost_async.h>
26 #define MAX_BATCH_LEN 256
28 #define VHOST_ASYNC_BATCH_THRESHOLD 32
30 static __rte_always_inline bool
31 rxvq_is_mergeable(struct virtio_net *dev)
33 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
36 static __rte_always_inline bool
37 virtio_net_is_inorder(struct virtio_net *dev)
39 return dev->features & (1ULL << VIRTIO_F_IN_ORDER);
43 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
45 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
49 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
51 struct batch_copy_elem *elem = vq->batch_copy_elems;
52 uint16_t count = vq->batch_copy_nb_elems;
55 for (i = 0; i < count; i++) {
56 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
57 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr,
59 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
62 vq->batch_copy_nb_elems = 0;
66 do_data_copy_dequeue(struct vhost_virtqueue *vq)
68 struct batch_copy_elem *elem = vq->batch_copy_elems;
69 uint16_t count = vq->batch_copy_nb_elems;
72 for (i = 0; i < count; i++)
73 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
75 vq->batch_copy_nb_elems = 0;
78 static __rte_always_inline void
79 do_flush_shadow_used_ring_split(struct virtio_net *dev,
80 struct vhost_virtqueue *vq,
81 uint16_t to, uint16_t from, uint16_t size)
83 rte_memcpy(&vq->used->ring[to],
84 &vq->shadow_used_split[from],
85 size * sizeof(struct vring_used_elem));
86 vhost_log_cache_used_vring(dev, vq,
87 offsetof(struct vring_used, ring[to]),
88 size * sizeof(struct vring_used_elem));
/*
 * Publish all pending shadow used entries to the guest's split used ring,
 * handling ring wrap-around with a two-part copy, then advance used->idx.
 * Requires vq->size to be a power of two (masking with size - 1).
 */
91 static __rte_always_inline void
92 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
94 uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
96 if (used_idx + vq->shadow_used_idx <= vq->size) {
/* No wrap: flush every shadow entry with a single copy. */
97 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
102 /* update used ring interval [used_idx, vq->size] */
103 size = vq->size - used_idx;
104 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
106 /* update the left half used ring interval [0, left_size] */
107 do_flush_shadow_used_ring_split(dev, vq, 0, size,
108 vq->shadow_used_idx - size);
110 vq->last_used_idx += vq->shadow_used_idx;
112 vhost_log_cache_sync(dev, vq);
/* Release-ordered add: guest must observe ring entries before new idx. */
114 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx,
116 vq->shadow_used_idx = 0;
117 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
118 sizeof(vq->used->idx));
121 static __rte_always_inline void
122 update_shadow_used_ring_split(struct vhost_virtqueue *vq,
123 uint16_t desc_idx, uint32_t len)
125 uint16_t i = vq->shadow_used_idx++;
127 vq->shadow_used_split[i].id = desc_idx;
128 vq->shadow_used_split[i].len = len;
/*
 * Publish the shadow used entries to a packed ring. IDs/lengths are
 * written first, then (after a release fence) the flags; the head
 * descriptor's flags are written last so the guest never observes a
 * partially-updated batch.
 */
131 static __rte_always_inline void
132 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev,
133 struct vhost_virtqueue *vq)
136 uint16_t used_idx = vq->last_used_idx;
137 uint16_t head_idx = vq->last_used_idx;
138 uint16_t head_flags = 0;
140 /* Split loop in two to save memory barriers */
141 for (i = 0; i < vq->shadow_used_idx; i++) {
142 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
143 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;
145 used_idx += vq->shadow_used_packed[i].count;
146 if (used_idx >= vq->size)
147 used_idx -= vq->size;
150 /* The ordering for storing desc flags needs to be enforced. */
151 rte_atomic_thread_fence(__ATOMIC_RELEASE);
153 for (i = 0; i < vq->shadow_used_idx; i++) {
/* Device-writable entries carry a non-zero written length. */
156 if (vq->shadow_used_packed[i].len)
157 flags = VRING_DESC_F_WRITE;
/* AVAIL/USED wrap bits encode the current used wrap counter. */
161 if (vq->used_wrap_counter) {
162 flags |= VRING_DESC_F_USED;
163 flags |= VRING_DESC_F_AVAIL;
165 flags &= ~VRING_DESC_F_USED;
166 flags &= ~VRING_DESC_F_AVAIL;
170 vq->desc_packed[vq->last_used_idx].flags = flags;
172 vhost_log_cache_used_vring(dev, vq,
174 sizeof(struct vring_packed_desc),
175 sizeof(struct vring_packed_desc));
177 head_idx = vq->last_used_idx;
181 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count);
/* Write the head flags last: this makes the whole batch visible. */
184 vq->desc_packed[head_idx].flags = head_flags;
186 vhost_log_cache_used_vring(dev, vq,
188 sizeof(struct vring_packed_desc),
189 sizeof(struct vring_packed_desc));
191 vq->shadow_used_idx = 0;
192 vhost_log_cache_sync(dev, vq);
/*
 * Flush the single pending dequeue shadow entry to the packed ring.
 * The flags store is the synchronization point, so it is done with a
 * release atomic store after the id is written.
 */
195 static __rte_always_inline void
196 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev,
197 struct vhost_virtqueue *vq)
199 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0];
201 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id;
202 /* desc flags is the synchronization point for virtio packed vring */
203 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags,
204 used_elem->flags, __ATOMIC_RELEASE);
206 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx *
207 sizeof(struct vring_packed_desc),
208 sizeof(struct vring_packed_desc));
209 vq->shadow_used_idx = 0;
210 vhost_log_cache_sync(dev, vq);
/*
 * Mark a full batch of PACKED_BATCH_SIZE descriptors as used in one go:
 * ids and lengths first, release fence, then flags, then log the whole
 * contiguous range and advance last_used_idx.
 */
213 static __rte_always_inline void
214 vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
215 struct vhost_virtqueue *vq,
221 uint16_t last_used_idx;
222 struct vring_packed_desc *desc_base;
224 last_used_idx = vq->last_used_idx;
225 desc_base = &vq->desc_packed[last_used_idx];
227 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter);
229 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
230 desc_base[i].id = ids[i];
231 desc_base[i].len = lens[i];
/* Flags must not become visible before ids/lens above. */
234 rte_atomic_thread_fence(__ATOMIC_RELEASE);
236 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
237 desc_base[i].flags = flags;
240 vhost_log_cache_used_vring(dev, vq, last_used_idx *
241 sizeof(struct vring_packed_desc),
242 sizeof(struct vring_packed_desc) *
244 vhost_log_cache_sync(dev, vq);
246 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
/*
 * In-order dequeue: only the last id of the batch needs recording.
 * A shadow entry is created lazily (only when none is pending) and the
 * used index advances by a full batch.
 */
249 static __rte_always_inline void
250 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
253 vq->shadow_used_packed[0].id = id;
255 if (!vq->shadow_used_idx) {
256 vq->shadow_last_used_idx = vq->last_used_idx;
257 vq->shadow_used_packed[0].flags =
258 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
/* len 0: dequeue writes nothing back into guest memory. */
259 vq->shadow_used_packed[0].len = 0;
260 vq->shadow_used_packed[0].count = 1;
261 vq->shadow_used_idx++;
264 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
/*
 * Record a dequeued batch in the packed ring. The first descriptor is
 * kept in the shadow (flushed later as the visibility point); the rest
 * are written directly: ids/lens, release fence, then flags.
 */
267 static __rte_always_inline void
268 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev,
269 struct vhost_virtqueue *vq,
276 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
278 if (!vq->shadow_used_idx) {
279 vq->shadow_last_used_idx = vq->last_used_idx;
280 vq->shadow_used_packed[0].id = ids[0];
281 vq->shadow_used_packed[0].len = 0;
282 vq->shadow_used_packed[0].count = 1;
283 vq->shadow_used_packed[0].flags = flags;
284 vq->shadow_used_idx++;
289 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) {
290 vq->desc_packed[vq->last_used_idx + i].id = ids[i];
291 vq->desc_packed[vq->last_used_idx + i].len = 0;
/* Flags become visible only after ids/lens are in place. */
294 rte_atomic_thread_fence(__ATOMIC_RELEASE);
295 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE)
296 vq->desc_packed[vq->last_used_idx + i].flags = flags;
298 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
299 sizeof(struct vring_packed_desc),
300 sizeof(struct vring_packed_desc) *
302 vhost_log_cache_sync(dev, vq);
304 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
/*
 * Record a single dequeued descriptor chain. The first completion goes
 * into the shadow (deferred flush); later ones update the ring in place
 * with the wrap-counter-adjusted flags.
 */
307 static __rte_always_inline void
308 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq,
/* Reuse the descriptor's current flags, fixing up the wrap bits. */
314 flags = vq->desc_packed[vq->last_used_idx].flags;
315 if (vq->used_wrap_counter) {
316 flags |= VRING_DESC_F_USED;
317 flags |= VRING_DESC_F_AVAIL;
319 flags &= ~VRING_DESC_F_USED;
320 flags &= ~VRING_DESC_F_AVAIL;
323 if (!vq->shadow_used_idx) {
324 vq->shadow_last_used_idx = vq->last_used_idx;
326 vq->shadow_used_packed[0].id = buf_id;
327 vq->shadow_used_packed[0].len = 0;
328 vq->shadow_used_packed[0].flags = flags;
329 vq->shadow_used_idx++;
331 vq->desc_packed[vq->last_used_idx].id = buf_id;
332 vq->desc_packed[vq->last_used_idx].len = 0;
333 vq->desc_packed[vq->last_used_idx].flags = flags;
336 vq_inc_last_used_packed(vq, count);
/*
 * In-order variant of the single dequeue shadow update: only the latest
 * buf_id must be remembered; the shadow entry is created once.
 */
339 static __rte_always_inline void
340 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq,
346 vq->shadow_used_packed[0].id = buf_id;
348 flags = vq->desc_packed[vq->last_used_idx].flags;
349 if (vq->used_wrap_counter) {
350 flags |= VRING_DESC_F_USED;
351 flags |= VRING_DESC_F_AVAIL;
353 flags &= ~VRING_DESC_F_USED;
354 flags &= ~VRING_DESC_F_AVAIL;
357 if (!vq->shadow_used_idx) {
358 vq->shadow_last_used_idx = vq->last_used_idx;
359 vq->shadow_used_packed[0].len = 0;
360 vq->shadow_used_packed[0].flags = flags;
361 vq->shadow_used_idx++;
364 vq_inc_last_used_packed(vq, count);
/*
 * Queue num_buffers completed enqueue chains into the packed shadow
 * ring; shadow_aligned_idx tracks how many descriptors have accumulated
 * so the caller can flush on PACKED_BATCH_SIZE alignment.
 */
367 static __rte_always_inline void
368 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
372 uint16_t num_buffers)
376 for (i = 0; i < num_buffers; i++) {
377 /* enqueue shadow flush action aligned with batch num */
378 if (!vq->shadow_used_idx)
379 vq->shadow_aligned_idx = vq->last_used_idx &
381 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i];
382 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i];
383 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i];
384 vq->shadow_aligned_idx += count[i];
385 vq->shadow_used_idx++;
/*
 * Shadow one enqueue completion, then flush batched copies and the
 * shadow ring once at least PACKED_BATCH_SIZE descriptors accumulated,
 * amortizing the barrier/logging cost across a batch.
 */
389 static __rte_always_inline void
390 vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
391 struct vhost_virtqueue *vq,
395 uint16_t num_buffers)
397 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);
399 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) {
400 do_data_copy_enqueue(dev, vq);
401 vhost_flush_enqueue_shadow_packed(dev, vq);
405 /* avoid write operation when necessary, to lessen cache issues */
406 #define ASSIGN_UNLESS_EQUAL(var, val) do { \
407 if ((var) != (val)) \
/*
 * Translate mbuf TX offload flags into the virtio_net_hdr the guest
 * expects: checksum offload (csum_start/csum_offset), host-side IPv4
 * header checksum, and TSO/UFO GSO fields. Fields are cleared via
 * ASSIGN_UNLESS_EQUAL to avoid dirtying cache lines needlessly.
 */
411 static __rte_always_inline void
412 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
414 uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;
/* TSO implies TCP checksum offload even if not set explicitly. */
416 if (m_buf->ol_flags & PKT_TX_TCP_SEG)
417 csum_l4 |= PKT_TX_TCP_CKSUM;
420 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
421 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
424 case PKT_TX_TCP_CKSUM:
425 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr,
428 case PKT_TX_UDP_CKSUM:
429 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr,
432 case PKT_TX_SCTP_CKSUM:
433 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr,
/* No L4 checksum requested: zero the csum fields (write-if-changed). */
438 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0);
439 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0);
440 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0);
443 /* IP cksum verification cannot be bypassed, then calculate here */
444 if (m_buf->ol_flags & PKT_TX_IP_CKSUM) {
445 struct rte_ipv4_hdr *ipv4_hdr;
447 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *,
449 ipv4_hdr->hdr_checksum = 0;
450 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
453 if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
454 if (m_buf->ol_flags & PKT_TX_IPV4)
455 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
457 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
458 net_hdr->gso_size = m_buf->tso_segsz;
459 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
461 } else if (m_buf->ol_flags & PKT_TX_UDP_SEG) {
462 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
463 net_hdr->gso_size = m_buf->tso_segsz;
464 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len +
/* No segmentation offload: clear GSO fields (write-if-changed). */
467 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0);
468 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0);
469 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0);
/*
 * Translate one descriptor (guest IOVA + length) into host VAs and
 * append the mapping(s) to buf_vec at *vec_idx. Non-contiguous regions
 * appear to be handled chunk-by-chunk (loop body partly elided here).
 * Returns a negative value on overflow/translation failure.
 */
473 static __rte_always_inline int
474 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
475 struct buf_vector *buf_vec, uint16_t *vec_idx,
476 uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
478 uint16_t vec_id = *vec_idx;
482 uint64_t desc_chunck_len = desc_len;
/* Guard against overflowing the fixed-size buf_vec array. */
484 if (unlikely(vec_id >= BUF_VECTOR_MAX))
487 desc_addr = vhost_iova_to_vva(dev, vq,
491 if (unlikely(!desc_addr))
494 rte_prefetch0((void *)(uintptr_t)desc_addr);
496 buf_vec[vec_id].buf_iova = desc_iova;
497 buf_vec[vec_id].buf_addr = desc_addr;
498 buf_vec[vec_id].buf_len = desc_chunck_len;
500 desc_len -= desc_chunck_len;
501 desc_iova += desc_chunck_len;
/*
 * Walk the split-ring descriptor chain starting at avail ring slot
 * avail_idx, filling buf_vec with host-VA mappings. Indirect tables are
 * followed, and copied into a temporary (idesc) when not contiguous in
 * host VA space. Outputs chain head id and total length; returns < 0 on
 * malformed chains (bad index, loop, mapping failure).
 */
509 static __rte_always_inline int
510 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
511 uint32_t avail_idx, uint16_t *vec_idx,
512 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
513 uint32_t *desc_chain_len, uint8_t perm)
515 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
516 uint16_t vec_id = *vec_idx;
519 uint32_t nr_descs = vq->size;
521 struct vring_desc *descs = vq->desc;
522 struct vring_desc *idesc = NULL;
524 if (unlikely(idx >= vq->size))
527 *desc_chain_head = idx;
529 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
530 dlen = vq->desc[idx].len;
531 nr_descs = dlen / sizeof(struct vring_desc);
532 if (unlikely(nr_descs > vq->size))
535 descs = (struct vring_desc *)(uintptr_t)
536 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
539 if (unlikely(!descs))
542 if (unlikely(dlen < vq->desc[idx].len)) {
544 * The indirect desc table is not contiguous
545 * in process VA space, we have to copy it.
547 idesc = vhost_alloc_copy_ind_table(dev, vq,
548 vq->desc[idx].addr, vq->desc[idx].len);
549 if (unlikely(!idesc))
/* cnt guards against malicious/looping 'next' chains. */
559 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) {
560 free_ind_table(idesc);
564 dlen = descs[idx].len;
567 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
568 descs[idx].addr, dlen,
570 free_ind_table(idesc);
574 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
577 idx = descs[idx].next;
580 *desc_chain_len = len;
583 if (unlikely(!!idesc))
584 free_ind_table(idesc);
/*
 * Reserve enough split-ring buffers to hold 'size' bytes, possibly
 * spanning multiple chains when mergeable RX buffers are negotiated
 * (then up to vq->size - 1 tries). Each reserved chain is recorded in
 * the shadow used ring.
590 * Returns -1 on fail, 0 on success
593 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
594 uint32_t size, struct buf_vector *buf_vec,
595 uint16_t *num_buffers, uint16_t avail_head,
599 uint16_t vec_idx = 0;
600 uint16_t max_tries, tries = 0;
602 uint16_t head_idx = 0;
606 cur_idx = vq->last_avail_idx;
608 if (rxvq_is_mergeable(dev))
609 max_tries = vq->size - 1;
/* Ring exhausted: no more avail entries to consume. */
614 if (unlikely(cur_idx == avail_head))
617 * if we tried all available ring items, and still
618 * can't get enough buf, it means something abnormal
621 if (unlikely(++tries > max_tries))
624 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
627 VHOST_ACCESS_RW) < 0))
/* Do not credit a chain with more than the bytes still needed. */
629 len = RTE_MIN(len, size);
630 update_shadow_used_ring_split(vq, head_idx, len);
/*
 * Expand an indirect packed descriptor: map (or copy, if the table is
 * not host-VA contiguous) the indirect table and add every entry to
 * buf_vec. Returns < 0 on mapping failure or oversized tables.
 */
642 static __rte_always_inline int
643 fill_vec_buf_packed_indirect(struct virtio_net *dev,
644 struct vhost_virtqueue *vq,
645 struct vring_packed_desc *desc, uint16_t *vec_idx,
646 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm)
650 uint16_t vec_id = *vec_idx;
652 struct vring_packed_desc *descs, *idescs = NULL;
655 descs = (struct vring_packed_desc *)(uintptr_t)
656 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
657 if (unlikely(!descs))
660 if (unlikely(dlen < desc->len)) {
662 * The indirect desc table is not contiguous
663 * in process VA space, we have to copy it.
665 idescs = vhost_alloc_copy_ind_table(dev,
666 vq, desc->addr, desc->len);
667 if (unlikely(!idescs))
673 nr_descs = desc->len / sizeof(struct vring_packed_desc);
674 if (unlikely(nr_descs >= vq->size)) {
675 free_ind_table(idescs);
679 for (i = 0; i < nr_descs; i++) {
680 if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
681 free_ind_table(idescs);
687 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
/* Release the temporary indirect-table copy, if one was made. */
694 if (unlikely(!!idescs))
695 free_ind_table(idescs);
/*
 * Gather one descriptor chain from a packed ring starting at avail_idx:
 * follows NEXT-chained and indirect descriptors, fills buf_vec, and
 * returns the buffer id, descriptor count and total length. The wrap
 * counter flips when the walk crosses the end of the ring.
 */
700 static __rte_always_inline int
701 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
702 uint16_t avail_idx, uint16_t *desc_count,
703 struct buf_vector *buf_vec, uint16_t *vec_idx,
704 uint16_t *buf_id, uint32_t *len, uint8_t perm)
706 bool wrap_counter = vq->avail_wrap_counter;
707 struct vring_packed_desc *descs = vq->desc_packed;
708 uint16_t vec_id = *vec_idx;
/* Starting past last_avail_idx means we are on the wrapped half. */
711 if (avail_idx < vq->last_avail_idx)
715 * Perform a load-acquire barrier in desc_is_avail to
716 * enforce the ordering between desc flags and desc
719 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
726 if (unlikely(vec_id >= BUF_VECTOR_MAX))
729 if (unlikely(*desc_count >= vq->size))
733 *buf_id = descs[avail_idx].id;
735 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
736 if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
742 dlen = descs[avail_idx].len;
745 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
746 descs[avail_idx].addr,
752 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
755 if (++avail_idx >= vq->size) {
756 avail_idx -= vq->size;
/*
 * Slow path: the virtio-net header does not fit in the first buffer, so
 * scatter the vhost_hlen bytes of 'hdr' across successive buf_vec
 * entries, logging each written region. (Loop advance lines elided in
 * this view.)
 */
766 static __rte_noinline void
767 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
768 struct buf_vector *buf_vec,
769 struct virtio_net_hdr_mrg_rxbuf *hdr)
772 uint64_t remain = dev->vhost_hlen;
773 uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
774 uint64_t iova = buf_vec->buf_iova;
777 len = RTE_MIN(remain,
779 dst = buf_vec->buf_addr;
780 rte_memcpy((void *)(uintptr_t)dst,
781 (void *)(uintptr_t)src,
784 PRINT_PACKET(dev, (uintptr_t)dst,
786 vhost_log_cache_write_iova(dev, vq,
/*
 * Copy one mbuf chain into the guest buffers described by buf_vec
 * (nr_vec entries). Writes the virtio-net header first (via a bounce
 * buffer when it straddles buffers), then streams the payload; small
 * copies are deferred into vq->batch_copy_elems and flushed later by
 * do_data_copy_enqueue(). Returns 0 on success, negative on error.
 */
796 static __rte_always_inline int
797 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
798 struct rte_mbuf *m, struct buf_vector *buf_vec,
799 uint16_t nr_vec, uint16_t num_buffers)
801 uint32_t vec_idx = 0;
802 uint32_t mbuf_offset, mbuf_avail;
803 uint32_t buf_offset, buf_avail;
804 uint64_t buf_addr, buf_iova, buf_len;
807 struct rte_mbuf *hdr_mbuf;
808 struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
809 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
812 if (unlikely(m == NULL)) {
817 buf_addr = buf_vec[vec_idx].buf_addr;
818 buf_iova = buf_vec[vec_idx].buf_iova;
819 buf_len = buf_vec[vec_idx].buf_len;
/* Header cannot fit and there is no following buffer: error out. */
821 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
/* Header straddles buffers: build it in tmp_hdr, copy out later. */
828 if (unlikely(buf_len < dev->vhost_hlen)) {
829 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
832 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
834 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
835 dev->vid, num_buffers);
837 if (unlikely(buf_len < dev->vhost_hlen)) {
838 buf_offset = dev->vhost_hlen - buf_len;
840 buf_addr = buf_vec[vec_idx].buf_addr;
841 buf_iova = buf_vec[vec_idx].buf_iova;
842 buf_len = buf_vec[vec_idx].buf_len;
843 buf_avail = buf_len - buf_offset;
845 buf_offset = dev->vhost_hlen;
846 buf_avail = buf_len - dev->vhost_hlen;
849 mbuf_avail = rte_pktmbuf_data_len(m);
851 while (mbuf_avail != 0 || m->next != NULL) {
852 /* done with current buf, get the next one */
853 if (buf_avail == 0) {
855 if (unlikely(vec_idx >= nr_vec)) {
860 buf_addr = buf_vec[vec_idx].buf_addr;
861 buf_iova = buf_vec[vec_idx].buf_iova;
862 buf_len = buf_vec[vec_idx].buf_len;
868 /* done with current mbuf, get the next one */
869 if (mbuf_avail == 0) {
873 mbuf_avail = rte_pktmbuf_data_len(m);
/* First pass only: fill in offloads and (if mergeable) num_buffers. */
877 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
878 if (rxvq_is_mergeable(dev))
879 ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
882 if (unlikely(hdr == &tmp_hdr)) {
883 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
885 PRINT_PACKET(dev, (uintptr_t)hdr_addr,
887 vhost_log_cache_write_iova(dev, vq,
895 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
/* Large copies (or a full batch array) are done immediately;
 * small ones are batched to amortize per-copy overhead. */
897 if (likely(cpy_len > MAX_BATCH_LEN ||
898 vq->batch_copy_nb_elems >= vq->size)) {
899 rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)),
900 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
902 vhost_log_cache_write_iova(dev, vq,
903 buf_iova + buf_offset,
905 PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),
908 batch_copy[vq->batch_copy_nb_elems].dst =
909 (void *)((uintptr_t)(buf_addr + buf_offset));
910 batch_copy[vq->batch_copy_nb_elems].src =
911 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
912 batch_copy[vq->batch_copy_nb_elems].log_addr =
913 buf_iova + buf_offset;
914 batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
915 vq->batch_copy_nb_elems++;
918 mbuf_avail -= cpy_len;
919 mbuf_offset += cpy_len;
920 buf_avail -= cpy_len;
921 buf_offset += cpy_len;
/* Fill one iovec entry for the async DMA engine. Body elided in this
 * view — presumably assigns v->iov_base = base and v->iov_len = len;
 * confirm against the full source. */
929 static __rte_always_inline void
930 async_fill_vec(struct iovec *v, void *base, size_t len)
/* Initialize an iov iterator over nr_seg iovec entries; 'count' is the
 * total byte count (other field assignments elided in this view). */
936 static __rte_always_inline void
937 async_fill_iter(struct rte_vhost_iov_iter *it, size_t count,
938 struct iovec *vec, unsigned long nr_seg)
945 it->nr_segs = nr_seg;
/* Pair a source and destination iov iterator into one async transfer
 * descriptor. Body elided in this view — presumably assigns desc->src
 * and desc->dst; confirm against the full source. */
952 static __rte_always_inline void
953 async_fill_desc(struct rte_vhost_async_desc *desc,
954 struct rte_vhost_iov_iter *src, struct rte_vhost_iov_iter *dst)
/*
 * Async variant of copy_mbuf_to_desc(): segments at or above
 * vq->async_threshold are described as src/dst iovecs (guest side via
 * gpa_to_first_hpa) for a DMA engine, while the header and small
 * remainders are copied by the CPU (batched). Fills src_it/dst_it for
 * the caller to submit.
 */
960 static __rte_always_inline int
961 async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
962 struct rte_mbuf *m, struct buf_vector *buf_vec,
963 uint16_t nr_vec, uint16_t num_buffers,
964 struct iovec *src_iovec, struct iovec *dst_iovec,
965 struct rte_vhost_iov_iter *src_it,
966 struct rte_vhost_iov_iter *dst_it)
968 uint32_t vec_idx = 0;
969 uint32_t mbuf_offset, mbuf_avail;
970 uint32_t buf_offset, buf_avail;
971 uint64_t buf_addr, buf_iova, buf_len;
972 uint32_t cpy_len, cpy_threshold;
974 struct rte_mbuf *hdr_mbuf;
975 struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
976 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
984 if (unlikely(m == NULL)) {
989 cpy_threshold = vq->async_threshold;
991 buf_addr = buf_vec[vec_idx].buf_addr;
992 buf_iova = buf_vec[vec_idx].buf_iova;
993 buf_len = buf_vec[vec_idx].buf_len;
995 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
1001 hdr_addr = buf_addr;
/* Header straddles buffers: build it in tmp_hdr, copy out later. */
1002 if (unlikely(buf_len < dev->vhost_hlen)) {
1003 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
1006 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
1008 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
1009 dev->vid, num_buffers);
1011 if (unlikely(buf_len < dev->vhost_hlen)) {
1012 buf_offset = dev->vhost_hlen - buf_len;
1014 buf_addr = buf_vec[vec_idx].buf_addr;
1015 buf_iova = buf_vec[vec_idx].buf_iova;
1016 buf_len = buf_vec[vec_idx].buf_len;
1017 buf_avail = buf_len - buf_offset;
1019 buf_offset = dev->vhost_hlen;
1020 buf_avail = buf_len - dev->vhost_hlen;
1023 mbuf_avail = rte_pktmbuf_data_len(m);
1026 while (mbuf_avail != 0 || m->next != NULL) {
1027 /* done with current buf, get the next one */
1028 if (buf_avail == 0) {
1030 if (unlikely(vec_idx >= nr_vec)) {
1035 buf_addr = buf_vec[vec_idx].buf_addr;
1036 buf_iova = buf_vec[vec_idx].buf_iova;
1037 buf_len = buf_vec[vec_idx].buf_len;
1040 buf_avail = buf_len;
1043 /* done with current mbuf, get the next one */
1044 if (mbuf_avail == 0) {
1048 mbuf_avail = rte_pktmbuf_data_len(m);
1052 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
1053 if (rxvq_is_mergeable(dev))
1054 ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
1057 if (unlikely(hdr == &tmp_hdr)) {
1058 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
1060 PRINT_PACKET(dev, (uintptr_t)hdr_addr,
1061 dev->vhost_hlen, 0);
1062 vhost_log_cache_write_iova(dev, vq,
1063 buf_vec[0].buf_iova,
1070 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
/* Large segments go to the DMA engine; each host-physical-contiguous
 * chunk becomes one src/dst iovec pair. */
1072 while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
1073 hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
1074 buf_iova + buf_offset,
1075 cpy_len, &mapped_len);
/* Fall back to CPU copy when no usable HPA mapping remains. */
1077 if (unlikely(!hpa || mapped_len < cpy_threshold))
1080 async_fill_vec(src_iovec + tvec_idx,
1081 (void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
1082 mbuf_offset), (size_t)mapped_len);
1084 async_fill_vec(dst_iovec + tvec_idx,
1085 hpa, (size_t)mapped_len);
1087 tlen += (uint32_t)mapped_len;
1088 cpy_len -= (uint32_t)mapped_len;
1089 mbuf_avail -= (uint32_t)mapped_len;
1090 mbuf_offset += (uint32_t)mapped_len;
1091 buf_avail -= (uint32_t)mapped_len;
1092 buf_offset += (uint32_t)mapped_len;
/* Remainder below threshold: CPU copy, batched when room remains. */
1096 if (likely(cpy_len)) {
1097 if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
1099 (void *)((uintptr_t)(buf_addr + buf_offset)),
1100 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
1104 (uintptr_t)(buf_addr + buf_offset),
1107 batch_copy[vq->batch_copy_nb_elems].dst =
1108 (void *)((uintptr_t)(buf_addr + buf_offset));
1109 batch_copy[vq->batch_copy_nb_elems].src =
1110 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
1111 batch_copy[vq->batch_copy_nb_elems].log_addr =
1112 buf_iova + buf_offset;
1113 batch_copy[vq->batch_copy_nb_elems].len =
1115 vq->batch_copy_nb_elems++;
1118 mbuf_avail -= cpy_len;
1119 mbuf_offset += cpy_len;
1120 buf_avail -= cpy_len;
1121 buf_offset += cpy_len;
/* Finalize the iterators over the tvec_idx segments collected above. */
1128 async_fill_iter(src_it, tlen, src_iovec, tvec_idx);
1129 async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);
/*
 * Reserve packed-ring descriptors for one packet (possibly spanning
 * several chains when mergeable RX is on), copy the mbuf in, and shadow
 * the completions. Outputs total descriptor count via *nr_descs.
 */
1137 static __rte_always_inline int
1138 vhost_enqueue_single_packed(struct virtio_net *dev,
1139 struct vhost_virtqueue *vq,
1140 struct rte_mbuf *pkt,
1141 struct buf_vector *buf_vec,
1144 uint16_t nr_vec = 0;
1145 uint16_t avail_idx = vq->last_avail_idx;
1146 uint16_t max_tries, tries = 0;
1147 uint16_t buf_id = 0;
1149 uint16_t desc_count;
/* Total bytes needed: payload plus the virtio-net header. */
1150 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
1151 uint16_t num_buffers = 0;
1152 uint32_t buffer_len[vq->size];
1153 uint16_t buffer_buf_id[vq->size];
1154 uint16_t buffer_desc_count[vq->size];
1156 if (rxvq_is_mergeable(dev))
1157 max_tries = vq->size - 1;
1163 * if we tried all available ring items, and still
1164 * can't get enough buf, it means something abnormal
1167 if (unlikely(++tries > max_tries))
1170 if (unlikely(fill_vec_buf_packed(dev, vq,
1171 avail_idx, &desc_count,
1174 VHOST_ACCESS_RW) < 0))
1177 len = RTE_MIN(len, size);
1180 buffer_len[num_buffers] = len;
1181 buffer_buf_id[num_buffers] = buf_id;
1182 buffer_desc_count[num_buffers] = desc_count;
1185 *nr_descs += desc_count;
1186 avail_idx += desc_count;
1187 if (avail_idx >= vq->size)
1188 avail_idx -= vq->size;
1191 if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0)
1194 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id,
1195 buffer_desc_count, num_buffers);
/*
 * Enqueue a burst of mbufs into a split ring. For each packet: reserve
 * chains, copy data, advance last_avail_idx; finally flush batched
 * copies and the shadow used ring and kick the guest. Returns the
 * number of packets enqueued.
 */
1200 static __rte_noinline uint32_t
1201 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
1202 struct rte_mbuf **pkts, uint32_t count)
1204 uint32_t pkt_idx = 0;
1205 uint16_t num_buffers;
1206 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1207 uint16_t avail_head;
1210 * The ordering between avail index and
1211 * desc reads needs to be enforced.
1213 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
1215 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
1217 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
1218 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
1219 uint16_t nr_vec = 0;
1221 if (unlikely(reserve_avail_buf_split(dev, vq,
1222 pkt_len, buf_vec, &num_buffers,
1223 avail_head, &nr_vec) < 0)) {
1224 VHOST_LOG_DATA(DEBUG,
1225 "(%d) failed to get enough desc from vring\n",
/* Roll back shadow entries recorded for the failed packet. */
1227 vq->shadow_used_idx -= num_buffers;
1231 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
1232 dev->vid, vq->last_avail_idx,
1233 vq->last_avail_idx + num_buffers);
1235 if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
1238 vq->shadow_used_idx -= num_buffers;
1242 vq->last_avail_idx += num_buffers;
1245 do_data_copy_enqueue(dev, vq);
1247 if (likely(vq->shadow_used_idx)) {
1248 flush_shadow_used_ring_split(dev, vq);
1249 vhost_vring_call_split(dev, vq);
/*
 * Check whether a full batch of PACKED_BATCH_SIZE single-segment mbufs
 * can take the fast path: aligned avail index, no ring wrap inside the
 * batch, all descriptors available, large enough, and mappable to
 * contiguous host VAs. On success desc_addrs/lens are filled; returns
 * -1 when any condition fails (caller falls back to the single path).
 */
1255 static __rte_always_inline int
1256 virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
1257 struct vhost_virtqueue *vq,
1258 struct rte_mbuf **pkts,
1259 uint64_t *desc_addrs,
1262 bool wrap_counter = vq->avail_wrap_counter;
1263 struct vring_packed_desc *descs = vq->desc_packed;
1264 uint16_t avail_idx = vq->last_avail_idx;
1265 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1268 if (unlikely(avail_idx & PACKED_BATCH_MASK))
1271 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
1274 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
/* Multi-segment mbufs cannot take the batched path. */
1275 if (unlikely(pkts[i]->next != NULL))
1277 if (unlikely(!desc_is_avail(&descs[avail_idx + i],
1282 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1283 lens[i] = descs[avail_idx + i].len;
1285 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1286 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
1290 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1291 desc_addrs[i] = vhost_iova_to_vva(dev, vq,
1292 descs[avail_idx + i].addr,
1296 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1297 if (unlikely(!desc_addrs[i]))
/* Length changed by translation => buffer not VA-contiguous. */
1299 if (unlikely(lens[i] != descs[avail_idx + i].len))
/*
 * Async-path batch check: same conditions as the sync variant, plus
 * every mbuf must be below the async copy threshold (so the whole batch
 * is CPU-copied) and the used index must also not wrap in the batch.
 * Returns -1 when the batch fast path cannot be used.
 */
1306 static __rte_always_inline int
1307 virtio_dev_rx_async_batch_check(struct virtio_net *dev,
1308 struct vhost_virtqueue *vq,
1309 struct rte_mbuf **pkts,
1310 uint64_t *desc_addrs,
1313 bool wrap_counter = vq->avail_wrap_counter;
1314 struct vring_packed_desc *descs = vq->desc_packed;
1315 uint16_t avail_idx = vq->last_avail_idx;
1316 uint16_t used_idx = vq->last_used_idx;
1317 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1318 uint32_t cpy_threshold = vq->async_threshold;
/* Packets at/above the threshold belong to the DMA path, not batch. */
1321 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1322 if (unlikely(pkts[i]->data_len >= cpy_threshold))
1326 if (unlikely(avail_idx & PACKED_BATCH_MASK))
1329 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
1332 if (unlikely((used_idx + PACKED_BATCH_SIZE) > vq->size))
1335 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1336 if (unlikely(pkts[i]->next != NULL))
1338 if (unlikely(!desc_is_avail(&descs[avail_idx + i],
1343 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1344 lens[i] = descs[avail_idx + i].len;
1346 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1347 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
1351 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1352 desc_addrs[i] = vhost_iova_to_vva(dev, vq,
1353 descs[avail_idx + i].addr,
1357 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1358 if (unlikely(!desc_addrs[i]))
1360 if (unlikely(lens[i] != descs[avail_idx + i].len))
/*
 * Copy a validated batch of single-segment packets into their packed
 * descriptors: write headers with offloads, memcpy payloads past the
 * header, log the writes, then mark the whole batch used.
 */
1367 static __rte_always_inline void
1368 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
1369 struct vhost_virtqueue *vq,
1370 struct rte_mbuf **pkts,
1371 uint64_t *desc_addrs,
1374 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1375 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
1376 struct vring_packed_desc *descs = vq->desc_packed;
1377 uint16_t avail_idx = vq->last_avail_idx;
1378 uint16_t ids[PACKED_BATCH_SIZE];
1381 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1382 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
1383 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)
1384 (uintptr_t)desc_addrs[i];
/* lens[] is repurposed here as used-length: payload + header. */
1385 lens[i] = pkts[i]->pkt_len +
1386 sizeof(struct virtio_net_hdr_mrg_rxbuf);
1389 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1390 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr);
1392 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
1394 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1395 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset),
1396 rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
1400 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1401 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr,
1404 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1405 ids[i] = descs[avail_idx + i].id;
1407 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids);
/*
 * Try the synchronous packed-ring batch fast path: validate the batch,
 * flush any pending shadow state first (ordering before the batched
 * used-flag update), then copy. Returns -1 when the batch is rejected.
 */
1410 static __rte_always_inline int
1411 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev,
1412 struct vhost_virtqueue *vq,
1413 struct rte_mbuf **pkts)
1415 uint64_t desc_addrs[PACKED_BATCH_SIZE];
1416 uint64_t lens[PACKED_BATCH_SIZE];
1418 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)
1421 if (vq->shadow_used_idx) {
1422 do_data_copy_enqueue(dev, vq);
1423 vhost_flush_enqueue_shadow_packed(dev, vq);
1426 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);
/*
 * Async-path batch enqueue: packets below the DMA threshold are copied
 * synchronously and immediately reported complete via comp_pkts /
 * *pkt_done. Returns -1 when the batch check fails.
 */
1431 static __rte_always_inline int
1432 virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
1433 struct vhost_virtqueue *vq,
1434 struct rte_mbuf **pkts,
1435 struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
1438 uint64_t desc_addrs[PACKED_BATCH_SIZE];
1439 uint64_t lens[PACKED_BATCH_SIZE];
1441 if (virtio_dev_rx_async_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)
1444 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);
1446 if (vq->shadow_used_idx) {
1447 do_data_copy_enqueue(dev, vq);
1448 vhost_flush_enqueue_shadow_packed(dev, vq);
/* CPU-copied packets complete immediately: hand them back now. */
1451 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1452 comp_pkts[(*pkt_done)++] = pkts[i];
/*
 * Enqueue a single mbuf chain on a packed ring (sync path).
 * Returns 0 on success, non-zero when not enough descriptors are available.
 */
static __rte_always_inline int16_t
virtio_dev_rx_single_packed(struct virtio_net *dev,
			    struct vhost_virtqueue *vq,
			    struct rte_mbuf *pkt)
	struct buf_vector buf_vec[BUF_VECTOR_MAX];
	uint16_t nr_descs = 0;
	if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec,
		VHOST_LOG_DATA(DEBUG,
			       "(%d) failed to get enough desc from vring\n",
	VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
		dev->vid, vq->last_avail_idx,
		vq->last_avail_idx + nr_descs);
	/* Consume the descriptors used by this packet. */
	vq_inc_last_avail_packed(vq, nr_descs);
/*
 * Main sync enqueue loop for a packed virtqueue: try batch enqueue first,
 * fall back to single-packet enqueue, until @count packets are placed or
 * the ring runs out of descriptors.  Returns the number of packets enqueued.
 */
static __rte_noinline uint32_t
virtio_dev_rx_packed(struct virtio_net *dev,
		     struct vhost_virtqueue *__rte_restrict vq,
		     struct rte_mbuf **__rte_restrict pkts,
	uint32_t pkt_idx = 0;
	rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
	/* Prefer the unrolled batch path when enough packets remain. */
	if (count - pkt_idx >= PACKED_BATCH_SIZE) {
		if (!virtio_dev_rx_sync_batch_packed(dev, vq,
			pkt_idx += PACKED_BATCH_SIZE;
	if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx]))
	} while (pkt_idx < count);
	/* Flush remaining shadowed used entries and notify the guest. */
	if (vq->shadow_used_idx) {
		do_data_copy_enqueue(dev, vq);
		vhost_flush_enqueue_shadow_packed(dev, vq);
	vhost_vring_call_packed(dev, vq);
/*
 * Common sync enqueue entry: validates the queue index, takes the access
 * lock (and the IOTLB read lock when VIRTIO_F_IOMMU_PLATFORM is negotiated),
 * translates the ring if needed, then dispatches to the packed or split
 * implementation.  Returns the number of packets enqueued.
 */
static __rte_always_inline uint32_t
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
	struct vhost_virtqueue *vq;
	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
	/* Enqueue targets an RX queue, hence is_tx == 0. */
	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
			dev->vid, __func__, queue_id);
	vq = dev->virtqueue[queue_id];
	rte_spinlock_lock(&vq->access_lock);
	if (unlikely(!vq->enabled))
		goto out_access_unlock;
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);
	/* Lazily (re)translate guest ring addresses on first access. */
	if (unlikely(!vq->access_ok))
		if (unlikely(vring_translate(dev, vq) < 0))
	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
	if (vq_is_packed(dev))
		nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count);
		nb_tx = virtio_dev_rx_split(dev, vq, pkts, count);
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_unlock(vq);
	rte_spinlock_unlock(&vq->access_lock);
/*
 * Public API: synchronously enqueue a burst of mbufs into the guest RX
 * queue.  Rejects devices not using the built-in virtio-net backend.
 * Returns the number of packets actually enqueued.
 */
rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
	struct rte_mbuf **__rte_restrict pkts, uint16_t count)
	struct virtio_net *dev = get_device(vid);
	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
			"(%d) %s: built-in vhost net backend is disabled.\n",
			dev->vid, __func__);
	return virtio_dev_rx(dev, queue_id, pkts, count);
/*
 * Compute the index of the oldest in-flight async packet in the circular
 * async_pkts_info array: step back @n_inflight slots from @pkts_idx,
 * wrapping around @vq_size when the subtraction would go negative.
 */
static __rte_always_inline uint16_t
virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
	uint16_t vq_size, uint16_t n_inflight)
	return pkts_idx > n_inflight ? (pkts_idx - n_inflight) :
		(vq_size - n_inflight + pkts_idx) % vq_size;
/*
 * Copy @count used elements from a shadow ring @s_ring into the circular
 * destination ring @d_ring, splitting the copy in two when it wraps past
 * @ring_size.  NOTE(review): the source range is assumed not to wrap.
 */
static __rte_always_inline void
store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring,
		uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
	size_t elem_size = sizeof(struct vring_used_elem);
	if (d_idx + count <= ring_size) {
		rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
		uint16_t size = ring_size - d_idx;
		/* Destination wraps: copy the tail, then the head. */
		rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
		rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
/*
 * Packed-ring counterpart of store_dma_desc_info_split(): copy @count
 * packed used elements into the circular @d_ring, handling wrap-around
 * of the destination index.
 */
static __rte_always_inline void
store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,
		struct vring_used_elem_packed *d_ring,
		uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
	size_t elem_size = sizeof(struct vring_used_elem_packed);
	if (d_idx + count <= ring_size) {
		rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
		uint16_t size = ring_size - d_idx;
		/* Destination wraps: two-part copy. */
		rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
		rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
/*
 * Async (DMA-offloaded) enqueue for a split virtqueue.
 *
 * For each packet: reserve descriptors, build src/dst iovec iterators with
 * async_mbuf_to_desc(), and either hand the copy to the async DMA engine
 * (it_pool[it_idx].count != 0) or fall back to the CPU batched-copy path,
 * in which case the packet completes immediately via @comp_pkts.
 * DMA-backed used entries are parked in vq->async_descs_split so they are
 * only written back once the DMA completes; CPU-copied entries stay in the
 * shadow ring and are flushed at the end.  On partial DMA submission
 * failure, ring state (shadow ring, avail index) is rolled back using
 * async_pkts_log[].  Returns the number of packets accepted (sync + async);
 * *comp_count reports how many completed synchronously.
 */
static __rte_noinline uint32_t
virtio_dev_rx_async_submit_split(struct virtio_net *dev,
	struct vhost_virtqueue *vq, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count,
	struct rte_mbuf **comp_pkts, uint32_t *comp_count)
	uint32_t pkt_idx = 0, pkt_burst_idx = 0;
	uint16_t num_buffers;
	struct buf_vector buf_vec[BUF_VECTOR_MAX];
	uint16_t avail_head;
	struct rte_vhost_iov_iter *it_pool = vq->it_pool;
	struct iovec *vec_pool = vq->vec_pool;
	struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
	/* The vec pool is halved: first half source iovecs, second half dest. */
	struct iovec *src_iovec = vec_pool;
	struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
	uint16_t slot_idx = 0;
	uint16_t segs_await = 0;
	uint16_t iovec_idx = 0, it_idx = 0;
	struct async_inflight_info *pkts_info = vq->async_pkts_info;
	uint32_t n_pkts = 0, pkt_err = 0;
	uint32_t num_async_pkts = 0, num_done_pkts = 0;
		uint16_t last_avail_idx;
	} async_pkts_log[MAX_PKT_BURST];
	/*
	 * The ordering between avail index and desc reads need to be enforced.
	 */
	avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
		uint16_t nr_vec = 0;
		if (unlikely(reserve_avail_buf_split(dev, vq,
						pkt_len, buf_vec, &num_buffers,
						avail_head, &nr_vec) < 0)) {
			VHOST_LOG_DATA(DEBUG,
				"(%d) failed to get enough desc from vring\n",
			vq->shadow_used_idx -= num_buffers;
		VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
			dev->vid, vq->last_avail_idx,
			vq->last_avail_idx + num_buffers);
		if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers,
				&src_iovec[iovec_idx], &dst_iovec[iovec_idx],
				&it_pool[it_idx], &it_pool[it_idx + 1]) < 0) {
			/* Header/desc mapping failed: undo the reservation. */
			vq->shadow_used_idx -= num_buffers;
		slot_idx = (vq->async_pkts_idx + num_async_pkts) &
		/* Non-empty iterator => this packet goes through DMA. */
		if (it_pool[it_idx].count) {
			async_fill_desc(&tdes[pkt_burst_idx++],
				&it_pool[it_idx], &it_pool[it_idx + 1]);
			pkts_info[slot_idx].descs = num_buffers;
			pkts_info[slot_idx].mbuf = pkts[pkt_idx];
			/* Remember state so a failed DMA can be rolled back. */
			async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
			async_pkts_log[num_async_pkts++].last_avail_idx =
			iovec_idx += it_pool[it_idx].nr_segs;
			segs_await += it_pool[it_idx].nr_segs;
			/*
			 * recover shadow used ring and keep DMA-occupied
			 */
			from = vq->shadow_used_idx - num_buffers;
			to = vq->async_desc_idx_split & (vq->size - 1);
			store_dma_desc_info_split(vq->shadow_used_split,
					vq->async_descs_split, vq->size, from, to, num_buffers);
			vq->async_desc_idx_split += num_buffers;
			vq->shadow_used_idx -= num_buffers;
			/* CPU-copied packet: report completion right away. */
			comp_pkts[num_done_pkts++] = pkts[pkt_idx];
		vq->last_avail_idx += num_buffers;
		/*
		 * conditions to trigger async device transfer:
		 * - buffered packet number reaches transfer threshold
		 * - unused async iov number is less than max vhost vector
		 */
		if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
			((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
			n_xfer = vq->async_ops.transfer_data(dev->vid,
					queue_id, tdes, 0, pkt_burst_idx);
					"(%d) %s: failed to transfer data for queue id %d.\n",
					dev->vid, __func__, queue_id);
			vq->async_pkts_inflight_n += n_pkts;
			if (unlikely(n_pkts < pkt_burst_idx)) {
				/*
				 * log error packets number here and do actual
				 * error processing when applications poll
				 */
				pkt_err = pkt_burst_idx - n_pkts;
	/* Submit any leftover DMA descriptors after the loop. */
	if (pkt_burst_idx) {
		n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);
			VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n",
				dev->vid, __func__, queue_id);
		vq->async_pkts_inflight_n += n_pkts;
		if (unlikely(n_pkts < pkt_burst_idx))
			pkt_err = pkt_burst_idx - n_pkts;
	do_data_copy_enqueue(dev, vq);
	if (unlikely(pkt_err)) {
		uint16_t num_descs = 0;
		num_async_pkts -= pkt_err;
		/* calculate the sum of descriptors of DMA-error packets. */
		while (pkt_err-- > 0) {
			num_descs += pkts_info[slot_idx & (vq->size - 1)].descs;
		vq->async_desc_idx_split -= num_descs;
		/* recover shadow used ring and available ring */
		vq->shadow_used_idx -= (vq->last_avail_idx -
				async_pkts_log[num_async_pkts].last_avail_idx -
		vq->last_avail_idx =
			async_pkts_log[num_async_pkts].last_avail_idx;
		pkt_idx = async_pkts_log[num_async_pkts].pkt_idx;
		num_done_pkts = pkt_idx - num_async_pkts;
	vq->async_pkts_idx += num_async_pkts;
	*comp_count = num_done_pkts;
	/* Flush used entries of CPU-copied packets and kick the guest. */
	if (likely(vq->shadow_used_idx)) {
		flush_shadow_used_ring_split(dev, vq);
		vhost_vring_call_split(dev, vq);
/*
 * Write @count completed buffers from @shadow_ring back into the packed
 * descriptor ring as used entries.  The head descriptor's flags are written
 * last (after a release fence) so the guest never observes a partially
 * updated chain.
 */
static __rte_always_inline void
vhost_update_used_packed(struct vhost_virtqueue *vq,
			struct vring_used_elem_packed *shadow_ring,
	uint16_t used_idx = vq->last_used_idx;
	uint16_t head_idx = vq->last_used_idx;
	uint16_t head_flags = 0;
	/* Split loop in two to save memory barriers */
	for (i = 0; i < count; i++) {
		vq->desc_packed[used_idx].id = shadow_ring[i].id;
		vq->desc_packed[used_idx].len = shadow_ring[i].len;
		used_idx += shadow_ring[i].count;
		if (used_idx >= vq->size)
			used_idx -= vq->size;
	/* The ordering for storing desc flags needs to be enforced. */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);
	for (i = 0; i < count; i++) {
		/* NOTE(review): flags read from shadow_used_packed, presumably
		 * aliasing @shadow_ring — confirm against callers. */
		if (vq->shadow_used_packed[i].len)
			flags = VRING_DESC_F_WRITE;
		/* Encode the used/avail wrap state expected by the driver. */
		if (vq->used_wrap_counter) {
			flags |= VRING_DESC_F_USED;
			flags |= VRING_DESC_F_AVAIL;
			flags &= ~VRING_DESC_F_USED;
			flags &= ~VRING_DESC_F_AVAIL;
		vq->desc_packed[vq->last_used_idx].flags = flags;
		head_idx = vq->last_used_idx;
		vq_inc_last_used_packed(vq, shadow_ring[i].count);
	/* Publish the head entry last to make the whole batch visible. */
	vq->desc_packed[head_idx].flags = head_flags;
/*
 * Reserve packed-ring descriptors for one mbuf on the async path, build
 * the src/dst iovec iterators, back up the consumed descriptors into
 * @async_descs (for rollback on DMA failure), and shadow the used-ring
 * updates.  Returns 0 on success, negative on failure.
 */
static __rte_always_inline int
vhost_enqueue_async_single_packed(struct virtio_net *dev,
			    struct vhost_virtqueue *vq,
			    struct rte_mbuf *pkt,
			    struct buf_vector *buf_vec,
			    uint16_t *nr_buffers,
			    struct vring_packed_desc *async_descs,
			    struct iovec *src_iovec, struct iovec *dst_iovec,
			    struct rte_vhost_iov_iter *src_it,
			    struct rte_vhost_iov_iter *dst_it)
	uint16_t nr_vec = 0;
	uint16_t avail_idx = vq->last_avail_idx;
	uint16_t max_tries, tries = 0;
	uint16_t buf_id = 0;
	uint16_t desc_count = 0;
	uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
	/* Per-buffer bookkeeping, worst case one buffer per ring slot. */
	uint32_t buffer_len[vq->size];
	uint16_t buffer_buf_id[vq->size];
	uint16_t buffer_desc_count[vq->size];
	/* Mergeable buffers may span multiple ring entries. */
	if (rxvq_is_mergeable(dev))
		max_tries = vq->size - 1;
	/*
	 * if we tried all available ring items, and still
	 * can't get enough buf, it means something abnormal
	 */
	if (unlikely(++tries > max_tries))
	if (unlikely(fill_vec_buf_packed(dev, vq, avail_idx, &desc_count, buf_vec, &nr_vec,
					&buf_id, &len, VHOST_ACCESS_RW) < 0))
	len = RTE_MIN(len, size);
	buffer_len[*nr_buffers] = len;
	buffer_buf_id[*nr_buffers] = buf_id;
	buffer_desc_count[*nr_buffers] = desc_count;
	*nr_descs += desc_count;
	avail_idx += desc_count;
	if (avail_idx >= vq->size)
		avail_idx -= vq->size;
	if (async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, src_iovec, dst_iovec,
			src_it, dst_it) < 0)
	/* store descriptors for DMA */
	if (avail_idx >= *nr_descs) {
		rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],
			*nr_descs * sizeof(struct vring_packed_desc));
		/* Descriptor range wraps the ring: back it up in two copies. */
		uint16_t nr_copy = vq->size - vq->last_avail_idx;
		rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],
			nr_copy * sizeof(struct vring_packed_desc));
		rte_memcpy(async_descs + nr_copy, vq->desc_packed,
			(*nr_descs - nr_copy) * sizeof(struct vring_packed_desc));
	vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers);
/*
 * Thin wrapper around vhost_enqueue_async_single_packed(): allocates the
 * buf_vec on the stack and logs on failure.  Returns 0 on success,
 * non-zero when descriptor reservation fails.
 */
static __rte_always_inline int16_t
virtio_dev_rx_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
			    struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers,
			    struct vring_packed_desc *async_descs,
			    struct iovec *src_iovec, struct iovec *dst_iovec,
			    struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it)
	struct buf_vector buf_vec[BUF_VECTOR_MAX];
	if (unlikely(vhost_enqueue_async_single_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers,
						 async_descs, src_iovec, dst_iovec,
						 src_it, dst_it) < 0)) {
		VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid);
	VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
			dev->vid, vq->last_avail_idx, vq->last_avail_idx + *nr_descs);
/*
 * Roll back ring state for @nr_err packets whose DMA submission failed on
 * a packed ring: subtract their buffers/descriptors from the async
 * bookkeeping, rewind last_avail_idx, and restore the saved descriptors
 * from @async_descs (handling ring wrap and the avail wrap counter).
 * Updates *num_async_pkts/*num_done_pkts accordingly.
 */
static __rte_always_inline void
dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *async_descs,
			uint16_t async_descs_idx, uint16_t slot_idx, uint32_t nr_err,
			uint32_t *pkt_idx, uint32_t *num_async_pkts, uint32_t *num_done_pkts)
	uint16_t descs_err = 0;
	uint16_t buffers_err = 0;
	struct async_inflight_info *pkts_info = vq->async_pkts_info;
	*num_async_pkts -= nr_err;
	/* calculate the sum of buffers and descs of DMA-error packets. */
	while (nr_err-- > 0) {
		descs_err += pkts_info[slot_idx % vq->size].descs;
		buffers_err += pkts_info[slot_idx % vq->size].nr_buffers;
	vq->async_buffer_idx_packed -= buffers_err;
	if (vq->last_avail_idx >= descs_err) {
		vq->last_avail_idx -= descs_err;
		/* Restore the saved descriptors over the rewound range. */
		rte_memcpy(&vq->desc_packed[vq->last_avail_idx],
			&async_descs[async_descs_idx - descs_err],
			descs_err * sizeof(struct vring_packed_desc));
		/* Rewind wraps past 0: restore in two pieces and flip wrap. */
		vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err;
		nr_copy = vq->size - vq->last_avail_idx;
		rte_memcpy(&vq->desc_packed[vq->last_avail_idx],
			&async_descs[async_descs_idx - descs_err],
			nr_copy * sizeof(struct vring_packed_desc));
		descs_err -= nr_copy;
		rte_memcpy(&vq->desc_packed[0], &async_descs[async_descs_idx - descs_err],
			descs_err * sizeof(struct vring_packed_desc));
		vq->avail_wrap_counter ^= 1;
	*num_done_pkts = *pkt_idx - *num_async_pkts;
/*
 * Async (DMA-offloaded) enqueue for a packed virtqueue — packed-ring
 * counterpart of virtio_dev_rx_async_submit_split().  Batches that pass
 * the sync check are CPU-copied and complete immediately; the rest are
 * submitted through the registered async DMA ops, with descriptors backed
 * up in @async_descs for rollback by dma_error_handler_packed() on
 * partial submission failure.  Returns the number of packets accepted;
 * *comp_count reports how many completed synchronously.
 */
static __rte_noinline uint32_t
virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
	struct vhost_virtqueue *vq, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count,
	struct rte_mbuf **comp_pkts, uint32_t *comp_count)
	uint32_t pkt_idx = 0, pkt_burst_idx = 0;
	uint32_t remained = count;
	uint16_t async_descs_idx = 0;
	uint16_t num_buffers;
	struct rte_vhost_iov_iter *it_pool = vq->it_pool;
	struct iovec *vec_pool = vq->vec_pool;
	struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
	/* Vec pool halved between source and destination iovecs. */
	struct iovec *src_iovec = vec_pool;
	struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
	uint16_t slot_idx = 0;
	uint16_t segs_await = 0;
	uint16_t iovec_idx = 0, it_idx = 0;
	struct async_inflight_info *pkts_info = vq->async_pkts_info;
	uint32_t n_pkts = 0, pkt_err = 0;
	uint32_t num_async_pkts = 0, num_done_pkts = 0;
	/* Descriptor backup for rollback on DMA error (VLA, ring sized). */
	struct vring_packed_desc async_descs[vq->size];
	rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
	/* Try the CPU batch fast path first. */
	if (remained >= PACKED_BATCH_SIZE) {
		if (!virtio_dev_rx_async_batch_packed(dev, vq,
			&pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
			pkt_idx += PACKED_BATCH_SIZE;
			remained -= PACKED_BATCH_SIZE;
	if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx],
						&num_descs, &num_buffers,
						&async_descs[async_descs_idx],
						&src_iovec[iovec_idx], &dst_iovec[iovec_idx],
						&it_pool[it_idx], &it_pool[it_idx + 1]) < 0))
	VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
		dev->vid, vq->last_avail_idx,
		vq->last_avail_idx + num_descs);
	slot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size;
	/* Non-empty iterator => DMA path; else CPU-copied, done now. */
	if (it_pool[it_idx].count) {
		async_descs_idx += num_descs;
		async_fill_desc(&tdes[pkt_burst_idx++],
			&it_pool[it_idx], &it_pool[it_idx + 1]);
		pkts_info[slot_idx].descs = num_descs;
		pkts_info[slot_idx].nr_buffers = num_buffers;
		pkts_info[slot_idx].mbuf = pkts[pkt_idx];
		iovec_idx += it_pool[it_idx].nr_segs;
		segs_await += it_pool[it_idx].nr_segs;
		/*
		 * recover shadow used ring and keep DMA-occupied
		 */
		from = vq->shadow_used_idx - num_buffers;
		store_dma_desc_info_packed(vq->shadow_used_packed,
					vq->async_buffers_packed, vq->size, from,
					vq->async_buffer_idx_packed, num_buffers);
		vq->async_buffer_idx_packed += num_buffers;
		if (vq->async_buffer_idx_packed >= vq->size)
			vq->async_buffer_idx_packed -= vq->size;
		vq->shadow_used_idx -= num_buffers;
		comp_pkts[num_done_pkts++] = pkts[pkt_idx];
	vq_inc_last_avail_packed(vq, num_descs);
	/*
	 * conditions to trigger async device transfer:
	 * - buffered packet number reaches transfer threshold
	 * - unused async iov number is less than max vhost vector
	 */
	if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
		((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) {
		n_xfer = vq->async_ops.transfer_data(dev->vid,
				queue_id, tdes, 0, pkt_burst_idx);
				"(%d) %s: failed to transfer data for queue id %d.\n",
				dev->vid, __func__, queue_id);
		vq->async_pkts_inflight_n += n_pkts;
		if (unlikely(n_pkts < pkt_burst_idx)) {
			/*
			 * log error packets number here and do actual
			 * error processing when applications poll
			 */
			pkt_err = pkt_burst_idx - n_pkts;
	} while (pkt_idx < count);
	/* Submit any leftover DMA descriptors after the loop. */
	if (pkt_burst_idx) {
		n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);
			VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n",
				dev->vid, __func__, queue_id);
		vq->async_pkts_inflight_n += n_pkts;
		if (unlikely(n_pkts < pkt_burst_idx))
			pkt_err = pkt_burst_idx - n_pkts;
	do_data_copy_enqueue(dev, vq);
	if (unlikely(pkt_err))
		dma_error_handler_packed(vq, async_descs, async_descs_idx, slot_idx, pkt_err,
					&pkt_idx, &num_async_pkts, &num_done_pkts);
	vq->async_pkts_idx += num_async_pkts;
	if (vq->async_pkts_idx >= vq->size)
		vq->async_pkts_idx -= vq->size;
	*comp_count = num_done_pkts;
	/* Flush used entries of CPU-copied packets and kick the guest. */
	if (likely(vq->shadow_used_idx)) {
		vhost_flush_enqueue_shadow_packed(dev, vq);
		vhost_vring_call_packed(dev, vq);
/*
 * Write @n_descs DMA-completed used elements from vq->async_descs_split
 * into the guest-visible split used ring, looping until all are copied.
 * Both the circular source buffer and the used ring may wrap, hence the
 * two-level split of each copy.
 */
static __rte_always_inline void
write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs)
	uint16_t nr_left = n_descs;
	from = vq->last_async_desc_idx_split & (vq->size - 1);
	/* Limit this round to what remains before the source wraps. */
	nr_copy = nr_left + from <= vq->size ? nr_left : vq->size - from;
	to = vq->last_used_idx & (vq->size - 1);
	if (to + nr_copy <= vq->size) {
		rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from],
				nr_copy * sizeof(struct vring_used_elem));
		/* Destination (used ring) wraps: two-part copy. */
		uint16_t size = vq->size - to;
		rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from],
				size * sizeof(struct vring_used_elem));
		rte_memcpy(&vq->used->ring[0], &vq->async_descs_split[from + size],
				(nr_copy - size) * sizeof(struct vring_used_elem));
	vq->last_async_desc_idx_split += nr_copy;
	vq->last_used_idx += nr_copy;
	} while (nr_left > 0);
/*
 * Write DMA-completed buffers back to the packed ring via
 * vhost_update_used_packed(), chunked so each call uses a contiguous
 * span of the circular vq->async_buffers_packed array.
 */
static __rte_always_inline void
write_back_completed_descs_packed(struct vhost_virtqueue *vq,
	uint16_t nr_left = n_buffers;
	from = vq->last_async_buffer_idx_packed;
	to = (from + nr_left) % vq->size;
	/* Contiguous span: one call covers everything left. */
	vhost_update_used_packed(vq, vq->async_buffers_packed + from, to - from);
	vq->last_async_buffer_idx_packed += nr_left;
	/* Span wraps: flush up to the end of the array, then loop. */
	vhost_update_used_packed(vq, vq->async_buffers_packed + from,
	vq->last_async_buffer_idx_packed = 0;
	nr_left -= vq->size - from;
	} while (nr_left > 0);
/*
 * Public API: poll the async channel for completed enqueue copies.
 * Queries the registered check_completed_copies() op, returns the mbufs
 * of up to @count completed packets in @pkts, and writes their used
 * entries back to the guest ring (or just advances the async indices if
 * the ring is currently not accessible).  Returns the number of packets
 * whose completion is reported to the caller.
 */
uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
	struct virtio_net *dev = get_device(vid);
	struct vhost_virtqueue *vq;
	uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
	uint16_t start_idx, pkts_idx, vq_size;
	struct async_inflight_info *pkts_info;
	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
			dev->vid, __func__, queue_id);
	vq = dev->virtqueue[queue_id];
	if (unlikely(!vq->async_registered)) {
		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
			dev->vid, __func__, queue_id);
	rte_spinlock_lock(&vq->access_lock);
	pkts_idx = vq->async_pkts_idx % vq->size;
	pkts_info = vq->async_pkts_info;
	/* Oldest in-flight slot: step back by the in-flight count. */
	start_idx = virtio_dev_rx_async_get_info_idx(pkts_idx,
		vq_size, vq->async_pkts_inflight_n);
	/* Ask the async device only for what previous polls didn't report. */
	if (count > vq->async_last_pkts_n) {
		n_cpl = vq->async_ops.check_completed_copies(vid,
			queue_id, 0, count - vq->async_last_pkts_n);
			"(%d) %s: failed to check completed copies for queue id %d.\n",
			dev->vid, __func__, queue_id);
	n_pkts_cpl += vq->async_last_pkts_n;
	n_pkts_put = RTE_MIN(count, n_pkts_cpl);
	if (unlikely(n_pkts_put == 0)) {
		/* Nothing to hand back; remember completions for next poll. */
		vq->async_last_pkts_n = n_pkts_cpl;
	if (vq_is_packed(dev)) {
		for (i = 0; i < n_pkts_put; i++) {
			from = (start_idx + i) % vq_size;
			n_buffers += pkts_info[from].nr_buffers;
			pkts[i] = pkts_info[from].mbuf;
		for (i = 0; i < n_pkts_put; i++) {
			from = (start_idx + i) & (vq_size - 1);
			n_descs += pkts_info[from].descs;
			pkts[i] = pkts_info[from].mbuf;
	/* Carry over completions beyond @count to the next poll. */
	vq->async_last_pkts_n = n_pkts_cpl - n_pkts_put;
	vq->async_pkts_inflight_n -= n_pkts_put;
	if (likely(vq->enabled && vq->access_ok)) {
		if (vq_is_packed(dev)) {
			write_back_completed_descs_packed(vq, n_buffers);
			vhost_vring_call_packed(dev, vq);
			write_back_completed_descs_split(vq, n_descs);
			/* Publish used->idx with release semantics. */
			__atomic_add_fetch(&vq->used->idx, n_descs,
			vhost_vring_call_split(dev, vq);
		/* Ring not accessible: only advance internal async indices. */
		if (vq_is_packed(dev)) {
			vq->last_async_buffer_idx_packed += n_buffers;
			if (vq->last_async_buffer_idx_packed >= vq->size)
				vq->last_async_buffer_idx_packed -= vq->size;
			vq->last_async_desc_idx_split += n_descs;
	rte_spinlock_unlock(&vq->access_lock);
2319 static __rte_always_inline uint32_t
2320 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
2321 struct rte_mbuf **pkts, uint32_t count,
2322 struct rte_mbuf **comp_pkts, uint32_t *comp_count)
2324 struct vhost_virtqueue *vq;
2327 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
2328 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
2329 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
2330 dev->vid, __func__, queue_id);
2334 vq = dev->virtqueue[queue_id];
2336 rte_spinlock_lock(&vq->access_lock);
2338 if (unlikely(!vq->enabled || !vq->async_registered))
2339 goto out_access_unlock;
2341 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
2342 vhost_user_iotlb_rd_lock(vq);
2344 if (unlikely(!vq->access_ok))
2345 if (unlikely(vring_translate(dev, vq) < 0))
2348 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
2352 if (vq_is_packed(dev))
2353 nb_tx = virtio_dev_rx_async_submit_packed(dev,
2354 vq, queue_id, pkts, count, comp_pkts,
2357 nb_tx = virtio_dev_rx_async_submit_split(dev,
2358 vq, queue_id, pkts, count, comp_pkts,
2362 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
2363 vhost_user_iotlb_rd_unlock(vq);
2366 rte_spinlock_unlock(&vq->access_lock);
/*
 * Public API: submit a burst of mbufs for asynchronous (DMA-accelerated)
 * enqueue.  Packets that complete synchronously are returned in
 * @comp_pkts/@comp_count; the rest must be reaped later with
 * rte_vhost_poll_enqueue_completed().
 */
rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count,
		struct rte_mbuf **comp_pkts, uint32_t *comp_count)
	struct virtio_net *dev = get_device(vid);
	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
			"(%d) %s: built-in vhost net backend is disabled.\n",
			dev->vid, __func__);
	return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, comp_pkts,
/*
 * True when any host offload feature (checksum, TSO4/6, UFO, ECN) was
 * negotiated, i.e. the virtio-net header may carry offload requests.
 */
virtio_net_with_host_offload(struct virtio_net *dev)
			((1ULL << VIRTIO_NET_F_CSUM) |
			 (1ULL << VIRTIO_NET_F_HOST_ECN) |
			 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
			 (1ULL << VIRTIO_NET_F_HOST_TSO6) |
			 (1ULL << VIRTIO_NET_F_HOST_UFO)))
/*
 * Parse L2/L3 headers of @m, filling m->l2_len/l3_len, setting
 * PKT_TX_IPV4/IPV6 and reporting the L4 protocol through @l4_proto.
 * Every access is bounds-checked against the first segment's data_len,
 * so truncated packets fail instead of reading out of bounds.
 * Returns 0 on success, negative on a malformed/truncated header
 * (return lines elided in this view).
 */
parse_headers(struct rte_mbuf *m, uint8_t *l4_proto)
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_ether_hdr *eth_hdr;
	uint16_t data_len = rte_pktmbuf_data_len(m);
	if (data_len < sizeof(struct rte_ether_hdr))
	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
	m->l2_len = sizeof(struct rte_ether_hdr);
	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
	/* Single VLAN tag handling: skip the tag and re-read ethertype. */
	if (ethertype == RTE_ETHER_TYPE_VLAN) {
		if (data_len < sizeof(struct rte_ether_hdr) +
				sizeof(struct rte_vlan_hdr))
		struct rte_vlan_hdr *vlan_hdr =
			(struct rte_vlan_hdr *)(eth_hdr + 1);
		m->l2_len += sizeof(struct rte_vlan_hdr);
		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
	switch (ethertype) {
	case RTE_ETHER_TYPE_IPV4:
		if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr))
		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
		/* IPv4 header length comes from the IHL field (options). */
		m->l3_len = rte_ipv4_hdr_len(ipv4_hdr);
		if (data_len < m->l2_len + m->l3_len)
		m->ol_flags |= PKT_TX_IPV4;
		*l4_proto = ipv4_hdr->next_proto_id;
	case RTE_ETHER_TYPE_IPV6:
		if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr))
		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
		m->l3_len = sizeof(struct rte_ipv6_hdr);
		m->ol_flags |= PKT_TX_IPV6;
		*l4_proto = ipv6_hdr->proto;
		/* a valid L3 header is needed for further L4 parsing */
	/* both CSUM and GSO need a valid L4 header */
	switch (*l4_proto) {
		if (data_len < m->l2_len + m->l3_len +
				sizeof(struct rte_tcp_hdr))
		if (data_len < m->l2_len + m->l3_len +
				sizeof(struct rte_udp_hdr))
		if (data_len < m->l2_len + m->l3_len +
				sizeof(struct rte_sctp_hdr))
2492 static __rte_always_inline void
2493 vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
2495 uint8_t l4_proto = 0;
2496 struct rte_tcp_hdr *tcp_hdr = NULL;
2498 uint16_t data_len = rte_pktmbuf_data_len(m);
2500 if (parse_headers(m, &l4_proto) < 0)
2503 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2504 if (hdr->csum_start == (m->l2_len + m->l3_len)) {
2505 switch (hdr->csum_offset) {
2506 case (offsetof(struct rte_tcp_hdr, cksum)):
2507 if (l4_proto != IPPROTO_TCP)
2509 m->ol_flags |= PKT_TX_TCP_CKSUM;
2511 case (offsetof(struct rte_udp_hdr, dgram_cksum)):
2512 if (l4_proto != IPPROTO_UDP)
2514 m->ol_flags |= PKT_TX_UDP_CKSUM;
2516 case (offsetof(struct rte_sctp_hdr, cksum)):
2517 if (l4_proto != IPPROTO_SCTP)
2519 m->ol_flags |= PKT_TX_SCTP_CKSUM;
2529 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2530 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2531 case VIRTIO_NET_HDR_GSO_TCPV4:
2532 case VIRTIO_NET_HDR_GSO_TCPV6:
2533 if (l4_proto != IPPROTO_TCP)
2535 tcp_hdr = rte_pktmbuf_mtod_offset(m,
2536 struct rte_tcp_hdr *,
2537 m->l2_len + m->l3_len);
2538 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2;
2539 if (data_len < m->l2_len + m->l3_len + tcp_len)
2541 m->ol_flags |= PKT_TX_TCP_SEG;
2542 m->tso_segsz = hdr->gso_size;
2543 m->l4_len = tcp_len;
2545 case VIRTIO_NET_HDR_GSO_UDP:
2546 if (l4_proto != IPPROTO_UDP)
2548 m->ol_flags |= PKT_TX_UDP_SEG;
2549 m->tso_segsz = hdr->gso_size;
2550 m->l4_len = sizeof(struct rte_udp_hdr);
2553 VHOST_LOG_DATA(WARNING,
2554 "unsupported gso type %u.\n", hdr->gso_type);
2566 static __rte_always_inline void
2567 vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m,
2568 bool legacy_ol_flags)
2570 struct rte_net_hdr_lens hdr_lens;
2571 int l4_supported = 0;
2574 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
2577 if (legacy_ol_flags) {
2578 vhost_dequeue_offload_legacy(hdr, m);
2582 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
2584 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
2585 m->packet_type = ptype;
2586 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
2587 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
2588 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
2591 /* According to Virtio 1.1 spec, the device only needs to look at
2592 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path.
2593 * This differs from the processing incoming packets path where the
2594 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the
2597 * 5.1.6.2.1 Driver Requirements: Packet Transmission
2598 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and
2599 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags.
2601 * 5.1.6.2.2 Device Requirements: Packet Transmission
2602 * The device MUST ignore flag bits that it does not recognize.
2604 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2607 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
2608 if (hdr->csum_start <= hdrlen && l4_supported != 0) {
2609 m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
2611 /* Unknown proto or tunnel, do sw cksum. We can assume
2612 * the cksum field is in the first segment since the
2613 * buffers we provided to the host are large enough.
2614 * In case of SCTP, this will be wrong since it's a CRC
2615 * but there's nothing we can do.
2617 uint16_t csum = 0, off;
2619 if (rte_raw_cksum_mbuf(m, hdr->csum_start,
2620 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0)
2622 if (likely(csum != 0xffff))
2624 off = hdr->csum_offset + hdr->csum_start;
2625 if (rte_pktmbuf_data_len(m) >= off + 1)
2626 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum;
2630 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2631 if (hdr->gso_size == 0)
2634 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2635 case VIRTIO_NET_HDR_GSO_TCPV4:
2636 case VIRTIO_NET_HDR_GSO_TCPV6:
2637 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP)
2639 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
2640 m->tso_segsz = hdr->gso_size;
2642 case VIRTIO_NET_HDR_GSO_UDP:
2643 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP)
2645 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
2646 m->tso_segsz = hdr->gso_size;
2654 static __rte_noinline void
2655 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
2656 struct buf_vector *buf_vec)
2659 uint64_t remain = sizeof(struct virtio_net_hdr);
2661 uint64_t dst = (uint64_t)(uintptr_t)hdr;
2664 len = RTE_MIN(remain, buf_vec->buf_len);
2665 src = buf_vec->buf_addr;
2666 rte_memcpy((void *)(uintptr_t)dst,
2667 (void *)(uintptr_t)src, len);
/*
 * Copy one guest descriptor chain (described by @buf_vec / @nr_vec) into
 * the mbuf @m, chaining extra mbufs from @mbuf_pool as needed.
 *
 * - Parses the virtio-net header (contiguous, or reassembled via
 *   copy_vnet_hdr_from_desc() when split across buffers).
 * - Small copies are deferred into vq->batch_copy_elems and performed
 *   later by do_data_copy_dequeue(); large copies (or the first mbuf when
 *   a header is present) are done inline with rte_memcpy().
 * - On success, applies RX offload flags from the header via
 *   vhost_dequeue_offload(); @legacy_ol_flags selects the flag flavor.
 *
 * Returns 0 on success; error paths are present in the original file but
 * not visible in this gapped listing.
 */
2675 static __rte_always_inline int
2676 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
2677 struct buf_vector *buf_vec, uint16_t nr_vec,
2678 struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
2679 bool legacy_ol_flags)
2681 uint32_t buf_avail, buf_offset;
2682 uint64_t buf_addr, buf_len;
2683 uint32_t mbuf_avail, mbuf_offset;
/* cur: mbuf currently being filled; prev: last completed segment */
2685 struct rte_mbuf *cur = m, *prev = m;
/* tmp_hdr holds a reassembled header when it spans descriptors */
2686 struct virtio_net_hdr tmp_hdr;
2687 struct virtio_net_hdr *hdr = NULL;
2688 /* A counter to avoid desc dead loop chain */
2689 uint16_t vec_idx = 0;
2690 struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
2693 buf_addr = buf_vec[vec_idx].buf_addr;
2694 buf_len = buf_vec[vec_idx].buf_len;
/* A single buffer smaller than the vhost header cannot be valid */
2696 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
2701 if (virtio_net_with_host_offload(dev)) {
2702 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
2704 * No luck, the virtio-net header doesn't fit
2705 * in a contiguous virtual area.
2707 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
/* Header is contiguous: read it in place from guest memory */
2710 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
2715 * A virtio driver normally uses at least 2 desc buffers
2716 * for Tx: the first for storing the header, and others
2717 * for storing the data.
/* Position buf_offset/buf_avail just past the vhost header */
2719 if (unlikely(buf_len < dev->vhost_hlen)) {
2720 buf_offset = dev->vhost_hlen - buf_len;
2722 buf_addr = buf_vec[vec_idx].buf_addr;
2723 buf_len = buf_vec[vec_idx].buf_len;
2724 buf_avail = buf_len - buf_offset;
2725 } else if (buf_len == dev->vhost_hlen) {
/* Header exactly fills the first buffer: data starts at the next */
2726 if (unlikely(++vec_idx >= nr_vec))
2728 buf_addr = buf_vec[vec_idx].buf_addr;
2729 buf_len = buf_vec[vec_idx].buf_len;
2732 buf_avail = buf_len;
/* Header and data share the first buffer */
2734 buf_offset = dev->vhost_hlen;
2735 buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
2739 (uintptr_t)(buf_addr + buf_offset),
2740 (uint32_t)buf_avail, 0);
2743 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
2745 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
/*
 * Copy inline when the chunk is large, the batch array is full, or
 * we are filling the head mbuf of an offloaded packet; otherwise
 * queue the copy for the batched pass.
 */
2747 if (likely(cpy_len > MAX_BATCH_LEN ||
2748 vq->batch_copy_nb_elems >= vq->size ||
2749 (hdr && cur == m))) {
2750 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
2752 (void *)((uintptr_t)(buf_addr +
2753 buf_offset)), cpy_len);
2755 batch_copy[vq->batch_copy_nb_elems].dst =
2756 rte_pktmbuf_mtod_offset(cur, void *,
2758 batch_copy[vq->batch_copy_nb_elems].src =
2759 (void *)((uintptr_t)(buf_addr + buf_offset));
2760 batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
2761 vq->batch_copy_nb_elems++;
2764 mbuf_avail -= cpy_len;
2765 mbuf_offset += cpy_len;
2766 buf_avail -= cpy_len;
2767 buf_offset += cpy_len;
2769 /* This buf reaches to its end, get the next one */
2770 if (buf_avail == 0) {
2771 if (++vec_idx >= nr_vec)
2774 buf_addr = buf_vec[vec_idx].buf_addr;
2775 buf_len = buf_vec[vec_idx].buf_len;
2778 buf_avail = buf_len;
2780 PRINT_PACKET(dev, (uintptr_t)buf_addr,
2781 (uint32_t)buf_avail, 0);
2785 * This mbuf reaches to its end, get a new one
2786 * to hold more data.
2788 if (mbuf_avail == 0) {
2789 cur = rte_pktmbuf_alloc(mbuf_pool);
2790 if (unlikely(cur == NULL)) {
2791 VHOST_LOG_DATA(ERR, "Failed to "
2792 "allocate memory for mbuf.\n");
/* Close the finished segment and account it in pkt_len */
2798 prev->data_len = mbuf_offset;
2800 m->pkt_len += mbuf_offset;
2804 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
/* Finalize the last segment's length */
2808 prev->data_len = mbuf_offset;
2809 m->pkt_len += mbuf_offset;
/* hdr may be NULL when host offload is not negotiated */
2812 vhost_dequeue_offload(hdr, m, legacy_ol_flags);
/*
 * rte_mbuf extbuf free callback: releases the buffer allocated by
 * virtio_dev_extbuf_alloc() (body not visible in this gapped listing;
 * presumably rte_free(opaque) — TODO confirm against the full file).
 */
2820 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque)
/*
 * Attach an external data buffer of at least @size bytes to @pkt.
 * The buffer (rte_malloc'd, cache-line aligned) also carries the
 * rte_mbuf_ext_shared_info at its tail, initialized by the shinfo
 * helper with virtio_dev_extbuf_free() as the free callback.
 * Return value convention not visible in this gapped listing.
 */
2826 virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size)
2828 struct rte_mbuf_ext_shared_info *shinfo = NULL;
2829 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size;
/* Reserve room for shinfo plus alignment padding at the buffer tail */
2834 total_len += sizeof(*shinfo) + sizeof(uintptr_t);
2835 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
/* buf_len is uint16_t-sized in the mbuf API; reject larger buffers */
2837 if (unlikely(total_len > UINT16_MAX))
2840 buf_len = total_len;
2841 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
2842 if (unlikely(buf == NULL))
2845 /* Initialize shinfo */
2846 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
2847 virtio_dev_extbuf_free, buf);
2848 if (unlikely(shinfo == NULL)) {
2850 VHOST_LOG_DATA(ERR, "Failed to init shinfo\n");
2854 iova = rte_malloc_virt2iova(buf);
2855 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo);
2856 rte_pktmbuf_reset_headroom(pkt);
2862 * Prepare a host supported pktmbuf.
/*
 * Ensure @pkt can hold @data_len bytes: use its own tailroom if large
 * enough, otherwise attach an external buffer (when dev->extbuf is set),
 * otherwise fall back to chained mbufs unless dev->linearbuf forbids it.
 * Returns 0 on success (failure path not visible in this gapped listing).
 */
2864 static __rte_always_inline int
2865 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt,
2868 if (rte_pktmbuf_tailroom(pkt) >= data_len)
2871 /* attach an external buffer if supported */
2872 if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len))
2875 /* check if chained buffers are allowed */
2876 if (!dev->linearbuf)
/*
 * Dequeue up to @count packets from a split virtqueue into @pkts.
 *
 * Flow: acquire-load the avail index to bound free_entries, bulk-allocate
 * mbufs, then per packet: resolve the descriptor chain
 * (fill_vec_buf_split), record it in the shadow used ring, size the mbuf
 * (virtio_dev_pktmbuf_prep) and copy the data (copy_desc_to_mbuf).
 * Unconsumed mbufs are freed on early termination; the shadow used ring
 * is flushed and the guest notified before returning.
 *
 * Returns the number of packets successfully dequeued (i - dropped).
 * @legacy_ol_flags selects between legacy and compliant RX offload flags.
 */
2884 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
2885 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
2886 bool legacy_ol_flags)
2889 uint16_t free_entries;
2890 uint16_t dropped = 0;
/* warn once per process to avoid log flooding on repeated failures */
2891 static bool allocerr_warned;
2894 * The ordering between avail index and
2895 * desc reads needs to be enforced.
2897 free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) -
2899 if (free_entries == 0)
2902 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
2904 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
2906 count = RTE_MIN(count, MAX_PKT_BURST);
2907 count = RTE_MIN(count, free_entries);
2908 VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n",
2911 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
2914 for (i = 0; i < count; i++) {
2915 struct buf_vector buf_vec[BUF_VECTOR_MAX];
2918 uint16_t nr_vec = 0;
2921 if (unlikely(fill_vec_buf_split(dev, vq,
2922 vq->last_avail_idx + i,
2924 &head_idx, &buf_len,
2925 VHOST_ACCESS_RO) < 0))
/* len 0: dequeue consumes the whole chain, nothing written back */
2928 update_shadow_used_ring_split(vq, head_idx, 0);
2930 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len);
2931 if (unlikely(err)) {
2933 * mbuf allocation fails for jumbo packets when external
2934 * buffer allocation is not allowed and linear buffer
2935 * is required. Drop this packet.
2937 if (!allocerr_warned) {
2939 "Failed mbuf alloc of size %d from %s on %s.\n",
2940 buf_len, mbuf_pool->name, dev->ifname);
2941 allocerr_warned = true;
2948 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
2949 mbuf_pool, legacy_ol_flags);
2950 if (unlikely(err)) {
2951 if (!allocerr_warned) {
2953 "Failed to copy desc to mbuf on %s.\n",
2955 allocerr_warned = true;
/* Free the pre-allocated mbufs that will not be delivered */
2964 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1);
2966 vq->last_avail_idx += i;
/* Flush the deferred batched copies before publishing used entries */
2968 do_data_copy_dequeue(vq);
2969 if (unlikely(i < count))
2970 vq->shadow_used_idx = i;
2971 if (likely(vq->shadow_used_idx)) {
2972 flush_shadow_used_ring_split(dev, vq);
2973 vhost_vring_call_split(dev, vq);
2976 return (i - dropped);
/* Split-ring dequeue variant using legacy PKT_RX_* offload flags. */
2981 virtio_dev_tx_split_legacy(struct virtio_net *dev,
2982 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
2983 struct rte_mbuf **pkts, uint16_t count)
2985 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true);
/* Split-ring dequeue variant using spec-compliant offload flags. */
2990 virtio_dev_tx_split_compliant(struct virtio_net *dev,
2991 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
2992 struct rte_mbuf **pkts, uint16_t count)
2994 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false);
/*
 * Validate and reserve a full PACKED_BATCH_SIZE run of descriptors at
 * @avail_idx for batched dequeue. Checks availability flags against the
 * wrap counter, translates guest addresses into @desc_addrs, verifies
 * each packet fits the prepared mbuf, and fills @ids and the mbuf
 * lengths. Returns 0 when the whole batch is usable (error returns not
 * visible in this gapped listing).
 */
2997 static __rte_always_inline int
2998 vhost_reserve_avail_batch_packed(struct virtio_net *dev,
2999 struct vhost_virtqueue *vq,
3000 struct rte_mbuf **pkts,
3002 uintptr_t *desc_addrs,
3005 bool wrap = vq->avail_wrap_counter;
3006 struct vring_packed_desc *descs = vq->desc_packed;
3007 uint64_t lens[PACKED_BATCH_SIZE];
3008 uint64_t buf_lens[PACKED_BATCH_SIZE];
/* Batched path assumes a single mergeable header per descriptor */
3009 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
/* Batch must be aligned and must not wrap past the ring end */
3012 if (unlikely(avail_idx & PACKED_BATCH_MASK))
3014 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
3017 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3018 flags = descs[avail_idx + i].flags;
/* All descs must be avail, not used, and single (no NEXT/INDIRECT) */
3019 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
3020 (wrap == !!(flags & VRING_DESC_F_USED)) ||
3021 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
/* Order flag checks before reading the descriptor payload fields */
3025 rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
3027 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3028 lens[i] = descs[avail_idx + i].len;
3030 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3031 desc_addrs[i] = vhost_iova_to_vva(dev, vq,
3032 descs[avail_idx + i].addr,
3033 &lens[i], VHOST_ACCESS_RW);
3036 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3037 if (unlikely(!desc_addrs[i]))
/* Translation must cover the full descriptor (no partial mapping) */
3039 if (unlikely((lens[i] != descs[avail_idx + i].len)))
3043 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3044 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i]))
3048 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3049 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
3051 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3052 if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
3056 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
/* Packet payload length excludes the virtio-net header */
3057 pkts[i]->pkt_len = lens[i] - buf_offset;
3058 pkts[i]->data_len = pkts[i]->pkt_len;
3059 ids[i] = descs[avail_idx + i].id;
/*
 * Fast-path dequeue of exactly PACKED_BATCH_SIZE packets from a packed
 * ring. Reserves the batch via vhost_reserve_avail_batch_packed(), copies
 * each payload with a single rte_memcpy, applies RX offloads when
 * negotiated, records the used entries (in-order variant only needs the
 * last id), and advances last_avail_idx. Returns 0 on success (the
 * failure return after the reserve call is not visible in this listing).
 */
3068 static __rte_always_inline int
3069 virtio_dev_tx_batch_packed(struct virtio_net *dev,
3070 struct vhost_virtqueue *vq,
3071 struct rte_mbuf **pkts,
3072 bool legacy_ol_flags)
3074 uint16_t avail_idx = vq->last_avail_idx;
3075 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
3076 struct virtio_net_hdr *hdr;
3077 uintptr_t desc_addrs[PACKED_BATCH_SIZE];
3078 uint16_t ids[PACKED_BATCH_SIZE];
3081 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx,
3085 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3086 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
3088 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
/* Copy payload only; the header sits before buf_offset */
3089 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
3090 (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
3093 if (virtio_net_with_host_offload(dev)) {
3094 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3095 hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
3096 vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags);
3100 if (virtio_net_is_inorder(dev))
/* In-order: only the last buffer id needs to be written back */
3101 vhost_shadow_dequeue_batch_packed_inorder(vq,
3102 ids[PACKED_BATCH_SIZE - 1]);
3104 vhost_shadow_dequeue_batch_packed(dev, vq, ids);
3106 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
3111 static __rte_always_inline int
3112 vhost_dequeue_single_packed(struct virtio_net *dev,
3113 struct vhost_virtqueue *vq,
3114 struct rte_mempool *mbuf_pool,
3115 struct rte_mbuf *pkts,
3117 uint16_t *desc_count,
3118 bool legacy_ol_flags)
3120 struct buf_vector buf_vec[BUF_VECTOR_MAX];
3122 uint16_t nr_vec = 0;
3124 static bool allocerr_warned;
3126 if (unlikely(fill_vec_buf_packed(dev, vq,
3127 vq->last_avail_idx, desc_count,
3130 VHOST_ACCESS_RO) < 0))
3133 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
3134 if (!allocerr_warned) {
3136 "Failed mbuf alloc of size %d from %s on %s.\n",
3137 buf_len, mbuf_pool->name, dev->ifname);
3138 allocerr_warned = true;
3143 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts,
3144 mbuf_pool, legacy_ol_flags);
3145 if (unlikely(err)) {
3146 if (!allocerr_warned) {
3148 "Failed to copy desc to mbuf on %s.\n",
3150 allocerr_warned = true;
3158 static __rte_always_inline int
3159 virtio_dev_tx_single_packed(struct virtio_net *dev,
3160 struct vhost_virtqueue *vq,
3161 struct rte_mempool *mbuf_pool,
3162 struct rte_mbuf *pkts,
3163 bool legacy_ol_flags)
3166 uint16_t buf_id, desc_count = 0;
3169 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id,
3170 &desc_count, legacy_ol_flags);
3172 if (likely(desc_count > 0)) {
3173 if (virtio_net_is_inorder(dev))
3174 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id,
3177 vhost_shadow_dequeue_single_packed(vq, buf_id,
3180 vq_inc_last_avail_packed(vq, desc_count);
/*
 * Packed-ring dequeue main loop: bulk-allocate @count mbufs, then try the
 * batched path (PACKED_BATCH_SIZE at a time) and fall back to the single
 * path per packet. Unused mbufs are freed when fewer than @count packets
 * were dequeued; pending batched copies are flushed and the shadow used
 * entries published (with guest kick) before returning.
 */
3188 virtio_dev_tx_packed(struct virtio_net *dev,
3189 struct vhost_virtqueue *__rte_restrict vq,
3190 struct rte_mempool *mbuf_pool,
3191 struct rte_mbuf **__rte_restrict pkts,
3193 bool legacy_ol_flags)
3195 uint32_t pkt_idx = 0;
3197 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
3201 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
/* Prefer the unrolled batch path whenever a full batch remains */
3203 if (count - pkt_idx >= PACKED_BATCH_SIZE) {
3204 if (!virtio_dev_tx_batch_packed(dev, vq,
3207 pkt_idx += PACKED_BATCH_SIZE;
3212 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
3217 } while (pkt_idx < count);
3219 if (pkt_idx != count)
3220 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx);
3222 if (vq->shadow_used_idx) {
/* Deferred copies must land before used entries become visible */
3223 do_data_copy_dequeue(vq);
3225 vhost_flush_dequeue_shadow_packed(dev, vq);
3226 vhost_vring_call_packed(dev, vq);
/* Packed-ring dequeue variant using legacy PKT_RX_* offload flags. */
3234 virtio_dev_tx_packed_legacy(struct virtio_net *dev,
3235 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3236 struct rte_mbuf **__rte_restrict pkts, uint32_t count)
3238 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true);
/* Packed-ring dequeue variant using spec-compliant offload flags. */
3243 virtio_dev_tx_packed_compliant(struct virtio_net *dev,
3244 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3245 struct rte_mbuf **__rte_restrict pkts, uint32_t count)
3247 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false);
3251 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
3252 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
3254 struct virtio_net *dev;
3255 struct rte_mbuf *rarp_mbuf = NULL;
3256 struct vhost_virtqueue *vq;
3257 int16_t success = 1;
3259 dev = get_device(vid);
3263 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
3265 "(%d) %s: built-in vhost net backend is disabled.\n",
3266 dev->vid, __func__);
3270 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
3272 "(%d) %s: invalid virtqueue idx %d.\n",
3273 dev->vid, __func__, queue_id);
3277 vq = dev->virtqueue[queue_id];
3279 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
3282 if (unlikely(!vq->enabled)) {
3284 goto out_access_unlock;
3287 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3288 vhost_user_iotlb_rd_lock(vq);
3290 if (unlikely(!vq->access_ok))
3291 if (unlikely(vring_translate(dev, vq) < 0)) {
3297 * Construct a RARP broadcast packet, and inject it to the "pkts"
3298 * array, to looks like that guest actually send such packet.
3300 * Check user_send_rarp() for more information.
3302 * broadcast_rarp shares a cacheline in the virtio_net structure
3303 * with some fields that are accessed during enqueue and
3304 * __atomic_compare_exchange_n causes a write if performed compare
3305 * and exchange. This could result in false sharing between enqueue
3308 * Prevent unnecessary false sharing by reading broadcast_rarp first
3309 * and only performing compare and exchange if the read indicates it
3310 * is likely to be set.
3312 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
3313 __atomic_compare_exchange_n(&dev->broadcast_rarp,
3314 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
3316 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
3317 if (rarp_mbuf == NULL) {
3318 VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n");
3325 if (vq_is_packed(dev)) {
3326 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3327 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count);
3329 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count);
3331 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3332 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count);
3334 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count);
3338 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3339 vhost_user_iotlb_rd_unlock(vq);
3342 rte_spinlock_unlock(&vq->access_lock);
3344 if (unlikely(rarp_mbuf != NULL)) {
3346 * Inject it to the head of "pkts" array, so that switch's mac
3347 * learning table will get updated first.
3349 memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));
3350 pkts[0] = rarp_mbuf;