1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
7 #include <linux/virtio_net.h>
10 #include <rte_memcpy.h>
12 #include <rte_ether.h>
14 #include <rte_vhost.h>
19 #include <rte_spinlock.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost_async.h>
/* Max bytes a single copy may have and still be deferred into the
 * batch-copy array; larger copies are done inline with rte_memcpy(). */
#define MAX_BATCH_LEN 256

/* NOTE(review): threshold appears related to async enqueue batching;
 * not referenced in the visible portion of this file — confirm usage. */
#define VHOST_ASYNC_BATCH_THRESHOLD 32
30 static __rte_always_inline bool
31 rxvq_is_mergeable(struct virtio_net *dev)
33 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
36 static __rte_always_inline bool
37 virtio_net_is_inorder(struct virtio_net *dev)
39 return dev->features & (1ULL << VIRTIO_F_IN_ORDER);
43 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
45 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
49 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
51 struct batch_copy_elem *elem = vq->batch_copy_elems;
52 uint16_t count = vq->batch_copy_nb_elems;
55 for (i = 0; i < count; i++) {
56 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
57 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr,
59 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
62 vq->batch_copy_nb_elems = 0;
66 do_data_copy_dequeue(struct vhost_virtqueue *vq)
68 struct batch_copy_elem *elem = vq->batch_copy_elems;
69 uint16_t count = vq->batch_copy_nb_elems;
72 for (i = 0; i < count; i++)
73 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
75 vq->batch_copy_nb_elems = 0;
78 static __rte_always_inline void
79 do_flush_shadow_used_ring_split(struct virtio_net *dev,
80 struct vhost_virtqueue *vq,
81 uint16_t to, uint16_t from, uint16_t size)
83 rte_memcpy(&vq->used->ring[to],
84 &vq->shadow_used_split[from],
85 size * sizeof(struct vring_used_elem));
86 vhost_log_cache_used_vring(dev, vq,
87 offsetof(struct vring_used, ring[to]),
88 size * sizeof(struct vring_used_elem));
91 static __rte_always_inline void
92 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
94 uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
96 if (used_idx + vq->shadow_used_idx <= vq->size) {
97 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
102 /* update used ring interval [used_idx, vq->size] */
103 size = vq->size - used_idx;
104 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
106 /* update the left half used ring interval [0, left_size] */
107 do_flush_shadow_used_ring_split(dev, vq, 0, size,
108 vq->shadow_used_idx - size);
110 vq->last_used_idx += vq->shadow_used_idx;
112 vhost_log_cache_sync(dev, vq);
114 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx,
116 vq->shadow_used_idx = 0;
117 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
118 sizeof(vq->used->idx));
121 static __rte_always_inline void
122 update_shadow_used_ring_split(struct vhost_virtqueue *vq,
123 uint16_t desc_idx, uint32_t len)
125 uint16_t i = vq->shadow_used_idx++;
127 vq->shadow_used_split[i].id = desc_idx;
128 vq->shadow_used_split[i].len = len;
/*
 * Flush all shadow used entries to the packed ring for the enqueue path.
 * First loop stores ids/lens, then a release fence, then a second loop
 * stores flags — the head descriptor's flags are written last so the
 * guest never sees a partially updated chain.
 *
 * NOTE(review): this paste is missing several source lines here (the
 * declarations of 'i'/'flags', the else branches and some closing
 * braces are elided) — verify against the upstream file before building.
 */
static __rte_always_inline void
vhost_flush_enqueue_shadow_packed(struct virtio_net *dev,
				struct vhost_virtqueue *vq)
	uint16_t used_idx = vq->last_used_idx;
	uint16_t head_idx = vq->last_used_idx;
	uint16_t head_flags = 0;

	/* Split loop in two to save memory barriers */
	for (i = 0; i < vq->shadow_used_idx; i++) {
		vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
		vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;

		used_idx += vq->shadow_used_packed[i].count;
		if (used_idx >= vq->size)
			used_idx -= vq->size;

	/* The ordering for storing desc flags needs to be enforced. */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);

	for (i = 0; i < vq->shadow_used_idx; i++) {
		/* A non-zero len means the descriptor was written to. */
		if (vq->shadow_used_packed[i].len)
			flags = VRING_DESC_F_WRITE;

		/* AVAIL/USED both set (or both clear) marks "used" for the
		 * current wrap state of the packed ring. */
		if (vq->used_wrap_counter) {
			flags |= VRING_DESC_F_USED;
			flags |= VRING_DESC_F_AVAIL;
			flags &= ~VRING_DESC_F_USED;
			flags &= ~VRING_DESC_F_AVAIL;

		vq->desc_packed[vq->last_used_idx].flags = flags;

		vhost_log_cache_used_vring(dev, vq,
					sizeof(struct vring_packed_desc),
					sizeof(struct vring_packed_desc));
		/* Remember the head; its flags are deferred until the end. */
		head_idx = vq->last_used_idx;

		vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count);

	/* Finally expose the head descriptor to the guest. */
	vq->desc_packed[head_idx].flags = head_flags;

	vhost_log_cache_used_vring(dev, vq,
				sizeof(struct vring_packed_desc),
				sizeof(struct vring_packed_desc));
	vq->shadow_used_idx = 0;
	vhost_log_cache_sync(dev, vq);
195 static __rte_always_inline void
196 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev,
197 struct vhost_virtqueue *vq)
199 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0];
201 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id;
202 /* desc flags is the synchronization point for virtio packed vring */
203 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags,
204 used_elem->flags, __ATOMIC_RELEASE);
206 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx *
207 sizeof(struct vring_packed_desc),
208 sizeof(struct vring_packed_desc));
209 vq->shadow_used_idx = 0;
210 vhost_log_cache_sync(dev, vq);
213 static __rte_always_inline void
214 vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
215 struct vhost_virtqueue *vq,
221 uint16_t last_used_idx;
222 struct vring_packed_desc *desc_base;
224 last_used_idx = vq->last_used_idx;
225 desc_base = &vq->desc_packed[last_used_idx];
227 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter);
229 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
230 desc_base[i].id = ids[i];
231 desc_base[i].len = lens[i];
234 rte_atomic_thread_fence(__ATOMIC_RELEASE);
236 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
237 desc_base[i].flags = flags;
240 vhost_log_cache_used_vring(dev, vq, last_used_idx *
241 sizeof(struct vring_packed_desc),
242 sizeof(struct vring_packed_desc) *
244 vhost_log_cache_sync(dev, vq);
246 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
249 static __rte_always_inline void
250 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
253 vq->shadow_used_packed[0].id = id;
255 if (!vq->shadow_used_idx) {
256 vq->shadow_last_used_idx = vq->last_used_idx;
257 vq->shadow_used_packed[0].flags =
258 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
259 vq->shadow_used_packed[0].len = 0;
260 vq->shadow_used_packed[0].count = 1;
261 vq->shadow_used_idx++;
264 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
/*
 * Record a dequeued batch in the packed used ring: the first id may be
 * deferred into the shadow entry (written at the next flush), the rest
 * are written directly with a release fence before their flags.
 *
 * NOTE(review): the paste elides the remaining parameters, the
 * declarations of 'i'/'flags'/'begin' and the else-branch that sets
 * 'begin' — verify against upstream before building.
 */
static __rte_always_inline void
vhost_shadow_dequeue_batch_packed(struct virtio_net *dev,
				struct vhost_virtqueue *vq,
	flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);

	if (!vq->shadow_used_idx) {
		/* Defer the head descriptor into the shadow entry. */
		vq->shadow_last_used_idx = vq->last_used_idx;
		vq->shadow_used_packed[0].id = ids[0];
		vq->shadow_used_packed[0].len = 0;
		vq->shadow_used_packed[0].count = 1;
		vq->shadow_used_packed[0].flags = flags;
		vq->shadow_used_idx++;

	/* Dequeue writes back len = 0 (nothing written to the guest). */
	vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) {
		vq->desc_packed[vq->last_used_idx + i].id = ids[i];
		vq->desc_packed[vq->last_used_idx + i].len = 0;

	/* Flags must not become visible before id/len. */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);
	vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE)
		vq->desc_packed[vq->last_used_idx + i].flags = flags;

	vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
				sizeof(struct vring_packed_desc),
				sizeof(struct vring_packed_desc) *
	vhost_log_cache_sync(dev, vq);

	vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
307 static __rte_always_inline void
308 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq,
314 flags = vq->desc_packed[vq->last_used_idx].flags;
315 if (vq->used_wrap_counter) {
316 flags |= VRING_DESC_F_USED;
317 flags |= VRING_DESC_F_AVAIL;
319 flags &= ~VRING_DESC_F_USED;
320 flags &= ~VRING_DESC_F_AVAIL;
323 if (!vq->shadow_used_idx) {
324 vq->shadow_last_used_idx = vq->last_used_idx;
326 vq->shadow_used_packed[0].id = buf_id;
327 vq->shadow_used_packed[0].len = 0;
328 vq->shadow_used_packed[0].flags = flags;
329 vq->shadow_used_idx++;
331 vq->desc_packed[vq->last_used_idx].id = buf_id;
332 vq->desc_packed[vq->last_used_idx].len = 0;
333 vq->desc_packed[vq->last_used_idx].flags = flags;
336 vq_inc_last_used_packed(vq, count);
339 static __rte_always_inline void
340 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq,
346 vq->shadow_used_packed[0].id = buf_id;
348 flags = vq->desc_packed[vq->last_used_idx].flags;
349 if (vq->used_wrap_counter) {
350 flags |= VRING_DESC_F_USED;
351 flags |= VRING_DESC_F_AVAIL;
353 flags &= ~VRING_DESC_F_USED;
354 flags &= ~VRING_DESC_F_AVAIL;
357 if (!vq->shadow_used_idx) {
358 vq->shadow_last_used_idx = vq->last_used_idx;
359 vq->shadow_used_packed[0].len = 0;
360 vq->shadow_used_packed[0].flags = flags;
361 vq->shadow_used_idx++;
364 vq_inc_last_used_packed(vq, count);
/*
 * Append 'num_buffers' (id, len, count) triples to the packed shadow
 * used ring and track shadow_aligned_idx so flushes can be aligned to
 * batch boundaries.
 *
 * NOTE(review): the paste elides the remaining parameters (len/id/count
 * arrays), the 'i' declaration and the mask operand on the
 * shadow_aligned_idx assignment — verify against upstream.
 */
static __rte_always_inline void
vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
				uint16_t num_buffers)
	for (i = 0; i < num_buffers; i++) {
		/* enqueue shadow flush action aligned with batch num */
		if (!vq->shadow_used_idx)
			vq->shadow_aligned_idx = vq->last_used_idx &
		vq->shadow_used_packed[vq->shadow_used_idx].id  = id[i];
		vq->shadow_used_packed[vq->shadow_used_idx].len = len[i];
		vq->shadow_used_packed[vq->shadow_used_idx].count = count[i];
		vq->shadow_aligned_idx += count[i];
		vq->shadow_used_idx++;
389 static __rte_always_inline void
390 vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
391 struct vhost_virtqueue *vq,
395 uint16_t num_buffers)
397 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);
399 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) {
400 do_data_copy_enqueue(dev, vq);
401 vhost_flush_enqueue_shadow_packed(dev, vq);
/* avoid write operation when necessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)
/*
 * Translate mbuf TX offload flags into the virtio net header: L4
 * checksum offload (TCP/UDP/SCTP), software IPv4 header checksum, and
 * GSO (TSO4/TSO6/UFO) fields. Fields not used are explicitly zeroed
 * via ASSIGN_UNLESS_EQUAL to avoid redundant cache-line writes.
 *
 * NOTE(review): the paste elides switch braces, 'break' statements,
 * else branches and some operand continuations — verify against
 * upstream before building.
 */
static __rte_always_inline void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
	uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;

	/* TSO implies a valid TCP checksum request. */
	if (m_buf->ol_flags & PKT_TX_TCP_SEG)
		csum_l4 |= PKT_TX_TCP_CKSUM;

		net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;

		case PKT_TX_TCP_CKSUM:
			net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr,
		case PKT_TX_UDP_CKSUM:
			net_hdr->csum_offset = (offsetof(struct rte_udp_hdr,
		case PKT_TX_SCTP_CKSUM:
			net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr,
		/* No checksum offload requested: clear the csum fields. */
		ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0);

	/* IP cksum verification cannot be bypassed, then calculate here */
	if (m_buf->ol_flags & PKT_TX_IP_CKSUM) {
		struct rte_ipv4_hdr *ipv4_hdr;

		ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *,
		ipv4_hdr->hdr_checksum = 0;
		ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);

	if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
		if (m_buf->ol_flags & PKT_TX_IPV4)
			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		net_hdr->gso_size = m_buf->tso_segsz;
		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
	} else if (m_buf->ol_flags & PKT_TX_UDP_SEG) {
		net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		net_hdr->gso_size = m_buf->tso_segsz;
		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len +
		/* No GSO requested: clear the GSO fields. */
		ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0);
/*
 * Map one guest descriptor (desc_iova, desc_len) into host virtual
 * address space, appending one buf_vec entry per contiguous chunk
 * returned by vhost_iova_to_vva(). Returns 0 on success, negative on
 * failure (vector full or unmappable address).
 *
 * NOTE(review): the paste elides the surrounding chunk loop (a
 * descriptor spanning non-contiguous host VA produces multiple
 * buf_vec entries) and the vec_id++/return lines — verify upstream.
 */
static __rte_always_inline int
map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct buf_vector *buf_vec, uint16_t *vec_idx,
		uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
	uint16_t vec_id = *vec_idx;

		uint64_t desc_chunck_len = desc_len;

		if (unlikely(vec_id >= BUF_VECTOR_MAX))

		/* May shrink desc_chunck_len to the contiguous mapping. */
		desc_addr = vhost_iova_to_vva(dev, vq,

		if (unlikely(!desc_addr))

		rte_prefetch0((void *)(uintptr_t)desc_addr);

		buf_vec[vec_id].buf_iova = desc_iova;
		buf_vec[vec_id].buf_addr = desc_addr;
		buf_vec[vec_id].buf_len = desc_chunck_len;

		desc_len -= desc_chunck_len;
		desc_iova += desc_chunck_len;
/*
 * Resolve one split-ring available entry (at avail_idx) into a buf_vec
 * array of host-VA chunks, following the descriptor chain and
 * dereferencing indirect tables. Outputs the chain head id and total
 * chain length. Non-contiguous indirect tables are copied to a local
 * allocation (idesc) which is freed before returning.
 *
 * NOTE(review): the paste elides the chain-walk loop header, several
 * error-return statements and closing braces — verify upstream.
 */
static __rte_always_inline int
fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
			 uint32_t avail_idx, uint16_t *vec_idx,
			 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
			 uint32_t *desc_chain_len, uint8_t perm)
	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
	uint16_t vec_id = *vec_idx;
	uint32_t nr_descs = vq->size;
	struct vring_desc *descs = vq->desc;
	struct vring_desc *idesc = NULL;

	if (unlikely(idx >= vq->size))

	*desc_chain_head = idx;

	if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
		dlen = vq->desc[idx].len;
		nr_descs = dlen / sizeof(struct vring_desc);
		if (unlikely(nr_descs > vq->size))

		descs = (struct vring_desc *)(uintptr_t)
			vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,

		if (unlikely(!descs))

		if (unlikely(dlen < vq->desc[idx].len)) {
			/*
			 * The indirect desc table is not contiguous
			 * in process VA space, we have to copy it.
			 */
			idesc = vhost_alloc_copy_ind_table(dev, vq,
					vq->desc[idx].addr, vq->desc[idx].len);
			if (unlikely(!idesc))

		/* Loop guard against malicious/looping chains. */
		if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) {
			free_ind_table(idesc);

		dlen = descs[idx].len;

		if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
						descs[idx].addr, dlen,
			free_ind_table(idesc);

		if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)

		idx = descs[idx].next;

	*desc_chain_len = len;

	if (unlikely(!!idesc))
		free_ind_table(idesc);
/*
 * Reserve enough split-ring buffers to hold 'size' bytes, filling
 * buf_vec and recording each chain in the shadow used ring. Mergeable
 * RX may consume multiple chains (up to vq->size - 1 tries).
 * Returns -1 on fail, 0 on success
 *
 * NOTE(review): the paste elides the loop construct, the else branch of
 * the mergeable check, and the tail (size accounting, cur_idx advance,
 * num_buffers/vec_idx output) — verify upstream.
 */
reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint32_t size, struct buf_vector *buf_vec,
				uint16_t *num_buffers, uint16_t avail_head,
	uint16_t vec_idx = 0;
	uint16_t max_tries, tries = 0;
	uint16_t head_idx = 0;

	cur_idx  = vq->last_avail_idx;

	/* Mergeable RX may chain many buffers; otherwise one try only. */
	if (rxvq_is_mergeable(dev))
		max_tries = vq->size - 1;

		/* Ring exhausted: no more available entries. */
		if (unlikely(cur_idx == avail_head))
		/*
		 * if we tried all available ring items, and still
		 * can't get enough buf, it means something abnormal
		 */
		if (unlikely(++tries > max_tries))

		if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
						VHOST_ACCESS_RW) < 0))
		len = RTE_MIN(len, size);
		update_shadow_used_ring_split(vq, head_idx, len);
/*
 * Expand an indirect packed descriptor into buf_vec entries: map the
 * indirect table (copying it locally when not host-VA-contiguous),
 * then map each inner descriptor. The temporary copy (idescs) is freed
 * on every exit path.
 *
 * NOTE(review): the paste elides error-return statements, the inner
 * map_one_desc() argument list and the *len/*vec_idx outputs — verify
 * upstream.
 */
static __rte_always_inline int
fill_vec_buf_packed_indirect(struct virtio_net *dev,
			struct vhost_virtqueue *vq,
			struct vring_packed_desc *desc, uint16_t *vec_idx,
			struct buf_vector *buf_vec, uint32_t *len, uint8_t perm)
	uint16_t vec_id = *vec_idx;
	struct vring_packed_desc *descs, *idescs = NULL;

	descs = (struct vring_packed_desc *)(uintptr_t)
		vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
	if (unlikely(!descs))

	if (unlikely(dlen < desc->len)) {
		/*
		 * The indirect desc table is not contiguous
		 * in process VA space, we have to copy it.
		 */
		idescs = vhost_alloc_copy_ind_table(dev,
				vq, desc->addr, desc->len);
		if (unlikely(!idescs))

	nr_descs = desc->len / sizeof(struct vring_packed_desc);
	if (unlikely(nr_descs >= vq->size)) {
		free_ind_table(idescs);

	for (i = 0; i < nr_descs; i++) {
		if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
			free_ind_table(idescs);

		if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,

	if (unlikely(!!idescs))
		free_ind_table(idescs);
/*
 * Resolve one packed-ring descriptor chain starting at avail_idx into
 * buf_vec, handling indirect descriptors and ring wrap. Outputs the
 * number of descriptors consumed, the buffer id and the total length.
 * Returns negative when the descriptor is not available or mapping
 * fails.
 *
 * NOTE(review): the paste elides the chain loop construct, wrap-counter
 * toggle on wrap, and return statements — verify upstream.
 */
static __rte_always_inline int
fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint16_t avail_idx, uint16_t *desc_count,
				struct buf_vector *buf_vec, uint16_t *vec_idx,
				uint16_t *buf_id, uint32_t *len, uint8_t perm)
	bool wrap_counter = vq->avail_wrap_counter;
	struct vring_packed_desc *descs = vq->desc_packed;
	uint16_t vec_id = *vec_idx;

	if (avail_idx < vq->last_avail_idx)

	/*
	 * Perform a load-acquire barrier in desc_is_avail to
	 * enforce the ordering between desc flags and desc
	 */
	if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))

	if (unlikely(vec_id >= BUF_VECTOR_MAX))

	if (unlikely(*desc_count >= vq->size))

	*buf_id = descs[avail_idx].id;

	if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
		if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
		dlen = descs[avail_idx].len;

		if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
						descs[avail_idx].addr,

	if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)

	if (++avail_idx >= vq->size) {
		avail_idx -= vq->size;
/*
 * Copy the virtio-net header into the descriptor when the first buffer
 * is too small to hold it contiguously: the header is scattered across
 * as many buf_vec chunks as needed, dirty-logging each written range.
 *
 * NOTE(review): the paste elides the copy loop construct and the
 * remain/src/iova/buf_vec advance at the bottom of the loop — verify
 * upstream.
 */
static __rte_noinline void
copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct buf_vector *buf_vec,
		struct virtio_net_hdr_mrg_rxbuf *hdr)
	uint64_t remain = dev->vhost_hlen;
	uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
	uint64_t iova = buf_vec->buf_iova;

		len = RTE_MIN(remain,
		dst = buf_vec->buf_addr;
		rte_memcpy((void *)(uintptr_t)dst,
				(void *)(uintptr_t)src,

		PRINT_PACKET(dev, (uintptr_t)dst,
		vhost_log_cache_write_iova(dev, vq,
/*
 * Synchronous enqueue data path: copy one mbuf chain into the guest
 * buffers described by buf_vec. Handles a virtio-net header that does
 * not fit the first buffer (built in tmp_hdr, then scattered via
 * copy_vnet_hdr_to_desc), fills offload fields once the header mbuf is
 * fully consumed, and defers small copies (<= MAX_BATCH_LEN) into the
 * batch-copy array.
 *
 * NOTE(review): this paste elides many lines (declarations of hdr_addr/
 * cpy_len/error returns, loop-advance statements, else branches and
 * closing braces) — verify against upstream before building.
 */
static __rte_always_inline int
copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
			struct rte_mbuf *m, struct buf_vector *buf_vec,
			uint16_t nr_vec, uint16_t num_buffers)
	uint32_t vec_idx = 0;
	uint32_t mbuf_offset, mbuf_avail;
	uint32_t buf_offset, buf_avail;
	uint64_t buf_addr, buf_iova, buf_len;
	struct rte_mbuf *hdr_mbuf;
	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;

	if (unlikely(m == NULL)) {

	buf_addr = buf_vec[vec_idx].buf_addr;
	buf_iova = buf_vec[vec_idx].buf_iova;
	buf_len = buf_vec[vec_idx].buf_len;

	/* A single buffer must at least hold the virtio-net header. */
	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {

	if (unlikely(buf_len < dev->vhost_hlen)) {
		/* Header split across buffers: stage it in tmp_hdr. */
		memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));

	hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;

	VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
		dev->vid, num_buffers);

	if (unlikely(buf_len < dev->vhost_hlen)) {
		buf_offset = dev->vhost_hlen - buf_len;

		buf_addr = buf_vec[vec_idx].buf_addr;
		buf_iova = buf_vec[vec_idx].buf_iova;
		buf_len = buf_vec[vec_idx].buf_len;
		buf_avail = buf_len - buf_offset;
		buf_offset = dev->vhost_hlen;
		buf_avail = buf_len - dev->vhost_hlen;

	mbuf_avail  = rte_pktmbuf_data_len(m);

	while (mbuf_avail != 0 || m->next != NULL) {
		/* done with current buf, get the next one */
		if (buf_avail == 0) {

			if (unlikely(vec_idx >= nr_vec)) {

			buf_addr = buf_vec[vec_idx].buf_addr;
			buf_iova = buf_vec[vec_idx].buf_iova;
			buf_len = buf_vec[vec_idx].buf_len;

		/* done with current mbuf, get the next one */
		if (mbuf_avail == 0) {

			mbuf_avail = rte_pktmbuf_data_len(m);

			/* Header mbuf consumed: fill offloads now. */
			virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
			if (rxvq_is_mergeable(dev))
				ASSIGN_UNLESS_EQUAL(hdr->num_buffers,

			if (unlikely(hdr == &tmp_hdr)) {
				copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);

				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
				vhost_log_cache_write_iova(dev, vq,

		cpy_len = RTE_MIN(buf_avail, mbuf_avail);

		/* Large copies (or full batch array) are done inline. */
		if (likely(cpy_len > MAX_BATCH_LEN ||
					vq->batch_copy_nb_elems >= vq->size)) {
			rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)),
				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
			vhost_log_cache_write_iova(dev, vq,
					buf_iova + buf_offset,
			PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),
			batch_copy[vq->batch_copy_nb_elems].dst =
				(void *)((uintptr_t)(buf_addr + buf_offset));
			batch_copy[vq->batch_copy_nb_elems].src =
				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
			batch_copy[vq->batch_copy_nb_elems].log_addr =
				buf_iova + buf_offset;
			batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
			vq->batch_copy_nb_elems++;

		mbuf_avail  -= cpy_len;
		mbuf_offset += cpy_len;
		buf_avail  -= cpy_len;
		buf_offset += cpy_len;
929 static __rte_always_inline void
930 async_fill_vec(struct iovec *v, void *base, size_t len)
936 static __rte_always_inline void
937 async_fill_iter(struct rte_vhost_iov_iter *it, size_t count,
938 struct iovec *vec, unsigned long nr_seg)
945 it->nr_segs = nr_seg;
952 static __rte_always_inline void
953 async_fill_desc(struct rte_vhost_async_desc *desc,
954 struct rte_vhost_iov_iter *src, struct rte_vhost_iov_iter *dst)
/*
 * Async (DMA-assisted) enqueue data path: like copy_mbuf_to_desc(), but
 * copies whose size reaches vq->async_threshold are not performed by
 * the CPU — instead their source (mbuf IOVA) and destination (guest
 * HPA from gpa_to_first_hpa()) are appended to src_iovec/dst_iovec for
 * the DMA engine, and only the remainder is copied synchronously or
 * batched. The accumulated iovecs are published through src_it/dst_it.
 *
 * NOTE(review): this paste elides many lines (declarations of hdr_addr/
 * tlen/tvec_idx/hpa/mapped_len, error returns, loop constructs, else
 * branches and closing braces) — verify against upstream.
 */
static __rte_always_inline int
async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
			struct rte_mbuf *m, struct buf_vector *buf_vec,
			uint16_t nr_vec, uint16_t num_buffers,
			struct iovec *src_iovec, struct iovec *dst_iovec,
			struct rte_vhost_iov_iter *src_it,
			struct rte_vhost_iov_iter *dst_it)
	uint32_t vec_idx = 0;
	uint32_t mbuf_offset, mbuf_avail;
	uint32_t buf_offset, buf_avail;
	uint64_t buf_addr, buf_iova, buf_len;
	uint32_t cpy_len, cpy_threshold;
	struct rte_mbuf *hdr_mbuf;
	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;

	if (unlikely(m == NULL)) {

	/* Copies >= this size are offloaded to the DMA engine. */
	cpy_threshold = vq->async_threshold;

	buf_addr = buf_vec[vec_idx].buf_addr;
	buf_iova = buf_vec[vec_idx].buf_iova;
	buf_len = buf_vec[vec_idx].buf_len;

	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {

	hdr_addr = buf_addr;
	if (unlikely(buf_len < dev->vhost_hlen)) {
		memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));

	hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;

	VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
		dev->vid, num_buffers);

	if (unlikely(buf_len < dev->vhost_hlen)) {
		buf_offset = dev->vhost_hlen - buf_len;

		buf_addr = buf_vec[vec_idx].buf_addr;
		buf_iova = buf_vec[vec_idx].buf_iova;
		buf_len = buf_vec[vec_idx].buf_len;
		buf_avail = buf_len - buf_offset;
		buf_offset = dev->vhost_hlen;
		buf_avail = buf_len - dev->vhost_hlen;

	mbuf_avail  = rte_pktmbuf_data_len(m);

	while (mbuf_avail != 0 || m->next != NULL) {
		/* done with current buf, get the next one */
		if (buf_avail == 0) {

			if (unlikely(vec_idx >= nr_vec)) {

			buf_addr = buf_vec[vec_idx].buf_addr;
			buf_iova = buf_vec[vec_idx].buf_iova;
			buf_len = buf_vec[vec_idx].buf_len;

			buf_avail  = buf_len;

		/* done with current mbuf, get the next one */
		if (mbuf_avail == 0) {

			mbuf_avail = rte_pktmbuf_data_len(m);

			virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
			if (rxvq_is_mergeable(dev))
				ASSIGN_UNLESS_EQUAL(hdr->num_buffers,

			if (unlikely(hdr == &tmp_hdr)) {
				copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);

				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
						dev->vhost_hlen, 0);
				vhost_log_cache_write_iova(dev, vq,
						buf_vec[0].buf_iova,

		cpy_len = RTE_MIN(buf_avail, mbuf_avail);

		/* Hand large chunks to DMA, per contiguous HPA region. */
		while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
			hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
					buf_iova + buf_offset,
					cpy_len, &mapped_len);

			/* Fall back to CPU copy if not DMA-mappable. */
			if (unlikely(!hpa || mapped_len < cpy_threshold))

			async_fill_vec(src_iovec + tvec_idx,
				(void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
				mbuf_offset), (size_t)mapped_len);

			async_fill_vec(dst_iovec + tvec_idx,
					hpa, (size_t)mapped_len);

			tlen += (uint32_t)mapped_len;
			cpy_len -= (uint32_t)mapped_len;
			mbuf_avail  -= (uint32_t)mapped_len;
			mbuf_offset += (uint32_t)mapped_len;
			buf_avail  -= (uint32_t)mapped_len;
			buf_offset += (uint32_t)mapped_len;

		/* Remainder below the threshold is copied by the CPU. */
		if (likely(cpy_len)) {
			if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
					(void *)((uintptr_t)(buf_addr + buf_offset)),
					rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),

					(uintptr_t)(buf_addr + buf_offset),

				batch_copy[vq->batch_copy_nb_elems].dst =
					(void *)((uintptr_t)(buf_addr + buf_offset));
				batch_copy[vq->batch_copy_nb_elems].src =
					rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
				batch_copy[vq->batch_copy_nb_elems].log_addr =
					buf_iova + buf_offset;
				batch_copy[vq->batch_copy_nb_elems].len =
				vq->batch_copy_nb_elems++;

			mbuf_avail  -= cpy_len;
			mbuf_offset += cpy_len;
			buf_avail  -= cpy_len;
			buf_offset += cpy_len;

	/* Publish the gathered segments to the async engine. */
	async_fill_iter(src_it, tlen, src_iovec, tvec_idx);
	async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);
/*
 * Enqueue one packet into the packed ring: reserve as many descriptor
 * chains as needed (mergeable RX may use several), copy the mbuf data,
 * and record each chain (len/id/count) in the shadow used ring.
 * Outputs the total descriptor count via *nr_descs.
 *
 * NOTE(review): the paste elides the reservation loop construct, the
 * 'len' declaration, error returns and several argument continuations
 * (e.g. the nr_descs parameter line) — verify upstream.
 */
static __rte_always_inline int
vhost_enqueue_single_packed(struct virtio_net *dev,
			    struct vhost_virtqueue *vq,
			    struct rte_mbuf *pkt,
			    struct buf_vector *buf_vec,
	uint16_t nr_vec = 0;
	uint16_t avail_idx = vq->last_avail_idx;
	uint16_t max_tries, tries = 0;
	uint16_t buf_id = 0;
	uint16_t desc_count;
	uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
	uint16_t num_buffers = 0;
	uint32_t buffer_len[vq->size];
	uint16_t buffer_buf_id[vq->size];
	uint16_t buffer_desc_count[vq->size];

	if (rxvq_is_mergeable(dev))
		max_tries = vq->size - 1;

		/*
		 * if we tried all available ring items, and still
		 * can't get enough buf, it means something abnormal
		 */
		if (unlikely(++tries > max_tries))

		if (unlikely(fill_vec_buf_packed(dev, vq,
						avail_idx, &desc_count,
						VHOST_ACCESS_RW) < 0))

		len = RTE_MIN(len, size);

		buffer_len[num_buffers] = len;
		buffer_buf_id[num_buffers] = buf_id;
		buffer_desc_count[num_buffers] = desc_count;

		*nr_descs += desc_count;
		avail_idx += desc_count;
		if (avail_idx >= vq->size)
			avail_idx -= vq->size;

	if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0)

	vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id,
					   buffer_desc_count, num_buffers);
/*
 * Burst-enqueue 'count' packets into a split ring: for each packet,
 * reserve buffers, copy data, then flush the batch copies and the
 * shadow used ring and kick the guest once at the end. Returns the
 * number of packets actually enqueued (pkt_idx).
 *
 * NOTE(review): the paste elides error-gotos/breaks, the shadow
 * rollback context and the final return — verify upstream.
 */
static __rte_noinline uint32_t
virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
	struct rte_mbuf **pkts, uint32_t count)
	uint32_t pkt_idx = 0;
	uint16_t num_buffers;
	struct buf_vector buf_vec[BUF_VECTOR_MAX];
	uint16_t avail_head;

	/*
	 * The ordering between avail index and
	 * desc reads needs to be enforced.
	 */
	avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);

	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);

	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
		uint16_t nr_vec = 0;

		if (unlikely(reserve_avail_buf_split(dev, vq,
						pkt_len, buf_vec, &num_buffers,
						avail_head, &nr_vec) < 0)) {
			VHOST_LOG_DATA(DEBUG,
				"(%d) failed to get enough desc from vring\n",
			/* Roll back the shadow entries of this packet. */
			vq->shadow_used_idx -= num_buffers;

		VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
			dev->vid, vq->last_avail_idx,
			vq->last_avail_idx + num_buffers);

		if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
			vq->shadow_used_idx -= num_buffers;

		vq->last_avail_idx += num_buffers;

	do_data_copy_enqueue(dev, vq);

	if (likely(vq->shadow_used_idx)) {
		flush_shadow_used_ring_split(dev, vq);
		vhost_vring_call_split(dev, vq);
/*
 * Check whether the next PACKED_BATCH_SIZE packets can be enqueued via
 * the fast batch path: batch-aligned index, no ring wrap, single-mbuf
 * packets, all descriptors available, each large enough, and every
 * buffer host-VA-contiguous (len unchanged after translation).
 * Outputs the translated addresses and lengths; returns -1 otherwise.
 *
 * NOTE(review): the paste elides 'uint16_t i', return statements and
 * some closing braces — verify upstream.
 */
static __rte_always_inline int
virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct rte_mbuf **pkts,
			   uint64_t *desc_addrs,
	bool wrap_counter = vq->avail_wrap_counter;
	struct vring_packed_desc *descs = vq->desc_packed;
	uint16_t avail_idx = vq->last_avail_idx;
	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);

	if (unlikely(avail_idx & PACKED_BATCH_MASK))

	if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(pkts[i]->next != NULL))
		if (unlikely(!desc_is_avail(&descs[avail_idx + i],

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		lens[i] = descs[avail_idx + i].len;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		desc_addrs[i] = vhost_iova_to_vva(dev, vq,
						  descs[avail_idx + i].addr,

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(!desc_addrs[i]))
		/* Translation shrank the region: not contiguous. */
		if (unlikely(lens[i] != descs[avail_idx + i].len))
/*
 * Async variant of the batch eligibility check: same conditions as the
 * sync check, plus every packet must be below the async copy threshold
 * (so the whole batch is CPU-copied) and the used index must not wrap
 * within the batch.
 *
 * NOTE(review): the paste elides 'uint16_t i', return statements and
 * some closing braces — verify upstream.
 */
static __rte_always_inline int
virtio_dev_rx_async_batch_check(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct rte_mbuf **pkts,
			   uint64_t *desc_addrs,
	bool wrap_counter = vq->avail_wrap_counter;
	struct vring_packed_desc *descs = vq->desc_packed;
	uint16_t avail_idx = vq->last_avail_idx;
	uint16_t used_idx = vq->last_used_idx;
	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	uint32_t cpy_threshold = vq->async_threshold;

	/* Batch path is CPU-copy only: DMA-sized packets disqualify it. */
	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(pkts[i]->data_len >= cpy_threshold))

	if (unlikely(avail_idx & PACKED_BATCH_MASK))

	if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))

	if (unlikely((used_idx + PACKED_BATCH_SIZE) > vq->size))

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(pkts[i]->next != NULL))
		if (unlikely(!desc_is_avail(&descs[avail_idx + i],

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		lens[i] = descs[avail_idx + i].len;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		desc_addrs[i] = vhost_iova_to_vva(dev, vq,
						  descs[avail_idx + i].addr,

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(!desc_addrs[i]))
		if (unlikely(lens[i] != descs[avail_idx + i].len))
/*
 * Fast-path copy of a full batch: write the virtio-net header and
 * packet data of each mbuf directly into its (already validated and
 * translated) descriptor buffer, dirty-log the writes, then mark the
 * batch used via vhost_flush_enqueue_batch_packed().
 *
 * NOTE(review): the paste elides 'uint16_t i', the 'lens' parameter
 * line, the copy length argument and some closing braces — verify
 * upstream.
 */
static __rte_always_inline void
virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct rte_mbuf **pkts,
			   uint64_t *desc_addrs,
	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
	struct vring_packed_desc *descs = vq->desc_packed;
	uint16_t avail_idx = vq->last_avail_idx;
	uint16_t ids[PACKED_BATCH_SIZE];

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
		hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)
					(uintptr_t)desc_addrs[i];
		lens[i] = pkts[i]->pkt_len +
			sizeof(struct virtio_net_hdr_mrg_rxbuf);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr);

	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset),
			   rte_pktmbuf_mtod_offset(pkts[i], void *, 0),

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr,

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		ids[i] = descs[avail_idx + i].id;

	vhost_flush_enqueue_batch_packed(dev, vq, lens, ids);
1410 static __rte_always_inline int
1411 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev,
1412 struct vhost_virtqueue *vq,
1413 struct rte_mbuf **pkts)
1415 uint64_t desc_addrs[PACKED_BATCH_SIZE];
1416 uint64_t lens[PACKED_BATCH_SIZE];
1418 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)
1421 if (vq->shadow_used_idx) {
1422 do_data_copy_enqueue(dev, vq);
1423 vhost_flush_enqueue_shadow_packed(dev, vq);
1426 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);
/*
 * Async batch enqueue: when the batch check passes, the copies are done
 * synchronously by the CPU (all packets are below the async threshold),
 * so the packets complete immediately and are appended to comp_pkts.
 * Returns 0 on success, -1 when batch conditions are not met.
 *
 * NOTE(review): the paste elides 'uint16_t i' and the return
 * statements — verify upstream.
 */
static __rte_always_inline int
virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct rte_mbuf **pkts,
			   struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
	uint64_t desc_addrs[PACKED_BATCH_SIZE];
	uint64_t lens[PACKED_BATCH_SIZE];

	if (virtio_dev_rx_async_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)

	virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);

	if (vq->shadow_used_idx) {
		do_data_copy_enqueue(dev, vq);
		vhost_flush_enqueue_shadow_packed(dev, vq);
	}

	/* Copies were synchronous: report them as completed right away. */
	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		comp_pkts[(*pkt_done)++] = pkts[i];
/*
 * Enqueue a single mbuf into a packed virtqueue (sync path).
 * Returns non-zero when not enough descriptors are available; on success
 * advances last_avail_idx by the number of descriptors consumed.
 */
1457 static __rte_always_inline int16_t
1458 virtio_dev_rx_single_packed(struct virtio_net *dev,
1459 struct vhost_virtqueue *vq,
1460 struct rte_mbuf *pkt)
1462 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1463 uint16_t nr_descs = 0;
1465 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec,
1467 VHOST_LOG_DATA(DEBUG,
1468 "(%d) failed to get enough desc from vring\n",
1473 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
1474 dev->vid, vq->last_avail_idx,
1475 vq->last_avail_idx + nr_descs);
1477 vq_inc_last_avail_packed(vq, nr_descs);
/*
 * Sync RX loop for a packed virtqueue: try the fast batch path first,
 * fall back to single-packet enqueue, then flush pending copies and
 * kick the guest. Returns the number of packets enqueued
 * (return value elided from this view).
 */
1482 static __rte_noinline uint32_t
1483 virtio_dev_rx_packed(struct virtio_net *dev,
1484 struct vhost_virtqueue *__rte_restrict vq,
1485 struct rte_mbuf **__rte_restrict pkts,
1488 uint32_t pkt_idx = 0;
1491 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
/* Fast path: whole batches of PACKED_BATCH_SIZE packets at a time. */
1493 if (count - pkt_idx >= PACKED_BATCH_SIZE) {
1494 if (!virtio_dev_rx_sync_batch_packed(dev, vq,
1496 pkt_idx += PACKED_BATCH_SIZE;
/* Slow path: one packet at a time; stop on descriptor shortage. */
1501 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx]))
1505 } while (pkt_idx < count);
1507 if (vq->shadow_used_idx) {
1508 do_data_copy_enqueue(dev, vq);
1509 vhost_flush_enqueue_shadow_packed(dev, vq);
/* Notify the guest that new buffers are used. */
1513 vhost_vring_call_packed(dev, vq);
/*
 * Common sync enqueue entry: validates the queue index, takes the vq
 * access lock (and the IOTLB read lock when VIRTIO_F_IOMMU_PLATFORM is
 * negotiated), translates the ring if needed, then dispatches to the
 * packed- or split-ring implementation. Returns the number of packets
 * actually enqueued (return/label lines elided from this view).
 */
1518 static __rte_always_inline uint32_t
1519 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
1520 struct rte_mbuf **pkts, uint32_t count)
1522 struct vhost_virtqueue *vq;
1525 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
/* is_tx == 0: enqueue path uses even (RX) queue indexes. */
1526 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
1527 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
1528 dev->vid, __func__, queue_id);
1532 vq = dev->virtqueue[queue_id];
1534 rte_spinlock_lock(&vq->access_lock);
1536 if (unlikely(!vq->enabled))
1537 goto out_access_unlock;
1539 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
1540 vhost_user_iotlb_rd_lock(vq);
/* Lazily (re)translate ring addresses after an address change. */
1542 if (unlikely(!vq->access_ok))
1543 if (unlikely(vring_translate(dev, vq) < 0))
1546 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
1550 if (vq_is_packed(dev))
1551 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count);
1553 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count);
1556 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
1557 vhost_user_iotlb_rd_unlock(vq);
1560 rte_spinlock_unlock(&vq->access_lock);
/*
 * Public API: synchronous enqueue burst. Rejects devices whose built-in
 * virtio-net backend is disabled, otherwise forwards to virtio_dev_rx().
 * (NULL-device check and return statements elided from this view.)
 */
1566 rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
1567 struct rte_mbuf **__rte_restrict pkts, uint16_t count)
1569 struct virtio_net *dev = get_device(vid);
1574 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
1576 "(%d) %s: built-in vhost net backend is disabled.\n",
1577 dev->vid, __func__);
1581 return virtio_dev_rx(dev, queue_id, pkts, count);
/*
 * Compute the start slot in the async inflight-info ring for @n_inflight
 * packets ending at @pkts_idx, handling wrap-around. Assumes @vq_size is a
 * power of two (mask arithmetic in the wrap branch).
 */
1584 static __rte_always_inline uint16_t
1585 virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
1586 uint16_t vq_size, uint16_t n_inflight)
1588 return pkts_idx > n_inflight ? (pkts_idx - n_inflight) :
1589 (vq_size - n_inflight + pkts_idx) & (vq_size - 1);
/*
 * Copy @count used elements from the shadow ring @s_ring (at @s_idx) into
 * the async backup ring @d_ring (at @d_idx), splitting the copy in two when
 * the destination wraps past @ring_size.
 */
1592 static __rte_always_inline void
1593 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring,
1594 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
1596 size_t elem_size = sizeof(struct vring_used_elem);
1598 if (d_idx + count <= ring_size) {
1599 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
/* Destination wraps: copy the tail, then the remainder from index 0. */
1601 uint16_t size = ring_size - d_idx;
1603 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
1604 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
/*
 * Packed-ring counterpart of store_dma_desc_info_split(): back up @count
 * shadow used elements into the async ring, handling destination
 * wrap-around at @ring_size.
 */
1608 static __rte_always_inline void
1609 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,
1610 struct vring_used_elem_packed *d_ring,
1611 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
1613 size_t elem_size = sizeof(struct vring_used_elem_packed);
1615 if (d_idx + count <= ring_size) {
1616 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
/* Destination wraps: copy the tail, then the remainder from index 0. */
1618 uint16_t size = ring_size - d_idx;
1620 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
1621 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
/*
 * Async enqueue submit path for a split virtqueue.
 *
 * For each packet: reserve descriptors, build src/dst iovec iterators via
 * async_mbuf_to_desc(), and either queue the copy to the async (DMA) engine
 * or — when the packet was fully handled by CPU — report it immediately in
 * @comp_pkts. Shadow used entries for DMA packets are backed up into
 * vq->async_descs_split so they can be written back on completion.
 * Batches are flushed to vq->async_ops.transfer_data() when thresholds hit;
 * partial DMA failures roll back avail/shadow indexes using async_pkts_log.
 *
 * Returns the number of packets accepted (return elided from this view);
 * *comp_count receives the number already completed.
 */
1625 static __rte_noinline uint32_t
1626 virtio_dev_rx_async_submit_split(struct virtio_net *dev,
1627 struct vhost_virtqueue *vq, uint16_t queue_id,
1628 struct rte_mbuf **pkts, uint32_t count,
1629 struct rte_mbuf **comp_pkts, uint32_t *comp_count)
1631 uint32_t pkt_idx = 0, pkt_burst_idx = 0;
1632 uint16_t num_buffers;
1633 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1634 uint16_t avail_head;
1636 struct rte_vhost_iov_iter *it_pool = vq->it_pool;
1637 struct iovec *vec_pool = vq->vec_pool;
1638 struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
/* vec_pool is split in half: first half source iovecs, second half dest. */
1639 struct iovec *src_iovec = vec_pool;
1640 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
1641 uint16_t slot_idx = 0;
1642 uint16_t segs_await = 0;
1643 uint16_t iovec_idx = 0, it_idx = 0;
1644 struct async_inflight_info *pkts_info = vq->async_pkts_info;
1645 uint32_t n_pkts = 0, pkt_err = 0;
1646 uint32_t num_async_pkts = 0, num_done_pkts = 0;
/* Per-async-packet log used to roll back on DMA submission failure. */
1649 uint16_t last_avail_idx;
1650 } async_pkts_log[MAX_PKT_BURST];
1653 * The ordering between avail index and desc reads need to be enforced.
1655 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
1657 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
1659 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
1660 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
1661 uint16_t nr_vec = 0;
1663 if (unlikely(reserve_avail_buf_split(dev, vq,
1664 pkt_len, buf_vec, &num_buffers,
1665 avail_head, &nr_vec) < 0)) {
1666 VHOST_LOG_DATA(DEBUG,
1667 "(%d) failed to get enough desc from vring\n",
/* Roll back shadow entries reserved for this packet. */
1669 vq->shadow_used_idx -= num_buffers;
1673 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
1674 dev->vid, vq->last_avail_idx,
1675 vq->last_avail_idx + num_buffers);
/* Build src (mbuf) and dst (guest) iovec iterators for the copy. */
1677 if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers,
1678 &src_iovec[iovec_idx], &dst_iovec[iovec_idx],
1679 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) {
1680 vq->shadow_used_idx -= num_buffers;
1684 slot_idx = (vq->async_pkts_idx + num_async_pkts) &
/* Non-empty iterator: hand this packet to the async engine. */
1686 if (it_pool[it_idx].count) {
1689 async_fill_desc(&tdes[pkt_burst_idx++],
1690 &it_pool[it_idx], &it_pool[it_idx + 1]);
1691 pkts_info[slot_idx].descs = num_buffers;
1692 pkts_info[slot_idx].mbuf = pkts[pkt_idx];
1693 async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
1694 async_pkts_log[num_async_pkts++].last_avail_idx =
1697 iovec_idx += it_pool[it_idx].nr_segs;
1700 segs_await += it_pool[it_idx].nr_segs;
1703 * recover shadow used ring and keep DMA-occupied
/* Move this packet's shadow used entries into the async backup ring. */
1706 from = vq->shadow_used_idx - num_buffers;
1707 to = vq->async_desc_idx_split & (vq->size - 1);
1709 store_dma_desc_info_split(vq->shadow_used_split,
1710 vq->async_descs_split, vq->size, from, to, num_buffers);
1712 vq->async_desc_idx_split += num_buffers;
1713 vq->shadow_used_idx -= num_buffers;
/* CPU-copied packet: completed right away. */
1715 comp_pkts[num_done_pkts++] = pkts[pkt_idx];
1717 vq->last_avail_idx += num_buffers;
1720 * conditions to trigger async device transfer:
1721 * - buffered packet number reaches transfer threshold
1722 * - unused async iov number is less than max vhost vector
1724 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
1725 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
1727 n_pkts = vq->async_ops.transfer_data(dev->vid,
1728 queue_id, tdes, 0, pkt_burst_idx);
1733 vq->async_pkts_inflight_n += n_pkts;
1735 if (unlikely(n_pkts < pkt_burst_idx)) {
1737 * log error packets number here and do actual
1738 * error processing when applications poll
1741 pkt_err = pkt_burst_idx - n_pkts;
/* Submit any remaining batched transfers after the loop. */
1750 if (pkt_burst_idx) {
1751 n_pkts = vq->async_ops.transfer_data(dev->vid,
1752 queue_id, tdes, 0, pkt_burst_idx);
1753 vq->async_pkts_inflight_n += n_pkts;
1755 if (unlikely(n_pkts < pkt_burst_idx))
1756 pkt_err = pkt_burst_idx - n_pkts;
1759 do_data_copy_enqueue(dev, vq);
/* DMA submission failed for the tail packets: undo their bookkeeping. */
1761 if (unlikely(pkt_err)) {
1762 uint16_t num_descs = 0;
1764 num_async_pkts -= pkt_err;
1765 /* calculate the sum of descriptors of DMA-error packets. */
1766 while (pkt_err-- > 0) {
1767 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs;
1770 vq->async_desc_idx_split -= num_descs;
1771 /* recover shadow used ring and available ring */
1772 vq->shadow_used_idx -= (vq->last_avail_idx -
1773 async_pkts_log[num_async_pkts].last_avail_idx -
1775 vq->last_avail_idx =
1776 async_pkts_log[num_async_pkts].last_avail_idx;
1777 pkt_idx = async_pkts_log[num_async_pkts].pkt_idx;
1778 num_done_pkts = pkt_idx - num_async_pkts;
1781 vq->async_pkts_idx += num_async_pkts;
1782 *comp_count = num_done_pkts;
/* Publish CPU-completed packets' used entries and kick the guest. */
1784 if (likely(vq->shadow_used_idx)) {
1785 flush_shadow_used_ring_split(dev, vq);
1786 vhost_vring_call_split(dev, vq);
/*
 * Write @count completed shadow entries back into the packed descriptor
 * ring. Runs two passes: first store id/len for all entries, then — after a
 * release fence — store the flags, with the head descriptor's flags written
 * last so the guest never observes a partially updated chain.
 * (The early-return for count==0 and the head_flags capture are partly
 * elided from this view.)
 */
1792 static __rte_always_inline void
1793 vhost_update_used_packed(struct vhost_virtqueue *vq,
1794 struct vring_used_elem_packed *shadow_ring,
1798 uint16_t used_idx = vq->last_used_idx;
1799 uint16_t head_idx = vq->last_used_idx;
1800 uint16_t head_flags = 0;
1805 /* Split loop in two to save memory barriers */
1806 for (i = 0; i < count; i++) {
1807 vq->desc_packed[used_idx].id = shadow_ring[i].id;
1808 vq->desc_packed[used_idx].len = shadow_ring[i].len;
1810 used_idx += shadow_ring[i].count;
1811 if (used_idx >= vq->size)
1812 used_idx -= vq->size;
1815 /* The ordering for storing desc flags needs to be enforced. */
1816 rte_atomic_thread_fence(__ATOMIC_RELEASE);
1818 for (i = 0; i < count; i++) {
/* Zero-length entries carry no write flag. */
1821 if (vq->shadow_used_packed[i].len)
1822 flags = VRING_DESC_F_WRITE;
/* Encode the used/avail wrap state into the flags. */
1826 if (vq->used_wrap_counter) {
1827 flags |= VRING_DESC_F_USED;
1828 flags |= VRING_DESC_F_AVAIL;
1830 flags &= ~VRING_DESC_F_USED;
1831 flags &= ~VRING_DESC_F_AVAIL;
1835 vq->desc_packed[vq->last_used_idx].flags = flags;
1837 head_idx = vq->last_used_idx;
1841 vq_inc_last_used_packed(vq, shadow_ring[i].count);
/* Head flags last: makes the whole chain visible atomically. */
1844 vq->desc_packed[head_idx].flags = head_flags;
/*
 * Reserve descriptors in a packed virtqueue for one async-enqueued mbuf,
 * build the iovec iterators, back up the consumed descriptors into
 * @async_descs (for rollback on DMA error), and shadow the used-ring
 * updates. Gathers as many buffers as needed to fit
 * pkt_len + virtio_net_hdr_mrg_rxbuf; for non-mergeable queues only a
 * single try is allowed (else-branch elided from this view).
 * Returns negative on failure (returns elided from this view).
 */
1847 static __rte_always_inline int
1848 vhost_enqueue_async_single_packed(struct virtio_net *dev,
1849 struct vhost_virtqueue *vq,
1850 struct rte_mbuf *pkt,
1851 struct buf_vector *buf_vec,
1853 uint16_t *nr_buffers,
1854 struct vring_packed_desc *async_descs,
1855 struct iovec *src_iovec, struct iovec *dst_iovec,
1856 struct rte_vhost_iov_iter *src_it,
1857 struct rte_vhost_iov_iter *dst_it)
1859 uint16_t nr_vec = 0;
1860 uint16_t avail_idx = vq->last_avail_idx;
1861 uint16_t max_tries, tries = 0;
1862 uint16_t buf_id = 0;
1864 uint16_t desc_count = 0;
1865 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
/* VLAs sized by the ring: per-buffer bookkeeping for the shadow update. */
1866 uint32_t buffer_len[vq->size];
1867 uint16_t buffer_buf_id[vq->size];
1868 uint16_t buffer_desc_count[vq->size];
1870 if (rxvq_is_mergeable(dev))
1871 max_tries = vq->size - 1;
1877 * if we tried all available ring items, and still
1878 * can't get enough buf, it means something abnormal
1881 if (unlikely(++tries > max_tries))
1884 if (unlikely(fill_vec_buf_packed(dev, vq, avail_idx, &desc_count, buf_vec, &nr_vec,
1885 &buf_id, &len, VHOST_ACCESS_RW) < 0))
1888 len = RTE_MIN(len, size);
1891 buffer_len[*nr_buffers] = len;
1892 buffer_buf_id[*nr_buffers] = buf_id;
1893 buffer_desc_count[*nr_buffers] = desc_count;
1896 *nr_descs += desc_count;
1897 avail_idx += desc_count;
1898 if (avail_idx >= vq->size)
1899 avail_idx -= vq->size;
1902 if (async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, src_iovec, dst_iovec,
1903 src_it, dst_it) < 0)
1905 /* store descriptors for DMA */
/* Back up the consumed descriptor range, split in two if it wraps. */
1906 if (avail_idx >= *nr_descs) {
1907 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],
1908 *nr_descs * sizeof(struct vring_packed_desc));
1910 uint16_t nr_copy = vq->size - vq->last_avail_idx;
1912 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],
1913 nr_copy * sizeof(struct vring_packed_desc));
1914 rte_memcpy(async_descs + nr_copy, vq->desc_packed,
1915 (*nr_descs - nr_copy) * sizeof(struct vring_packed_desc));
1918 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers);
/*
 * Thin wrapper: allocate the buf_vec on the stack and delegate to
 * vhost_enqueue_async_single_packed() for one packet. Returns non-zero on
 * descriptor shortage (return statements elided from this view).
 */
1923 static __rte_always_inline int16_t
1924 virtio_dev_rx_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
1925 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers,
1926 struct vring_packed_desc *async_descs,
1927 struct iovec *src_iovec, struct iovec *dst_iovec,
1928 struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it)
1930 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1932 if (unlikely(vhost_enqueue_async_single_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers,
1933 async_descs, src_iovec, dst_iovec,
1934 src_it, dst_it) < 0)) {
1935 VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid);
1939 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
1940 dev->vid, vq->last_avail_idx, vq->last_avail_idx + *nr_descs);
/*
 * Roll back state for @nr_err packets whose DMA submission failed on the
 * packed-ring async path: subtract their buffers/descriptors from the
 * async bookkeeping, restore the original descriptors from @async_descs
 * back into the ring, rewind last_avail_idx (toggling the avail wrap
 * counter when the rewind crosses index 0), and recompute the number of
 * already-completed packets.
 */
1945 static __rte_always_inline void
1946 dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *async_descs,
1947 uint16_t async_descs_idx, uint16_t slot_idx, uint32_t nr_err,
1948 uint32_t *pkt_idx, uint32_t *num_async_pkts, uint32_t *num_done_pkts)
1950 uint16_t descs_err = 0;
1951 uint16_t buffers_err = 0;
1952 struct async_inflight_info *pkts_info = vq->async_pkts_info;
1954 *num_async_pkts -= nr_err;
1956 /* calculate the sum of buffers and descs of DMA-error packets. */
1957 while (nr_err-- > 0) {
1958 descs_err += pkts_info[slot_idx % vq->size].descs;
1959 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers;
1963 vq->async_buffer_idx_packed -= buffers_err;
1965 if (vq->last_avail_idx >= descs_err) {
/* No wrap: simple rewind and restore. */
1966 vq->last_avail_idx -= descs_err;
1968 rte_memcpy(&vq->desc_packed[vq->last_avail_idx],
1969 &async_descs[async_descs_idx - descs_err],
1970 descs_err * sizeof(struct vring_packed_desc));
/* Rewind crosses ring start: restore in two chunks and flip wrap bit. */
1974 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err;
1975 nr_copy = vq->size - vq->last_avail_idx;
1976 rte_memcpy(&vq->desc_packed[vq->last_avail_idx],
1977 &async_descs[async_descs_idx - descs_err],
1978 nr_copy * sizeof(struct vring_packed_desc));
1979 descs_err -= nr_copy;
1980 rte_memcpy(&vq->desc_packed[0], &async_descs[async_descs_idx - descs_err],
1981 descs_err * sizeof(struct vring_packed_desc));
1982 vq->avail_wrap_counter ^= 1;
1985 *num_done_pkts = *pkt_idx - *num_async_pkts;
/*
 * Async enqueue submit path for a packed virtqueue. Mirrors the split-ring
 * variant: try the CPU batch fast path, otherwise build per-packet iovec
 * iterators and queue copies to the async engine, backing up descriptors
 * in @async_descs and shadow used entries in vq->async_buffers_packed for
 * write-back on completion. DMA submission failures are rolled back via
 * dma_error_handler_packed(). Returns the number of accepted packets
 * (return elided from this view); *comp_count gets the completed count.
 */
1988 static __rte_noinline uint32_t
1989 virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
1990 struct vhost_virtqueue *vq, uint16_t queue_id,
1991 struct rte_mbuf **pkts, uint32_t count,
1992 struct rte_mbuf **comp_pkts, uint32_t *comp_count)
1994 uint32_t pkt_idx = 0, pkt_burst_idx = 0;
1995 uint32_t remained = count;
1996 uint16_t async_descs_idx = 0;
1997 uint16_t num_buffers;
2000 struct rte_vhost_iov_iter *it_pool = vq->it_pool;
2001 struct iovec *vec_pool = vq->vec_pool;
2002 struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
/* First half of vec_pool: source iovecs; second half: destination. */
2003 struct iovec *src_iovec = vec_pool;
2004 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
2005 uint16_t slot_idx = 0;
2006 uint16_t segs_await = 0;
2007 uint16_t iovec_idx = 0, it_idx = 0;
2008 struct async_inflight_info *pkts_info = vq->async_pkts_info;
2009 uint32_t n_pkts = 0, pkt_err = 0;
2010 uint32_t num_async_pkts = 0, num_done_pkts = 0;
/* VLA backup of descriptors consumed by DMA packets (rollback buffer). */
2011 struct vring_packed_desc async_descs[vq->size];
2014 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
/* Fast path: CPU-copy a whole batch when possible. */
2015 if (remained >= PACKED_BATCH_SIZE) {
2016 if (!virtio_dev_rx_async_batch_packed(dev, vq,
2017 &pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
2018 pkt_idx += PACKED_BATCH_SIZE;
2019 remained -= PACKED_BATCH_SIZE;
2026 if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx],
2027 &num_descs, &num_buffers,
2028 &async_descs[async_descs_idx],
2029 &src_iovec[iovec_idx], &dst_iovec[iovec_idx],
2030 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0))
2033 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
2034 dev->vid, vq->last_avail_idx,
2035 vq->last_avail_idx + num_descs);
2037 slot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size;
/* Non-empty iterator: the packet goes through the async engine. */
2038 if (it_pool[it_idx].count) {
2041 async_descs_idx += num_descs;
2042 async_fill_desc(&tdes[pkt_burst_idx++],
2043 &it_pool[it_idx], &it_pool[it_idx + 1]);
2044 pkts_info[slot_idx].descs = num_descs;
2045 pkts_info[slot_idx].nr_buffers = num_buffers;
2046 pkts_info[slot_idx].mbuf = pkts[pkt_idx];
2048 iovec_idx += it_pool[it_idx].nr_segs;
2051 segs_await += it_pool[it_idx].nr_segs;
2054 * recover shadow used ring and keep DMA-occupied
/* Move this packet's shadow used entries into the async backup ring. */
2057 from = vq->shadow_used_idx - num_buffers;
2058 store_dma_desc_info_packed(vq->shadow_used_packed,
2059 vq->async_buffers_packed, vq->size, from,
2060 vq->async_buffer_idx_packed, num_buffers);
2062 vq->async_buffer_idx_packed += num_buffers;
2063 if (vq->async_buffer_idx_packed >= vq->size)
2064 vq->async_buffer_idx_packed -= vq->size;
2065 vq->shadow_used_idx -= num_buffers;
/* CPU-copied packet: completed right away. */
2067 comp_pkts[num_done_pkts++] = pkts[pkt_idx];
2072 vq_inc_last_avail_packed(vq, num_descs);
2075 * conditions to trigger async device transfer:
2076 * - buffered packet number reaches transfer threshold
2077 * - unused async iov number is less than max vhost vector
2079 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
2080 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) {
2081 n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id,
2082 tdes, 0, pkt_burst_idx);
2086 vq->async_pkts_inflight_n += n_pkts;
2088 if (unlikely(n_pkts < pkt_burst_idx)) {
2090 * log error packets number here and do actual
2091 * error processing when applications poll
2094 pkt_err = pkt_burst_idx - n_pkts;
2101 } while (pkt_idx < count);
/* Submit any remaining batched transfers after the loop. */
2103 if (pkt_burst_idx) {
2104 n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);
2105 vq->async_pkts_inflight_n += n_pkts;
2107 if (unlikely(n_pkts < pkt_burst_idx))
2108 pkt_err = pkt_burst_idx - n_pkts;
2111 do_data_copy_enqueue(dev, vq);
2113 if (unlikely(pkt_err))
2114 dma_error_handler_packed(vq, async_descs, async_descs_idx, slot_idx, pkt_err,
2115 &pkt_idx, &num_async_pkts, &num_done_pkts);
2116 vq->async_pkts_idx += num_async_pkts;
2117 if (vq->async_pkts_idx >= vq->size)
2118 vq->async_pkts_idx -= vq->size;
2119 *comp_count = num_done_pkts;
/* Publish CPU-completed packets' used entries and kick the guest. */
2121 if (likely(vq->shadow_used_idx)) {
2122 vhost_flush_enqueue_shadow_packed(dev, vq);
2123 vhost_vring_call_packed(dev, vq);
/*
 * Write @n_descs DMA-completed used elements from the async backup ring
 * (vq->async_descs_split) into the guest-visible split used ring, copying
 * in chunks so that both source and destination wrap-arounds are handled.
 * Advances last_async_desc_idx_split and last_used_idx as it goes; the
 * used->idx store is done by the caller.
 */
2129 static __rte_always_inline void
2130 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs)
2132 uint16_t nr_left = n_descs;
2137 from = vq->last_async_desc_idx_split & (vq->size - 1);
/* Limit each chunk to the contiguous span before the source wraps. */
2138 nr_copy = nr_left + from <= vq->size ? nr_left : vq->size - from;
2139 to = vq->last_used_idx & (vq->size - 1);
2141 if (to + nr_copy <= vq->size) {
2142 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from],
2143 nr_copy * sizeof(struct vring_used_elem));
/* Destination wraps: copy tail, then remainder from ring start. */
2145 uint16_t size = vq->size - to;
2147 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from],
2148 size * sizeof(struct vring_used_elem));
2149 rte_memcpy(&vq->used->ring[0], &vq->async_descs_split[from + size],
2150 (nr_copy - size) * sizeof(struct vring_used_elem));
2153 vq->last_async_desc_idx_split += nr_copy;
2154 vq->last_used_idx += nr_copy;
2156 } while (nr_left > 0);
/*
 * Write @n_buffers DMA-completed entries from the packed async backup ring
 * into the guest-visible packed descriptor ring via
 * vhost_update_used_packed(), iterating in chunks when the backup ring
 * index wraps (some branch lines elided from this view).
 */
2159 static __rte_always_inline void
2160 write_back_completed_descs_packed(struct vhost_virtqueue *vq,
2163 uint16_t nr_left = n_buffers;
2167 from = vq->last_async_buffer_idx_packed;
2168 to = (from + nr_left) % vq->size;
/* Contiguous case: one update covers the whole range. */
2170 vhost_update_used_packed(vq, vq->async_buffers_packed + from, to - from);
2171 vq->last_async_buffer_idx_packed += nr_left;
/* Wrapping case: flush up to ring end, then restart from index 0. */
2174 vhost_update_used_packed(vq, vq->async_buffers_packed + from,
2176 vq->last_async_buffer_idx_packed = 0;
2177 nr_left -= vq->size - from;
2179 } while (nr_left > 0);
/*
 * Public API: poll the async engine for completed enqueue copies on
 * @queue_id, return up to @count finished mbufs in @pkts, and write the
 * corresponding used entries back to the guest ring (packed or split).
 * When the ring is not accessible, only the last_async_* indexes are
 * advanced and the write-back is deferred. Returns the number of packets
 * handed back (return elided from this view).
 */
2182 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
2183 struct rte_mbuf **pkts, uint16_t count)
2185 struct virtio_net *dev = get_device(vid);
2186 struct vhost_virtqueue *vq;
2187 uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
2188 uint16_t start_idx, pkts_idx, vq_size;
2189 struct async_inflight_info *pkts_info;
2195 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
2196 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
2197 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
2198 dev->vid, __func__, queue_id);
2202 vq = dev->virtqueue[queue_id];
2204 if (unlikely(!vq->async_registered)) {
2205 VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
2206 dev->vid, __func__, queue_id);
2210 rte_spinlock_lock(&vq->access_lock);
2212 pkts_idx = vq->async_pkts_idx % vq->size;
2213 pkts_info = vq->async_pkts_info;
/* Locate the oldest in-flight slot corresponding to pkts_idx. */
2215 start_idx = virtio_dev_rx_async_get_info_idx(pkts_idx,
2216 vq_size, vq->async_pkts_inflight_n);
/* Ask the async engine only for completions beyond those already seen. */
2218 if (count > vq->async_last_pkts_n)
2219 n_pkts_cpl = vq->async_ops.check_completed_copies(vid,
2220 queue_id, 0, count - vq->async_last_pkts_n);
2221 n_pkts_cpl += vq->async_last_pkts_n;
2223 n_pkts_put = RTE_MIN(count, n_pkts_cpl);
2224 if (unlikely(n_pkts_put == 0)) {
/* Nothing to hand back: remember completions for the next poll. */
2225 vq->async_last_pkts_n = n_pkts_cpl;
/* Gather completed mbufs and total their buffers/descriptors. */
2229 if (vq_is_packed(dev)) {
2230 for (i = 0; i < n_pkts_put; i++) {
2231 from = (start_idx + i) & (vq_size - 1);
2232 n_buffers += pkts_info[from].nr_buffers;
2233 pkts[i] = pkts_info[from].mbuf;
2236 for (i = 0; i < n_pkts_put; i++) {
2237 from = (start_idx + i) & (vq_size - 1);
2238 n_descs += pkts_info[from].descs;
2239 pkts[i] = pkts_info[from].mbuf;
2243 vq->async_last_pkts_n = n_pkts_cpl - n_pkts_put;
2244 vq->async_pkts_inflight_n -= n_pkts_put;
2246 if (likely(vq->enabled && vq->access_ok)) {
2247 if (vq_is_packed(dev)) {
2248 write_back_completed_descs_packed(vq, n_buffers);
2250 vhost_vring_call_packed(dev, vq);
2252 write_back_completed_descs_split(vq, n_descs);
/* Release ordering: used entries must be visible before idx bump. */
2254 __atomic_add_fetch(&vq->used->idx, n_descs,
2256 vhost_vring_call_split(dev, vq);
/* Ring not accessible: just consume the async indexes for now. */
2259 if (vq_is_packed(dev)) {
2260 vq->last_async_buffer_idx_packed += n_buffers;
2261 if (vq->last_async_buffer_idx_packed >= vq->size)
2262 vq->last_async_buffer_idx_packed -= vq->size;
2264 vq->last_async_desc_idx_split += n_descs;
2269 rte_spinlock_unlock(&vq->access_lock);
/*
 * Common async enqueue entry: validates the queue index, takes the vq
 * access lock (and the IOTLB read lock under VIRTIO_F_IOMMU_PLATFORM),
 * requires the queue to be enabled with async registered, translates the
 * ring if needed, and dispatches to the packed or split async submit path.
 * Returns the number of packets accepted (return/label lines elided from
 * this view).
 */
2274 static __rte_always_inline uint32_t
2275 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
2276 struct rte_mbuf **pkts, uint32_t count,
2277 struct rte_mbuf **comp_pkts, uint32_t *comp_count)
2279 struct vhost_virtqueue *vq;
2282 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
2283 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
2284 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
2285 dev->vid, __func__, queue_id);
2289 vq = dev->virtqueue[queue_id];
2291 rte_spinlock_lock(&vq->access_lock);
2293 if (unlikely(!vq->enabled || !vq->async_registered))
2294 goto out_access_unlock;
2296 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
2297 vhost_user_iotlb_rd_lock(vq);
2299 if (unlikely(!vq->access_ok))
2300 if (unlikely(vring_translate(dev, vq) < 0))
2303 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
2307 if (vq_is_packed(dev))
2308 nb_tx = virtio_dev_rx_async_submit_packed(dev,
2309 vq, queue_id, pkts, count, comp_pkts,
2312 nb_tx = virtio_dev_rx_async_submit_split(dev,
2313 vq, queue_id, pkts, count, comp_pkts,
2317 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
2318 vhost_user_iotlb_rd_unlock(vq);
2321 rte_spinlock_unlock(&vq->access_lock);
/*
 * Public API: asynchronous enqueue burst. Rejects devices whose built-in
 * virtio-net backend is disabled, otherwise forwards to
 * virtio_dev_rx_async_submit(). (NULL-device check and early returns
 * elided from this view.)
 */
2327 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
2328 struct rte_mbuf **pkts, uint16_t count,
2329 struct rte_mbuf **comp_pkts, uint32_t *comp_count)
2331 struct virtio_net *dev = get_device(vid);
2337 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
2339 "(%d) %s: built-in vhost net backend is disabled.\n",
2340 dev->vid, __func__);
2344 return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, comp_pkts,
/* True when any host offload feature (checksum, ECN, TSO4/6, UFO) was
 * negotiated, i.e. the virtio-net header may carry offload metadata that
 * the dequeue path must translate into mbuf ol_flags. */
2349 virtio_net_with_host_offload(struct virtio_net *dev)
2352 ((1ULL << VIRTIO_NET_F_CSUM) |
2353 (1ULL << VIRTIO_NET_F_HOST_ECN) |
2354 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2355 (1ULL << VIRTIO_NET_F_HOST_TSO6) |
2356 (1ULL << VIRTIO_NET_F_HOST_UFO)))
/*
 * Parse L2/L3 headers of @m (first segment only), filling m->l2_len,
 * m->l3_len, the PKT_TX_IPV4/IPV6 ol_flags and *l4_proto, and validating
 * that each claimed header fits within the first segment's data_len.
 * Error returns for truncated headers are elided from this view.
 */
2363 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto)
2365 struct rte_ipv4_hdr *ipv4_hdr;
2366 struct rte_ipv6_hdr *ipv6_hdr;
2367 struct rte_ether_hdr *eth_hdr;
2369 uint16_t data_len = rte_pktmbuf_data_len(m);
2371 if (data_len < sizeof(struct rte_ether_hdr))
2374 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
2376 m->l2_len = sizeof(struct rte_ether_hdr);
2377 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
/* Skip one VLAN tag if present, adjusting l2_len and ethertype. */
2379 if (ethertype == RTE_ETHER_TYPE_VLAN) {
2380 if (data_len < sizeof(struct rte_ether_hdr) +
2381 sizeof(struct rte_vlan_hdr))
2384 struct rte_vlan_hdr *vlan_hdr =
2385 (struct rte_vlan_hdr *)(eth_hdr + 1);
2387 m->l2_len += sizeof(struct rte_vlan_hdr);
2388 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
2391 switch (ethertype) {
2392 case RTE_ETHER_TYPE_IPV4:
2393 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr))
2395 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
/* IPv4 header length is variable (IHL-derived). */
2397 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr);
2398 if (data_len < m->l2_len + m->l3_len)
2400 m->ol_flags |= PKT_TX_IPV4;
2401 *l4_proto = ipv4_hdr->next_proto_id;
2403 case RTE_ETHER_TYPE_IPV6:
2404 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr))
2406 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
2408 m->l3_len = sizeof(struct rte_ipv6_hdr);
2409 m->ol_flags |= PKT_TX_IPV6;
2410 *l4_proto = ipv6_hdr->proto;
2413 /* a valid L3 header is needed for further L4 parsing */
2417 /* both CSUM and GSO need a valid L4 header */
2418 switch (*l4_proto) {
2420 if (data_len < m->l2_len + m->l3_len +
2421 sizeof(struct rte_tcp_hdr))
2425 if (data_len < m->l2_len + m->l3_len +
2426 sizeof(struct rte_udp_hdr))
2430 if (data_len < m->l2_len + m->l3_len +
2431 sizeof(struct rte_sctp_hdr))
/*
 * Legacy (pre-Tx/Rx-flag-split) offload translation on dequeue: map the
 * guest's virtio-net header checksum request and GSO type into PKT_TX_*
 * mbuf ol_flags, after parsing and validating the packet headers. The
 * csum_offset is matched against known L4 checksum field offsets and
 * cross-checked with the parsed l4_proto; mismatches bail out (error paths
 * elided from this view).
 */
2447 static __rte_always_inline void
2448 vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
2450 uint8_t l4_proto = 0;
2451 struct rte_tcp_hdr *tcp_hdr = NULL;
2453 uint16_t data_len = rte_pktmbuf_data_len(m);
2455 if (parse_headers(m, &l4_proto) < 0)
2458 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
/* csum_start must point right after L2+L3 for a recognized L4 csum. */
2459 if (hdr->csum_start == (m->l2_len + m->l3_len)) {
2460 switch (hdr->csum_offset) {
2461 case (offsetof(struct rte_tcp_hdr, cksum)):
2462 if (l4_proto != IPPROTO_TCP)
2464 m->ol_flags |= PKT_TX_TCP_CKSUM;
2466 case (offsetof(struct rte_udp_hdr, dgram_cksum)):
2467 if (l4_proto != IPPROTO_UDP)
2469 m->ol_flags |= PKT_TX_UDP_CKSUM;
2471 case (offsetof(struct rte_sctp_hdr, cksum)):
2472 if (l4_proto != IPPROTO_SCTP)
2474 m->ol_flags |= PKT_TX_SCTP_CKSUM;
2484 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2485 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2486 case VIRTIO_NET_HDR_GSO_TCPV4:
2487 case VIRTIO_NET_HDR_GSO_TCPV6:
2488 if (l4_proto != IPPROTO_TCP)
2490 tcp_hdr = rte_pktmbuf_mtod_offset(m,
2491 struct rte_tcp_hdr *,
2492 m->l2_len + m->l3_len);
/* data_off upper nibble = TCP header length in 32-bit words. */
2493 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2;
2494 if (data_len < m->l2_len + m->l3_len + tcp_len)
2496 m->ol_flags |= PKT_TX_TCP_SEG;
2497 m->tso_segsz = hdr->gso_size;
2498 m->l4_len = tcp_len;
2500 case VIRTIO_NET_HDR_GSO_UDP:
2501 if (l4_proto != IPPROTO_UDP)
2503 m->ol_flags |= PKT_TX_UDP_SEG;
2504 m->tso_segsz = hdr->gso_size;
2505 m->l4_len = sizeof(struct rte_udp_hdr);
2508 VHOST_LOG_DATA(WARNING,
2509 "unsupported gso type %u.\n", hdr->gso_type);
/*
 * Translate the guest's virtio-net header into mbuf offload metadata on
 * dequeue. With @legacy_ol_flags set, delegates to the legacy PKT_TX_*
 * mapping; otherwise uses RX-semantics flags: classify the packet with
 * rte_net_get_ptype(), mark checksum state (computing it in software for
 * unknown L4 protocols), and set PKT_RX_LRO for coalesced GSO packets.
 * No-ops when the header carries neither flags nor GSO info.
 */
2521 static __rte_always_inline void
2522 vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m,
2523 bool legacy_ol_flags)
2525 struct rte_net_hdr_lens hdr_lens;
2526 int l4_supported = 0;
2529 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
2532 if (legacy_ol_flags) {
2533 vhost_dequeue_offload_legacy(hdr, m);
2537 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
2539 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
2540 m->packet_type = ptype;
2541 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
2542 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
2543 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
2546 /* According to Virtio 1.1 spec, the device only needs to look at
2547 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path.
2548 * This differs from the processing incoming packets path where the
2549 * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the
2552 * 5.1.6.2.1 Driver Requirements: Packet Transmission
2553 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and
2554 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags.
2556 * 5.1.6.2.2 Device Requirements: Packet Transmission
2557 * The device MUST ignore flag bits that it does not recognize.
2559 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2562 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
2563 if (hdr->csum_start <= hdrlen && l4_supported != 0) {
2564 m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
2566 /* Unknown proto or tunnel, do sw cksum. We can assume
2567 * the cksum field is in the first segment since the
2568 * buffers we provided to the host are large enough.
2569 * In case of SCTP, this will be wrong since it's a CRC
2570 * but there's nothing we can do.
2572 uint16_t csum = 0, off;
2574 if (rte_raw_cksum_mbuf(m, hdr->csum_start,
2575 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0)
/* One's-complement: fold, avoiding the illegal all-ones value. */
2577 if (likely(csum != 0xffff))
2579 off = hdr->csum_offset + hdr->csum_start;
2580 if (rte_pktmbuf_data_len(m) >= off + 1)
2581 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum;
2585 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
/* A GSO packet with zero segment size is malformed. */
2586 if (hdr->gso_size == 0)
2589 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2590 case VIRTIO_NET_HDR_GSO_TCPV4:
2591 case VIRTIO_NET_HDR_GSO_TCPV6:
2592 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP)
2594 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
2595 m->tso_segsz = hdr->gso_size;
2597 case VIRTIO_NET_HDR_GSO_UDP:
2598 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP)
2600 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
2601 m->tso_segsz = hdr->gso_size;
/*
 * Gather a virtio-net header that is scattered across multiple descriptor
 * buffers into the contiguous @hdr, copying buf_len-bounded chunks from
 * successive buf_vec entries (loop advance/termination lines elided from
 * this view). Used on dequeue when the first buffer is smaller than the
 * header.
 */
2609 static __rte_noinline void
2610 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
2611 struct buf_vector *buf_vec)
2614 uint64_t remain = sizeof(struct virtio_net_hdr);
2616 uint64_t dst = (uint64_t)(uintptr_t)hdr;
2619 len = RTE_MIN(remain, buf_vec->buf_len);
2620 src = buf_vec->buf_addr;
2621 rte_memcpy((void *)(uintptr_t)dst,
2622 (void *)(uintptr_t)src, len);
/*
 * Dequeue path: copy one packet from the descriptor chain described by
 * @buf_vec (nr_vec buffers) into mbuf @m, allocating chained mbufs from
 * @mbuf_pool as needed. Reads the virtio-net header first (gathering it
 * with copy_vnet_hdr_from_desc() when it is not contiguous), then copies
 * payload — small copies are deferred to the batch_copy array, large ones
 * (or header-adjacent ones) done inline. Finally applies offload
 * translation via vhost_dequeue_offload(). Returns negative on error
 * (error-path lines elided from this view).
 */
2630 static __rte_always_inline int
2631 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
2632 struct buf_vector *buf_vec, uint16_t nr_vec,
2633 struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
2634 bool legacy_ol_flags)
2636 uint32_t buf_avail, buf_offset;
2637 uint64_t buf_addr, buf_len;
2638 uint32_t mbuf_avail, mbuf_offset;
2640 struct rte_mbuf *cur = m, *prev = m;
2641 struct virtio_net_hdr tmp_hdr;
2642 struct virtio_net_hdr *hdr = NULL;
2643 /* A counter to avoid desc dead loop chain */
2644 uint16_t vec_idx = 0;
2645 struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
2648 buf_addr = buf_vec[vec_idx].buf_addr;
2649 buf_len = buf_vec[vec_idx].buf_len;
/* A single buffer smaller than the vhost header is invalid. */
2651 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
2656 if (virtio_net_with_host_offload(dev)) {
2657 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
2659 * No luck, the virtio-net header doesn't fit
2660 * in a contiguous virtual area.
2662 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
2665 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
2670 * A virtio driver normally uses at least 2 desc buffers
2671 * for Tx: the first for storing the header, and others
2672 * for storing the data.
2674 if (unlikely(buf_len < dev->vhost_hlen)) {
/* Header spills into the next buffer: start payload mid-buffer. */
2675 buf_offset = dev->vhost_hlen - buf_len;
2677 buf_addr = buf_vec[vec_idx].buf_addr;
2678 buf_len = buf_vec[vec_idx].buf_len;
2679 buf_avail = buf_len - buf_offset;
2680 } else if (buf_len == dev->vhost_hlen) {
/* Header fills the first buffer exactly: payload starts at next. */
2681 if (unlikely(++vec_idx >= nr_vec))
2683 buf_addr = buf_vec[vec_idx].buf_addr;
2684 buf_len = buf_vec[vec_idx].buf_len;
2687 buf_avail = buf_len;
/* Header and payload share the first buffer. */
2689 buf_offset = dev->vhost_hlen;
2690 buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
2694 (uintptr_t)(buf_addr + buf_offset),
2695 (uint32_t)buf_avail, 0);
2698 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
2700 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
/* Large copies, a full batch array, or the header-bearing first mbuf
 * are copied immediately; small ones are batched for later. */
2702 if (likely(cpy_len > MAX_BATCH_LEN ||
2703 vq->batch_copy_nb_elems >= vq->size ||
2704 (hdr && cur == m))) {
2705 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
2707 (void *)((uintptr_t)(buf_addr +
2708 buf_offset)), cpy_len);
2710 batch_copy[vq->batch_copy_nb_elems].dst =
2711 rte_pktmbuf_mtod_offset(cur, void *,
2713 batch_copy[vq->batch_copy_nb_elems].src =
2714 (void *)((uintptr_t)(buf_addr + buf_offset));
2715 batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
2716 vq->batch_copy_nb_elems++;
2719 mbuf_avail -= cpy_len;
2720 mbuf_offset += cpy_len;
2721 buf_avail -= cpy_len;
2722 buf_offset += cpy_len;
2724 /* This buf reaches to its end, get the next one */
2725 if (buf_avail == 0) {
2726 if (++vec_idx >= nr_vec)
2729 buf_addr = buf_vec[vec_idx].buf_addr;
2730 buf_len = buf_vec[vec_idx].buf_len;
2733 buf_avail = buf_len;
2735 PRINT_PACKET(dev, (uintptr_t)buf_addr,
2736 (uint32_t)buf_avail, 0);
2740 * This mbuf reaches to its end, get a new one
2741 * to hold more data.
2743 if (mbuf_avail == 0) {
2744 cur = rte_pktmbuf_alloc(mbuf_pool);
2745 if (unlikely(cur == NULL)) {
2746 VHOST_LOG_DATA(ERR, "Failed to "
2747 "allocate memory for mbuf.\n");
2753 prev->data_len = mbuf_offset;
2755 m->pkt_len += mbuf_offset;
2759 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
/* Finalize the last segment's length and the total packet length. */
2763 prev->data_len = mbuf_offset;
2764 m->pkt_len += mbuf_offset;
2767 vhost_dequeue_offload(hdr, m, legacy_ol_flags);
/*
 * Free callback registered with rte_pktmbuf_ext_shinfo_init_helper() for
 * external buffers attached by virtio_dev_extbuf_alloc().
 * NOTE(review): body elided in this view — presumably releases the buffer
 * passed via 'opaque'; confirm against the full source.
 */
2775 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque)
/*
 * Allocate an external data buffer large enough for 'size' bytes plus
 * mbuf headroom and the shared-info footer, and attach it to 'pkt'.
 * The buffer length must fit in uint16_t (mbuf buf_len limit).
 * NOTE(review): return statements are elided in this view; callers in this
 * file treat a non-zero result as failure — confirm against full source.
 */
2781 virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size)
2783 struct rte_mbuf_ext_shared_info *shinfo = NULL;
2784 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size;
/* Reserve room for shinfo and round up for alignment of the footer. */
2789 total_len += sizeof(*shinfo) + sizeof(uintptr_t);
2790 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
/* mbuf buf_len is 16-bit wide; reject anything larger. */
2792 if (unlikely(total_len > UINT16_MAX))
2795 buf_len = total_len;
2796 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
2797 if (unlikely(buf == NULL))
2800 /* Initialize shinfo */
/* shinfo is carved from the tail of 'buf'; free cb releases 'buf'. */
2801 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
2802 virtio_dev_extbuf_free, buf);
2803 if (unlikely(shinfo == NULL)) {
2805 VHOST_LOG_DATA(ERR, "Failed to init shinfo\n");
2809 iova = rte_malloc_virt2iova(buf);
2810 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo);
2811 rte_pktmbuf_reset_headroom(pkt);
2817 * Prepare a host supported pktmbuf.
/*
 * Ensure 'pkt' can receive 'data_len' bytes: use its own tailroom if
 * sufficient, else try attaching an external buffer (when dev->extbuf is
 * set), else allow chained mbufs unless dev->linearbuf forbids them.
 * NOTE(review): return statements elided in this view; callers treat
 * non-zero as failure — confirm against full source.
 */
2819 static __rte_always_inline int
2820 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt,
2823 if (rte_pktmbuf_tailroom(pkt) >= data_len)
2826 /* attach an external buffer if supported */
2827 if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len))
2830 /* check if chained buffers are allowed */
2831 if (!dev->linearbuf)
/*
 * Dequeue up to 'count' packets from a split virtqueue into 'pkts'.
 * Returns the number of packets successfully dequeued (i - dropped).
 * 'legacy_ol_flags' selects between the legacy and compliant mbuf
 * offload-flag layouts in copy_desc_to_mbuf().
 */
2839 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
2840 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
2841 bool legacy_ol_flags)
2844 uint16_t free_entries;
2845 uint16_t dropped = 0;
/* Warn about allocation failures only once per process. */
2846 static bool allocerr_warned;
2849 * The ordering between avail index and
2850 * desc reads needs to be enforced.
2852 free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) -
2854 if (free_entries == 0)
2857 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
2859 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
2861 count = RTE_MIN(count, MAX_PKT_BURST);
2862 count = RTE_MIN(count, free_entries);
2863 VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n",
/* Pre-allocate all mbufs up front; unused ones are freed on error. */
2866 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
2869 for (i = 0; i < count; i++) {
2870 struct buf_vector buf_vec[BUF_VECTOR_MAX];
2873 uint16_t nr_vec = 0;
/* Gather the descriptor chain for this avail entry into buf_vec. */
2876 if (unlikely(fill_vec_buf_split(dev, vq,
2877 vq->last_avail_idx + i,
2879 &head_idx, &buf_len,
2880 VHOST_ACCESS_RO) < 0))
/* Dequeued descriptors carry no written length (0). */
2883 update_shadow_used_ring_split(vq, head_idx, 0);
2885 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len);
2886 if (unlikely(err)) {
2888 * mbuf allocation fails for jumbo packets when external
2889 * buffer allocation is not allowed and linear buffer
2890 * is required. Drop this packet.
2892 if (!allocerr_warned) {
2894 "Failed mbuf alloc of size %d from %s on %s.\n",
2895 buf_len, mbuf_pool->name, dev->ifname);
2896 allocerr_warned = true;
2903 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
2904 mbuf_pool, legacy_ol_flags);
2905 if (unlikely(err)) {
2906 if (!allocerr_warned) {
2908 "Failed to copy desc to mbuf on %s.\n",
2910 allocerr_warned = true;
/* Free the pre-allocated mbufs that were not filled. */
2919 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1);
2921 vq->last_avail_idx += i;
2923 do_data_copy_dequeue(vq);
/* On early exit only the first 'i' shadow entries are valid. */
2924 if (unlikely(i < count))
2925 vq->shadow_used_idx = i;
2926 if (likely(vq->shadow_used_idx)) {
2927 flush_shadow_used_ring_split(dev, vq);
2928 vhost_vring_call_split(dev, vq);
2931 return (i - dropped);
/* Split-ring dequeue using the legacy mbuf offload-flag layout. */
2936 virtio_dev_tx_split_legacy(struct virtio_net *dev,
2937 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
2938 struct rte_mbuf **pkts, uint16_t count)
2940 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true);
/* Split-ring dequeue using the spec-compliant mbuf offload-flag layout. */
2945 virtio_dev_tx_split_compliant(struct virtio_net *dev,
2946 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
2947 struct rte_mbuf **pkts, uint16_t count)
2949 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false);
/*
 * Validate and reserve a full batch (PACKED_BATCH_SIZE) of single
 * descriptors at 'avail_idx' on a packed ring for dequeue: check
 * avail/used wrap flags, translate guest addresses into 'desc_addrs',
 * prepare the mbufs, and record descriptor ids in 'ids'.
 * NOTE(review): return statements elided in this view; callers treat
 * non-zero as "batch not usable" — confirm against full source.
 */
2952 static __rte_always_inline int
2953 vhost_reserve_avail_batch_packed(struct virtio_net *dev,
2954 struct vhost_virtqueue *vq,
2955 struct rte_mbuf **pkts,
2957 uintptr_t *desc_addrs,
2960 bool wrap = vq->avail_wrap_counter;
2961 struct vring_packed_desc *descs = vq->desc_packed;
2962 uint64_t lens[PACKED_BATCH_SIZE];
2963 uint64_t buf_lens[PACKED_BATCH_SIZE];
2964 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
/* The batch must be aligned and must not wrap past the ring end. */
2967 if (unlikely(avail_idx & PACKED_BATCH_MASK))
2969 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
2972 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
2973 flags = descs[avail_idx + i].flags;
/* Every descriptor must be driver-owned and single (not chained). */
2974 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
2975 (wrap == !!(flags & VRING_DESC_F_USED)) ||
2976 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
/* Do not read descriptor payload fields before the flags check. */
2980 rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
2982 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
2983 lens[i] = descs[avail_idx + i].len;
2985 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
2986 desc_addrs[i] = vhost_iova_to_vva(dev, vq,
2987 descs[avail_idx + i].addr,
2988 &lens[i], VHOST_ACCESS_RW);
/* Reject the batch if any translation failed or was truncated. */
2991 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
2992 if (unlikely(!desc_addrs[i]))
2994 if (unlikely((lens[i] != descs[avail_idx + i].len)))
2998 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
2999 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i]))
3003 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3004 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
/* Payload (descriptor minus vnet header) must fit each mbuf. */
3006 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3007 if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
3011 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3012 pkts[i]->pkt_len = lens[i] - buf_offset;
3013 pkts[i]->data_len = pkts[i]->pkt_len;
3014 ids[i] = descs[avail_idx + i].id;
/*
 * Dequeue one full batch of packets from a packed ring. Returns non-zero
 * (batch refused) when vhost_reserve_avail_batch_packed() fails, in which
 * case the caller falls back to single-descriptor dequeue.
 */
3023 static __rte_always_inline int
3024 virtio_dev_tx_batch_packed(struct virtio_net *dev,
3025 struct vhost_virtqueue *vq,
3026 struct rte_mbuf **pkts,
3027 bool legacy_ol_flags)
3029 uint16_t avail_idx = vq->last_avail_idx;
3030 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
3031 struct virtio_net_hdr *hdr;
3032 uintptr_t desc_addrs[PACKED_BATCH_SIZE];
3033 uint16_t ids[PACKED_BATCH_SIZE];
3036 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx,
3040 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3041 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
/* Copy payloads; pkt_len/data_len were set during the reserve step. */
3043 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3044 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
3045 (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
/* The vnet header sits at the start of each descriptor buffer. */
3048 if (virtio_net_with_host_offload(dev)) {
3049 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3050 hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
3051 vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags);
/* In-order devices only need the last id of the batch recorded. */
3055 if (virtio_net_is_inorder(dev))
3056 vhost_shadow_dequeue_batch_packed_inorder(vq,
3057 ids[PACKED_BATCH_SIZE - 1]);
3059 vhost_shadow_dequeue_batch_packed(dev, vq, ids);
3061 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
/*
 * Dequeue a single descriptor chain from a packed ring into 'pkts',
 * reporting the number of descriptors consumed via 'desc_count'.
 * NOTE(review): return statements elided in this view; the caller treats
 * non-zero as failure — confirm against full source.
 */
3066 static __rte_always_inline int
3067 vhost_dequeue_single_packed(struct virtio_net *dev,
3068 struct vhost_virtqueue *vq,
3069 struct rte_mempool *mbuf_pool,
3070 struct rte_mbuf *pkts,
3072 uint16_t *desc_count,
3073 bool legacy_ol_flags)
3075 struct buf_vector buf_vec[BUF_VECTOR_MAX];
3077 uint16_t nr_vec = 0;
/* Warn about allocation failures only once per process. */
3079 static bool allocerr_warned;
3081 if (unlikely(fill_vec_buf_packed(dev, vq,
3082 vq->last_avail_idx, desc_count,
3085 VHOST_ACCESS_RO) < 0))
3088 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
3089 if (!allocerr_warned) {
3091 "Failed mbuf alloc of size %d from %s on %s.\n",
3092 buf_len, mbuf_pool->name, dev->ifname);
3093 allocerr_warned = true;
3098 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts,
3099 mbuf_pool, legacy_ol_flags);
3100 if (unlikely(err)) {
3101 if (!allocerr_warned) {
3103 "Failed to copy desc to mbuf on %s.\n",
3105 allocerr_warned = true;
/*
 * Dequeue one packet from a packed ring and record the consumed
 * descriptors in the shadow used ring (in-order or out-of-order variant).
 * The shadow update runs even on copy failure as long as descriptors
 * were consumed (desc_count > 0), so the ring stays consistent.
 */
3113 static __rte_always_inline int
3114 virtio_dev_tx_single_packed(struct virtio_net *dev,
3115 struct vhost_virtqueue *vq,
3116 struct rte_mempool *mbuf_pool,
3117 struct rte_mbuf *pkts,
3118 bool legacy_ol_flags)
3121 uint16_t buf_id, desc_count = 0;
3124 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id,
3125 &desc_count, legacy_ol_flags);
3127 if (likely(desc_count > 0)) {
3128 if (virtio_net_is_inorder(dev))
3129 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id,
3132 vhost_shadow_dequeue_single_packed(vq, buf_id,
3135 vq_inc_last_avail_packed(vq, desc_count);
/*
 * Dequeue up to 'count' packets from a packed virtqueue: batch path when
 * at least PACKED_BATCH_SIZE packets remain, single-packet fallback
 * otherwise. Unfilled pre-allocated mbufs are freed before returning.
 */
3143 virtio_dev_tx_packed(struct virtio_net *dev,
3144 struct vhost_virtqueue *__rte_restrict vq,
3145 struct rte_mempool *mbuf_pool,
3146 struct rte_mbuf **__rte_restrict pkts,
3148 bool legacy_ol_flags)
3150 uint32_t pkt_idx = 0;
/* Pre-allocate all mbufs up front; excess ones are freed below. */
3152 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
3156 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
3158 if (count - pkt_idx >= PACKED_BATCH_SIZE) {
3159 if (!virtio_dev_tx_batch_packed(dev, vq,
3162 pkt_idx += PACKED_BATCH_SIZE;
3167 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
3172 } while (pkt_idx < count);
3174 if (pkt_idx != count)
3175 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx);
/* Flush deferred copies and publish used entries before kicking. */
3177 if (vq->shadow_used_idx) {
3178 do_data_copy_dequeue(vq);
3180 vhost_flush_dequeue_shadow_packed(dev, vq);
3181 vhost_vring_call_packed(dev, vq);
/* Packed-ring dequeue using the legacy mbuf offload-flag layout. */
3189 virtio_dev_tx_packed_legacy(struct virtio_net *dev,
3190 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3191 struct rte_mbuf **__rte_restrict pkts, uint32_t count)
3193 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true);
/* Packed-ring dequeue using the spec-compliant mbuf offload-flag layout. */
3198 virtio_dev_tx_packed_compliant(struct virtio_net *dev,
3199 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3200 struct rte_mbuf **__rte_restrict pkts, uint32_t count)
3202 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false);
3206 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
3207 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
3209 struct virtio_net *dev;
3210 struct rte_mbuf *rarp_mbuf = NULL;
3211 struct vhost_virtqueue *vq;
3212 int16_t success = 1;
3214 dev = get_device(vid);
3218 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
3220 "(%d) %s: built-in vhost net backend is disabled.\n",
3221 dev->vid, __func__);
3225 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
3227 "(%d) %s: invalid virtqueue idx %d.\n",
3228 dev->vid, __func__, queue_id);
3232 vq = dev->virtqueue[queue_id];
3234 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
3237 if (unlikely(!vq->enabled)) {
3239 goto out_access_unlock;
3242 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3243 vhost_user_iotlb_rd_lock(vq);
3245 if (unlikely(!vq->access_ok))
3246 if (unlikely(vring_translate(dev, vq) < 0)) {
3252 * Construct a RARP broadcast packet, and inject it to the "pkts"
3253 * array, to looks like that guest actually send such packet.
3255 * Check user_send_rarp() for more information.
3257 * broadcast_rarp shares a cacheline in the virtio_net structure
3258 * with some fields that are accessed during enqueue and
3259 * __atomic_compare_exchange_n causes a write if performed compare
3260 * and exchange. This could result in false sharing between enqueue
3263 * Prevent unnecessary false sharing by reading broadcast_rarp first
3264 * and only performing compare and exchange if the read indicates it
3265 * is likely to be set.
3267 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
3268 __atomic_compare_exchange_n(&dev->broadcast_rarp,
3269 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
3271 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
3272 if (rarp_mbuf == NULL) {
3273 VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n");
3280 if (vq_is_packed(dev)) {
3281 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3282 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count);
3284 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count);
3286 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3287 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count);
3289 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count);
3293 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3294 vhost_user_iotlb_rd_unlock(vq);
3297 rte_spinlock_unlock(&vq->access_lock);
3299 if (unlikely(rarp_mbuf != NULL)) {
3301 * Inject it to the head of "pkts" array, so that switch's mac
3302 * learning table will get updated first.
3304 memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));
3305 pkts[0] = rarp_mbuf;