1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
7 #include <linux/virtio_net.h>
10 #include <rte_memcpy.h>
12 #include <rte_ether.h>
14 #include <rte_vhost.h>
19 #include <rte_spinlock.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost_async.h>
26 #define MAX_BATCH_LEN 256
28 #define VHOST_ASYNC_BATCH_THRESHOLD 32
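/*
 * MAX_BATCH_LEN is the largest copy length that is deferred to the
 * batched-copy array (see copy_mbuf_to_desc()); larger copies are done
 * immediately with rte_memcpy(). VHOST_ASYNC_BATCH_THRESHOLD is the number
 * of buffered async transfer descriptors that triggers a call to the
 * registered transfer_data() callback in the async submit paths below.
 */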
30 static __rte_always_inline bool
31 rxvq_is_mergeable(struct virtio_net *dev)
33 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
36 static __rte_always_inline bool
37 virtio_net_is_inorder(struct virtio_net *dev)
39 return dev->features & (1ULL << VIRTIO_F_IN_ORDER);
43 is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
45 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
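/*
 * Flush the small copies queued in vq->batch_copy_elems. Performing the
 * memcpy()s back to back improves cache behaviour; the enqueue variant
 * additionally logs the dirty guest pages for live migration.
 */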
49 do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
51 struct batch_copy_elem *elem = vq->batch_copy_elems;
52 uint16_t count = vq->batch_copy_nb_elems;
55 for (i = 0; i < count; i++) {
56 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
57 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr,
59 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
62 vq->batch_copy_nb_elems = 0;
66 do_data_copy_dequeue(struct vhost_virtqueue *vq)
68 struct batch_copy_elem *elem = vq->batch_copy_elems;
69 uint16_t count = vq->batch_copy_nb_elems;
72 for (i = 0; i < count; i++)
73 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
75 vq->batch_copy_nb_elems = 0;
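/*
 * Split-ring shadow used ring: used-ring updates are first staged in
 * vq->shadow_used_split and later copied into the guest-visible used ring
 * in at most two contiguous chunks, which limits writes to the shared ring
 * and the amount of dirty-page logging.
 */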
78 static __rte_always_inline void
79 do_flush_shadow_used_ring_split(struct virtio_net *dev,
80 struct vhost_virtqueue *vq,
81 uint16_t to, uint16_t from, uint16_t size)
83 rte_memcpy(&vq->used->ring[to],
84 &vq->shadow_used_split[from],
85 size * sizeof(struct vring_used_elem));
86 vhost_log_cache_used_vring(dev, vq,
87 offsetof(struct vring_used, ring[to]),
88 size * sizeof(struct vring_used_elem));
91 static __rte_always_inline void
92 flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
94 uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
96 if (used_idx + vq->shadow_used_idx <= vq->size) {
97 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
102 /* first flush the used ring entries from used_idx up to the end of the ring */
103 size = vq->size - used_idx;
104 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
106 /* then wrap around and flush the remaining entries at the start of the ring */
107 do_flush_shadow_used_ring_split(dev, vq, 0, size,
108 vq->shadow_used_idx - size);
110 vq->last_used_idx += vq->shadow_used_idx;
112 vhost_log_cache_sync(dev, vq);
114 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx,
116 vq->shadow_used_idx = 0;
117 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
118 sizeof(vq->used->idx));
121 static __rte_always_inline void
122 update_shadow_used_ring_split(struct vhost_virtqueue *vq,
123 uint16_t desc_idx, uint32_t len)
125 uint16_t i = vq->shadow_used_idx++;
127 vq->shadow_used_split[i].id = desc_idx;
128 vq->shadow_used_split[i].len = len;
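/*
 * Flush the shadow used entries of a packed ring. Descriptors are written
 * in two passes (ids/lengths first, flags after a release fence), and the
 * head descriptor's flags are stored last so the guest never observes a
 * partially updated chain.
 */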
131 static __rte_always_inline void
132 vhost_flush_enqueue_shadow_packed(struct virtio_net *dev,
133 struct vhost_virtqueue *vq)
136 uint16_t used_idx = vq->last_used_idx;
137 uint16_t head_idx = vq->last_used_idx;
138 uint16_t head_flags = 0;
140 /* Split loop in two to save memory barriers */
141 for (i = 0; i < vq->shadow_used_idx; i++) {
142 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
143 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;
145 used_idx += vq->shadow_used_packed[i].count;
146 if (used_idx >= vq->size)
147 used_idx -= vq->size;
150 /* The ordering for storing desc flags needs to be enforced. */
151 rte_atomic_thread_fence(__ATOMIC_RELEASE);
153 for (i = 0; i < vq->shadow_used_idx; i++) {
156 if (vq->shadow_used_packed[i].len)
157 flags = VRING_DESC_F_WRITE;
161 if (vq->used_wrap_counter) {
162 flags |= VRING_DESC_F_USED;
163 flags |= VRING_DESC_F_AVAIL;
165 flags &= ~VRING_DESC_F_USED;
166 flags &= ~VRING_DESC_F_AVAIL;
170 vq->desc_packed[vq->last_used_idx].flags = flags;
172 vhost_log_cache_used_vring(dev, vq,
174 sizeof(struct vring_packed_desc),
175 sizeof(struct vring_packed_desc));
177 head_idx = vq->last_used_idx;
181 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count);
184 vq->desc_packed[head_idx].flags = head_flags;
186 vhost_log_cache_used_vring(dev, vq,
188 sizeof(struct vring_packed_desc),
189 sizeof(struct vring_packed_desc));
191 vq->shadow_used_idx = 0;
192 vhost_log_cache_sync(dev, vq);
195 static __rte_always_inline void
196 vhost_flush_dequeue_shadow_packed(struct virtio_net *dev,
197 struct vhost_virtqueue *vq)
199 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0];
201 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id;
202 /* The desc flags field is the synchronization point for the virtio packed vring */
203 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags,
204 used_elem->flags, __ATOMIC_RELEASE);
206 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx *
207 sizeof(struct vring_packed_desc),
208 sizeof(struct vring_packed_desc));
209 vq->shadow_used_idx = 0;
210 vhost_log_cache_sync(dev, vq);
213 static __rte_always_inline void
214 vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
215 struct vhost_virtqueue *vq,
221 uint16_t last_used_idx;
222 struct vring_packed_desc *desc_base;
224 last_used_idx = vq->last_used_idx;
225 desc_base = &vq->desc_packed[last_used_idx];
227 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter);
229 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
230 desc_base[i].id = ids[i];
231 desc_base[i].len = lens[i];
234 rte_atomic_thread_fence(__ATOMIC_RELEASE);
236 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
237 desc_base[i].flags = flags;
240 vhost_log_cache_used_vring(dev, vq, last_used_idx *
241 sizeof(struct vring_packed_desc),
242 sizeof(struct vring_packed_desc) *
244 vhost_log_cache_sync(dev, vq);
246 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
249 static __rte_always_inline void
250 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
253 vq->shadow_used_packed[0].id = id;
255 if (!vq->shadow_used_idx) {
256 vq->shadow_last_used_idx = vq->last_used_idx;
257 vq->shadow_used_packed[0].flags =
258 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
259 vq->shadow_used_packed[0].len = 0;
260 vq->shadow_used_packed[0].count = 1;
261 vq->shadow_used_idx++;
264 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
267 static __rte_always_inline void
268 vhost_shadow_dequeue_batch_packed(struct virtio_net *dev,
269 struct vhost_virtqueue *vq,
276 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
278 if (!vq->shadow_used_idx) {
279 vq->shadow_last_used_idx = vq->last_used_idx;
280 vq->shadow_used_packed[0].id = ids[0];
281 vq->shadow_used_packed[0].len = 0;
282 vq->shadow_used_packed[0].count = 1;
283 vq->shadow_used_packed[0].flags = flags;
284 vq->shadow_used_idx++;
289 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) {
290 vq->desc_packed[vq->last_used_idx + i].id = ids[i];
291 vq->desc_packed[vq->last_used_idx + i].len = 0;
294 rte_atomic_thread_fence(__ATOMIC_RELEASE);
295 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE)
296 vq->desc_packed[vq->last_used_idx + i].flags = flags;
298 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
299 sizeof(struct vring_packed_desc),
300 sizeof(struct vring_packed_desc) *
302 vhost_log_cache_sync(dev, vq);
304 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
307 static __rte_always_inline void
308 vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq,
314 flags = vq->desc_packed[vq->last_used_idx].flags;
315 if (vq->used_wrap_counter) {
316 flags |= VRING_DESC_F_USED;
317 flags |= VRING_DESC_F_AVAIL;
319 flags &= ~VRING_DESC_F_USED;
320 flags &= ~VRING_DESC_F_AVAIL;
323 if (!vq->shadow_used_idx) {
324 vq->shadow_last_used_idx = vq->last_used_idx;
326 vq->shadow_used_packed[0].id = buf_id;
327 vq->shadow_used_packed[0].len = 0;
328 vq->shadow_used_packed[0].flags = flags;
329 vq->shadow_used_idx++;
331 vq->desc_packed[vq->last_used_idx].id = buf_id;
332 vq->desc_packed[vq->last_used_idx].len = 0;
333 vq->desc_packed[vq->last_used_idx].flags = flags;
336 vq_inc_last_used_packed(vq, count);
339 static __rte_always_inline void
340 vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq,
346 vq->shadow_used_packed[0].id = buf_id;
348 flags = vq->desc_packed[vq->last_used_idx].flags;
349 if (vq->used_wrap_counter) {
350 flags |= VRING_DESC_F_USED;
351 flags |= VRING_DESC_F_AVAIL;
353 flags &= ~VRING_DESC_F_USED;
354 flags &= ~VRING_DESC_F_AVAIL;
357 if (!vq->shadow_used_idx) {
358 vq->shadow_last_used_idx = vq->last_used_idx;
359 vq->shadow_used_packed[0].len = 0;
360 vq->shadow_used_packed[0].flags = flags;
361 vq->shadow_used_idx++;
364 vq_inc_last_used_packed(vq, count);
367 static __rte_always_inline void
368 vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
372 uint16_t num_buffers)
376 for (i = 0; i < num_buffers; i++) {
377 /* when starting a new shadow batch, record the alignment so the flush stays aligned with the batch size */
378 if (!vq->shadow_used_idx)
379 vq->shadow_aligned_idx = vq->last_used_idx &
381 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i];
382 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i];
383 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i];
384 vq->shadow_aligned_idx += count[i];
385 vq->shadow_used_idx++;
389 static __rte_always_inline void
390 vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
391 struct vhost_virtqueue *vq,
395 uint16_t num_buffers)
397 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);
399 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) {
400 do_data_copy_enqueue(dev, vq);
401 vhost_flush_enqueue_shadow_packed(dev, vq);
405 /* avoid the write operation when it is not needed, to lessen cache issues */
406 #define ASSIGN_UNLESS_EQUAL(var, val) do { \
407 if ((var) != (val)) \
411 static __rte_always_inline void
412 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
414 uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;
416 if (m_buf->ol_flags & PKT_TX_TCP_SEG)
417 csum_l4 |= PKT_TX_TCP_CKSUM;
420 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
421 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
424 case PKT_TX_TCP_CKSUM:
425 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr,
428 case PKT_TX_UDP_CKSUM:
429 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr,
432 case PKT_TX_SCTP_CKSUM:
433 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr,
438 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0);
439 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0);
440 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0);
443 /* IP cksum offload cannot be passed on to the guest, so calculate it here */
444 if (m_buf->ol_flags & PKT_TX_IP_CKSUM) {
445 struct rte_ipv4_hdr *ipv4_hdr;
447 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *,
449 ipv4_hdr->hdr_checksum = 0;
450 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
453 if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
454 if (m_buf->ol_flags & PKT_TX_IPV4)
455 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
457 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
458 net_hdr->gso_size = m_buf->tso_segsz;
459 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
461 } else if (m_buf->ol_flags & PKT_TX_UDP_SEG) {
462 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
463 net_hdr->gso_size = m_buf->tso_segsz;
464 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len +
467 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0);
468 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0);
469 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0);
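/*
 * Translate one guest descriptor (IOVA and length) into host virtual
 * address chunks and append them to buf_vec[]. Fails if the vector is
 * full or the address cannot be translated.
 */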
473 static __rte_always_inline int
474 map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
475 struct buf_vector *buf_vec, uint16_t *vec_idx,
476 uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
478 uint16_t vec_id = *vec_idx;
482 uint64_t desc_chunck_len = desc_len;
484 if (unlikely(vec_id >= BUF_VECTOR_MAX))
487 desc_addr = vhost_iova_to_vva(dev, vq,
491 if (unlikely(!desc_addr))
494 rte_prefetch0((void *)(uintptr_t)desc_addr);
496 buf_vec[vec_id].buf_iova = desc_iova;
497 buf_vec[vec_id].buf_addr = desc_addr;
498 buf_vec[vec_id].buf_len = desc_chunck_len;
500 desc_len -= desc_chunck_len;
501 desc_iova += desc_chunck_len;
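/*
 * Walk a split-ring descriptor chain starting at the given avail index,
 * following VRING_DESC_F_NEXT links and indirect tables, and collect the
 * buffers into buf_vec[]. The chain head id and the total buffer length
 * are returned through desc_chain_head and desc_chain_len.
 */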
509 static __rte_always_inline int
510 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
511 uint32_t avail_idx, uint16_t *vec_idx,
512 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
513 uint32_t *desc_chain_len, uint8_t perm)
515 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
516 uint16_t vec_id = *vec_idx;
519 uint32_t nr_descs = vq->size;
521 struct vring_desc *descs = vq->desc;
522 struct vring_desc *idesc = NULL;
524 if (unlikely(idx >= vq->size))
527 *desc_chain_head = idx;
529 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
530 dlen = vq->desc[idx].len;
531 nr_descs = dlen / sizeof(struct vring_desc);
532 if (unlikely(nr_descs > vq->size))
535 descs = (struct vring_desc *)(uintptr_t)
536 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
539 if (unlikely(!descs))
542 if (unlikely(dlen < vq->desc[idx].len)) {
544 * The indirect desc table is not contiguous
545 * in the process VA space, so we have to copy it.
547 idesc = vhost_alloc_copy_ind_table(dev, vq,
548 vq->desc[idx].addr, vq->desc[idx].len);
549 if (unlikely(!idesc))
559 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) {
560 free_ind_table(idesc);
564 dlen = descs[idx].len;
567 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
568 descs[idx].addr, dlen,
570 free_ind_table(idesc);
574 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
577 idx = descs[idx].next;
580 *desc_chain_len = len;
583 if (unlikely(!!idesc))
584 free_ind_table(idesc);
590 * Returns -1 on failure, 0 on success
593 reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
594 uint32_t size, struct buf_vector *buf_vec,
595 uint16_t *num_buffers, uint16_t avail_head,
599 uint16_t vec_idx = 0;
600 uint16_t max_tries, tries = 0;
602 uint16_t head_idx = 0;
606 cur_idx = vq->last_avail_idx;
608 if (rxvq_is_mergeable(dev))
609 max_tries = vq->size - 1;
614 if (unlikely(cur_idx == avail_head))
617 * if we have tried all available ring items and still
618 * cannot get enough buffers, something abnormal has happened
621 if (unlikely(++tries > max_tries))
624 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
627 VHOST_ACCESS_RW) < 0))
629 len = RTE_MIN(len, size);
630 update_shadow_used_ring_split(vq, head_idx, len);
642 static __rte_always_inline int
643 fill_vec_buf_packed_indirect(struct virtio_net *dev,
644 struct vhost_virtqueue *vq,
645 struct vring_packed_desc *desc, uint16_t *vec_idx,
646 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm)
650 uint16_t vec_id = *vec_idx;
652 struct vring_packed_desc *descs, *idescs = NULL;
655 descs = (struct vring_packed_desc *)(uintptr_t)
656 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
657 if (unlikely(!descs))
660 if (unlikely(dlen < desc->len)) {
662 * The indirect desc table is not contiguous
663 * in the process VA space, so we have to copy it.
665 idescs = vhost_alloc_copy_ind_table(dev,
666 vq, desc->addr, desc->len);
667 if (unlikely(!idescs))
673 nr_descs = desc->len / sizeof(struct vring_packed_desc);
674 if (unlikely(nr_descs >= vq->size)) {
675 free_ind_table(idescs);
679 for (i = 0; i < nr_descs; i++) {
680 if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
681 free_ind_table(idescs);
687 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
694 if (unlikely(!!idescs))
695 free_ind_table(idescs);
700 static __rte_always_inline int
701 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
702 uint16_t avail_idx, uint16_t *desc_count,
703 struct buf_vector *buf_vec, uint16_t *vec_idx,
704 uint16_t *buf_id, uint32_t *len, uint8_t perm)
706 bool wrap_counter = vq->avail_wrap_counter;
707 struct vring_packed_desc *descs = vq->desc_packed;
708 uint16_t vec_id = *vec_idx;
711 if (avail_idx < vq->last_avail_idx)
715 * Perform a load-acquire barrier in desc_is_avail to
716 * enforce the ordering between desc flags and desc
719 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
726 if (unlikely(vec_id >= BUF_VECTOR_MAX))
729 if (unlikely(*desc_count >= vq->size))
733 *buf_id = descs[avail_idx].id;
735 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
736 if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
742 dlen = descs[avail_idx].len;
745 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
746 descs[avail_idx].addr,
752 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
755 if (++avail_idx >= vq->size) {
756 avail_idx -= vq->size;
766 static __rte_noinline void
767 copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
768 struct buf_vector *buf_vec,
769 struct virtio_net_hdr_mrg_rxbuf *hdr)
772 uint64_t remain = dev->vhost_hlen;
773 uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
774 uint64_t iova = buf_vec->buf_iova;
777 len = RTE_MIN(remain,
779 dst = buf_vec->buf_addr;
780 rte_memcpy((void *)(uintptr_t)dst,
781 (void *)(uintptr_t)src,
784 PRINT_PACKET(dev, (uintptr_t)dst,
786 vhost_log_cache_write_iova(dev, vq,
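/*
 * Synchronous enqueue of one mbuf chain into the guest buffers described
 * by buf_vec[]. The virtio-net header is filled first (split across
 * buffers via copy_vnet_hdr_to_desc() when needed), then the payload is
 * copied, with small copies deferred to the batched-copy array.
 */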
796 static __rte_always_inline int
797 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
798 struct rte_mbuf *m, struct buf_vector *buf_vec,
799 uint16_t nr_vec, uint16_t num_buffers)
801 uint32_t vec_idx = 0;
802 uint32_t mbuf_offset, mbuf_avail;
803 uint32_t buf_offset, buf_avail;
804 uint64_t buf_addr, buf_iova, buf_len;
807 struct rte_mbuf *hdr_mbuf;
808 struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
809 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
812 if (unlikely(m == NULL)) {
817 buf_addr = buf_vec[vec_idx].buf_addr;
818 buf_iova = buf_vec[vec_idx].buf_iova;
819 buf_len = buf_vec[vec_idx].buf_len;
821 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
828 if (unlikely(buf_len < dev->vhost_hlen)) {
829 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
832 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
834 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
835 dev->vid, num_buffers);
837 if (unlikely(buf_len < dev->vhost_hlen)) {
838 buf_offset = dev->vhost_hlen - buf_len;
840 buf_addr = buf_vec[vec_idx].buf_addr;
841 buf_iova = buf_vec[vec_idx].buf_iova;
842 buf_len = buf_vec[vec_idx].buf_len;
843 buf_avail = buf_len - buf_offset;
845 buf_offset = dev->vhost_hlen;
846 buf_avail = buf_len - dev->vhost_hlen;
849 mbuf_avail = rte_pktmbuf_data_len(m);
851 while (mbuf_avail != 0 || m->next != NULL) {
852 /* done with current buf, get the next one */
853 if (buf_avail == 0) {
855 if (unlikely(vec_idx >= nr_vec)) {
860 buf_addr = buf_vec[vec_idx].buf_addr;
861 buf_iova = buf_vec[vec_idx].buf_iova;
862 buf_len = buf_vec[vec_idx].buf_len;
868 /* done with current mbuf, get the next one */
869 if (mbuf_avail == 0) {
873 mbuf_avail = rte_pktmbuf_data_len(m);
877 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
878 if (rxvq_is_mergeable(dev))
879 ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
882 if (unlikely(hdr == &tmp_hdr)) {
883 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
885 PRINT_PACKET(dev, (uintptr_t)hdr_addr,
887 vhost_log_cache_write_iova(dev, vq,
895 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
897 if (likely(cpy_len > MAX_BATCH_LEN ||
898 vq->batch_copy_nb_elems >= vq->size)) {
899 rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)),
900 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
902 vhost_log_cache_write_iova(dev, vq,
903 buf_iova + buf_offset,
905 PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),
908 batch_copy[vq->batch_copy_nb_elems].dst =
909 (void *)((uintptr_t)(buf_addr + buf_offset));
910 batch_copy[vq->batch_copy_nb_elems].src =
911 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
912 batch_copy[vq->batch_copy_nb_elems].log_addr =
913 buf_iova + buf_offset;
914 batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
915 vq->batch_copy_nb_elems++;
918 mbuf_avail -= cpy_len;
919 mbuf_offset += cpy_len;
920 buf_avail -= cpy_len;
921 buf_offset += cpy_len;
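/*
 * Helpers that build the iovec arrays and the rte_vhost_async_desc handed
 * to the registered async channel (typically a DMA engine): one source and
 * one destination iovec per contiguous region, grouped into
 * rte_vhost_iov_iter iterators.
 */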
929 static __rte_always_inline void
930 async_fill_vec(struct iovec *v, void *base, size_t len)
936 static __rte_always_inline void
937 async_fill_iter(struct rte_vhost_iov_iter *it, size_t count,
938 struct iovec *vec, unsigned long nr_seg)
945 it->nr_segs = nr_seg;
952 static __rte_always_inline void
953 async_fill_desc(struct rte_vhost_async_desc *desc,
954 struct rte_vhost_iov_iter *src, struct rte_vhost_iov_iter *dst)
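/*
 * Async variant of copy_mbuf_to_desc(). Segments whose remaining copy
 * length reaches vq->async_threshold and that map to a contiguous host
 * physical region are described in src/dst iovecs for the async channel;
 * everything else (headers and small pieces) is copied by the CPU.
 */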
960 static __rte_always_inline int
961 async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
962 struct rte_mbuf *m, struct buf_vector *buf_vec,
963 uint16_t nr_vec, uint16_t num_buffers,
964 struct iovec *src_iovec, struct iovec *dst_iovec,
965 struct rte_vhost_iov_iter *src_it,
966 struct rte_vhost_iov_iter *dst_it)
968 uint32_t vec_idx = 0;
969 uint32_t mbuf_offset, mbuf_avail;
970 uint32_t buf_offset, buf_avail;
971 uint64_t buf_addr, buf_iova, buf_len;
972 uint32_t cpy_len, cpy_threshold;
974 struct rte_mbuf *hdr_mbuf;
975 struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
976 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
984 if (unlikely(m == NULL)) {
989 cpy_threshold = vq->async_threshold;
991 buf_addr = buf_vec[vec_idx].buf_addr;
992 buf_iova = buf_vec[vec_idx].buf_iova;
993 buf_len = buf_vec[vec_idx].buf_len;
995 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
1001 hdr_addr = buf_addr;
1002 if (unlikely(buf_len < dev->vhost_hlen)) {
1003 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
1006 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
1008 VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
1009 dev->vid, num_buffers);
1011 if (unlikely(buf_len < dev->vhost_hlen)) {
1012 buf_offset = dev->vhost_hlen - buf_len;
1014 buf_addr = buf_vec[vec_idx].buf_addr;
1015 buf_iova = buf_vec[vec_idx].buf_iova;
1016 buf_len = buf_vec[vec_idx].buf_len;
1017 buf_avail = buf_len - buf_offset;
1019 buf_offset = dev->vhost_hlen;
1020 buf_avail = buf_len - dev->vhost_hlen;
1023 mbuf_avail = rte_pktmbuf_data_len(m);
1026 while (mbuf_avail != 0 || m->next != NULL) {
1027 /* done with current buf, get the next one */
1028 if (buf_avail == 0) {
1030 if (unlikely(vec_idx >= nr_vec)) {
1035 buf_addr = buf_vec[vec_idx].buf_addr;
1036 buf_iova = buf_vec[vec_idx].buf_iova;
1037 buf_len = buf_vec[vec_idx].buf_len;
1040 buf_avail = buf_len;
1043 /* done with current mbuf, get the next one */
1044 if (mbuf_avail == 0) {
1048 mbuf_avail = rte_pktmbuf_data_len(m);
1052 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
1053 if (rxvq_is_mergeable(dev))
1054 ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
1057 if (unlikely(hdr == &tmp_hdr)) {
1058 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
1060 PRINT_PACKET(dev, (uintptr_t)hdr_addr,
1061 dev->vhost_hlen, 0);
1062 vhost_log_cache_write_iova(dev, vq,
1063 buf_vec[0].buf_iova,
1070 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
1072 while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
1073 hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
1074 buf_iova + buf_offset,
1075 cpy_len, &mapped_len);
1077 if (unlikely(!hpa || mapped_len < cpy_threshold))
1080 async_fill_vec(src_iovec + tvec_idx,
1081 (void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
1082 mbuf_offset), (size_t)mapped_len);
1084 async_fill_vec(dst_iovec + tvec_idx,
1085 hpa, (size_t)mapped_len);
1087 tlen += (uint32_t)mapped_len;
1088 cpy_len -= (uint32_t)mapped_len;
1089 mbuf_avail -= (uint32_t)mapped_len;
1090 mbuf_offset += (uint32_t)mapped_len;
1091 buf_avail -= (uint32_t)mapped_len;
1092 buf_offset += (uint32_t)mapped_len;
1096 if (likely(cpy_len)) {
1097 if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
1099 (void *)((uintptr_t)(buf_addr + buf_offset)),
1100 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
1104 (uintptr_t)(buf_addr + buf_offset),
1107 batch_copy[vq->batch_copy_nb_elems].dst =
1108 (void *)((uintptr_t)(buf_addr + buf_offset));
1109 batch_copy[vq->batch_copy_nb_elems].src =
1110 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
1111 batch_copy[vq->batch_copy_nb_elems].log_addr =
1112 buf_iova + buf_offset;
1113 batch_copy[vq->batch_copy_nb_elems].len =
1115 vq->batch_copy_nb_elems++;
1118 mbuf_avail -= cpy_len;
1119 mbuf_offset += cpy_len;
1120 buf_avail -= cpy_len;
1121 buf_offset += cpy_len;
1128 async_fill_iter(src_it, tlen, src_iovec, tvec_idx);
1129 async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);
1137 static __rte_always_inline int
1138 vhost_enqueue_single_packed(struct virtio_net *dev,
1139 struct vhost_virtqueue *vq,
1140 struct rte_mbuf *pkt,
1141 struct buf_vector *buf_vec,
1144 uint16_t nr_vec = 0;
1145 uint16_t avail_idx = vq->last_avail_idx;
1146 uint16_t max_tries, tries = 0;
1147 uint16_t buf_id = 0;
1149 uint16_t desc_count;
1150 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
1151 uint16_t num_buffers = 0;
1152 uint32_t buffer_len[vq->size];
1153 uint16_t buffer_buf_id[vq->size];
1154 uint16_t buffer_desc_count[vq->size];
1156 if (rxvq_is_mergeable(dev))
1157 max_tries = vq->size - 1;
1163 * if we have tried all available ring items and still
1164 * cannot get enough buffers, something abnormal has happened
1167 if (unlikely(++tries > max_tries))
1170 if (unlikely(fill_vec_buf_packed(dev, vq,
1171 avail_idx, &desc_count,
1174 VHOST_ACCESS_RW) < 0))
1177 len = RTE_MIN(len, size);
1180 buffer_len[num_buffers] = len;
1181 buffer_buf_id[num_buffers] = buf_id;
1182 buffer_desc_count[num_buffers] = desc_count;
1185 *nr_descs += desc_count;
1186 avail_idx += desc_count;
1187 if (avail_idx >= vq->size)
1188 avail_idx -= vq->size;
1191 if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0)
1194 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id,
1195 buffer_desc_count, num_buffers);
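/*
 * Synchronous enqueue path for split rings: for each mbuf, reserve enough
 * descriptors (merging buffers when VIRTIO_NET_F_MRG_RXBUF is negotiated),
 * copy the packet, then flush the shadow used ring and kick the guest.
 */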
1200 static __rte_noinline uint32_t
1201 virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
1202 struct rte_mbuf **pkts, uint32_t count)
1204 uint32_t pkt_idx = 0;
1205 uint16_t num_buffers;
1206 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1207 uint16_t avail_head;
1210 * The ordering between avail index and
1211 * desc reads needs to be enforced.
1213 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
1215 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
1217 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
1218 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
1219 uint16_t nr_vec = 0;
1221 if (unlikely(reserve_avail_buf_split(dev, vq,
1222 pkt_len, buf_vec, &num_buffers,
1223 avail_head, &nr_vec) < 0)) {
1224 VHOST_LOG_DATA(DEBUG,
1225 "(%d) failed to get enough desc from vring\n",
1227 vq->shadow_used_idx -= num_buffers;
1231 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
1232 dev->vid, vq->last_avail_idx,
1233 vq->last_avail_idx + num_buffers);
1235 if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
1238 vq->shadow_used_idx -= num_buffers;
1242 vq->last_avail_idx += num_buffers;
1245 do_data_copy_enqueue(dev, vq);
1247 if (likely(vq->shadow_used_idx)) {
1248 flush_shadow_used_ring_split(dev, vq);
1249 vhost_vring_call_split(dev, vq);
1255 static __rte_always_inline int
1256 virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
1257 struct vhost_virtqueue *vq,
1258 struct rte_mbuf **pkts,
1259 uint64_t *desc_addrs,
1262 bool wrap_counter = vq->avail_wrap_counter;
1263 struct vring_packed_desc *descs = vq->desc_packed;
1264 uint16_t avail_idx = vq->last_avail_idx;
1265 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1268 if (unlikely(avail_idx & PACKED_BATCH_MASK))
1271 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
1274 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1275 if (unlikely(pkts[i]->next != NULL))
1277 if (unlikely(!desc_is_avail(&descs[avail_idx + i],
1282 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1283 lens[i] = descs[avail_idx + i].len;
1285 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1286 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
1290 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1291 desc_addrs[i] = vhost_iova_to_vva(dev, vq,
1292 descs[avail_idx + i].addr,
1296 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1297 if (unlikely(!desc_addrs[i]))
1299 if (unlikely(lens[i] != descs[avail_idx + i].len))
1306 static __rte_always_inline int
1307 virtio_dev_rx_async_batch_check(struct virtio_net *dev,
1308 struct vhost_virtqueue *vq,
1309 struct rte_mbuf **pkts,
1310 uint64_t *desc_addrs,
1313 bool wrap_counter = vq->avail_wrap_counter;
1314 struct vring_packed_desc *descs = vq->desc_packed;
1315 uint16_t avail_idx = vq->last_avail_idx;
1316 uint16_t used_idx = vq->last_used_idx;
1317 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1318 uint32_t cpy_threshold = vq->async_threshold;
1321 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1322 if (unlikely(pkts[i]->data_len >= cpy_threshold))
1326 if (unlikely(avail_idx & PACKED_BATCH_MASK))
1329 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
1332 if (unlikely((used_idx + PACKED_BATCH_SIZE) > vq->size))
1335 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1336 if (unlikely(pkts[i]->next != NULL))
1338 if (unlikely(!desc_is_avail(&descs[avail_idx + i],
1343 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1344 lens[i] = descs[avail_idx + i].len;
1346 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1347 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
1351 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1352 desc_addrs[i] = vhost_iova_to_vva(dev, vq,
1353 descs[avail_idx + i].addr,
1357 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1358 if (unlikely(!desc_addrs[i]))
1360 if (unlikely(lens[i] != descs[avail_idx + i].len))
1367 static __rte_always_inline void
1368 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
1369 struct vhost_virtqueue *vq,
1370 struct rte_mbuf **pkts,
1371 uint64_t *desc_addrs,
1374 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1375 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
1376 struct vring_packed_desc *descs = vq->desc_packed;
1377 uint16_t avail_idx = vq->last_avail_idx;
1378 uint16_t ids[PACKED_BATCH_SIZE];
1381 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1382 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
1383 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)
1384 (uintptr_t)desc_addrs[i];
1385 lens[i] = pkts[i]->pkt_len +
1386 sizeof(struct virtio_net_hdr_mrg_rxbuf);
1389 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1390 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr);
1392 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
1394 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1395 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset),
1396 rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
1400 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1401 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr,
1404 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1405 ids[i] = descs[avail_idx + i].id;
1407 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids);
1410 static __rte_always_inline int
1411 virtio_dev_rx_sync_batch_packed(struct virtio_net *dev,
1412 struct vhost_virtqueue *vq,
1413 struct rte_mbuf **pkts)
1415 uint64_t desc_addrs[PACKED_BATCH_SIZE];
1416 uint64_t lens[PACKED_BATCH_SIZE];
1418 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)
1421 if (vq->shadow_used_idx) {
1422 do_data_copy_enqueue(dev, vq);
1423 vhost_flush_enqueue_shadow_packed(dev, vq);
1426 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);
1431 static __rte_always_inline int
1432 virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
1433 struct vhost_virtqueue *vq,
1434 struct rte_mbuf **pkts,
1435 struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
1438 uint64_t desc_addrs[PACKED_BATCH_SIZE];
1439 uint64_t lens[PACKED_BATCH_SIZE];
1441 if (virtio_dev_rx_async_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)
1444 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);
1446 if (vq->shadow_used_idx) {
1447 do_data_copy_enqueue(dev, vq);
1448 vhost_flush_enqueue_shadow_packed(dev, vq);
1451 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1452 comp_pkts[(*pkt_done)++] = pkts[i];
1457 static __rte_always_inline int16_t
1458 virtio_dev_rx_single_packed(struct virtio_net *dev,
1459 struct vhost_virtqueue *vq,
1460 struct rte_mbuf *pkt)
1462 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1463 uint16_t nr_descs = 0;
1465 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec,
1467 VHOST_LOG_DATA(DEBUG,
1468 "(%d) failed to get enough desc from vring\n",
1473 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
1474 dev->vid, vq->last_avail_idx,
1475 vq->last_avail_idx + nr_descs);
1477 vq_inc_last_avail_packed(vq, nr_descs);
1482 static __rte_noinline uint32_t
1483 virtio_dev_rx_packed(struct virtio_net *dev,
1484 struct vhost_virtqueue *__rte_restrict vq,
1485 struct rte_mbuf **__rte_restrict pkts,
1488 uint32_t pkt_idx = 0;
1491 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
1493 if (count - pkt_idx >= PACKED_BATCH_SIZE) {
1494 if (!virtio_dev_rx_sync_batch_packed(dev, vq,
1496 pkt_idx += PACKED_BATCH_SIZE;
1501 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx]))
1505 } while (pkt_idx < count);
1507 if (vq->shadow_used_idx) {
1508 do_data_copy_enqueue(dev, vq);
1509 vhost_flush_enqueue_shadow_packed(dev, vq);
1513 vhost_vring_call_packed(dev, vq);
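/*
 * Common synchronous enqueue entry point: validate the queue index, take
 * the vq access lock and, when VIRTIO_F_IOMMU_PLATFORM is negotiated, the
 * IOTLB read lock, translate the rings if required, then dispatch to the
 * packed or split implementation.
 */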
1518 static __rte_always_inline uint32_t
1519 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
1520 struct rte_mbuf **pkts, uint32_t count)
1522 struct vhost_virtqueue *vq;
1525 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
1526 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
1527 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
1528 dev->vid, __func__, queue_id);
1532 vq = dev->virtqueue[queue_id];
1534 rte_spinlock_lock(&vq->access_lock);
1536 if (unlikely(!vq->enabled))
1537 goto out_access_unlock;
1539 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
1540 vhost_user_iotlb_rd_lock(vq);
1542 if (unlikely(!vq->access_ok))
1543 if (unlikely(vring_translate(dev, vq) < 0))
1546 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
1550 if (vq_is_packed(dev))
1551 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count);
1553 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count);
1556 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
1557 vhost_user_iotlb_rd_unlock(vq);
1560 rte_spinlock_unlock(&vq->access_lock);
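/*
 * Public API: enqueue a burst of host mbufs into a guest RX virtqueue.
 * Illustrative usage sketch (vid, port_id, mbufs and the burst size are
 * placeholders, not defined in this file; queue 0 is the first guest RX
 * queue):
 *
 *	struct rte_mbuf *mbufs[32];
 *	uint16_t i, nb_rx, nb_enq;
 *
 *	nb_rx = rte_eth_rx_burst(port_id, 0, mbufs, 32);
 *	nb_enq = rte_vhost_enqueue_burst(vid, 0, mbufs, nb_rx);
 *	for (i = 0; i < nb_rx; i++)
 *		rte_pktmbuf_free(mbufs[i]);
 *
 * The synchronous path copies the packet data into the guest buffers, so
 * the caller can free all mbufs afterwards regardless of nb_enq.
 */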
1566 rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
1567 struct rte_mbuf **__rte_restrict pkts, uint16_t count)
1569 struct virtio_net *dev = get_device(vid);
1574 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
1576 "(%d) %s: built-in vhost net backend is disabled.\n",
1577 dev->vid, __func__);
1581 return virtio_dev_rx(dev, queue_id, pkts, count);
1584 static __rte_always_inline uint16_t
1585 virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
1586 uint16_t vq_size, uint16_t n_inflight)
1588 return pkts_idx > n_inflight ? (pkts_idx - n_inflight) :
1589 (vq_size - n_inflight + pkts_idx) % vq_size;
1592 static __rte_always_inline void
1593 store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring,
1594 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
1596 size_t elem_size = sizeof(struct vring_used_elem);
1598 if (d_idx + count <= ring_size) {
1599 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
1601 uint16_t size = ring_size - d_idx;
1603 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
1604 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
1608 static __rte_always_inline void
1609 store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,
1610 struct vring_used_elem_packed *d_ring,
1611 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
1613 size_t elem_size = sizeof(struct vring_used_elem_packed);
1615 if (d_idx + count <= ring_size) {
1616 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
1618 uint16_t size = ring_size - d_idx;
1620 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
1621 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
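/*
 * Asynchronous enqueue for split rings. Packets above the async copy
 * threshold are described in iovecs and submitted to the async channel
 * through transfer_data(); their used-ring entries are parked in
 * vq->async_descs_split until the copies complete, while packets handled
 * entirely by the CPU are reported back immediately through comp_pkts.
 */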
1625 static __rte_noinline uint32_t
1626 virtio_dev_rx_async_submit_split(struct virtio_net *dev,
1627 struct vhost_virtqueue *vq, uint16_t queue_id,
1628 struct rte_mbuf **pkts, uint32_t count,
1629 struct rte_mbuf **comp_pkts, uint32_t *comp_count)
1631 uint32_t pkt_idx = 0, pkt_burst_idx = 0;
1632 uint16_t num_buffers;
1633 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1634 uint16_t avail_head;
1636 struct rte_vhost_iov_iter *it_pool = vq->it_pool;
1637 struct iovec *vec_pool = vq->vec_pool;
1638 struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
1639 struct iovec *src_iovec = vec_pool;
1640 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
1641 uint16_t slot_idx = 0;
1642 uint16_t segs_await = 0;
1643 uint16_t iovec_idx = 0, it_idx = 0;
1644 struct async_inflight_info *pkts_info = vq->async_pkts_info;
1645 uint32_t n_pkts = 0, pkt_err = 0;
1646 uint32_t num_async_pkts = 0, num_done_pkts = 0;
1650 uint16_t last_avail_idx;
1651 } async_pkts_log[MAX_PKT_BURST];
1654 * The ordering between avail index and desc reads needs to be enforced.
1656 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
1658 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
1660 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
1661 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
1662 uint16_t nr_vec = 0;
1664 if (unlikely(reserve_avail_buf_split(dev, vq,
1665 pkt_len, buf_vec, &num_buffers,
1666 avail_head, &nr_vec) < 0)) {
1667 VHOST_LOG_DATA(DEBUG,
1668 "(%d) failed to get enough desc from vring\n",
1670 vq->shadow_used_idx -= num_buffers;
1674 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
1675 dev->vid, vq->last_avail_idx,
1676 vq->last_avail_idx + num_buffers);
1678 if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers,
1679 &src_iovec[iovec_idx], &dst_iovec[iovec_idx],
1680 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) {
1681 vq->shadow_used_idx -= num_buffers;
1685 slot_idx = (vq->async_pkts_idx + num_async_pkts) &
1687 if (it_pool[it_idx].count) {
1690 async_fill_desc(&tdes[pkt_burst_idx++],
1691 &it_pool[it_idx], &it_pool[it_idx + 1]);
1692 pkts_info[slot_idx].descs = num_buffers;
1693 pkts_info[slot_idx].mbuf = pkts[pkt_idx];
1694 async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
1695 async_pkts_log[num_async_pkts++].last_avail_idx =
1698 iovec_idx += it_pool[it_idx].nr_segs;
1701 segs_await += it_pool[it_idx].nr_segs;
1704 * recover the shadow used ring and keep the DMA-occupied descriptors
1707 from = vq->shadow_used_idx - num_buffers;
1708 to = vq->async_desc_idx_split & (vq->size - 1);
1710 store_dma_desc_info_split(vq->shadow_used_split,
1711 vq->async_descs_split, vq->size, from, to, num_buffers);
1713 vq->async_desc_idx_split += num_buffers;
1714 vq->shadow_used_idx -= num_buffers;
1716 comp_pkts[num_done_pkts++] = pkts[pkt_idx];
1718 vq->last_avail_idx += num_buffers;
1721 * Conditions to trigger the async device transfer:
1722 * - the number of buffered packets reaches the transfer threshold
1723 * - the number of unused async iovecs drops below the max vhost vector size
1725 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
1726 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
1728 n_xfer = vq->async_ops.transfer_data(dev->vid,
1729 queue_id, tdes, 0, pkt_burst_idx);
1734 "(%d) %s: failed to transfer data for queue id %d.\n",
1735 dev->vid, __func__, queue_id);
1743 vq->async_pkts_inflight_n += n_pkts;
1745 if (unlikely(n_pkts < pkt_burst_idx)) {
1747 * log the number of error packets here; the actual
1748 * error processing is done when the application polls for completions
1751 pkt_err = pkt_burst_idx - n_pkts;
1760 if (pkt_burst_idx) {
1761 n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);
1765 VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n",
1766 dev->vid, __func__, queue_id);
1770 vq->async_pkts_inflight_n += n_pkts;
1772 if (unlikely(n_pkts < pkt_burst_idx))
1773 pkt_err = pkt_burst_idx - n_pkts;
1776 do_data_copy_enqueue(dev, vq);
1778 if (unlikely(pkt_err)) {
1779 uint16_t num_descs = 0;
1781 num_async_pkts -= pkt_err;
1782 /* calculate the sum of descriptors of DMA-error packets. */
1783 while (pkt_err-- > 0) {
1784 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs;
1787 vq->async_desc_idx_split -= num_descs;
1788 /* recover shadow used ring and available ring */
1789 vq->shadow_used_idx -= (vq->last_avail_idx -
1790 async_pkts_log[num_async_pkts].last_avail_idx -
1792 vq->last_avail_idx =
1793 async_pkts_log[num_async_pkts].last_avail_idx;
1794 pkt_idx = async_pkts_log[num_async_pkts].pkt_idx;
1795 num_done_pkts = pkt_idx - num_async_pkts;
1798 vq->async_pkts_idx += num_async_pkts;
1799 *comp_count = num_done_pkts;
1801 if (likely(vq->shadow_used_idx)) {
1802 flush_shadow_used_ring_split(dev, vq);
1803 vhost_vring_call_split(dev, vq);
1809 static __rte_always_inline void
1810 vhost_update_used_packed(struct vhost_virtqueue *vq,
1811 struct vring_used_elem_packed *shadow_ring,
1815 uint16_t used_idx = vq->last_used_idx;
1816 uint16_t head_idx = vq->last_used_idx;
1817 uint16_t head_flags = 0;
1822 /* Split loop in two to save memory barriers */
1823 for (i = 0; i < count; i++) {
1824 vq->desc_packed[used_idx].id = shadow_ring[i].id;
1825 vq->desc_packed[used_idx].len = shadow_ring[i].len;
1827 used_idx += shadow_ring[i].count;
1828 if (used_idx >= vq->size)
1829 used_idx -= vq->size;
1832 /* The ordering for storing desc flags needs to be enforced. */
1833 rte_atomic_thread_fence(__ATOMIC_RELEASE);
1835 for (i = 0; i < count; i++) {
1838 if (vq->shadow_used_packed[i].len)
1839 flags = VRING_DESC_F_WRITE;
1843 if (vq->used_wrap_counter) {
1844 flags |= VRING_DESC_F_USED;
1845 flags |= VRING_DESC_F_AVAIL;
1847 flags &= ~VRING_DESC_F_USED;
1848 flags &= ~VRING_DESC_F_AVAIL;
1852 vq->desc_packed[vq->last_used_idx].flags = flags;
1854 head_idx = vq->last_used_idx;
1858 vq_inc_last_used_packed(vq, shadow_ring[i].count);
1861 vq->desc_packed[head_idx].flags = head_flags;
1864 static __rte_always_inline int
1865 vhost_enqueue_async_single_packed(struct virtio_net *dev,
1866 struct vhost_virtqueue *vq,
1867 struct rte_mbuf *pkt,
1868 struct buf_vector *buf_vec,
1870 uint16_t *nr_buffers,
1871 struct vring_packed_desc *async_descs,
1872 struct iovec *src_iovec, struct iovec *dst_iovec,
1873 struct rte_vhost_iov_iter *src_it,
1874 struct rte_vhost_iov_iter *dst_it)
1876 uint16_t nr_vec = 0;
1877 uint16_t avail_idx = vq->last_avail_idx;
1878 uint16_t max_tries, tries = 0;
1879 uint16_t buf_id = 0;
1881 uint16_t desc_count = 0;
1882 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
1883 uint32_t buffer_len[vq->size];
1884 uint16_t buffer_buf_id[vq->size];
1885 uint16_t buffer_desc_count[vq->size];
1887 if (rxvq_is_mergeable(dev))
1888 max_tries = vq->size - 1;
1894 * if we have tried all available ring items and still
1895 * cannot get enough buffers, something abnormal has happened
1898 if (unlikely(++tries > max_tries))
1901 if (unlikely(fill_vec_buf_packed(dev, vq, avail_idx, &desc_count, buf_vec, &nr_vec,
1902 &buf_id, &len, VHOST_ACCESS_RW) < 0))
1905 len = RTE_MIN(len, size);
1908 buffer_len[*nr_buffers] = len;
1909 buffer_buf_id[*nr_buffers] = buf_id;
1910 buffer_desc_count[*nr_buffers] = desc_count;
1913 *nr_descs += desc_count;
1914 avail_idx += desc_count;
1915 if (avail_idx >= vq->size)
1916 avail_idx -= vq->size;
1919 if (async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, src_iovec, dst_iovec,
1920 src_it, dst_it) < 0)
1922 /* store descriptors for DMA */
1923 if (avail_idx >= *nr_descs) {
1924 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],
1925 *nr_descs * sizeof(struct vring_packed_desc));
1927 uint16_t nr_copy = vq->size - vq->last_avail_idx;
1929 rte_memcpy(async_descs, &vq->desc_packed[vq->last_avail_idx],
1930 nr_copy * sizeof(struct vring_packed_desc));
1931 rte_memcpy(async_descs + nr_copy, vq->desc_packed,
1932 (*nr_descs - nr_copy) * sizeof(struct vring_packed_desc));
1935 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers);
1940 static __rte_always_inline int16_t
1941 virtio_dev_rx_async_single_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
1942 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers,
1943 struct vring_packed_desc *async_descs,
1944 struct iovec *src_iovec, struct iovec *dst_iovec,
1945 struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it)
1947 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1949 if (unlikely(vhost_enqueue_async_single_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers,
1950 async_descs, src_iovec, dst_iovec,
1951 src_it, dst_it) < 0)) {
1952 VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid);
1956 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
1957 dev->vid, vq->last_avail_idx, vq->last_avail_idx + *nr_descs);
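/*
 * Roll back the packed-ring state for packets whose async transfer was not
 * accepted: restore the overwritten descriptors from async_descs[], rewind
 * last_avail_idx (toggling the wrap counter when crossing the ring
 * boundary) and recompute the number of completed packets.
 */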
1962 static __rte_always_inline void
1963 dma_error_handler_packed(struct vhost_virtqueue *vq, struct vring_packed_desc *async_descs,
1964 uint16_t async_descs_idx, uint16_t slot_idx, uint32_t nr_err,
1965 uint32_t *pkt_idx, uint32_t *num_async_pkts, uint32_t *num_done_pkts)
1967 uint16_t descs_err = 0;
1968 uint16_t buffers_err = 0;
1969 struct async_inflight_info *pkts_info = vq->async_pkts_info;
1971 *num_async_pkts -= nr_err;
1973 /* calculate the sum of buffers and descs of DMA-error packets. */
1974 while (nr_err-- > 0) {
1975 descs_err += pkts_info[slot_idx % vq->size].descs;
1976 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers;
1980 vq->async_buffer_idx_packed -= buffers_err;
1982 if (vq->last_avail_idx >= descs_err) {
1983 vq->last_avail_idx -= descs_err;
1985 rte_memcpy(&vq->desc_packed[vq->last_avail_idx],
1986 &async_descs[async_descs_idx - descs_err],
1987 descs_err * sizeof(struct vring_packed_desc));
1991 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err;
1992 nr_copy = vq->size - vq->last_avail_idx;
1993 rte_memcpy(&vq->desc_packed[vq->last_avail_idx],
1994 &async_descs[async_descs_idx - descs_err],
1995 nr_copy * sizeof(struct vring_packed_desc));
1996 descs_err -= nr_copy;
1997 rte_memcpy(&vq->desc_packed[0], &async_descs[async_descs_idx - descs_err],
1998 descs_err * sizeof(struct vring_packed_desc));
1999 vq->avail_wrap_counter ^= 1;
2002 *num_done_pkts = *pkt_idx - *num_async_pkts;
2005 static __rte_noinline uint32_t
2006 virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
2007 struct vhost_virtqueue *vq, uint16_t queue_id,
2008 struct rte_mbuf **pkts, uint32_t count,
2009 struct rte_mbuf **comp_pkts, uint32_t *comp_count)
2011 uint32_t pkt_idx = 0, pkt_burst_idx = 0;
2012 uint32_t remained = count;
2013 uint16_t async_descs_idx = 0;
2014 uint16_t num_buffers;
2018 struct rte_vhost_iov_iter *it_pool = vq->it_pool;
2019 struct iovec *vec_pool = vq->vec_pool;
2020 struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
2021 struct iovec *src_iovec = vec_pool;
2022 struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
2023 uint16_t slot_idx = 0;
2024 uint16_t segs_await = 0;
2025 uint16_t iovec_idx = 0, it_idx = 0;
2026 struct async_inflight_info *pkts_info = vq->async_pkts_info;
2027 uint32_t n_pkts = 0, pkt_err = 0;
2028 uint32_t num_async_pkts = 0, num_done_pkts = 0;
2029 struct vring_packed_desc async_descs[vq->size];
2032 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
2033 if (remained >= PACKED_BATCH_SIZE) {
2034 if (!virtio_dev_rx_async_batch_packed(dev, vq,
2035 &pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
2036 pkt_idx += PACKED_BATCH_SIZE;
2037 remained -= PACKED_BATCH_SIZE;
2044 if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, pkts[pkt_idx],
2045 &num_descs, &num_buffers,
2046 &async_descs[async_descs_idx],
2047 &src_iovec[iovec_idx], &dst_iovec[iovec_idx],
2048 &it_pool[it_idx], &it_pool[it_idx + 1]) < 0))
2051 VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
2052 dev->vid, vq->last_avail_idx,
2053 vq->last_avail_idx + num_descs);
2055 slot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size;
2056 if (it_pool[it_idx].count) {
2059 async_descs_idx += num_descs;
2060 async_fill_desc(&tdes[pkt_burst_idx++],
2061 &it_pool[it_idx], &it_pool[it_idx + 1]);
2062 pkts_info[slot_idx].descs = num_descs;
2063 pkts_info[slot_idx].nr_buffers = num_buffers;
2064 pkts_info[slot_idx].mbuf = pkts[pkt_idx];
2066 iovec_idx += it_pool[it_idx].nr_segs;
2069 segs_await += it_pool[it_idx].nr_segs;
2072 * recover the shadow used ring and keep the DMA-occupied descriptors
2075 from = vq->shadow_used_idx - num_buffers;
2076 store_dma_desc_info_packed(vq->shadow_used_packed,
2077 vq->async_buffers_packed, vq->size, from,
2078 vq->async_buffer_idx_packed, num_buffers);
2080 vq->async_buffer_idx_packed += num_buffers;
2081 if (vq->async_buffer_idx_packed >= vq->size)
2082 vq->async_buffer_idx_packed -= vq->size;
2083 vq->shadow_used_idx -= num_buffers;
2085 comp_pkts[num_done_pkts++] = pkts[pkt_idx];
2090 vq_inc_last_avail_packed(vq, num_descs);
2093 * Conditions to trigger the async device transfer:
2094 * - the number of buffered packets reaches the transfer threshold
2095 * - the number of unused async iovecs drops below the max vhost vector size
2097 if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
2098 ((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) {
2099 n_xfer = vq->async_ops.transfer_data(dev->vid,
2100 queue_id, tdes, 0, pkt_burst_idx);
2105 "(%d) %s: failed to transfer data for queue id %d.\n",
2106 dev->vid, __func__, queue_id);
2113 vq->async_pkts_inflight_n += n_pkts;
2115 if (unlikely(n_pkts < pkt_burst_idx)) {
2117 * log the number of error packets here; the actual
2118 * error processing is done when the application polls for completions
2121 pkt_err = pkt_burst_idx - n_pkts;
2128 } while (pkt_idx < count);
2130 if (pkt_burst_idx) {
2131 n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);
2135 VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n",
2136 dev->vid, __func__, queue_id);
2140 vq->async_pkts_inflight_n += n_pkts;
2142 if (unlikely(n_pkts < pkt_burst_idx))
2143 pkt_err = pkt_burst_idx - n_pkts;
2146 do_data_copy_enqueue(dev, vq);
2148 if (unlikely(pkt_err))
2149 dma_error_handler_packed(vq, async_descs, async_descs_idx, slot_idx, pkt_err,
2150 &pkt_idx, &num_async_pkts, &num_done_pkts);
2151 vq->async_pkts_idx += num_async_pkts;
2152 if (vq->async_pkts_idx >= vq->size)
2153 vq->async_pkts_idx -= vq->size;
2154 *comp_count = num_done_pkts;
2156 if (likely(vq->shadow_used_idx)) {
2157 vhost_flush_enqueue_shadow_packed(dev, vq);
2158 vhost_vring_call_packed(dev, vq);
2164 static __rte_always_inline void
2165 write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs)
2167 uint16_t nr_left = n_descs;
2172 from = vq->last_async_desc_idx_split & (vq->size - 1);
2173 nr_copy = nr_left + from <= vq->size ? nr_left : vq->size - from;
2174 to = vq->last_used_idx & (vq->size - 1);
2176 if (to + nr_copy <= vq->size) {
2177 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from],
2178 nr_copy * sizeof(struct vring_used_elem));
2180 uint16_t size = vq->size - to;
2182 rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from],
2183 size * sizeof(struct vring_used_elem));
2184 rte_memcpy(&vq->used->ring[0], &vq->async_descs_split[from + size],
2185 (nr_copy - size) * sizeof(struct vring_used_elem));
2188 vq->last_async_desc_idx_split += nr_copy;
2189 vq->last_used_idx += nr_copy;
2191 } while (nr_left > 0);
2194 static __rte_always_inline void
2195 write_back_completed_descs_packed(struct vhost_virtqueue *vq,
2198 uint16_t nr_left = n_buffers;
2202 from = vq->last_async_buffer_idx_packed;
2203 to = (from + nr_left) % vq->size;
2205 vhost_update_used_packed(vq, vq->async_buffers_packed + from, to - from);
2206 vq->last_async_buffer_idx_packed += nr_left;
2209 vhost_update_used_packed(vq, vq->async_buffers_packed + from,
2211 vq->last_async_buffer_idx_packed = 0;
2212 nr_left -= vq->size - from;
2214 } while (nr_left > 0);
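/*
 * Poll the async channel for finished copies, hand the corresponding mbufs
 * back to the caller and write the completed descriptors into the
 * guest-visible used ring (split) or update their flags in place (packed).
 */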
2217 static __rte_always_inline uint16_t
2218 vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
2219 struct rte_mbuf **pkts, uint16_t count)
2221 struct vhost_virtqueue *vq;
2222 uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
2223 uint16_t start_idx, pkts_idx, vq_size;
2224 struct async_inflight_info *pkts_info;
2228 vq = dev->virtqueue[queue_id];
2230 pkts_idx = vq->async_pkts_idx % vq->size;
2231 pkts_info = vq->async_pkts_info;
2233 start_idx = virtio_dev_rx_async_get_info_idx(pkts_idx,
2234 vq_size, vq->async_pkts_inflight_n);
2236 if (count > vq->async_last_pkts_n) {
2237 n_cpl = vq->async_ops.check_completed_copies(dev->vid,
2238 queue_id, 0, count - vq->async_last_pkts_n);
2243 "(%d) %s: failed to check completed copies for queue id %d.\n",
2244 dev->vid, __func__, queue_id);
2248 n_pkts_cpl += vq->async_last_pkts_n;
2250 n_pkts_put = RTE_MIN(count, n_pkts_cpl);
2251 if (unlikely(n_pkts_put == 0)) {
2252 vq->async_last_pkts_n = n_pkts_cpl;
2256 if (vq_is_packed(dev)) {
2257 for (i = 0; i < n_pkts_put; i++) {
2258 from = (start_idx + i) % vq_size;
2259 n_buffers += pkts_info[from].nr_buffers;
2260 pkts[i] = pkts_info[from].mbuf;
2263 for (i = 0; i < n_pkts_put; i++) {
2264 from = (start_idx + i) & (vq_size - 1);
2265 n_descs += pkts_info[from].descs;
2266 pkts[i] = pkts_info[from].mbuf;
2270 vq->async_last_pkts_n = n_pkts_cpl - n_pkts_put;
2271 vq->async_pkts_inflight_n -= n_pkts_put;
2273 if (likely(vq->enabled && vq->access_ok)) {
2274 if (vq_is_packed(dev)) {
2275 write_back_completed_descs_packed(vq, n_buffers);
2277 vhost_vring_call_packed(dev, vq);
2279 write_back_completed_descs_split(vq, n_descs);
2281 __atomic_add_fetch(&vq->used->idx, n_descs,
2283 vhost_vring_call_split(dev, vq);
2286 if (vq_is_packed(dev)) {
2287 vq->last_async_buffer_idx_packed += n_buffers;
2288 if (vq->last_async_buffer_idx_packed >= vq->size)
2289 vq->last_async_buffer_idx_packed -= vq->size;
2291 vq->last_async_desc_idx_split += n_descs;
2299 rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
2300 struct rte_mbuf **pkts, uint16_t count)
2302 struct virtio_net *dev = get_device(vid);
2303 struct vhost_virtqueue *vq;
2304 uint16_t n_pkts_cpl = 0;
2309 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
2310 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
2311 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
2312 dev->vid, __func__, queue_id);
2316 vq = dev->virtqueue[queue_id];
2318 if (unlikely(!vq->async_registered)) {
2319 VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
2320 dev->vid, __func__, queue_id);
2324 rte_spinlock_lock(&vq->access_lock);
2326 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
2328 rte_spinlock_unlock(&vq->access_lock);
2334 rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
2335 struct rte_mbuf **pkts, uint16_t count)
2337 struct virtio_net *dev = get_device(vid);
2338 struct vhost_virtqueue *vq;
2339 uint16_t n_pkts_cpl = 0;
2344 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
2345 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
2346 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
2347 dev->vid, __func__, queue_id);
2351 vq = dev->virtqueue[queue_id];
2353 if (unlikely(!vq->async_registered)) {
2354 VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
2355 dev->vid, __func__, queue_id);
2359 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
2364 static __rte_always_inline uint32_t
2365 virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
2366 struct rte_mbuf **pkts, uint32_t count,
2367 struct rte_mbuf **comp_pkts, uint32_t *comp_count)
2369 struct vhost_virtqueue *vq;
2372 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
2373 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
2374 VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
2375 dev->vid, __func__, queue_id);
2379 vq = dev->virtqueue[queue_id];
2381 rte_spinlock_lock(&vq->access_lock);
2383 if (unlikely(!vq->enabled || !vq->async_registered))
2384 goto out_access_unlock;
2386 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
2387 vhost_user_iotlb_rd_lock(vq);
2389 if (unlikely(!vq->access_ok))
2390 if (unlikely(vring_translate(dev, vq) < 0))
2393 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
2397 if (vq_is_packed(dev))
2398 nb_tx = virtio_dev_rx_async_submit_packed(dev,
2399 vq, queue_id, pkts, count, comp_pkts,
2402 nb_tx = virtio_dev_rx_async_submit_split(dev,
2403 vq, queue_id, pkts, count, comp_pkts,
2407 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
2408 vhost_user_iotlb_rd_unlock(vq);
2411 rte_spinlock_unlock(&vq->access_lock);
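/*
 * Public API of the async enqueue path. Illustrative usage sketch (vid,
 * mbufs, nb_rx and the array sizes are placeholders): packets returned in
 * comp_pkts completed synchronously and may be freed right away, while the
 * remaining mbufs must stay untouched until they are reported done by
 * rte_vhost_poll_enqueue_completed():
 *
 *	struct rte_mbuf *comp[32], *done[32];
 *	uint32_t n_comp = 0;
 *	uint16_t n_enq, n_done;
 *
 *	n_enq = rte_vhost_submit_enqueue_burst(vid, 0, mbufs, nb_rx,
 *			comp, &n_comp);
 *	... later, e.g. on the next polling iteration ...
 *	n_done = rte_vhost_poll_enqueue_completed(vid, 0, done, 32);
 */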
2417 rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
2418 struct rte_mbuf **pkts, uint16_t count,
2419 struct rte_mbuf **comp_pkts, uint32_t *comp_count)
2421 struct virtio_net *dev = get_device(vid);
2427 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
2429 "(%d) %s: built-in vhost net backend is disabled.\n",
2430 dev->vid, __func__);
2434 return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, comp_pkts,
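/*
 * Illustrative application-side sketch (an assumption, not part of this
 * file): a burst passed to rte_vhost_submit_enqueue_burst() is split into
 * packets copied synchronously by the CPU (returned via comp_pkts/comp_count
 * and safe to free right away) and packets left in flight on the DMA channel,
 * which must be reclaimed later with rte_vhost_poll_enqueue_completed().
 * Packets beyond the returned count were not enqueued at all and remain owned
 * by the caller. The 32-packet cap is an assumption of the example.
 *
 *	static void
 *	example_async_enqueue(int vid, uint16_t queue_id,
 *			struct rte_mbuf **pkts, uint16_t count)
 *	{
 *		struct rte_mbuf *comp[32];
 *		uint32_t n_comp = 0;
 *		uint16_t n_enq;
 *
 *		if (count > 32)
 *			count = 32;
 *		n_enq = rte_vhost_submit_enqueue_burst(vid, queue_id, pkts,
 *						count, comp, &n_comp);
 *		rte_pktmbuf_free_bulk(comp, n_comp);
 *		rte_pktmbuf_free_bulk(&pkts[n_enq], count - n_enq);
 *	}
 */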
2439 virtio_net_with_host_offload(struct virtio_net *dev)
2442 ((1ULL << VIRTIO_NET_F_CSUM) |
2443 (1ULL << VIRTIO_NET_F_HOST_ECN) |
2444 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2445 (1ULL << VIRTIO_NET_F_HOST_TSO6) |
2446 (1ULL << VIRTIO_NET_F_HOST_UFO)))
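/*
 * Illustrative sketch (an assumption, not part of this file): the check above
 * is internal; an application can derive the same information from the
 * negotiated feature bits, e.g. to learn whether dequeued packets may carry
 * guest offload requests at all.
 *
 *	static bool
 *	example_host_offload_negotiated(int vid)
 *	{
 *		uint64_t features = 0;
 *
 *		if (rte_vhost_get_negotiated_features(vid, &features) != 0)
 *			return false;
 *
 *		return (features & ((1ULL << VIRTIO_NET_F_CSUM) |
 *				(1ULL << VIRTIO_NET_F_HOST_TSO4) |
 *				(1ULL << VIRTIO_NET_F_HOST_TSO6))) != 0;
 *	}
 */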
2453 parse_headers(struct rte_mbuf *m, uint8_t *l4_proto)
2455 struct rte_ipv4_hdr *ipv4_hdr;
2456 struct rte_ipv6_hdr *ipv6_hdr;
2457 struct rte_ether_hdr *eth_hdr;
2459 uint16_t data_len = rte_pktmbuf_data_len(m);
2461 if (data_len < sizeof(struct rte_ether_hdr))
2464 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
2466 m->l2_len = sizeof(struct rte_ether_hdr);
2467 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
2469 if (ethertype == RTE_ETHER_TYPE_VLAN) {
2470 if (data_len < sizeof(struct rte_ether_hdr) +
2471 sizeof(struct rte_vlan_hdr))
2474 struct rte_vlan_hdr *vlan_hdr =
2475 (struct rte_vlan_hdr *)(eth_hdr + 1);
2477 m->l2_len += sizeof(struct rte_vlan_hdr);
2478 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
2481 switch (ethertype) {
2482 case RTE_ETHER_TYPE_IPV4:
2483 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr))
2485 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
2487 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr);
2488 if (data_len < m->l2_len + m->l3_len)
2490 m->ol_flags |= PKT_TX_IPV4;
2491 *l4_proto = ipv4_hdr->next_proto_id;
2493 case RTE_ETHER_TYPE_IPV6:
2494 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr))
2496 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
2498 m->l3_len = sizeof(struct rte_ipv6_hdr);
2499 m->ol_flags |= PKT_TX_IPV6;
2500 *l4_proto = ipv6_hdr->proto;
2503 /* a valid L3 header is needed for further L4 parsing */
2507 /* both CSUM and GSO need a valid L4 header */
2508 switch (*l4_proto) {
2510 if (data_len < m->l2_len + m->l3_len +
2511 sizeof(struct rte_tcp_hdr))
2515 if (data_len < m->l2_len + m->l3_len +
2516 sizeof(struct rte_udp_hdr))
2520 if (data_len < m->l2_len + m->l3_len +
2521 sizeof(struct rte_sctp_hdr))
2537 static __rte_always_inline void
2538 vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
2540 uint8_t l4_proto = 0;
2541 struct rte_tcp_hdr *tcp_hdr = NULL;
2543 uint16_t data_len = rte_pktmbuf_data_len(m);
2545 if (parse_headers(m, &l4_proto) < 0)
2548 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2549 if (hdr->csum_start == (m->l2_len + m->l3_len)) {
2550 switch (hdr->csum_offset) {
2551 case (offsetof(struct rte_tcp_hdr, cksum)):
2552 if (l4_proto != IPPROTO_TCP)
2554 m->ol_flags |= PKT_TX_TCP_CKSUM;
2556 case (offsetof(struct rte_udp_hdr, dgram_cksum)):
2557 if (l4_proto != IPPROTO_UDP)
2559 m->ol_flags |= PKT_TX_UDP_CKSUM;
2561 case (offsetof(struct rte_sctp_hdr, cksum)):
2562 if (l4_proto != IPPROTO_SCTP)
2564 m->ol_flags |= PKT_TX_SCTP_CKSUM;
2574 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2575 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2576 case VIRTIO_NET_HDR_GSO_TCPV4:
2577 case VIRTIO_NET_HDR_GSO_TCPV6:
2578 if (l4_proto != IPPROTO_TCP)
2580 tcp_hdr = rte_pktmbuf_mtod_offset(m,
2581 struct rte_tcp_hdr *,
2582 m->l2_len + m->l3_len);
2583 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2;
2584 if (data_len < m->l2_len + m->l3_len + tcp_len)
2586 m->ol_flags |= PKT_TX_TCP_SEG;
2587 m->tso_segsz = hdr->gso_size;
2588 m->l4_len = tcp_len;
2590 case VIRTIO_NET_HDR_GSO_UDP:
2591 if (l4_proto != IPPROTO_UDP)
2593 m->ol_flags |= PKT_TX_UDP_SEG;
2594 m->tso_segsz = hdr->gso_size;
2595 m->l4_len = sizeof(struct rte_udp_hdr);
2598 VHOST_LOG_DATA(WARNING,
2599 "unsupported gso type %u.\n", hdr->gso_type);
2611 static __rte_always_inline void
2612 vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m,
2613 bool legacy_ol_flags)
2615 struct rte_net_hdr_lens hdr_lens;
2616 int l4_supported = 0;
2619 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
2622 if (legacy_ol_flags) {
2623 vhost_dequeue_offload_legacy(hdr, m);
2627 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
2629 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
2630 m->packet_type = ptype;
2631 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
2632 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
2633 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
2636 /* According to Virtio 1.1 spec, the device only needs to look at
2637 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path.
2638 * This differs from the incoming packet processing path, where the
2639 * driver can rely on the VIRTIO_NET_HDR_F_DATA_VALID flag set by the device.
2642 * 5.1.6.2.1 Driver Requirements: Packet Transmission
2643 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and
2644 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags.
2646 * 5.1.6.2.2 Device Requirements: Packet Transmission
2647 * The device MUST ignore flag bits that it does not recognize.
2649 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2652 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
2653 if (hdr->csum_start <= hdrlen && l4_supported != 0) {
2654 m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
2656 /* Unknown proto or tunnel, do sw cksum. We can assume
2657 * the cksum field is in the first segment since the
2658 * buffers we provided to the host are large enough.
2659 * In case of SCTP, this will be wrong since it's a CRC
2660 * but there's nothing we can do.
2662 uint16_t csum = 0, off;
2664 if (rte_raw_cksum_mbuf(m, hdr->csum_start,
2665 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0)
2667 if (likely(csum != 0xffff))
2669 off = hdr->csum_offset + hdr->csum_start;
2670 if (rte_pktmbuf_data_len(m) >= off + 1)
2671 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum;
2675 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2676 if (hdr->gso_size == 0)
2679 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2680 case VIRTIO_NET_HDR_GSO_TCPV4:
2681 case VIRTIO_NET_HDR_GSO_TCPV6:
2682 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP)
2684 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
2685 m->tso_segsz = hdr->gso_size;
2687 case VIRTIO_NET_HDR_GSO_UDP:
2688 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP)
2690 m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
2691 m->tso_segsz = hdr->gso_size;
2699 static __rte_noinline void
2700 copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
2701 struct buf_vector *buf_vec)
2704 uint64_t remain = sizeof(struct virtio_net_hdr);
2706 uint64_t dst = (uint64_t)(uintptr_t)hdr;
2709 len = RTE_MIN(remain, buf_vec->buf_len);
2710 src = buf_vec->buf_addr;
2711 rte_memcpy((void *)(uintptr_t)dst,
2712 (void *)(uintptr_t)src, len);
2720 static __rte_always_inline int
2721 copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
2722 struct buf_vector *buf_vec, uint16_t nr_vec,
2723 struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
2724 bool legacy_ol_flags)
2726 uint32_t buf_avail, buf_offset;
2727 uint64_t buf_addr, buf_len;
2728 uint32_t mbuf_avail, mbuf_offset;
2730 struct rte_mbuf *cur = m, *prev = m;
2731 struct virtio_net_hdr tmp_hdr;
2732 struct virtio_net_hdr *hdr = NULL;
2733 /* A counter to guard against a looping desc chain */
2734 uint16_t vec_idx = 0;
2735 struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
2738 buf_addr = buf_vec[vec_idx].buf_addr;
2739 buf_len = buf_vec[vec_idx].buf_len;
2741 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
2746 if (virtio_net_with_host_offload(dev)) {
2747 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
2749 * No luck, the virtio-net header doesn't fit
2750 * in a contiguous virtual area.
2752 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
2755 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
2760 * A virtio driver normally uses at least 2 desc buffers
2761 * for Tx: the first for storing the header, and the others
2762 * for storing the data.
2764 if (unlikely(buf_len < dev->vhost_hlen)) {
2765 buf_offset = dev->vhost_hlen - buf_len;
2767 buf_addr = buf_vec[vec_idx].buf_addr;
2768 buf_len = buf_vec[vec_idx].buf_len;
2769 buf_avail = buf_len - buf_offset;
2770 } else if (buf_len == dev->vhost_hlen) {
2771 if (unlikely(++vec_idx >= nr_vec))
2773 buf_addr = buf_vec[vec_idx].buf_addr;
2774 buf_len = buf_vec[vec_idx].buf_len;
2777 buf_avail = buf_len;
2779 buf_offset = dev->vhost_hlen;
2780 buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
2784 (uintptr_t)(buf_addr + buf_offset),
2785 (uint32_t)buf_avail, 0);
2788 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
2790 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
2792 if (likely(cpy_len > MAX_BATCH_LEN ||
2793 vq->batch_copy_nb_elems >= vq->size ||
2794 (hdr && cur == m))) {
2795 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
2797 (void *)((uintptr_t)(buf_addr +
2798 buf_offset)), cpy_len);
2800 batch_copy[vq->batch_copy_nb_elems].dst =
2801 rte_pktmbuf_mtod_offset(cur, void *,
2803 batch_copy[vq->batch_copy_nb_elems].src =
2804 (void *)((uintptr_t)(buf_addr + buf_offset));
2805 batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
2806 vq->batch_copy_nb_elems++;
2809 mbuf_avail -= cpy_len;
2810 mbuf_offset += cpy_len;
2811 buf_avail -= cpy_len;
2812 buf_offset += cpy_len;
2814 /* This buf reaches its end, get the next one */
2815 if (buf_avail == 0) {
2816 if (++vec_idx >= nr_vec)
2819 buf_addr = buf_vec[vec_idx].buf_addr;
2820 buf_len = buf_vec[vec_idx].buf_len;
2823 buf_avail = buf_len;
2825 PRINT_PACKET(dev, (uintptr_t)buf_addr,
2826 (uint32_t)buf_avail, 0);
2830 * This mbuf reaches its end, get a new one
2831 * to hold more data.
2833 if (mbuf_avail == 0) {
2834 cur = rte_pktmbuf_alloc(mbuf_pool);
2835 if (unlikely(cur == NULL)) {
2836 VHOST_LOG_DATA(ERR, "Failed to allocate memory for mbuf.\n");
2843 prev->data_len = mbuf_offset;
2845 m->pkt_len += mbuf_offset;
2849 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
2853 prev->data_len = mbuf_offset;
2854 m->pkt_len += mbuf_offset;
2857 vhost_dequeue_offload(hdr, m, legacy_ol_flags);
2865 virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque)
2871 virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size)
2873 struct rte_mbuf_ext_shared_info *shinfo = NULL;
2874 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size;
2879 total_len += sizeof(*shinfo) + sizeof(uintptr_t);
2880 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
2882 if (unlikely(total_len > UINT16_MAX))
2885 buf_len = total_len;
2886 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
2887 if (unlikely(buf == NULL))
2890 /* Initialize shinfo */
2891 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
2892 virtio_dev_extbuf_free, buf);
2893 if (unlikely(shinfo == NULL)) {
2895 VHOST_LOG_DATA(ERR, "Failed to init shinfo\n");
2899 iova = rte_malloc_virt2iova(buf);
2900 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo);
2901 rte_pktmbuf_reset_headroom(pkt);
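/*
 * Illustrative sketch (an assumption, not part of this file): whether the
 * external-buffer attach above may be used, and whether chained mbufs are
 * forbidden, is decided when the vhost-user socket is registered;
 * dev->extbuf and dev->linearbuf mirror the RTE_VHOST_USER_EXTBUF_SUPPORT
 * and RTE_VHOST_USER_LINEARBUF_SUPPORT flags. The socket path below is a
 * placeholder.
 *
 *	static int
 *	example_register_extbuf_socket(void)
 *	{
 *		return rte_vhost_driver_register("/tmp/vhost-user.sock",
 *				RTE_VHOST_USER_EXTBUF_SUPPORT |
 *				RTE_VHOST_USER_LINEARBUF_SUPPORT);
 *	}
 */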
2907 * Prepare a pktmbuf able to hold a packet of the given size.
2909 static __rte_always_inline int
2910 virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt,
2913 if (rte_pktmbuf_tailroom(pkt) >= data_len)
2916 /* attach an external buffer if supported */
2917 if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len))
2920 /* check if chained buffers are allowed */
2921 if (!dev->linearbuf)
2929 virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
2930 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
2931 bool legacy_ol_flags)
2934 uint16_t free_entries;
2935 uint16_t dropped = 0;
2936 static bool allocerr_warned;
2939 * The ordering between avail index and
2940 * desc reads needs to be enforced.
2942 free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) -
2944 if (free_entries == 0)
2947 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
2949 VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
2951 count = RTE_MIN(count, MAX_PKT_BURST);
2952 count = RTE_MIN(count, free_entries);
2953 VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n",
2956 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
2959 for (i = 0; i < count; i++) {
2960 struct buf_vector buf_vec[BUF_VECTOR_MAX];
2963 uint16_t nr_vec = 0;
2966 if (unlikely(fill_vec_buf_split(dev, vq,
2967 vq->last_avail_idx + i,
2969 &head_idx, &buf_len,
2970 VHOST_ACCESS_RO) < 0))
2973 update_shadow_used_ring_split(vq, head_idx, 0);
2975 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len);
2976 if (unlikely(err)) {
2978 * mbuf allocation fails for jumbo packets when external
2979 * buffer allocation is not allowed and a linear buffer
2980 * is required. Drop this packet.
2982 if (!allocerr_warned) {
2984 "Failed mbuf alloc of size %d from %s on %s.\n",
2985 buf_len, mbuf_pool->name, dev->ifname);
2986 allocerr_warned = true;
2993 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
2994 mbuf_pool, legacy_ol_flags);
2995 if (unlikely(err)) {
2996 if (!allocerr_warned) {
2998 "Failed to copy desc to mbuf on %s.\n",
3000 allocerr_warned = true;
3009 rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1);
3011 vq->last_avail_idx += i;
3013 do_data_copy_dequeue(vq);
3014 if (unlikely(i < count))
3015 vq->shadow_used_idx = i;
3016 if (likely(vq->shadow_used_idx)) {
3017 flush_shadow_used_ring_split(dev, vq);
3018 vhost_vring_call_split(dev, vq);
3021 return (i - dropped);
3026 virtio_dev_tx_split_legacy(struct virtio_net *dev,
3027 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
3028 struct rte_mbuf **pkts, uint16_t count)
3030 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true);
3035 virtio_dev_tx_split_compliant(struct virtio_net *dev,
3036 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
3037 struct rte_mbuf **pkts, uint16_t count)
3039 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false);
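/*
 * Illustrative sketch (an assumption, not part of this file): which of the
 * two wrappers above gets used is driven by VIRTIO_DEV_LEGACY_OL_FLAGS,
 * i.e. by whether the backend was registered with
 * RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS. Without the flag, dequeued mbufs
 * carry the historical PKT_TX_* offload requests; with it, they carry
 * RX-oriented flags and packet_type as filled in by vhost_dequeue_offload().
 *
 *	static int
 *	example_register_compliant_socket(const char *path)
 *	{
 *		return rte_vhost_driver_register(path,
 *				RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS);
 *	}
 */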
3042 static __rte_always_inline int
3043 vhost_reserve_avail_batch_packed(struct virtio_net *dev,
3044 struct vhost_virtqueue *vq,
3045 struct rte_mbuf **pkts,
3047 uintptr_t *desc_addrs,
3050 bool wrap = vq->avail_wrap_counter;
3051 struct vring_packed_desc *descs = vq->desc_packed;
3052 uint64_t lens[PACKED_BATCH_SIZE];
3053 uint64_t buf_lens[PACKED_BATCH_SIZE];
3054 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
3057 if (unlikely(avail_idx & PACKED_BATCH_MASK))
3059 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
3062 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3063 flags = descs[avail_idx + i].flags;
3064 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
3065 (wrap == !!(flags & VRING_DESC_F_USED)) ||
3066 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
3070 rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
3072 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3073 lens[i] = descs[avail_idx + i].len;
3075 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3076 desc_addrs[i] = vhost_iova_to_vva(dev, vq,
3077 descs[avail_idx + i].addr,
3078 &lens[i], VHOST_ACCESS_RW);
3081 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3082 if (unlikely(!desc_addrs[i]))
3084 if (unlikely((lens[i] != descs[avail_idx + i].len)))
3088 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3089 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i]))
3093 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3094 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
3096 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3097 if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
3101 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3102 pkts[i]->pkt_len = lens[i] - buf_offset;
3103 pkts[i]->data_len = pkts[i]->pkt_len;
3104 ids[i] = descs[avail_idx + i].id;
3113 static __rte_always_inline int
3114 virtio_dev_tx_batch_packed(struct virtio_net *dev,
3115 struct vhost_virtqueue *vq,
3116 struct rte_mbuf **pkts,
3117 bool legacy_ol_flags)
3119 uint16_t avail_idx = vq->last_avail_idx;
3120 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
3121 struct virtio_net_hdr *hdr;
3122 uintptr_t desc_addrs[PACKED_BATCH_SIZE];
3123 uint16_t ids[PACKED_BATCH_SIZE];
3126 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx,
3130 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3131 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
3133 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3134 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
3135 (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
3138 if (virtio_net_with_host_offload(dev)) {
3139 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3140 hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
3141 vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags);
3145 if (virtio_net_is_inorder(dev))
3146 vhost_shadow_dequeue_batch_packed_inorder(vq,
3147 ids[PACKED_BATCH_SIZE - 1]);
3149 vhost_shadow_dequeue_batch_packed(dev, vq, ids);
3151 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
3156 static __rte_always_inline int
3157 vhost_dequeue_single_packed(struct virtio_net *dev,
3158 struct vhost_virtqueue *vq,
3159 struct rte_mempool *mbuf_pool,
3160 struct rte_mbuf *pkts,
3162 uint16_t *desc_count,
3163 bool legacy_ol_flags)
3165 struct buf_vector buf_vec[BUF_VECTOR_MAX];
3167 uint16_t nr_vec = 0;
3169 static bool allocerr_warned;
3171 if (unlikely(fill_vec_buf_packed(dev, vq,
3172 vq->last_avail_idx, desc_count,
3175 VHOST_ACCESS_RO) < 0))
3178 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
3179 if (!allocerr_warned) {
3181 "Failed mbuf alloc of size %d from %s on %s.\n",
3182 buf_len, mbuf_pool->name, dev->ifname);
3183 allocerr_warned = true;
3188 err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts,
3189 mbuf_pool, legacy_ol_flags);
3190 if (unlikely(err)) {
3191 if (!allocerr_warned) {
3193 "Failed to copy desc to mbuf on %s.\n",
3195 allocerr_warned = true;
3203 static __rte_always_inline int
3204 virtio_dev_tx_single_packed(struct virtio_net *dev,
3205 struct vhost_virtqueue *vq,
3206 struct rte_mempool *mbuf_pool,
3207 struct rte_mbuf *pkts,
3208 bool legacy_ol_flags)
3211 uint16_t buf_id, desc_count = 0;
3214 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id,
3215 &desc_count, legacy_ol_flags);
3217 if (likely(desc_count > 0)) {
3218 if (virtio_net_is_inorder(dev))
3219 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id,
3222 vhost_shadow_dequeue_single_packed(vq, buf_id,
3225 vq_inc_last_avail_packed(vq, desc_count);
3233 virtio_dev_tx_packed(struct virtio_net *dev,
3234 struct vhost_virtqueue *__rte_restrict vq,
3235 struct rte_mempool *mbuf_pool,
3236 struct rte_mbuf **__rte_restrict pkts,
3238 bool legacy_ol_flags)
3240 uint32_t pkt_idx = 0;
3242 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
3246 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
3248 if (count - pkt_idx >= PACKED_BATCH_SIZE) {
3249 if (!virtio_dev_tx_batch_packed(dev, vq,
3252 pkt_idx += PACKED_BATCH_SIZE;
3257 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
3262 } while (pkt_idx < count);
3264 if (pkt_idx != count)
3265 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx);
3267 if (vq->shadow_used_idx) {
3268 do_data_copy_dequeue(vq);
3270 vhost_flush_dequeue_shadow_packed(dev, vq);
3271 vhost_vring_call_packed(dev, vq);
3279 virtio_dev_tx_packed_legacy(struct virtio_net *dev,
3280 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3281 struct rte_mbuf **__rte_restrict pkts, uint32_t count)
3283 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true);
3288 virtio_dev_tx_packed_compliant(struct virtio_net *dev,
3289 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3290 struct rte_mbuf **__rte_restrict pkts, uint32_t count)
3292 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false);
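/*
 * Illustrative application-side sketch (an assumption, not part of this
 * file): the public entry point below is typically called from a polling
 * loop. queue_id must be a guest TX virtqueue (odd index, e.g. 1 for the
 * first queue pair); a real application would forward the dequeued packets
 * instead of dropping them as done here.
 *
 *	static void
 *	example_dequeue_once(int vid, uint16_t queue_id,
 *			struct rte_mempool *pool)
 *	{
 *		struct rte_mbuf *pkts[32];
 *		uint16_t n;
 *
 *		n = rte_vhost_dequeue_burst(vid, queue_id, pool, pkts, 32);
 *		rte_pktmbuf_free_bulk(pkts, n);
 *	}
 */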
3296 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
3297 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
3299 struct virtio_net *dev;
3300 struct rte_mbuf *rarp_mbuf = NULL;
3301 struct vhost_virtqueue *vq;
3302 int16_t success = 1;
3304 dev = get_device(vid);
3308 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
3310 "(%d) %s: built-in vhost net backend is disabled.\n",
3311 dev->vid, __func__);
3315 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
3317 "(%d) %s: invalid virtqueue idx %d.\n",
3318 dev->vid, __func__, queue_id);
3322 vq = dev->virtqueue[queue_id];
3324 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
3327 if (unlikely(!vq->enabled)) {
3329 goto out_access_unlock;
3332 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3333 vhost_user_iotlb_rd_lock(vq);
3335 if (unlikely(!vq->access_ok))
3336 if (unlikely(vring_translate(dev, vq) < 0)) {
3342 * Construct a RARP broadcast packet and inject it into the "pkts"
3343 * array, so it looks like the guest actually sent such a packet.
3345 * Check user_send_rarp() for more information.
3347 * broadcast_rarp shares a cacheline in the virtio_net structure
3348 * with some fields that are accessed during enqueue, and
3349 * __atomic_compare_exchange_n writes to it when it performs the compare
3350 * and exchange. This could result in false sharing between enqueue and dequeue.
3353 * Prevent unnecessary false sharing by reading broadcast_rarp first
3354 * and only performing compare and exchange if the read indicates it
3355 * is likely to be set.
3357 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
3358 __atomic_compare_exchange_n(&dev->broadcast_rarp,
3359 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
3361 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
3362 if (rarp_mbuf == NULL) {
3363 VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n");
3370 if (vq_is_packed(dev)) {
3371 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3372 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count);
3374 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count);
3376 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3377 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count);
3379 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count);
3383 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3384 vhost_user_iotlb_rd_unlock(vq);
3387 rte_spinlock_unlock(&vq->access_lock);
3389 if (unlikely(rarp_mbuf != NULL)) {
3391 * Inject it at the head of the "pkts" array, so that the switch's MAC
3392 * learning table gets updated first.
3394 memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));
3395 pkts[0] = rarp_mbuf;