1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
45 struct virtnet_rx *rxvq = rxq;
46 struct virtqueue *vq = rxvq->vq;
48 return VIRTQUEUE_NUSED(vq) >= offset;
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
54 vq->vq_free_cnt += num;
55 vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
61 struct vring_desc *dp, *dp_tail;
62 struct vq_desc_extra *dxp;
63 uint16_t desc_idx_last = desc_idx;
65 dp = &vq->vq_ring.desc[desc_idx];
66 dxp = &vq->vq_descx[desc_idx];
67 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68 if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69 while (dp->flags & VRING_DESC_F_NEXT) {
70 desc_idx_last = dp->next;
71 dp = &vq->vq_ring.desc[dp->next];
77 * We must append the newly freed chain to the end of the existing
78 * free chain, if any. If the virtqueue was completely used, then the
79 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
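 * For example, freeing the chain 3 -> 4 -> 7 when the current free
 * list tail is 12 sets desc[12].next = 3, makes 7 the new tail and
 * terminates it with VQ_RING_DESC_CHAIN_END.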
81 if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82 vq->vq_desc_head_idx = desc_idx;
84 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
85 dp_tail->next = desc_idx;
88 vq->vq_desc_tail_idx = desc_idx_last;
89 dp->next = VQ_RING_DESC_CHAIN_END;
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
95 struct vq_desc_extra *dxp;
97 dxp = &vq->vq_descx[id];
98 vq->vq_free_cnt += dxp->ndescs;
100 if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101 vq->vq_desc_head_idx = id;
103 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
105 vq->vq_desc_tail_idx = id;
106 dxp->next = VQ_RING_DESC_CHAIN_END;
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111 struct rte_mbuf **rx_pkts,
115 struct rte_mbuf *cookie;
118 struct vring_packed_desc *desc;
121 desc = vq->ring_packed.desc_packed;
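/* Scan used descriptors in ring order: desc_is_used() compares the
 * descriptor's AVAIL/USED bits against the queue's used_wrap_counter,
 * so stale entries left over from the previous ring wrap are skipped.
 */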
123 for (i = 0; i < num; i++) {
124 used_idx = vq->vq_used_cons_idx;
125 if (!desc_is_used(&desc[used_idx], vq))
127 virtio_rmb(vq->hw->weak_barriers);
128 len[i] = desc[used_idx].len;
129 id = desc[used_idx].id;
130 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131 if (unlikely(cookie == NULL)) {
132 PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133 vq->vq_used_cons_idx);
136 rte_prefetch0(cookie);
137 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
141 vq->vq_used_cons_idx++;
142 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143 vq->vq_used_cons_idx -= vq->vq_nentries;
144 vq->used_wrap_counter ^= 1;
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153 uint32_t *len, uint16_t num)
155 struct vring_used_elem *uep;
156 struct rte_mbuf *cookie;
157 uint16_t used_idx, desc_idx;
160 /* Caller does the check */
161 for (i = 0; i < num ; i++) {
162 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163 uep = &vq->vq_ring.used->ring[used_idx];
164 desc_idx = (uint16_t) uep->id;
166 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
168 if (unlikely(cookie == NULL)) {
169 PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170 vq->vq_used_cons_idx);
174 rte_prefetch0(cookie);
175 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
177 vq->vq_used_cons_idx++;
178 vq_ring_free_chain(vq, desc_idx);
179 vq->vq_descx[desc_idx].cookie = NULL;
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187 struct rte_mbuf **rx_pkts,
191 struct vring_used_elem *uep;
192 struct rte_mbuf *cookie;
193 uint16_t used_idx = 0;
196 if (unlikely(num == 0))
199 for (i = 0; i < num; i++) {
200 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201 /* For in-order queues, the desc idx is the same as the used idx */
202 uep = &vq->vq_ring.used->ring[used_idx];
204 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
206 if (unlikely(cookie == NULL)) {
207 PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208 vq->vq_used_cons_idx);
212 rte_prefetch0(cookie);
213 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
215 vq->vq_used_cons_idx++;
216 vq->vq_descx[used_idx].cookie = NULL;
219 vq_ring_free_inorder(vq, used_idx, i);
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
230 uint16_t used_idx, id, curr_id, free_cnt = 0;
231 uint16_t size = vq->vq_nentries;
232 struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
233 struct vq_desc_extra *dxp;
235 used_idx = vq->vq_used_cons_idx;
236 while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237 virtio_rmb(vq->hw->weak_barriers);
238 id = desc[used_idx].id;
241 dxp = &vq->vq_descx[used_idx];
242 used_idx += dxp->ndescs;
243 free_cnt += dxp->ndescs;
245 if (used_idx >= size) {
247 vq->used_wrap_counter ^= 1;
249 if (dxp->cookie != NULL) {
250 rte_pktmbuf_free(dxp->cookie);
253 } while (curr_id != id);
255 vq->vq_used_cons_idx = used_idx;
256 vq->vq_free_cnt += free_cnt;
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
262 uint16_t used_idx, id;
263 uint16_t size = vq->vq_nentries;
264 struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
265 struct vq_desc_extra *dxp;
267 used_idx = vq->vq_used_cons_idx;
268 while (num-- && desc_is_used(&desc[used_idx], vq)) {
269 virtio_rmb(vq->hw->weak_barriers);
270 id = desc[used_idx].id;
271 dxp = &vq->vq_descx[id];
272 vq->vq_used_cons_idx += dxp->ndescs;
273 if (vq->vq_used_cons_idx >= size) {
274 vq->vq_used_cons_idx -= size;
275 vq->used_wrap_counter ^= 1;
277 vq_ring_free_id_packed(vq, id);
278 if (dxp->cookie != NULL) {
279 rte_pktmbuf_free(dxp->cookie);
282 used_idx = vq->vq_used_cons_idx;
286 /* Cleanup from completed transmits. */
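/* In-order queues reclaim used descriptors sequentially from the used
 * index; out-of-order queues reclaim them by descriptor id.
 */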
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
291 virtio_xmit_cleanup_inorder_packed(vq, num);
293 virtio_xmit_cleanup_normal_packed(vq, num);
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
299 uint16_t i, used_idx, desc_idx;
300 for (i = 0; i < num; i++) {
301 struct vring_used_elem *uep;
302 struct vq_desc_extra *dxp;
304 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305 uep = &vq->vq_ring.used->ring[used_idx];
307 desc_idx = (uint16_t) uep->id;
308 dxp = &vq->vq_descx[desc_idx];
309 vq->vq_used_cons_idx++;
310 vq_ring_free_chain(vq, desc_idx);
312 if (dxp->cookie != NULL) {
313 rte_pktmbuf_free(dxp->cookie);
319 /* Cleanup from completed inorder transmits. */
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
323 uint16_t i, idx = vq->vq_used_cons_idx;
324 int16_t free_cnt = 0;
325 struct vq_desc_extra *dxp = NULL;
327 if (unlikely(num == 0))
330 for (i = 0; i < num; i++) {
331 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332 free_cnt += dxp->ndescs;
333 if (dxp->cookie != NULL) {
334 rte_pktmbuf_free(dxp->cookie);
339 vq->vq_free_cnt += free_cnt;
340 vq->vq_used_cons_idx = idx;
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345 struct rte_mbuf **cookies,
348 struct vq_desc_extra *dxp;
349 struct virtio_hw *hw = vq->hw;
350 struct vring_desc *start_dp;
351 uint16_t head_idx, idx, i = 0;
353 if (unlikely(vq->vq_free_cnt == 0))
355 if (unlikely(vq->vq_free_cnt < num))
358 head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359 start_dp = vq->vq_ring.desc;
362 idx = head_idx & (vq->vq_nentries - 1);
363 dxp = &vq->vq_descx[idx];
364 dxp->cookie = (void *)cookies[i];
368 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
371 cookies[i]->buf_len -
372 RTE_PKTMBUF_HEADROOM +
374 start_dp[idx].flags = VRING_DESC_F_WRITE;
376 vq_update_avail_ring(vq, idx);
381 vq->vq_desc_head_idx += num;
382 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
390 struct vq_desc_extra *dxp;
391 struct virtio_hw *hw = vq->hw;
392 struct vring_desc *start_dp = vq->vq_ring.desc;
395 if (unlikely(vq->vq_free_cnt == 0))
397 if (unlikely(vq->vq_free_cnt < num))
400 if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
403 for (i = 0; i < num; i++) {
404 idx = vq->vq_desc_head_idx;
405 dxp = &vq->vq_descx[idx];
406 dxp->cookie = (void *)cookie[i];
410 VIRTIO_MBUF_ADDR(cookie[i], vq) +
411 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
413 cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
415 start_dp[idx].flags = VRING_DESC_F_WRITE;
416 vq->vq_desc_head_idx = start_dp[idx].next;
417 vq_update_avail_ring(vq, idx);
418 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419 vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
424 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431 struct rte_mbuf **cookie, uint16_t num)
433 struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
434 uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
435 struct virtio_hw *hw = vq->hw;
436 struct vq_desc_extra *dxp;
440 if (unlikely(vq->vq_free_cnt == 0))
442 if (unlikely(vq->vq_free_cnt < num))
445 for (i = 0; i < num; i++) {
446 idx = vq->vq_avail_idx;
447 dxp = &vq->vq_descx[idx];
448 dxp->cookie = (void *)cookie[i];
451 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454 + hw->vtnet_hdr_size;
456 vq->vq_desc_head_idx = dxp->next;
457 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458 vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459 virtio_wmb(hw->weak_barriers);
460 start_dp[idx].flags = flags;
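/* When the avail index wraps past the ring size, flip the wrap counter
 * and recompute the AVAIL/USED flag bits used to publish descriptors
 * in the new round.
 */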
461 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462 vq->vq_avail_idx -= vq->vq_nentries;
463 vq->avail_wrap_counter ^= 1;
464 vq->avail_used_flags =
465 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
466 VRING_DESC_F_USED(!vq->avail_wrap_counter);
467 flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
470 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
474 /* When doing TSO, the IP length is not included in the pseudo header
475  * checksum of the packet given to the PMD, but for virtio it is
476  * expected.
477  */
479 virtio_tso_fix_cksum(struct rte_mbuf *m)
481 /* common case: header is not fragmented */
482 if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
484 struct ipv4_hdr *iph;
485 struct ipv6_hdr *ip6h;
487 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
490 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
491 th = RTE_PTR_ADD(iph, m->l3_len);
492 if ((iph->version_ihl >> 4) == 4) {
493 iph->hdr_checksum = 0;
494 iph->hdr_checksum = rte_ipv4_cksum(iph);
495 ip_len = iph->total_length;
496 ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
499 ip6h = (struct ipv6_hdr *)iph;
500 ip_paylen = ip6h->payload_len;
503 /* recompute the pseudo header checksum so that it also covers ip_paylen */
504 prev_cksum = th->cksum;
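/* One's-complement add ip_paylen to the old value; the fold below
 * wraps any carry back into the low 16 bits.
 */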
507 tmp = (tmp & 0xffff) + (tmp >> 16);
510 /* replace it in the packet */
511 th->cksum = new_cksum;
516 /* avoid write operations when not needed, to lessen cache issues */
517 #define ASSIGN_UNLESS_EQUAL(var, val) do { \
518 if ((var) != (val)) \
522 #define virtqueue_clear_net_hdr(_hdr) do { \
523 ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0); \
524 ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0); \
525 ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0); \
526 ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0); \
527 ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0); \
528 ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0); \
532 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
533 struct rte_mbuf *cookie,
537 if (cookie->ol_flags & PKT_TX_TCP_SEG)
538 cookie->ol_flags |= PKT_TX_TCP_CKSUM;
540 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
541 case PKT_TX_UDP_CKSUM:
542 hdr->csum_start = cookie->l2_len + cookie->l3_len;
543 hdr->csum_offset = offsetof(struct udp_hdr,
545 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
548 case PKT_TX_TCP_CKSUM:
549 hdr->csum_start = cookie->l2_len + cookie->l3_len;
550 hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
551 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
555 ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
556 ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
557 ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
561 /* TCP Segmentation Offload */
562 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
563 virtio_tso_fix_cksum(cookie);
564 hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
565 VIRTIO_NET_HDR_GSO_TCPV6 :
566 VIRTIO_NET_HDR_GSO_TCPV4;
567 hdr->gso_size = cookie->tso_segsz;
573 ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
574 ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
575 ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
581 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
582 struct rte_mbuf **cookies,
585 struct vq_desc_extra *dxp;
586 struct virtqueue *vq = txvq->vq;
587 struct vring_desc *start_dp;
588 struct virtio_net_hdr *hdr;
590 uint16_t head_size = vq->hw->vtnet_hdr_size;
593 idx = vq->vq_desc_head_idx;
594 start_dp = vq->vq_ring.desc;
597 idx = idx & (vq->vq_nentries - 1);
598 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
599 dxp->cookie = (void *)cookies[i];
602 hdr = (struct virtio_net_hdr *)
603 rte_pktmbuf_prepend(cookies[i], head_size);
604 cookies[i]->pkt_len -= head_size;
606 /* if offload disabled, hdr is not zeroed yet, do it now */
607 if (!vq->hw->has_tx_offload)
608 virtqueue_clear_net_hdr(hdr);
610 virtqueue_xmit_offload(hdr, cookies[i], true);
612 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
613 start_dp[idx].len = cookies[i]->data_len;
614 start_dp[idx].flags = 0;
616 vq_update_avail_ring(vq, idx);
622 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
623 vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
627 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
628 uint16_t needed, int can_push, int in_order)
630 struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
631 struct vq_desc_extra *dxp;
632 struct virtqueue *vq = txvq->vq;
633 struct vring_packed_desc *start_dp, *head_dp;
634 uint16_t idx, id, head_idx, head_flags;
635 uint16_t head_size = vq->hw->vtnet_hdr_size;
636 struct virtio_net_hdr *hdr;
639 id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
641 dxp = &vq->vq_descx[id];
642 dxp->ndescs = needed;
643 dxp->cookie = cookie;
645 head_idx = vq->vq_avail_idx;
648 start_dp = vq->ring_packed.desc_packed;
650 head_dp = &vq->ring_packed.desc_packed[idx];
651 head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
652 head_flags |= vq->avail_used_flags;
655 /* prepend cannot fail, checked by caller */
656 hdr = (struct virtio_net_hdr *)
657 rte_pktmbuf_prepend(cookie, head_size);
658 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length, which
659  * is not wanted here; the subtraction below restores the correct pkt size.
661 cookie->pkt_len -= head_size;
663 /* if offload disabled, it is not zeroed below, do it now */
664 if (!vq->hw->has_tx_offload)
665 virtqueue_clear_net_hdr(hdr);
667 /* setup first tx ring slot to point to header
668 * stored in reserved region.
670 start_dp[idx].addr = txvq->virtio_net_hdr_mem +
671 RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
672 start_dp[idx].len = vq->hw->vtnet_hdr_size;
673 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
675 if (idx >= vq->vq_nentries) {
676 idx -= vq->vq_nentries;
677 vq->avail_wrap_counter ^= 1;
678 vq->avail_used_flags =
679 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
680 VRING_DESC_F_USED(!vq->avail_wrap_counter);
684 virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
689 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
690 start_dp[idx].len = cookie->data_len;
691 if (likely(idx != head_idx)) {
692 flags = cookie->next ? VRING_DESC_F_NEXT : 0;
693 flags |= vq->avail_used_flags;
694 start_dp[idx].flags = flags;
698 if (idx >= vq->vq_nentries) {
699 idx -= vq->vq_nentries;
700 vq->avail_wrap_counter ^= 1;
701 vq->avail_used_flags =
702 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
703 VRING_DESC_F_USED(!vq->avail_wrap_counter);
705 } while ((cookie = cookie->next) != NULL);
707 start_dp[prev].id = id;
709 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
710 vq->vq_avail_idx = idx;
713 vq->vq_desc_head_idx = dxp->next;
714 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
715 vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
718 virtio_wmb(vq->hw->weak_barriers);
719 head_dp->flags = head_flags;
723 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
724 uint16_t needed, int use_indirect, int can_push,
727 struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
728 struct vq_desc_extra *dxp;
729 struct virtqueue *vq = txvq->vq;
730 struct vring_desc *start_dp;
731 uint16_t seg_num = cookie->nb_segs;
732 uint16_t head_idx, idx;
733 uint16_t head_size = vq->hw->vtnet_hdr_size;
734 struct virtio_net_hdr *hdr;
736 head_idx = vq->vq_desc_head_idx;
739 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
741 dxp = &vq->vq_descx[idx];
742 dxp->cookie = (void *)cookie;
743 dxp->ndescs = needed;
745 start_dp = vq->vq_ring.desc;
748 /* prepend cannot fail, checked by caller */
749 hdr = (struct virtio_net_hdr *)
750 rte_pktmbuf_prepend(cookie, head_size);
751 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length, which
752  * is not wanted here; the subtraction below restores the correct pkt size.
754 cookie->pkt_len -= head_size;
756 /* if offload disabled, it is not zeroed below, do it now */
757 if (!vq->hw->has_tx_offload)
758 virtqueue_clear_net_hdr(hdr);
759 } else if (use_indirect) {
760 /* setup tx ring slot to point to indirect
761 * descriptor list stored in reserved region.
763 * the first slot in indirect ring is already preset
764 * to point to the header in reserved region
766 start_dp[idx].addr = txvq->virtio_net_hdr_mem +
767 RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
768 start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc);
769 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
770 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
772 /* loop below will fill in rest of the indirect elements */
773 start_dp = txr[idx].tx_indir;
776 /* setup first tx ring slot to point to header
777 * stored in reserved region.
779 start_dp[idx].addr = txvq->virtio_net_hdr_mem +
780 RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
781 start_dp[idx].len = vq->hw->vtnet_hdr_size;
782 start_dp[idx].flags = VRING_DESC_F_NEXT;
783 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
785 idx = start_dp[idx].next;
788 virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
791 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
792 start_dp[idx].len = cookie->data_len;
793 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
794 idx = start_dp[idx].next;
795 } while ((cookie = cookie->next) != NULL);
798 idx = vq->vq_ring.desc[head_idx].next;
800 vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
802 vq->vq_desc_head_idx = idx;
803 vq_update_avail_ring(vq, head_idx);
806 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
807 vq->vq_desc_tail_idx = idx;
812 virtio_dev_cq_start(struct rte_eth_dev *dev)
814 struct virtio_hw *hw = dev->data->dev_private;
816 if (hw->cvq && hw->cvq->vq) {
817 rte_spinlock_init(&hw->cvq->lock);
818 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
823 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
826 unsigned int socket_id __rte_unused,
827 const struct rte_eth_rxconf *rx_conf __rte_unused,
828 struct rte_mempool *mp)
830 uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
831 struct virtio_hw *hw = dev->data->dev_private;
832 struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
833 struct virtnet_rx *rxvq;
835 PMD_INIT_FUNC_TRACE();
837 if (nb_desc == 0 || nb_desc > vq->vq_nentries)
838 nb_desc = vq->vq_nentries;
839 vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
842 rxvq->queue_id = queue_idx;
844 if (rxvq->mpool == NULL) {
845 rte_exit(EXIT_FAILURE,
846 "Cannot allocate mbufs for rx virtqueue");
849 dev->data->rx_queues[queue_idx] = rxvq;
855 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
857 uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
858 struct virtio_hw *hw = dev->data->dev_private;
859 struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
860 struct virtnet_rx *rxvq = &vq->rxq;
865 PMD_INIT_FUNC_TRACE();
867 /* Allocate blank mbufs for each rx descriptor */
870 if (hw->use_simple_rx) {
871 for (desc_idx = 0; desc_idx < vq->vq_nentries;
873 vq->vq_ring.avail->ring[desc_idx] = desc_idx;
874 vq->vq_ring.desc[desc_idx].flags =
878 virtio_rxq_vec_setup(rxvq);
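/* The vectorized receive path may read past the last ring entry, so
 * the tail of sw_ring is padded with pointers to a zeroed fake mbuf.
 */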
881 memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
882 for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
884 vq->sw_ring[vq->vq_nentries + desc_idx] =
888 if (hw->use_simple_rx) {
889 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
890 virtio_rxq_rearm_vec(rxvq);
891 nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
893 } else if (hw->use_inorder_rx) {
894 if ((!virtqueue_full(vq))) {
895 uint16_t free_cnt = vq->vq_free_cnt;
896 struct rte_mbuf *pkts[free_cnt];
898 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
900 error = virtqueue_enqueue_refill_inorder(vq,
903 if (unlikely(error)) {
904 for (i = 0; i < free_cnt; i++)
905 rte_pktmbuf_free(pkts[i]);
910 vq_update_avail_idx(vq);
913 while (!virtqueue_full(vq)) {
914 m = rte_mbuf_raw_alloc(rxvq->mpool);
918 /* Enqueue allocated buffers */
919 if (vtpci_packed_queue(vq->hw))
920 error = virtqueue_enqueue_recv_refill_packed(vq,
923 error = virtqueue_enqueue_recv_refill(vq,
932 if (!vtpci_packed_queue(vq->hw))
933 vq_update_avail_idx(vq);
936 PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
944 * struct rte_eth_dev *dev: Used to update dev
945 * uint16_t queue_idx: Just used as an index in dev txq list
946 * uint16_t nb_desc: Defaults to values read from config space
947 * unsigned int socket_id: Used to allocate memzone
948 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
951 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
954 unsigned int socket_id __rte_unused,
955 const struct rte_eth_txconf *tx_conf)
957 uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
958 struct virtio_hw *hw = dev->data->dev_private;
959 struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
960 struct virtnet_tx *txvq;
961 uint16_t tx_free_thresh;
963 PMD_INIT_FUNC_TRACE();
965 if (nb_desc == 0 || nb_desc > vq->vq_nentries)
966 nb_desc = vq->vq_nentries;
967 vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
970 txvq->queue_id = queue_idx;
972 tx_free_thresh = tx_conf->tx_free_thresh;
973 if (tx_free_thresh == 0)
975 RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
977 if (tx_free_thresh >= (vq->vq_nentries - 3)) {
978 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
979 "number of TX entries minus 3 (%u)."
980 " (tx_free_thresh=%u port=%u queue=%u)\n",
982 tx_free_thresh, dev->data->port_id, queue_idx);
986 vq->vq_free_thresh = tx_free_thresh;
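/* The transmit path starts reclaiming completed descriptors once more
 * than vq_nentries - vq_free_thresh entries are in use (see
 * virtio_xmit_pkts()).
 */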
988 dev->data->tx_queues[queue_idx] = txvq;
993 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
996 uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
997 struct virtio_hw *hw = dev->data->dev_private;
998 struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1000 PMD_INIT_FUNC_TRACE();
1002 if (!vtpci_packed_queue(hw)) {
1003 if (hw->use_inorder_tx)
1004 vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
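/* In-order tx consumes descriptors sequentially, so the last
 * descriptor's next wraps back to slot 0 to keep the chain circular.
 */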
1013 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1017 * Requeue the discarded mbuf. This should always be
1018 * successful since it was just dequeued.
1020 if (vtpci_packed_queue(vq->hw))
1021 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1023 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1025 if (unlikely(error)) {
1026 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1027 rte_pktmbuf_free(m);
1032 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1036 error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1037 if (unlikely(error)) {
1038 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1039 rte_pktmbuf_free(m);
1044 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1046 uint32_t s = mbuf->pkt_len;
1047 struct ether_addr *ea;
1052 stats->size_bins[1]++;
1053 } else if (s > 64 && s < 1024) {
1056 /* use the count of leading zeros to pick the correct size bin */
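/* e.g. s = 300: 32 - __builtin_clz(300) - 5 = 32 - 23 - 5 = 4,
 * i.e. size_bins[4], the 256-511 byte bin.
 */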
1057 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1058 stats->size_bins[bin]++;
1061 stats->size_bins[0]++;
1063 stats->size_bins[6]++;
1065 stats->size_bins[7]++;
1068 ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
1069 if (is_multicast_ether_addr(ea)) {
1070 if (is_broadcast_ether_addr(ea))
1078 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1080 VIRTIO_DUMP_PACKET(m, m->data_len);
1082 virtio_update_packet_stats(&rxvq->stats, m);
1085 /* Parse the virtio-net header and optionally fill offload information into the mbuf */
1087 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1089 struct rte_net_hdr_lens hdr_lens;
1090 uint32_t hdrlen, ptype;
1091 int l4_supported = 0;
1094 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1097 m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1099 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1100 m->packet_type = ptype;
1101 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1102 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1103 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1106 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1107 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1108 if (hdr->csum_start <= hdrlen && l4_supported) {
1109 m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1111 /* Unknown proto or tunnel, do sw cksum. We can assume
1112 * the cksum field is in the first segment since the
1113 * buffers we provided to the host are large enough.
1114 * In case of SCTP, this will be wrong since it's a CRC
1115 * but there's nothing we can do.
1117 uint16_t csum = 0, off;
1119 rte_raw_cksum_mbuf(m, hdr->csum_start,
1120 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1122 if (likely(csum != 0xffff))
1124 off = hdr->csum_offset + hdr->csum_start;
1125 if (rte_pktmbuf_data_len(m) >= off + 1)
1126 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1129 } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1130 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1133 /* GSO request, save required information in mbuf */
1134 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1135 /* Check unsupported modes */
1136 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1137 (hdr->gso_size == 0)) {
1141 /* Update MSS length in mbuf */
1142 m->tso_segsz = hdr->gso_size;
1143 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1144 case VIRTIO_NET_HDR_GSO_TCPV4:
1145 case VIRTIO_NET_HDR_GSO_TCPV6:
1146 m->ol_flags |= PKT_RX_LRO | \
1147 PKT_RX_L4_CKSUM_NONE;
1157 #define VIRTIO_MBUF_BURST_SZ 64
1158 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
1160 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1162 struct virtnet_rx *rxvq = rx_queue;
1163 struct virtqueue *vq = rxvq->vq;
1164 struct virtio_hw *hw = vq->hw;
1165 struct rte_mbuf *rxm, *new_mbuf;
1166 uint16_t nb_used, num, nb_rx;
1167 uint32_t len[VIRTIO_MBUF_BURST_SZ];
1168 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1170 uint32_t i, nb_enqueued;
1172 struct virtio_net_hdr *hdr;
1175 if (unlikely(hw->started == 0))
1178 nb_used = VIRTQUEUE_NUSED(vq);
1180 virtio_rmb(hw->weak_barriers);
1182 num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1183 if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1184 num = VIRTIO_MBUF_BURST_SZ;
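/* Trim the batch so that it ends on a descriptor cache-line boundary;
 * this keeps whole cache lines of used entries together and avoids
 * re-touching a partially consumed line on the next poll.
 */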
1185 if (likely(num > DESC_PER_CACHELINE))
1186 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1188 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1189 PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1192 hdr_size = hw->vtnet_hdr_size;
1194 for (i = 0; i < num ; i++) {
1197 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1199 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1200 PMD_RX_LOG(ERR, "Packet drop");
1202 virtio_discard_rxbuf(vq, rxm);
1203 rxvq->stats.errors++;
1207 rxm->port = rxvq->port_id;
1208 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1212 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1213 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1215 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1216 RTE_PKTMBUF_HEADROOM - hdr_size);
1219 rte_vlan_strip(rxm);
1221 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1222 virtio_discard_rxbuf(vq, rxm);
1223 rxvq->stats.errors++;
1227 virtio_rx_stats_updated(rxvq, rxm);
1229 rx_pkts[nb_rx++] = rxm;
1232 rxvq->stats.packets += nb_rx;
1234 /* Allocate new mbuf for the used descriptor */
1235 while (likely(!virtqueue_full(vq))) {
1236 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1237 if (unlikely(new_mbuf == NULL)) {
1238 struct rte_eth_dev *dev
1239 = &rte_eth_devices[rxvq->port_id];
1240 dev->data->rx_mbuf_alloc_failed++;
1243 error = virtqueue_enqueue_recv_refill(vq, &new_mbuf, 1);
1244 if (unlikely(error)) {
1245 rte_pktmbuf_free(new_mbuf);
1251 if (likely(nb_enqueued)) {
1252 vq_update_avail_idx(vq);
1254 if (unlikely(virtqueue_kick_prepare(vq))) {
1255 virtqueue_notify(vq);
1256 PMD_RX_LOG(DEBUG, "Notified");
1264 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1267 struct virtnet_rx *rxvq = rx_queue;
1268 struct virtqueue *vq = rxvq->vq;
1269 struct virtio_hw *hw = vq->hw;
1270 struct rte_mbuf *rxm, *new_mbuf;
1271 uint16_t num, nb_rx;
1272 uint32_t len[VIRTIO_MBUF_BURST_SZ];
1273 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1275 uint32_t i, nb_enqueued;
1277 struct virtio_net_hdr *hdr;
1280 if (unlikely(hw->started == 0))
1283 num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1284 if (likely(num > DESC_PER_CACHELINE))
1285 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1287 num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1288 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1291 hdr_size = hw->vtnet_hdr_size;
1293 for (i = 0; i < num; i++) {
1296 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1298 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1299 PMD_RX_LOG(ERR, "Packet drop");
1301 virtio_discard_rxbuf(vq, rxm);
1302 rxvq->stats.errors++;
1306 rxm->port = rxvq->port_id;
1307 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1311 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1312 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1314 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1315 RTE_PKTMBUF_HEADROOM - hdr_size);
1318 rte_vlan_strip(rxm);
1320 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1321 virtio_discard_rxbuf(vq, rxm);
1322 rxvq->stats.errors++;
1326 virtio_rx_stats_updated(rxvq, rxm);
1328 rx_pkts[nb_rx++] = rxm;
1331 rxvq->stats.packets += nb_rx;
1333 /* Allocate new mbuf for the used descriptor */
1334 while (likely(!virtqueue_full(vq))) {
1335 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1336 if (unlikely(new_mbuf == NULL)) {
1337 struct rte_eth_dev *dev =
1338 &rte_eth_devices[rxvq->port_id];
1339 dev->data->rx_mbuf_alloc_failed++;
1342 error = virtqueue_enqueue_recv_refill_packed(vq, &new_mbuf, 1);
1343 if (unlikely(error)) {
1344 rte_pktmbuf_free(new_mbuf);
1350 if (likely(nb_enqueued)) {
1351 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1352 virtqueue_notify(vq);
1353 PMD_RX_LOG(DEBUG, "Notified");
1362 virtio_recv_pkts_inorder(void *rx_queue,
1363 struct rte_mbuf **rx_pkts,
1366 struct virtnet_rx *rxvq = rx_queue;
1367 struct virtqueue *vq = rxvq->vq;
1368 struct virtio_hw *hw = vq->hw;
1369 struct rte_mbuf *rxm;
1370 struct rte_mbuf *prev;
1371 uint16_t nb_used, num, nb_rx;
1372 uint32_t len[VIRTIO_MBUF_BURST_SZ];
1373 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1375 uint32_t nb_enqueued;
1382 if (unlikely(hw->started == 0))
1385 nb_used = VIRTQUEUE_NUSED(vq);
1386 nb_used = RTE_MIN(nb_used, nb_pkts);
1387 nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1389 virtio_rmb(hw->weak_barriers);
1391 PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1396 hdr_size = hw->vtnet_hdr_size;
1398 num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1400 for (i = 0; i < num; i++) {
1401 struct virtio_net_hdr_mrg_rxbuf *header;
1403 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1404 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1408 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1409 PMD_RX_LOG(ERR, "Packet drop");
1411 virtio_discard_rxbuf_inorder(vq, rxm);
1412 rxvq->stats.errors++;
1416 header = (struct virtio_net_hdr_mrg_rxbuf *)
1417 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1420 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1421 seg_num = header->num_buffers;
1428 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1429 rxm->nb_segs = seg_num;
1432 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1433 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1435 rxm->port = rxvq->port_id;
1437 rx_pkts[nb_rx] = rxm;
1440 if (vq->hw->has_rx_offload &&
1441 virtio_rx_offload(rxm, &header->hdr) < 0) {
1442 virtio_discard_rxbuf_inorder(vq, rxm);
1443 rxvq->stats.errors++;
1448 rte_vlan_strip(rx_pkts[nb_rx]);
1450 seg_res = seg_num - 1;
1452 /* Merge remaining segments */
1453 while (seg_res != 0 && i < (num - 1)) {
1457 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1458 rxm->pkt_len = (uint32_t)(len[i]);
1459 rxm->data_len = (uint16_t)(len[i]);
1461 rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1462 rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1472 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1477 /* The last packet still needs its remaining segments merged */
1478 while (seg_res != 0) {
1479 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1480 VIRTIO_MBUF_BURST_SZ);
1482 prev = rcv_pkts[nb_rx];
1483 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1484 virtio_rmb(hw->weak_barriers);
1485 num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1487 uint16_t extra_idx = 0;
1490 while (extra_idx < rcv_cnt) {
1491 rxm = rcv_pkts[extra_idx];
1493 RTE_PKTMBUF_HEADROOM - hdr_size;
1494 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1495 rxm->data_len = (uint16_t)(len[extra_idx]);
1498 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1499 rx_pkts[nb_rx]->data_len += len[extra_idx];
1505 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1510 "No enough segments for packet.");
1511 virtio_discard_rxbuf_inorder(vq, prev);
1512 rxvq->stats.errors++;
1517 rxvq->stats.packets += nb_rx;
1519 /* Allocate new mbuf for the used descriptor */
1521 if (likely(!virtqueue_full(vq))) {
1522 /* free_cnt may include mrg descs */
1523 uint16_t free_cnt = vq->vq_free_cnt;
1524 struct rte_mbuf *new_pkts[free_cnt];
1526 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1527 error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1529 if (unlikely(error)) {
1530 for (i = 0; i < free_cnt; i++)
1531 rte_pktmbuf_free(new_pkts[i]);
1533 nb_enqueued += free_cnt;
1535 struct rte_eth_dev *dev =
1536 &rte_eth_devices[rxvq->port_id];
1537 dev->data->rx_mbuf_alloc_failed += free_cnt;
1541 if (likely(nb_enqueued)) {
1542 vq_update_avail_idx(vq);
1544 if (unlikely(virtqueue_kick_prepare(vq))) {
1545 virtqueue_notify(vq);
1546 PMD_RX_LOG(DEBUG, "Notified");
1554 virtio_recv_mergeable_pkts(void *rx_queue,
1555 struct rte_mbuf **rx_pkts,
1558 struct virtnet_rx *rxvq = rx_queue;
1559 struct virtqueue *vq = rxvq->vq;
1560 struct virtio_hw *hw = vq->hw;
1561 struct rte_mbuf *rxm;
1562 struct rte_mbuf *prev;
1563 uint16_t nb_used, num, nb_rx = 0;
1564 uint32_t len[VIRTIO_MBUF_BURST_SZ];
1565 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1567 uint32_t nb_enqueued = 0;
1568 uint32_t seg_num = 0;
1569 uint32_t seg_res = 0;
1570 uint32_t hdr_size = hw->vtnet_hdr_size;
1573 if (unlikely(hw->started == 0))
1576 nb_used = VIRTQUEUE_NUSED(vq);
1578 virtio_rmb(hw->weak_barriers);
1580 PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1582 num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1583 if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1584 num = VIRTIO_MBUF_BURST_SZ;
1585 if (likely(num > DESC_PER_CACHELINE))
1586 num = num - ((vq->vq_used_cons_idx + num) %
1587 DESC_PER_CACHELINE);
1590 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1592 for (i = 0; i < num; i++) {
1593 struct virtio_net_hdr_mrg_rxbuf *header;
1595 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1596 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1600 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1601 PMD_RX_LOG(ERR, "Packet drop");
1603 virtio_discard_rxbuf(vq, rxm);
1604 rxvq->stats.errors++;
1608 header = (struct virtio_net_hdr_mrg_rxbuf *)
1609 ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1611 seg_num = header->num_buffers;
1615 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1616 rxm->nb_segs = seg_num;
1619 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1620 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1622 rxm->port = rxvq->port_id;
1624 rx_pkts[nb_rx] = rxm;
1627 if (hw->has_rx_offload &&
1628 virtio_rx_offload(rxm, &header->hdr) < 0) {
1629 virtio_discard_rxbuf(vq, rxm);
1630 rxvq->stats.errors++;
1635 rte_vlan_strip(rx_pkts[nb_rx]);
1637 seg_res = seg_num - 1;
1639 /* Merge remaining segments */
1640 while (seg_res != 0 && i < (num - 1)) {
1644 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1645 rxm->pkt_len = (uint32_t)(len[i]);
1646 rxm->data_len = (uint16_t)(len[i]);
1648 rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1649 rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1659 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1664 /* The last packet still needs its remaining segments merged */
1665 while (seg_res != 0) {
1666 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1667 VIRTIO_MBUF_BURST_SZ);
1669 prev = rcv_pkts[nb_rx];
1670 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1671 virtio_rmb(hw->weak_barriers);
1672 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1674 uint16_t extra_idx = 0;
1677 while (extra_idx < rcv_cnt) {
1678 rxm = rcv_pkts[extra_idx];
1680 RTE_PKTMBUF_HEADROOM - hdr_size;
1681 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1682 rxm->data_len = (uint16_t)(len[extra_idx]);
1685 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1686 rx_pkts[nb_rx]->data_len += len[extra_idx];
1692 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1697 "No enough segments for packet.");
1698 virtio_discard_rxbuf(vq, prev);
1699 rxvq->stats.errors++;
1704 rxvq->stats.packets += nb_rx;
1706 /* Allocate new mbuf for the used descriptor */
1707 if (likely(!virtqueue_full(vq))) {
1708 /* free_cnt may include mrg descs */
1709 uint16_t free_cnt = vq->vq_free_cnt;
1710 struct rte_mbuf *new_pkts[free_cnt];
1712 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1713 error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1715 if (unlikely(error)) {
1716 for (i = 0; i < free_cnt; i++)
1717 rte_pktmbuf_free(new_pkts[i]);
1719 nb_enqueued += free_cnt;
1721 struct rte_eth_dev *dev =
1722 &rte_eth_devices[rxvq->port_id];
1723 dev->data->rx_mbuf_alloc_failed += free_cnt;
1727 if (likely(nb_enqueued)) {
1728 vq_update_avail_idx(vq);
1730 if (unlikely(virtqueue_kick_prepare(vq))) {
1731 virtqueue_notify(vq);
1732 PMD_RX_LOG(DEBUG, "Notified");
1740 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1741 struct rte_mbuf **rx_pkts,
1744 struct virtnet_rx *rxvq = rx_queue;
1745 struct virtqueue *vq = rxvq->vq;
1746 struct virtio_hw *hw = vq->hw;
1747 struct rte_mbuf *rxm;
1748 struct rte_mbuf *prev = NULL;
1749 uint16_t num, nb_rx = 0;
1750 uint32_t len[VIRTIO_MBUF_BURST_SZ];
1751 struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1752 uint32_t nb_enqueued = 0;
1753 uint32_t seg_num = 0;
1754 uint32_t seg_res = 0;
1755 uint32_t hdr_size = hw->vtnet_hdr_size;
1759 if (unlikely(hw->started == 0))
1764 if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1765 num = VIRTIO_MBUF_BURST_SZ;
1766 if (likely(num > DESC_PER_CACHELINE))
1767 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1769 num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1771 for (i = 0; i < num; i++) {
1772 struct virtio_net_hdr_mrg_rxbuf *header;
1774 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1775 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1779 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1780 PMD_RX_LOG(ERR, "Packet drop");
1782 virtio_discard_rxbuf(vq, rxm);
1783 rxvq->stats.errors++;
1787 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1788 rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1789 seg_num = header->num_buffers;
1794 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1795 rxm->nb_segs = seg_num;
1798 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1799 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1801 rxm->port = rxvq->port_id;
1802 rx_pkts[nb_rx] = rxm;
1805 if (hw->has_rx_offload &&
1806 virtio_rx_offload(rxm, &header->hdr) < 0) {
1807 virtio_discard_rxbuf(vq, rxm);
1808 rxvq->stats.errors++;
1813 rte_vlan_strip(rx_pkts[nb_rx]);
1815 seg_res = seg_num - 1;
1817 /* Merge remaining segments */
1818 while (seg_res != 0 && i < (num - 1)) {
1822 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1823 rxm->pkt_len = (uint32_t)(len[i]);
1824 rxm->data_len = (uint16_t)(len[i]);
1826 rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1827 rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1837 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1842 /* The last packet still needs its remaining segments merged */
1843 while (seg_res != 0) {
1844 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1845 VIRTIO_MBUF_BURST_SZ);
1846 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1847 num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1849 uint16_t extra_idx = 0;
1853 while (extra_idx < rcv_cnt) {
1854 rxm = rcv_pkts[extra_idx];
1857 RTE_PKTMBUF_HEADROOM - hdr_size;
1858 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1859 rxm->data_len = (uint16_t)(len[extra_idx]);
1863 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1864 rx_pkts[nb_rx]->data_len += len[extra_idx];
1869 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1874 "No enough segments for packet.");
1876 virtio_discard_rxbuf(vq, prev);
1877 rxvq->stats.errors++;
1882 rxvq->stats.packets += nb_rx;
1884 /* Allocate new mbuf for the used descriptor */
1885 if (likely(!virtqueue_full(vq))) {
1886 /* free_cnt may include mrg descs */
1887 uint16_t free_cnt = vq->vq_free_cnt;
1888 struct rte_mbuf *new_pkts[free_cnt];
1890 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1891 error = virtqueue_enqueue_recv_refill_packed(vq,
1892 new_pkts, free_cnt);
1893 if (unlikely(error)) {
1894 for (i = 0; i < free_cnt; i++)
1895 rte_pktmbuf_free(new_pkts[i]);
1897 nb_enqueued += free_cnt;
1899 struct rte_eth_dev *dev =
1900 &rte_eth_devices[rxvq->port_id];
1901 dev->data->rx_mbuf_alloc_failed += free_cnt;
1905 if (likely(nb_enqueued)) {
1906 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1907 virtqueue_notify(vq);
1908 PMD_RX_LOG(DEBUG, "Notified");
1916 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1919 struct virtnet_tx *txvq = tx_queue;
1920 struct virtqueue *vq = txvq->vq;
1921 struct virtio_hw *hw = vq->hw;
1922 uint16_t hdr_size = hw->vtnet_hdr_size;
1924 bool in_order = hw->use_inorder_tx;
1927 if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1930 if (unlikely(nb_pkts < 1))
1933 PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1935 if (nb_pkts > vq->vq_free_cnt)
1936 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1939 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1940 struct rte_mbuf *txm = tx_pkts[nb_tx];
1941 int can_push = 0, slots, need;
1943 /* Do VLAN tag insertion */
1944 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1945 error = rte_vlan_insert(&txm);
1946 if (unlikely(error)) {
1947 rte_pktmbuf_free(txm);
1952 /* optimize ring usage */
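/* The header can be pushed into the mbuf's own headroom (can_push)
 * only when the device accepts any descriptor layout, the mbuf is
 * direct, unshared and single-segment, and its headroom is large
 * enough and suitably aligned for the virtio-net header.
 */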
1953 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1954 vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1955 rte_mbuf_refcnt_read(txm) == 1 &&
1956 RTE_MBUF_DIRECT(txm) &&
1957 txm->nb_segs == 1 &&
1958 rte_pktmbuf_headroom(txm) >= hdr_size &&
1959 rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1960 __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1963 /* How many main ring entries are needed for this Tx?
1964  * any_layout => number of segments
1965  * default => number of segments + 1
1967 slots = txm->nb_segs + !can_push;
1968 need = slots - vq->vq_free_cnt;
1970 /* A positive value indicates that more free vring descriptors are needed */
1971 if (unlikely(need > 0)) {
1972 virtio_xmit_cleanup_packed(vq, need, in_order);
1973 need = slots - vq->vq_free_cnt;
1974 if (unlikely(need > 0)) {
1976 "No free tx descriptors to transmit");
1981 /* Enqueue Packet buffers */
1982 virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
1985 virtio_update_packet_stats(&txvq->stats, txm);
1988 txvq->stats.packets += nb_tx;
1990 if (likely(nb_tx)) {
1991 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1992 virtqueue_notify(vq);
1993 PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2001 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2003 struct virtnet_tx *txvq = tx_queue;
2004 struct virtqueue *vq = txvq->vq;
2005 struct virtio_hw *hw = vq->hw;
2006 uint16_t hdr_size = hw->vtnet_hdr_size;
2007 uint16_t nb_used, nb_tx = 0;
2010 if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2013 if (unlikely(nb_pkts < 1))
2016 PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2017 nb_used = VIRTQUEUE_NUSED(vq);
2019 virtio_rmb(hw->weak_barriers);
2020 if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2021 virtio_xmit_cleanup(vq, nb_used);
2023 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2024 struct rte_mbuf *txm = tx_pkts[nb_tx];
2025 int can_push = 0, use_indirect = 0, slots, need;
2027 /* Do VLAN tag insertion */
2028 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2029 error = rte_vlan_insert(&txm);
2030 if (unlikely(error)) {
2031 rte_pktmbuf_free(txm);
2036 /* optimize ring usage */
2037 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2038 vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2039 rte_mbuf_refcnt_read(txm) == 1 &&
2040 RTE_MBUF_DIRECT(txm) &&
2041 txm->nb_segs == 1 &&
2042 rte_pktmbuf_headroom(txm) >= hdr_size &&
2043 rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2044 __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2046 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2047 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2050 /* How many main ring entries are needed for this Tx?
2051  * any_layout => number of segments
2052  * indirect => 1
2053  * default => number of segments + 1
2055 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2056 need = slots - vq->vq_free_cnt;
2058 /* A positive value indicates that more free vring descriptors are needed */
2059 if (unlikely(need > 0)) {
2060 nb_used = VIRTQUEUE_NUSED(vq);
2061 virtio_rmb(hw->weak_barriers);
2062 need = RTE_MIN(need, (int)nb_used);
2064 virtio_xmit_cleanup(vq, need);
2065 need = slots - vq->vq_free_cnt;
2066 if (unlikely(need > 0)) {
2068 "No free tx descriptors to transmit");
2073 /* Enqueue Packet buffers */
2074 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2077 virtio_update_packet_stats(&txvq->stats, txm);
2080 txvq->stats.packets += nb_tx;
2082 if (likely(nb_tx)) {
2083 vq_update_avail_idx(vq);
2085 if (unlikely(virtqueue_kick_prepare(vq))) {
2086 virtqueue_notify(vq);
2087 PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2095 virtio_xmit_pkts_inorder(void *tx_queue,
2096 struct rte_mbuf **tx_pkts,
2099 struct virtnet_tx *txvq = tx_queue;
2100 struct virtqueue *vq = txvq->vq;
2101 struct virtio_hw *hw = vq->hw;
2102 uint16_t hdr_size = hw->vtnet_hdr_size;
2103 uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2104 struct rte_mbuf *inorder_pkts[nb_pkts];
2107 if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2110 if (unlikely(nb_pkts < 1))
2114 PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2115 nb_used = VIRTQUEUE_NUSED(vq);
2117 virtio_rmb(hw->weak_barriers);
2118 if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2119 virtio_xmit_cleanup_inorder(vq, nb_used);
2121 if (unlikely(!vq->vq_free_cnt))
2122 virtio_xmit_cleanup_inorder(vq, nb_used);
2124 nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2126 for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2127 struct rte_mbuf *txm = tx_pkts[nb_tx];
2130 /* Do VLAN tag insertion */
2131 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2132 error = rte_vlan_insert(&txm);
2133 if (unlikely(error)) {
2134 rte_pktmbuf_free(txm);
2139 /* optimize ring usage */
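/* Packets that can carry the virtio-net header in their own headroom
 * are collected into inorder_pkts and flushed as one contiguous burst
 * by virtqueue_enqueue_xmit_inorder(); any other packet first flushes
 * the pending burst and then goes through virtqueue_enqueue_xmit()
 * with a separate header descriptor.
 */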
2140 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2141 vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2142 rte_mbuf_refcnt_read(txm) == 1 &&
2143 RTE_MBUF_DIRECT(txm) &&
2144 txm->nb_segs == 1 &&
2145 rte_pktmbuf_headroom(txm) >= hdr_size &&
2146 rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2147 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2148 inorder_pkts[nb_inorder_pkts] = txm;
2151 virtio_update_packet_stats(&txvq->stats, txm);
2155 if (nb_inorder_pkts) {
2156 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2158 nb_inorder_pkts = 0;
2161 slots = txm->nb_segs + 1;
2162 need = slots - vq->vq_free_cnt;
2163 if (unlikely(need > 0)) {
2164 nb_used = VIRTQUEUE_NUSED(vq);
2165 virtio_rmb(hw->weak_barriers);
2166 need = RTE_MIN(need, (int)nb_used);
2168 virtio_xmit_cleanup_inorder(vq, need);
2170 need = slots - vq->vq_free_cnt;
2172 if (unlikely(need > 0)) {
2174 "No free tx descriptors to transmit");
2178 /* Enqueue Packet buffers */
2179 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2181 virtio_update_packet_stats(&txvq->stats, txm);
2184 /* Flush any remaining in-order packets */
2185 if (nb_inorder_pkts)
2186 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2189 txvq->stats.packets += nb_tx;
2191 if (likely(nb_tx)) {
2192 vq_update_avail_idx(vq);
2194 if (unlikely(virtqueue_kick_prepare(vq))) {
2195 virtqueue_notify(vq);
2196 PMD_TX_LOG(DEBUG, "Notified backend after xmit");