net/virtio: move bytes accounting to common function
[dpdk.git] / drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34
35 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
36 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
37 #else
38 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
39 #endif
40
41 int
42 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
43 {
44         struct virtnet_rx *rxvq = rxq;
45         struct virtqueue *vq = rxvq->vq;
46
47         return VIRTQUEUE_NUSED(vq) >= offset;
48 }
49
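/*
 * Return 'num' descriptors to the free pool of an in-order virtqueue.
 * With VIRTIO_F_IN_ORDER descriptors are consumed sequentially, so only
 * the free count and the tail index (the last freed slot) need updating;
 * no free-chain walking is required.
 */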
50 void
51 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
52 {
53         vq->vq_free_cnt += num;
54         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
55 }
56
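/*
 * Return the descriptor chain starting at 'desc_idx' to the free list.
 * The chain is walked to its last descriptor (indirect descriptors use a
 * single slot), the per-descriptor state is cleared, and the chain is
 * linked after the current free-list tail, or becomes the new head if
 * the free list was empty.
 */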
57 void
58 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
59 {
60         struct vring_desc *dp, *dp_tail;
61         struct vq_desc_extra *dxp;
62         uint16_t desc_idx_last = desc_idx;
63
64         dp  = &vq->vq_ring.desc[desc_idx];
65         dxp = &vq->vq_descx[desc_idx];
66         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
67         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
68                 while (dp->flags & VRING_DESC_F_NEXT) {
69                         desc_idx_last = dp->next;
70                         dp = &vq->vq_ring.desc[dp->next];
71                 }
72         }
73         dxp->ndescs = 0;
74
75         /*
76          * We must append the existing free chain, if any, to the end of the
77          * newly freed chain. If the virtqueue was completely used, then the
78          * head would be VQ_RING_DESC_CHAIN_END.
79          */
80         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
81                 vq->vq_desc_head_idx = desc_idx;
82         } else {
83                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
84                 dp_tail->next = desc_idx;
85         }
86
87         vq->vq_desc_tail_idx = desc_idx_last;
88         dp->next = VQ_RING_DESC_CHAIN_END;
89 }
90
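/*
 * Dequeue up to 'num' received buffers from the used ring. For each used
 * element the backing mbuf (stored as the descriptor cookie) is returned
 * in 'rx_pkts' and its length in 'len'; the descriptor chain is freed as
 * it is consumed. Returns the number of buffers actually dequeued.
 */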
91 static uint16_t
92 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
93                            uint32_t *len, uint16_t num)
94 {
95         struct vring_used_elem *uep;
96         struct rte_mbuf *cookie;
97         uint16_t used_idx, desc_idx;
98         uint16_t i;
99
100         /* Caller guarantees at least 'num' used entries are available */
101         for (i = 0; i < num ; i++) {
102                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
103                 uep = &vq->vq_ring.used->ring[used_idx];
104                 desc_idx = (uint16_t) uep->id;
105                 len[i] = uep->len;
106                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
107
108                 if (unlikely(cookie == NULL)) {
109                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
110                                 vq->vq_used_cons_idx);
111                         break;
112                 }
113
114                 rte_prefetch0(cookie);
115                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
116                 rx_pkts[i]  = cookie;
117                 vq->vq_used_cons_idx++;
118                 vq_ring_free_chain(vq, desc_idx);
119                 vq->vq_descx[desc_idx].cookie = NULL;
120         }
121
122         return i;
123 }
124
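/*
 * In-order variant of the RX dequeue: with VIRTIO_F_IN_ORDER the used
 * index equals the descriptor index, so all consumed descriptors can be
 * released in one batch via vq_ring_free_inorder() instead of per chain.
 */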
125 static uint16_t
126 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
127                         struct rte_mbuf **rx_pkts,
128                         uint32_t *len,
129                         uint16_t num)
130 {
131         struct vring_used_elem *uep;
132         struct rte_mbuf *cookie;
133         uint16_t used_idx = 0;
134         uint16_t i;
135
136         if (unlikely(num == 0))
137                 return 0;
138
139         for (i = 0; i < num; i++) {
140                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
141                 /* Desc idx same as used idx */
142                 uep = &vq->vq_ring.used->ring[used_idx];
143                 len[i] = uep->len;
144                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
145
146                 if (unlikely(cookie == NULL)) {
147                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
148                                 vq->vq_used_cons_idx);
149                         break;
150                 }
151
152                 rte_prefetch0(cookie);
153                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
154                 rx_pkts[i]  = cookie;
155                 vq->vq_used_cons_idx++;
156                 vq->vq_descx[used_idx].cookie = NULL;
157         }
158
159         vq_ring_free_inorder(vq, used_idx, i);
160         return i;
161 }
162
163 #ifndef DEFAULT_TX_FREE_THRESH
164 #define DEFAULT_TX_FREE_THRESH 32
165 #endif
166
167 /* Cleanup from completed transmits. */
168 static void
169 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
170 {
171         uint16_t i, used_idx, desc_idx;
172         for (i = 0; i < num; i++) {
173                 struct vring_used_elem *uep;
174                 struct vq_desc_extra *dxp;
175
176                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
177                 uep = &vq->vq_ring.used->ring[used_idx];
178
179                 desc_idx = (uint16_t) uep->id;
180                 dxp = &vq->vq_descx[desc_idx];
181                 vq->vq_used_cons_idx++;
182                 vq_ring_free_chain(vq, desc_idx);
183
184                 if (dxp->cookie != NULL) {
185                         rte_pktmbuf_free(dxp->cookie);
186                         dxp->cookie = NULL;
187                 }
188         }
189 }
190
191 /* Cleanup from completed inorder transmits. */
192 static void
193 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
194 {
195         uint16_t i, used_idx, desc_idx = 0, last_idx;
196         int16_t free_cnt = 0;
197         struct vq_desc_extra *dxp = NULL;
198
199         if (unlikely(num == 0))
200                 return;
201
202         for (i = 0; i < num; i++) {
203                 struct vring_used_elem *uep;
204
205                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
206                 uep = &vq->vq_ring.used->ring[used_idx];
207                 desc_idx = (uint16_t)uep->id;
208
209                 dxp = &vq->vq_descx[desc_idx];
210                 vq->vq_used_cons_idx++;
211
212                 if (dxp->cookie != NULL) {
213                         rte_pktmbuf_free(dxp->cookie);
214                         dxp->cookie = NULL;
215                 }
216         }
217
218         last_idx = desc_idx + dxp->ndescs - 1;
219         free_cnt = last_idx - vq->vq_desc_tail_idx;
220         if (free_cnt <= 0)
221                 free_cnt += vq->vq_nentries;
222
223         vq_ring_free_inorder(vq, last_idx, free_cnt);
224 }
225
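/*
 * Refill an in-order RX virtqueue with 'num' receive buffers. Each mbuf
 * becomes a single device-writable descriptor whose address starts at
 * the virtio-net header location just in front of the packet data.
 * Returns 0 on success, -ENOSPC or -EMSGSIZE if the ring lacks room.
 */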
226 static inline int
227 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
228                         struct rte_mbuf **cookies,
229                         uint16_t num)
230 {
231         struct vq_desc_extra *dxp;
232         struct virtio_hw *hw = vq->hw;
233         struct vring_desc *start_dp;
234         uint16_t head_idx, idx, i = 0;
235
236         if (unlikely(vq->vq_free_cnt == 0))
237                 return -ENOSPC;
238         if (unlikely(vq->vq_free_cnt < num))
239                 return -EMSGSIZE;
240
241         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
242         start_dp = vq->vq_ring.desc;
243
244         while (i < num) {
245                 idx = head_idx & (vq->vq_nentries - 1);
246                 dxp = &vq->vq_descx[idx];
247                 dxp->cookie = (void *)cookies[i];
248                 dxp->ndescs = 1;
249
250                 start_dp[idx].addr =
251                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
252                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
253                 start_dp[idx].len =
254                                 cookies[i]->buf_len -
255                                 RTE_PKTMBUF_HEADROOM +
256                                 hw->vtnet_hdr_size;
257                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
258
259                 vq_update_avail_ring(vq, idx);
260                 head_idx++;
261                 i++;
262         }
263
264         vq->vq_desc_head_idx += num;
265         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
266         return 0;
267 }
268
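/*
 * Post one receive buffer to a split (out-of-order) RX virtqueue. The
 * descriptor is taken from the head of the free list and made device
 * writable, covering the virtio-net header plus the mbuf data room.
 * Returns 0 on success or a negative errno if no descriptor is free.
 */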
269 static inline int
270 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
271 {
272         struct vq_desc_extra *dxp;
273         struct virtio_hw *hw = vq->hw;
274         struct vring_desc *start_dp;
275         uint16_t needed = 1;
276         uint16_t head_idx, idx;
277
278         if (unlikely(vq->vq_free_cnt == 0))
279                 return -ENOSPC;
280         if (unlikely(vq->vq_free_cnt < needed))
281                 return -EMSGSIZE;
282
283         head_idx = vq->vq_desc_head_idx;
284         if (unlikely(head_idx >= vq->vq_nentries))
285                 return -EFAULT;
286
287         idx = head_idx;
288         dxp = &vq->vq_descx[idx];
289         dxp->cookie = (void *)cookie;
290         dxp->ndescs = needed;
291
292         start_dp = vq->vq_ring.desc;
293         start_dp[idx].addr =
294                 VIRTIO_MBUF_ADDR(cookie, vq) +
295                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
296         start_dp[idx].len =
297                 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
298         start_dp[idx].flags =  VRING_DESC_F_WRITE;
299         idx = start_dp[idx].next;
300         vq->vq_desc_head_idx = idx;
301         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
302                 vq->vq_desc_tail_idx = idx;
303         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
304         vq_update_avail_ring(vq, head_idx);
305
306         return 0;
307 }
308
309 /* When doing TSO, the IP length is not included in the pseudo header
310  * checksum of the packet given to the PMD, but virtio expects it to be
311  * included.
312  */
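/*
 * The adjustment below simply folds ip_paylen into the existing TCP
 * pseudo-header checksum using one's-complement arithmetic:
 *   tmp = cksum + ip_paylen; cksum = (tmp & 0xffff) + (tmp >> 16);
 * so the checksum only needs this small fix-up instead of being
 * recomputed from the headers.
 */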
313 static void
314 virtio_tso_fix_cksum(struct rte_mbuf *m)
315 {
316         /* common case: header is not fragmented */
317         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
318                         m->l4_len)) {
319                 struct ipv4_hdr *iph;
320                 struct ipv6_hdr *ip6h;
321                 struct tcp_hdr *th;
322                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
323                 uint32_t tmp;
324
325                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
326                 th = RTE_PTR_ADD(iph, m->l3_len);
327                 if ((iph->version_ihl >> 4) == 4) {
328                         iph->hdr_checksum = 0;
329                         iph->hdr_checksum = rte_ipv4_cksum(iph);
330                         ip_len = iph->total_length;
331                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
332                                 m->l3_len);
333                 } else {
334                         ip6h = (struct ipv6_hdr *)iph;
335                         ip_paylen = ip6h->payload_len;
336                 }
337
338                 /* fold ip_paylen into the phdr checksum (one's-complement add) */
339                 prev_cksum = th->cksum;
340                 tmp = prev_cksum;
341                 tmp += ip_paylen;
342                 tmp = (tmp & 0xffff) + (tmp >> 16);
343                 new_cksum = tmp;
344
345                 /* replace it in the packet */
346                 th->cksum = new_cksum;
347         }
348 }
349
350
351 /* avoid the write operation when it is not needed, to lessen cache issues */
352 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
353         if ((var) != (val))                     \
354                 (var) = (val);                  \
355 } while (0)
356
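/*
 * Fill the virtio-net header TX offload fields (checksum offload and
 * TSO/GSO) from the mbuf offload flags. When 'offload' is false the
 * header is left untouched; callers zero it separately in that case.
 */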
357 static inline void
358 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
359                         struct rte_mbuf *cookie,
360                         bool offload)
361 {
362         if (offload) {
363                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
364                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
365
366                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
367                 case PKT_TX_UDP_CKSUM:
368                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
369                         hdr->csum_offset = offsetof(struct udp_hdr,
370                                 dgram_cksum);
371                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
372                         break;
373
374                 case PKT_TX_TCP_CKSUM:
375                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
376                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
377                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
378                         break;
379
380                 default:
381                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
382                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
383                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
384                         break;
385                 }
386
387                 /* TCP Segmentation Offload */
388                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
389                         virtio_tso_fix_cksum(cookie);
390                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
391                                 VIRTIO_NET_HDR_GSO_TCPV6 :
392                                 VIRTIO_NET_HDR_GSO_TCPV4;
393                         hdr->gso_size = cookie->tso_segsz;
394                         hdr->hdr_len =
395                                 cookie->l2_len +
396                                 cookie->l3_len +
397                                 cookie->l4_len;
398                 } else {
399                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
400                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
401                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
402                 }
403         }
404 }
405
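/*
 * Enqueue 'num' packets on an in-order TX virtqueue, one descriptor per
 * packet. The virtio-net header is pushed into the mbuf headroom, so
 * callers are expected to pass only single-segment packets with enough
 * headroom (see virtio_xmit_pkts_inorder()).
 */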
406 static inline void
407 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
408                         struct rte_mbuf **cookies,
409                         uint16_t num)
410 {
411         struct vq_desc_extra *dxp;
412         struct virtqueue *vq = txvq->vq;
413         struct vring_desc *start_dp;
414         struct virtio_net_hdr *hdr;
415         uint16_t idx;
416         uint16_t head_size = vq->hw->vtnet_hdr_size;
417         uint16_t i = 0;
418
419         idx = vq->vq_desc_head_idx;
420         start_dp = vq->vq_ring.desc;
421
422         while (i < num) {
423                 idx = idx & (vq->vq_nentries - 1);
424                 dxp = &vq->vq_descx[idx];
425                 dxp->cookie = (void *)cookies[i];
426                 dxp->ndescs = 1;
427
428                 hdr = (struct virtio_net_hdr *)
429                         rte_pktmbuf_prepend(cookies[i], head_size);
430                 cookies[i]->pkt_len -= head_size;
431
432                 /* if offload is disabled, the header is not zeroed below, so do it now */
433                 if (!vq->hw->has_tx_offload) {
434                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
435                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
436                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
437                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
438                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
439                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
440                 }
441
442                 virtqueue_xmit_offload(hdr, cookies[i],
443                                 vq->hw->has_tx_offload);
444
445                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
446                 start_dp[idx].len   = cookies[i]->data_len;
447                 start_dp[idx].flags = 0;
448
449                 vq_update_avail_ring(vq, idx);
450
451                 idx++;
452                 i++;
453         }
454
455         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
456         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
457 }
458
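/*
 * General TX enqueue. Depending on the negotiated features the
 * virtio-net header is either pushed into the mbuf headroom (can_push),
 * placed in the reserved region referenced by an indirect descriptor
 * table (use_indirect), or chained as a separate descriptor ahead of
 * the data segments.
 */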
459 static inline void
460 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
461                         uint16_t needed, int use_indirect, int can_push,
462                         int in_order)
463 {
464         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
465         struct vq_desc_extra *dxp;
466         struct virtqueue *vq = txvq->vq;
467         struct vring_desc *start_dp;
468         uint16_t seg_num = cookie->nb_segs;
469         uint16_t head_idx, idx;
470         uint16_t head_size = vq->hw->vtnet_hdr_size;
471         struct virtio_net_hdr *hdr;
472
473         head_idx = vq->vq_desc_head_idx;
474         idx = head_idx;
475         dxp = &vq->vq_descx[idx];
476         dxp->cookie = (void *)cookie;
477         dxp->ndescs = needed;
478
479         start_dp = vq->vq_ring.desc;
480
481         if (can_push) {
482                 /* prepend cannot fail, checked by caller */
483                 hdr = (struct virtio_net_hdr *)
484                         rte_pktmbuf_prepend(cookie, head_size);
485                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
486                  * which is not wanted here; the subtraction below restores the pkt size.
487                  */
488                 cookie->pkt_len -= head_size;
489
490                 /* if offload is disabled, the header is not zeroed below, so do it now */
491                 if (!vq->hw->has_tx_offload) {
492                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
493                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
494                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
495                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
496                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
497                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
498                 }
499         } else if (use_indirect) {
500                 /* setup tx ring slot to point to indirect
501                  * descriptor list stored in reserved region.
502                  *
503                  * the first slot in indirect ring is already preset
504                  * to point to the header in reserved region
505                  */
506                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
507                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
508                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
509                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
510                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
511
512                 /* loop below will fill in rest of the indirect elements */
513                 start_dp = txr[idx].tx_indir;
514                 idx = 1;
515         } else {
516                 /* setup first tx ring slot to point to header
517                  * stored in reserved region.
518                  */
519                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
520                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
521                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
522                 start_dp[idx].flags = VRING_DESC_F_NEXT;
523                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
524
525                 idx = start_dp[idx].next;
526         }
527
528         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
529
530         do {
531                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
532                 start_dp[idx].len   = cookie->data_len;
533                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
534                 idx = start_dp[idx].next;
535         } while ((cookie = cookie->next) != NULL);
536
537         if (use_indirect)
538                 idx = vq->vq_ring.desc[head_idx].next;
539
540         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
541
542         vq->vq_desc_head_idx = idx;
543         vq_update_avail_ring(vq, head_idx);
544
545         if (!in_order) {
546                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
547                         vq->vq_desc_tail_idx = idx;
548         }
549 }
550
551 void
552 virtio_dev_cq_start(struct rte_eth_dev *dev)
553 {
554         struct virtio_hw *hw = dev->data->dev_private;
555
556         if (hw->cvq && hw->cvq->vq) {
557                 rte_spinlock_init(&hw->cvq->lock);
558                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
559         }
560 }
561
562 int
563 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
564                         uint16_t queue_idx,
565                         uint16_t nb_desc,
566                         unsigned int socket_id __rte_unused,
567                         const struct rte_eth_rxconf *rx_conf __rte_unused,
568                         struct rte_mempool *mp)
569 {
570         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
571         struct virtio_hw *hw = dev->data->dev_private;
572         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
573         struct virtnet_rx *rxvq;
574
575         PMD_INIT_FUNC_TRACE();
576
577         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
578                 nb_desc = vq->vq_nentries;
579         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
580
581         rxvq = &vq->rxq;
582         rxvq->queue_id = queue_idx;
583         rxvq->mpool = mp;
584         if (rxvq->mpool == NULL) {
585                 rte_exit(EXIT_FAILURE,
586                         "Cannot allocate mbufs for rx virtqueue");
587         }
588
589         dev->data->rx_queues[queue_idx] = rxvq;
590
591         return 0;
592 }
593
594 int
595 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
596 {
597         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
598         struct virtio_hw *hw = dev->data->dev_private;
599         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
600         struct virtnet_rx *rxvq = &vq->rxq;
601         struct rte_mbuf *m;
602         uint16_t desc_idx;
603         int error, nbufs, i;
604
605         PMD_INIT_FUNC_TRACE();
606
607         /* Allocate blank mbufs for each rx descriptor */
608         nbufs = 0;
609
610         if (hw->use_simple_rx) {
611                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
612                      desc_idx++) {
613                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
614                         vq->vq_ring.desc[desc_idx].flags =
615                                 VRING_DESC_F_WRITE;
616                 }
617
618                 virtio_rxq_vec_setup(rxvq);
619         }
620
621         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
622         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
623              desc_idx++) {
624                 vq->sw_ring[vq->vq_nentries + desc_idx] =
625                         &rxvq->fake_mbuf;
626         }
627
628         if (hw->use_simple_rx) {
629                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
630                         virtio_rxq_rearm_vec(rxvq);
631                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
632                 }
633         } else if (hw->use_inorder_rx) {
634                 if ((!virtqueue_full(vq))) {
635                         uint16_t free_cnt = vq->vq_free_cnt;
636                         struct rte_mbuf *pkts[free_cnt];
637
638                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
639                                 free_cnt)) {
640                                 error = virtqueue_enqueue_refill_inorder(vq,
641                                                 pkts,
642                                                 free_cnt);
643                                 if (unlikely(error)) {
644                                         for (i = 0; i < free_cnt; i++)
645                                                 rte_pktmbuf_free(pkts[i]);
646                                 }
647                         }
648
649                         nbufs += free_cnt;
650                         vq_update_avail_idx(vq);
651                 }
652         } else {
653                 while (!virtqueue_full(vq)) {
654                         m = rte_mbuf_raw_alloc(rxvq->mpool);
655                         if (m == NULL)
656                                 break;
657
658                         /* Enqueue allocated buffers */
659                         error = virtqueue_enqueue_recv_refill(vq, m);
660                         if (error) {
661                                 rte_pktmbuf_free(m);
662                                 break;
663                         }
664                         nbufs++;
665                 }
666
667                 vq_update_avail_idx(vq);
668         }
669
670         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
671
672         VIRTQUEUE_DUMP(vq);
673
674         return 0;
675 }
676
677 /*
678  * struct rte_eth_dev *dev: Used to update dev
679  * uint16_t nb_desc: Defaults to values read from config space
680  * unsigned int socket_id: Used to allocate memzone
681  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
682  * uint16_t queue_idx: Just used as an index in dev txq list
683  */
684 int
685 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
686                         uint16_t queue_idx,
687                         uint16_t nb_desc,
688                         unsigned int socket_id __rte_unused,
689                         const struct rte_eth_txconf *tx_conf)
690 {
691         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
692         struct virtio_hw *hw = dev->data->dev_private;
693         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
694         struct virtnet_tx *txvq;
695         uint16_t tx_free_thresh;
696
697         PMD_INIT_FUNC_TRACE();
698
699         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
700                 nb_desc = vq->vq_nentries;
701         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
702
703         txvq = &vq->txq;
704         txvq->queue_id = queue_idx;
705
706         tx_free_thresh = tx_conf->tx_free_thresh;
707         if (tx_free_thresh == 0)
708                 tx_free_thresh =
709                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
710
711         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
712                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
713                         "number of TX entries minus 3 (%u)."
714                         " (tx_free_thresh=%u port=%u queue=%u)\n",
715                         vq->vq_nentries - 3,
716                         tx_free_thresh, dev->data->port_id, queue_idx);
717                 return -EINVAL;
718         }
719
720         vq->vq_free_thresh = tx_free_thresh;
721
722         dev->data->tx_queues[queue_idx] = txvq;
723         return 0;
724 }
725
726 int
727 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
728                                 uint16_t queue_idx)
729 {
730         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
731         struct virtio_hw *hw = dev->data->dev_private;
732         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
733
734         PMD_INIT_FUNC_TRACE();
735
736         if (hw->use_inorder_tx)
737                 vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
738
739         VIRTQUEUE_DUMP(vq);
740
741         return 0;
742 }
743
744 static void
745 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
746 {
747         int error;
748         /*
749          * Requeue the discarded mbuf. This should always be
750          * successful since it was just dequeued.
751          */
752         error = virtqueue_enqueue_recv_refill(vq, m);
753
754         if (unlikely(error)) {
755                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
756                 rte_pktmbuf_free(m);
757         }
758 }
759
760 static void
761 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
762 {
763         int error;
764
765         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
766         if (unlikely(error)) {
767                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
768                 rte_pktmbuf_free(m);
769         }
770 }
771
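/*
 * Common per-packet statistics accounting shared by the RX and TX paths:
 * bytes (accounted here so every datapath updates it consistently),
 * packet-size histogram bins and multicast/broadcast counters derived
 * from the destination MAC address.
 */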
772 static void
773 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
774 {
775         uint32_t s = mbuf->pkt_len;
776         struct ether_addr *ea;
777
778         stats->bytes += s;
779
780         if (s == 64) {
781                 stats->size_bins[1]++;
782         } else if (s > 64 && s < 1024) {
783                 uint32_t bin;
784
785                 /* use the leading-zero count to index the power-of-two size bin */
786                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
787                 stats->size_bins[bin]++;
788         } else {
789                 if (s < 64)
790                         stats->size_bins[0]++;
791                 else if (s < 1519)
792                         stats->size_bins[6]++;
793                 else if (s >= 1519)
794                         stats->size_bins[7]++;
795         }
796
797         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
798         if (is_multicast_ether_addr(ea)) {
799                 if (is_broadcast_ether_addr(ea))
800                         stats->broadcast++;
801                 else
802                         stats->multicast++;
803         }
804 }
805
806 static inline void
807 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
808 {
809         VIRTIO_DUMP_PACKET(m, m->data_len);
810
811         virtio_update_packet_stats(&rxvq->stats, m);
812 }
813
814 /* Optionally fill offload information in structure */
815 static int
816 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
817 {
818         struct rte_net_hdr_lens hdr_lens;
819         uint32_t hdrlen, ptype;
820         int l4_supported = 0;
821
822         /* nothing to do */
823         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
824                 return 0;
825
826         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
827
828         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
829         m->packet_type = ptype;
830         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
831             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
832             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
833                 l4_supported = 1;
834
835         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
836                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
837                 if (hdr->csum_start <= hdrlen && l4_supported) {
838                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
839                 } else {
840                         /* Unknown proto or tunnel, do sw cksum. We can assume
841                          * the cksum field is in the first segment since the
842                          * buffers we provided to the host are large enough.
843                          * In case of SCTP, this will be wrong since it's a CRC
844                          * but there's nothing we can do.
845                          */
846                         uint16_t csum = 0, off;
847
848                         rte_raw_cksum_mbuf(m, hdr->csum_start,
849                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
850                                 &csum);
851                         if (likely(csum != 0xffff))
852                                 csum = ~csum;
853                         off = hdr->csum_offset + hdr->csum_start;
854                         if (rte_pktmbuf_data_len(m) >= off + 1)
855                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
856                                         off) = csum;
857                 }
858         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
859                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
860         }
861
862         /* GSO request, save required information in mbuf */
863         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
864                 /* Check unsupported modes */
865                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
866                     (hdr->gso_size == 0)) {
867                         return -EINVAL;
868                 }
869
870                 /* Update mss length in mbuf */
871                 m->tso_segsz = hdr->gso_size;
872                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
873                         case VIRTIO_NET_HDR_GSO_TCPV4:
874                         case VIRTIO_NET_HDR_GSO_TCPV6:
875                                 m->ol_flags |= PKT_RX_LRO | \
876                                         PKT_RX_L4_CKSUM_NONE;
877                                 break;
878                         default:
879                                 return -EINVAL;
880                 }
881         }
882
883         return 0;
884 }
885
886 #define VIRTIO_MBUF_BURST_SZ 64
887 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
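/*
 * Non-mergeable RX burst: each packet fits in a single descriptor.
 * Dequeue from the used ring, strip the virtio-net header, apply VLAN
 * stripping and RX offload parsing when enabled, then refill the ring
 * with fresh mbufs and notify the device if it asked for a kick.
 */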
888 uint16_t
889 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
890 {
891         struct virtnet_rx *rxvq = rx_queue;
892         struct virtqueue *vq = rxvq->vq;
893         struct virtio_hw *hw = vq->hw;
894         struct rte_mbuf *rxm, *new_mbuf;
895         uint16_t nb_used, num, nb_rx;
896         uint32_t len[VIRTIO_MBUF_BURST_SZ];
897         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
898         int error;
899         uint32_t i, nb_enqueued;
900         uint32_t hdr_size;
901         struct virtio_net_hdr *hdr;
902
903         nb_rx = 0;
904         if (unlikely(hw->started == 0))
905                 return nb_rx;
906
907         nb_used = VIRTQUEUE_NUSED(vq);
908
909         virtio_rmb();
910
911         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
912         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
913                 num = VIRTIO_MBUF_BURST_SZ;
914         if (likely(num > DESC_PER_CACHELINE))
915                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
916
917         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
918         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
919
920         nb_enqueued = 0;
921         hdr_size = hw->vtnet_hdr_size;
922
923         for (i = 0; i < num ; i++) {
924                 rxm = rcv_pkts[i];
925
926                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
927
928                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
929                         PMD_RX_LOG(ERR, "Packet drop");
930                         nb_enqueued++;
931                         virtio_discard_rxbuf(vq, rxm);
932                         rxvq->stats.errors++;
933                         continue;
934                 }
935
936                 rxm->port = rxvq->port_id;
937                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
938                 rxm->ol_flags = 0;
939                 rxm->vlan_tci = 0;
940
941                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
942                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
943
944                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
945                         RTE_PKTMBUF_HEADROOM - hdr_size);
946
947                 if (hw->vlan_strip)
948                         rte_vlan_strip(rxm);
949
950                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
951                         virtio_discard_rxbuf(vq, rxm);
952                         rxvq->stats.errors++;
953                         continue;
954                 }
955
956                 virtio_rx_stats_updated(rxvq, rxm);
957
958                 rx_pkts[nb_rx++] = rxm;
959         }
960
961         rxvq->stats.packets += nb_rx;
962
963         /* Allocate new mbuf for the used descriptor */
964         while (likely(!virtqueue_full(vq))) {
965                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
966                 if (unlikely(new_mbuf == NULL)) {
967                         struct rte_eth_dev *dev
968                                 = &rte_eth_devices[rxvq->port_id];
969                         dev->data->rx_mbuf_alloc_failed++;
970                         break;
971                 }
972                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
973                 if (unlikely(error)) {
974                         rte_pktmbuf_free(new_mbuf);
975                         break;
976                 }
977                 nb_enqueued++;
978         }
979
980         if (likely(nb_enqueued)) {
981                 vq_update_avail_idx(vq);
982
983                 if (unlikely(virtqueue_kick_prepare(vq))) {
984                         virtqueue_notify(vq);
985                         PMD_RX_LOG(DEBUG, "Notified");
986                 }
987         }
988
989         return nb_rx;
990 }
991
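/*
 * Mergeable RX burst for in-order virtqueues: the header's num_buffers
 * field tells how many descriptors make up one packet, so the extra
 * buffers are dequeued and chained onto the head mbuf before the ring
 * is refilled in bulk.
 */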
992 uint16_t
993 virtio_recv_mergeable_pkts_inorder(void *rx_queue,
994                         struct rte_mbuf **rx_pkts,
995                         uint16_t nb_pkts)
996 {
997         struct virtnet_rx *rxvq = rx_queue;
998         struct virtqueue *vq = rxvq->vq;
999         struct virtio_hw *hw = vq->hw;
1000         struct rte_mbuf *rxm;
1001         struct rte_mbuf *prev;
1002         uint16_t nb_used, num, nb_rx;
1003         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1004         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1005         int error;
1006         uint32_t nb_enqueued;
1007         uint32_t seg_num;
1008         uint32_t seg_res;
1009         uint32_t hdr_size;
1010         int32_t i;
1011
1012         nb_rx = 0;
1013         if (unlikely(hw->started == 0))
1014                 return nb_rx;
1015
1016         nb_used = VIRTQUEUE_NUSED(vq);
1017         nb_used = RTE_MIN(nb_used, nb_pkts);
1018         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1019
1020         virtio_rmb();
1021
1022         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1023
1024         nb_enqueued = 0;
1025         seg_num = 1;
1026         seg_res = 0;
1027         hdr_size = hw->vtnet_hdr_size;
1028
1029         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1030
1031         for (i = 0; i < num; i++) {
1032                 struct virtio_net_hdr_mrg_rxbuf *header;
1033
1034                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1035                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1036
1037                 rxm = rcv_pkts[i];
1038
1039                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1040                         PMD_RX_LOG(ERR, "Packet drop");
1041                         nb_enqueued++;
1042                         virtio_discard_rxbuf_inorder(vq, rxm);
1043                         rxvq->stats.errors++;
1044                         continue;
1045                 }
1046
1047                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1048                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1049                          - hdr_size);
1050                 seg_num = header->num_buffers;
1051
1052                 if (seg_num == 0)
1053                         seg_num = 1;
1054
1055                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1056                 rxm->nb_segs = seg_num;
1057                 rxm->ol_flags = 0;
1058                 rxm->vlan_tci = 0;
1059                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1060                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1061
1062                 rxm->port = rxvq->port_id;
1063
1064                 rx_pkts[nb_rx] = rxm;
1065                 prev = rxm;
1066
1067                 if (vq->hw->has_rx_offload &&
1068                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1069                         virtio_discard_rxbuf_inorder(vq, rxm);
1070                         rxvq->stats.errors++;
1071                         continue;
1072                 }
1073
1074                 if (hw->vlan_strip)
1075                         rte_vlan_strip(rx_pkts[nb_rx]);
1076
1077                 seg_res = seg_num - 1;
1078
1079                 /* Merge remaining segments */
1080                 while (seg_res != 0 && i < (num - 1)) {
1081                         i++;
1082
1083                         rxm = rcv_pkts[i];
1084                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1085                         rxm->pkt_len = (uint32_t)(len[i]);
1086                         rxm->data_len = (uint16_t)(len[i]);
1087
1088                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1089                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1090
1091                         if (prev)
1092                                 prev->next = rxm;
1093
1094                         prev = rxm;
1095                         seg_res -= 1;
1096                 }
1097
1098                 if (!seg_res) {
1099                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1100                         nb_rx++;
1101                 }
1102         }
1103
1104         /* The last packet may still need its remaining segments merged */
1105         while (seg_res != 0) {
1106                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1107                                         VIRTIO_MBUF_BURST_SZ);
1108
1109                 prev = rcv_pkts[nb_rx];
1110                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1111                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1112                                                            rcv_cnt);
1113                         uint16_t extra_idx = 0;
1114
1115                         rcv_cnt = num;
1116                         while (extra_idx < rcv_cnt) {
1117                                 rxm = rcv_pkts[extra_idx];
1118                                 rxm->data_off =
1119                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1120                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1121                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1122                                 prev->next = rxm;
1123                                 prev = rxm;
1124                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1125                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1126                                 extra_idx += 1;
1127                         }
1128                         seg_res -= rcv_cnt;
1129
1130                         if (!seg_res) {
1131                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1132                                 nb_rx++;
1133                         }
1134                 } else {
1135                         PMD_RX_LOG(ERR,
1136                                         "Not enough segments for packet.");
1137                         virtio_discard_rxbuf_inorder(vq, prev);
1138                         rxvq->stats.errors++;
1139                         break;
1140                 }
1141         }
1142
1143         rxvq->stats.packets += nb_rx;
1144
1145         /* Allocate new mbuf for the used descriptor */
1146
1147         if (likely(!virtqueue_full(vq))) {
1148                 /* free_cnt may include mrg descs */
1149                 uint16_t free_cnt = vq->vq_free_cnt;
1150                 struct rte_mbuf *new_pkts[free_cnt];
1151
1152                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1153                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1154                                         free_cnt);
1155                         if (unlikely(error)) {
1156                                 for (i = 0; i < free_cnt; i++)
1157                                         rte_pktmbuf_free(new_pkts[i]);
1158                         }
1159                         nb_enqueued += free_cnt;
1160                 } else {
1161                         struct rte_eth_dev *dev =
1162                                 &rte_eth_devices[rxvq->port_id];
1163                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1164                 }
1165         }
1166
1167         if (likely(nb_enqueued)) {
1168                 vq_update_avail_idx(vq);
1169
1170                 if (unlikely(virtqueue_kick_prepare(vq))) {
1171                         virtqueue_notify(vq);
1172                         PMD_RX_LOG(DEBUG, "Notified");
1173                 }
1174         }
1175
1176         return nb_rx;
1177 }
1178
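/*
 * Mergeable RX burst for split (out-of-order) virtqueues: packets are
 * dequeued one head buffer at a time and any additional segments
 * announced in num_buffers are fetched and linked before the packet is
 * handed up to the application.
 */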
1179 uint16_t
1180 virtio_recv_mergeable_pkts(void *rx_queue,
1181                         struct rte_mbuf **rx_pkts,
1182                         uint16_t nb_pkts)
1183 {
1184         struct virtnet_rx *rxvq = rx_queue;
1185         struct virtqueue *vq = rxvq->vq;
1186         struct virtio_hw *hw = vq->hw;
1187         struct rte_mbuf *rxm, *new_mbuf;
1188         uint16_t nb_used, num, nb_rx;
1189         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1190         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1191         struct rte_mbuf *prev;
1192         int error;
1193         uint32_t i, nb_enqueued;
1194         uint32_t seg_num;
1195         uint16_t extra_idx;
1196         uint32_t seg_res;
1197         uint32_t hdr_size;
1198
1199         nb_rx = 0;
1200         if (unlikely(hw->started == 0))
1201                 return nb_rx;
1202
1203         nb_used = VIRTQUEUE_NUSED(vq);
1204
1205         virtio_rmb();
1206
1207         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1208
1209         i = 0;
1210         nb_enqueued = 0;
1211         seg_num = 0;
1212         extra_idx = 0;
1213         seg_res = 0;
1214         hdr_size = hw->vtnet_hdr_size;
1215
1216         while (i < nb_used) {
1217                 struct virtio_net_hdr_mrg_rxbuf *header;
1218
1219                 if (nb_rx == nb_pkts)
1220                         break;
1221
1222                 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1223                 if (num != 1)
1224                         continue;
1225
1226                 i++;
1227
1228                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1229                 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1230
1231                 rxm = rcv_pkts[0];
1232
1233                 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1234                         PMD_RX_LOG(ERR, "Packet drop");
1235                         nb_enqueued++;
1236                         virtio_discard_rxbuf(vq, rxm);
1237                         rxvq->stats.errors++;
1238                         continue;
1239                 }
1240
1241                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1242                         RTE_PKTMBUF_HEADROOM - hdr_size);
1243                 seg_num = header->num_buffers;
1244
1245                 if (seg_num == 0)
1246                         seg_num = 1;
1247
1248                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1249                 rxm->nb_segs = seg_num;
1250                 rxm->ol_flags = 0;
1251                 rxm->vlan_tci = 0;
1252                 rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1253                 rxm->data_len = (uint16_t)(len[0] - hdr_size);
1254
1255                 rxm->port = rxvq->port_id;
1256                 rx_pkts[nb_rx] = rxm;
1257                 prev = rxm;
1258
1259                 if (hw->has_rx_offload &&
1260                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1261                         virtio_discard_rxbuf(vq, rxm);
1262                         rxvq->stats.errors++;
1263                         continue;
1264                 }
1265
1266                 seg_res = seg_num - 1;
1267
1268                 while (seg_res != 0) {
1269                         /*
1270                          * Get extra segments for the current incomplete packet.
1271                          */
1272                         uint16_t  rcv_cnt =
1273                                 RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1274                         if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1275                                 uint32_t rx_num =
1276                                         virtqueue_dequeue_burst_rx(vq,
1277                                         rcv_pkts, len, rcv_cnt);
1278                                 i += rx_num;
1279                                 rcv_cnt = rx_num;
1280                         } else {
1281                                 PMD_RX_LOG(ERR,
1282                                            "Not enough segments for packet.");
1283                                 nb_enqueued++;
1284                                 virtio_discard_rxbuf(vq, rxm);
1285                                 rxvq->stats.errors++;
1286                                 break;
1287                         }
1288
1289                         extra_idx = 0;
1290
1291                         while (extra_idx < rcv_cnt) {
1292                                 rxm = rcv_pkts[extra_idx];
1293
1294                                 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1295                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1296                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1297
1298                                 if (prev)
1299                                         prev->next = rxm;
1300
1301                                 prev = rxm;
1302                                 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1303                                 extra_idx++;
1304                         }
1305                         seg_res -= rcv_cnt;
1306                 }
1307
1308                 if (hw->vlan_strip)
1309                         rte_vlan_strip(rx_pkts[nb_rx]);
1310
1311                 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1312                         rx_pkts[nb_rx]->data_len);
1313
1314                 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1315                 nb_rx++;
1316         }
1317
1318         rxvq->stats.packets += nb_rx;
1319
1320         /* Allocate new mbuf for the used descriptor */
1321         while (likely(!virtqueue_full(vq))) {
1322                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1323                 if (unlikely(new_mbuf == NULL)) {
1324                         struct rte_eth_dev *dev
1325                                 = &rte_eth_devices[rxvq->port_id];
1326                         dev->data->rx_mbuf_alloc_failed++;
1327                         break;
1328                 }
1329                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1330                 if (unlikely(error)) {
1331                         rte_pktmbuf_free(new_mbuf);
1332                         break;
1333                 }
1334                 nb_enqueued++;
1335         }
1336
1337         if (likely(nb_enqueued)) {
1338                 vq_update_avail_idx(vq);
1339
1340                 if (unlikely(virtqueue_kick_prepare(vq))) {
1341                         virtqueue_notify(vq);
1342                         PMD_RX_LOG(DEBUG, "Notified");
1343                 }
1344         }
1345
1346         return nb_rx;
1347 }
1348
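/*
 * Default TX burst. Completed descriptors are reclaimed when the used
 * count crosses the free threshold (and again if a packet does not fit),
 * VLAN insertion is done in software when requested, and each packet is
 * enqueued using the cheapest available layout: header push, indirect
 * descriptors, or a separate header descriptor.
 */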
1349 uint16_t
1350 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1351 {
1352         struct virtnet_tx *txvq = tx_queue;
1353         struct virtqueue *vq = txvq->vq;
1354         struct virtio_hw *hw = vq->hw;
1355         uint16_t hdr_size = hw->vtnet_hdr_size;
1356         uint16_t nb_used, nb_tx = 0;
1357         int error;
1358
1359         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1360                 return nb_tx;
1361
1362         if (unlikely(nb_pkts < 1))
1363                 return nb_pkts;
1364
1365         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1366         nb_used = VIRTQUEUE_NUSED(vq);
1367
1368         virtio_rmb();
1369         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1370                 virtio_xmit_cleanup(vq, nb_used);
1371
1372         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1373                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1374                 int can_push = 0, use_indirect = 0, slots, need;
1375
1376                 /* Do VLAN tag insertion */
1377                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1378                         error = rte_vlan_insert(&txm);
1379                         if (unlikely(error)) {
1380                                 rte_pktmbuf_free(txm);
1381                                 continue;
1382                         }
1383                 }
1384
1385                 /* optimize ring usage */
1386                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1387                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1388                     rte_mbuf_refcnt_read(txm) == 1 &&
1389                     RTE_MBUF_DIRECT(txm) &&
1390                     txm->nb_segs == 1 &&
1391                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1392                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1393                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1394                         can_push = 1;
1395                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1396                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1397                         use_indirect = 1;
1398
1399                 /* How many main ring entries are needed for this Tx?
1400                  * any_layout => number of segments
1401                  * indirect   => 1
1402                  * default    => number of segments + 1
1403                  */
1404                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1405                 need = slots - vq->vq_free_cnt;
1406
1407                 /* A positive value means more free vring descriptors are needed */
1408                 if (unlikely(need > 0)) {
1409                         nb_used = VIRTQUEUE_NUSED(vq);
1410                         virtio_rmb();
1411                         need = RTE_MIN(need, (int)nb_used);
1412
1413                         virtio_xmit_cleanup(vq, need);
1414                         need = slots - vq->vq_free_cnt;
1415                         if (unlikely(need > 0)) {
1416                                 PMD_TX_LOG(ERR,
1417                                            "No free tx descriptors to transmit");
1418                                 break;
1419                         }
1420                 }
1421
1422                 /* Enqueue Packet buffers */
1423                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
1424                         can_push, 0);
1425
1426                 virtio_update_packet_stats(&txvq->stats, txm);
1427         }
1428
1429         txvq->stats.packets += nb_tx;
1430
1431         if (likely(nb_tx)) {
1432                 vq_update_avail_idx(vq);
1433
1434                 if (unlikely(virtqueue_kick_prepare(vq))) {
1435                         virtqueue_notify(vq);
1436                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1437                 }
1438         }
1439
1440         return nb_tx;
1441 }
1442
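/*
 * TX burst for in-order virtqueues. Packets that can carry the header in
 * their own headroom are batched and sent through the in-order fast
 * path; anything else (e.g. multi-segment or insufficient headroom)
 * falls back to the generic enqueue with in_order set.
 */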
1443 uint16_t
1444 virtio_xmit_pkts_inorder(void *tx_queue,
1445                         struct rte_mbuf **tx_pkts,
1446                         uint16_t nb_pkts)
1447 {
1448         struct virtnet_tx *txvq = tx_queue;
1449         struct virtqueue *vq = txvq->vq;
1450         struct virtio_hw *hw = vq->hw;
1451         uint16_t hdr_size = hw->vtnet_hdr_size;
1452         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
1453         struct rte_mbuf *inorder_pkts[nb_pkts];
1454         int error;
1455
1456         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1457                 return nb_tx;
1458
1459         if (unlikely(nb_pkts < 1))
1460                 return nb_pkts;
1461
1462         VIRTQUEUE_DUMP(vq);
1463         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1464         nb_used = VIRTQUEUE_NUSED(vq);
1465
1466         virtio_rmb();
1467         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1468                 virtio_xmit_cleanup_inorder(vq, nb_used);
1469
1470         if (unlikely(!vq->vq_free_cnt))
1471                 virtio_xmit_cleanup_inorder(vq, nb_used);
1472
1473         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
1474
1475         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
1476                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1477                 int slots, need;
1478
1479                 /* Do VLAN tag insertion */
1480                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1481                         error = rte_vlan_insert(&txm);
1482                         if (unlikely(error)) {
1483                                 rte_pktmbuf_free(txm);
1484                                 continue;
1485                         }
1486                 }
1487
1488                 /* optimize ring usage */
1489                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1490                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1491                      rte_mbuf_refcnt_read(txm) == 1 &&
1492                      RTE_MBUF_DIRECT(txm) &&
1493                      txm->nb_segs == 1 &&
1494                      rte_pktmbuf_headroom(txm) >= hdr_size &&
1495                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1496                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
1497                         inorder_pkts[nb_inorder_pkts] = txm;
1498                         nb_inorder_pkts++;
1499
1500                         virtio_update_packet_stats(&txvq->stats, txm);
1501                         continue;
1502                 }
1503
1504                 if (nb_inorder_pkts) {
1505                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1506                                                         nb_inorder_pkts);
1507                         nb_inorder_pkts = 0;
1508                 }
1509
1510                 slots = txm->nb_segs + 1;
1511                 need = slots - vq->vq_free_cnt;
1512                 if (unlikely(need > 0)) {
1513                         nb_used = VIRTQUEUE_NUSED(vq);
1514                         virtio_rmb();
1515                         need = RTE_MIN(need, (int)nb_used);
1516
1517                         virtio_xmit_cleanup_inorder(vq, need);
1518
1519                         need = slots - vq->vq_free_cnt;
1520
1521                         if (unlikely(need > 0)) {
1522                                 PMD_TX_LOG(ERR,
1523                                         "No free tx descriptors to transmit");
1524                                 break;
1525                         }
1526                 }
1527                 /* Enqueue Packet buffers */
1528                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
1529
1530                 virtio_update_packet_stats(&txvq->stats, txm);
1531         }
1532
1533         /* Transmit any remaining batched inorder packets */
1534         if (nb_inorder_pkts)
1535                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1536                                                 nb_inorder_pkts);
1537
1538         txvq->stats.packets += nb_tx;
1539
1540         if (likely(nb_tx)) {
1541                 vq_update_avail_idx(vq);
1542
1543                 if (unlikely(virtqueue_kick_prepare(vq))) {
1544                         virtqueue_notify(vq);
1545                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1546                 }
1547         }
1548
1549         VIRTQUEUE_DUMP(vq);
1550
1551         return nb_tx;
1552 }