net/virtio: support in-order Rx and Tx
[dpdk.git] / drivers / net / virtio / virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34
35 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
36 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
37 #else
38 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
39 #endif
40
41 int
42 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
43 {
44         struct virtnet_rx *rxvq = rxq;
45         struct virtqueue *vq = rxvq->vq;
46
47         return VIRTQUEUE_NUSED(vq) >= offset;
48 }
49
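/*
 * In-order free: when VIRTIO_F_IN_ORDER is negotiated the device consumes
 * descriptors in the order they were made available, so returning a batch of
 * descriptors only requires bumping the free count and moving the tail index
 * to the last freed slot; no per-chain walk as in vq_ring_free_chain().
 */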
50 void
51 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
52 {
53         vq->vq_free_cnt += num;
54         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
55 }
56
57 void
58 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
59 {
60         struct vring_desc *dp, *dp_tail;
61         struct vq_desc_extra *dxp;
62         uint16_t desc_idx_last = desc_idx;
63
64         dp  = &vq->vq_ring.desc[desc_idx];
65         dxp = &vq->vq_descx[desc_idx];
66         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
67         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
68                 while (dp->flags & VRING_DESC_F_NEXT) {
69                         desc_idx_last = dp->next;
70                         dp = &vq->vq_ring.desc[dp->next];
71                 }
72         }
73         dxp->ndescs = 0;
74
75         /*
76          * We must append the existing free chain, if any, to the end of
77          * the newly freed chain. If the virtqueue was completely used, then
78          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
79          */
80         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
81                 vq->vq_desc_head_idx = desc_idx;
82         } else {
83                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
84                 dp_tail->next = desc_idx;
85         }
86
87         vq->vq_desc_tail_idx = desc_idx_last;
88         dp->next = VQ_RING_DESC_CHAIN_END;
89 }
90
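/*
 * Dequeue up to "num" used Rx buffers from the split ring. Each used element
 * identifies the head of a descriptor chain whose cookie is the mbuf that was
 * posted for receive; the chain is returned to the free list as it is read.
 */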
91 static uint16_t
92 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
93                            uint32_t *len, uint16_t num)
94 {
95         struct vring_used_elem *uep;
96         struct rte_mbuf *cookie;
97         uint16_t used_idx, desc_idx;
98         uint16_t i;
99
100         /*  Caller does the check */
101         for (i = 0; i < num ; i++) {
102                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
103                 uep = &vq->vq_ring.used->ring[used_idx];
104                 desc_idx = (uint16_t) uep->id;
105                 len[i] = uep->len;
106                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
107
108                 if (unlikely(cookie == NULL)) {
109                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
110                                 vq->vq_used_cons_idx);
111                         break;
112                 }
113
114                 rte_prefetch0(cookie);
115                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
116                 rx_pkts[i]  = cookie;
117                 vq->vq_used_cons_idx++;
118                 vq_ring_free_chain(vq, desc_idx);
119                 vq->vq_descx[desc_idx].cookie = NULL;
120         }
121
122         return i;
123 }
124
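/*
 * In-order variant of the Rx dequeue: descriptors are used by the device in
 * ring order, so the descriptor index equals the used-ring index and one
 * descriptor maps to one mbuf. The whole batch is released at the end with a
 * single vq_ring_free_inorder() call.
 */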
125 static uint16_t
126 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
127                         struct rte_mbuf **rx_pkts,
128                         uint32_t *len,
129                         uint16_t num)
130 {
131         struct vring_used_elem *uep;
132         struct rte_mbuf *cookie;
133         uint16_t used_idx = 0;
134         uint16_t i;
135
136         if (unlikely(num == 0))
137                 return 0;
138
139         for (i = 0; i < num; i++) {
140                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
141                 /* Desc idx same as used idx */
142                 uep = &vq->vq_ring.used->ring[used_idx];
143                 len[i] = uep->len;
144                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
145
146                 if (unlikely(cookie == NULL)) {
147                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
148                                 vq->vq_used_cons_idx);
149                         break;
150                 }
151
152                 rte_prefetch0(cookie);
153                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
154                 rx_pkts[i]  = cookie;
155                 vq->vq_used_cons_idx++;
156                 vq->vq_descx[used_idx].cookie = NULL;
157         }
158
159         vq_ring_free_inorder(vq, used_idx, i);
160         return i;
161 }
162
163 #ifndef DEFAULT_TX_FREE_THRESH
164 #define DEFAULT_TX_FREE_THRESH 32
165 #endif
166
167 /* Cleanup from completed transmits. */
168 static void
169 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
170 {
171         uint16_t i, used_idx, desc_idx;
172         for (i = 0; i < num; i++) {
173                 struct vring_used_elem *uep;
174                 struct vq_desc_extra *dxp;
175
176                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
177                 uep = &vq->vq_ring.used->ring[used_idx];
178
179                 desc_idx = (uint16_t) uep->id;
180                 dxp = &vq->vq_descx[desc_idx];
181                 vq->vq_used_cons_idx++;
182                 vq_ring_free_chain(vq, desc_idx);
183
184                 if (dxp->cookie != NULL) {
185                         rte_pktmbuf_free(dxp->cookie);
186                         dxp->cookie = NULL;
187                 }
188         }
189 }
190
191 /* Cleanup from completed inorder transmits. */
192 static void
193 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
194 {
195         uint16_t i, used_idx, desc_idx = 0, last_idx;
196         int16_t free_cnt = 0;
197         struct vq_desc_extra *dxp = NULL;
198
199         if (unlikely(num == 0))
200                 return;
201
202         for (i = 0; i < num; i++) {
203                 struct vring_used_elem *uep;
204
205                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
206                 uep = &vq->vq_ring.used->ring[used_idx];
207                 desc_idx = (uint16_t)uep->id;
208
209                 dxp = &vq->vq_descx[desc_idx];
210                 vq->vq_used_cons_idx++;
211
212                 if (dxp->cookie != NULL) {
213                         rte_pktmbuf_free(dxp->cookie);
214                         dxp->cookie = NULL;
215                 }
216         }
217
218         last_idx = desc_idx + dxp->ndescs - 1;
219         free_cnt = last_idx - vq->vq_desc_tail_idx;
220         if (free_cnt <= 0)
221                 free_cnt += vq->vq_nentries;
222
223         vq_ring_free_inorder(vq, last_idx, free_cnt);
224 }
225
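/*
 * Refill the Rx ring in order: one descriptor per mbuf, allocated from
 * consecutive ring slots starting at the current head, each covering the
 * mbuf buffer (including room for the virtio-net header) and marked
 * device-writable.
 */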
226 static inline int
227 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
228                         struct rte_mbuf **cookies,
229                         uint16_t num)
230 {
231         struct vq_desc_extra *dxp;
232         struct virtio_hw *hw = vq->hw;
233         struct vring_desc *start_dp;
234         uint16_t head_idx, idx, i = 0;
235
236         if (unlikely(vq->vq_free_cnt == 0))
237                 return -ENOSPC;
238         if (unlikely(vq->vq_free_cnt < num))
239                 return -EMSGSIZE;
240
241         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
242         start_dp = vq->vq_ring.desc;
243
244         while (i < num) {
245                 idx = head_idx & (vq->vq_nentries - 1);
246                 dxp = &vq->vq_descx[idx];
247                 dxp->cookie = (void *)cookies[i];
248                 dxp->ndescs = 1;
249
250                 start_dp[idx].addr =
251                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
252                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
253                 start_dp[idx].len =
254                                 cookies[i]->buf_len -
255                                 RTE_PKTMBUF_HEADROOM +
256                                 hw->vtnet_hdr_size;
257                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
258
259                 vq_update_avail_ring(vq, idx);
260                 head_idx++;
261                 i++;
262         }
263
264         vq->vq_desc_head_idx += num;
265         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
266         return 0;
267 }
268
269 static inline int
270 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
271 {
272         struct vq_desc_extra *dxp;
273         struct virtio_hw *hw = vq->hw;
274         struct vring_desc *start_dp;
275         uint16_t needed = 1;
276         uint16_t head_idx, idx;
277
278         if (unlikely(vq->vq_free_cnt == 0))
279                 return -ENOSPC;
280         if (unlikely(vq->vq_free_cnt < needed))
281                 return -EMSGSIZE;
282
283         head_idx = vq->vq_desc_head_idx;
284         if (unlikely(head_idx >= vq->vq_nentries))
285                 return -EFAULT;
286
287         idx = head_idx;
288         dxp = &vq->vq_descx[idx];
289         dxp->cookie = (void *)cookie;
290         dxp->ndescs = needed;
291
292         start_dp = vq->vq_ring.desc;
293         start_dp[idx].addr =
294                 VIRTIO_MBUF_ADDR(cookie, vq) +
295                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
296         start_dp[idx].len =
297                 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
298         start_dp[idx].flags =  VRING_DESC_F_WRITE;
299         idx = start_dp[idx].next;
300         vq->vq_desc_head_idx = idx;
301         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
302                 vq->vq_desc_tail_idx = idx;
303         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
304         vq_update_avail_ring(vq, head_idx);
305
306         return 0;
307 }
308
309 /* When doing TSO, the IP length is not included in the pseudo header
310  * checksum of the packet given to the PMD, but for virtio it is
311  * expected.
312  */
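/*
 * The fix below folds ip_paylen into the existing TCP checksum using
 * one's-complement addition; for IPv4 the IP header checksum is also
 * recomputed along the way.
 */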
313 static void
314 virtio_tso_fix_cksum(struct rte_mbuf *m)
315 {
316         /* common case: header is not fragmented */
317         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
318                         m->l4_len)) {
319                 struct ipv4_hdr *iph;
320                 struct ipv6_hdr *ip6h;
321                 struct tcp_hdr *th;
322                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
323                 uint32_t tmp;
324
325                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
326                 th = RTE_PTR_ADD(iph, m->l3_len);
327                 if ((iph->version_ihl >> 4) == 4) {
328                         iph->hdr_checksum = 0;
329                         iph->hdr_checksum = rte_ipv4_cksum(iph);
330                         ip_len = iph->total_length;
331                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
332                                 m->l3_len);
333                 } else {
334                         ip6h = (struct ipv6_hdr *)iph;
335                         ip_paylen = ip6h->payload_len;
336                 }
337
338                 /* add ip_paylen into the existing pseudo header checksum */
339                 prev_cksum = th->cksum;
340                 tmp = prev_cksum;
341                 tmp += ip_paylen;
342                 tmp = (tmp & 0xffff) + (tmp >> 16);
343                 new_cksum = tmp;
344
345                 /* replace it in the packet */
346                 th->cksum = new_cksum;
347         }
348 }
349
350 static inline int
351 tx_offload_enabled(struct virtio_hw *hw)
352 {
353         return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
354                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
355                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
356 }
357
358 /* avoid the write operation when it is unnecessary, to lessen cache issues */
359 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
360         if ((var) != (val))                     \
361                 (var) = (val);                  \
362 } while (0)
363
364 static inline void
365 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
366                         struct rte_mbuf *cookie,
367                         int offload)
368 {
369         if (offload) {
370                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
371                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
372
373                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
374                 case PKT_TX_UDP_CKSUM:
375                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
376                         hdr->csum_offset = offsetof(struct udp_hdr,
377                                 dgram_cksum);
378                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
379                         break;
380
381                 case PKT_TX_TCP_CKSUM:
382                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
383                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
384                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
385                         break;
386
387                 default:
388                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
389                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
390                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
391                         break;
392                 }
393
394                 /* TCP Segmentation Offload */
395                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
396                         virtio_tso_fix_cksum(cookie);
397                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
398                                 VIRTIO_NET_HDR_GSO_TCPV6 :
399                                 VIRTIO_NET_HDR_GSO_TCPV4;
400                         hdr->gso_size = cookie->tso_segsz;
401                         hdr->hdr_len =
402                                 cookie->l2_len +
403                                 cookie->l3_len +
404                                 cookie->l4_len;
405                 } else {
406                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
407                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
408                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
409                 }
410         }
411 }
412
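/*
 * In-order Tx fast path: enqueue a burst of single-segment packets whose
 * virtio-net header is prepended directly into the mbuf headroom, consuming
 * exactly one descriptor per packet. No capacity or layout checks are done
 * here; the caller filters the packets and caps the burst size.
 */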
413 static inline void
414 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
415                         struct rte_mbuf **cookies,
416                         uint16_t num)
417 {
418         struct vq_desc_extra *dxp;
419         struct virtqueue *vq = txvq->vq;
420         struct vring_desc *start_dp;
421         struct virtio_net_hdr *hdr;
422         uint16_t idx;
423         uint16_t head_size = vq->hw->vtnet_hdr_size;
424         int offload;
425         uint16_t i = 0;
426
427         idx = vq->vq_desc_head_idx;
428         start_dp = vq->vq_ring.desc;
429
430         offload = tx_offload_enabled(vq->hw);
431
432         while (i < num) {
433                 idx = idx & (vq->vq_nentries - 1);
434                 dxp = &vq->vq_descx[idx];
435                 dxp->cookie = (void *)cookies[i];
436                 dxp->ndescs = 1;
437
438                 hdr = (struct virtio_net_hdr *)
439                         rte_pktmbuf_prepend(cookies[i], head_size);
440                 cookies[i]->pkt_len -= head_size;
441
442                 /* if offload is disabled, the header is not zeroed below, do it now */
443                 if (offload == 0) {
444                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
445                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
446                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
447                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
448                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
449                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
450                 }
451
452                 virtqueue_xmit_offload(hdr, cookies[i], offload);
453
454                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
455                 start_dp[idx].len   = cookies[i]->data_len;
456                 start_dp[idx].flags = 0;
457
458                 vq_update_avail_ring(vq, idx);
459
460                 idx++;
461                 i++;
462         }
463
464         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
465         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
466 }
467
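/*
 * General Tx enqueue, used with three descriptor layouts:
 *  - can_push:     the virtio-net header is prepended into the mbuf headroom
 *                  and each segment gets one descriptor;
 *  - use_indirect: a single ring slot points at the per-slot indirect table
 *                  in the reserved region, whose first entry is preset to
 *                  the header;
 *  - default:      the first slot points at the header in the reserved
 *                  region and is chained to one slot per segment.
 */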
468 static inline void
469 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
470                         uint16_t needed, int use_indirect, int can_push,
471                         int in_order)
472 {
473         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
474         struct vq_desc_extra *dxp;
475         struct virtqueue *vq = txvq->vq;
476         struct vring_desc *start_dp;
477         uint16_t seg_num = cookie->nb_segs;
478         uint16_t head_idx, idx;
479         uint16_t head_size = vq->hw->vtnet_hdr_size;
480         struct virtio_net_hdr *hdr;
481         int offload;
482
483         offload = tx_offload_enabled(vq->hw);
484
485         head_idx = vq->vq_desc_head_idx;
486         idx = head_idx;
487         dxp = &vq->vq_descx[idx];
488         dxp->cookie = (void *)cookie;
489         dxp->ndescs = needed;
490
491         start_dp = vq->vq_ring.desc;
492
493         if (can_push) {
494                 /* prepend cannot fail, checked by caller */
495                 hdr = (struct virtio_net_hdr *)
496                         rte_pktmbuf_prepend(cookie, head_size);
497                 /* rte_pktmbuf_prepend() counts the hdr size in the pkt length,
498                  * which is wrong. The subtraction below restores the correct size.
499                  */
500                 cookie->pkt_len -= head_size;
501
502                 /* if offload is disabled, the header is not zeroed below, do it now */
503                 if (offload == 0) {
504                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
505                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
506                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
507                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
508                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
509                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
510                 }
511         } else if (use_indirect) {
512                 /* setup tx ring slot to point to indirect
513                  * descriptor list stored in reserved region.
514                  *
515                  * the first slot in indirect ring is already preset
516                  * to point to the header in reserved region
517                  */
518                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
519                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
520                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
521                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
522                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
523
524                 /* loop below will fill in rest of the indirect elements */
525                 start_dp = txr[idx].tx_indir;
526                 idx = 1;
527         } else {
528                 /* setup first tx ring slot to point to header
529                  * stored in reserved region.
530                  */
531                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
532                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
533                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
534                 start_dp[idx].flags = VRING_DESC_F_NEXT;
535                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
536
537                 idx = start_dp[idx].next;
538         }
539
540         virtqueue_xmit_offload(hdr, cookie, offload);
541
542         do {
543                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
544                 start_dp[idx].len   = cookie->data_len;
545                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
546                 idx = start_dp[idx].next;
547         } while ((cookie = cookie->next) != NULL);
548
549         if (use_indirect)
550                 idx = vq->vq_ring.desc[head_idx].next;
551
552         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
553
554         vq->vq_desc_head_idx = idx;
555         vq_update_avail_ring(vq, head_idx);
556
557         if (!in_order) {
558                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
559                         vq->vq_desc_tail_idx = idx;
560         }
561 }
562
563 void
564 virtio_dev_cq_start(struct rte_eth_dev *dev)
565 {
566         struct virtio_hw *hw = dev->data->dev_private;
567
568         if (hw->cvq && hw->cvq->vq) {
569                 rte_spinlock_init(&hw->cvq->lock);
570                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
571         }
572 }
573
574 int
575 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
576                         uint16_t queue_idx,
577                         uint16_t nb_desc,
578                         unsigned int socket_id __rte_unused,
579                         const struct rte_eth_rxconf *rx_conf __rte_unused,
580                         struct rte_mempool *mp)
581 {
582         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
583         struct virtio_hw *hw = dev->data->dev_private;
584         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
585         struct virtnet_rx *rxvq;
586
587         PMD_INIT_FUNC_TRACE();
588
589         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
590                 nb_desc = vq->vq_nentries;
591         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
592
593         rxvq = &vq->rxq;
594         rxvq->queue_id = queue_idx;
595         rxvq->mpool = mp;
596         if (rxvq->mpool == NULL) {
597                 rte_exit(EXIT_FAILURE,
598                         "Cannot allocate mbufs for rx virtqueue");
599         }
600
601         dev->data->rx_queues[queue_idx] = rxvq;
602
603         return 0;
604 }
605
606 int
607 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
608 {
609         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
610         struct virtio_hw *hw = dev->data->dev_private;
611         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
612         struct virtnet_rx *rxvq = &vq->rxq;
613         struct rte_mbuf *m;
614         uint16_t desc_idx;
615         int error, nbufs, i;
616
617         PMD_INIT_FUNC_TRACE();
618
619         /* Allocate blank mbufs for each rx descriptor */
620         nbufs = 0;
621
622         if (hw->use_simple_rx) {
623                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
624                      desc_idx++) {
625                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
626                         vq->vq_ring.desc[desc_idx].flags =
627                                 VRING_DESC_F_WRITE;
628                 }
629
630                 virtio_rxq_vec_setup(rxvq);
631         }
632
633         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
634         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
635              desc_idx++) {
636                 vq->sw_ring[vq->vq_nentries + desc_idx] =
637                         &rxvq->fake_mbuf;
638         }
639
640         if (hw->use_simple_rx) {
641                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
642                         virtio_rxq_rearm_vec(rxvq);
643                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
644                 }
645         } else if (hw->use_inorder_rx) {
646                 if ((!virtqueue_full(vq))) {
647                         uint16_t free_cnt = vq->vq_free_cnt;
648                         struct rte_mbuf *pkts[free_cnt];
649
650                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
651                                 free_cnt)) {
652                                 error = virtqueue_enqueue_refill_inorder(vq,
653                                                 pkts,
654                                                 free_cnt);
655                                 if (unlikely(error)) {
656                                         for (i = 0; i < free_cnt; i++)
657                                                 rte_pktmbuf_free(pkts[i]);
658                                 }
659                         }
660
661                         nbufs += free_cnt;
662                         vq_update_avail_idx(vq);
663                 }
664         } else {
665                 while (!virtqueue_full(vq)) {
666                         m = rte_mbuf_raw_alloc(rxvq->mpool);
667                         if (m == NULL)
668                                 break;
669
670                         /* Enqueue allocated buffers */
671                         error = virtqueue_enqueue_recv_refill(vq, m);
672                         if (error) {
673                                 rte_pktmbuf_free(m);
674                                 break;
675                         }
676                         nbufs++;
677                 }
678
679                 vq_update_avail_idx(vq);
680         }
681
682         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
683
684         VIRTQUEUE_DUMP(vq);
685
686         return 0;
687 }
688
689 /*
690  * struct rte_eth_dev *dev: Used to update dev
691  * uint16_t nb_desc: Defaults to values read from config space
692  * unsigned int socket_id: Used to allocate memzone
693  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
694  * uint16_t queue_idx: Just used as an index in dev txq list
695  */
696 int
697 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
698                         uint16_t queue_idx,
699                         uint16_t nb_desc,
700                         unsigned int socket_id __rte_unused,
701                         const struct rte_eth_txconf *tx_conf)
702 {
703         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
704         struct virtio_hw *hw = dev->data->dev_private;
705         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
706         struct virtnet_tx *txvq;
707         uint16_t tx_free_thresh;
708
709         PMD_INIT_FUNC_TRACE();
710
711         /* cannot use simple rxtx funcs with multisegs or offloads */
712         if (dev->data->dev_conf.txmode.offloads)
713                 hw->use_simple_tx = 0;
714
715         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
716                 nb_desc = vq->vq_nentries;
717         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
718
719         txvq = &vq->txq;
720         txvq->queue_id = queue_idx;
721
722         tx_free_thresh = tx_conf->tx_free_thresh;
723         if (tx_free_thresh == 0)
724                 tx_free_thresh =
725                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
726
727         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
728                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
729                         "number of TX entries minus 3 (%u)."
730                         " (tx_free_thresh=%u port=%u queue=%u)\n",
731                         vq->vq_nentries - 3,
732                         tx_free_thresh, dev->data->port_id, queue_idx);
733                 return -EINVAL;
734         }
735
736         vq->vq_free_thresh = tx_free_thresh;
737
738         dev->data->tx_queues[queue_idx] = txvq;
739         return 0;
740 }
741
742 int
743 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
744                                 uint16_t queue_idx)
745 {
746         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
747         struct virtio_hw *hw = dev->data->dev_private;
748         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
749         uint16_t mid_idx = vq->vq_nentries >> 1;
750         struct virtnet_tx *txvq = &vq->txq;
751         uint16_t desc_idx;
752
753         PMD_INIT_FUNC_TRACE();
754
755         if (hw->use_simple_tx) {
756                 for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
757                         vq->vq_ring.avail->ring[desc_idx] =
758                                 desc_idx + mid_idx;
759                         vq->vq_ring.desc[desc_idx + mid_idx].next =
760                                 desc_idx;
761                         vq->vq_ring.desc[desc_idx + mid_idx].addr =
762                                 txvq->virtio_net_hdr_mem +
763                                 offsetof(struct virtio_tx_region, tx_hdr);
764                         vq->vq_ring.desc[desc_idx + mid_idx].len =
765                                 vq->hw->vtnet_hdr_size;
766                         vq->vq_ring.desc[desc_idx + mid_idx].flags =
767                                 VRING_DESC_F_NEXT;
768                         vq->vq_ring.desc[desc_idx].flags = 0;
769                 }
770                 for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
771                      desc_idx++)
772                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
773         } else if (hw->use_inorder_tx) {
774                 vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
775         }
776
777         VIRTQUEUE_DUMP(vq);
778
779         return 0;
780 }
781
782 static void
783 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
784 {
785         int error;
786         /*
787          * Requeue the discarded mbuf. This should always be
788          * successful since it was just dequeued.
789          */
790         error = virtqueue_enqueue_recv_refill(vq, m);
791
792         if (unlikely(error)) {
793                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
794                 rte_pktmbuf_free(m);
795         }
796 }
797
798 static void
799 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
800 {
801         int error;
802
803         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
804         if (unlikely(error)) {
805                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
806                 rte_pktmbuf_free(m);
807         }
808 }
809
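/*
 * Per-queue statistics helper: accounts the packet into size bins
 * (<64, 64, 65-127, 128-255, 256-511, 512-1023, 1024-1518, >=1519 bytes)
 * and bumps the multicast/broadcast counters from the destination MAC.
 */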
810 static void
811 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
812 {
813         uint32_t s = mbuf->pkt_len;
814         struct ether_addr *ea;
815
816         if (s == 64) {
817                 stats->size_bins[1]++;
818         } else if (s > 64 && s < 1024) {
819                 uint32_t bin;
820
821                 /* count zeros, and offset into correct bin */
822                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
823                 stats->size_bins[bin]++;
824         } else {
825                 if (s < 64)
826                         stats->size_bins[0]++;
827                 else if (s < 1519)
828                         stats->size_bins[6]++;
829                 else if (s >= 1519)
830                         stats->size_bins[7]++;
831         }
832
833         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
834         if (is_multicast_ether_addr(ea)) {
835                 if (is_broadcast_ether_addr(ea))
836                         stats->broadcast++;
837                 else
838                         stats->multicast++;
839         }
840 }
841
842 static inline void
843 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
844 {
845         VIRTIO_DUMP_PACKET(m, m->data_len);
846
847         rxvq->stats.bytes += m->pkt_len;
848         virtio_update_packet_stats(&rxvq->stats, m);
849 }
850
851 /* Optionally fill offload information in structure */
852 static int
853 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
854 {
855         struct rte_net_hdr_lens hdr_lens;
856         uint32_t hdrlen, ptype;
857         int l4_supported = 0;
858
859         /* nothing to do */
860         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
861                 return 0;
862
863         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
864
865         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
866         m->packet_type = ptype;
867         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
868             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
869             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
870                 l4_supported = 1;
871
872         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
873                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
874                 if (hdr->csum_start <= hdrlen && l4_supported) {
875                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
876                 } else {
877                         /* Unknown proto or tunnel, do sw cksum. We can assume
878                          * the cksum field is in the first segment since the
879                          * buffers we provided to the host are large enough.
880                          * In case of SCTP, this will be wrong since it's a CRC
881                          * but there's nothing we can do.
882                          */
883                         uint16_t csum = 0, off;
884
885                         rte_raw_cksum_mbuf(m, hdr->csum_start,
886                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
887                                 &csum);
888                         if (likely(csum != 0xffff))
889                                 csum = ~csum;
890                         off = hdr->csum_offset + hdr->csum_start;
891                         if (rte_pktmbuf_data_len(m) >= off + 1)
892                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
893                                         off) = csum;
894                 }
895         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
896                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
897         }
898
899         /* GSO request, save required information in mbuf */
900         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
901                 /* Check unsupported modes */
902                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
903                     (hdr->gso_size == 0)) {
904                         return -EINVAL;
905                 }
906
907                 /* Update mss length in mbuf */
908                 m->tso_segsz = hdr->gso_size;
909                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
910                         case VIRTIO_NET_HDR_GSO_TCPV4:
911                         case VIRTIO_NET_HDR_GSO_TCPV6:
912                                 m->ol_flags |= PKT_RX_LRO | \
913                                         PKT_RX_L4_CKSUM_NONE;
914                                 break;
915                         default:
916                                 return -EINVAL;
917                 }
918         }
919
920         return 0;
921 }
922
923 static inline int
924 rx_offload_enabled(struct virtio_hw *hw)
925 {
926         return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
927                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
928                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
929 }
930
931 #define VIRTIO_MBUF_BURST_SZ 64
932 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
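/*
 * Standard (non-mergeable) Rx burst: dequeue used buffers, drop packets
 * shorter than the virtio-net header plus an Ethernet header, strip the
 * header from the reported length, apply VLAN stripping and Rx offload
 * flags, then refill the ring and notify the device if required.
 */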
933 uint16_t
934 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
935 {
936         struct virtnet_rx *rxvq = rx_queue;
937         struct virtqueue *vq = rxvq->vq;
938         struct virtio_hw *hw = vq->hw;
939         struct rte_mbuf *rxm, *new_mbuf;
940         uint16_t nb_used, num, nb_rx;
941         uint32_t len[VIRTIO_MBUF_BURST_SZ];
942         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
943         int error;
944         uint32_t i, nb_enqueued;
945         uint32_t hdr_size;
946         int offload;
947         struct virtio_net_hdr *hdr;
948
949         nb_rx = 0;
950         if (unlikely(hw->started == 0))
951                 return nb_rx;
952
953         nb_used = VIRTQUEUE_NUSED(vq);
954
955         virtio_rmb();
956
957         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
958         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
959                 num = VIRTIO_MBUF_BURST_SZ;
960         if (likely(num > DESC_PER_CACHELINE))
961                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
962
963         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
964         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
965
966         nb_enqueued = 0;
967         hdr_size = hw->vtnet_hdr_size;
968         offload = rx_offload_enabled(hw);
969
970         for (i = 0; i < num ; i++) {
971                 rxm = rcv_pkts[i];
972
973                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
974
975                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
976                         PMD_RX_LOG(ERR, "Packet drop");
977                         nb_enqueued++;
978                         virtio_discard_rxbuf(vq, rxm);
979                         rxvq->stats.errors++;
980                         continue;
981                 }
982
983                 rxm->port = rxvq->port_id;
984                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
985                 rxm->ol_flags = 0;
986                 rxm->vlan_tci = 0;
987
988                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
989                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
990
991                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
992                         RTE_PKTMBUF_HEADROOM - hdr_size);
993
994                 if (hw->vlan_strip)
995                         rte_vlan_strip(rxm);
996
997                 if (offload && virtio_rx_offload(rxm, hdr) < 0) {
998                         virtio_discard_rxbuf(vq, rxm);
999                         rxvq->stats.errors++;
1000                         continue;
1001                 }
1002
1003                 virtio_rx_stats_updated(rxvq, rxm);
1004
1005                 rx_pkts[nb_rx++] = rxm;
1006         }
1007
1008         rxvq->stats.packets += nb_rx;
1009
1010         /* Allocate new mbuf for the used descriptor */
1011         error = ENOSPC;
1012         while (likely(!virtqueue_full(vq))) {
1013                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1014                 if (unlikely(new_mbuf == NULL)) {
1015                         struct rte_eth_dev *dev
1016                                 = &rte_eth_devices[rxvq->port_id];
1017                         dev->data->rx_mbuf_alloc_failed++;
1018                         break;
1019                 }
1020                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1021                 if (unlikely(error)) {
1022                         rte_pktmbuf_free(new_mbuf);
1023                         break;
1024                 }
1025                 nb_enqueued++;
1026         }
1027
1028         if (likely(nb_enqueued)) {
1029                 vq_update_avail_idx(vq);
1030
1031                 if (unlikely(virtqueue_kick_prepare(vq))) {
1032                         virtqueue_notify(vq);
1033                         PMD_RX_LOG(DEBUG, "Notified");
1034                 }
1035         }
1036
1037         return nb_rx;
1038 }
1039
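/*
 * In-order mergeable Rx burst: the first buffer of each packet carries
 * num_buffers in its header, and the following used entries are chained
 * onto it as additional mbuf segments. Buffers come back in ring order, so
 * dequeue and refill both use the in-order helpers and bulk mbuf allocation.
 */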
1040 uint16_t
1041 virtio_recv_mergeable_pkts_inorder(void *rx_queue,
1042                         struct rte_mbuf **rx_pkts,
1043                         uint16_t nb_pkts)
1044 {
1045         struct virtnet_rx *rxvq = rx_queue;
1046         struct virtqueue *vq = rxvq->vq;
1047         struct virtio_hw *hw = vq->hw;
1048         struct rte_mbuf *rxm;
1049         struct rte_mbuf *prev;
1050         uint16_t nb_used, num, nb_rx;
1051         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1052         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1053         int error;
1054         uint32_t nb_enqueued;
1055         uint32_t seg_num;
1056         uint32_t seg_res;
1057         uint32_t hdr_size;
1058         int32_t i;
1059         int offload;
1060
1061         nb_rx = 0;
1062         if (unlikely(hw->started == 0))
1063                 return nb_rx;
1064
1065         nb_used = VIRTQUEUE_NUSED(vq);
1066         nb_used = RTE_MIN(nb_used, nb_pkts);
1067         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1068
1069         virtio_rmb();
1070
1071         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1072
1073         nb_enqueued = 0;
1074         seg_num = 1;
1075         seg_res = 0;
1076         hdr_size = hw->vtnet_hdr_size;
1077         offload = rx_offload_enabled(hw);
1078
1079         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1080
1081         for (i = 0; i < num; i++) {
1082                 struct virtio_net_hdr_mrg_rxbuf *header;
1083
1084                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1085                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1086
1087                 rxm = rcv_pkts[i];
1088
1089                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1090                         PMD_RX_LOG(ERR, "Packet drop");
1091                         nb_enqueued++;
1092                         virtio_discard_rxbuf_inorder(vq, rxm);
1093                         rxvq->stats.errors++;
1094                         continue;
1095                 }
1096
1097                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1098                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1099                          - hdr_size);
1100                 seg_num = header->num_buffers;
1101
1102                 if (seg_num == 0)
1103                         seg_num = 1;
1104
1105                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1106                 rxm->nb_segs = seg_num;
1107                 rxm->ol_flags = 0;
1108                 rxm->vlan_tci = 0;
1109                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1110                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1111
1112                 rxm->port = rxvq->port_id;
1113
1114                 rx_pkts[nb_rx] = rxm;
1115                 prev = rxm;
1116
1117                 if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
1118                         virtio_discard_rxbuf_inorder(vq, rxm);
1119                         rxvq->stats.errors++;
1120                         continue;
1121                 }
1122
1123                 if (hw->vlan_strip)
1124                         rte_vlan_strip(rx_pkts[nb_rx]);
1125
1126                 seg_res = seg_num - 1;
1127
1128                 /* Merge remaining segments */
1129                 while (seg_res != 0 && i < (num - 1)) {
1130                         i++;
1131
1132                         rxm = rcv_pkts[i];
1133                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1134                         rxm->pkt_len = (uint32_t)(len[i]);
1135                         rxm->data_len = (uint16_t)(len[i]);
1136
1137                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1138                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1139
1140                         if (prev)
1141                                 prev->next = rxm;
1142
1143                         prev = rxm;
1144                         seg_res -= 1;
1145                 }
1146
1147                 if (!seg_res) {
1148                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1149                         nb_rx++;
1150                 }
1151         }
1152
1153         /* Last packet still needs its remaining segments merged */
1154         while (seg_res != 0) {
1155                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1156                                         VIRTIO_MBUF_BURST_SZ);
1157
1158                 prev = rcv_pkts[nb_rx];
1159                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1160                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1161                                                            rcv_cnt);
1162                         uint16_t extra_idx = 0;
1163
1164                         rcv_cnt = num;
1165                         while (extra_idx < rcv_cnt) {
1166                                 rxm = rcv_pkts[extra_idx];
1167                                 rxm->data_off =
1168                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1169                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1170                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1171                                 prev->next = rxm;
1172                                 prev = rxm;
1173                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1174                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1175                                 extra_idx += 1;
1176                         }
1177                         seg_res -= rcv_cnt;
1178
1179                         if (!seg_res) {
1180                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1181                                 nb_rx++;
1182                         }
1183                 } else {
1184                         PMD_RX_LOG(ERR,
1185                                         "Not enough segments for packet.");
1186                         virtio_discard_rxbuf_inorder(vq, prev);
1187                         rxvq->stats.errors++;
1188                         break;
1189                 }
1190         }
1191
1192         rxvq->stats.packets += nb_rx;
1193
1194         /* Allocate new mbuf for the used descriptor */
1195
1196         if (likely(!virtqueue_full(vq))) {
1197                 /* free_cnt may include mrg descs */
1198                 uint16_t free_cnt = vq->vq_free_cnt;
1199                 struct rte_mbuf *new_pkts[free_cnt];
1200
1201                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1202                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1203                                         free_cnt);
1204                         if (unlikely(error)) {
1205                                 for (i = 0; i < free_cnt; i++)
1206                                         rte_pktmbuf_free(new_pkts[i]);
1207                         }
1208                         nb_enqueued += free_cnt;
1209                 } else {
1210                         struct rte_eth_dev *dev =
1211                                 &rte_eth_devices[rxvq->port_id];
1212                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1213                 }
1214         }
1215
1216         if (likely(nb_enqueued)) {
1217                 vq_update_avail_idx(vq);
1218
1219                 if (unlikely(virtqueue_kick_prepare(vq))) {
1220                         virtqueue_notify(vq);
1221                         PMD_RX_LOG(DEBUG, "Notified");
1222                 }
1223         }
1224
1225         return nb_rx;
1226 }
1227
1228 uint16_t
1229 virtio_recv_mergeable_pkts(void *rx_queue,
1230                         struct rte_mbuf **rx_pkts,
1231                         uint16_t nb_pkts)
1232 {
1233         struct virtnet_rx *rxvq = rx_queue;
1234         struct virtqueue *vq = rxvq->vq;
1235         struct virtio_hw *hw = vq->hw;
1236         struct rte_mbuf *rxm, *new_mbuf;
1237         uint16_t nb_used, num, nb_rx;
1238         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1239         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1240         struct rte_mbuf *prev;
1241         int error;
1242         uint32_t i, nb_enqueued;
1243         uint32_t seg_num;
1244         uint16_t extra_idx;
1245         uint32_t seg_res;
1246         uint32_t hdr_size;
1247         int offload;
1248
1249         nb_rx = 0;
1250         if (unlikely(hw->started == 0))
1251                 return nb_rx;
1252
1253         nb_used = VIRTQUEUE_NUSED(vq);
1254
1255         virtio_rmb();
1256
1257         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1258
1259         i = 0;
1260         nb_enqueued = 0;
1261         seg_num = 0;
1262         extra_idx = 0;
1263         seg_res = 0;
1264         hdr_size = hw->vtnet_hdr_size;
1265         offload = rx_offload_enabled(hw);
1266
1267         while (i < nb_used) {
1268                 struct virtio_net_hdr_mrg_rxbuf *header;
1269
1270                 if (nb_rx == nb_pkts)
1271                         break;
1272
1273                 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1274                 if (num != 1)
1275                         continue;
1276
1277                 i++;
1278
1279                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1280                 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1281
1282                 rxm = rcv_pkts[0];
1283
1284                 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1285                         PMD_RX_LOG(ERR, "Packet drop");
1286                         nb_enqueued++;
1287                         virtio_discard_rxbuf(vq, rxm);
1288                         rxvq->stats.errors++;
1289                         continue;
1290                 }
1291
1292                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1293                         RTE_PKTMBUF_HEADROOM - hdr_size);
1294                 seg_num = header->num_buffers;
1295
1296                 if (seg_num == 0)
1297                         seg_num = 1;
1298
1299                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1300                 rxm->nb_segs = seg_num;
1301                 rxm->ol_flags = 0;
1302                 rxm->vlan_tci = 0;
1303                 rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1304                 rxm->data_len = (uint16_t)(len[0] - hdr_size);
1305
1306                 rxm->port = rxvq->port_id;
1307                 rx_pkts[nb_rx] = rxm;
1308                 prev = rxm;
1309
1310                 if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
1311                         virtio_discard_rxbuf(vq, rxm);
1312                         rxvq->stats.errors++;
1313                         continue;
1314                 }
1315
1316                 seg_res = seg_num - 1;
1317
1318                 while (seg_res != 0) {
1319                         /*
1320                          * Get extra segments for the current incomplete packet.
1321                          */
1322                         uint16_t  rcv_cnt =
1323                                 RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1324                         if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1325                                 uint32_t rx_num =
1326                                         virtqueue_dequeue_burst_rx(vq,
1327                                         rcv_pkts, len, rcv_cnt);
1328                                 i += rx_num;
1329                                 rcv_cnt = rx_num;
1330                         } else {
1331                                 PMD_RX_LOG(ERR,
1332                                            "Not enough segments for packet.");
1333                                 nb_enqueued++;
1334                                 virtio_discard_rxbuf(vq, rxm);
1335                                 rxvq->stats.errors++;
1336                                 break;
1337                         }
1338
1339                         extra_idx = 0;
1340
1341                         while (extra_idx < rcv_cnt) {
1342                                 rxm = rcv_pkts[extra_idx];
1343
1344                                 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1345                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1346                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1347
1348                                 if (prev)
1349                                         prev->next = rxm;
1350
1351                                 prev = rxm;
1352                                 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1353                                 extra_idx++;
1354                         }
1355                         seg_res -= rcv_cnt;
1356                 }
1357
1358                 if (hw->vlan_strip)
1359                         rte_vlan_strip(rx_pkts[nb_rx]);
1360
1361                 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1362                         rx_pkts[nb_rx]->data_len);
1363
1364                 rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
1365                 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1366                 nb_rx++;
1367         }
1368
1369         rxvq->stats.packets += nb_rx;
1370
1371         /* Allocate new mbuf for the used descriptor */
1372         error = ENOSPC;
1373         while (likely(!virtqueue_full(vq))) {
1374                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1375                 if (unlikely(new_mbuf == NULL)) {
1376                         struct rte_eth_dev *dev
1377                                 = &rte_eth_devices[rxvq->port_id];
1378                         dev->data->rx_mbuf_alloc_failed++;
1379                         break;
1380                 }
1381                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1382                 if (unlikely(error)) {
1383                         rte_pktmbuf_free(new_mbuf);
1384                         break;
1385                 }
1386                 nb_enqueued++;
1387         }
1388
1389         if (likely(nb_enqueued)) {
1390                 vq_update_avail_idx(vq);
1391
1392                 if (unlikely(virtqueue_kick_prepare(vq))) {
1393                         virtqueue_notify(vq);
1394                         PMD_RX_LOG(DEBUG, "Notified");
1395                 }
1396         }
1397
1398         return nb_rx;
1399 }
1400
1401 uint16_t
1402 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1403 {
1404         struct virtnet_tx *txvq = tx_queue;
1405         struct virtqueue *vq = txvq->vq;
1406         struct virtio_hw *hw = vq->hw;
1407         uint16_t hdr_size = hw->vtnet_hdr_size;
1408         uint16_t nb_used, nb_tx = 0;
1409         int error;
1410
1411         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1412                 return nb_tx;
1413
1414         if (unlikely(nb_pkts < 1))
1415                 return nb_pkts;
1416
1417         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1418         nb_used = VIRTQUEUE_NUSED(vq);
1419
1420         virtio_rmb();
1421         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1422                 virtio_xmit_cleanup(vq, nb_used);
1423
1424         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1425                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1426                 int can_push = 0, use_indirect = 0, slots, need;
1427
1428                 /* Do VLAN tag insertion */
1429                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1430                         error = rte_vlan_insert(&txm);
1431                         if (unlikely(error)) {
1432                                 rte_pktmbuf_free(txm);
1433                                 continue;
1434                         }
1435                 }
1436
1437                 /* optimize ring usage */
1438                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1439                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1440                     rte_mbuf_refcnt_read(txm) == 1 &&
1441                     RTE_MBUF_DIRECT(txm) &&
1442                     txm->nb_segs == 1 &&
1443                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1444                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1445                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1446                         can_push = 1;
1447                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1448                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1449                         use_indirect = 1;
1450
1451                 /* How many main ring entries are needed for this Tx?
1452                  * any_layout => number of segments
1453                  * indirect   => 1
1454                  * default    => number of segments + 1
1455                  */
1456                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1457                 need = slots - vq->vq_free_cnt;
1458
1459                 /* A positive value means free vring descriptors are needed */
1460                 if (unlikely(need > 0)) {
1461                         nb_used = VIRTQUEUE_NUSED(vq);
1462                         virtio_rmb();
1463                         need = RTE_MIN(need, (int)nb_used);
1464
1465                         virtio_xmit_cleanup(vq, need);
1466                         need = slots - vq->vq_free_cnt;
1467                         if (unlikely(need > 0)) {
1468                                 PMD_TX_LOG(ERR,
1469                                            "No free tx descriptors to transmit");
1470                                 break;
1471                         }
1472                 }
1473
1474                 /* Enqueue Packet buffers */
1475                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
1476                         can_push, 0);
1477
1478                 txvq->stats.bytes += txm->pkt_len;
1479                 virtio_update_packet_stats(&txvq->stats, txm);
1480         }
1481
1482         txvq->stats.packets += nb_tx;
1483
1484         if (likely(nb_tx)) {
1485                 vq_update_avail_idx(vq);
1486
1487                 if (unlikely(virtqueue_kick_prepare(vq))) {
1488                         virtqueue_notify(vq);
1489                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1490                 }
1491         }
1492
1493         return nb_tx;
1494 }
1495
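/*
 * In-order Tx burst: packets that can carry the virtio-net header in their
 * headroom (single segment, enough headroom, ANY_LAYOUT or VERSION_1) are
 * batched through virtqueue_enqueue_xmit_inorder(); anything else flushes
 * the pending batch and goes through virtqueue_enqueue_xmit() with in_order
 * set and indirect descriptors disabled.
 */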
1496 uint16_t
1497 virtio_xmit_pkts_inorder(void *tx_queue,
1498                         struct rte_mbuf **tx_pkts,
1499                         uint16_t nb_pkts)
1500 {
1501         struct virtnet_tx *txvq = tx_queue;
1502         struct virtqueue *vq = txvq->vq;
1503         struct virtio_hw *hw = vq->hw;
1504         uint16_t hdr_size = hw->vtnet_hdr_size;
1505         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
1506         struct rte_mbuf *inorder_pkts[nb_pkts];
1507         int error;
1508
1509         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1510                 return nb_tx;
1511
1512         if (unlikely(nb_pkts < 1))
1513                 return nb_pkts;
1514
1515         VIRTQUEUE_DUMP(vq);
1516         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1517         nb_used = VIRTQUEUE_NUSED(vq);
1518
1519         virtio_rmb();
1520         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1521                 virtio_xmit_cleanup_inorder(vq, nb_used);
1522
1523         if (unlikely(!vq->vq_free_cnt))
1524                 virtio_xmit_cleanup_inorder(vq, nb_used);
1525
1526         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
1527
1528         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
1529                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1530                 int slots, need;
1531
1532                 /* Do VLAN tag insertion */
1533                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1534                         error = rte_vlan_insert(&txm);
1535                         if (unlikely(error)) {
1536                                 rte_pktmbuf_free(txm);
1537                                 continue;
1538                         }
1539                 }
1540
1541                 /* optimize ring usage */
1542                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1543                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1544                      rte_mbuf_refcnt_read(txm) == 1 &&
1545                      RTE_MBUF_DIRECT(txm) &&
1546                      txm->nb_segs == 1 &&
1547                      rte_pktmbuf_headroom(txm) >= hdr_size &&
1548                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1549                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
1550                         inorder_pkts[nb_inorder_pkts] = txm;
1551                         nb_inorder_pkts++;
1552
1553                         txvq->stats.bytes += txm->pkt_len;
1554                         virtio_update_packet_stats(&txvq->stats, txm);
1555                         continue;
1556                 }
1557
1558                 if (nb_inorder_pkts) {
1559                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1560                                                         nb_inorder_pkts);
1561                         nb_inorder_pkts = 0;
1562                 }
1563
1564                 slots = txm->nb_segs + 1;
1565                 need = slots - vq->vq_free_cnt;
1566                 if (unlikely(need > 0)) {
1567                         nb_used = VIRTQUEUE_NUSED(vq);
1568                         virtio_rmb();
1569                         need = RTE_MIN(need, (int)nb_used);
1570
1571                         virtio_xmit_cleanup_inorder(vq, need);
1572
1573                         need = slots - vq->vq_free_cnt;
1574
1575                         if (unlikely(need > 0)) {
1576                                 PMD_TX_LOG(ERR,
1577                                         "No free tx descriptors to transmit");
1578                                 break;
1579                         }
1580                 }
1581                 /* Enqueue Packet buffers */
1582                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
1583
1584                 txvq->stats.bytes += txm->pkt_len;
1585                 virtio_update_packet_stats(&txvq->stats, txm);
1586         }
1587
1588         /* Transmit all inorder packets */
1589         if (nb_inorder_pkts)
1590                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1591                                                 nb_inorder_pkts);
1592
1593         txvq->stats.packets += nb_tx;
1594
1595         if (likely(nb_tx)) {
1596                 vq_update_avail_idx(vq);
1597
1598                 if (unlikely(virtqueue_kick_prepare(vq))) {
1599                         virtqueue_notify(vq);
1600                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1601                 }
1602         }
1603
1604         VIRTQUEUE_DUMP(vq);
1605
1606         return nb_tx;
1607 }