net/virtio: remove simple Tx path
[dpdk.git] drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34
35 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
36 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
37 #else
38 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
39 #endif
40
41 int
42 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
43 {
44         struct virtnet_rx *rxvq = rxq;
45         struct virtqueue *vq = rxvq->vq;
46
47         return VIRTQUEUE_NUSED(vq) >= offset;
48 }
49
50 void
51 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
52 {
53         vq->vq_free_cnt += num;
54         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
55 }
56
57 void
58 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
59 {
60         struct vring_desc *dp, *dp_tail;
61         struct vq_desc_extra *dxp;
62         uint16_t desc_idx_last = desc_idx;
63
64         dp  = &vq->vq_ring.desc[desc_idx];
65         dxp = &vq->vq_descx[desc_idx];
66         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
67         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
68                 while (dp->flags & VRING_DESC_F_NEXT) {
69                         desc_idx_last = dp->next;
70                         dp = &vq->vq_ring.desc[dp->next];
71                 }
72         }
73         dxp->ndescs = 0;
74
75         /*
76          * We must append the newly freed chain to the end of the existing
77          * free chain, if any. If the virtqueue was completely used, the free
78          * list is empty and vq_desc_tail_idx is VQ_RING_DESC_CHAIN_END.
79          */
80         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
81                 vq->vq_desc_head_idx = desc_idx;
82         } else {
83                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
84                 dp_tail->next = desc_idx;
85         }
86
87         vq->vq_desc_tail_idx = desc_idx_last;
88         dp->next = VQ_RING_DESC_CHAIN_END;
89 }
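
The free descriptors of the split ring form a singly linked list threaded through desc[].next, headed by vq_desc_head_idx, ended by vq_desc_tail_idx and terminated by VQ_RING_DESC_CHAIN_END. A minimal debug-only sketch for the default (out-of-order) path; the helper name is illustrative and not part of the driver:

/* Walk the free list and count its length; for the default path this
 * should normally match vq->vq_free_cnt. */
static inline uint16_t
vq_count_free_chain(const struct virtqueue *vq)
{
        uint16_t idx = vq->vq_desc_head_idx;
        uint16_t cnt = 0;

        while (idx != VQ_RING_DESC_CHAIN_END && cnt < vq->vq_nentries) {
                cnt++;
                idx = vq->vq_ring.desc[idx].next;
        }

        return cnt;
}
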
90
91 static uint16_t
92 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
93                            uint32_t *len, uint16_t num)
94 {
95         struct vring_used_elem *uep;
96         struct rte_mbuf *cookie;
97         uint16_t used_idx, desc_idx;
98         uint16_t i;
99
100         /* Caller ensures num does not exceed the number of used entries */
101         for (i = 0; i < num ; i++) {
102                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
103                 uep = &vq->vq_ring.used->ring[used_idx];
104                 desc_idx = (uint16_t) uep->id;
105                 len[i] = uep->len;
106                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
107
108                 if (unlikely(cookie == NULL)) {
109                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
110                                 vq->vq_used_cons_idx);
111                         break;
112                 }
113
114                 rte_prefetch0(cookie);
115                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
116                 rx_pkts[i]  = cookie;
117                 vq->vq_used_cons_idx++;
118                 vq_ring_free_chain(vq, desc_idx);
119                 vq->vq_descx[desc_idx].cookie = NULL;
120         }
121
122         return i;
123 }
124
125 static uint16_t
126 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
127                         struct rte_mbuf **rx_pkts,
128                         uint32_t *len,
129                         uint16_t num)
130 {
131         struct vring_used_elem *uep;
132         struct rte_mbuf *cookie;
133         uint16_t used_idx = 0;
134         uint16_t i;
135
136         if (unlikely(num == 0))
137                 return 0;
138
139         for (i = 0; i < num; i++) {
140                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
141                 /* Desc idx same as used idx */
142                 uep = &vq->vq_ring.used->ring[used_idx];
143                 len[i] = uep->len;
144                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
145
146                 if (unlikely(cookie == NULL)) {
147                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
148                                 vq->vq_used_cons_idx);
149                         break;
150                 }
151
152                 rte_prefetch0(cookie);
153                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
154                 rx_pkts[i]  = cookie;
155                 vq->vq_used_cons_idx++;
156                 vq->vq_descx[used_idx].cookie = NULL;
157         }
158
159         vq_ring_free_inorder(vq, used_idx, i);
160         return i;
161 }
162
163 #ifndef DEFAULT_TX_FREE_THRESH
164 #define DEFAULT_TX_FREE_THRESH 32
165 #endif
166
167 /* Cleanup from completed transmits. */
168 static void
169 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
170 {
171         uint16_t i, used_idx, desc_idx;
172         for (i = 0; i < num; i++) {
173                 struct vring_used_elem *uep;
174                 struct vq_desc_extra *dxp;
175
176                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
177                 uep = &vq->vq_ring.used->ring[used_idx];
178
179                 desc_idx = (uint16_t) uep->id;
180                 dxp = &vq->vq_descx[desc_idx];
181                 vq->vq_used_cons_idx++;
182                 vq_ring_free_chain(vq, desc_idx);
183
184                 if (dxp->cookie != NULL) {
185                         rte_pktmbuf_free(dxp->cookie);
186                         dxp->cookie = NULL;
187                 }
188         }
189 }
190
191 /* Cleanup from completed inorder transmits. */
192 static void
193 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
194 {
195         uint16_t i, used_idx, desc_idx = 0, last_idx;
196         int16_t free_cnt = 0;
197         struct vq_desc_extra *dxp = NULL;
198
199         if (unlikely(num == 0))
200                 return;
201
202         for (i = 0; i < num; i++) {
203                 struct vring_used_elem *uep;
204
205                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
206                 uep = &vq->vq_ring.used->ring[used_idx];
207                 desc_idx = (uint16_t)uep->id;
208
209                 dxp = &vq->vq_descx[desc_idx];
210                 vq->vq_used_cons_idx++;
211
212                 if (dxp->cookie != NULL) {
213                         rte_pktmbuf_free(dxp->cookie);
214                         dxp->cookie = NULL;
215                 }
216         }
217
218         last_idx = desc_idx + dxp->ndescs - 1;
219         free_cnt = last_idx - vq->vq_desc_tail_idx;
220         if (free_cnt <= 0)
221                 free_cnt += vq->vq_nentries;
222
223         vq_ring_free_inorder(vq, last_idx, free_cnt);
224 }
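
As a worked example of the wrap-around above, assume a 256-entry ring with vq_desc_tail_idx = 250 and last_idx = 5: free_cnt = 5 - 250 = -245, which is <= 0, so 256 is added, giving 11 reclaimed descriptors (indices 251-255 and 0-5) that vq_ring_free_inorder() adds to vq_free_cnt before moving the tail to 5.
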
225
226 static inline int
227 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
228                         struct rte_mbuf **cookies,
229                         uint16_t num)
230 {
231         struct vq_desc_extra *dxp;
232         struct virtio_hw *hw = vq->hw;
233         struct vring_desc *start_dp;
234         uint16_t head_idx, idx, i = 0;
235
236         if (unlikely(vq->vq_free_cnt == 0))
237                 return -ENOSPC;
238         if (unlikely(vq->vq_free_cnt < num))
239                 return -EMSGSIZE;
240
241         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
242         start_dp = vq->vq_ring.desc;
243
244         while (i < num) {
245                 idx = head_idx & (vq->vq_nentries - 1);
246                 dxp = &vq->vq_descx[idx];
247                 dxp->cookie = (void *)cookies[i];
248                 dxp->ndescs = 1;
249
250                 start_dp[idx].addr =
251                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
252                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
253                 start_dp[idx].len =
254                                 cookies[i]->buf_len -
255                                 RTE_PKTMBUF_HEADROOM +
256                                 hw->vtnet_hdr_size;
257                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
258
259                 vq_update_avail_ring(vq, idx);
260                 head_idx++;
261                 i++;
262         }
263
264         vq->vq_desc_head_idx += num;
265         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
266         return 0;
267 }
268
269 static inline int
270 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
271 {
272         struct vq_desc_extra *dxp;
273         struct virtio_hw *hw = vq->hw;
274         struct vring_desc *start_dp;
275         uint16_t needed = 1;
276         uint16_t head_idx, idx;
277
278         if (unlikely(vq->vq_free_cnt == 0))
279                 return -ENOSPC;
280         if (unlikely(vq->vq_free_cnt < needed))
281                 return -EMSGSIZE;
282
283         head_idx = vq->vq_desc_head_idx;
284         if (unlikely(head_idx >= vq->vq_nentries))
285                 return -EFAULT;
286
287         idx = head_idx;
288         dxp = &vq->vq_descx[idx];
289         dxp->cookie = (void *)cookie;
290         dxp->ndescs = needed;
291
292         start_dp = vq->vq_ring.desc;
293         start_dp[idx].addr =
294                 VIRTIO_MBUF_ADDR(cookie, vq) +
295                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
296         start_dp[idx].len =
297                 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
298         start_dp[idx].flags =  VRING_DESC_F_WRITE;
299         idx = start_dp[idx].next;
300         vq->vq_desc_head_idx = idx;
301         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
302                 vq->vq_desc_tail_idx = idx;
303         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
304         vq_update_avail_ring(vq, head_idx);
305
306         return 0;
307 }
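
The address/length arithmetic above places the virtio-net header in the mbuf headroom, immediately before the packet data. Assuming the default 128-byte RTE_PKTMBUF_HEADROOM and a 12-byte mergeable header, the buffer handed to the device starts at buf_addr + 116 with length buf_len - 116, so the header occupies the last 12 bytes of the headroom and the Ethernet frame begins exactly at data_off = 128.
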
308
309 /* When doing TSO, the IP payload length is not included in the pseudo
310  * header checksum of the packet given to the PMD, but virtio expects
311  * it to be included.
312  */
313 static void
314 virtio_tso_fix_cksum(struct rte_mbuf *m)
315 {
316         /* common case: header is not fragmented */
317         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
318                         m->l4_len)) {
319                 struct ipv4_hdr *iph;
320                 struct ipv6_hdr *ip6h;
321                 struct tcp_hdr *th;
322                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
323                 uint32_t tmp;
324
325                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
326                 th = RTE_PTR_ADD(iph, m->l3_len);
327                 if ((iph->version_ihl >> 4) == 4) {
328                         iph->hdr_checksum = 0;
329                         iph->hdr_checksum = rte_ipv4_cksum(iph);
330                         ip_len = iph->total_length;
331                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
332                                 m->l3_len);
333                 } else {
334                         ip6h = (struct ipv6_hdr *)iph;
335                         ip_paylen = ip6h->payload_len;
336                 }
337
338                 /* calculate the new phdr checksum not including ip_paylen */
339                 prev_cksum = th->cksum;
340                 tmp = prev_cksum;
341                 tmp += ip_paylen;
342                 tmp = (tmp & 0xffff) + (tmp >> 16);
343                 new_cksum = tmp;
344
345                 /* replace it in the packet */
346                 th->cksum = new_cksum;
347         }
348 }
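
The adjustment above is a standard one's-complement checksum update: the payload length is added into the existing pseudo-header checksum and the carry is folded back into the low 16 bits. A stand-alone sketch of that fold; the helper name is illustrative, not part of the driver:

/* One's-complement add of a 16-bit value into an existing checksum,
 * folding the carry back into the low 16 bits (same arithmetic as above). */
static inline uint16_t
csum16_add(uint16_t cksum, uint16_t addend)
{
        uint32_t sum = (uint32_t)cksum + addend;

        return (uint16_t)((sum & 0xffff) + (sum >> 16));
}
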
349
350 static inline int
351 tx_offload_enabled(struct virtio_hw *hw)
352 {
353         return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
354                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
355                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
356 }
357
358 /* avoid unnecessary write operations, to lessen cache line dirtying */
359 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
360         if ((var) != (val))                     \
361                 (var) = (val);                  \
362 } while (0)
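
For example, ASSIGN_UNLESS_EQUAL(hdr->flags, 0) expands (apart from the do/while wrapper) to:

        if (hdr->flags != 0)
                hdr->flags = 0;

so a field that is already zero is only read, never written, and the cache line holding the virtio-net header stays clean when nothing changes.
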
363
364 static inline void
365 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
366                         struct rte_mbuf *cookie,
367                         int offload)
368 {
369         if (offload) {
370                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
371                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
372
373                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
374                 case PKT_TX_UDP_CKSUM:
375                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
376                         hdr->csum_offset = offsetof(struct udp_hdr,
377                                 dgram_cksum);
378                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
379                         break;
380
381                 case PKT_TX_TCP_CKSUM:
382                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
383                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
384                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
385                         break;
386
387                 default:
388                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
389                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
390                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
391                         break;
392                 }
393
394                 /* TCP Segmentation Offload */
395                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
396                         virtio_tso_fix_cksum(cookie);
397                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
398                                 VIRTIO_NET_HDR_GSO_TCPV6 :
399                                 VIRTIO_NET_HDR_GSO_TCPV4;
400                         hdr->gso_size = cookie->tso_segsz;
401                         hdr->hdr_len =
402                                 cookie->l2_len +
403                                 cookie->l3_len +
404                                 cookie->l4_len;
405                 } else {
406                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
407                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
408                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
409                 }
410         }
411 }
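
virtqueue_xmit_offload() only translates state the application already put in the mbuf. A hedged sketch of the sending side, assuming Tx checksum offload is enabled on the port and the packet is plain IPv4/TCP without IP options:

/* Illustrative only: mark an mbuf so the host completes the TCP checksum
 * via VIRTIO_NET_HDR_F_NEEDS_CSUM. Per the PKT_TX_TCP_CKSUM contract the
 * TCP checksum field must already hold the pseudo-header checksum
 * (e.g. from rte_ipv4_phdr_cksum()). */
static void
request_tcp_csum_offload(struct rte_mbuf *m)
{
        m->l2_len = sizeof(struct ether_hdr);
        m->l3_len = sizeof(struct ipv4_hdr);
        m->ol_flags |= PKT_TX_IPV4 | PKT_TX_TCP_CKSUM;
}
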
412
413 static inline void
414 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
415                         struct rte_mbuf **cookies,
416                         uint16_t num)
417 {
418         struct vq_desc_extra *dxp;
419         struct virtqueue *vq = txvq->vq;
420         struct vring_desc *start_dp;
421         struct virtio_net_hdr *hdr;
422         uint16_t idx;
423         uint16_t head_size = vq->hw->vtnet_hdr_size;
424         int offload;
425         uint16_t i = 0;
426
427         idx = vq->vq_desc_head_idx;
428         start_dp = vq->vq_ring.desc;
429
430         offload = tx_offload_enabled(vq->hw);
431
432         while (i < num) {
433                 idx = idx & (vq->vq_nentries - 1);
434                 dxp = &vq->vq_descx[idx];
435                 dxp->cookie = (void *)cookies[i];
436                 dxp->ndescs = 1;
437
438                 hdr = (struct virtio_net_hdr *)
439                         rte_pktmbuf_prepend(cookies[i], head_size);
440                 cookies[i]->pkt_len -= head_size;
441
442                 /* if offload is disabled, the header is not zeroed below, do it now */
443                 if (offload == 0) {
444                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
445                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
446                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
447                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
448                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
449                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
450                 }
451
452                 virtqueue_xmit_offload(hdr, cookies[i], offload);
453
454                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
455                 start_dp[idx].len   = cookies[i]->data_len;
456                 start_dp[idx].flags = 0;
457
458                 vq_update_avail_ring(vq, idx);
459
460                 idx++;
461                 i++;
462         }
463
464         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
465         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
466 }
467
468 static inline void
469 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
470                         uint16_t needed, int use_indirect, int can_push,
471                         int in_order)
472 {
473         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
474         struct vq_desc_extra *dxp;
475         struct virtqueue *vq = txvq->vq;
476         struct vring_desc *start_dp;
477         uint16_t seg_num = cookie->nb_segs;
478         uint16_t head_idx, idx;
479         uint16_t head_size = vq->hw->vtnet_hdr_size;
480         struct virtio_net_hdr *hdr;
481         int offload;
482
483         offload = tx_offload_enabled(vq->hw);
484
485         head_idx = vq->vq_desc_head_idx;
486         idx = head_idx;
487         dxp = &vq->vq_descx[idx];
488         dxp->cookie = (void *)cookie;
489         dxp->ndescs = needed;
490
491         start_dp = vq->vq_ring.desc;
492
493         if (can_push) {
494                 /* prepend cannot fail, checked by caller */
495                 hdr = (struct virtio_net_hdr *)
496                         rte_pktmbuf_prepend(cookie, head_size);
497                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
498                  * which is not wanted here; the subtraction below restores it.
499                  */
500                 cookie->pkt_len -= head_size;
501
502                 /* if offload is disabled, the header is not zeroed below, do it now */
503                 if (offload == 0) {
504                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
505                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
506                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
507                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
508                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
509                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
510                 }
511         } else if (use_indirect) {
512                 /* setup tx ring slot to point to indirect
513                  * descriptor list stored in reserved region.
514                  *
515                  * the first slot in indirect ring is already preset
516                  * to point to the header in reserved region
517                  */
518                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
519                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
520                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
521                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
522                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
523
524                 /* loop below will fill in rest of the indirect elements */
525                 start_dp = txr[idx].tx_indir;
526                 idx = 1;
527         } else {
528                 /* setup first tx ring slot to point to header
529                  * stored in reserved region.
530                  */
531                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
532                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
533                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
534                 start_dp[idx].flags = VRING_DESC_F_NEXT;
535                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
536
537                 idx = start_dp[idx].next;
538         }
539
540         virtqueue_xmit_offload(hdr, cookie, offload);
541
542         do {
543                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
544                 start_dp[idx].len   = cookie->data_len;
545                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
546                 idx = start_dp[idx].next;
547         } while ((cookie = cookie->next) != NULL);
548
549         if (use_indirect)
550                 idx = vq->vq_ring.desc[head_idx].next;
551
552         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
553
554         vq->vq_desc_head_idx = idx;
555         vq_update_avail_ring(vq, head_idx);
556
557         if (!in_order) {
558                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
559                         vq->vq_desc_tail_idx = idx;
560         }
561 }
562
563 void
564 virtio_dev_cq_start(struct rte_eth_dev *dev)
565 {
566         struct virtio_hw *hw = dev->data->dev_private;
567
568         if (hw->cvq && hw->cvq->vq) {
569                 rte_spinlock_init(&hw->cvq->lock);
570                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
571         }
572 }
573
574 int
575 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
576                         uint16_t queue_idx,
577                         uint16_t nb_desc,
578                         unsigned int socket_id __rte_unused,
579                         const struct rte_eth_rxconf *rx_conf __rte_unused,
580                         struct rte_mempool *mp)
581 {
582         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
583         struct virtio_hw *hw = dev->data->dev_private;
584         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
585         struct virtnet_rx *rxvq;
586
587         PMD_INIT_FUNC_TRACE();
588
589         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
590                 nb_desc = vq->vq_nentries;
591         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
592
593         rxvq = &vq->rxq;
594         rxvq->queue_id = queue_idx;
595         rxvq->mpool = mp;
596         if (rxvq->mpool == NULL) {
597                 rte_exit(EXIT_FAILURE,
598                         "Cannot allocate mbufs for rx virtqueue");
599         }
600
601         dev->data->rx_queues[queue_idx] = rxvq;
602
603         return 0;
604 }
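
From the application, this setup is reached through the generic ethdev API; a minimal sketch with example values (pool size, ring size and port are assumptions, not driver requirements):

static int
setup_rx_queue0(uint16_t port_id)
{
        struct rte_mempool *mb_pool;

        mb_pool = rte_pktmbuf_pool_create("rx_pool", 8192, 256, 0,
                        RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
        if (mb_pool == NULL)
                return -1;

        /* NULL rxconf selects the driver defaults */
        return rte_eth_rx_queue_setup(port_id, 0, 256,
                        rte_eth_dev_socket_id(port_id), NULL, mb_pool);
}
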
605
606 int
607 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
608 {
609         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
610         struct virtio_hw *hw = dev->data->dev_private;
611         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
612         struct virtnet_rx *rxvq = &vq->rxq;
613         struct rte_mbuf *m;
614         uint16_t desc_idx;
615         int error, nbufs, i;
616
617         PMD_INIT_FUNC_TRACE();
618
619         /* Allocate blank mbufs for each rx descriptor */
620         nbufs = 0;
621
622         if (hw->use_simple_rx) {
623                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
624                      desc_idx++) {
625                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
626                         vq->vq_ring.desc[desc_idx].flags =
627                                 VRING_DESC_F_WRITE;
628                 }
629
630                 virtio_rxq_vec_setup(rxvq);
631         }
632
633         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
634         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
635              desc_idx++) {
636                 vq->sw_ring[vq->vq_nentries + desc_idx] =
637                         &rxvq->fake_mbuf;
638         }
639
640         if (hw->use_simple_rx) {
641                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
642                         virtio_rxq_rearm_vec(rxvq);
643                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
644                 }
645         } else if (hw->use_inorder_rx) {
646                 if (!virtqueue_full(vq)) {
647                         uint16_t free_cnt = vq->vq_free_cnt;
648                         struct rte_mbuf *pkts[free_cnt];
649
650                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
651                                 free_cnt)) {
652                                 error = virtqueue_enqueue_refill_inorder(vq,
653                                                 pkts,
654                                                 free_cnt);
655                                 if (unlikely(error)) {
656                                         for (i = 0; i < free_cnt; i++)
657                                                 rte_pktmbuf_free(pkts[i]);
658                                 }
659                         }
660
661                         nbufs += free_cnt;
662                         vq_update_avail_idx(vq);
663                 }
664         } else {
665                 while (!virtqueue_full(vq)) {
666                         m = rte_mbuf_raw_alloc(rxvq->mpool);
667                         if (m == NULL)
668                                 break;
669
670                         /* Enqueue allocated buffers */
671                         error = virtqueue_enqueue_recv_refill(vq, m);
672                         if (error) {
673                                 rte_pktmbuf_free(m);
674                                 break;
675                         }
676                         nbufs++;
677                 }
678
679                 vq_update_avail_idx(vq);
680         }
681
682         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
683
684         VIRTQUEUE_DUMP(vq);
685
686         return 0;
687 }
688
689 /*
690  * struct rte_eth_dev *dev: device whose Tx queue list is updated
691  * uint16_t queue_idx: index into the device's Tx queue list
692  * uint16_t nb_desc: ring size; 0 falls back to the size from config space
693  * unsigned int socket_id: unused here, the queue memory is already allocated
694  * const struct rte_eth_txconf *tx_conf: used to set up the Tx engine
695  */
696 int
697 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
698                         uint16_t queue_idx,
699                         uint16_t nb_desc,
700                         unsigned int socket_id __rte_unused,
701                         const struct rte_eth_txconf *tx_conf)
702 {
703         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
704         struct virtio_hw *hw = dev->data->dev_private;
705         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
706         struct virtnet_tx *txvq;
707         uint16_t tx_free_thresh;
708
709         PMD_INIT_FUNC_TRACE();
710
711         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
712                 nb_desc = vq->vq_nentries;
713         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
714
715         txvq = &vq->txq;
716         txvq->queue_id = queue_idx;
717
718         tx_free_thresh = tx_conf->tx_free_thresh;
719         if (tx_free_thresh == 0)
720                 tx_free_thresh =
721                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
722
723         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
724                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
725                         "number of TX entries minus 3 (%u)."
726                         " (tx_free_thresh=%u port=%u queue=%u)\n",
727                         vq->vq_nentries - 3,
728                         tx_free_thresh, dev->data->port_id, queue_idx);
729                 return -EINVAL;
730         }
731
732         vq->vq_free_thresh = tx_free_thresh;
733
734         dev->data->tx_queues[queue_idx] = txvq;
735         return 0;
736 }
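
The ethdev-level counterpart for Tx; a minimal sketch with example values, showing where tx_free_thresh comes from (leaving it 0 lets the code above pick min(nb_desc / 4, 32)):

static int
setup_tx_queue0(uint16_t port_id)
{
        struct rte_eth_txconf txconf = {
                .tx_free_thresh = 0, /* 0 = use the driver default */
        };

        return rte_eth_tx_queue_setup(port_id, 0, 256,
                        rte_eth_dev_socket_id(port_id), &txconf);
}
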
737
738 int
739 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
740                                 uint16_t queue_idx)
741 {
742         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
743         struct virtio_hw *hw = dev->data->dev_private;
744         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
745
746         PMD_INIT_FUNC_TRACE();
747
748         if (hw->use_inorder_tx)
749                 vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
750
751         VIRTQUEUE_DUMP(vq);
752
753         return 0;
754 }
755
756 static void
757 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
758 {
759         int error;
760         /*
761          * Requeue the discarded mbuf. This should always be
762          * successful since it was just dequeued.
763          */
764         error = virtqueue_enqueue_recv_refill(vq, m);
765
766         if (unlikely(error)) {
767                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
768                 rte_pktmbuf_free(m);
769         }
770 }
771
772 static void
773 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
774 {
775         int error;
776
777         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
778         if (unlikely(error)) {
779                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
780                 rte_pktmbuf_free(m);
781         }
782 }
783
784 static void
785 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
786 {
787         uint32_t s = mbuf->pkt_len;
788         struct ether_addr *ea;
789
790         if (s == 64) {
791                 stats->size_bins[1]++;
792         } else if (s > 64 && s < 1024) {
793                 uint32_t bin;
794
795                 /* count leading zeros to index the matching power-of-two bin */
796                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
797                 stats->size_bins[bin]++;
798         } else {
799                 if (s < 64)
800                         stats->size_bins[0]++;
801                 else if (s < 1519)
802                         stats->size_bins[6]++;
803                 else
804                         stats->size_bins[7]++;
805         }
806
807         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
808         if (is_multicast_ether_addr(ea)) {
809                 if (is_broadcast_ether_addr(ea))
810                         stats->broadcast++;
811                 else
812                         stats->multicast++;
813         }
814 }
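
As a worked example of the bin index above: for s = 300, __builtin_clz(300) on a 32-bit value is 23, so bin = 32 - 23 - 5 = 4, i.e. the 256-511 byte bucket; 65-127 bytes map to bin 2 and 512-1023 bytes to bin 5, while exactly 64, under 64, 1024-1518 and 1519+ bytes are counted by the explicit branches.
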
815
816 static inline void
817 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
818 {
819         VIRTIO_DUMP_PACKET(m, m->data_len);
820
821         rxvq->stats.bytes += m->pkt_len;
822         virtio_update_packet_stats(&rxvq->stats, m);
823 }
824
825 /* Optionally fill offload information in structure */
826 static int
827 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
828 {
829         struct rte_net_hdr_lens hdr_lens;
830         uint32_t hdrlen, ptype;
831         int l4_supported = 0;
832
833         /* nothing to do */
834         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
835                 return 0;
836
837         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
838
839         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
840         m->packet_type = ptype;
841         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
842             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
843             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
844                 l4_supported = 1;
845
846         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
847                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
848                 if (hdr->csum_start <= hdrlen && l4_supported) {
849                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
850                 } else {
851                         /* Unknown proto or tunnel, do sw cksum. We can assume
852                          * the cksum field is in the first segment since the
853                          * buffers we provided to the host are large enough.
854                          * In case of SCTP, this will be wrong since it's a CRC
855                          * but there's nothing we can do.
856                          */
857                         uint16_t csum = 0, off;
858
859                         rte_raw_cksum_mbuf(m, hdr->csum_start,
860                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
861                                 &csum);
862                         if (likely(csum != 0xffff))
863                                 csum = ~csum;
864                         off = hdr->csum_offset + hdr->csum_start;
865                         if (rte_pktmbuf_data_len(m) >= off + 1)
866                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
867                                         off) = csum;
868                 }
869         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
870                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
871         }
872
873         /* GSO request, save required information in mbuf */
874         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
875                 /* Check unsupported modes */
876                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
877                     (hdr->gso_size == 0)) {
878                         return -EINVAL;
879                 }
880
881                 /* Update MSS length in mbuf */
882                 m->tso_segsz = hdr->gso_size;
883                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
884                 case VIRTIO_NET_HDR_GSO_TCPV4:
885                 case VIRTIO_NET_HDR_GSO_TCPV6:
886                         m->ol_flags |= PKT_RX_LRO |
887                                 PKT_RX_L4_CKSUM_NONE;
888                         break;
889                 default:
890                         return -EINVAL;
891                 }
892         }
893
894         return 0;
895 }
896
897 static inline int
898 rx_offload_enabled(struct virtio_hw *hw)
899 {
900         return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
901                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
902                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
903 }
904
905 #define VIRTIO_MBUF_BURST_SZ 64
906 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
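
Assuming the usual 64-byte cache line and the 16-byte struct vring_desc, DESC_PER_CACHELINE is 4; virtio_recv_pkts() below trims the burst so that vq_used_cons_idx + num ends on a multiple of 4, letting each polled burst stop on a descriptor cache-line boundary instead of leaving one partially consumed.
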
907 uint16_t
908 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
909 {
910         struct virtnet_rx *rxvq = rx_queue;
911         struct virtqueue *vq = rxvq->vq;
912         struct virtio_hw *hw = vq->hw;
913         struct rte_mbuf *rxm, *new_mbuf;
914         uint16_t nb_used, num, nb_rx;
915         uint32_t len[VIRTIO_MBUF_BURST_SZ];
916         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
917         int error;
918         uint32_t i, nb_enqueued;
919         uint32_t hdr_size;
920         int offload;
921         struct virtio_net_hdr *hdr;
922
923         nb_rx = 0;
924         if (unlikely(hw->started == 0))
925                 return nb_rx;
926
927         nb_used = VIRTQUEUE_NUSED(vq);
928
929         virtio_rmb();
930
931         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
932         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
933                 num = VIRTIO_MBUF_BURST_SZ;
934         if (likely(num > DESC_PER_CACHELINE))
935                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
936
937         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
938         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
939
940         nb_enqueued = 0;
941         hdr_size = hw->vtnet_hdr_size;
942         offload = rx_offload_enabled(hw);
943
944         for (i = 0; i < num ; i++) {
945                 rxm = rcv_pkts[i];
946
947                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
948
949                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
950                         PMD_RX_LOG(ERR, "Packet drop");
951                         nb_enqueued++;
952                         virtio_discard_rxbuf(vq, rxm);
953                         rxvq->stats.errors++;
954                         continue;
955                 }
956
957                 rxm->port = rxvq->port_id;
958                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
959                 rxm->ol_flags = 0;
960                 rxm->vlan_tci = 0;
961
962                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
963                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
964
965                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
966                         RTE_PKTMBUF_HEADROOM - hdr_size);
967
968                 if (hw->vlan_strip)
969                         rte_vlan_strip(rxm);
970
971                 if (offload && virtio_rx_offload(rxm, hdr) < 0) {
972                         virtio_discard_rxbuf(vq, rxm);
973                         rxvq->stats.errors++;
974                         continue;
975                 }
976
977                 virtio_rx_stats_updated(rxvq, rxm);
978
979                 rx_pkts[nb_rx++] = rxm;
980         }
981
982         rxvq->stats.packets += nb_rx;
983
984         /* Allocate new mbuf for the used descriptor */
985         error = ENOSPC;
986         while (likely(!virtqueue_full(vq))) {
987                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
988                 if (unlikely(new_mbuf == NULL)) {
989                         struct rte_eth_dev *dev
990                                 = &rte_eth_devices[rxvq->port_id];
991                         dev->data->rx_mbuf_alloc_failed++;
992                         break;
993                 }
994                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
995                 if (unlikely(error)) {
996                         rte_pktmbuf_free(new_mbuf);
997                         break;
998                 }
999                 nb_enqueued++;
1000         }
1001
1002         if (likely(nb_enqueued)) {
1003                 vq_update_avail_idx(vq);
1004
1005                 if (unlikely(virtqueue_kick_prepare(vq))) {
1006                         virtqueue_notify(vq);
1007                         PMD_RX_LOG(DEBUG, "Notified");
1008                 }
1009         }
1010
1011         return nb_rx;
1012 }
1013
1014 uint16_t
1015 virtio_recv_mergeable_pkts_inorder(void *rx_queue,
1016                         struct rte_mbuf **rx_pkts,
1017                         uint16_t nb_pkts)
1018 {
1019         struct virtnet_rx *rxvq = rx_queue;
1020         struct virtqueue *vq = rxvq->vq;
1021         struct virtio_hw *hw = vq->hw;
1022         struct rte_mbuf *rxm;
1023         struct rte_mbuf *prev;
1024         uint16_t nb_used, num, nb_rx;
1025         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1026         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1027         int error;
1028         uint32_t nb_enqueued;
1029         uint32_t seg_num;
1030         uint32_t seg_res;
1031         uint32_t hdr_size;
1032         int32_t i;
1033         int offload;
1034
1035         nb_rx = 0;
1036         if (unlikely(hw->started == 0))
1037                 return nb_rx;
1038
1039         nb_used = VIRTQUEUE_NUSED(vq);
1040         nb_used = RTE_MIN(nb_used, nb_pkts);
1041         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1042
1043         virtio_rmb();
1044
1045         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1046
1047         nb_enqueued = 0;
1048         seg_num = 1;
1049         seg_res = 0;
1050         hdr_size = hw->vtnet_hdr_size;
1051         offload = rx_offload_enabled(hw);
1052
1053         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1054
1055         for (i = 0; i < num; i++) {
1056                 struct virtio_net_hdr_mrg_rxbuf *header;
1057
1058                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1059                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1060
1061                 rxm = rcv_pkts[i];
1062
1063                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1064                         PMD_RX_LOG(ERR, "Packet drop");
1065                         nb_enqueued++;
1066                         virtio_discard_rxbuf_inorder(vq, rxm);
1067                         rxvq->stats.errors++;
1068                         continue;
1069                 }
1070
1071                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1072                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1073                          - hdr_size);
1074                 seg_num = header->num_buffers;
1075
1076                 if (seg_num == 0)
1077                         seg_num = 1;
1078
1079                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1080                 rxm->nb_segs = seg_num;
1081                 rxm->ol_flags = 0;
1082                 rxm->vlan_tci = 0;
1083                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1084                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1085
1086                 rxm->port = rxvq->port_id;
1087
1088                 rx_pkts[nb_rx] = rxm;
1089                 prev = rxm;
1090
1091                 if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
1092                         virtio_discard_rxbuf_inorder(vq, rxm);
1093                         rxvq->stats.errors++;
1094                         continue;
1095                 }
1096
1097                 if (hw->vlan_strip)
1098                         rte_vlan_strip(rx_pkts[nb_rx]);
1099
1100                 seg_res = seg_num - 1;
1101
1102                 /* Merge remaining segments */
1103                 while (seg_res != 0 && i < (num - 1)) {
1104                         i++;
1105
1106                         rxm = rcv_pkts[i];
1107                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1108                         rxm->pkt_len = (uint32_t)(len[i]);
1109                         rxm->data_len = (uint16_t)(len[i]);
1110
1111                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1112                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1113
1114                         if (prev)
1115                                 prev->next = rxm;
1116
1117                         prev = rxm;
1118                         seg_res -= 1;
1119                 }
1120
1121                 if (!seg_res) {
1122                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1123                         nb_rx++;
1124                 }
1125         }
1126
1127         /* The last packet may still need remaining segments merged */
1128         while (seg_res != 0) {
1129                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1130                                         VIRTIO_MBUF_BURST_SZ);
1131
1132                 prev = rcv_pkts[nb_rx];
1133                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1134                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1135                                                            rcv_cnt);
1136                         uint16_t extra_idx = 0;
1137
1138                         rcv_cnt = num;
1139                         while (extra_idx < rcv_cnt) {
1140                                 rxm = rcv_pkts[extra_idx];
1141                                 rxm->data_off =
1142                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1143                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1144                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1145                                 prev->next = rxm;
1146                                 prev = rxm;
1147                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1148                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1149                                 extra_idx += 1;
1150                         }
1151                         seg_res -= rcv_cnt;
1152
1153                         if (!seg_res) {
1154                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1155                                 nb_rx++;
1156                         }
1157                 } else {
1158                         PMD_RX_LOG(ERR,
1159                                    "Not enough segments for packet.");
1160                         virtio_discard_rxbuf_inorder(vq, prev);
1161                         rxvq->stats.errors++;
1162                         break;
1163                 }
1164         }
1165
1166         rxvq->stats.packets += nb_rx;
1167
1168         /* Allocate new mbuf for the used descriptor */
1169
1170         if (likely(!virtqueue_full(vq))) {
1171                 /* free_cnt may include mrg descs */
1172                 uint16_t free_cnt = vq->vq_free_cnt;
1173                 struct rte_mbuf *new_pkts[free_cnt];
1174
1175                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1176                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1177                                         free_cnt);
1178                         if (unlikely(error)) {
1179                                 for (i = 0; i < free_cnt; i++)
1180                                         rte_pktmbuf_free(new_pkts[i]);
1181                         }
1182                         nb_enqueued += free_cnt;
1183                 } else {
1184                         struct rte_eth_dev *dev =
1185                                 &rte_eth_devices[rxvq->port_id];
1186                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1187                 }
1188         }
1189
1190         if (likely(nb_enqueued)) {
1191                 vq_update_avail_idx(vq);
1192
1193                 if (unlikely(virtqueue_kick_prepare(vq))) {
1194                         virtqueue_notify(vq);
1195                         PMD_RX_LOG(DEBUG, "Notified");
1196                 }
1197         }
1198
1199         return nb_rx;
1200 }
1201
1202 uint16_t
1203 virtio_recv_mergeable_pkts(void *rx_queue,
1204                         struct rte_mbuf **rx_pkts,
1205                         uint16_t nb_pkts)
1206 {
1207         struct virtnet_rx *rxvq = rx_queue;
1208         struct virtqueue *vq = rxvq->vq;
1209         struct virtio_hw *hw = vq->hw;
1210         struct rte_mbuf *rxm, *new_mbuf;
1211         uint16_t nb_used, num, nb_rx;
1212         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1213         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1214         struct rte_mbuf *prev;
1215         int error;
1216         uint32_t i, nb_enqueued;
1217         uint32_t seg_num;
1218         uint16_t extra_idx;
1219         uint32_t seg_res;
1220         uint32_t hdr_size;
1221         int offload;
1222
1223         nb_rx = 0;
1224         if (unlikely(hw->started == 0))
1225                 return nb_rx;
1226
1227         nb_used = VIRTQUEUE_NUSED(vq);
1228
1229         virtio_rmb();
1230
1231         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1232
1233         i = 0;
1234         nb_enqueued = 0;
1235         seg_num = 0;
1236         extra_idx = 0;
1237         seg_res = 0;
1238         hdr_size = hw->vtnet_hdr_size;
1239         offload = rx_offload_enabled(hw);
1240
1241         while (i < nb_used) {
1242                 struct virtio_net_hdr_mrg_rxbuf *header;
1243
1244                 if (nb_rx == nb_pkts)
1245                         break;
1246
1247                 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1248                 if (num != 1)
1249                         continue;
1250
1251                 i++;
1252
1253                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1254                 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1255
1256                 rxm = rcv_pkts[0];
1257
1258                 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1259                         PMD_RX_LOG(ERR, "Packet drop");
1260                         nb_enqueued++;
1261                         virtio_discard_rxbuf(vq, rxm);
1262                         rxvq->stats.errors++;
1263                         continue;
1264                 }
1265
1266                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1267                         RTE_PKTMBUF_HEADROOM - hdr_size);
1268                 seg_num = header->num_buffers;
1269
1270                 if (seg_num == 0)
1271                         seg_num = 1;
1272
1273                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1274                 rxm->nb_segs = seg_num;
1275                 rxm->ol_flags = 0;
1276                 rxm->vlan_tci = 0;
1277                 rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1278                 rxm->data_len = (uint16_t)(len[0] - hdr_size);
1279
1280                 rxm->port = rxvq->port_id;
1281                 rx_pkts[nb_rx] = rxm;
1282                 prev = rxm;
1283
1284                 if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
1285                         virtio_discard_rxbuf(vq, rxm);
1286                         rxvq->stats.errors++;
1287                         continue;
1288                 }
1289
1290                 seg_res = seg_num - 1;
1291
1292                 while (seg_res != 0) {
1293                         /*
1294                          * Get extra segments for the current incomplete packet.
1295                          */
1296                         uint16_t  rcv_cnt =
1297                                 RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1298                         if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1299                                 uint32_t rx_num =
1300                                         virtqueue_dequeue_burst_rx(vq,
1301                                         rcv_pkts, len, rcv_cnt);
1302                                 i += rx_num;
1303                                 rcv_cnt = rx_num;
1304                         } else {
1305                                 PMD_RX_LOG(ERR,
1306                                            "Not enough segments for packet.");
1307                                 nb_enqueued++;
1308                                 virtio_discard_rxbuf(vq, rxm);
1309                                 rxvq->stats.errors++;
1310                                 break;
1311                         }
1312
1313                         extra_idx = 0;
1314
1315                         while (extra_idx < rcv_cnt) {
1316                                 rxm = rcv_pkts[extra_idx];
1317
1318                                 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1319                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1320                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1321
1322                                 if (prev)
1323                                         prev->next = rxm;
1324
1325                                 prev = rxm;
1326                                 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1327                                 extra_idx++;
1328                         }
1329                         seg_res -= rcv_cnt;
1330                 }
1331
1332                 if (hw->vlan_strip)
1333                         rte_vlan_strip(rx_pkts[nb_rx]);
1334
1335                 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1336                         rx_pkts[nb_rx]->data_len);
1337
1338                 rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
1339                 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1340                 nb_rx++;
1341         }
1342
1343         rxvq->stats.packets += nb_rx;
1344
1345         /* Allocate new mbuf for the used descriptor */
1346         error = ENOSPC;
1347         while (likely(!virtqueue_full(vq))) {
1348                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1349                 if (unlikely(new_mbuf == NULL)) {
1350                         struct rte_eth_dev *dev
1351                                 = &rte_eth_devices[rxvq->port_id];
1352                         dev->data->rx_mbuf_alloc_failed++;
1353                         break;
1354                 }
1355                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1356                 if (unlikely(error)) {
1357                         rte_pktmbuf_free(new_mbuf);
1358                         break;
1359                 }
1360                 nb_enqueued++;
1361         }
1362
1363         if (likely(nb_enqueued)) {
1364                 vq_update_avail_idx(vq);
1365
1366                 if (unlikely(virtqueue_kick_prepare(vq))) {
1367                         virtqueue_notify(vq);
1368                         PMD_RX_LOG(DEBUG, "Notified");
1369                 }
1370         }
1371
1372         return nb_rx;
1373 }
1374
1375 uint16_t
1376 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1377 {
1378         struct virtnet_tx *txvq = tx_queue;
1379         struct virtqueue *vq = txvq->vq;
1380         struct virtio_hw *hw = vq->hw;
1381         uint16_t hdr_size = hw->vtnet_hdr_size;
1382         uint16_t nb_used, nb_tx = 0;
1383         int error;
1384
1385         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1386                 return nb_tx;
1387
1388         if (unlikely(nb_pkts < 1))
1389                 return nb_pkts;
1390
1391         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1392         nb_used = VIRTQUEUE_NUSED(vq);
1393
1394         virtio_rmb();
1395         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1396                 virtio_xmit_cleanup(vq, nb_used);
1397
1398         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1399                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1400                 int can_push = 0, use_indirect = 0, slots, need;
1401
1402                 /* Do VLAN tag insertion */
1403                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1404                         error = rte_vlan_insert(&txm);
1405                         if (unlikely(error)) {
1406                                 rte_pktmbuf_free(txm);
1407                                 continue;
1408                         }
1409                 }
1410
1411                 /* optimize ring usage */
1412                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1413                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1414                     rte_mbuf_refcnt_read(txm) == 1 &&
1415                     RTE_MBUF_DIRECT(txm) &&
1416                     txm->nb_segs == 1 &&
1417                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1418                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1419                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1420                         can_push = 1;
1421                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1422                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1423                         use_indirect = 1;
1424
1425                 /* How many main ring entries are needed for this Tx?
1426                  * any_layout => number of segments
1427                  * indirect   => 1
1428                  * default    => number of segments + 1
1429                  */
1430                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1431                 need = slots - vq->vq_free_cnt;
1432
1433                 /* A positive value indicates we need to reclaim used descriptors */
1434                 if (unlikely(need > 0)) {
1435                         nb_used = VIRTQUEUE_NUSED(vq);
1436                         virtio_rmb();
1437                         need = RTE_MIN(need, (int)nb_used);
1438
1439                         virtio_xmit_cleanup(vq, need);
1440                         need = slots - vq->vq_free_cnt;
1441                         if (unlikely(need > 0)) {
1442                                 PMD_TX_LOG(ERR,
1443                                            "No free tx descriptors to transmit");
1444                                 break;
1445                         }
1446                 }
1447
1448                 /* Enqueue Packet buffers */
1449                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
1450                         can_push, 0);
1451
1452                 txvq->stats.bytes += txm->pkt_len;
1453                 virtio_update_packet_stats(&txvq->stats, txm);
1454         }
1455
1456         txvq->stats.packets += nb_tx;
1457
1458         if (likely(nb_tx)) {
1459                 vq_update_avail_idx(vq);
1460
1461                 if (unlikely(virtqueue_kick_prepare(vq))) {
1462                         virtqueue_notify(vq);
1463                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1464                 }
1465         }
1466
1467         return nb_tx;
1468 }
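
From the application side this is the rte_eth_tx_burst() path; a minimal forwarding-style sketch with assumed port/queue 0, retrying the tail because virtio_xmit_pkts() may send fewer packets than requested when descriptors run out:

/* Illustrative busy-wait transmit loop, not part of the driver. */
static void
send_all(uint16_t port_id, struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t sent = 0;

        while (sent < nb_pkts)
                sent += rte_eth_tx_burst(port_id, 0, pkts + sent,
                                nb_pkts - sent);
}
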
1469
1470 uint16_t
1471 virtio_xmit_pkts_inorder(void *tx_queue,
1472                         struct rte_mbuf **tx_pkts,
1473                         uint16_t nb_pkts)
1474 {
1475         struct virtnet_tx *txvq = tx_queue;
1476         struct virtqueue *vq = txvq->vq;
1477         struct virtio_hw *hw = vq->hw;
1478         uint16_t hdr_size = hw->vtnet_hdr_size;
1479         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
1480         struct rte_mbuf *inorder_pkts[nb_pkts];
1481         int error;
1482
1483         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1484                 return nb_tx;
1485
1486         if (unlikely(nb_pkts < 1))
1487                 return nb_pkts;
1488
1489         VIRTQUEUE_DUMP(vq);
1490         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1491         nb_used = VIRTQUEUE_NUSED(vq);
1492
1493         virtio_rmb();
1494         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1495                 virtio_xmit_cleanup_inorder(vq, nb_used);
1496
1497         if (unlikely(!vq->vq_free_cnt))
1498                 virtio_xmit_cleanup_inorder(vq, nb_used);
1499
1500         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
1501
1502         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
1503                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1504                 int slots, need;
1505
1506                 /* Do VLAN tag insertion */
1507                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1508                         error = rte_vlan_insert(&txm);
1509                         if (unlikely(error)) {
1510                                 rte_pktmbuf_free(txm);
1511                                 continue;
1512                         }
1513                 }
1514
1515                 /* optimize ring usage */
1516                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1517                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1518                      rte_mbuf_refcnt_read(txm) == 1 &&
1519                      RTE_MBUF_DIRECT(txm) &&
1520                      txm->nb_segs == 1 &&
1521                      rte_pktmbuf_headroom(txm) >= hdr_size &&
1522                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1523                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
1524                         inorder_pkts[nb_inorder_pkts] = txm;
1525                         nb_inorder_pkts++;
1526
1527                         txvq->stats.bytes += txm->pkt_len;
1528                         virtio_update_packet_stats(&txvq->stats, txm);
1529                         continue;
1530                 }
1531
1532                 if (nb_inorder_pkts) {
1533                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1534                                                         nb_inorder_pkts);
1535                         nb_inorder_pkts = 0;
1536                 }
1537
1538                 slots = txm->nb_segs + 1;
1539                 need = slots - vq->vq_free_cnt;
1540                 if (unlikely(need > 0)) {
1541                         nb_used = VIRTQUEUE_NUSED(vq);
1542                         virtio_rmb();
1543                         need = RTE_MIN(need, (int)nb_used);
1544
1545                         virtio_xmit_cleanup_inorder(vq, need);
1546
1547                         need = slots - vq->vq_free_cnt;
1548
1549                         if (unlikely(need > 0)) {
1550                                 PMD_TX_LOG(ERR,
1551                                         "No free tx descriptors to transmit");
1552                                 break;
1553                         }
1554                 }
1555                 /* Enqueue Packet buffers */
1556                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
1557
1558                 txvq->stats.bytes += txm->pkt_len;
1559                 virtio_update_packet_stats(&txvq->stats, txm);
1560         }
1561
1562         /* Transmit all inorder packets */
1563         if (nb_inorder_pkts)
1564                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1565                                                 nb_inorder_pkts);
1566
1567         txvq->stats.packets += nb_tx;
1568
1569         if (likely(nb_tx)) {
1570                 vq_update_avail_idx(vq);
1571
1572                 if (unlikely(virtqueue_kick_prepare(vq))) {
1573                         virtqueue_notify(vq);
1574                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1575                 }
1576         }
1577
1578         VIRTQUEUE_DUMP(vq);
1579
1580         return nb_tx;
1581 }