dpdk.git: drivers/net/virtio/virtio_rxtx.c (commit 407f58bce636be1aa53dfc7b8ced2793bacc0d22)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * the newly freed chain. If the virtqueue was completely used, then
79          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
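
/*
 * Illustrative sketch, not used by the driver: the split ring keeps its free
 * descriptors as a singly linked list threaded through desc[].next, bounded
 * by vq_desc_head_idx/vq_desc_tail_idx.  The toy helper below (hypothetical
 * names throughout) shows the same splice that vq_ring_free_chain() performs:
 * hook the returned chain onto the current tail, or make it the new head if
 * the list was empty, then terminate it with the end-of-chain marker.
 */
#define TOY_CHAIN_END 0xffff

static inline void
toy_splice_free_chain(uint16_t *next, uint16_t *head, uint16_t *tail,
                      uint16_t first, uint16_t last)
{
        if (*tail == TOY_CHAIN_END)
                *head = first;          /* free list was empty */
        else
                next[*tail] = first;    /* append after current tail */
        *tail = last;
        next[last] = TOY_CHAIN_END;     /* new end of chain */
}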
91
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->ring_packed.desc_packed;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
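
/*
 * Illustrative sketch, not used by the driver: desc_is_used(), defined in
 * virtqueue.h, decides whether the device has handed a packed descriptor
 * back.  Per the virtio 1.1 packed-ring rules, and assuming the flag layout
 * given by VRING_DESC_F_AVAIL()/VRING_DESC_F_USED(), a descriptor is used
 * when its AVAIL and USED bits are equal and both match the ring's current
 * used_wrap_counter.  The hypothetical helper below spells that check out.
 */
static inline int
example_desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq)
{
        uint16_t flags = desc->flags;
        int avail = !!(flags & VRING_DESC_F_AVAIL(1));
        int used = !!(flags & VRING_DESC_F_USED(1));

        return avail == used && used == vq->used_wrap_counter;
}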
150
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
227 /* Cleanup from completed transmits. */
228 static void
229 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num)
230 {
231         uint16_t used_idx, id;
232         uint16_t size = vq->vq_nentries;
233         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
234         struct vq_desc_extra *dxp;
235
236         used_idx = vq->vq_used_cons_idx;
237         while (num-- && desc_is_used(&desc[used_idx], vq)) {
238                 virtio_rmb(vq->hw->weak_barriers);
239                 id = desc[used_idx].id;
240                 dxp = &vq->vq_descx[id];
241                 vq->vq_used_cons_idx += dxp->ndescs;
242                 if (vq->vq_used_cons_idx >= size) {
243                         vq->vq_used_cons_idx -= size;
244                         vq->used_wrap_counter ^= 1;
245                 }
246                 vq_ring_free_id_packed(vq, id);
247                 if (dxp->cookie != NULL) {
248                         rte_pktmbuf_free(dxp->cookie);
249                         dxp->cookie = NULL;
250                 }
251                 used_idx = vq->vq_used_cons_idx;
252         }
253 }
254
255 static void
256 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
257 {
258         uint16_t i, used_idx, desc_idx;
259         for (i = 0; i < num; i++) {
260                 struct vring_used_elem *uep;
261                 struct vq_desc_extra *dxp;
262
263                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
264                 uep = &vq->vq_ring.used->ring[used_idx];
265
266                 desc_idx = (uint16_t) uep->id;
267                 dxp = &vq->vq_descx[desc_idx];
268                 vq->vq_used_cons_idx++;
269                 vq_ring_free_chain(vq, desc_idx);
270
271                 if (dxp->cookie != NULL) {
272                         rte_pktmbuf_free(dxp->cookie);
273                         dxp->cookie = NULL;
274                 }
275         }
276 }
277
278 /* Cleanup from completed inorder transmits. */
279 static void
280 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
281 {
282         uint16_t i, idx = vq->vq_used_cons_idx;
283         int16_t free_cnt = 0;
284         struct vq_desc_extra *dxp = NULL;
285
286         if (unlikely(num == 0))
287                 return;
288
289         for (i = 0; i < num; i++) {
290                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
291                 free_cnt += dxp->ndescs;
292                 if (dxp->cookie != NULL) {
293                         rte_pktmbuf_free(dxp->cookie);
294                         dxp->cookie = NULL;
295                 }
296         }
297
298         vq->vq_free_cnt += free_cnt;
299         vq->vq_used_cons_idx = idx;
300 }
301
302 static inline int
303 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
304                         struct rte_mbuf **cookies,
305                         uint16_t num)
306 {
307         struct vq_desc_extra *dxp;
308         struct virtio_hw *hw = vq->hw;
309         struct vring_desc *start_dp;
310         uint16_t head_idx, idx, i = 0;
311
312         if (unlikely(vq->vq_free_cnt == 0))
313                 return -ENOSPC;
314         if (unlikely(vq->vq_free_cnt < num))
315                 return -EMSGSIZE;
316
317         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
318         start_dp = vq->vq_ring.desc;
319
320         while (i < num) {
321                 idx = head_idx & (vq->vq_nentries - 1);
322                 dxp = &vq->vq_descx[idx];
323                 dxp->cookie = (void *)cookies[i];
324                 dxp->ndescs = 1;
325
326                 start_dp[idx].addr =
327                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
328                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
329                 start_dp[idx].len =
330                                 cookies[i]->buf_len -
331                                 RTE_PKTMBUF_HEADROOM +
332                                 hw->vtnet_hdr_size;
333                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
334
335                 vq_update_avail_ring(vq, idx);
336                 head_idx++;
337                 i++;
338         }
339
340         vq->vq_desc_head_idx += num;
341         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
342         return 0;
343 }
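
/*
 * Illustrative sketch, not driver code: all of the refill helpers around
 * here program the descriptor to start hw->vtnet_hdr_size bytes *before*
 * the mbuf's usual data offset, so the device writes the virtio-net header
 * into the mbuf headroom and the packet payload lands at the normal data
 * offset.  Assuming the default RTE_PKTMBUF_HEADROOM of 128 bytes and a
 * 12-byte virtio-net header, the descriptor covers bytes [116, buf_len) of
 * the buffer.  The helper name and parameters below are hypothetical.
 */
static inline void
example_refill_desc(uint64_t buf_iova, uint32_t buf_len,
                    uint32_t headroom, uint32_t hdr_size,
                    uint64_t *desc_addr, uint32_t *desc_len)
{
        *desc_addr = buf_iova + headroom - hdr_size;
        *desc_len = buf_len - headroom + hdr_size;
}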
344
345 static inline int
346 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
347                                 uint16_t num)
348 {
349         struct vq_desc_extra *dxp;
350         struct virtio_hw *hw = vq->hw;
351         struct vring_desc *start_dp = vq->vq_ring.desc;
352         uint16_t idx, i;
353
354         if (unlikely(vq->vq_free_cnt == 0))
355                 return -ENOSPC;
356         if (unlikely(vq->vq_free_cnt < num))
357                 return -EMSGSIZE;
358
359         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
360                 return -EFAULT;
361
362         for (i = 0; i < num; i++) {
363                 idx = vq->vq_desc_head_idx;
364                 dxp = &vq->vq_descx[idx];
365                 dxp->cookie = (void *)cookie[i];
366                 dxp->ndescs = 1;
367
368                 start_dp[idx].addr =
369                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
370                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
371                 start_dp[idx].len =
372                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
373                         hw->vtnet_hdr_size;
374                 start_dp[idx].flags = VRING_DESC_F_WRITE;
375                 vq->vq_desc_head_idx = start_dp[idx].next;
376                 vq_update_avail_ring(vq, idx);
377                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
378                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
379                         break;
380                 }
381         }
382
383         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
384
385         return 0;
386 }
387
388 static inline int
389 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
390                                      struct rte_mbuf **cookie, uint16_t num)
391 {
392         struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
393         uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
394         struct virtio_hw *hw = vq->hw;
395         struct vq_desc_extra *dxp;
396         uint16_t idx;
397         int i;
398
399         if (unlikely(vq->vq_free_cnt == 0))
400                 return -ENOSPC;
401         if (unlikely(vq->vq_free_cnt < num))
402                 return -EMSGSIZE;
403
404         for (i = 0; i < num; i++) {
405                 idx = vq->vq_avail_idx;
406                 dxp = &vq->vq_descx[idx];
407                 dxp->cookie = (void *)cookie[i];
408                 dxp->ndescs = 1;
409
410                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
413                                         + hw->vtnet_hdr_size;
414
415                 vq->vq_desc_head_idx = dxp->next;
416                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
417                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
418                 virtio_wmb(hw->weak_barriers);
419                 start_dp[idx].flags = flags;
420                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
421                         vq->vq_avail_idx -= vq->vq_nentries;
422                         vq->avail_wrap_counter ^= 1;
423                         vq->avail_used_flags =
424                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
425                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
426                         flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
427                 }
428         }
429         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
430         return 0;
431 }
432
433 /* When doing TSO, the IP length is not included in the pseudo header
434  * checksum of the packet given to the PMD, but for virtio it is
435  * expected.
436  */
437 static void
438 virtio_tso_fix_cksum(struct rte_mbuf *m)
439 {
440         /* common case: header is not fragmented */
441         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
442                         m->l4_len)) {
443                 struct ipv4_hdr *iph;
444                 struct ipv6_hdr *ip6h;
445                 struct tcp_hdr *th;
446                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
447                 uint32_t tmp;
448
449                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
450                 th = RTE_PTR_ADD(iph, m->l3_len);
451                 if ((iph->version_ihl >> 4) == 4) {
452                         iph->hdr_checksum = 0;
453                         iph->hdr_checksum = rte_ipv4_cksum(iph);
454                         ip_len = iph->total_length;
455                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
456                                 m->l3_len);
457                 } else {
458                         ip6h = (struct ipv6_hdr *)iph;
459                         ip_paylen = ip6h->payload_len;
460                 }
461
462                 /* calculate the new phdr checksum not including ip_paylen */
463                 prev_cksum = th->cksum;
464                 tmp = prev_cksum;
465                 tmp += ip_paylen;
466                 tmp = (tmp & 0xffff) + (tmp >> 16);
467                 new_cksum = tmp;
468
469                 /* replace it in the packet */
470                 th->cksum = new_cksum;
471         }
472 }
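
/*
 * Worked sketch of the one's-complement arithmetic above (hypothetical
 * helper, not used by the driver): adding a 16-bit value to an existing
 * Internet checksum means summing in 32 bits and folding the carry back
 * into the low 16 bits, which is exactly what virtio_tso_fix_cksum() does
 * when it adds ip_paylen to the TCP pseudo-header checksum.  For example,
 * 0xffff + 0x0002 = 0x10001, which folds to 0x0001 + 0x0001 = 0x0002.
 */
static inline uint16_t
example_cksum_add16(uint16_t cksum, uint16_t val16)
{
        uint32_t sum = (uint32_t)cksum + val16;

        sum = (sum & 0xffff) + (sum >> 16);     /* fold carry */
        return (uint16_t)sum;
}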
473
474
475 /* avoid a write operation when it is not necessary, to lessen cache issues */
476 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
477         if ((var) != (val))                     \
478                 (var) = (val);                  \
479 } while (0)
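
/*
 * Illustrative use of the macro (hypothetical helper, not driver code):
 * when the virtio-net header in front of an mbuf already holds zeroes from
 * a previous transmit, every test below fails and no store is issued, so
 * the cache line that the host also reads is never dirtied.  This is the
 * same zeroing pattern the transmit paths below apply when tx offload is
 * disabled.
 */
static inline void
example_clear_net_hdr(struct virtio_net_hdr *hdr)
{
        ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
        ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
        ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
        ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
        ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
        ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
}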
480
481 static inline void
482 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
483                         struct rte_mbuf *cookie,
484                         bool offload)
485 {
486         if (offload) {
487                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
488                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
489
490                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
491                 case PKT_TX_UDP_CKSUM:
492                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
493                         hdr->csum_offset = offsetof(struct udp_hdr,
494                                 dgram_cksum);
495                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
496                         break;
497
498                 case PKT_TX_TCP_CKSUM:
499                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
500                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
501                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
502                         break;
503
504                 default:
505                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
506                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
507                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
508                         break;
509                 }
510
511                 /* TCP Segmentation Offload */
512                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
513                         virtio_tso_fix_cksum(cookie);
514                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
515                                 VIRTIO_NET_HDR_GSO_TCPV6 :
516                                 VIRTIO_NET_HDR_GSO_TCPV4;
517                         hdr->gso_size = cookie->tso_segsz;
518                         hdr->hdr_len =
519                                 cookie->l2_len +
520                                 cookie->l3_len +
521                                 cookie->l4_len;
522                 } else {
523                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
524                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
525                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
526                 }
527         }
528 }
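
/*
 * Worked example (hypothetical helper, not used by the driver): for a plain
 * TCP/IPv4 frame with a 14-byte Ethernet header and a 20-byte IPv4 header,
 * virtqueue_xmit_offload() above produces csum_start = 14 + 20 = 34 and
 * csum_offset = offsetof(struct tcp_hdr, cksum) = 16, i.e. the device
 * checksums everything from byte 34 onward and stores the result at byte
 * 34 + 16 = 50 of the frame.
 */
static inline void
example_fill_tcp_csum_hdr(struct virtio_net_hdr *hdr)
{
        hdr->csum_start = ETHER_HDR_LEN + sizeof(struct ipv4_hdr);      /* 34 */
        hdr->csum_offset = offsetof(struct tcp_hdr, cksum);             /* 16 */
        hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
}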
529
530 static inline void
531 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
532                         struct rte_mbuf **cookies,
533                         uint16_t num)
534 {
535         struct vq_desc_extra *dxp;
536         struct virtqueue *vq = txvq->vq;
537         struct vring_desc *start_dp;
538         struct virtio_net_hdr *hdr;
539         uint16_t idx;
540         uint16_t head_size = vq->hw->vtnet_hdr_size;
541         uint16_t i = 0;
542
543         idx = vq->vq_desc_head_idx;
544         start_dp = vq->vq_ring.desc;
545
546         while (i < num) {
547                 idx = idx & (vq->vq_nentries - 1);
548                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
549                 dxp->cookie = (void *)cookies[i];
550                 dxp->ndescs = 1;
551
552                 hdr = (struct virtio_net_hdr *)
553                         rte_pktmbuf_prepend(cookies[i], head_size);
554                 cookies[i]->pkt_len -= head_size;
555
556                 /* if offload is disabled, the header is not zeroed below, so do it now */
557                 if (!vq->hw->has_tx_offload) {
558                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
559                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
560                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
561                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
562                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
563                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
564                 }
565
566                 virtqueue_xmit_offload(hdr, cookies[i],
567                                 vq->hw->has_tx_offload);
568
569                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
570                 start_dp[idx].len   = cookies[i]->data_len;
571                 start_dp[idx].flags = 0;
572
573                 vq_update_avail_ring(vq, idx);
574
575                 idx++;
576                 i++;
577         }
578
579         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
580         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
581 }
582
583 static inline void
584 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
585                               uint16_t needed, int can_push)
586 {
587         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
588         struct vq_desc_extra *dxp;
589         struct virtqueue *vq = txvq->vq;
590         struct vring_packed_desc *start_dp, *head_dp;
591         uint16_t idx, id, head_idx, head_flags;
592         uint16_t head_size = vq->hw->vtnet_hdr_size;
593         struct virtio_net_hdr *hdr;
594         uint16_t prev;
595
596         id = vq->vq_desc_head_idx;
597
598         dxp = &vq->vq_descx[id];
599         dxp->ndescs = needed;
600         dxp->cookie = cookie;
601
602         head_idx = vq->vq_avail_idx;
603         idx = head_idx;
604         prev = head_idx;
605         start_dp = vq->ring_packed.desc_packed;
606
607         head_dp = &vq->ring_packed.desc_packed[idx];
608         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
609         head_flags |= vq->avail_used_flags;
610
611         if (can_push) {
612                 /* prepend cannot fail, checked by caller */
613                 hdr = (struct virtio_net_hdr *)
614                         rte_pktmbuf_prepend(cookie, head_size);
615                 /* rte_pktmbuf_prepend() counts the hdr size in the pkt length,
616                  * which is wrong; the subtraction below restores the correct pkt size.
617                  */
618                 cookie->pkt_len -= head_size;
619
620                 /* if offload is disabled, the header is not zeroed below, so do it now */
621                 if (!vq->hw->has_tx_offload) {
622                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
623                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
624                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
625                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
626                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
627                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
628                 }
629         } else {
630                 /* setup first tx ring slot to point to header
631                  * stored in reserved region.
632                  */
633                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
634                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
635                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
636                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
637                 idx++;
638                 if (idx >= vq->vq_nentries) {
639                         idx -= vq->vq_nentries;
640                         vq->avail_wrap_counter ^= 1;
641                         vq->avail_used_flags =
642                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
643                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
644                 }
645         }
646
647         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
648
649         do {
650                 uint16_t flags;
651
652                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
653                 start_dp[idx].len  = cookie->data_len;
654                 if (likely(idx != head_idx)) {
655                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
656                         flags |= vq->avail_used_flags;
657                         start_dp[idx].flags = flags;
658                 }
659                 prev = idx;
660                 idx++;
661                 if (idx >= vq->vq_nentries) {
662                         idx -= vq->vq_nentries;
663                         vq->avail_wrap_counter ^= 1;
664                         vq->avail_used_flags =
665                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
666                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
667                 }
668         } while ((cookie = cookie->next) != NULL);
669
670         start_dp[prev].id = id;
671
672         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
673
674         vq->vq_desc_head_idx = dxp->next;
675         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
676                 vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
677
678         vq->vq_avail_idx = idx;
679
680         virtio_wmb(vq->hw->weak_barriers);
681         head_dp->flags = head_flags;
682 }
683
684 static inline void
685 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
686                         uint16_t needed, int use_indirect, int can_push,
687                         int in_order)
688 {
689         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
690         struct vq_desc_extra *dxp;
691         struct virtqueue *vq = txvq->vq;
692         struct vring_desc *start_dp;
693         uint16_t seg_num = cookie->nb_segs;
694         uint16_t head_idx, idx;
695         uint16_t head_size = vq->hw->vtnet_hdr_size;
696         struct virtio_net_hdr *hdr;
697
698         head_idx = vq->vq_desc_head_idx;
699         idx = head_idx;
700         if (in_order)
701                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
702         else
703                 dxp = &vq->vq_descx[idx];
704         dxp->cookie = (void *)cookie;
705         dxp->ndescs = needed;
706
707         start_dp = vq->vq_ring.desc;
708
709         if (can_push) {
710                 /* prepend cannot fail, checked by caller */
711                 hdr = (struct virtio_net_hdr *)
712                         rte_pktmbuf_prepend(cookie, head_size);
713                 /* rte_pktmbuf_prepend() counts the hdr size in the pkt length,
714                  * which is wrong; the subtraction below restores the correct pkt size.
715                  */
716                 cookie->pkt_len -= head_size;
717
718                 /* if offload is disabled, the header is not zeroed below, so do it now */
719                 if (!vq->hw->has_tx_offload) {
720                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
721                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
722                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
723                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
724                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
725                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
726                 }
727         } else if (use_indirect) {
728                 /* setup tx ring slot to point to indirect
729                  * descriptor list stored in reserved region.
730                  *
731                  * the first slot in indirect ring is already preset
732                  * to point to the header in reserved region
733                  */
734                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
735                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
736                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
737                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
738                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
739
740                 /* loop below will fill in rest of the indirect elements */
741                 start_dp = txr[idx].tx_indir;
742                 idx = 1;
743         } else {
744                 /* setup first tx ring slot to point to header
745                  * stored in reserved region.
746                  */
747                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
748                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
749                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
750                 start_dp[idx].flags = VRING_DESC_F_NEXT;
751                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
752
753                 idx = start_dp[idx].next;
754         }
755
756         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
757
758         do {
759                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
760                 start_dp[idx].len   = cookie->data_len;
761                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
762                 idx = start_dp[idx].next;
763         } while ((cookie = cookie->next) != NULL);
764
765         if (use_indirect)
766                 idx = vq->vq_ring.desc[head_idx].next;
767
768         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
769
770         vq->vq_desc_head_idx = idx;
771         vq_update_avail_ring(vq, head_idx);
772
773         if (!in_order) {
774                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
775                         vq->vq_desc_tail_idx = idx;
776         }
777 }
778
779 void
780 virtio_dev_cq_start(struct rte_eth_dev *dev)
781 {
782         struct virtio_hw *hw = dev->data->dev_private;
783
784         if (hw->cvq && hw->cvq->vq) {
785                 rte_spinlock_init(&hw->cvq->lock);
786                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
787         }
788 }
789
790 int
791 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
792                         uint16_t queue_idx,
793                         uint16_t nb_desc,
794                         unsigned int socket_id __rte_unused,
795                         const struct rte_eth_rxconf *rx_conf __rte_unused,
796                         struct rte_mempool *mp)
797 {
798         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
799         struct virtio_hw *hw = dev->data->dev_private;
800         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
801         struct virtnet_rx *rxvq;
802
803         PMD_INIT_FUNC_TRACE();
804
805         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
806                 nb_desc = vq->vq_nentries;
807         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
808
809         rxvq = &vq->rxq;
810         rxvq->queue_id = queue_idx;
811         rxvq->mpool = mp;
812         if (rxvq->mpool == NULL) {
813                 rte_exit(EXIT_FAILURE,
814                         "Cannot allocate mbufs for rx virtqueue");
815         }
816
817         dev->data->rx_queues[queue_idx] = rxvq;
818
819         return 0;
820 }
821
822 int
823 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
824 {
825         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
826         struct virtio_hw *hw = dev->data->dev_private;
827         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
828         struct virtnet_rx *rxvq = &vq->rxq;
829         struct rte_mbuf *m;
830         uint16_t desc_idx;
831         int error, nbufs, i;
832
833         PMD_INIT_FUNC_TRACE();
834
835         /* Allocate blank mbufs for each rx descriptor */
836         nbufs = 0;
837
838         if (hw->use_simple_rx) {
839                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
840                      desc_idx++) {
841                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
842                         vq->vq_ring.desc[desc_idx].flags =
843                                 VRING_DESC_F_WRITE;
844                 }
845
846                 virtio_rxq_vec_setup(rxvq);
847         }
848
849         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
850         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
851              desc_idx++) {
852                 vq->sw_ring[vq->vq_nentries + desc_idx] =
853                         &rxvq->fake_mbuf;
854         }
855
856         if (hw->use_simple_rx) {
857                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
858                         virtio_rxq_rearm_vec(rxvq);
859                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
860                 }
861         } else if (hw->use_inorder_rx) {
862                 if (!virtqueue_full(vq)) {
863                         uint16_t free_cnt = vq->vq_free_cnt;
864                         struct rte_mbuf *pkts[free_cnt];
865
866                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
867                                 free_cnt)) {
868                                 error = virtqueue_enqueue_refill_inorder(vq,
869                                                 pkts,
870                                                 free_cnt);
871                                 if (unlikely(error)) {
872                                         for (i = 0; i < free_cnt; i++)
873                                                 rte_pktmbuf_free(pkts[i]);
874                                 }
875                         }
876
877                         nbufs += free_cnt;
878                         vq_update_avail_idx(vq);
879                 }
880         } else {
881                 while (!virtqueue_full(vq)) {
882                         m = rte_mbuf_raw_alloc(rxvq->mpool);
883                         if (m == NULL)
884                                 break;
885
886                         /* Enqueue allocated buffers */
887                         if (vtpci_packed_queue(vq->hw))
888                                 error = virtqueue_enqueue_recv_refill_packed(vq,
889                                                 &m, 1);
890                         else
891                                 error = virtqueue_enqueue_recv_refill(vq,
892                                                 &m, 1);
893                         if (error) {
894                                 rte_pktmbuf_free(m);
895                                 break;
896                         }
897                         nbufs++;
898                 }
899
900                 if (!vtpci_packed_queue(vq->hw))
901                         vq_update_avail_idx(vq);
902         }
903
904         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
905
906         VIRTQUEUE_DUMP(vq);
907
908         return 0;
909 }
910
911 /*
912  * struct rte_eth_dev *dev: Used to update dev
913  * uint16_t nb_desc: Defaults to values read from config space
914  * unsigned int socket_id: Used to allocate memzone
915  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
916  * uint16_t queue_idx: Just used as an index in dev txq list
917  */
918 int
919 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
920                         uint16_t queue_idx,
921                         uint16_t nb_desc,
922                         unsigned int socket_id __rte_unused,
923                         const struct rte_eth_txconf *tx_conf)
924 {
925         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
926         struct virtio_hw *hw = dev->data->dev_private;
927         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
928         struct virtnet_tx *txvq;
929         uint16_t tx_free_thresh;
930
931         PMD_INIT_FUNC_TRACE();
932
933         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
934                 nb_desc = vq->vq_nentries;
935         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
936
937         txvq = &vq->txq;
938         txvq->queue_id = queue_idx;
939
940         tx_free_thresh = tx_conf->tx_free_thresh;
941         if (tx_free_thresh == 0)
942                 tx_free_thresh =
943                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
944
945         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
946                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
947                         "number of TX entries minus 3 (%u)."
948                         " (tx_free_thresh=%u port=%u queue=%u)\n",
949                         vq->vq_nentries - 3,
950                         tx_free_thresh, dev->data->port_id, queue_idx);
951                 return -EINVAL;
952         }
953
954         vq->vq_free_thresh = tx_free_thresh;
955
956         dev->data->tx_queues[queue_idx] = txvq;
957         return 0;
958 }
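
/*
 * Illustrative sketch, not driver code: when the application leaves
 * tx_conf->tx_free_thresh at 0, the setup above falls back to a quarter of
 * the ring capped at DEFAULT_TX_FREE_THRESH, e.g. a 256-entry queue gets
 * min(64, 32) = 32 and a 64-entry queue gets min(16, 32) = 16.  The helper
 * name below is hypothetical.
 */
static inline uint16_t
example_default_tx_free_thresh(uint16_t nb_desc)
{
        return RTE_MIN(nb_desc / 4, DEFAULT_TX_FREE_THRESH);
}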
959
960 int
961 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
962                                 uint16_t queue_idx)
963 {
964         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
965         struct virtio_hw *hw = dev->data->dev_private;
966         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
967
968         PMD_INIT_FUNC_TRACE();
969
970         if (!vtpci_packed_queue(hw)) {
971                 if (hw->use_inorder_tx)
972                         vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
973         }
974
975         VIRTQUEUE_DUMP(vq);
976
977         return 0;
978 }
979
980 static inline void
981 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
982 {
983         int error;
984         /*
985          * Requeue the discarded mbuf. This should always be
986          * successful since it was just dequeued.
987          */
988         if (vtpci_packed_queue(vq->hw))
989                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
990         else
991                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
992
993         if (unlikely(error)) {
994                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
995                 rte_pktmbuf_free(m);
996         }
997 }
998
999 static inline void
1000 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1001 {
1002         int error;
1003
1004         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1005         if (unlikely(error)) {
1006                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1007                 rte_pktmbuf_free(m);
1008         }
1009 }
1010
1011 static inline void
1012 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1013 {
1014         uint32_t s = mbuf->pkt_len;
1015         struct ether_addr *ea;
1016
1017         stats->bytes += s;
1018
1019         if (s == 64) {
1020                 stats->size_bins[1]++;
1021         } else if (s > 64 && s < 1024) {
1022                 uint32_t bin;
1023
1024                 /* count zeros, and offset into correct bin */
1025                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1026                 stats->size_bins[bin]++;
1027         } else {
1028                 if (s < 64)
1029                         stats->size_bins[0]++;
1030                 else if (s < 1519)
1031                         stats->size_bins[6]++;
1032                 else if (s >= 1519)
1033                         stats->size_bins[7]++;
1034         }
1035
1036         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
1037         if (is_multicast_ether_addr(ea)) {
1038                 if (is_broadcast_ether_addr(ea))
1039                         stats->broadcast++;
1040                 else
1041                         stats->multicast++;
1042         }
1043 }
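
/*
 * Illustrative sketch of the size-bin math above (hypothetical helper, not
 * driver code): for 64 < s < 1024 the bin index comes from the position of
 * the highest set bit, (sizeof(s) * 8) - __builtin_clz(s) - 5, so e.g.
 * s = 128 gives __builtin_clz(128) = 24 and bin (32 - 24) - 5 = 3.  The
 * resulting histogram is bin 0: <64, 1: 64, 2: 65-127, 3: 128-255,
 * 4: 256-511, 5: 512-1023, 6: 1024-1518, 7: >=1519 bytes.
 */
static inline uint32_t
example_size_bin(uint32_t s)
{
        if (s < 64)
                return 0;
        if (s == 64)
                return 1;
        if (s < 1024)
                return (sizeof(s) * 8) - __builtin_clz(s) - 5;
        return (s < 1519) ? 6 : 7;
}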
1044
1045 static inline void
1046 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1047 {
1048         VIRTIO_DUMP_PACKET(m, m->data_len);
1049
1050         virtio_update_packet_stats(&rxvq->stats, m);
1051 }
1052
1053 /* Optionally fill offload information in structure */
1054 static inline int
1055 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1056 {
1057         struct rte_net_hdr_lens hdr_lens;
1058         uint32_t hdrlen, ptype;
1059         int l4_supported = 0;
1060
1061         /* nothing to do */
1062         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1063                 return 0;
1064
1065         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1066
1067         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1068         m->packet_type = ptype;
1069         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1070             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1071             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1072                 l4_supported = 1;
1073
1074         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1075                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1076                 if (hdr->csum_start <= hdrlen && l4_supported) {
1077                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1078                 } else {
1079                         /* Unknown proto or tunnel, do sw cksum. We can assume
1080                          * the cksum field is in the first segment since the
1081                          * buffers we provided to the host are large enough.
1082                          * In case of SCTP, this will be wrong since it's a CRC
1083                          * but there's nothing we can do.
1084                          */
1085                         uint16_t csum = 0, off;
1086
1087                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1088                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1089                                 &csum);
1090                         if (likely(csum != 0xffff))
1091                                 csum = ~csum;
1092                         off = hdr->csum_offset + hdr->csum_start;
1093                         if (rte_pktmbuf_data_len(m) >= off + 1)
1094                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1095                                         off) = csum;
1096                 }
1097         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1098                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1099         }
1100
1101         /* GSO request, save required information in mbuf */
1102         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1103                 /* Check unsupported modes */
1104                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1105                     (hdr->gso_size == 0)) {
1106                         return -EINVAL;
1107                 }
1108
1109                 /* Update mss lengths in mbuf */
1110                 m->tso_segsz = hdr->gso_size;
1111                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1112                         case VIRTIO_NET_HDR_GSO_TCPV4:
1113                         case VIRTIO_NET_HDR_GSO_TCPV6:
1114                                 m->ol_flags |= PKT_RX_LRO | \
1115                                         PKT_RX_L4_CKSUM_NONE;
1116                                 break;
1117                         default:
1118                                 return -EINVAL;
1119                 }
1120         }
1121
1122         return 0;
1123 }
1124
1125 #define VIRTIO_MBUF_BURST_SZ 64
1126 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
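
/*
 * Illustrative sketch, not driver code: the receive paths below trim the
 * burst so that it ends on a descriptor-cache-line boundary, which lets the
 * next burst start reading used entries from the beginning of a fresh cache
 * line.  With 64-byte cache lines and 16-byte descriptors DESC_PER_CACHELINE
 * is 4; e.g. cons_idx = 6 and num = 32 gives (6 + 32) % 4 = 2, so the burst
 * is trimmed to 30 and the next one starts at index 36, a multiple of 4.
 * The helper name below is hypothetical.
 */
static inline uint16_t
example_align_rx_burst(uint16_t cons_idx, uint16_t num)
{
        if (num > DESC_PER_CACHELINE)
                num = num - ((cons_idx + num) % DESC_PER_CACHELINE);
        return num;
}
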
1127 uint16_t
1128 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1129 {
1130         struct virtnet_rx *rxvq = rx_queue;
1131         struct virtqueue *vq = rxvq->vq;
1132         struct virtio_hw *hw = vq->hw;
1133         struct rte_mbuf *rxm, *new_mbuf;
1134         uint16_t nb_used, num, nb_rx;
1135         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1136         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1137         int error;
1138         uint32_t i, nb_enqueued;
1139         uint32_t hdr_size;
1140         struct virtio_net_hdr *hdr;
1141
1142         nb_rx = 0;
1143         if (unlikely(hw->started == 0))
1144                 return nb_rx;
1145
1146         nb_used = VIRTQUEUE_NUSED(vq);
1147
1148         virtio_rmb(hw->weak_barriers);
1149
1150         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1151         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1152                 num = VIRTIO_MBUF_BURST_SZ;
1153         if (likely(num > DESC_PER_CACHELINE))
1154                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1155
1156         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1157         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1158
1159         nb_enqueued = 0;
1160         hdr_size = hw->vtnet_hdr_size;
1161
1162         for (i = 0; i < num ; i++) {
1163                 rxm = rcv_pkts[i];
1164
1165                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1166
1167                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1168                         PMD_RX_LOG(ERR, "Packet drop");
1169                         nb_enqueued++;
1170                         virtio_discard_rxbuf(vq, rxm);
1171                         rxvq->stats.errors++;
1172                         continue;
1173                 }
1174
1175                 rxm->port = rxvq->port_id;
1176                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1177                 rxm->ol_flags = 0;
1178                 rxm->vlan_tci = 0;
1179
1180                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1181                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1182
1183                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1184                         RTE_PKTMBUF_HEADROOM - hdr_size);
1185
1186                 if (hw->vlan_strip)
1187                         rte_vlan_strip(rxm);
1188
1189                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1190                         virtio_discard_rxbuf(vq, rxm);
1191                         rxvq->stats.errors++;
1192                         continue;
1193                 }
1194
1195                 virtio_rx_stats_updated(rxvq, rxm);
1196
1197                 rx_pkts[nb_rx++] = rxm;
1198         }
1199
1200         rxvq->stats.packets += nb_rx;
1201
1202         /* Allocate new mbuf for the used descriptor */
1203         while (likely(!virtqueue_full(vq))) {
1204                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1205                 if (unlikely(new_mbuf == NULL)) {
1206                         struct rte_eth_dev *dev
1207                                 = &rte_eth_devices[rxvq->port_id];
1208                         dev->data->rx_mbuf_alloc_failed++;
1209                         break;
1210                 }
1211                 error = virtqueue_enqueue_recv_refill(vq, &new_mbuf, 1);
1212                 if (unlikely(error)) {
1213                         rte_pktmbuf_free(new_mbuf);
1214                         break;
1215                 }
1216                 nb_enqueued++;
1217         }
1218
1219         if (likely(nb_enqueued)) {
1220                 vq_update_avail_idx(vq);
1221
1222                 if (unlikely(virtqueue_kick_prepare(vq))) {
1223                         virtqueue_notify(vq);
1224                         PMD_RX_LOG(DEBUG, "Notified");
1225                 }
1226         }
1227
1228         return nb_rx;
1229 }
1230
1231 uint16_t
1232 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1233                         uint16_t nb_pkts)
1234 {
1235         struct virtnet_rx *rxvq = rx_queue;
1236         struct virtqueue *vq = rxvq->vq;
1237         struct virtio_hw *hw = vq->hw;
1238         struct rte_mbuf *rxm, *new_mbuf;
1239         uint16_t num, nb_rx;
1240         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1241         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1242         int error;
1243         uint32_t i, nb_enqueued;
1244         uint32_t hdr_size;
1245         struct virtio_net_hdr *hdr;
1246
1247         nb_rx = 0;
1248         if (unlikely(hw->started == 0))
1249                 return nb_rx;
1250
1251         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1252         if (likely(num > DESC_PER_CACHELINE))
1253                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1254
1255         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1256         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1257
1258         nb_enqueued = 0;
1259         hdr_size = hw->vtnet_hdr_size;
1260
1261         for (i = 0; i < num; i++) {
1262                 rxm = rcv_pkts[i];
1263
1264                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1265
1266                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1267                         PMD_RX_LOG(ERR, "Packet drop");
1268                         nb_enqueued++;
1269                         virtio_discard_rxbuf(vq, rxm);
1270                         rxvq->stats.errors++;
1271                         continue;
1272                 }
1273
1274                 rxm->port = rxvq->port_id;
1275                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1276                 rxm->ol_flags = 0;
1277                 rxm->vlan_tci = 0;
1278
1279                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1280                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1281
1282                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1283                         RTE_PKTMBUF_HEADROOM - hdr_size);
1284
1285                 if (hw->vlan_strip)
1286                         rte_vlan_strip(rxm);
1287
1288                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1289                         virtio_discard_rxbuf(vq, rxm);
1290                         rxvq->stats.errors++;
1291                         continue;
1292                 }
1293
1294                 virtio_rx_stats_updated(rxvq, rxm);
1295
1296                 rx_pkts[nb_rx++] = rxm;
1297         }
1298
1299         rxvq->stats.packets += nb_rx;
1300
1301         /* Allocate new mbuf for the used descriptor */
1302         while (likely(!virtqueue_full(vq))) {
1303                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1304                 if (unlikely(new_mbuf == NULL)) {
1305                         struct rte_eth_dev *dev =
1306                                 &rte_eth_devices[rxvq->port_id];
1307                         dev->data->rx_mbuf_alloc_failed++;
1308                         break;
1309                 }
1310                 error = virtqueue_enqueue_recv_refill_packed(vq, &new_mbuf, 1);
1311                 if (unlikely(error)) {
1312                         rte_pktmbuf_free(new_mbuf);
1313                         break;
1314                 }
1315                 nb_enqueued++;
1316         }
1317
1318         if (likely(nb_enqueued)) {
1319                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1320                         virtqueue_notify(vq);
1321                         PMD_RX_LOG(DEBUG, "Notified");
1322                 }
1323         }
1324
1325         return nb_rx;
1326 }
1327
1328
1329 uint16_t
1330 virtio_recv_pkts_inorder(void *rx_queue,
1331                         struct rte_mbuf **rx_pkts,
1332                         uint16_t nb_pkts)
1333 {
1334         struct virtnet_rx *rxvq = rx_queue;
1335         struct virtqueue *vq = rxvq->vq;
1336         struct virtio_hw *hw = vq->hw;
1337         struct rte_mbuf *rxm;
1338         struct rte_mbuf *prev;
1339         uint16_t nb_used, num, nb_rx;
1340         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1341         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1342         int error;
1343         uint32_t nb_enqueued;
1344         uint32_t seg_num;
1345         uint32_t seg_res;
1346         uint32_t hdr_size;
1347         int32_t i;
1348
1349         nb_rx = 0;
1350         if (unlikely(hw->started == 0))
1351                 return nb_rx;
1352
1353         nb_used = VIRTQUEUE_NUSED(vq);
1354         nb_used = RTE_MIN(nb_used, nb_pkts);
1355         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1356
1357         virtio_rmb(hw->weak_barriers);
1358
1359         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1360
1361         nb_enqueued = 0;
1362         seg_num = 1;
1363         seg_res = 0;
1364         hdr_size = hw->vtnet_hdr_size;
1365
1366         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1367
1368         for (i = 0; i < num; i++) {
1369                 struct virtio_net_hdr_mrg_rxbuf *header;
1370
1371                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1372                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1373
1374                 rxm = rcv_pkts[i];
1375
1376                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1377                         PMD_RX_LOG(ERR, "Packet drop");
1378                         nb_enqueued++;
1379                         virtio_discard_rxbuf_inorder(vq, rxm);
1380                         rxvq->stats.errors++;
1381                         continue;
1382                 }
1383
1384                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1385                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1386                          - hdr_size);
1387
1388                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1389                         seg_num = header->num_buffers;
1390                         if (seg_num == 0)
1391                                 seg_num = 1;
1392                 } else {
1393                         seg_num = 1;
1394                 }
1395
1396                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1397                 rxm->nb_segs = seg_num;
1398                 rxm->ol_flags = 0;
1399                 rxm->vlan_tci = 0;
1400                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1401                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1402
1403                 rxm->port = rxvq->port_id;
1404
1405                 rx_pkts[nb_rx] = rxm;
1406                 prev = rxm;
1407
1408                 if (vq->hw->has_rx_offload &&
1409                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1410                         virtio_discard_rxbuf_inorder(vq, rxm);
1411                         rxvq->stats.errors++;
1412                         continue;
1413                 }
1414
1415                 if (hw->vlan_strip)
1416                         rte_vlan_strip(rx_pkts[nb_rx]);
1417
1418                 seg_res = seg_num - 1;
1419
1420                 /* Merge remaining segments */
1421                 while (seg_res != 0 && i < (num - 1)) {
1422                         i++;
1423
1424                         rxm = rcv_pkts[i];
1425                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1426                         rxm->pkt_len = (uint32_t)(len[i]);
1427                         rxm->data_len = (uint16_t)(len[i]);
1428
1429                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1430                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1431
1432                         if (prev)
1433                                 prev->next = rxm;
1434
1435                         prev = rxm;
1436                         seg_res -= 1;
1437                 }
1438
1439                 if (!seg_res) {
1440                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1441                         nb_rx++;
1442                 }
1443         }
1444
1445         /* The last packet may still have segments left to merge */
1446         while (seg_res != 0) {
1447                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1448                                         VIRTIO_MBUF_BURST_SZ);
1449
1450                 prev = rcv_pkts[nb_rx];
1451                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1452                         virtio_rmb(hw->weak_barriers);
1453                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1454                                                            rcv_cnt);
1455                         uint16_t extra_idx = 0;
1456
1457                         rcv_cnt = num;
1458                         while (extra_idx < rcv_cnt) {
1459                                 rxm = rcv_pkts[extra_idx];
1460                                 rxm->data_off =
1461                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1462                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1463                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1464                                 prev->next = rxm;
1465                                 prev = rxm;
1466                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1467                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1468                                 extra_idx += 1;
1469                         }
1470                         seg_res -= rcv_cnt;
1471
1472                         if (!seg_res) {
1473                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1474                                 nb_rx++;
1475                         }
1476                 } else {
1477                         PMD_RX_LOG(ERR,
1478                                         "Not enough segments for packet.");
1479                         virtio_discard_rxbuf_inorder(vq, prev);
1480                         rxvq->stats.errors++;
1481                         break;
1482                 }
1483         }
1484
1485         rxvq->stats.packets += nb_rx;
1486
1487         /* Allocate new mbufs to refill the used descriptors */
1488
1489         if (likely(!virtqueue_full(vq))) {
1490                 /* free_cnt may include mrg descs */
1491                 uint16_t free_cnt = vq->vq_free_cnt;
1492                 struct rte_mbuf *new_pkts[free_cnt];
1493
1494                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1495                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1496                                         free_cnt);
1497                         if (unlikely(error)) {
1498                                 for (i = 0; i < free_cnt; i++)
1499                                         rte_pktmbuf_free(new_pkts[i]);
1500                         }
1501                         nb_enqueued += free_cnt;
1502                 } else {
1503                         struct rte_eth_dev *dev =
1504                                 &rte_eth_devices[rxvq->port_id];
1505                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1506                 }
1507         }
1508
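             /*
              * Publish the refilled buffers via the avail index and notify the
              * device only if it has not suppressed notifications.
              */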
1509         if (likely(nb_enqueued)) {
1510                 vq_update_avail_idx(vq);
1511
1512                 if (unlikely(virtqueue_kick_prepare(vq))) {
1513                         virtqueue_notify(vq);
1514                         PMD_RX_LOG(DEBUG, "Notified");
1515                 }
1516         }
1517
1518         return nb_rx;
1519 }
1520
1521 uint16_t
1522 virtio_recv_mergeable_pkts(void *rx_queue,
1523                         struct rte_mbuf **rx_pkts,
1524                         uint16_t nb_pkts)
1525 {
1526         struct virtnet_rx *rxvq = rx_queue;
1527         struct virtqueue *vq = rxvq->vq;
1528         struct virtio_hw *hw = vq->hw;
1529         struct rte_mbuf *rxm;
1530         struct rte_mbuf *prev;
1531         uint16_t nb_used, num, nb_rx = 0;
1532         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1533         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1534         int error;
1535         uint32_t nb_enqueued = 0;
1536         uint32_t seg_num = 0;
1537         uint32_t seg_res = 0;
1538         uint32_t hdr_size = hw->vtnet_hdr_size;
1539         int32_t i;
1540
1541         if (unlikely(hw->started == 0))
1542                 return nb_rx;
1543
1544         nb_used = VIRTQUEUE_NUSED(vq);
1545
1546         virtio_rmb(hw->weak_barriers);
1547
1548         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1549
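             /*
              * Bound the burst by the caller's request and the local arrays,
              * then trim it so this pass stops on a descriptor cache-line
              * boundary.
              */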
1550         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1551         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1552                 num = VIRTIO_MBUF_BURST_SZ;
1553         if (likely(num > DESC_PER_CACHELINE))
1554                 num = num - ((vq->vq_used_cons_idx + num) %
1555                                 DESC_PER_CACHELINE);
1556
1558         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1559
1560         for (i = 0; i < num; i++) {
1561                 struct virtio_net_hdr_mrg_rxbuf *header;
1562
1563                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1564                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1565
1566                 rxm = rcv_pkts[i];
1567
1568                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1569                         PMD_RX_LOG(ERR, "Packet drop");
1570                         nb_enqueued++;
1571                         virtio_discard_rxbuf(vq, rxm);
1572                         rxvq->stats.errors++;
1573                         continue;
1574                 }
1575
1576                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1577                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1578                          - hdr_size);
1579                 seg_num = header->num_buffers;
1580                 if (seg_num == 0)
1581                         seg_num = 1;
1582
1583                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1584                 rxm->nb_segs = seg_num;
1585                 rxm->ol_flags = 0;
1586                 rxm->vlan_tci = 0;
1587                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1588                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1589
1590                 rxm->port = rxvq->port_id;
1591
1592                 rx_pkts[nb_rx] = rxm;
1593                 prev = rxm;
1594
1595                 if (hw->has_rx_offload &&
1596                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1597                         virtio_discard_rxbuf(vq, rxm);
1598                         rxvq->stats.errors++;
1599                         continue;
1600                 }
1601
1602                 if (hw->vlan_strip)
1603                         rte_vlan_strip(rx_pkts[nb_rx]);
1604
1605                 seg_res = seg_num - 1;
1606
1607                 /* Merge remaining segments */
1608                 while (seg_res != 0 && i < (num - 1)) {
1609                         i++;
1610
1611                         rxm = rcv_pkts[i];
1612                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1613                         rxm->pkt_len = (uint32_t)(len[i]);
1614                         rxm->data_len = (uint16_t)(len[i]);
1615
1616                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1617                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1618
1619                         if (prev)
1620                                 prev->next = rxm;
1621
1622                         prev = rxm;
1623                         seg_res -= 1;
1624                 }
1625
1626                 if (!seg_res) {
1627                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1628                         nb_rx++;
1629                 }
1630         }
1631
1632         /* The last packet may still have segments left to merge */
1633         while (seg_res != 0) {
1634                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1635                                         VIRTIO_MBUF_BURST_SZ);
1636
1637                 prev = rcv_pkts[nb_rx];
1638                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1639                         virtio_rmb(hw->weak_barriers);
1640                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1641                                                            rcv_cnt);
1642                         uint16_t extra_idx = 0;
1643
1644                         rcv_cnt = num;
1645                         while (extra_idx < rcv_cnt) {
1646                                 rxm = rcv_pkts[extra_idx];
1647                                 rxm->data_off =
1648                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1649                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1650                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1651                                 prev->next = rxm;
1652                                 prev = rxm;
1653                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1654                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1655                                 extra_idx += 1;
1656                         }
1657                         seg_res -= rcv_cnt;
1658
1659                         if (!seg_res) {
1660                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1661                                 nb_rx++;
1662                         }
1663                 } else {
1664                         PMD_RX_LOG(ERR,
1665                                         "Not enough segments for packet.");
1666                         virtio_discard_rxbuf(vq, prev);
1667                         rxvq->stats.errors++;
1668                         break;
1669                 }
1670         }
1671
1672         rxvq->stats.packets += nb_rx;
1673
1674         /* Allocate new mbufs to refill the used descriptors */
1675         if (likely(!virtqueue_full(vq))) {
1676                 /* free_cnt may include mrg descs */
1677                 uint16_t free_cnt = vq->vq_free_cnt;
1678                 struct rte_mbuf *new_pkts[free_cnt];
1679
1680                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1681                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1682                                         free_cnt);
1683                         if (unlikely(error)) {
1684                                 for (i = 0; i < free_cnt; i++)
1685                                         rte_pktmbuf_free(new_pkts[i]);
1686                         }
1687                         nb_enqueued += free_cnt;
1688                 } else {
1689                         struct rte_eth_dev *dev =
1690                                 &rte_eth_devices[rxvq->port_id];
1691                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1692                 }
1693         }
1694
1695         if (likely(nb_enqueued)) {
1696                 vq_update_avail_idx(vq);
1697
1698                 if (unlikely(virtqueue_kick_prepare(vq))) {
1699                         virtqueue_notify(vq);
1700                         PMD_RX_LOG(DEBUG, "Notified");
1701                 }
1702         }
1703
1704         return nb_rx;
1705 }
1706
1707 uint16_t
1708 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1709                         struct rte_mbuf **rx_pkts,
1710                         uint16_t nb_pkts)
1711 {
1712         struct virtnet_rx *rxvq = rx_queue;
1713         struct virtqueue *vq = rxvq->vq;
1714         struct virtio_hw *hw = vq->hw;
1715         struct rte_mbuf *rxm;
1716         struct rte_mbuf *prev = NULL;
1717         uint16_t num, nb_rx = 0;
1718         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1719         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1720         uint32_t nb_enqueued = 0;
1721         uint32_t seg_num = 0;
1722         uint32_t seg_res = 0;
1723         uint32_t hdr_size = hw->vtnet_hdr_size;
1724         int32_t i;
1725         int error;
1726
1727         if (unlikely(hw->started == 0))
1728                 return nb_rx;
1729
1731         num = nb_pkts;
1732         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1733                 num = VIRTIO_MBUF_BURST_SZ;
1734         if (likely(num > DESC_PER_CACHELINE))
1735                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1736
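             /*
              * The packed ring has no used index to read here; the dequeue
              * routine checks each descriptor's used flag directly.
              */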
1737         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1738
1739         for (i = 0; i < num; i++) {
1740                 struct virtio_net_hdr_mrg_rxbuf *header;
1741
1742                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1743                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1744
1745                 rxm = rcv_pkts[i];
1746
1747                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1748                         PMD_RX_LOG(ERR, "Packet drop");
1749                         nb_enqueued++;
1750                         virtio_discard_rxbuf(vq, rxm);
1751                         rxvq->stats.errors++;
1752                         continue;
1753                 }
1754
1755                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1756                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1757                 seg_num = header->num_buffers;
1758
1759                 if (seg_num == 0)
1760                         seg_num = 1;
1761
1762                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1763                 rxm->nb_segs = seg_num;
1764                 rxm->ol_flags = 0;
1765                 rxm->vlan_tci = 0;
1766                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1767                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1768
1769                 rxm->port = rxvq->port_id;
1770                 rx_pkts[nb_rx] = rxm;
1771                 prev = rxm;
1772
1773                 if (hw->has_rx_offload &&
1774                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1775                         virtio_discard_rxbuf(vq, rxm);
1776                         rxvq->stats.errors++;
1777                         continue;
1778                 }
1779
1780                 if (hw->vlan_strip)
1781                         rte_vlan_strip(rx_pkts[nb_rx]);
1782
1783                 seg_res = seg_num - 1;
1784
1785                 /* Merge remaining segments */
1786                 while (seg_res != 0 && i < (num - 1)) {
1787                         i++;
1788
1789                         rxm = rcv_pkts[i];
1790                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1791                         rxm->pkt_len = (uint32_t)(len[i]);
1792                         rxm->data_len = (uint16_t)(len[i]);
1793
1794                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1795                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1796
1797                         if (prev)
1798                                 prev->next = rxm;
1799
1800                         prev = rxm;
1801                         seg_res -= 1;
1802                 }
1803
1804                 if (!seg_res) {
1805                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1806                         nb_rx++;
1807                 }
1808         }
1809
1810         /* The last packet may still have segments left to merge */
1811         while (seg_res != 0) {
1812                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1813                                         VIRTIO_MBUF_BURST_SZ);
1814                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1815                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1816                                         len, rcv_cnt);
1817                         uint16_t extra_idx = 0;
1818
1819                         rcv_cnt = num;
1820
1821                         while (extra_idx < rcv_cnt) {
1822                                 rxm = rcv_pkts[extra_idx];
1823
1824                                 rxm->data_off =
1825                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1826                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1827                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1828
1829                                 prev->next = rxm;
1830                                 prev = rxm;
1831                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1832                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1833                                 extra_idx += 1;
1834                         }
1835                         seg_res -= rcv_cnt;
1836                         if (!seg_res) {
1837                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1838                                 nb_rx++;
1839                         }
1840                 } else {
1841                         PMD_RX_LOG(ERR,
1842                                         "Not enough segments for packet.");
1843                         if (prev)
1844                                 virtio_discard_rxbuf(vq, prev);
1845                         rxvq->stats.errors++;
1846                         break;
1847                 }
1848         }
1849
1850         rxvq->stats.packets += nb_rx;
1851
1852         /* Allocate new mbufs to refill the used descriptors */
1853         if (likely(!virtqueue_full(vq))) {
1854                 /* free_cnt may include mrg descs */
1855                 uint16_t free_cnt = vq->vq_free_cnt;
1856                 struct rte_mbuf *new_pkts[free_cnt];
1857
1858                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1859                         error = virtqueue_enqueue_recv_refill_packed(vq,
1860                                         new_pkts, free_cnt);
1861                         if (unlikely(error)) {
1862                                 for (i = 0; i < free_cnt; i++)
1863                                         rte_pktmbuf_free(new_pkts[i]);
1864                         }
1865                         nb_enqueued += free_cnt;
1866                 } else {
1867                         struct rte_eth_dev *dev =
1868                                 &rte_eth_devices[rxvq->port_id];
1869                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1870                 }
1871         }
1872
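             /*
              * The packed-ring refill already marked the new descriptors as
              * available, so only the notification decision remains.
              */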
1873         if (likely(nb_enqueued)) {
1874                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1875                         virtqueue_notify(vq);
1876                         PMD_RX_LOG(DEBUG, "Notified");
1877                 }
1878         }
1879
1880         return nb_rx;
1881 }
1882
1883 uint16_t
1884 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1885                         uint16_t nb_pkts)
1886 {
1887         struct virtnet_tx *txvq = tx_queue;
1888         struct virtqueue *vq = txvq->vq;
1889         struct virtio_hw *hw = vq->hw;
1890         uint16_t hdr_size = hw->vtnet_hdr_size;
1891         uint16_t nb_tx = 0;
1892         int error;
1893
1894         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1895                 return nb_tx;
1896
1897         if (unlikely(nb_pkts < 1))
1898                 return nb_pkts;
1899
1900         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1901
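             /*
              * If the burst exceeds the free descriptors, reclaim just enough
              * completed entries before starting.
              */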
1902         if (nb_pkts > vq->vq_free_cnt)
1903                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt);
1904
1905         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1906                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1907                 int can_push = 0, slots, need;
1908
1909                 /* Do VLAN tag insertion */
1910                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1911                         error = rte_vlan_insert(&txm);
1912                         if (unlikely(error)) {
1913                                 rte_pktmbuf_free(txm);
1914                                 continue;
1915                         }
1916                 }
1917
1918                 /* optimize ring usage */
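                     /*
                      * When the device accepts any descriptor layout, a single,
                      * unshared, direct mbuf with enough headroom can carry the
                      * virtio-net header in place, saving one descriptor.
                      */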
1919                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1920                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1921                     rte_mbuf_refcnt_read(txm) == 1 &&
1922                     RTE_MBUF_DIRECT(txm) &&
1923                     txm->nb_segs == 1 &&
1924                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1925                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1926                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1927                         can_push = 1;
1928
1929                 /* How many main ring entries are needed for this Tx?
1930                  * any_layout => number of segments
1931                  * default    => number of segments + 1
1932                  */
1933                 slots = txm->nb_segs + !can_push;
1934                 need = slots - vq->vq_free_cnt;
1935
1936                 /* A positive value indicates that free vring descriptors are needed */
1937                 if (unlikely(need > 0)) {
1938                         virtio_xmit_cleanup_packed(vq, need);
1939                         need = slots - vq->vq_free_cnt;
1940                         if (unlikely(need > 0)) {
1941                                 PMD_TX_LOG(ERR,
1942                                            "No free tx descriptors to transmit");
1943                                 break;
1944                         }
1945                 }
1946
1947                 /* Enqueue Packet buffers */
1948                 virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push);
1949
1950                 virtio_update_packet_stats(&txvq->stats, txm);
1951         }
1952
1953         txvq->stats.packets += nb_tx;
1954
1955         if (likely(nb_tx)) {
1956                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1957                         virtqueue_notify(vq);
1958                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1959                 }
1960         }
1961
1962         return nb_tx;
1963 }
1964
1965 uint16_t
1966 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1967 {
1968         struct virtnet_tx *txvq = tx_queue;
1969         struct virtqueue *vq = txvq->vq;
1970         struct virtio_hw *hw = vq->hw;
1971         uint16_t hdr_size = hw->vtnet_hdr_size;
1972         uint16_t nb_used, nb_tx = 0;
1973         int error;
1974
1975         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1976                 return nb_tx;
1977
1978         if (unlikely(nb_pkts < 1))
1979                 return nb_pkts;
1980
1981         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1982         nb_used = VIRTQUEUE_NUSED(vq);
1983
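             /*
              * Reclaim completed descriptors up front once the backlog of used
              * entries crosses the free threshold.
              */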
1984         virtio_rmb(hw->weak_barriers);
1985         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1986                 virtio_xmit_cleanup(vq, nb_used);
1987
1988         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1989                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1990                 int can_push = 0, use_indirect = 0, slots, need;
1991
1992                 /* Do VLAN tag insertion */
1993                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1994                         error = rte_vlan_insert(&txm);
1995                         if (unlikely(error)) {
1996                                 rte_pktmbuf_free(txm);
1997                                 continue;
1998                         }
1999                 }
2000
2001                 /* optimize ring usage */
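                     /*
                      * Prefer writing the virtio-net header into the mbuf
                      * headroom (one descriptor per segment); otherwise fall
                      * back to an indirect descriptor table when the feature is
                      * negotiated and the chain is short enough.
                      */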
2002                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2003                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2004                     rte_mbuf_refcnt_read(txm) == 1 &&
2005                     RTE_MBUF_DIRECT(txm) &&
2006                     txm->nb_segs == 1 &&
2007                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2008                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2009                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2010                         can_push = 1;
2011                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2012                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2013                         use_indirect = 1;
2014
2015                 /* How many main ring entries are needed for this Tx?
2016                  * any_layout => number of segments
2017                  * indirect   => 1
2018                  * default    => number of segments + 1
2019                  */
2020                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2021                 need = slots - vq->vq_free_cnt;
2022
2023                 /* A positive value indicates that free vring descriptors are needed */
2024                 if (unlikely(need > 0)) {
2025                         nb_used = VIRTQUEUE_NUSED(vq);
2026                         virtio_rmb(hw->weak_barriers);
2027                         need = RTE_MIN(need, (int)nb_used);
2028
2029                         virtio_xmit_cleanup(vq, need);
2030                         need = slots - vq->vq_free_cnt;
2031                         if (unlikely(need > 0)) {
2032                                 PMD_TX_LOG(ERR,
2033                                            "No free tx descriptors to transmit");
2034                                 break;
2035                         }
2036                 }
2037
2038                 /* Enqueue Packet buffers */
2039                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2040                         can_push, 0);
2041
2042                 virtio_update_packet_stats(&txvq->stats, txm);
2043         }
2044
2045         txvq->stats.packets += nb_tx;
2046
2047         if (likely(nb_tx)) {
2048                 vq_update_avail_idx(vq);
2049
2050                 if (unlikely(virtqueue_kick_prepare(vq))) {
2051                         virtqueue_notify(vq);
2052                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2053                 }
2054         }
2055
2056         return nb_tx;
2057 }
2058
2059 uint16_t
2060 virtio_xmit_pkts_inorder(void *tx_queue,
2061                         struct rte_mbuf **tx_pkts,
2062                         uint16_t nb_pkts)
2063 {
2064         struct virtnet_tx *txvq = tx_queue;
2065         struct virtqueue *vq = txvq->vq;
2066         struct virtio_hw *hw = vq->hw;
2067         uint16_t hdr_size = hw->vtnet_hdr_size;
2068         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2069         struct rte_mbuf *inorder_pkts[nb_pkts];
2070         int error;
2071
2072         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2073                 return nb_tx;
2074
2075         if (unlikely(nb_pkts < 1))
2076                 return nb_pkts;
2077
2078         VIRTQUEUE_DUMP(vq);
2079         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2080         nb_used = VIRTQUEUE_NUSED(vq);
2081
2082         virtio_rmb(hw->weak_barriers);
2083         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2084                 virtio_xmit_cleanup_inorder(vq, nb_used);
2085
2086         if (unlikely(!vq->vq_free_cnt))
2087                 virtio_xmit_cleanup_inorder(vq, nb_used);
2088
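             /*
              * Do not start more packets than there are free descriptors;
              * batched in-order packets take one descriptor each, while larger
              * chains trigger extra cleanup below.
              */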
2089         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2090
2091         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2092                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2093                 int slots, need;
2094
2095                 /* Do VLAN tag insertion */
2096                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2097                         error = rte_vlan_insert(&txm);
2098                         if (unlikely(error)) {
2099                                 rte_pktmbuf_free(txm);
2100                                 continue;
2101                         }
2102                 }
2103
2104                 /* optimize ring usage */
2105                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2106                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2107                      rte_mbuf_refcnt_read(txm) == 1 &&
2108                      RTE_MBUF_DIRECT(txm) &&
2109                      txm->nb_segs == 1 &&
2110                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2111                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2112                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2113                         inorder_pkts[nb_inorder_pkts] = txm;
2114                         nb_inorder_pkts++;
2115
2116                         virtio_update_packet_stats(&txvq->stats, txm);
2117                         continue;
2118                 }
2119
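                     /*
                      * This mbuf cannot carry the header in place, so flush the
                      * batched in-order packets first to keep submission order.
                      */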
2120                 if (nb_inorder_pkts) {
2121                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2122                                                         nb_inorder_pkts);
2123                         nb_inorder_pkts = 0;
2124                 }
2125
2126                 slots = txm->nb_segs + 1;
2127                 need = slots - vq->vq_free_cnt;
2128                 if (unlikely(need > 0)) {
2129                         nb_used = VIRTQUEUE_NUSED(vq);
2130                         virtio_rmb(hw->weak_barriers);
2131                         need = RTE_MIN(need, (int)nb_used);
2132
2133                         virtio_xmit_cleanup_inorder(vq, need);
2134
2135                         need = slots - vq->vq_free_cnt;
2136
2137                         if (unlikely(need > 0)) {
2138                                 PMD_TX_LOG(ERR,
2139                                         "No free tx descriptors to transmit");
2140                                 break;
2141                         }
2142                 }
2143                 /* Enqueue Packet buffers */
2144                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2145
2146                 virtio_update_packet_stats(&txvq->stats, txm);
2147         }
2148
2149         /* Transmit all inorder packets */
2150         if (nb_inorder_pkts)
2151                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2152                                                 nb_inorder_pkts);
2153
2154         txvq->stats.packets += nb_tx;
2155
2156         if (likely(nb_tx)) {
2157                 vq_update_avail_idx(vq);
2158
2159                 if (unlikely(virtqueue_kick_prepare(vq))) {
2160                         virtqueue_notify(vq);
2161                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2162                 }
2163         }
2164
2165         VIRTQUEUE_DUMP(vq);
2166
2167         return nb_tx;
2168 }