net/virtio: fix Tx desc cleanup for packed ring
[dpdk.git] drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the newly freed chain to the end of the existing
78          * free chain, if any. If the virtqueue was completely used, the
79          * free list is empty and the newly freed chain becomes its head.
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
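/*
 * Illustrative walk-through of the splice above (values are hypothetical):
 * with an existing free list 5 -> 6 (head 5, tail 6) and a returned
 * 3-descriptor chain 2 -> 3 -> 4, desc[6].next is set to 2, the tail becomes
 * 4, desc[4].next is set to VQ_RING_DESC_CHAIN_END and vq_free_cnt grows by
 * dxp->ndescs (3). Had the free list been empty (tail equal to
 * VQ_RING_DESC_CHAIN_END), the head would simply become 2 instead.
 */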
91
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
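/*
 * Note on the packed-ring variant above: packed descriptors carry no "next"
 * field, so the free list is threaded through the shadow vq_descx[].next
 * entries rather than through the ring itself; only the buffer id is linked,
 * and vq_free_cnt is credited with the whole chain length recorded in ndescs.
 */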
108
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->ring_packed.desc_packed;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
150
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
227 /* Cleanup from completed transmits. */
228 static void
229 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num)
230 {
231         uint16_t used_idx, id;
232         uint16_t size = vq->vq_nentries;
233         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
234         struct vq_desc_extra *dxp;
235
236         used_idx = vq->vq_used_cons_idx;
237         while (num-- && desc_is_used(&desc[used_idx], vq)) {
238                 virtio_rmb(vq->hw->weak_barriers);
239                 id = desc[used_idx].id;
240                 dxp = &vq->vq_descx[id];
241                 vq->vq_used_cons_idx += dxp->ndescs;
242                 if (vq->vq_used_cons_idx >= size) {
243                         vq->vq_used_cons_idx -= size;
244                         vq->used_wrap_counter ^= 1;
245                 }
246                 vq_ring_free_id_packed(vq, id);
247                 if (dxp->cookie != NULL) {
248                         rte_pktmbuf_free(dxp->cookie);
249                         dxp->cookie = NULL;
250                 }
251                 used_idx = vq->vq_used_cons_idx;
252         }
253 }
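/*
 * In the packed-ring cleanup above, the device is expected to report one used
 * element per descriptor chain, so the used index advances by the ndescs
 * recorded at enqueue time rather than by one. A hypothetical example with a
 * 256-entry ring: used_cons_idx = 254 and a 4-descriptor chain completes, so
 * the index becomes 258 - 256 = 2 and used_wrap_counter is toggled.
 */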
254
255 static void
256 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
257 {
258         uint16_t i, used_idx, desc_idx;
259         for (i = 0; i < num; i++) {
260                 struct vring_used_elem *uep;
261                 struct vq_desc_extra *dxp;
262
263                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
264                 uep = &vq->vq_ring.used->ring[used_idx];
265
266                 desc_idx = (uint16_t) uep->id;
267                 dxp = &vq->vq_descx[desc_idx];
268                 vq->vq_used_cons_idx++;
269                 vq_ring_free_chain(vq, desc_idx);
270
271                 if (dxp->cookie != NULL) {
272                         rte_pktmbuf_free(dxp->cookie);
273                         dxp->cookie = NULL;
274                 }
275         }
276 }
277
278 /* Cleanup from completed inorder transmits. */
279 static void
280 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
281 {
282         uint16_t i, used_idx, desc_idx = 0, last_idx;
283         int16_t free_cnt = 0;
284         struct vq_desc_extra *dxp = NULL;
285
286         if (unlikely(num == 0))
287                 return;
288
289         for (i = 0; i < num; i++) {
290                 struct vring_used_elem *uep;
291
292                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
293                 uep = &vq->vq_ring.used->ring[used_idx];
294                 desc_idx = (uint16_t)uep->id;
295
296                 dxp = &vq->vq_descx[desc_idx];
297                 vq->vq_used_cons_idx++;
298
299                 if (dxp->cookie != NULL) {
300                         rte_pktmbuf_free(dxp->cookie);
301                         dxp->cookie = NULL;
302                 }
303         }
304
305         last_idx = desc_idx + dxp->ndescs - 1;
306         free_cnt = last_idx - vq->vq_desc_tail_idx;
307         if (free_cnt <= 0)
308                 free_cnt += vq->vq_nentries;
309
310         vq_ring_free_inorder(vq, last_idx, free_cnt);
311 }
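/*
 * Worked example of the in-order accounting above (hypothetical values):
 * with 256 entries, vq_desc_tail_idx = 250 and the last completed desc_idx = 3
 * whose ndescs = 2, last_idx = 4 and free_cnt = 4 - 250 = -246, which wraps to
 * 10; vq_ring_free_inorder() then credits 10 slots (251..255 and 0..4) and
 * moves the tail to 4.
 */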
312
313 static inline int
314 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
315                         struct rte_mbuf **cookies,
316                         uint16_t num)
317 {
318         struct vq_desc_extra *dxp;
319         struct virtio_hw *hw = vq->hw;
320         struct vring_desc *start_dp;
321         uint16_t head_idx, idx, i = 0;
322
323         if (unlikely(vq->vq_free_cnt == 0))
324                 return -ENOSPC;
325         if (unlikely(vq->vq_free_cnt < num))
326                 return -EMSGSIZE;
327
328         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
329         start_dp = vq->vq_ring.desc;
330
331         while (i < num) {
332                 idx = head_idx & (vq->vq_nentries - 1);
333                 dxp = &vq->vq_descx[idx];
334                 dxp->cookie = (void *)cookies[i];
335                 dxp->ndescs = 1;
336
337                 start_dp[idx].addr =
338                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
339                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
340                 start_dp[idx].len =
341                                 cookies[i]->buf_len -
342                                 RTE_PKTMBUF_HEADROOM +
343                                 hw->vtnet_hdr_size;
344                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
345
346                 vq_update_avail_ring(vq, idx);
347                 head_idx++;
348                 i++;
349         }
350
351         vq->vq_desc_head_idx += num;
352         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
353         return 0;
354 }
355
356 static inline int
357 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
358                                 uint16_t num)
359 {
360         struct vq_desc_extra *dxp;
361         struct virtio_hw *hw = vq->hw;
362         struct vring_desc *start_dp = vq->vq_ring.desc;
363         uint16_t idx, i;
364
365         if (unlikely(vq->vq_free_cnt == 0))
366                 return -ENOSPC;
367         if (unlikely(vq->vq_free_cnt < num))
368                 return -EMSGSIZE;
369
370         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
371                 return -EFAULT;
372
373         for (i = 0; i < num; i++) {
374                 idx = vq->vq_desc_head_idx;
375                 dxp = &vq->vq_descx[idx];
376                 dxp->cookie = (void *)cookie[i];
377                 dxp->ndescs = 1;
378
379                 start_dp[idx].addr =
380                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
381                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
382                 start_dp[idx].len =
383                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
384                         hw->vtnet_hdr_size;
385                 start_dp[idx].flags = VRING_DESC_F_WRITE;
386                 vq->vq_desc_head_idx = start_dp[idx].next;
387                 vq_update_avail_ring(vq, idx);
388                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
389                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
390                         break;
391                 }
392         }
393
394         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
395
396         return 0;
397 }
398
399 static inline int
400 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
401                                      struct rte_mbuf **cookie, uint16_t num)
402 {
403         struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
404         uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
405         struct virtio_hw *hw = vq->hw;
406         struct vq_desc_extra *dxp;
407         uint16_t idx;
408         int i;
409
410         if (unlikely(vq->vq_free_cnt == 0))
411                 return -ENOSPC;
412         if (unlikely(vq->vq_free_cnt < num))
413                 return -EMSGSIZE;
414
415         for (i = 0; i < num; i++) {
416                 idx = vq->vq_avail_idx;
417                 dxp = &vq->vq_descx[idx];
418                 dxp->cookie = (void *)cookie[i];
419                 dxp->ndescs = 1;
420
421                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
422                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
423                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
424                                         + hw->vtnet_hdr_size;
425
426                 vq->vq_desc_head_idx = dxp->next;
427                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
428                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
429                 virtio_wmb(hw->weak_barriers);
430                 start_dp[idx].flags = flags;
431                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
432                         vq->vq_avail_idx -= vq->vq_nentries;
433                         vq->avail_wrap_counter ^= 1;
434                         vq->avail_used_flags =
435                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
436                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
437                         flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
438                 }
439         }
440         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
441         return 0;
442 }
443
444 /* When doing TSO, the IP length is not included in the pseudo header
445  * checksum of the packet given to the PMD, but virtio expects it to be
446  * included.
447  */
448 static void
449 virtio_tso_fix_cksum(struct rte_mbuf *m)
450 {
451         /* common case: header is not fragmented */
452         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
453                         m->l4_len)) {
454                 struct ipv4_hdr *iph;
455                 struct ipv6_hdr *ip6h;
456                 struct tcp_hdr *th;
457                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
458                 uint32_t tmp;
459
460                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
461                 th = RTE_PTR_ADD(iph, m->l3_len);
462                 if ((iph->version_ihl >> 4) == 4) {
463                         iph->hdr_checksum = 0;
464                         iph->hdr_checksum = rte_ipv4_cksum(iph);
465                         ip_len = iph->total_length;
466                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
467                                 m->l3_len);
468                 } else {
469                         ip6h = (struct ipv6_hdr *)iph;
470                         ip_paylen = ip6h->payload_len;
471                 }
472
473                 /* calculate the new phdr checksum not including ip_paylen */
474                 prev_cksum = th->cksum;
475                 tmp = prev_cksum;
476                 tmp += ip_paylen;
477                 tmp = (tmp & 0xffff) + (tmp >> 16);
478                 new_cksum = tmp;
479
480                 /* replace it in the packet */
481                 th->cksum = new_cksum;
482         }
483 }
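/*
 * The adjustment above folds the L4 payload length into the existing
 * pseudo-header checksum using one's-complement arithmetic. A hypothetical
 * example: prev_cksum = 0xfff0 and ip_paylen = 0x0020 give tmp = 0x10010;
 * the end-around carry (tmp & 0xffff) + (tmp >> 16) yields 0x0011, which is
 * stored back into the TCP checksum field for the host to complete.
 */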
484
485
486 /* avoid write operations when unnecessary, to lessen cache issues */
487 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
488         if ((var) != (val))                     \
489                 (var) = (val);                  \
490 } while (0)
491
492 static inline void
493 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
494                         struct rte_mbuf *cookie,
495                         bool offload)
496 {
497         if (offload) {
498                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
499                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
500
501                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
502                 case PKT_TX_UDP_CKSUM:
503                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
504                         hdr->csum_offset = offsetof(struct udp_hdr,
505                                 dgram_cksum);
506                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
507                         break;
508
509                 case PKT_TX_TCP_CKSUM:
510                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
511                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
512                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
513                         break;
514
515                 default:
516                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
517                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
518                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
519                         break;
520                 }
521
522                 /* TCP Segmentation Offload */
523                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
524                         virtio_tso_fix_cksum(cookie);
525                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
526                                 VIRTIO_NET_HDR_GSO_TCPV6 :
527                                 VIRTIO_NET_HDR_GSO_TCPV4;
528                         hdr->gso_size = cookie->tso_segsz;
529                         hdr->hdr_len =
530                                 cookie->l2_len +
531                                 cookie->l3_len +
532                                 cookie->l4_len;
533                 } else {
534                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
535                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
536                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
537                 }
538         }
539 }
540
541 static inline void
542 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
543                         struct rte_mbuf **cookies,
544                         uint16_t num)
545 {
546         struct vq_desc_extra *dxp;
547         struct virtqueue *vq = txvq->vq;
548         struct vring_desc *start_dp;
549         struct virtio_net_hdr *hdr;
550         uint16_t idx;
551         uint16_t head_size = vq->hw->vtnet_hdr_size;
552         uint16_t i = 0;
553
554         idx = vq->vq_desc_head_idx;
555         start_dp = vq->vq_ring.desc;
556
557         while (i < num) {
558                 idx = idx & (vq->vq_nentries - 1);
559                 dxp = &vq->vq_descx[idx];
560                 dxp->cookie = (void *)cookies[i];
561                 dxp->ndescs = 1;
562
563                 hdr = (struct virtio_net_hdr *)
564                         rte_pktmbuf_prepend(cookies[i], head_size);
565                 cookies[i]->pkt_len -= head_size;
566
567                 /* if offload is disabled, the header is not zeroed below, so do it now */
568                 if (!vq->hw->has_tx_offload) {
569                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
570                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
571                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
572                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
573                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
574                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
575                 }
576
577                 virtqueue_xmit_offload(hdr, cookies[i],
578                                 vq->hw->has_tx_offload);
579
580                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
581                 start_dp[idx].len   = cookies[i]->data_len;
582                 start_dp[idx].flags = 0;
583
584                 vq_update_avail_ring(vq, idx);
585
586                 idx++;
587                 i++;
588         };
589
590         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
591         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
592 }
593
594 static inline void
595 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
596                               uint16_t needed, int can_push)
597 {
598         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
599         struct vq_desc_extra *dxp;
600         struct virtqueue *vq = txvq->vq;
601         struct vring_packed_desc *start_dp, *head_dp;
602         uint16_t idx, id, head_idx, head_flags;
603         uint16_t head_size = vq->hw->vtnet_hdr_size;
604         struct virtio_net_hdr *hdr;
605         uint16_t prev;
606
607         id = vq->vq_desc_head_idx;
608
609         dxp = &vq->vq_descx[id];
610         dxp->ndescs = needed;
611         dxp->cookie = cookie;
612
613         head_idx = vq->vq_avail_idx;
614         idx = head_idx;
615         prev = head_idx;
616         start_dp = vq->ring_packed.desc_packed;
617
618         head_dp = &vq->ring_packed.desc_packed[idx];
619         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
620         head_flags |= vq->avail_used_flags;
621
622         if (can_push) {
623                 /* prepend cannot fail, checked by caller */
624                 hdr = (struct virtio_net_hdr *)
625                         rte_pktmbuf_prepend(cookie, head_size);
626                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length, which is
627                  * not wanted here; the subtraction below restores the correct packet size.
628                  */
629                 cookie->pkt_len -= head_size;
630
631                 /* if offload is disabled, the header is not zeroed below, so do it now */
632                 if (!vq->hw->has_tx_offload) {
633                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
634                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
635                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
636                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
637                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
638                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
639                 }
640         } else {
641                 /* setup first tx ring slot to point to header
642                  * stored in reserved region.
643                  */
644                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
645                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
646                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
647                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
648                 idx++;
649                 if (idx >= vq->vq_nentries) {
650                         idx -= vq->vq_nentries;
651                         vq->avail_wrap_counter ^= 1;
652                         vq->avail_used_flags =
653                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
654                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
655                 }
656         }
657
658         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
659
660         do {
661                 uint16_t flags;
662
663                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
664                 start_dp[idx].len  = cookie->data_len;
665                 if (likely(idx != head_idx)) {
666                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
667                         flags |= vq->avail_used_flags;
668                         start_dp[idx].flags = flags;
669                 }
670                 prev = idx;
671                 idx++;
672                 if (idx >= vq->vq_nentries) {
673                         idx -= vq->vq_nentries;
674                         vq->avail_wrap_counter ^= 1;
675                         vq->avail_used_flags =
676                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
677                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
678                 }
679         } while ((cookie = cookie->next) != NULL);
680
681         start_dp[prev].id = id;
682
683         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
684
685         vq->vq_desc_head_idx = dxp->next;
686         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
687                 vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
688
689         vq->vq_avail_idx = idx;
690
691         virtio_wmb(vq->hw->weak_barriers);
692         head_dp->flags = head_flags;
693 }
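/*
 * Ordering note for the packed Tx enqueue above: every descriptor of the
 * chain is filled first, the buffer id is stored in the last descriptor
 * (start_dp[prev].id), and only after virtio_wmb() are the head descriptor's
 * AVAIL/USED flag bits written. The device polls those head flags, so it can
 * never observe a partially built chain.
 */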
694
695 static inline void
696 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
697                         uint16_t needed, int use_indirect, int can_push,
698                         int in_order)
699 {
700         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
701         struct vq_desc_extra *dxp;
702         struct virtqueue *vq = txvq->vq;
703         struct vring_desc *start_dp;
704         uint16_t seg_num = cookie->nb_segs;
705         uint16_t head_idx, idx;
706         uint16_t head_size = vq->hw->vtnet_hdr_size;
707         struct virtio_net_hdr *hdr;
708
709         head_idx = vq->vq_desc_head_idx;
710         idx = head_idx;
711         dxp = &vq->vq_descx[idx];
712         dxp->cookie = (void *)cookie;
713         dxp->ndescs = needed;
714
715         start_dp = vq->vq_ring.desc;
716
717         if (can_push) {
718                 /* prepend cannot fail, checked by caller */
719                 hdr = (struct virtio_net_hdr *)
720                         rte_pktmbuf_prepend(cookie, head_size);
721                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length, which is
722                  * not wanted here; the subtraction below restores the correct packet size.
723                  */
724                 cookie->pkt_len -= head_size;
725
726                 /* if offload is disabled, the header is not zeroed below, so do it now */
727                 if (!vq->hw->has_tx_offload) {
728                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
729                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
730                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
731                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
732                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
733                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
734                 }
735         } else if (use_indirect) {
736                 /* setup tx ring slot to point to indirect
737                  * descriptor list stored in reserved region.
738                  *
739                  * the first slot in indirect ring is already preset
740                  * to point to the header in reserved region
741                  */
742                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
743                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
744                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
745                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
746                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
747
748                 /* loop below will fill in rest of the indirect elements */
749                 start_dp = txr[idx].tx_indir;
750                 idx = 1;
751         } else {
752                 /* setup first tx ring slot to point to header
753                  * stored in reserved region.
754                  */
755                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
756                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
757                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
758                 start_dp[idx].flags = VRING_DESC_F_NEXT;
759                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
760
761                 idx = start_dp[idx].next;
762         }
763
764         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
765
766         do {
767                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
768                 start_dp[idx].len   = cookie->data_len;
769                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
770                 idx = start_dp[idx].next;
771         } while ((cookie = cookie->next) != NULL);
772
773         if (use_indirect)
774                 idx = vq->vq_ring.desc[head_idx].next;
775
776         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
777
778         vq->vq_desc_head_idx = idx;
779         vq_update_avail_ring(vq, head_idx);
780
781         if (!in_order) {
782                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
783                         vq->vq_desc_tail_idx = idx;
784         }
785 }
786
787 void
788 virtio_dev_cq_start(struct rte_eth_dev *dev)
789 {
790         struct virtio_hw *hw = dev->data->dev_private;
791
792         if (hw->cvq && hw->cvq->vq) {
793                 rte_spinlock_init(&hw->cvq->lock);
794                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
795         }
796 }
797
798 int
799 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
800                         uint16_t queue_idx,
801                         uint16_t nb_desc,
802                         unsigned int socket_id __rte_unused,
803                         const struct rte_eth_rxconf *rx_conf __rte_unused,
804                         struct rte_mempool *mp)
805 {
806         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
807         struct virtio_hw *hw = dev->data->dev_private;
808         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
809         struct virtnet_rx *rxvq;
810
811         PMD_INIT_FUNC_TRACE();
812
813         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
814                 nb_desc = vq->vq_nentries;
815         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
816
817         rxvq = &vq->rxq;
818         rxvq->queue_id = queue_idx;
819         rxvq->mpool = mp;
820         if (rxvq->mpool == NULL) {
821                 rte_exit(EXIT_FAILURE,
822                         "Cannot allocate mbufs for rx virtqueue");
823         }
824
825         dev->data->rx_queues[queue_idx] = rxvq;
826
827         return 0;
828 }
829
830 int
831 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
832 {
833         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
834         struct virtio_hw *hw = dev->data->dev_private;
835         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
836         struct virtnet_rx *rxvq = &vq->rxq;
837         struct rte_mbuf *m;
838         uint16_t desc_idx;
839         int error, nbufs, i;
840
841         PMD_INIT_FUNC_TRACE();
842
843         /* Allocate blank mbufs for each rx descriptor */
844         nbufs = 0;
845
846         if (hw->use_simple_rx) {
847                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
848                      desc_idx++) {
849                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
850                         vq->vq_ring.desc[desc_idx].flags =
851                                 VRING_DESC_F_WRITE;
852                 }
853
854                 virtio_rxq_vec_setup(rxvq);
855         }
856
857         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
858         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
859              desc_idx++) {
860                 vq->sw_ring[vq->vq_nentries + desc_idx] =
861                         &rxvq->fake_mbuf;
862         }
863
864         if (hw->use_simple_rx) {
865                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
866                         virtio_rxq_rearm_vec(rxvq);
867                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
868                 }
869         } else if (hw->use_inorder_rx) {
870                 if ((!virtqueue_full(vq))) {
871                         uint16_t free_cnt = vq->vq_free_cnt;
872                         struct rte_mbuf *pkts[free_cnt];
873
874                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
875                                 free_cnt)) {
876                                 error = virtqueue_enqueue_refill_inorder(vq,
877                                                 pkts,
878                                                 free_cnt);
879                                 if (unlikely(error)) {
880                                         for (i = 0; i < free_cnt; i++)
881                                                 rte_pktmbuf_free(pkts[i]);
882                                 }
883                         }
884
885                         nbufs += free_cnt;
886                         vq_update_avail_idx(vq);
887                 }
888         } else {
889                 while (!virtqueue_full(vq)) {
890                         m = rte_mbuf_raw_alloc(rxvq->mpool);
891                         if (m == NULL)
892                                 break;
893
894                         /* Enqueue allocated buffers */
895                         if (vtpci_packed_queue(vq->hw))
896                                 error = virtqueue_enqueue_recv_refill_packed(vq,
897                                                 &m, 1);
898                         else
899                                 error = virtqueue_enqueue_recv_refill(vq,
900                                                 &m, 1);
901                         if (error) {
902                                 rte_pktmbuf_free(m);
903                                 break;
904                         }
905                         nbufs++;
906                 }
907
908                 if (!vtpci_packed_queue(vq->hw))
909                         vq_update_avail_idx(vq);
910         }
911
912         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
913
914         VIRTQUEUE_DUMP(vq);
915
916         return 0;
917 }
918
919 /*
920  * struct rte_eth_dev *dev: Used to update dev
921  * uint16_t nb_desc: Defaults to values read from config space
922  * unsigned int socket_id: Used to allocate memzone
923  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
924  * uint16_t queue_idx: Just used as an index in dev txq list
925  */
926 int
927 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
928                         uint16_t queue_idx,
929                         uint16_t nb_desc,
930                         unsigned int socket_id __rte_unused,
931                         const struct rte_eth_txconf *tx_conf)
932 {
933         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
934         struct virtio_hw *hw = dev->data->dev_private;
935         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
936         struct virtnet_tx *txvq;
937         uint16_t tx_free_thresh;
938
939         PMD_INIT_FUNC_TRACE();
940
941         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
942                 nb_desc = vq->vq_nentries;
943         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
944
945         txvq = &vq->txq;
946         txvq->queue_id = queue_idx;
947
948         tx_free_thresh = tx_conf->tx_free_thresh;
949         if (tx_free_thresh == 0)
950                 tx_free_thresh =
951                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
952
953         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
954                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
955                         "number of TX entries minus 3 (%u)."
956                         " (tx_free_thresh=%u port=%u queue=%u)\n",
957                         vq->vq_nentries - 3,
958                         tx_free_thresh, dev->data->port_id, queue_idx);
959                 return -EINVAL;
960         }
961
962         vq->vq_free_thresh = tx_free_thresh;
963
964         dev->data->tx_queues[queue_idx] = txvq;
965         return 0;
966 }
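/*
 * A minimal application-side sketch (hypothetical values) of how the checks
 * above are exercised through the generic ethdev API:
 *
 *	struct rte_eth_txconf txconf = { .tx_free_thresh = 32 };
 *	ret = rte_eth_tx_queue_setup(port_id, 0, 256, rte_socket_id(), &txconf);
 *
 * With 256 descriptors, tx_free_thresh must stay below 253 (nentries - 3);
 * passing 0 falls back to RTE_MIN(nentries / 4, DEFAULT_TX_FREE_THRESH) = 32.
 */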
967
968 int
969 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
970                                 uint16_t queue_idx)
971 {
972         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
973         struct virtio_hw *hw = dev->data->dev_private;
974         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
975
976         PMD_INIT_FUNC_TRACE();
977
978         if (!vtpci_packed_queue(hw)) {
979                 if (hw->use_inorder_tx)
980                         vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
981         }
982
983         VIRTQUEUE_DUMP(vq);
984
985         return 0;
986 }
987
988 static inline void
989 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
990 {
991         int error;
992         /*
993          * Requeue the discarded mbuf. This should always be
994          * successful since it was just dequeued.
995          */
996         if (vtpci_packed_queue(vq->hw))
997                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
998         else
999                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1000
1001         if (unlikely(error)) {
1002                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1003                 rte_pktmbuf_free(m);
1004         }
1005 }
1006
1007 static inline void
1008 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1009 {
1010         int error;
1011
1012         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1013         if (unlikely(error)) {
1014                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1015                 rte_pktmbuf_free(m);
1016         }
1017 }
1018
1019 static inline void
1020 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1021 {
1022         uint32_t s = mbuf->pkt_len;
1023         struct ether_addr *ea;
1024
1025         stats->bytes += s;
1026
1027         if (s == 64) {
1028                 stats->size_bins[1]++;
1029         } else if (s > 64 && s < 1024) {
1030                 uint32_t bin;
1031
1032                 /* count leading zeros to index into the correct size bin */
1033                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1034                 stats->size_bins[bin]++;
1035         } else {
1036                 if (s < 64)
1037                         stats->size_bins[0]++;
1038                 else if (s < 1519)
1039                         stats->size_bins[6]++;
1040                 else if (s >= 1519)
1041                         stats->size_bins[7]++;
1042         }
1043
1044         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
1045         if (is_multicast_ether_addr(ea)) {
1046                 if (is_broadcast_ether_addr(ea))
1047                         stats->broadcast++;
1048                 else
1049                         stats->multicast++;
1050         }
1051 }
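/*
 * The size-bin arithmetic above, worked through: for 64 < s < 1024 the bin is
 * 32 - __builtin_clz(s) - 5, e.g. s = 300 needs 9 bits, so clz = 23 and the
 * bin index is 4 (the 256..511 bucket); 65..127 land in bin 2, 128..255 in
 * bin 3 and 512..1023 in bin 5. Outside that range, bin 0 is below 64 bytes,
 * bin 1 is exactly 64, bin 6 is 1024..1518 and bin 7 is 1519 and above.
 */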
1052
1053 static inline void
1054 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1055 {
1056         VIRTIO_DUMP_PACKET(m, m->data_len);
1057
1058         virtio_update_packet_stats(&rxvq->stats, m);
1059 }
1060
1061 /* Optionally fill offload information in structure */
1062 static inline int
1063 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1064 {
1065         struct rte_net_hdr_lens hdr_lens;
1066         uint32_t hdrlen, ptype;
1067         int l4_supported = 0;
1068
1069         /* nothing to do */
1070         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1071                 return 0;
1072
1073         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1074
1075         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1076         m->packet_type = ptype;
1077         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1078             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1079             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1080                 l4_supported = 1;
1081
1082         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1083                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1084                 if (hdr->csum_start <= hdrlen && l4_supported) {
1085                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1086                 } else {
1087                         /* Unknown proto or tunnel, do sw cksum. We can assume
1088                          * the cksum field is in the first segment since the
1089                          * buffers we provided to the host are large enough.
1090                          * In case of SCTP, this will be wrong since it's a CRC
1091                          * but there's nothing we can do.
1092                          */
1093                         uint16_t csum = 0, off;
1094
1095                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1096                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1097                                 &csum);
1098                         if (likely(csum != 0xffff))
1099                                 csum = ~csum;
1100                         off = hdr->csum_offset + hdr->csum_start;
1101                         if (rte_pktmbuf_data_len(m) >= off + 1)
1102                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1103                                         off) = csum;
1104                 }
1105         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1106                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1107         }
1108
1109         /* GSO request, save required information in mbuf */
1110         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1111                 /* Check unsupported modes */
1112                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1113                     (hdr->gso_size == 0)) {
1114                         return -EINVAL;
1115                 }
1116
1117                 /* Update MSS length in mbuf */
1118                 m->tso_segsz = hdr->gso_size;
1119                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1120                         case VIRTIO_NET_HDR_GSO_TCPV4:
1121                         case VIRTIO_NET_HDR_GSO_TCPV6:
1122                                 m->ol_flags |= PKT_RX_LRO | \
1123                                         PKT_RX_L4_CKSUM_NONE;
1124                                 break;
1125                         default:
1126                                 return -EINVAL;
1127                 }
1128         }
1129
1130         return 0;
1131 }
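/*
 * Worked example of the software checksum fix-up above (hypothetical frame):
 * with csum_start = 34 (14 bytes of Ethernet + 20 of IPv4) and csum_offset =
 * 16 (the TCP checksum field), rte_raw_cksum_mbuf() sums everything from byte
 * 34 to the end of the packet, the result is complemented unless it is
 * 0xffff, and the 16-bit value is stored at byte offset 34 + 16 = 50. This
 * path is only taken when the host set NEEDS_CSUM for a packet the rte_net
 * parser does not classify as plain TCP, UDP or SCTP (unknown proto or
 * tunnel).
 */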
1132
1133 #define VIRTIO_MBUF_BURST_SZ 64
1134 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
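/*
 * With a typical 64-byte cache line and a 16-byte struct vring_desc this
 * evaluates to 4. The receive paths below trim each burst so that it ends on
 * that boundary, e.g. (hypothetically) vq_used_cons_idx = 6 and num = 32 give
 * (6 + 32) % 4 = 2, so num is reduced to 30 and the final used index, 36, is
 * cache-line aligned for the next burst.
 */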
1135 uint16_t
1136 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1137 {
1138         struct virtnet_rx *rxvq = rx_queue;
1139         struct virtqueue *vq = rxvq->vq;
1140         struct virtio_hw *hw = vq->hw;
1141         struct rte_mbuf *rxm, *new_mbuf;
1142         uint16_t nb_used, num, nb_rx;
1143         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1144         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1145         int error;
1146         uint32_t i, nb_enqueued;
1147         uint32_t hdr_size;
1148         struct virtio_net_hdr *hdr;
1149
1150         nb_rx = 0;
1151         if (unlikely(hw->started == 0))
1152                 return nb_rx;
1153
1154         nb_used = VIRTQUEUE_NUSED(vq);
1155
1156         virtio_rmb(hw->weak_barriers);
1157
1158         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1159         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1160                 num = VIRTIO_MBUF_BURST_SZ;
1161         if (likely(num > DESC_PER_CACHELINE))
1162                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1163
1164         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1165         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1166
1167         nb_enqueued = 0;
1168         hdr_size = hw->vtnet_hdr_size;
1169
1170         for (i = 0; i < num ; i++) {
1171                 rxm = rcv_pkts[i];
1172
1173                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1174
1175                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1176                         PMD_RX_LOG(ERR, "Packet drop");
1177                         nb_enqueued++;
1178                         virtio_discard_rxbuf(vq, rxm);
1179                         rxvq->stats.errors++;
1180                         continue;
1181                 }
1182
1183                 rxm->port = rxvq->port_id;
1184                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1185                 rxm->ol_flags = 0;
1186                 rxm->vlan_tci = 0;
1187
1188                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1189                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1190
1191                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1192                         RTE_PKTMBUF_HEADROOM - hdr_size);
1193
1194                 if (hw->vlan_strip)
1195                         rte_vlan_strip(rxm);
1196
1197                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1198                         virtio_discard_rxbuf(vq, rxm);
1199                         rxvq->stats.errors++;
1200                         continue;
1201                 }
1202
1203                 virtio_rx_stats_updated(rxvq, rxm);
1204
1205                 rx_pkts[nb_rx++] = rxm;
1206         }
1207
1208         rxvq->stats.packets += nb_rx;
1209
1210         /* Allocate new mbuf for the used descriptor */
1211         while (likely(!virtqueue_full(vq))) {
1212                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1213                 if (unlikely(new_mbuf == NULL)) {
1214                         struct rte_eth_dev *dev
1215                                 = &rte_eth_devices[rxvq->port_id];
1216                         dev->data->rx_mbuf_alloc_failed++;
1217                         break;
1218                 }
1219                 error = virtqueue_enqueue_recv_refill(vq, &new_mbuf, 1);
1220                 if (unlikely(error)) {
1221                         rte_pktmbuf_free(new_mbuf);
1222                         break;
1223                 }
1224                 nb_enqueued++;
1225         }
1226
1227         if (likely(nb_enqueued)) {
1228                 vq_update_avail_idx(vq);
1229
1230                 if (unlikely(virtqueue_kick_prepare(vq))) {
1231                         virtqueue_notify(vq);
1232                         PMD_RX_LOG(DEBUG, "Notified");
1233                 }
1234         }
1235
1236         return nb_rx;
1237 }
1238
1239 uint16_t
1240 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1241                         uint16_t nb_pkts)
1242 {
1243         struct virtnet_rx *rxvq = rx_queue;
1244         struct virtqueue *vq = rxvq->vq;
1245         struct virtio_hw *hw = vq->hw;
1246         struct rte_mbuf *rxm, *new_mbuf;
1247         uint16_t num, nb_rx;
1248         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1249         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1250         int error;
1251         uint32_t i, nb_enqueued;
1252         uint32_t hdr_size;
1253         struct virtio_net_hdr *hdr;
1254
1255         nb_rx = 0;
1256         if (unlikely(hw->started == 0))
1257                 return nb_rx;
1258
1259         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1260         if (likely(num > DESC_PER_CACHELINE))
1261                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1262
1263         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1264         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1265
1266         nb_enqueued = 0;
1267         hdr_size = hw->vtnet_hdr_size;
1268
1269         for (i = 0; i < num; i++) {
1270                 rxm = rcv_pkts[i];
1271
1272                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1273
1274                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1275                         PMD_RX_LOG(ERR, "Packet drop");
1276                         nb_enqueued++;
1277                         virtio_discard_rxbuf(vq, rxm);
1278                         rxvq->stats.errors++;
1279                         continue;
1280                 }
1281
1282                 rxm->port = rxvq->port_id;
1283                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1284                 rxm->ol_flags = 0;
1285                 rxm->vlan_tci = 0;
1286
1287                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1288                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1289
1290                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1291                         RTE_PKTMBUF_HEADROOM - hdr_size);
1292
1293                 if (hw->vlan_strip)
1294                         rte_vlan_strip(rxm);
1295
1296                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1297                         virtio_discard_rxbuf(vq, rxm);
1298                         rxvq->stats.errors++;
1299                         continue;
1300                 }
1301
1302                 virtio_rx_stats_updated(rxvq, rxm);
1303
1304                 rx_pkts[nb_rx++] = rxm;
1305         }
1306
1307         rxvq->stats.packets += nb_rx;
1308
1309         /* Allocate new mbuf for the used descriptor */
1310         while (likely(!virtqueue_full(vq))) {
1311                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1312                 if (unlikely(new_mbuf == NULL)) {
1313                         struct rte_eth_dev *dev =
1314                                 &rte_eth_devices[rxvq->port_id];
1315                         dev->data->rx_mbuf_alloc_failed++;
1316                         break;
1317                 }
1318                 error = virtqueue_enqueue_recv_refill_packed(vq, &new_mbuf, 1);
1319                 if (unlikely(error)) {
1320                         rte_pktmbuf_free(new_mbuf);
1321                         break;
1322                 }
1323                 nb_enqueued++;
1324         }
1325
1326         if (likely(nb_enqueued)) {
1327                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1328                         virtqueue_notify(vq);
1329                         PMD_RX_LOG(DEBUG, "Notified");
1330                 }
1331         }
1332
1333         return nb_rx;
1334 }
1335
1336
1337 uint16_t
1338 virtio_recv_pkts_inorder(void *rx_queue,
1339                         struct rte_mbuf **rx_pkts,
1340                         uint16_t nb_pkts)
1341 {
1342         struct virtnet_rx *rxvq = rx_queue;
1343         struct virtqueue *vq = rxvq->vq;
1344         struct virtio_hw *hw = vq->hw;
1345         struct rte_mbuf *rxm;
1346         struct rte_mbuf *prev;
1347         uint16_t nb_used, num, nb_rx;
1348         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1349         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1350         int error;
1351         uint32_t nb_enqueued;
1352         uint32_t seg_num;
1353         uint32_t seg_res;
1354         uint32_t hdr_size;
1355         int32_t i;
1356
1357         nb_rx = 0;
1358         if (unlikely(hw->started == 0))
1359                 return nb_rx;
1360
1361         nb_used = VIRTQUEUE_NUSED(vq);
1362         nb_used = RTE_MIN(nb_used, nb_pkts);
1363         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1364
1365         virtio_rmb(hw->weak_barriers);
1366
1367         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1368
1369         nb_enqueued = 0;
1370         seg_num = 1;
1371         seg_res = 0;
1372         hdr_size = hw->vtnet_hdr_size;
1373
1374         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1375
1376         for (i = 0; i < num; i++) {
1377                 struct virtio_net_hdr_mrg_rxbuf *header;
1378
1379                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1380                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1381
1382                 rxm = rcv_pkts[i];
1383
1384                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1385                         PMD_RX_LOG(ERR, "Packet drop");
1386                         nb_enqueued++;
1387                         virtio_discard_rxbuf_inorder(vq, rxm);
1388                         rxvq->stats.errors++;
1389                         continue;
1390                 }
1391
1392                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1393                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1394                          - hdr_size);
1395
1396                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1397                         seg_num = header->num_buffers;
1398                         if (seg_num == 0)
1399                                 seg_num = 1;
1400                 } else {
1401                         seg_num = 1;
1402                 }
1403
1404                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1405                 rxm->nb_segs = seg_num;
1406                 rxm->ol_flags = 0;
1407                 rxm->vlan_tci = 0;
1408                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1409                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1410
1411                 rxm->port = rxvq->port_id;
1412
1413                 rx_pkts[nb_rx] = rxm;
1414                 prev = rxm;
1415
1416                 if (vq->hw->has_rx_offload &&
1417                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1418                         virtio_discard_rxbuf_inorder(vq, rxm);
1419                         rxvq->stats.errors++;
1420                         continue;
1421                 }
1422
1423                 if (hw->vlan_strip)
1424                         rte_vlan_strip(rx_pkts[nb_rx]);
1425
1426                 seg_res = seg_num - 1;
1427
1428                 /* Merge remaining segments */
1429                 while (seg_res != 0 && i < (num - 1)) {
1430                         i++;
1431
1432                         rxm = rcv_pkts[i];
1433                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1434                         rxm->pkt_len = (uint32_t)(len[i]);
1435                         rxm->data_len = (uint16_t)(len[i]);
1436
1437                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1438                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1439
1440                         if (prev)
1441                                 prev->next = rxm;
1442
1443                         prev = rxm;
1444                         seg_res -= 1;
1445                 }
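                /*
                 * Continuation buffers chained above carry no virtio-net
                 * header: their payload starts hdr_size bytes before the
                 * normal headroom offset, which is why data_off is pulled
                 * back and the full len[i] is accounted to the head mbuf.
                 */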
1446
1447                 if (!seg_res) {
1448                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1449                         nb_rx++;
1450                 }
1451         }

1452
1453         /* The last packet may still need more segments merged */
1454         while (seg_res != 0) {
1455                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1456                                         VIRTIO_MBUF_BURST_SZ);
1457
1458                 prev = rcv_pkts[nb_rx];
1459                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1460                         virtio_rmb(hw->weak_barriers);
1461                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1462                                                            rcv_cnt);
1463                         uint16_t extra_idx = 0;
1464
1465                         rcv_cnt = num;
1466                         while (extra_idx < rcv_cnt) {
1467                                 rxm = rcv_pkts[extra_idx];
1468                                 rxm->data_off =
1469                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1470                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1471                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1472                                 prev->next = rxm;
1473                                 prev = rxm;
1474                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1475                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1476                                 extra_idx += 1;
1477                         }
1478                         seg_res -= rcv_cnt;
1479
1480                         if (!seg_res) {
1481                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1482                                 nb_rx++;
1483                         }
1484                 } else {
1485                         PMD_RX_LOG(ERR,
1486                                         "Not enough segments for packet.");
1487                         virtio_discard_rxbuf_inorder(vq, prev);
1488                         rxvq->stats.errors++;
1489                         break;
1490                 }
1491         }
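        /*
         * The loop above finishes a packet whose buffers straddled the end
         * of the initial dequeue burst: extra used entries are fetched in
         * chunks until the advertised segment count is met.  If the device
         * has not yet filled the remaining buffers, the partial chain is
         * dropped and counted as an error.
         */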
1492
1493         rxvq->stats.packets += nb_rx;
1494
1495         /* Allocate new mbufs to refill the used descriptors */
1496
1497         if (likely(!virtqueue_full(vq))) {
1498                 /* free_cnt may include mrg descs */
1499                 uint16_t free_cnt = vq->vq_free_cnt;
1500                 struct rte_mbuf *new_pkts[free_cnt];
1501
1502                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1503                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1504                                         free_cnt);
1505                         if (unlikely(error)) {
1506                                 for (i = 0; i < free_cnt; i++)
1507                                         rte_pktmbuf_free(new_pkts[i]);
1508                         }
1509                         nb_enqueued += free_cnt;
1510                 } else {
1511                         struct rte_eth_dev *dev =
1512                                 &rte_eth_devices[rxvq->port_id];
1513                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1514                 }
1515         }
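        /*
         * Refill: one mbuf per currently free slot is allocated in a single
         * bulk call and handed back to the device.  A failed bulk
         * allocation leaves the ring as is and is only reflected in the
         * ethdev rx_mbuf_alloc_failed counter.
         */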
1516
1517         if (likely(nb_enqueued)) {
1518                 vq_update_avail_idx(vq);
1519
1520                 if (unlikely(virtqueue_kick_prepare(vq))) {
1521                         virtqueue_notify(vq);
1522                         PMD_RX_LOG(DEBUG, "Notified");
1523                 }
1524         }
1525
1526         return nb_rx;
1527 }
1528
1529 uint16_t
1530 virtio_recv_mergeable_pkts(void *rx_queue,
1531                         struct rte_mbuf **rx_pkts,
1532                         uint16_t nb_pkts)
1533 {
1534         struct virtnet_rx *rxvq = rx_queue;
1535         struct virtqueue *vq = rxvq->vq;
1536         struct virtio_hw *hw = vq->hw;
1537         struct rte_mbuf *rxm;
1538         struct rte_mbuf *prev;
1539         uint16_t nb_used, num, nb_rx = 0;
1540         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1541         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1542         int error;
1543         uint32_t nb_enqueued = 0;
1544         uint32_t seg_num = 0;
1545         uint32_t seg_res = 0;
1546         uint32_t hdr_size = hw->vtnet_hdr_size;
1547         int32_t i;
1548
1549         if (unlikely(hw->started == 0))
1550                 return nb_rx;
1551
1552         nb_used = VIRTQUEUE_NUSED(vq);
1553
1554         virtio_rmb(hw->weak_barriers);
1555
1556         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1557
1558         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1559         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1560                 num = VIRTIO_MBUF_BURST_SZ;
1561         if (likely(num > DESC_PER_CACHELINE))
1562                 num = num - ((vq->vq_used_cons_idx + num) %
1563                                 DESC_PER_CACHELINE);
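        /*
         * Trimming the burst so that vq_used_cons_idx ends up aligned to
         * DESC_PER_CACHELINE is presumably meant to keep used-ring accesses
         * cache-line aligned, so consecutive bursts do not repeatedly touch
         * a cache line the device may still be writing.
         */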
1564
1565
1566         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1567
1568         for (i = 0; i < num; i++) {
1569                 struct virtio_net_hdr_mrg_rxbuf *header;
1570
1571                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1572                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1573
1574                 rxm = rcv_pkts[i];
1575
1576                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1577                         PMD_RX_LOG(ERR, "Packet drop");
1578                         nb_enqueued++;
1579                         virtio_discard_rxbuf(vq, rxm);
1580                         rxvq->stats.errors++;
1581                         continue;
1582                 }
1583
1584                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1585                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1586                          - hdr_size);
1587                 seg_num = header->num_buffers;
1588                 if (seg_num == 0)
1589                         seg_num = 1;
1590
1591                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1592                 rxm->nb_segs = seg_num;
1593                 rxm->ol_flags = 0;
1594                 rxm->vlan_tci = 0;
1595                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1596                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1597
1598                 rxm->port = rxvq->port_id;
1599
1600                 rx_pkts[nb_rx] = rxm;
1601                 prev = rxm;
1602
1603                 if (hw->has_rx_offload &&
1604                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1605                         virtio_discard_rxbuf(vq, rxm);
1606                         rxvq->stats.errors++;
1607                         continue;
1608                 }
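                /*
                 * virtio_rx_offload() is expected to translate the
                 * checksum/GSO hints in the virtio-net header into mbuf
                 * ol_flags; a negative return indicates an unsupported or
                 * malformed header, so the packet is dropped.
                 */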
1609
1610                 if (hw->vlan_strip)
1611                         rte_vlan_strip(rx_pkts[nb_rx]);
1612
1613                 seg_res = seg_num - 1;
1614
1615                 /* Merge remaining segments */
1616                 while (seg_res != 0 && i < (num - 1)) {
1617                         i++;
1618
1619                         rxm = rcv_pkts[i];
1620                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1621                         rxm->pkt_len = (uint32_t)(len[i]);
1622                         rxm->data_len = (uint16_t)(len[i]);
1623
1624                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1625                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1626
1627                         if (prev)
1628                                 prev->next = rxm;
1629
1630                         prev = rxm;
1631                         seg_res -= 1;
1632                 }
1633
1634                 if (!seg_res) {
1635                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1636                         nb_rx++;
1637                 }
1638         }
1639
1640         /* The last packet may still need more segments merged */
1641         while (seg_res != 0) {
1642                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1643                                         VIRTIO_MBUF_BURST_SZ);
1644
1645                 prev = rcv_pkts[nb_rx];
1646                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1647                         virtio_rmb(hw->weak_barriers);
1648                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1649                                                            rcv_cnt);
1650                         uint16_t extra_idx = 0;
1651
1652                         rcv_cnt = num;
1653                         while (extra_idx < rcv_cnt) {
1654                                 rxm = rcv_pkts[extra_idx];
1655                                 rxm->data_off =
1656                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1657                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1658                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1659                                 prev->next = rxm;
1660                                 prev = rxm;
1661                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1662                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1663                                 extra_idx += 1;
1664                         }
1665                         seg_res -= rcv_cnt;
1666
1667                         if (!seg_res) {
1668                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1669                                 nb_rx++;
1670                         }
1671                 } else {
1672                         PMD_RX_LOG(ERR,
1673                                         "Not enough segments for packet.");
1674                         virtio_discard_rxbuf(vq, prev);
1675                         rxvq->stats.errors++;
1676                         break;
1677                 }
1678         }
1679
1680         rxvq->stats.packets += nb_rx;
1681
1682         /* Allocate new mbufs to refill the used descriptors */
1683         if (likely(!virtqueue_full(vq))) {
1684                 /* free_cnt may include mrg descs */
1685                 uint16_t free_cnt = vq->vq_free_cnt;
1686                 struct rte_mbuf *new_pkts[free_cnt];
1687
1688                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1689                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1690                                         free_cnt);
1691                         if (unlikely(error)) {
1692                                 for (i = 0; i < free_cnt; i++)
1693                                         rte_pktmbuf_free(new_pkts[i]);
1694                         }
1695                         nb_enqueued += free_cnt;
1696                 } else {
1697                         struct rte_eth_dev *dev =
1698                                 &rte_eth_devices[rxvq->port_id];
1699                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1700                 }
1701         }
1702
1703         if (likely(nb_enqueued)) {
1704                 vq_update_avail_idx(vq);
1705
1706                 if (unlikely(virtqueue_kick_prepare(vq))) {
1707                         virtqueue_notify(vq);
1708                         PMD_RX_LOG(DEBUG, "Notified");
1709                 }
1710         }
1711
1712         return nb_rx;
1713 }
1714
1715 uint16_t
1716 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1717                         struct rte_mbuf **rx_pkts,
1718                         uint16_t nb_pkts)
1719 {
1720         struct virtnet_rx *rxvq = rx_queue;
1721         struct virtqueue *vq = rxvq->vq;
1722         struct virtio_hw *hw = vq->hw;
1723         struct rte_mbuf *rxm;
1724         struct rte_mbuf *prev = NULL;
1725         uint16_t num, nb_rx = 0;
1726         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1727         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1728         uint32_t nb_enqueued = 0;
1729         uint32_t seg_num = 0;
1730         uint32_t seg_res = 0;
1731         uint32_t hdr_size = hw->vtnet_hdr_size;
1732         int32_t i;
1733         int error;
1734
1735         if (unlikely(hw->started == 0))
1736                 return nb_rx;
1737
1738
1739         num = nb_pkts;
1740         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1741                 num = VIRTIO_MBUF_BURST_SZ;
1742         if (likely(num > DESC_PER_CACHELINE))
1743                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1744
1745         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1746
1747         for (i = 0; i < num; i++) {
1748                 struct virtio_net_hdr_mrg_rxbuf *header;
1749
1750                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1751                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1752
1753                 rxm = rcv_pkts[i];
1754
1755                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1756                         PMD_RX_LOG(ERR, "Packet drop");
1757                         nb_enqueued++;
1758                         virtio_discard_rxbuf(vq, rxm);
1759                         rxvq->stats.errors++;
1760                         continue;
1761                 }
1762
1763                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1764                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1765                 seg_num = header->num_buffers;
1766
1767                 if (seg_num == 0)
1768                         seg_num = 1;
1769
1770                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1771                 rxm->nb_segs = seg_num;
1772                 rxm->ol_flags = 0;
1773                 rxm->vlan_tci = 0;
1774                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1775                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1776
1777                 rxm->port = rxvq->port_id;
1778                 rx_pkts[nb_rx] = rxm;
1779                 prev = rxm;
1780
1781                 if (hw->has_rx_offload &&
1782                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1783                         virtio_discard_rxbuf(vq, rxm);
1784                         rxvq->stats.errors++;
1785                         continue;
1786                 }
1787
1788                 if (hw->vlan_strip)
1789                         rte_vlan_strip(rx_pkts[nb_rx]);
1790
1791                 seg_res = seg_num - 1;
1792
1793                 /* Merge remaining segments */
1794                 while (seg_res != 0 && i < (num - 1)) {
1795                         i++;
1796
1797                         rxm = rcv_pkts[i];
1798                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1799                         rxm->pkt_len = (uint32_t)(len[i]);
1800                         rxm->data_len = (uint16_t)(len[i]);
1801
1802                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1803                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1804
1805                         if (prev)
1806                                 prev->next = rxm;
1807
1808                         prev = rxm;
1809                         seg_res -= 1;
1810                 }
1811
1812                 if (!seg_res) {
1813                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1814                         nb_rx++;
1815                 }
1816         }
1817
1818         /* The last packet may still need more segments merged */
1819         while (seg_res != 0) {
1820                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1821                                         VIRTIO_MBUF_BURST_SZ);
1822                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1823                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1824                                         len, rcv_cnt);
1825                         uint16_t extra_idx = 0;
1826
1827                         rcv_cnt = num;
1828
1829                         while (extra_idx < rcv_cnt) {
1830                                 rxm = rcv_pkts[extra_idx];
1831
1832                                 rxm->data_off =
1833                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1834                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1835                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1836
1837                                 prev->next = rxm;
1838                                 prev = rxm;
1839                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1840                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1841                                 extra_idx += 1;
1842                         }
1843                         seg_res -= rcv_cnt;
1844                         if (!seg_res) {
1845                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1846                                 nb_rx++;
1847                         }
1848                 } else {
1849                         PMD_RX_LOG(ERR,
1850                                         "Not enough segments for packet.");
1851                         if (prev)
1852                                 virtio_discard_rxbuf(vq, prev);
1853                         rxvq->stats.errors++;
1854                         break;
1855                 }
1856         }
1857
1858         rxvq->stats.packets += nb_rx;
1859
1860         /* Allocate new mbufs to refill the used descriptors */
1861         if (likely(!virtqueue_full(vq))) {
1862                 /* free_cnt may include mrg descs */
1863                 uint16_t free_cnt = vq->vq_free_cnt;
1864                 struct rte_mbuf *new_pkts[free_cnt];
1865
1866                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1867                         error = virtqueue_enqueue_recv_refill_packed(vq,
1868                                         new_pkts, free_cnt);
1869                         if (unlikely(error)) {
1870                                 for (i = 0; i < free_cnt; i++)
1871                                         rte_pktmbuf_free(new_pkts[i]);
1872                         }
1873                         nb_enqueued += free_cnt;
1874                 } else {
1875                         struct rte_eth_dev *dev =
1876                                 &rte_eth_devices[rxvq->port_id];
1877                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1878                 }
1879         }
1880
1881         if (likely(nb_enqueued)) {
1882                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1883                         virtqueue_notify(vq);
1884                         PMD_RX_LOG(DEBUG, "Notified");
1885                 }
1886         }
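        /*
         * Unlike the split ring, the packed ring has no separate avail
         * index to publish after a refill: buffers become visible through
         * the descriptor flags written during enqueue, so only a
         * notification may be needed, gated by the device's event
         * suppression state via virtqueue_kick_prepare_packed().
         */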
1887
1888         return nb_rx;
1889 }
1890
1891 uint16_t
1892 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1893                         uint16_t nb_pkts)
1894 {
1895         struct virtnet_tx *txvq = tx_queue;
1896         struct virtqueue *vq = txvq->vq;
1897         struct virtio_hw *hw = vq->hw;
1898         uint16_t hdr_size = hw->vtnet_hdr_size;
1899         uint16_t nb_tx = 0;
1900         int error;
1901
1902         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1903                 return nb_tx;
1904
1905         if (unlikely(nb_pkts < 1))
1906                 return nb_pkts;
1907
1908         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1909
1910         if (nb_pkts > vq->vq_free_cnt)
1911                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt);
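        /*
         * Only the shortfall between the requested burst and the current
         * free count is reclaimed here; any further cleanup happens
         * per-packet below when a chain does not fit.
         */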
1912
1913         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1914                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1915                 int can_push = 0, slots, need;
1916
1917                 /* Do VLAN tag insertion */
1918                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1919                         error = rte_vlan_insert(&txm);
1920                         if (unlikely(error)) {
1921                                 rte_pktmbuf_free(txm);
1922                                 continue;
1923                         }
1924                 }
1925
1926                 /* optimize ring usage */
1927                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1928                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1929                     rte_mbuf_refcnt_read(txm) == 1 &&
1930                     RTE_MBUF_DIRECT(txm) &&
1931                     txm->nb_segs == 1 &&
1932                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1933                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1934                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1935                         can_push = 1;
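                /*
                 * can_push: when the device accepts any descriptor layout
                 * (ANY_LAYOUT or a VERSION_1 device), the virtio-net header
                 * can be prepended inside the headroom of a private,
                 * single-segment mbuf, so header and data share one
                 * descriptor instead of needing a separate header
                 * descriptor.
                 */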
1936
1937                 /* How many main ring entries are needed for this Tx?
1938                  * any_layout => number of segments
1939                  * default    => number of segments + 1
1940                  */
1941                 slots = txm->nb_segs + !can_push;
1942                 need = slots - vq->vq_free_cnt;
1943
1944                 /* A positive value indicates free vring descriptors are needed */
1945                 if (unlikely(need > 0)) {
1946                         virtio_xmit_cleanup_packed(vq, need);
1947                         need = slots - vq->vq_free_cnt;
1948                         if (unlikely(need > 0)) {
1949                                 PMD_TX_LOG(ERR,
1950                                            "No free tx descriptors to transmit");
1951                                 break;
1952                         }
1953                 }
1954
1955                 /* Enqueue Packet buffers */
1956                 virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push);
1957
1958                 virtio_update_packet_stats(&txvq->stats, txm);
1959         }
1960
1961         txvq->stats.packets += nb_tx;
1962
1963         if (likely(nb_tx)) {
1964                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1965                         virtqueue_notify(vq);
1966                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1967                 }
1968         }
1969
1970         return nb_tx;
1971 }
1972
1973 uint16_t
1974 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1975 {
1976         struct virtnet_tx *txvq = tx_queue;
1977         struct virtqueue *vq = txvq->vq;
1978         struct virtio_hw *hw = vq->hw;
1979         uint16_t hdr_size = hw->vtnet_hdr_size;
1980         uint16_t nb_used, nb_tx = 0;
1981         int error;
1982
1983         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1984                 return nb_tx;
1985
1986         if (unlikely(nb_pkts < 1))
1987                 return nb_pkts;
1988
1989         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1990         nb_used = VIRTQUEUE_NUSED(vq);
1991
1992         virtio_rmb(hw->weak_barriers);
1993         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1994                 virtio_xmit_cleanup(vq, nb_used);
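        /*
         * Completed Tx descriptors are reclaimed lazily: cleanup only runs
         * once the number of used entries exceeds the ring size minus
         * vq_free_thresh, amortizing the cost over many bursts (for
         * example, with 256 entries and a threshold of 32, cleanup starts
         * once more than 224 entries are in use).
         */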
1995
1996         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1997                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1998                 int can_push = 0, use_indirect = 0, slots, need;
1999
2000                 /* Do VLAN tag insertion */
2001                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2002                         error = rte_vlan_insert(&txm);
2003                         if (unlikely(error)) {
2004                                 rte_pktmbuf_free(txm);
2005                                 continue;
2006                         }
2007                 }
2008
2009                 /* optimize ring usage */
2010                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2011                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2012                     rte_mbuf_refcnt_read(txm) == 1 &&
2013                     RTE_MBUF_DIRECT(txm) &&
2014                     txm->nb_segs == 1 &&
2015                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2016                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2017                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2018                         can_push = 1;
2019                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2020                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2021                         use_indirect = 1;
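                /*
                 * With VIRTIO_RING_F_INDIRECT_DESC, the whole segment chain
                 * is described in a separate indirect descriptor table, so
                 * the packet consumes a single slot in the main ring
                 * regardless of nb_segs (bounded by VIRTIO_MAX_TX_INDIRECT).
                 * For example, a 3-segment mbuf needs 4 slots by default,
                 * 3 with can_push, and 1 with an indirect descriptor.
                 */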
2022
2023                 /* How many main ring entries are needed for this Tx?
2024                  * any_layout => number of segments
2025                  * indirect   => 1
2026                  * default    => number of segments + 1
2027                  */
2028                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2029                 need = slots - vq->vq_free_cnt;
2030
2031                 /* A positive value indicates free vring descriptors are needed */
2032                 if (unlikely(need > 0)) {
2033                         nb_used = VIRTQUEUE_NUSED(vq);
2034                         virtio_rmb(hw->weak_barriers);
2035                         need = RTE_MIN(need, (int)nb_used);
2036
2037                         virtio_xmit_cleanup(vq, need);
2038                         need = slots - vq->vq_free_cnt;
2039                         if (unlikely(need > 0)) {
2040                                 PMD_TX_LOG(ERR,
2041                                            "No free tx descriptors to transmit");
2042                                 break;
2043                         }
2044                 }
2045
2046                 /* Enqueue Packet buffers */
2047                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2048                         can_push, 0);
2049
2050                 virtio_update_packet_stats(&txvq->stats, txm);
2051         }
2052
2053         txvq->stats.packets += nb_tx;
2054
2055         if (likely(nb_tx)) {
2056                 vq_update_avail_idx(vq);
2057
2058                 if (unlikely(virtqueue_kick_prepare(vq))) {
2059                         virtqueue_notify(vq);
2060                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2061                 }
2062         }
2063
2064         return nb_tx;
2065 }
2066
2067 uint16_t
2068 virtio_xmit_pkts_inorder(void *tx_queue,
2069                         struct rte_mbuf **tx_pkts,
2070                         uint16_t nb_pkts)
2071 {
2072         struct virtnet_tx *txvq = tx_queue;
2073         struct virtqueue *vq = txvq->vq;
2074         struct virtio_hw *hw = vq->hw;
2075         uint16_t hdr_size = hw->vtnet_hdr_size;
2076         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2077         struct rte_mbuf *inorder_pkts[nb_pkts];
2078         int error;
2079
2080         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2081                 return nb_tx;
2082
2083         if (unlikely(nb_pkts < 1))
2084                 return nb_pkts;
2085
2086         VIRTQUEUE_DUMP(vq);
2087         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2088         nb_used = VIRTQUEUE_NUSED(vq);
2089
2090         virtio_rmb(hw->weak_barriers);
2091         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2092                 virtio_xmit_cleanup_inorder(vq, nb_used);
2093
2094         if (unlikely(!vq->vq_free_cnt))
2095                 virtio_xmit_cleanup_inorder(vq, nb_used);
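        /*
         * Even below the lazy-cleanup threshold, a completely exhausted
         * ring forces an immediate cleanup so that at least part of this
         * burst can be transmitted.
         */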
2096
2097         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2098
2099         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2100                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2101                 int slots, need;
2102
2103                 /* Do VLAN tag insertion */
2104                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2105                         error = rte_vlan_insert(&txm);
2106                         if (unlikely(error)) {
2107                                 rte_pktmbuf_free(txm);
2108                                 continue;
2109                         }
2110                 }
2111
2112                 /* optimize ring usage */
2113                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2114                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2115                      rte_mbuf_refcnt_read(txm) == 1 &&
2116                      RTE_MBUF_DIRECT(txm) &&
2117                      txm->nb_segs == 1 &&
2118                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2119                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2120                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2121                         inorder_pkts[nb_inorder_pkts] = txm;
2122                         nb_inorder_pkts++;
2123
2124                         virtio_update_packet_stats(&txvq->stats, txm);
2125                         continue;
2126                 }
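                /*
                 * Packets that can carry the header in their own headroom
                 * are batched and later enqueued back to back, one
                 * descriptor each.  A packet that cannot be handled this
                 * way flushes the pending batch first, keeping descriptor
                 * order identical to packet order as VIRTIO_F_IN_ORDER
                 * requires.
                 */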
2127
2128                 if (nb_inorder_pkts) {
2129                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2130                                                         nb_inorder_pkts);
2131                         nb_inorder_pkts = 0;
2132                 }
2133
2134                 slots = txm->nb_segs + 1;
2135                 need = slots - vq->vq_free_cnt;
2136                 if (unlikely(need > 0)) {
2137                         nb_used = VIRTQUEUE_NUSED(vq);
2138                         virtio_rmb(hw->weak_barriers);
2139                         need = RTE_MIN(need, (int)nb_used);
2140
2141                         virtio_xmit_cleanup_inorder(vq, need);
2142
2143                         need = slots - vq->vq_free_cnt;
2144
2145                         if (unlikely(need > 0)) {
2146                                 PMD_TX_LOG(ERR,
2147                                         "No free tx descriptors to transmit");
2148                                 break;
2149                         }
2150                 }
2151                 /* Enqueue Packet buffers */
2152                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2153
2154                 virtio_update_packet_stats(&txvq->stats, txm);
2155         }
2156
2157         /* Transmit all inorder packets */
2158         if (nb_inorder_pkts)
2159                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2160                                                 nb_inorder_pkts);
2161
2162         txvq->stats.packets += nb_tx;
2163
2164         if (likely(nb_tx)) {
2165                 vq_update_avail_idx(vq);
2166
2167                 if (unlikely(virtqueue_kick_prepare(vq))) {
2168                         virtqueue_notify(vq);
2169                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2170                 }
2171         }
2172
2173         VIRTQUEUE_DUMP(vq);
2174
2175         return nb_tx;
2176 }