1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
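/*
 * In-order free: descriptors are consumed sequentially, so returning them
 * only requires advancing the free counter and recording the new tail index.
 */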
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of the
78          * newly freed chain. If the virtqueue was completely used, the tail
79          * index is VQ_RING_DESC_CHAIN_END and the freed chain becomes the new head.
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
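/*
 * Return the descriptors identified by 'id' to the free list of a packed
 * virtqueue, linking freed entries together through the per-id 'next' field
 * so they can be reused later.
 */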
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
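/*
 * Dequeue up to 'num' received buffers from a packed virtqueue. The loop
 * stops at the first descriptor not yet marked used; the wrap counter is
 * flipped whenever the consumer index wraps around the ring.
 */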
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->vq_packed.ring.desc;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->vq_packed.used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
150
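/*
 * Dequeue up to 'num' received buffers from a split virtqueue by walking
 * the used ring and returning each completed descriptor chain to the
 * free list.
 */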
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /* Caller has already checked that 'num' used entries are available */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_split.ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
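/*
 * In-order dequeue for the split ring: the descriptor index always equals
 * the used-ring index, so all processed descriptors are freed in one batch
 * at the end.
 */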
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* In-order: descriptor index equals used-ring index */
202                 uep = &vq->vq_split.ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
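/*
 * Reclaim completed TX descriptors from a packed ring used in order. The id
 * in a used descriptor marks the last buffer of a completed batch, so walk
 * the bookkeeping entries until that id is reached, freeing mbufs on the way.
 */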
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->vq_packed.used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
258
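/*
 * Reclaim completed TX descriptors from a packed ring used out of order,
 * freeing each completed chain by its descriptor id.
 */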
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->vq_packed.used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
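/* Reclaim up to 'num' completed TX descriptor chains from a split virtqueue. */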
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_split.ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
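/*
 * Post 'num' receive buffers on a split ring used in order. Each mbuf is
 * positioned so that the virtio-net header lands in its headroom.
 */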
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_split.ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
385
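/*
 * Post 'num' receive buffers on a split ring, following the descriptor
 * free list rather than assuming in-order use.
 */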
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_split.ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
428
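/*
 * Post 'num' receive buffers on a packed ring. The descriptor flags are
 * written last, after a write barrier, so the device never sees a partially
 * filled descriptor.
 */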
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
434         uint16_t flags = vq->vq_packed.cached_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->vq_packed.cached_flags ^=
464                                 VRING_PACKED_DESC_F_AVAIL_USED;
465                         flags = vq->vq_packed.cached_flags;
466                 }
467         }
468         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
469         return 0;
470 }
471
472 /* When doing TSO, the IP length is not included in the pseudo-header
473  * checksum of the packet given to the PMD, but the virtio device expects
474  * it to be included, so adjust the checksum here.
475  */
476 static void
477 virtio_tso_fix_cksum(struct rte_mbuf *m)
478 {
479         /* common case: header is not fragmented */
480         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
481                         m->l4_len)) {
482                 struct rte_ipv4_hdr *iph;
483                 struct rte_ipv6_hdr *ip6h;
484                 struct rte_tcp_hdr *th;
485                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
486                 uint32_t tmp;
487
488                 iph = rte_pktmbuf_mtod_offset(m,
489                                         struct rte_ipv4_hdr *, m->l2_len);
490                 th = RTE_PTR_ADD(iph, m->l3_len);
491                 if ((iph->version_ihl >> 4) == 4) {
492                         iph->hdr_checksum = 0;
493                         iph->hdr_checksum = rte_ipv4_cksum(iph);
494                         ip_len = iph->total_length;
495                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
496                                 m->l3_len);
497                 } else {
498                         ip6h = (struct rte_ipv6_hdr *)iph;
499                         ip_paylen = ip6h->payload_len;
500                 }
501
502                 /* calculate the new phdr checksum not including ip_paylen */
503                 prev_cksum = th->cksum;
504                 tmp = prev_cksum;
505                 tmp += ip_paylen;
506                 tmp = (tmp & 0xffff) + (tmp >> 16);
507                 new_cksum = tmp;
508
509                 /* replace it in the packet */
510                 th->cksum = new_cksum;
511         }
512 }
513
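/*
 * Illustration (hypothetical values): if th->cksum is 0xf00d and ip_paylen
 * is 0x1400, then tmp = 0x1040d, and folding the carry back in gives
 * 0x040d + 0x1 = 0x040e, i.e. the pseudo-header checksum with the payload
 * length included, which is what the virtio device expects.
 */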
514
515 /* skip the write when the value is unchanged, to lessen cache pressure */
516 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
517         if ((var) != (val))                     \
518                 (var) = (val);                  \
519 } while (0)
520
521 #define virtqueue_clear_net_hdr(_hdr) do {              \
522         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
523         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
524         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
525         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
526         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
527         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
528 } while (0)
529
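/*
 * Translate mbuf TX offload flags into the virtio-net header: checksum
 * start/offset for L4 checksum offload, and GSO type/size for TSO.
 */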
530 static inline void
531 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
532                         struct rte_mbuf *cookie,
533                         bool offload)
534 {
535         if (offload) {
536                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
537                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
538
539                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
540                 case PKT_TX_UDP_CKSUM:
541                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
542                         hdr->csum_offset = offsetof(struct rte_udp_hdr,
543                                 dgram_cksum);
544                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
545                         break;
546
547                 case PKT_TX_TCP_CKSUM:
548                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
549                         hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
550                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
551                         break;
552
553                 default:
554                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
555                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
556                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
557                         break;
558                 }
559
560                 /* TCP Segmentation Offload */
561                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
562                         virtio_tso_fix_cksum(cookie);
563                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
564                                 VIRTIO_NET_HDR_GSO_TCPV6 :
565                                 VIRTIO_NET_HDR_GSO_TCPV4;
566                         hdr->gso_size = cookie->tso_segsz;
567                         hdr->hdr_len =
568                                 cookie->l2_len +
569                                 cookie->l3_len +
570                                 cookie->l4_len;
571                 } else {
572                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
573                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
574                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
575                 }
576         }
577 }
578
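/*
 * Enqueue single-segment packets on a split ring used in order, prepending
 * the virtio-net header into each mbuf's headroom (one descriptor per packet).
 */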
579 static inline void
580 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
581                         struct rte_mbuf **cookies,
582                         uint16_t num)
583 {
584         struct vq_desc_extra *dxp;
585         struct virtqueue *vq = txvq->vq;
586         struct vring_desc *start_dp;
587         struct virtio_net_hdr *hdr;
588         uint16_t idx;
589         uint16_t head_size = vq->hw->vtnet_hdr_size;
590         uint16_t i = 0;
591
592         idx = vq->vq_desc_head_idx;
593         start_dp = vq->vq_split.ring.desc;
594
595         while (i < num) {
596                 idx = idx & (vq->vq_nentries - 1);
597                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
598                 dxp->cookie = (void *)cookies[i];
599                 dxp->ndescs = 1;
600
601                 hdr = (struct virtio_net_hdr *)
602                         rte_pktmbuf_prepend(cookies[i], head_size);
603                 cookies[i]->pkt_len -= head_size;
604
605                 /* if offload disabled, hdr is not zeroed yet, do it now */
606                 if (!vq->hw->has_tx_offload)
607                         virtqueue_clear_net_hdr(hdr);
608                 else
609                         virtqueue_xmit_offload(hdr, cookies[i], true);
610
611                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
612                 start_dp[idx].len   = cookies[i]->data_len;
613                 start_dp[idx].flags = 0;
614
615                 vq_update_avail_ring(vq, idx);
616
617                 idx++;
618                 i++;
619         };
620
621         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
622         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
623 }
624
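/*
 * Fast path for a single-descriptor packet on a packed ring: the header is
 * prepended into the mbuf and the descriptor flags are published last,
 * after a write barrier.
 */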
625 static inline void
626 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
627                                    struct rte_mbuf *cookie,
628                                    int in_order)
629 {
630         struct virtqueue *vq = txvq->vq;
631         struct vring_packed_desc *dp;
632         struct vq_desc_extra *dxp;
633         uint16_t idx, id, flags;
634         uint16_t head_size = vq->hw->vtnet_hdr_size;
635         struct virtio_net_hdr *hdr;
636
637         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
638         idx = vq->vq_avail_idx;
639         dp = &vq->vq_packed.ring.desc[idx];
640
641         dxp = &vq->vq_descx[id];
642         dxp->ndescs = 1;
643         dxp->cookie = cookie;
644
645         flags = vq->vq_packed.cached_flags;
646
647         /* prepend cannot fail, checked by caller */
648         hdr = (struct virtio_net_hdr *)
649                 rte_pktmbuf_prepend(cookie, head_size);
650         cookie->pkt_len -= head_size;
651
652         /* if offload disabled, hdr is not zeroed yet, do it now */
653         if (!vq->hw->has_tx_offload)
654                 virtqueue_clear_net_hdr(hdr);
655         else
656                 virtqueue_xmit_offload(hdr, cookie, true);
657
658         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
659         dp->len  = cookie->data_len;
660         dp->id   = id;
661
662         if (++vq->vq_avail_idx >= vq->vq_nentries) {
663                 vq->vq_avail_idx -= vq->vq_nentries;
664                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
665         }
666
667         vq->vq_free_cnt--;
668
669         if (!in_order) {
670                 vq->vq_desc_head_idx = dxp->next;
671                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
672                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
673         }
674
675         virtio_wmb(vq->hw->weak_barriers);
676         dp->flags = flags;
677 }
678
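/*
 * General packed-ring TX enqueue: the header is either prepended into the
 * mbuf (can_push) or placed in the reserved header region, then one
 * descriptor is chained per segment. The head descriptor's flags are written
 * last so the whole chain becomes visible to the device at once.
 */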
679 static inline void
680 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
681                               uint16_t needed, int can_push, int in_order)
682 {
683         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
684         struct vq_desc_extra *dxp;
685         struct virtqueue *vq = txvq->vq;
686         struct vring_packed_desc *start_dp, *head_dp;
687         uint16_t idx, id, head_idx, head_flags;
688         uint16_t head_size = vq->hw->vtnet_hdr_size;
689         struct virtio_net_hdr *hdr;
690         uint16_t prev;
691
692         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
693
694         dxp = &vq->vq_descx[id];
695         dxp->ndescs = needed;
696         dxp->cookie = cookie;
697
698         head_idx = vq->vq_avail_idx;
699         idx = head_idx;
700         prev = head_idx;
701         start_dp = vq->vq_packed.ring.desc;
702
703         head_dp = &vq->vq_packed.ring.desc[idx];
704         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
705         head_flags |= vq->vq_packed.cached_flags;
706
707         if (can_push) {
708                 /* prepend cannot fail, checked by caller */
709                 hdr = (struct virtio_net_hdr *)
710                         rte_pktmbuf_prepend(cookie, head_size);
711                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
712                  * which is not wanted here; subtract it to restore the size.
713                  */
714                 cookie->pkt_len -= head_size;
715
716                 /* if offload disabled, it is not zeroed below, do it now */
717                 if (!vq->hw->has_tx_offload)
718                         virtqueue_clear_net_hdr(hdr);
719         } else {
720                 /* setup first tx ring slot to point to header
721                  * stored in reserved region.
722                  */
723                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
724                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
725                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
726                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
727                 idx++;
728                 if (idx >= vq->vq_nentries) {
729                         idx -= vq->vq_nentries;
730                         vq->vq_packed.cached_flags ^=
731                                 VRING_PACKED_DESC_F_AVAIL_USED;
732                 }
733         }
734
735         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
736
737         do {
738                 uint16_t flags;
739
740                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
741                 start_dp[idx].len  = cookie->data_len;
742                 if (likely(idx != head_idx)) {
743                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
744                         flags |= vq->vq_packed.cached_flags;
745                         start_dp[idx].flags = flags;
746                 }
747                 prev = idx;
748                 idx++;
749                 if (idx >= vq->vq_nentries) {
750                         idx -= vq->vq_nentries;
751                         vq->vq_packed.cached_flags ^=
752                                 VRING_PACKED_DESC_F_AVAIL_USED;
753                 }
754         } while ((cookie = cookie->next) != NULL);
755
756         start_dp[prev].id = id;
757
758         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
759         vq->vq_avail_idx = idx;
760
761         if (!in_order) {
762                 vq->vq_desc_head_idx = dxp->next;
763                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
764                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
765         }
766
767         virtio_wmb(vq->hw->weak_barriers);
768         head_dp->flags = head_flags;
769 }
770
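/*
 * Split-ring TX enqueue: depending on can_push/use_indirect the header is
 * pushed into the mbuf, described by an indirect descriptor table, or given
 * its own descriptor; the remaining descriptors carry the data segments.
 */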
771 static inline void
772 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
773                         uint16_t needed, int use_indirect, int can_push,
774                         int in_order)
775 {
776         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
777         struct vq_desc_extra *dxp;
778         struct virtqueue *vq = txvq->vq;
779         struct vring_desc *start_dp;
780         uint16_t seg_num = cookie->nb_segs;
781         uint16_t head_idx, idx;
782         uint16_t head_size = vq->hw->vtnet_hdr_size;
783         struct virtio_net_hdr *hdr;
784
785         head_idx = vq->vq_desc_head_idx;
786         idx = head_idx;
787         if (in_order)
788                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
789         else
790                 dxp = &vq->vq_descx[idx];
791         dxp->cookie = (void *)cookie;
792         dxp->ndescs = needed;
793
794         start_dp = vq->vq_split.ring.desc;
795
796         if (can_push) {
797                 /* prepend cannot fail, checked by caller */
798                 hdr = (struct virtio_net_hdr *)
799                         rte_pktmbuf_prepend(cookie, head_size);
800                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
801                  * which is not wanted here; subtract it to restore the size.
802                  */
803                 cookie->pkt_len -= head_size;
804
805                 /* if offload disabled, it is not zeroed below, do it now */
806                 if (!vq->hw->has_tx_offload)
807                         virtqueue_clear_net_hdr(hdr);
808         } else if (use_indirect) {
809                 /* setup tx ring slot to point to indirect
810                  * descriptor list stored in reserved region.
811                  *
812                  * the first slot in indirect ring is already preset
813                  * to point to the header in reserved region
814                  */
815                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
816                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
817                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
818                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
819                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
820
821                 /* loop below will fill in rest of the indirect elements */
822                 start_dp = txr[idx].tx_indir;
823                 idx = 1;
824         } else {
825                 /* setup first tx ring slot to point to header
826                  * stored in reserved region.
827                  */
828                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
829                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
830                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
831                 start_dp[idx].flags = VRING_DESC_F_NEXT;
832                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
833
834                 idx = start_dp[idx].next;
835         }
836
837         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
838
839         do {
840                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
841                 start_dp[idx].len   = cookie->data_len;
842                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
843                 idx = start_dp[idx].next;
844         } while ((cookie = cookie->next) != NULL);
845
846         if (use_indirect)
847                 idx = vq->vq_split.ring.desc[head_idx].next;
848
849         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
850
851         vq->vq_desc_head_idx = idx;
852         vq_update_avail_ring(vq, head_idx);
853
854         if (!in_order) {
855                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
856                         vq->vq_desc_tail_idx = idx;
857         }
858 }
859
860 void
861 virtio_dev_cq_start(struct rte_eth_dev *dev)
862 {
863         struct virtio_hw *hw = dev->data->dev_private;
864
865         if (hw->cvq && hw->cvq->vq) {
866                 rte_spinlock_init(&hw->cvq->lock);
867                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
868         }
869 }
870
871 int
872 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
873                         uint16_t queue_idx,
874                         uint16_t nb_desc,
875                         unsigned int socket_id __rte_unused,
876                         const struct rte_eth_rxconf *rx_conf __rte_unused,
877                         struct rte_mempool *mp)
878 {
879         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
880         struct virtio_hw *hw = dev->data->dev_private;
881         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
882         struct virtnet_rx *rxvq;
883
884         PMD_INIT_FUNC_TRACE();
885
886         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
887                 nb_desc = vq->vq_nentries;
888         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
889
890         rxvq = &vq->rxq;
891         rxvq->queue_id = queue_idx;
892         rxvq->mpool = mp;
893         dev->data->rx_queues[queue_idx] = rxvq;
894
895         return 0;
896 }
897
898 int
899 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
900 {
901         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
902         struct virtio_hw *hw = dev->data->dev_private;
903         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
904         struct virtnet_rx *rxvq = &vq->rxq;
905         struct rte_mbuf *m;
906         uint16_t desc_idx;
907         int error, nbufs, i;
908
909         PMD_INIT_FUNC_TRACE();
910
911         /* Allocate blank mbufs for each RX descriptor */
912         nbufs = 0;
913
914         if (hw->use_simple_rx) {
915                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
916                      desc_idx++) {
917                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
918                         vq->vq_split.ring.desc[desc_idx].flags =
919                                 VRING_DESC_F_WRITE;
920                 }
921
922                 virtio_rxq_vec_setup(rxvq);
923         }
924
925         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
926         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
927              desc_idx++) {
928                 vq->sw_ring[vq->vq_nentries + desc_idx] =
929                         &rxvq->fake_mbuf;
930         }
931
932         if (hw->use_simple_rx) {
933                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
934                         virtio_rxq_rearm_vec(rxvq);
935                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
936                 }
937         } else if (hw->use_inorder_rx) {
938                 if ((!virtqueue_full(vq))) {
939                         uint16_t free_cnt = vq->vq_free_cnt;
940                         struct rte_mbuf *pkts[free_cnt];
941
942                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
943                                 free_cnt)) {
944                                 error = virtqueue_enqueue_refill_inorder(vq,
945                                                 pkts,
946                                                 free_cnt);
947                                 if (unlikely(error)) {
948                                         for (i = 0; i < free_cnt; i++)
949                                                 rte_pktmbuf_free(pkts[i]);
950                                 }
951                         }
952
953                         nbufs += free_cnt;
954                         vq_update_avail_idx(vq);
955                 }
956         } else {
957                 while (!virtqueue_full(vq)) {
958                         m = rte_mbuf_raw_alloc(rxvq->mpool);
959                         if (m == NULL)
960                                 break;
961
962                         /* Enqueue allocated buffers */
963                         if (vtpci_packed_queue(vq->hw))
964                                 error = virtqueue_enqueue_recv_refill_packed(vq,
965                                                 &m, 1);
966                         else
967                                 error = virtqueue_enqueue_recv_refill(vq,
968                                                 &m, 1);
969                         if (error) {
970                                 rte_pktmbuf_free(m);
971                                 break;
972                         }
973                         nbufs++;
974                 }
975
976                 if (!vtpci_packed_queue(vq->hw))
977                         vq_update_avail_idx(vq);
978         }
979
980         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
981
982         VIRTQUEUE_DUMP(vq);
983
984         return 0;
985 }
986
987 /*
988  * struct rte_eth_dev *dev: device whose TX queue list is updated
989  * uint16_t nb_desc: clamped to the ring size read from config space
990  * unsigned int socket_id: unused here (marked __rte_unused)
991  * const struct rte_eth_txconf *tx_conf: supplies tx_free_thresh
992  * uint16_t queue_idx: index into the device's TX queue list
993  */
994 int
995 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
996                         uint16_t queue_idx,
997                         uint16_t nb_desc,
998                         unsigned int socket_id __rte_unused,
999                         const struct rte_eth_txconf *tx_conf)
1000 {
1001         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1002         struct virtio_hw *hw = dev->data->dev_private;
1003         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1004         struct virtnet_tx *txvq;
1005         uint16_t tx_free_thresh;
1006
1007         PMD_INIT_FUNC_TRACE();
1008
1009         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1010                 nb_desc = vq->vq_nentries;
1011         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1012
1013         txvq = &vq->txq;
1014         txvq->queue_id = queue_idx;
1015
1016         tx_free_thresh = tx_conf->tx_free_thresh;
1017         if (tx_free_thresh == 0)
1018                 tx_free_thresh =
1019                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1020
1021         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1022                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1023                         "number of TX entries minus 3 (%u)."
1024                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1025                         vq->vq_nentries - 3,
1026                         tx_free_thresh, dev->data->port_id, queue_idx);
1027                 return -EINVAL;
1028         }
1029
1030         vq->vq_free_thresh = tx_free_thresh;
1031
1032         dev->data->tx_queues[queue_idx] = txvq;
1033         return 0;
1034 }
1035
1036 int
1037 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1038                                 uint16_t queue_idx)
1039 {
1040         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1041         struct virtio_hw *hw = dev->data->dev_private;
1042         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1043
1044         PMD_INIT_FUNC_TRACE();
1045
1046         if (!vtpci_packed_queue(hw)) {
1047                 if (hw->use_inorder_tx)
1048                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1049         }
1050
1051         VIRTQUEUE_DUMP(vq);
1052
1053         return 0;
1054 }
1055
1056 static inline void
1057 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1058 {
1059         int error;
1060         /*
1061          * Requeue the discarded mbuf. This should always be
1062          * successful since it was just dequeued.
1063          */
1064         if (vtpci_packed_queue(vq->hw))
1065                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1066         else
1067                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1068
1069         if (unlikely(error)) {
1070                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1071                 rte_pktmbuf_free(m);
1072         }
1073 }
1074
1075 static inline void
1076 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1077 {
1078         int error;
1079
1080         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1081         if (unlikely(error)) {
1082                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1083                 rte_pktmbuf_free(m);
1084         }
1085 }
1086
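/*
 * Per-packet statistics. Size bins: [0] < 64, [1] == 64, [2]..[5] cover
 * 65-127, 128-255, 256-511 and 512-1023 bytes (indexed via the leading-zero
 * count), [6] 1024-1518, [7] 1519 bytes and larger.
 */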
1087 static inline void
1088 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1089 {
1090         uint32_t s = mbuf->pkt_len;
1091         struct rte_ether_addr *ea;
1092
1093         stats->bytes += s;
1094
1095         if (s == 64) {
1096                 stats->size_bins[1]++;
1097         } else if (s > 64 && s < 1024) {
1098                 uint32_t bin;
1099
1100                 /* count leading zeros to index the power-of-two size bin */
1101                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1102                 stats->size_bins[bin]++;
1103         } else {
1104                 if (s < 64)
1105                         stats->size_bins[0]++;
1106                 else if (s < 1519)
1107                         stats->size_bins[6]++;
1108                 else
1109                         stats->size_bins[7]++;
1110         }
1111
1112         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
1113         if (rte_is_multicast_ether_addr(ea)) {
1114                 if (rte_is_broadcast_ether_addr(ea))
1115                         stats->broadcast++;
1116                 else
1117                         stats->multicast++;
1118         }
1119 }
1120
1121 static inline void
1122 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1123 {
1124         VIRTIO_DUMP_PACKET(m, m->data_len);
1125
1126         virtio_update_packet_stats(&rxvq->stats, m);
1127 }
1128
1129 /* Optionally fill Rx offload information in the mbuf from the virtio-net header */
1130 static inline int
1131 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1132 {
1133         struct rte_net_hdr_lens hdr_lens;
1134         uint32_t hdrlen, ptype;
1135         int l4_supported = 0;
1136
1137         /* nothing to do */
1138         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1139                 return 0;
1140
1141         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1142
1143         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1144         m->packet_type = ptype;
1145         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1146             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1147             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1148                 l4_supported = 1;
1149
1150         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1151                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1152                 if (hdr->csum_start <= hdrlen && l4_supported) {
1153                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1154                 } else {
1155                         /* Unknown proto or tunnel, do sw cksum. We can assume
1156                          * the cksum field is in the first segment since the
1157                          * buffers we provided to the host are large enough.
1158                          * In case of SCTP, this will be wrong since it's a CRC
1159                          * but there's nothing we can do.
1160                          */
1161                         uint16_t csum = 0, off;
1162
1163                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1164                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1165                                 &csum);
1166                         if (likely(csum != 0xffff))
1167                                 csum = ~csum;
1168                         off = hdr->csum_offset + hdr->csum_start;
1169                         if (rte_pktmbuf_data_len(m) >= off + 1)
1170                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1171                                         off) = csum;
1172                 }
1173         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1174                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1175         }
1176
1177         /* GSO request, save required information in mbuf */
1178         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1179                 /* Check unsupported modes */
1180                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1181                     (hdr->gso_size == 0)) {
1182                         return -EINVAL;
1183                 }
1184
1185                 /* Update MSS length in mbuf */
1186                 m->tso_segsz = hdr->gso_size;
1187                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1188                         case VIRTIO_NET_HDR_GSO_TCPV4:
1189                         case VIRTIO_NET_HDR_GSO_TCPV6:
1190                                 m->ol_flags |= PKT_RX_LRO | \
1191                                         PKT_RX_L4_CKSUM_NONE;
1192                                 break;
1193                         default:
1194                                 return -EINVAL;
1195                 }
1196         }
1197
1198         return 0;
1199 }
1200
1201 #define VIRTIO_MBUF_BURST_SZ 64
1202 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
1203 uint16_t
1204 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1205 {
1206         struct virtnet_rx *rxvq = rx_queue;
1207         struct virtqueue *vq = rxvq->vq;
1208         struct virtio_hw *hw = vq->hw;
1209         struct rte_mbuf *rxm;
1210         uint16_t nb_used, num, nb_rx;
1211         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1212         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1213         int error;
1214         uint32_t i, nb_enqueued;
1215         uint32_t hdr_size;
1216         struct virtio_net_hdr *hdr;
1217
1218         nb_rx = 0;
1219         if (unlikely(hw->started == 0))
1220                 return nb_rx;
1221
1222         nb_used = VIRTQUEUE_NUSED(vq);
1223
1224         virtio_rmb(hw->weak_barriers);
1225
1226         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1227         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1228                 num = VIRTIO_MBUF_BURST_SZ;
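        /* Stop the burst on a used-ring cacheline boundary to limit
         * cacheline sharing with the device.
         */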
1229         if (likely(num > DESC_PER_CACHELINE))
1230                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1231
1232         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1233         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1234
1235         nb_enqueued = 0;
1236         hdr_size = hw->vtnet_hdr_size;
1237
1238         for (i = 0; i < num ; i++) {
1239                 rxm = rcv_pkts[i];
1240
1241                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1242
1243                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1244                         PMD_RX_LOG(ERR, "Packet drop");
1245                         nb_enqueued++;
1246                         virtio_discard_rxbuf(vq, rxm);
1247                         rxvq->stats.errors++;
1248                         continue;
1249                 }
1250
1251                 rxm->port = rxvq->port_id;
1252                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1253                 rxm->ol_flags = 0;
1254                 rxm->vlan_tci = 0;
1255
1256                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1257                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1258
1259                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1260                         RTE_PKTMBUF_HEADROOM - hdr_size);
1261
1262                 if (hw->vlan_strip)
1263                         rte_vlan_strip(rxm);
1264
1265                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1266                         virtio_discard_rxbuf(vq, rxm);
1267                         rxvq->stats.errors++;
1268                         continue;
1269                 }
1270
1271                 virtio_rx_stats_updated(rxvq, rxm);
1272
1273                 rx_pkts[nb_rx++] = rxm;
1274         }
1275
1276         rxvq->stats.packets += nb_rx;
1277
1278         /* Allocate new mbufs to refill the used descriptors */
1279         if (likely(!virtqueue_full(vq))) {
1280                 uint16_t free_cnt = vq->vq_free_cnt;
1281                 struct rte_mbuf *new_pkts[free_cnt];
1282
1283                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1284                                                 free_cnt) == 0)) {
1285                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1286                                         free_cnt);
1287                         if (unlikely(error)) {
1288                                 for (i = 0; i < free_cnt; i++)
1289                                         rte_pktmbuf_free(new_pkts[i]);
1290                         }
1291                         nb_enqueued += free_cnt;
1292                 } else {
1293                         struct rte_eth_dev *dev =
1294                                 &rte_eth_devices[rxvq->port_id];
1295                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1296                 }
1297         }
1298
1299         if (likely(nb_enqueued)) {
1300                 vq_update_avail_idx(vq);
1301
1302                 if (unlikely(virtqueue_kick_prepare(vq))) {
1303                         virtqueue_notify(vq);
1304                         PMD_RX_LOG(DEBUG, "Notified");
1305                 }
1306         }
1307
1308         return nb_rx;
1309 }
1310
1311 uint16_t
1312 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1313                         uint16_t nb_pkts)
1314 {
1315         struct virtnet_rx *rxvq = rx_queue;
1316         struct virtqueue *vq = rxvq->vq;
1317         struct virtio_hw *hw = vq->hw;
1318         struct rte_mbuf *rxm;
1319         uint16_t num, nb_rx;
1320         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1321         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1322         int error;
1323         uint32_t i, nb_enqueued;
1324         uint32_t hdr_size;
1325         struct virtio_net_hdr *hdr;
1326
1327         nb_rx = 0;
1328         if (unlikely(hw->started == 0))
1329                 return nb_rx;
1330
1331         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1332         if (likely(num > DESC_PER_CACHELINE))
1333                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1334
1335         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1336         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1337
1338         nb_enqueued = 0;
1339         hdr_size = hw->vtnet_hdr_size;
1340
1341         for (i = 0; i < num; i++) {
1342                 rxm = rcv_pkts[i];
1343
1344                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1345
1346                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1347                         PMD_RX_LOG(ERR, "Packet drop");
1348                         nb_enqueued++;
1349                         virtio_discard_rxbuf(vq, rxm);
1350                         rxvq->stats.errors++;
1351                         continue;
1352                 }
1353
1354                 rxm->port = rxvq->port_id;
1355                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1356                 rxm->ol_flags = 0;
1357                 rxm->vlan_tci = 0;
1358
1359                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1360                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1361
1362                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1363                         RTE_PKTMBUF_HEADROOM - hdr_size);
1364
1365                 if (hw->vlan_strip)
1366                         rte_vlan_strip(rxm);
1367
1368                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1369                         virtio_discard_rxbuf(vq, rxm);
1370                         rxvq->stats.errors++;
1371                         continue;
1372                 }
1373
1374                 virtio_rx_stats_updated(rxvq, rxm);
1375
1376                 rx_pkts[nb_rx++] = rxm;
1377         }
1378
1379         rxvq->stats.packets += nb_rx;
1380
1381         /* Allocate new mbufs to refill the used descriptors */
1382         if (likely(!virtqueue_full(vq))) {
1383                 uint16_t free_cnt = vq->vq_free_cnt;
1384                 struct rte_mbuf *new_pkts[free_cnt];
1385
1386                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1387                                                 free_cnt) == 0)) {
1388                         error = virtqueue_enqueue_recv_refill_packed(vq,
1389                                         new_pkts, free_cnt);
1390                         if (unlikely(error)) {
1391                                 for (i = 0; i < free_cnt; i++)
1392                                         rte_pktmbuf_free(new_pkts[i]);
1393                         }
1394                         nb_enqueued += free_cnt;
1395                 } else {
1396                         struct rte_eth_dev *dev =
1397                                 &rte_eth_devices[rxvq->port_id];
1398                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1399                 }
1400         }
1401
1402         if (likely(nb_enqueued)) {
1403                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1404                         virtqueue_notify(vq);
1405                         PMD_RX_LOG(DEBUG, "Notified");
1406                 }
1407         }
1408
1409         return nb_rx;
1410 }
1411
1412
1413 uint16_t
1414 virtio_recv_pkts_inorder(void *rx_queue,
1415                         struct rte_mbuf **rx_pkts,
1416                         uint16_t nb_pkts)
1417 {
1418         struct virtnet_rx *rxvq = rx_queue;
1419         struct virtqueue *vq = rxvq->vq;
1420         struct virtio_hw *hw = vq->hw;
1421         struct rte_mbuf *rxm;
1422         struct rte_mbuf *prev;
1423         uint16_t nb_used, num, nb_rx;
1424         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1425         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1426         int error;
1427         uint32_t nb_enqueued;
1428         uint32_t seg_num;
1429         uint32_t seg_res;
1430         uint32_t hdr_size;
1431         int32_t i;
1432
1433         nb_rx = 0;
1434         if (unlikely(hw->started == 0))
1435                 return nb_rx;
1436
1437         nb_used = VIRTQUEUE_NUSED(vq);
1438         nb_used = RTE_MIN(nb_used, nb_pkts);
1439         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1440
1441         virtio_rmb(hw->weak_barriers);
1442
1443         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1444
1445         nb_enqueued = 0;
1446         seg_num = 1;
1447         seg_res = 0;
1448         hdr_size = hw->vtnet_hdr_size;
1449
1450         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1451
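             /*
              * First pass over the dequeued buffers: each head buffer starts
              * with the virtio-net header, which the device wrote just in
              * front of the packet data in the mbuf headroom.
              */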
1452         for (i = 0; i < num; i++) {
1453                 struct virtio_net_hdr_mrg_rxbuf *header;
1454
1455                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1456                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1457
1458                 rxm = rcv_pkts[i];
1459
1460                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1461                         PMD_RX_LOG(ERR, "Packet drop");
1462                         nb_enqueued++;
1463                         virtio_discard_rxbuf_inorder(vq, rxm);
1464                         rxvq->stats.errors++;
1465                         continue;
1466                 }
1467
1468                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1469                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1470                          - hdr_size);
1471
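                     /*
                      * With mergeable Rx buffers the header's num_buffers
                      * field tells how many descriptors this packet spans;
                      * otherwise a packet always fits in a single buffer.
                      */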
1472                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1473                         seg_num = header->num_buffers;
1474                         if (seg_num == 0)
1475                                 seg_num = 1;
1476                 } else {
1477                         seg_num = 1;
1478                 }
1479
1480                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1481                 rxm->nb_segs = seg_num;
1482                 rxm->ol_flags = 0;
1483                 rxm->vlan_tci = 0;
1484                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1485                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1486
1487                 rxm->port = rxvq->port_id;
1488
1489                 rx_pkts[nb_rx] = rxm;
1490                 prev = rxm;
1491
1492                 if (vq->hw->has_rx_offload &&
1493                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1494                         virtio_discard_rxbuf_inorder(vq, rxm);
1495                         rxvq->stats.errors++;
1496                         continue;
1497                 }
1498
1499                 if (hw->vlan_strip)
1500                         rte_vlan_strip(rx_pkts[nb_rx]);
1501
1502                 seg_res = seg_num - 1;
1503
1504                 /* Merge remaining segments */
1505                 while (seg_res != 0 && i < (num - 1)) {
1506                         i++;
1507
1508                         rxm = rcv_pkts[i];
1509                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1510                         rxm->pkt_len = (uint32_t)(len[i]);
1511                         rxm->data_len = (uint16_t)(len[i]);
1512
1513                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1514                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1515
1516                         if (prev)
1517                                 prev->next = rxm;
1518
1519                         prev = rxm;
1520                         seg_res -= 1;
1521                 }
1522
1523                 if (!seg_res) {
1524                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1525                         nb_rx++;
1526                 }
1527         }
1528
1529         /* The last packet may still have segments left to merge */
1530         while (seg_res != 0) {
1531                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1532                                         VIRTIO_MBUF_BURST_SZ);
1533
1534                 prev = rcv_pkts[nb_rx];
1535                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1536                         virtio_rmb(hw->weak_barriers);
1537                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1538                                                            rcv_cnt);
1539                         uint16_t extra_idx = 0;
1540
1541                         rcv_cnt = num;
1542                         while (extra_idx < rcv_cnt) {
1543                                 rxm = rcv_pkts[extra_idx];
1544                                 rxm->data_off =
1545                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1546                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1547                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1548                                 prev->next = rxm;
1549                                 prev = rxm;
1550                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1551                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1552                                 extra_idx += 1;
1553                         }
1554                         seg_res -= rcv_cnt;
1555
1556                         if (!seg_res) {
1557                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1558                                 nb_rx++;
1559                         }
1560                 } else {
1561                         PMD_RX_LOG(ERR,
1562                                         "Not enough segments for packet.");
1563                         virtio_discard_rxbuf_inorder(vq, prev);
1564                         rxvq->stats.errors++;
1565                         break;
1566                 }
1567         }
1568
1569         rxvq->stats.packets += nb_rx;
1570
1571         /* Refill: allocate new mbufs in bulk for the used descriptors */
1573         if (likely(!virtqueue_full(vq))) {
1574                 /* free_cnt may include mrg descs */
1575                 uint16_t free_cnt = vq->vq_free_cnt;
1576                 struct rte_mbuf *new_pkts[free_cnt];
1577
1578                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1579                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1580                                         free_cnt);
1581                         if (unlikely(error)) {
1582                                 for (i = 0; i < free_cnt; i++)
1583                                         rte_pktmbuf_free(new_pkts[i]);
1584                         }
1585                         nb_enqueued += free_cnt;
1586                 } else {
1587                         struct rte_eth_dev *dev =
1588                                 &rte_eth_devices[rxvq->port_id];
1589                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1590                 }
1591         }
1592
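             /*
              * Make the refilled buffers visible to the device and notify it
              * unless it has disabled notifications for this split ring.
              */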
1593         if (likely(nb_enqueued)) {
1594                 vq_update_avail_idx(vq);
1595
1596                 if (unlikely(virtqueue_kick_prepare(vq))) {
1597                         virtqueue_notify(vq);
1598                         PMD_RX_LOG(DEBUG, "Notified");
1599                 }
1600         }
1601
1602         return nb_rx;
1603 }
1604
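     /*
      * Rx burst function for split virtqueues with mergeable Rx buffers
      * (VIRTIO_NET_F_MRG_RXBUF): a packet may span several descriptors and
      * is reassembled here into an mbuf chain.
      */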
1605 uint16_t
1606 virtio_recv_mergeable_pkts(void *rx_queue,
1607                         struct rte_mbuf **rx_pkts,
1608                         uint16_t nb_pkts)
1609 {
1610         struct virtnet_rx *rxvq = rx_queue;
1611         struct virtqueue *vq = rxvq->vq;
1612         struct virtio_hw *hw = vq->hw;
1613         struct rte_mbuf *rxm;
1614         struct rte_mbuf *prev;
1615         uint16_t nb_used, num, nb_rx = 0;
1616         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1617         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1618         int error;
1619         uint32_t nb_enqueued = 0;
1620         uint32_t seg_num = 0;
1621         uint32_t seg_res = 0;
1622         uint32_t hdr_size = hw->vtnet_hdr_size;
1623         int32_t i;
1624
1625         if (unlikely(hw->started == 0))
1626                 return nb_rx;
1627
1628         nb_used = VIRTQUEUE_NUSED(vq);
1629
1630         virtio_rmb(hw->weak_barriers);
1631
1632         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1633
1634         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1635         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1636                 num = VIRTIO_MBUF_BURST_SZ;
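             /*
              * Trim the burst so that used-ring consumption stops on a
              * cache-line aligned index, which presumably limits cache-line
              * sharing with the device.
              */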
1637         if (likely(num > DESC_PER_CACHELINE))
1638                 num = num - ((vq->vq_used_cons_idx + num) %
1639                                 DESC_PER_CACHELINE);
1640
1642         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1643
1644         for (i = 0; i < num; i++) {
1645                 struct virtio_net_hdr_mrg_rxbuf *header;
1646
1647                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1648                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1649
1650                 rxm = rcv_pkts[i];
1651
1652                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1653                         PMD_RX_LOG(ERR, "Packet drop");
1654                         nb_enqueued++;
1655                         virtio_discard_rxbuf(vq, rxm);
1656                         rxvq->stats.errors++;
1657                         continue;
1658                 }
1659
1660                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1661                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1662                          - hdr_size);
1663                 seg_num = header->num_buffers;
1664                 if (seg_num == 0)
1665                         seg_num = 1;
1666
1667                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1668                 rxm->nb_segs = seg_num;
1669                 rxm->ol_flags = 0;
1670                 rxm->vlan_tci = 0;
1671                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1672                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1673
1674                 rxm->port = rxvq->port_id;
1675
1676                 rx_pkts[nb_rx] = rxm;
1677                 prev = rxm;
1678
1679                 if (hw->has_rx_offload &&
1680                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1681                         virtio_discard_rxbuf(vq, rxm);
1682                         rxvq->stats.errors++;
1683                         continue;
1684                 }
1685
1686                 if (hw->vlan_strip)
1687                         rte_vlan_strip(rx_pkts[nb_rx]);
1688
1689                 seg_res = seg_num - 1;
1690
1691                 /* Merge remaining segments */
1692                 while (seg_res != 0 && i < (num - 1)) {
1693                         i++;
1694
1695                         rxm = rcv_pkts[i];
1696                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1697                         rxm->pkt_len = (uint32_t)(len[i]);
1698                         rxm->data_len = (uint16_t)(len[i]);
1699
1700                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1701                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1702
1703                         if (prev)
1704                                 prev->next = rxm;
1705
1706                         prev = rxm;
1707                         seg_res -= 1;
1708                 }
1709
1710                 if (!seg_res) {
1711                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1712                         nb_rx++;
1713                 }
1714         }
1715
1716         /* The last packet may still have segments left to merge */
1717         while (seg_res != 0) {
1718                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1719                                         VIRTIO_MBUF_BURST_SZ);
1720
1721                 prev = rcv_pkts[nb_rx];
1722                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1723                         virtio_rmb(hw->weak_barriers);
1724                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1725                                                            rcv_cnt);
1726                         uint16_t extra_idx = 0;
1727
1728                         rcv_cnt = num;
1729                         while (extra_idx < rcv_cnt) {
1730                                 rxm = rcv_pkts[extra_idx];
1731                                 rxm->data_off =
1732                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1733                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1734                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1735                                 prev->next = rxm;
1736                                 prev = rxm;
1737                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1738                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1739                                 extra_idx += 1;
1740                         }
1741                         seg_res -= rcv_cnt;
1742
1743                         if (!seg_res) {
1744                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1745                                 nb_rx++;
1746                         }
1747                 } else {
1748                         PMD_RX_LOG(ERR,
1749                                         "Not enough segments for packet.");
1750                         virtio_discard_rxbuf(vq, prev);
1751                         rxvq->stats.errors++;
1752                         break;
1753                 }
1754         }
1755
1756         rxvq->stats.packets += nb_rx;
1757
1758         /* Refill: allocate new mbufs in bulk for the used descriptors */
1759         if (likely(!virtqueue_full(vq))) {
1760                 /* free_cnt may include mrg descs */
1761                 uint16_t free_cnt = vq->vq_free_cnt;
1762                 struct rte_mbuf *new_pkts[free_cnt];
1763
1764                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1765                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1766                                         free_cnt);
1767                         if (unlikely(error)) {
1768                                 for (i = 0; i < free_cnt; i++)
1769                                         rte_pktmbuf_free(new_pkts[i]);
1770                         }
1771                         nb_enqueued += free_cnt;
1772                 } else {
1773                         struct rte_eth_dev *dev =
1774                                 &rte_eth_devices[rxvq->port_id];
1775                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1776                 }
1777         }
1778
1779         if (likely(nb_enqueued)) {
1780                 vq_update_avail_idx(vq);
1781
1782                 if (unlikely(virtqueue_kick_prepare(vq))) {
1783                         virtqueue_notify(vq);
1784                         PMD_RX_LOG(DEBUG, "Notified");
1785                 }
1786         }
1787
1788         return nb_rx;
1789 }
1790
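     /*
      * Mergeable Rx path for packed virtqueues; the reassembly logic
      * mirrors the split-ring variant above.
      */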
1791 uint16_t
1792 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1793                         struct rte_mbuf **rx_pkts,
1794                         uint16_t nb_pkts)
1795 {
1796         struct virtnet_rx *rxvq = rx_queue;
1797         struct virtqueue *vq = rxvq->vq;
1798         struct virtio_hw *hw = vq->hw;
1799         struct rte_mbuf *rxm;
1800         struct rte_mbuf *prev = NULL;
1801         uint16_t num, nb_rx = 0;
1802         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1803         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1804         uint32_t nb_enqueued = 0;
1805         uint32_t seg_num = 0;
1806         uint32_t seg_res = 0;
1807         uint32_t hdr_size = hw->vtnet_hdr_size;
1808         int32_t i;
1809         int error;
1810
1811         if (unlikely(hw->started == 0))
1812                 return nb_rx;
1813
1815         num = nb_pkts;
1816         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1817                 num = VIRTIO_MBUF_BURST_SZ;
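             /* Same cache-line alignment trim as in the split-ring path. */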
1818         if (likely(num > DESC_PER_CACHELINE))
1819                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1820
1821         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1822
1823         for (i = 0; i < num; i++) {
1824                 struct virtio_net_hdr_mrg_rxbuf *header;
1825
1826                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1827                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1828
1829                 rxm = rcv_pkts[i];
1830
1831                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1832                         PMD_RX_LOG(ERR, "Packet drop");
1833                         nb_enqueued++;
1834                         virtio_discard_rxbuf(vq, rxm);
1835                         rxvq->stats.errors++;
1836                         continue;
1837                 }
1838
1839                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1840                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1841                 seg_num = header->num_buffers;
1842
1843                 if (seg_num == 0)
1844                         seg_num = 1;
1845
1846                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1847                 rxm->nb_segs = seg_num;
1848                 rxm->ol_flags = 0;
1849                 rxm->vlan_tci = 0;
1850                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1851                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1852
1853                 rxm->port = rxvq->port_id;
1854                 rx_pkts[nb_rx] = rxm;
1855                 prev = rxm;
1856
1857                 if (hw->has_rx_offload &&
1858                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1859                         virtio_discard_rxbuf(vq, rxm);
1860                         rxvq->stats.errors++;
1861                         continue;
1862                 }
1863
1864                 if (hw->vlan_strip)
1865                         rte_vlan_strip(rx_pkts[nb_rx]);
1866
1867                 seg_res = seg_num - 1;
1868
1869                 /* Merge remaining segments */
1870                 while (seg_res != 0 && i < (num - 1)) {
1871                         i++;
1872
1873                         rxm = rcv_pkts[i];
1874                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1875                         rxm->pkt_len = (uint32_t)(len[i]);
1876                         rxm->data_len = (uint16_t)(len[i]);
1877
1878                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1879                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1880
1881                         if (prev)
1882                                 prev->next = rxm;
1883
1884                         prev = rxm;
1885                         seg_res -= 1;
1886                 }
1887
1888                 if (!seg_res) {
1889                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1890                         nb_rx++;
1891                 }
1892         }
1893
1894         /* The last packet may still have segments left to merge */
1895         while (seg_res != 0) {
1896                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1897                                         VIRTIO_MBUF_BURST_SZ);
1898                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1899                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1900                                         len, rcv_cnt);
1901                         uint16_t extra_idx = 0;
1902
1903                         rcv_cnt = num;
1904
1905                         while (extra_idx < rcv_cnt) {
1906                                 rxm = rcv_pkts[extra_idx];
1907
1908                                 rxm->data_off =
1909                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1910                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1911                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1912
1913                                 prev->next = rxm;
1914                                 prev = rxm;
1915                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1916                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1917                                 extra_idx += 1;
1918                         }
1919                         seg_res -= rcv_cnt;
1920                         if (!seg_res) {
1921                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1922                                 nb_rx++;
1923                         }
1924                 } else {
1925                         PMD_RX_LOG(ERR,
1926                                         "Not enough segments for packet.");
1927                         if (prev)
1928                                 virtio_discard_rxbuf(vq, prev);
1929                         rxvq->stats.errors++;
1930                         break;
1931                 }
1932         }
1933
1934         rxvq->stats.packets += nb_rx;
1935
1936         /* Refill: allocate new mbufs in bulk for the used descriptors */
1937         if (likely(!virtqueue_full(vq))) {
1938                 /* free_cnt may include mrg descs */
1939                 uint16_t free_cnt = vq->vq_free_cnt;
1940                 struct rte_mbuf *new_pkts[free_cnt];
1941
1942                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1943                         error = virtqueue_enqueue_recv_refill_packed(vq,
1944                                         new_pkts, free_cnt);
1945                         if (unlikely(error)) {
1946                                 for (i = 0; i < free_cnt; i++)
1947                                         rte_pktmbuf_free(new_pkts[i]);
1948                         }
1949                         nb_enqueued += free_cnt;
1950                 } else {
1951                         struct rte_eth_dev *dev =
1952                                 &rte_eth_devices[rxvq->port_id];
1953                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1954                 }
1955         }
1956
1957         if (likely(nb_enqueued)) {
1958                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1959                         virtqueue_notify(vq);
1960                         PMD_RX_LOG(DEBUG, "Notified");
1961                 }
1962         }
1963
1964         return nb_rx;
1965 }
1966
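     /*
      * Tx burst function for packed virtqueues: completed descriptors are
      * reclaimed lazily, up front when the free count is short and again
      * per packet when more slots are needed.
      */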
1967 uint16_t
1968 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1969                         uint16_t nb_pkts)
1970 {
1971         struct virtnet_tx *txvq = tx_queue;
1972         struct virtqueue *vq = txvq->vq;
1973         struct virtio_hw *hw = vq->hw;
1974         uint16_t hdr_size = hw->vtnet_hdr_size;
1975         uint16_t nb_tx = 0;
1976         bool in_order = hw->use_inorder_tx;
1977         int error;
1978
1979         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1980                 return nb_tx;
1981
1982         if (unlikely(nb_pkts < 1))
1983                 return nb_pkts;
1984
1985         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1986
1987         if (nb_pkts > vq->vq_free_cnt)
1988                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1989                                            in_order);
1990
1991         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1992                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1993                 int can_push = 0, slots, need;
1994
1995                 /* Do VLAN tag insertion */
1996                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1997                         error = rte_vlan_insert(&txm);
1998                         if (unlikely(error)) {
1999                                 rte_pktmbuf_free(txm);
2000                                 continue;
2001                         }
2002                         /* vlan_insert may add a header mbuf */
2003                         tx_pkts[nb_tx] = txm;
2004                 }
2005
2006                 /* Optimize ring usage: prepend the net header in the mbuf
                      * headroom when the negotiated layout allows it. */
2007                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2008                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2009                     rte_mbuf_refcnt_read(txm) == 1 &&
2010                     RTE_MBUF_DIRECT(txm) &&
2011                     txm->nb_segs == 1 &&
2012                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2013                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2014                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2015                         can_push = 1;
2016
2017                 /* How many main ring entries are needed for this Tx?
2018                  * any_layout => number of segments
2019                  * default    => number of segments + 1
2020                  */
2021                 slots = txm->nb_segs + !can_push;
2022                 need = slots - vq->vq_free_cnt;
2023
2024                 /* A positive value means used descriptors must be reclaimed first */
2025                 if (unlikely(need > 0)) {
2026                         virtio_xmit_cleanup_packed(vq, need, in_order);
2027                         need = slots - vq->vq_free_cnt;
2028                         if (unlikely(need > 0)) {
2029                                 PMD_TX_LOG(ERR,
2030                                            "No free tx descriptors to transmit");
2031                                 break;
2032                         }
2033                 }
2034
2035                 /* Enqueue Packet buffers */
2036                 if (can_push)
2037                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2038                 else
2039                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2040                                                       in_order);
2041
2042                 virtio_update_packet_stats(&txvq->stats, txm);
2043         }
2044
2045         txvq->stats.packets += nb_tx;
2046
2047         if (likely(nb_tx)) {
2048                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2049                         virtqueue_notify(vq);
2050                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2051                 }
2052         }
2053
2054         return nb_tx;
2055 }
2056
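     /*
      * Tx burst function for split virtqueues: depending on the negotiated
      * features, a packet is sent with the net header pushed into its
      * headroom, through a single indirect descriptor, or as a descriptor
      * chain with a separate header descriptor.
      */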
2057 uint16_t
2058 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2059 {
2060         struct virtnet_tx *txvq = tx_queue;
2061         struct virtqueue *vq = txvq->vq;
2062         struct virtio_hw *hw = vq->hw;
2063         uint16_t hdr_size = hw->vtnet_hdr_size;
2064         uint16_t nb_used, nb_tx = 0;
2065         int error;
2066
2067         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2068                 return nb_tx;
2069
2070         if (unlikely(nb_pkts < 1))
2071                 return nb_pkts;
2072
2073         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2074         nb_used = VIRTQUEUE_NUSED(vq);
2075
2076         virtio_rmb(hw->weak_barriers);
2077         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2078                 virtio_xmit_cleanup(vq, nb_used);
2079
2080         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2081                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2082                 int can_push = 0, use_indirect = 0, slots, need;
2083
2084                 /* Do VLAN tag insertion */
2085                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2086                         error = rte_vlan_insert(&txm);
2087                         if (unlikely(error)) {
2088                                 rte_pktmbuf_free(txm);
2089                                 continue;
2090                         }
2091                         /* vlan_insert may add a header mbuf */
2092                         tx_pkts[nb_tx] = txm;
2093                 }
2094
2095                 /* Optimize ring usage: prefer pushing the net header into
                      * the mbuf headroom, else use a single indirect descriptor
                      * when that feature is negotiated. */
2096                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2097                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2098                     rte_mbuf_refcnt_read(txm) == 1 &&
2099                     RTE_MBUF_DIRECT(txm) &&
2100                     txm->nb_segs == 1 &&
2101                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2102                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2103                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2104                         can_push = 1;
2105                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2106                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2107                         use_indirect = 1;
2108
2109                 /* How many main ring entries are needed for this Tx?
2110                  * any_layout => number of segments
2111                  * indirect   => 1
2112                  * default    => number of segments + 1
2113                  */
2114                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2115                 need = slots - vq->vq_free_cnt;
2116
2117                 /* A positive value means used descriptors must be reclaimed first */
2118                 if (unlikely(need > 0)) {
2119                         nb_used = VIRTQUEUE_NUSED(vq);
2120                         virtio_rmb(hw->weak_barriers);
2121                         need = RTE_MIN(need, (int)nb_used);
2122
2123                         virtio_xmit_cleanup(vq, need);
2124                         need = slots - vq->vq_free_cnt;
2125                         if (unlikely(need > 0)) {
2126                                 PMD_TX_LOG(ERR,
2127                                            "No free tx descriptors to transmit");
2128                                 break;
2129                         }
2130                 }
2131
2132                 /* Enqueue Packet buffers */
2133                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2134                         can_push, 0);
2135
2136                 virtio_update_packet_stats(&txvq->stats, txm);
2137         }
2138
2139         txvq->stats.packets += nb_tx;
2140
2141         if (likely(nb_tx)) {
2142                 vq_update_avail_idx(vq);
2143
2144                 if (unlikely(virtqueue_kick_prepare(vq))) {
2145                         virtqueue_notify(vq);
2146                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2147                 }
2148         }
2149
2150         return nb_tx;
2151 }
2152
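     /*
      * Tx burst function used when VIRTIO_F_IN_ORDER is negotiated: packets
      * that can carry the net header in their headroom are batched and sent
      * with one descriptor each, the rest go through the regular chained
      * enqueue path.
      */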
2153 uint16_t
2154 virtio_xmit_pkts_inorder(void *tx_queue,
2155                         struct rte_mbuf **tx_pkts,
2156                         uint16_t nb_pkts)
2157 {
2158         struct virtnet_tx *txvq = tx_queue;
2159         struct virtqueue *vq = txvq->vq;
2160         struct virtio_hw *hw = vq->hw;
2161         uint16_t hdr_size = hw->vtnet_hdr_size;
2162         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2163         struct rte_mbuf *inorder_pkts[nb_pkts];
2164         int error;
2165
2166         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2167                 return nb_tx;
2168
2169         if (unlikely(nb_pkts < 1))
2170                 return nb_pkts;
2171
2172         VIRTQUEUE_DUMP(vq);
2173         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2174         nb_used = VIRTQUEUE_NUSED(vq);
2175
2176         virtio_rmb(hw->weak_barriers);
2177         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2178                 virtio_xmit_cleanup_inorder(vq, nb_used);
2179
2180         if (unlikely(!vq->vq_free_cnt))
2181                 virtio_xmit_cleanup_inorder(vq, nb_used);
2182
2183         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2184
2185         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2186                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2187                 int slots, need;
2188
2189                 /* Do VLAN tag insertion */
2190                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2191                         error = rte_vlan_insert(&txm);
2192                         if (unlikely(error)) {
2193                                 rte_pktmbuf_free(txm);
2194                                 continue;
2195                         }
2196                         /* vlan_insert may add a header mbuf */
2197                         tx_pkts[nb_tx] = txm;
2198                 }
2199
2200                 /* Optimize ring usage: packets whose net header fits in
                      * the headroom are batched for the in-order fast path. */
2201                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2202                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2203                      rte_mbuf_refcnt_read(txm) == 1 &&
2204                      RTE_MBUF_DIRECT(txm) &&
2205                      txm->nb_segs == 1 &&
2206                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2207                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2208                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2209                         inorder_pkts[nb_inorder_pkts] = txm;
2210                         nb_inorder_pkts++;
2211
2212                         virtio_update_packet_stats(&txvq->stats, txm);
2213                         continue;
2214                 }
2215
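                     /*
                      * This packet cannot use the in-order fast path, so
                      * flush any batched packets first to preserve ring
                      * order before enqueueing it as a descriptor chain.
                      */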
2216                 if (nb_inorder_pkts) {
2217                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2218                                                         nb_inorder_pkts);
2219                         nb_inorder_pkts = 0;
2220                 }
2221
2222                 slots = txm->nb_segs + 1;
2223                 need = slots - vq->vq_free_cnt;
2224                 if (unlikely(need > 0)) {
2225                         nb_used = VIRTQUEUE_NUSED(vq);
2226                         virtio_rmb(hw->weak_barriers);
2227                         need = RTE_MIN(need, (int)nb_used);
2228
2229                         virtio_xmit_cleanup_inorder(vq, need);
2230
2231                         need = slots - vq->vq_free_cnt;
2232
2233                         if (unlikely(need > 0)) {
2234                                 PMD_TX_LOG(ERR,
2235                                         "No free tx descriptors to transmit");
2236                                 break;
2237                         }
2238                 }
2239                 /* Enqueue Packet buffers */
2240                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2241
2242                 virtio_update_packet_stats(&txvq->stats, txm);
2243         }
2244
2245         /* Flush any remaining batched in-order packets */
2246         if (nb_inorder_pkts)
2247                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2248                                                 nb_inorder_pkts);
2249
2250         txvq->stats.packets += nb_tx;
2251
2252         if (likely(nb_tx)) {
2253                 vq_update_avail_idx(vq);
2254
2255                 if (unlikely(virtqueue_kick_prepare(vq))) {
2256                         virtqueue_notify(vq);
2257                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2258                 }
2259         }
2260
2261         VIRTQUEUE_DUMP(vq);
2262
2263         return nb_tx;
2264 }