dpdk.git: drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * the newly freed chain. If the virtqueue was completely used, then
79          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
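/* Note: a rough sketch of the free-list bookkeeping above. Freed split-ring
 * descriptors are threaded onto a software free list through desc[].next:
 * the freed chain is appended at the current tail, its last descriptor
 * becomes the new tail and is terminated with VQ_RING_DESC_CHAIN_END, and
 * vq_free_cnt grows by vq_descx[head].ndescs. For example, freeing the
 * chain 5 -> 6 -> 7 while the free-list tail is 2 sets desc[2].next = 5
 * and leaves 7 as the new tail.
 */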
91
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
109 void
110 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
111 {
112         uint32_t s = mbuf->pkt_len;
113         struct rte_ether_addr *ea;
114
115         stats->bytes += s;
116
117         if (s == 64) {
118                 stats->size_bins[1]++;
119         } else if (s > 64 && s < 1024) {
120                 uint32_t bin;
121
122                 /* count leading zeros to index the power-of-two size bin */
123                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
124                 stats->size_bins[bin]++;
125         } else {
126                 if (s < 64)
127                         stats->size_bins[0]++;
128                 else if (s < 1519)
129                         stats->size_bins[6]++;
130                 else
131                         stats->size_bins[7]++;
132         }
133
134         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
135         if (rte_is_multicast_ether_addr(ea)) {
136                 if (rte_is_broadcast_ether_addr(ea))
137                         stats->broadcast++;
138                 else
139                         stats->multicast++;
140         }
141 }
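/* Note: a worked example of the size_bins[] mapping above (illustrative
 * only). For 64 < s < 1024, (sizeof(s) * 8) - __builtin_clz(s) - 5 equals
 * floor(log2(s)) - 4, so the bins end up as [0] < 64, [1] == 64,
 * [2] 65-127, [3] 128-255, [4] 256-511, [5] 512-1023, [6] 1024-1518 and
 * [7] >= 1519. E.g. s = 512 gives 32 - 22 - 5 = 5.
 */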
142
143 static inline void
144 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
145 {
146         VIRTIO_DUMP_PACKET(m, m->data_len);
147
148         virtio_update_packet_stats(&rxvq->stats, m);
149 }
150
151 static uint16_t
152 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
153                                   struct rte_mbuf **rx_pkts,
154                                   uint32_t *len,
155                                   uint16_t num)
156 {
157         struct rte_mbuf *cookie;
158         uint16_t used_idx;
159         uint16_t id;
160         struct vring_packed_desc *desc;
161         uint16_t i;
162
163         desc = vq->vq_packed.ring.desc;
164
165         for (i = 0; i < num; i++) {
166                 used_idx = vq->vq_used_cons_idx;
167                 /* desc_is_used has a load-acquire or rte_cio_rmb inside
168                  * and waits for a used descriptor in the virtqueue.
169                  */
170                 if (!desc_is_used(&desc[used_idx], vq))
171                         return i;
172                 len[i] = desc[used_idx].len;
173                 id = desc[used_idx].id;
174                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
175                 if (unlikely(cookie == NULL)) {
176                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
177                                 vq->vq_used_cons_idx);
178                         break;
179                 }
180                 rte_prefetch0(cookie);
181                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
182                 rx_pkts[i] = cookie;
183
184                 vq->vq_free_cnt++;
185                 vq->vq_used_cons_idx++;
186                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
187                         vq->vq_used_cons_idx -= vq->vq_nentries;
188                         vq->vq_packed.used_wrap_counter ^= 1;
189                 }
190         }
191
192         return i;
193 }
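/* Note: in the packed-ring dequeue above, desc_is_used() compares the
 * descriptor AVAIL/USED flag bits against vq_packed.used_wrap_counter, and
 * the counter is flipped whenever vq_used_cons_idx wraps past vq_nentries,
 * so a descriptor left over from the previous lap of the ring is not
 * mistaken for a fresh completion. E.g. on a 256-entry ring, consuming
 * index 255 moves the consumer index to 0 and toggles the wrap counter.
 */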
194
195 static uint16_t
196 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
197                            uint32_t *len, uint16_t num)
198 {
199         struct vring_used_elem *uep;
200         struct rte_mbuf *cookie;
201         uint16_t used_idx, desc_idx;
202         uint16_t i;
203
204         /*  Caller does the check */
205         for (i = 0; i < num ; i++) {
206                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
207                 uep = &vq->vq_split.ring.used->ring[used_idx];
208                 desc_idx = (uint16_t) uep->id;
209                 len[i] = uep->len;
210                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
211
212                 if (unlikely(cookie == NULL)) {
213                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
214                                 vq->vq_used_cons_idx);
215                         break;
216                 }
217
218                 rte_prefetch0(cookie);
219                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
220                 rx_pkts[i]  = cookie;
221                 vq->vq_used_cons_idx++;
222                 vq_ring_free_chain(vq, desc_idx);
223                 vq->vq_descx[desc_idx].cookie = NULL;
224         }
225
226         return i;
227 }
228
229 static uint16_t
230 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
231                         struct rte_mbuf **rx_pkts,
232                         uint32_t *len,
233                         uint16_t num)
234 {
235         struct vring_used_elem *uep;
236         struct rte_mbuf *cookie;
237         uint16_t used_idx = 0;
238         uint16_t i;
239
240         if (unlikely(num == 0))
241                 return 0;
242
243         for (i = 0; i < num; i++) {
244                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
245                 /* Desc idx same as used idx */
246                 uep = &vq->vq_split.ring.used->ring[used_idx];
247                 len[i] = uep->len;
248                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
249
250                 if (unlikely(cookie == NULL)) {
251                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
252                                 vq->vq_used_cons_idx);
253                         break;
254                 }
255
256                 rte_prefetch0(cookie);
257                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
258                 rx_pkts[i]  = cookie;
259                 vq->vq_used_cons_idx++;
260                 vq->vq_descx[used_idx].cookie = NULL;
261         }
262
263         vq_ring_free_inorder(vq, used_idx, i);
264         return i;
265 }
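/* Note: with VIRTIO_F_IN_ORDER the device consumes descriptors in ring
 * order, so the used-ring index doubles as the descriptor index (see the
 * lookup above) and the whole batch is released with a single
 * vq_ring_free_inorder() call instead of walking per-packet chains.
 */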
266
267 #ifndef DEFAULT_TX_FREE_THRESH
268 #define DEFAULT_TX_FREE_THRESH 32
269 #endif
270
271 static void
272 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
273 {
274         uint16_t used_idx, id, curr_id, free_cnt = 0;
275         uint16_t size = vq->vq_nentries;
276         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
277         struct vq_desc_extra *dxp;
278
279         used_idx = vq->vq_used_cons_idx;
280         /* desc_is_used has a load-acquire or rte_cio_rmb inside
281          * and waits for a used descriptor in the virtqueue.
282          */
283         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
284                 id = desc[used_idx].id;
285                 do {
286                         curr_id = used_idx;
287                         dxp = &vq->vq_descx[used_idx];
288                         used_idx += dxp->ndescs;
289                         free_cnt += dxp->ndescs;
290                         num -= dxp->ndescs;
291                         if (used_idx >= size) {
292                                 used_idx -= size;
293                                 vq->vq_packed.used_wrap_counter ^= 1;
294                         }
295                         if (dxp->cookie != NULL) {
296                                 rte_pktmbuf_free(dxp->cookie);
297                                 dxp->cookie = NULL;
298                         }
299                 } while (curr_id != id);
300         }
301         vq->vq_used_cons_idx = used_idx;
302         vq->vq_free_cnt += free_cnt;
303 }
304
305 static void
306 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
307 {
308         uint16_t used_idx, id;
309         uint16_t size = vq->vq_nentries;
310         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
311         struct vq_desc_extra *dxp;
312
313         used_idx = vq->vq_used_cons_idx;
314         /* desc_is_used has a load-acquire or rte_cio_rmb inside
315          * and waits for a used descriptor in the virtqueue.
316          */
317         while (num-- && desc_is_used(&desc[used_idx], vq)) {
318                 id = desc[used_idx].id;
319                 dxp = &vq->vq_descx[id];
320                 vq->vq_used_cons_idx += dxp->ndescs;
321                 if (vq->vq_used_cons_idx >= size) {
322                         vq->vq_used_cons_idx -= size;
323                         vq->vq_packed.used_wrap_counter ^= 1;
324                 }
325                 vq_ring_free_id_packed(vq, id);
326                 if (dxp->cookie != NULL) {
327                         rte_pktmbuf_free(dxp->cookie);
328                         dxp->cookie = NULL;
329                 }
330                 used_idx = vq->vq_used_cons_idx;
331         }
332 }
333
334 /* Cleanup from completed transmits. */
335 static inline void
336 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
337 {
338         if (in_order)
339                 virtio_xmit_cleanup_inorder_packed(vq, num);
340         else
341                 virtio_xmit_cleanup_normal_packed(vq, num);
342 }
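/* Note: the two packed-ring cleanup variants above differ in how they walk
 * completions. The in-order variant assumes buffer ids complete in ring
 * order and frees every vq_descx[] slot from the current used index up to
 * and including the slot whose index matches the completed descriptor's id;
 * the normal variant frees only the slot named by each used descriptor's id
 * and returns it via vq_ring_free_id_packed().
 */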
343
344 static void
345 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
346 {
347         uint16_t i, used_idx, desc_idx;
348         for (i = 0; i < num; i++) {
349                 struct vring_used_elem *uep;
350                 struct vq_desc_extra *dxp;
351
352                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
353                 uep = &vq->vq_split.ring.used->ring[used_idx];
354
355                 desc_idx = (uint16_t) uep->id;
356                 dxp = &vq->vq_descx[desc_idx];
357                 vq->vq_used_cons_idx++;
358                 vq_ring_free_chain(vq, desc_idx);
359
360                 if (dxp->cookie != NULL) {
361                         rte_pktmbuf_free(dxp->cookie);
362                         dxp->cookie = NULL;
363                 }
364         }
365 }
366
367 /* Cleanup from completed inorder transmits. */
368 static __rte_always_inline void
369 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
370 {
371         uint16_t i, idx = vq->vq_used_cons_idx;
372         int16_t free_cnt = 0;
373         struct vq_desc_extra *dxp = NULL;
374
375         if (unlikely(num == 0))
376                 return;
377
378         for (i = 0; i < num; i++) {
379                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
380                 free_cnt += dxp->ndescs;
381                 if (dxp->cookie != NULL) {
382                         rte_pktmbuf_free(dxp->cookie);
383                         dxp->cookie = NULL;
384                 }
385         }
386
387         vq->vq_free_cnt += free_cnt;
388         vq->vq_used_cons_idx = idx;
389 }
390
391 static inline int
392 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
393                         struct rte_mbuf **cookies,
394                         uint16_t num)
395 {
396         struct vq_desc_extra *dxp;
397         struct virtio_hw *hw = vq->hw;
398         struct vring_desc *start_dp;
399         uint16_t head_idx, idx, i = 0;
400
401         if (unlikely(vq->vq_free_cnt == 0))
402                 return -ENOSPC;
403         if (unlikely(vq->vq_free_cnt < num))
404                 return -EMSGSIZE;
405
406         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
407         start_dp = vq->vq_split.ring.desc;
408
409         while (i < num) {
410                 idx = head_idx & (vq->vq_nentries - 1);
411                 dxp = &vq->vq_descx[idx];
412                 dxp->cookie = (void *)cookies[i];
413                 dxp->ndescs = 1;
414
415                 start_dp[idx].addr =
416                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
417                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
418                 start_dp[idx].len =
419                                 cookies[i]->buf_len -
420                                 RTE_PKTMBUF_HEADROOM +
421                                 hw->vtnet_hdr_size;
422                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
423
424                 vq_update_avail_ring(vq, idx);
425                 head_idx++;
426                 i++;
427         }
428
429         vq->vq_desc_head_idx += num;
430         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
431         return 0;
432 }
433
434 static inline int
435 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
436                                 uint16_t num)
437 {
438         struct vq_desc_extra *dxp;
439         struct virtio_hw *hw = vq->hw;
440         struct vring_desc *start_dp = vq->vq_split.ring.desc;
441         uint16_t idx, i;
442
443         if (unlikely(vq->vq_free_cnt == 0))
444                 return -ENOSPC;
445         if (unlikely(vq->vq_free_cnt < num))
446                 return -EMSGSIZE;
447
448         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
449                 return -EFAULT;
450
451         for (i = 0; i < num; i++) {
452                 idx = vq->vq_desc_head_idx;
453                 dxp = &vq->vq_descx[idx];
454                 dxp->cookie = (void *)cookie[i];
455                 dxp->ndescs = 1;
456
457                 start_dp[idx].addr =
458                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
459                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
460                 start_dp[idx].len =
461                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
462                         hw->vtnet_hdr_size;
463                 start_dp[idx].flags = VRING_DESC_F_WRITE;
464                 vq->vq_desc_head_idx = start_dp[idx].next;
465                 vq_update_avail_ring(vq, idx);
466                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
467                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
468                         break;
469                 }
470         }
471
472         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
473
474         return 0;
475 }
476
477 static inline int
478 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
479                                      struct rte_mbuf **cookie, uint16_t num)
480 {
481         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
482         uint16_t flags = vq->vq_packed.cached_flags;
483         struct virtio_hw *hw = vq->hw;
484         struct vq_desc_extra *dxp;
485         uint16_t idx;
486         int i;
487
488         if (unlikely(vq->vq_free_cnt == 0))
489                 return -ENOSPC;
490         if (unlikely(vq->vq_free_cnt < num))
491                 return -EMSGSIZE;
492
493         for (i = 0; i < num; i++) {
494                 idx = vq->vq_avail_idx;
495                 dxp = &vq->vq_descx[idx];
496                 dxp->cookie = (void *)cookie[i];
497                 dxp->ndescs = 1;
498
499                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
500                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
501                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
502                                         + hw->vtnet_hdr_size;
503
504                 vq->vq_desc_head_idx = dxp->next;
505                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
506                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
507
508                 virtqueue_store_flags_packed(&start_dp[idx], flags,
509                                              hw->weak_barriers);
510
511                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
512                         vq->vq_avail_idx -= vq->vq_nentries;
513                         vq->vq_packed.cached_flags ^=
514                                 VRING_PACKED_DESC_F_AVAIL_USED;
515                         flags = vq->vq_packed.cached_flags;
516                 }
517         }
518         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
519         return 0;
520 }
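/* Note on the refill helpers above: each mbuf is posted as one
 * device-writable descriptor whose address starts hw->vtnet_hdr_size bytes
 * before the normal data offset, so the device writes the virtio_net_hdr
 * into the mbuf headroom and the packet data lands at the usual place.
 * As an illustrative example, assuming the default 128-byte
 * RTE_PKTMBUF_HEADROOM and a 12-byte mergeable-rxbuf header:
 * addr = VIRTIO_MBUF_ADDR(m, vq) + 116 and len = buf_len - 116.
 */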
521
522 /* When doing TSO, the IP length is not included in the pseudo header
523  * checksum of the packet given to the PMD, but for virtio it is
524  * expected.
525  */
526 static void
527 virtio_tso_fix_cksum(struct rte_mbuf *m)
528 {
529         /* common case: header is not fragmented */
530         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
531                         m->l4_len)) {
532                 struct rte_ipv4_hdr *iph;
533                 struct rte_ipv6_hdr *ip6h;
534                 struct rte_tcp_hdr *th;
535                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
536                 uint32_t tmp;
537
538                 iph = rte_pktmbuf_mtod_offset(m,
539                                         struct rte_ipv4_hdr *, m->l2_len);
540                 th = RTE_PTR_ADD(iph, m->l3_len);
541                 if ((iph->version_ihl >> 4) == 4) {
542                         iph->hdr_checksum = 0;
543                         iph->hdr_checksum = rte_ipv4_cksum(iph);
544                         ip_len = iph->total_length;
545                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
546                                 m->l3_len);
547                 } else {
548                         ip6h = (struct rte_ipv6_hdr *)iph;
549                         ip_paylen = ip6h->payload_len;
550                 }
551
552                 /* calculate the new phdr checksum not including ip_paylen */
553                 prev_cksum = th->cksum;
554                 tmp = prev_cksum;
555                 tmp += ip_paylen;
556                 tmp = (tmp & 0xffff) + (tmp >> 16);
557                 new_cksum = tmp;
558
559                 /* replace it in the packet */
560                 th->cksum = new_cksum;
561         }
562 }
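/* Note: the adjustment above is plain ones-complement arithmetic; the L3
 * payload length (which the mbuf TSO convention leaves out of the
 * pseudo-header checksum) is added into the existing checksum and the
 * carry is folded back in. E.g. with th->cksum = 0xfff0 and
 * ip_paylen = 0x0020: 0xfff0 + 0x0020 = 0x10010, folded to 0x0011,
 * which is stored back as the new checksum.
 */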
563
564
565 /* avoid the write when the value is already set, to lessen cache issues */
566 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
567         if ((var) != (val))                     \
568                 (var) = (val);                  \
569 } while (0)
570
571 #define virtqueue_clear_net_hdr(_hdr) do {              \
572         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
573         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
574         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
575         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
576         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
577         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
578 } while (0)
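/* Note: with the can_push transmit path the virtio_net_hdr sits in the mbuf
 * headroom, i.e. in a cache line that is frequently reused; the macros
 * above write a header field only when its value actually changes, so a
 * header that is already all-zero from an earlier transmit does not dirty
 * the line again.
 */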
579
580 static inline void
581 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
582                         struct rte_mbuf *cookie,
583                         bool offload)
584 {
585         if (offload) {
586                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
587                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
588
589                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
590                 case PKT_TX_UDP_CKSUM:
591                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
592                         hdr->csum_offset = offsetof(struct rte_udp_hdr,
593                                 dgram_cksum);
594                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
595                         break;
596
597                 case PKT_TX_TCP_CKSUM:
598                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
599                         hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
600                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
601                         break;
602
603                 default:
604                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
605                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
606                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
607                         break;
608                 }
609
610                 /* TCP Segmentation Offload */
611                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
612                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
613                                 VIRTIO_NET_HDR_GSO_TCPV6 :
614                                 VIRTIO_NET_HDR_GSO_TCPV4;
615                         hdr->gso_size = cookie->tso_segsz;
616                         hdr->hdr_len =
617                                 cookie->l2_len +
618                                 cookie->l3_len +
619                                 cookie->l4_len;
620                 } else {
621                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
622                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
623                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
624                 }
625         }
626 }
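/* Note: a concrete example of the mapping above (illustrative only). An
 * IPv4 TCP segment with PKT_TX_TCP_SEG set ends up with
 * csum_start = l2_len + l3_len, csum_offset = offsetof(struct rte_tcp_hdr,
 * cksum), flags = VIRTIO_NET_HDR_F_NEEDS_CSUM, gso_type =
 * VIRTIO_NET_HDR_GSO_TCPV4, gso_size = tso_segsz and
 * hdr_len = l2_len + l3_len + l4_len.
 */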
627
628 static inline void
629 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
630                         struct rte_mbuf **cookies,
631                         uint16_t num)
632 {
633         struct vq_desc_extra *dxp;
634         struct virtqueue *vq = txvq->vq;
635         struct vring_desc *start_dp;
636         struct virtio_net_hdr *hdr;
637         uint16_t idx;
638         int16_t head_size = vq->hw->vtnet_hdr_size;
639         uint16_t i = 0;
640
641         idx = vq->vq_desc_head_idx;
642         start_dp = vq->vq_split.ring.desc;
643
644         while (i < num) {
645                 idx = idx & (vq->vq_nentries - 1);
646                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
647                 dxp->cookie = (void *)cookies[i];
648                 dxp->ndescs = 1;
649                 virtio_update_packet_stats(&txvq->stats, cookies[i]);
650
651                 hdr = rte_pktmbuf_mtod_offset(cookies[i],
652                                 struct virtio_net_hdr *, -head_size);
653
654                 /* if offload disabled, hdr is not zeroed yet, do it now */
655                 if (!vq->hw->has_tx_offload)
656                         virtqueue_clear_net_hdr(hdr);
657                 else
658                         virtqueue_xmit_offload(hdr, cookies[i], true);
659
660                 start_dp[idx].addr  =
661                         VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq) - head_size;
662                 start_dp[idx].len   = cookies[i]->data_len + head_size;
663                 start_dp[idx].flags = 0;
664
665
666                 vq_update_avail_ring(vq, idx);
667
668                 idx++;
669                 i++;
670                 }
671
672         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
673         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
674 }
675
676 static inline void
677 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
678                                    struct rte_mbuf *cookie,
679                                    int in_order)
680 {
681         struct virtqueue *vq = txvq->vq;
682         struct vring_packed_desc *dp;
683         struct vq_desc_extra *dxp;
684         uint16_t idx, id, flags;
685         int16_t head_size = vq->hw->vtnet_hdr_size;
686         struct virtio_net_hdr *hdr;
687
688         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
689         idx = vq->vq_avail_idx;
690         dp = &vq->vq_packed.ring.desc[idx];
691
692         dxp = &vq->vq_descx[id];
693         dxp->ndescs = 1;
694         dxp->cookie = cookie;
695
696         flags = vq->vq_packed.cached_flags;
697
698         /* prepend cannot fail, checked by caller */
699         hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
700                                       -head_size);
701
702         /* if offload disabled, hdr is not zeroed yet, do it now */
703         if (!vq->hw->has_tx_offload)
704                 virtqueue_clear_net_hdr(hdr);
705         else
706                 virtqueue_xmit_offload(hdr, cookie, true);
707
708         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq) - head_size;
709         dp->len  = cookie->data_len + head_size;
710         dp->id   = id;
711
712         if (++vq->vq_avail_idx >= vq->vq_nentries) {
713                 vq->vq_avail_idx -= vq->vq_nentries;
714                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
715         }
716
717         vq->vq_free_cnt--;
718
719         if (!in_order) {
720                 vq->vq_desc_head_idx = dxp->next;
721                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
722                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
723         }
724
725         virtqueue_store_flags_packed(dp, flags, vq->hw->weak_barriers);
726 }
727
728 static inline void
729 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
730                               uint16_t needed, int can_push, int in_order)
731 {
732         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
733         struct vq_desc_extra *dxp;
734         struct virtqueue *vq = txvq->vq;
735         struct vring_packed_desc *start_dp, *head_dp;
736         uint16_t idx, id, head_idx, head_flags;
737         int16_t head_size = vq->hw->vtnet_hdr_size;
738         struct virtio_net_hdr *hdr;
739         uint16_t prev;
740         bool prepend_header = false;
741
742         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
743
744         dxp = &vq->vq_descx[id];
745         dxp->ndescs = needed;
746         dxp->cookie = cookie;
747
748         head_idx = vq->vq_avail_idx;
749         idx = head_idx;
750         prev = head_idx;
751         start_dp = vq->vq_packed.ring.desc;
752
753         head_dp = &vq->vq_packed.ring.desc[idx];
754         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
755         head_flags |= vq->vq_packed.cached_flags;
756
757         if (can_push) {
758                 /* prepend cannot fail, checked by caller */
759                 hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
760                                               -head_size);
761                 prepend_header = true;
762
763                 /* if offload disabled, it is not zeroed below, do it now */
764                 if (!vq->hw->has_tx_offload)
765                         virtqueue_clear_net_hdr(hdr);
766         } else {
767                 /* setup first tx ring slot to point to header
768                  * stored in reserved region.
769                  */
770                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
771                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
772                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
773                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
774                 idx++;
775                 if (idx >= vq->vq_nentries) {
776                         idx -= vq->vq_nentries;
777                         vq->vq_packed.cached_flags ^=
778                                 VRING_PACKED_DESC_F_AVAIL_USED;
779                 }
780         }
781
782         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
783
784         do {
785                 uint16_t flags;
786
787                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
788                 start_dp[idx].len  = cookie->data_len;
789                 if (prepend_header) {
790                         start_dp[idx].addr -= head_size;
791                         start_dp[idx].len += head_size;
792                         prepend_header = false;
793                 }
794
795                 if (likely(idx != head_idx)) {
796                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
797                         flags |= vq->vq_packed.cached_flags;
798                         start_dp[idx].flags = flags;
799                 }
800                 prev = idx;
801                 idx++;
802                 if (idx >= vq->vq_nentries) {
803                         idx -= vq->vq_nentries;
804                         vq->vq_packed.cached_flags ^=
805                                 VRING_PACKED_DESC_F_AVAIL_USED;
806                 }
807         } while ((cookie = cookie->next) != NULL);
808
809         start_dp[prev].id = id;
810
811         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
812         vq->vq_avail_idx = idx;
813
814         if (!in_order) {
815                 vq->vq_desc_head_idx = dxp->next;
816                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
817                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
818         }
819
820         virtqueue_store_flags_packed(head_dp, head_flags,
821                                      vq->hw->weak_barriers);
822 }
823
824 static inline void
825 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
826                         uint16_t needed, int use_indirect, int can_push,
827                         int in_order)
828 {
829         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
830         struct vq_desc_extra *dxp;
831         struct virtqueue *vq = txvq->vq;
832         struct vring_desc *start_dp;
833         uint16_t seg_num = cookie->nb_segs;
834         uint16_t head_idx, idx;
835         int16_t head_size = vq->hw->vtnet_hdr_size;
836         bool prepend_header = false;
837         struct virtio_net_hdr *hdr;
838
839         head_idx = vq->vq_desc_head_idx;
840         idx = head_idx;
841         if (in_order)
842                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
843         else
844                 dxp = &vq->vq_descx[idx];
845         dxp->cookie = (void *)cookie;
846         dxp->ndescs = needed;
847
848         start_dp = vq->vq_split.ring.desc;
849
850         if (can_push) {
851                 /* prepend cannot fail, checked by caller */
852                 hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
853                                               -head_size);
854                 prepend_header = true;
855
856                 /* if offload disabled, it is not zeroed below, do it now */
857                 if (!vq->hw->has_tx_offload)
858                         virtqueue_clear_net_hdr(hdr);
859         } else if (use_indirect) {
860                 /* setup tx ring slot to point to indirect
861                  * descriptor list stored in reserved region.
862                  *
863                  * the first slot in indirect ring is already preset
864                  * to point to the header in reserved region
865                  */
866                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
867                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
868                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
869                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
870                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
871
872                 /* loop below will fill in rest of the indirect elements */
873                 start_dp = txr[idx].tx_indir;
874                 idx = 1;
875         } else {
876                 /* setup first tx ring slot to point to header
877                  * stored in reserved region.
878                  */
879                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
880                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
881                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
882                 start_dp[idx].flags = VRING_DESC_F_NEXT;
883                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
884
885                 idx = start_dp[idx].next;
886         }
887
888         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
889
890         do {
891                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
892                 start_dp[idx].len   = cookie->data_len;
893                 if (prepend_header) {
894                         start_dp[idx].addr -= head_size;
895                         start_dp[idx].len += head_size;
896                         prepend_header = false;
897                 }
898                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
899                 idx = start_dp[idx].next;
900         } while ((cookie = cookie->next) != NULL);
901
902         if (use_indirect)
903                 idx = vq->vq_split.ring.desc[head_idx].next;
904
905         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
906
907         vq->vq_desc_head_idx = idx;
908         vq_update_avail_ring(vq, head_idx);
909
910         if (!in_order) {
911                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
912                         vq->vq_desc_tail_idx = idx;
913         }
914 }
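/* Note: the split-ring transmit above uses one of three layouts. With
 * can_push the net header is prepended into the mbuf headroom and each
 * segment takes one descriptor; with use_indirect a single ring slot
 * points at the per-slot indirect table in the reserved memzone (header
 * plus seg_num data elements); otherwise the first slot carries the
 * header from the reserved region and the data segments are chained
 * behind it, i.e. 1 + nb_segs descriptors in total.
 */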
915
916 void
917 virtio_dev_cq_start(struct rte_eth_dev *dev)
918 {
919         struct virtio_hw *hw = dev->data->dev_private;
920
921         if (hw->cvq && hw->cvq->vq) {
922                 rte_spinlock_init(&hw->cvq->lock);
923                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
924         }
925 }
926
927 int
928 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
929                         uint16_t queue_idx,
930                         uint16_t nb_desc,
931                         unsigned int socket_id __rte_unused,
932                         const struct rte_eth_rxconf *rx_conf,
933                         struct rte_mempool *mp)
934 {
935         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
936         struct virtio_hw *hw = dev->data->dev_private;
937         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
938         struct virtnet_rx *rxvq;
939         uint16_t rx_free_thresh;
940
941         PMD_INIT_FUNC_TRACE();
942
943         if (rx_conf->rx_deferred_start) {
944                 PMD_INIT_LOG(ERR, "Rx deferred start is not supported");
945                 return -EINVAL;
946         }
947
948         rx_free_thresh = rx_conf->rx_free_thresh;
949         if (rx_free_thresh == 0)
950                 rx_free_thresh =
951                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_RX_FREE_THRESH);
952
953         if (rx_free_thresh & 0x3) {
954                 RTE_LOG(ERR, PMD, "rx_free_thresh must be a multiple of four."
955                         " (rx_free_thresh=%u port=%u queue=%u)\n",
956                         rx_free_thresh, dev->data->port_id, queue_idx);
957                 return -EINVAL;
958         }
959
960         if (rx_free_thresh >= vq->vq_nentries) {
961                 RTE_LOG(ERR, PMD, "rx_free_thresh must be less than the "
962                         "number of RX entries (%u)."
963                         " (rx_free_thresh=%u port=%u queue=%u)\n",
964                         vq->vq_nentries,
965                         rx_free_thresh, dev->data->port_id, queue_idx);
966                 return -EINVAL;
967         }
968         vq->vq_free_thresh = rx_free_thresh;
969
970         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
971                 nb_desc = vq->vq_nentries;
972         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
973
974         rxvq = &vq->rxq;
975         rxvq->queue_id = queue_idx;
976         rxvq->mpool = mp;
977         dev->data->rx_queues[queue_idx] = rxvq;
978
979         return 0;
980 }
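/* Note: when rx_free_thresh is left at 0, the setup above picks
 * RTE_MIN(vq_nentries / 4, DEFAULT_RX_FREE_THRESH); e.g. a 256-entry
 * queue gets whichever of 64 and that default is smaller. The chosen
 * value must also be a multiple of four and smaller than the ring size.
 */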
981
982 int
983 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
984 {
985         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
986         struct virtio_hw *hw = dev->data->dev_private;
987         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
988         struct virtnet_rx *rxvq = &vq->rxq;
989         struct rte_mbuf *m;
990         uint16_t desc_idx;
991         int error, nbufs, i;
992
993         PMD_INIT_FUNC_TRACE();
994
995         /* Allocate blank mbufs for each rx descriptor */
996         nbufs = 0;
997
998         if (hw->use_simple_rx) {
999                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
1000                      desc_idx++) {
1001                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
1002                         vq->vq_split.ring.desc[desc_idx].flags =
1003                                 VRING_DESC_F_WRITE;
1004                 }
1005
1006                 virtio_rxq_vec_setup(rxvq);
1007         }
1008
1009         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
1010         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
1011              desc_idx++) {
1012                 vq->sw_ring[vq->vq_nentries + desc_idx] =
1013                         &rxvq->fake_mbuf;
1014         }
1015
1016         if (hw->use_simple_rx) {
1017                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
1018                         virtio_rxq_rearm_vec(rxvq);
1019                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
1020                 }
1021         } else if (hw->use_inorder_rx) {
1022                 if ((!virtqueue_full(vq))) {
1023                         uint16_t free_cnt = vq->vq_free_cnt;
1024                         struct rte_mbuf *pkts[free_cnt];
1025
1026                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
1027                                 free_cnt)) {
1028                                 error = virtqueue_enqueue_refill_inorder(vq,
1029                                                 pkts,
1030                                                 free_cnt);
1031                                 if (unlikely(error)) {
1032                                         for (i = 0; i < free_cnt; i++)
1033                                                 rte_pktmbuf_free(pkts[i]);
1034                                 }
1035                         }
1036
1037                         nbufs += free_cnt;
1038                         vq_update_avail_idx(vq);
1039                 }
1040         } else {
1041                 while (!virtqueue_full(vq)) {
1042                         m = rte_mbuf_raw_alloc(rxvq->mpool);
1043                         if (m == NULL)
1044                                 break;
1045
1046                         /* Enqueue allocated buffers */
1047                         if (vtpci_packed_queue(vq->hw))
1048                                 error = virtqueue_enqueue_recv_refill_packed(vq,
1049                                                 &m, 1);
1050                         else
1051                                 error = virtqueue_enqueue_recv_refill(vq,
1052                                                 &m, 1);
1053                         if (error) {
1054                                 rte_pktmbuf_free(m);
1055                                 break;
1056                         }
1057                         nbufs++;
1058                 }
1059
1060                 if (!vtpci_packed_queue(vq->hw))
1061                         vq_update_avail_idx(vq);
1062         }
1063
1064         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
1065
1066         VIRTQUEUE_DUMP(vq);
1067
1068         return 0;
1069 }
1070
1071 /*
1072  * struct rte_eth_dev *dev: Used to update dev
1073  * uint16_t nb_desc: Defaults to values read from config space
1074  * unsigned int socket_id: Used to allocate memzone
1075  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
1076  * uint16_t queue_idx: Just used as an index in dev txq list
1077  */
1078 int
1079 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1080                         uint16_t queue_idx,
1081                         uint16_t nb_desc,
1082                         unsigned int socket_id __rte_unused,
1083                         const struct rte_eth_txconf *tx_conf)
1084 {
1085         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1086         struct virtio_hw *hw = dev->data->dev_private;
1087         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1088         struct virtnet_tx *txvq;
1089         uint16_t tx_free_thresh;
1090
1091         PMD_INIT_FUNC_TRACE();
1092
1093         if (tx_conf->tx_deferred_start) {
1094                 PMD_INIT_LOG(ERR, "Tx deferred start is not supported");
1095                 return -EINVAL;
1096         }
1097
1098         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1099                 nb_desc = vq->vq_nentries;
1100         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1101
1102         txvq = &vq->txq;
1103         txvq->queue_id = queue_idx;
1104
1105         tx_free_thresh = tx_conf->tx_free_thresh;
1106         if (tx_free_thresh == 0)
1107                 tx_free_thresh =
1108                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1109
1110         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1111                 PMD_DRV_LOG(ERR, "tx_free_thresh must be less than the "
1112                         "number of TX entries minus 3 (%u)."
1113                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1114                         vq->vq_nentries - 3,
1115                         tx_free_thresh, dev->data->port_id, queue_idx);
1116                 return -EINVAL;
1117         }
1118
1119         vq->vq_free_thresh = tx_free_thresh;
1120
1121         dev->data->tx_queues[queue_idx] = txvq;
1122         return 0;
1123 }
1124
1125 int
1126 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1127                                 uint16_t queue_idx)
1128 {
1129         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1130         struct virtio_hw *hw = dev->data->dev_private;
1131         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1132
1133         PMD_INIT_FUNC_TRACE();
1134
1135         if (!vtpci_packed_queue(hw)) {
1136                 if (hw->use_inorder_tx)
1137                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1138         }
1139
1140         VIRTQUEUE_DUMP(vq);
1141
1142         return 0;
1143 }
1144
1145 static inline void
1146 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1147 {
1148         int error;
1149         /*
1150          * Requeue the discarded mbuf. This should always be
1151          * successful since it was just dequeued.
1152          */
1153         if (vtpci_packed_queue(vq->hw))
1154                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1155         else
1156                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1157
1158         if (unlikely(error)) {
1159                 PMD_DRV_LOG(ERR, "cannot requeue discarded mbuf");
1160                 rte_pktmbuf_free(m);
1161         }
1162 }
1163
1164 static inline void
1165 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1166 {
1167         int error;
1168
1169         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1170         if (unlikely(error)) {
1171                 PMD_DRV_LOG(ERR, "cannot requeue discarded mbuf");
1172                 rte_pktmbuf_free(m);
1173         }
1174 }
1175
1176 /* Optionally fill offload information in structure */
1177 static inline int
1178 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1179 {
1180         struct rte_net_hdr_lens hdr_lens;
1181         uint32_t hdrlen, ptype;
1182         int l4_supported = 0;
1183
1184         /* nothing to do */
1185         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1186                 return 0;
1187
1188         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1189
1190         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1191         m->packet_type = ptype;
1192         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1193             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1194             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1195                 l4_supported = 1;
1196
1197         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1198                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1199                 if (hdr->csum_start <= hdrlen && l4_supported) {
1200                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1201                 } else {
1202                         /* Unknown proto or tunnel, do sw cksum. We can assume
1203                          * the cksum field is in the first segment since the
1204                          * buffers we provided to the host are large enough.
1205                          * In case of SCTP, this will be wrong since it's a CRC
1206                          * but there's nothing we can do.
1207                          */
1208                         uint16_t csum = 0, off;
1209
1210                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1211                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1212                                 &csum);
1213                         if (likely(csum != 0xffff))
1214                                 csum = ~csum;
1215                         off = hdr->csum_offset + hdr->csum_start;
1216                         if (rte_pktmbuf_data_len(m) >= off + 1)
1217                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1218                                         off) = csum;
1219                 }
1220         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1221                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1222         }
1223
1224         /* GSO request, save required information in mbuf */
1225         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1226                 /* Check unsupported modes */
1227                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1228                     (hdr->gso_size == 0)) {
1229                         return -EINVAL;
1230                 }
1231
1232                 /* Update mss length in mbuf */
1233                 m->tso_segsz = hdr->gso_size;
1234                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1235                         case VIRTIO_NET_HDR_GSO_TCPV4:
1236                         case VIRTIO_NET_HDR_GSO_TCPV6:
1237                                 m->ol_flags |= PKT_RX_LRO | \
1238                                         PKT_RX_L4_CKSUM_NONE;
1239                                 break;
1240                         default:
1241                                 return -EINVAL;
1242                 }
1243         }
1244
1245         return 0;
1246 }
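/* Note: a short summary of the mapping above. VIRTIO_NET_HDR_F_NEEDS_CSUM
 * describes a partial checksum, so the mbuf is marked PKT_RX_L4_CKSUM_NONE
 * (not verified) or, for unknown protocols or tunnels, the checksum is
 * completed in software; VIRTIO_NET_HDR_F_DATA_VALID maps to
 * PKT_RX_L4_CKSUM_GOOD; a non-NONE gso_type becomes PKT_RX_LRO with
 * tso_segsz taken from gso_size.
 */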
1247
1248 #define VIRTIO_MBUF_BURST_SZ 64
1249 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
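/* Note: DESC_PER_CACHELINE is used below to trim a burst so that used-ring
 * consumption stops on a cache-line boundary; with a common 64-byte cache
 * line and the 16-byte struct vring_desc that is 4 descriptors per line,
 * which avoids having two polls share the same cache line.
 */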
1250 uint16_t
1251 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1252 {
1253         struct virtnet_rx *rxvq = rx_queue;
1254         struct virtqueue *vq = rxvq->vq;
1255         struct virtio_hw *hw = vq->hw;
1256         struct rte_mbuf *rxm;
1257         uint16_t nb_used, num, nb_rx;
1258         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1259         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1260         int error;
1261         uint32_t i, nb_enqueued;
1262         uint32_t hdr_size;
1263         struct virtio_net_hdr *hdr;
1264
1265         nb_rx = 0;
1266         if (unlikely(hw->started == 0))
1267                 return nb_rx;
1268
1269         nb_used = VIRTQUEUE_NUSED(vq);
1270
1271         virtio_rmb(hw->weak_barriers);
1272
1273         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1274         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1275                 num = VIRTIO_MBUF_BURST_SZ;
1276         if (likely(num > DESC_PER_CACHELINE))
1277                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1278
1279         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1280         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1281
1282         nb_enqueued = 0;
1283         hdr_size = hw->vtnet_hdr_size;
1284
1285         for (i = 0; i < num ; i++) {
1286                 rxm = rcv_pkts[i];
1287
1288                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1289
1290                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1291                         PMD_RX_LOG(ERR, "Packet drop");
1292                         nb_enqueued++;
1293                         virtio_discard_rxbuf(vq, rxm);
1294                         rxvq->stats.errors++;
1295                         continue;
1296                 }
1297
1298                 rxm->port = rxvq->port_id;
1299                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1300                 rxm->ol_flags = 0;
1301                 rxm->vlan_tci = 0;
1302
1303                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1304                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1305
1306                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1307                         RTE_PKTMBUF_HEADROOM - hdr_size);
1308
1309                 if (hw->vlan_strip)
1310                         rte_vlan_strip(rxm);
1311
1312                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1313                         virtio_discard_rxbuf(vq, rxm);
1314                         rxvq->stats.errors++;
1315                         continue;
1316                 }
1317
1318                 virtio_rx_stats_updated(rxvq, rxm);
1319
1320                 rx_pkts[nb_rx++] = rxm;
1321         }
1322
1323         rxvq->stats.packets += nb_rx;
1324
1325         /* Allocate new mbufs to refill the used descriptors */
1326         if (likely(!virtqueue_full(vq))) {
1327                 uint16_t free_cnt = vq->vq_free_cnt;
1328                 struct rte_mbuf *new_pkts[free_cnt];
1329
1330                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1331                                                 free_cnt) == 0)) {
1332                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1333                                         free_cnt);
1334                         if (unlikely(error)) {
1335                                 for (i = 0; i < free_cnt; i++)
1336                                         rte_pktmbuf_free(new_pkts[i]);
1337                         }
1338                         nb_enqueued += free_cnt;
1339                 } else {
1340                         struct rte_eth_dev *dev =
1341                                 &rte_eth_devices[rxvq->port_id];
1342                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1343                 }
1344         }
1345
1346         if (likely(nb_enqueued)) {
1347                 vq_update_avail_idx(vq);
1348
1349                 if (unlikely(virtqueue_kick_prepare(vq))) {
1350                         virtqueue_notify(vq);
1351                         PMD_RX_LOG(DEBUG, "Notified");
1352                 }
1353         }
1354
1355         return nb_rx;
1356 }
1357
1358 uint16_t
1359 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1360                         uint16_t nb_pkts)
1361 {
1362         struct virtnet_rx *rxvq = rx_queue;
1363         struct virtqueue *vq = rxvq->vq;
1364         struct virtio_hw *hw = vq->hw;
1365         struct rte_mbuf *rxm;
1366         uint16_t num, nb_rx;
1367         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1368         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1369         int error;
1370         uint32_t i, nb_enqueued;
1371         uint32_t hdr_size;
1372         struct virtio_net_hdr *hdr;
1373
1374         nb_rx = 0;
1375         if (unlikely(hw->started == 0))
1376                 return nb_rx;
1377
1378         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1379         if (likely(num > DESC_PER_CACHELINE))
1380                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1381
1382         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1383         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1384
1385         nb_enqueued = 0;
1386         hdr_size = hw->vtnet_hdr_size;
1387
1388         for (i = 0; i < num; i++) {
1389                 rxm = rcv_pkts[i];
1390
1391                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1392
1393                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1394                         PMD_RX_LOG(ERR, "Packet drop");
1395                         nb_enqueued++;
1396                         virtio_discard_rxbuf(vq, rxm);
1397                         rxvq->stats.errors++;
1398                         continue;
1399                 }
1400
1401                 rxm->port = rxvq->port_id;
1402                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1403                 rxm->ol_flags = 0;
1404                 rxm->vlan_tci = 0;
1405
1406                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1407                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1408
1409                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1410                         RTE_PKTMBUF_HEADROOM - hdr_size);
1411
1412                 if (hw->vlan_strip)
1413                         rte_vlan_strip(rxm);
1414
1415                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1416                         virtio_discard_rxbuf(vq, rxm);
1417                         rxvq->stats.errors++;
1418                         continue;
1419                 }
1420
1421                 virtio_rx_stats_updated(rxvq, rxm);
1422
1423                 rx_pkts[nb_rx++] = rxm;
1424         }
1425
1426         rxvq->stats.packets += nb_rx;
1427
1428         /* Allocate new mbufs to refill the used descriptors */
1429         if (likely(!virtqueue_full(vq))) {
1430                 uint16_t free_cnt = vq->vq_free_cnt;
1431                 struct rte_mbuf *new_pkts[free_cnt];
1432
1433                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1434                                                 free_cnt) == 0)) {
1435                         error = virtqueue_enqueue_recv_refill_packed(vq,
1436                                         new_pkts, free_cnt);
1437                         if (unlikely(error)) {
1438                                 for (i = 0; i < free_cnt; i++)
1439                                         rte_pktmbuf_free(new_pkts[i]);
1440                         }
1441                         nb_enqueued += free_cnt;
1442                 } else {
1443                         struct rte_eth_dev *dev =
1444                                 &rte_eth_devices[rxvq->port_id];
1445                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1446                 }
1447         }
1448
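             /*
              * The packed ring has no avail index to publish; only check
              * whether the device currently wants a notification.
              */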
1449         if (likely(nb_enqueued)) {
1450                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1451                         virtqueue_notify(vq);
1452                         PMD_RX_LOG(DEBUG, "Notified");
1453                 }
1454         }
1455
1456         return nb_rx;
1457 }
1458
1459
1460 uint16_t
1461 virtio_recv_pkts_inorder(void *rx_queue,
1462                         struct rte_mbuf **rx_pkts,
1463                         uint16_t nb_pkts)
1464 {
1465         struct virtnet_rx *rxvq = rx_queue;
1466         struct virtqueue *vq = rxvq->vq;
1467         struct virtio_hw *hw = vq->hw;
1468         struct rte_mbuf *rxm;
1469         struct rte_mbuf *prev = NULL;
1470         uint16_t nb_used, num, nb_rx;
1471         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1472         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1473         int error;
1474         uint32_t nb_enqueued;
1475         uint32_t seg_num;
1476         uint32_t seg_res;
1477         uint32_t hdr_size;
1478         int32_t i;
1479
1480         nb_rx = 0;
1481         if (unlikely(hw->started == 0))
1482                 return nb_rx;
1483
1484         nb_used = VIRTQUEUE_NUSED(vq);
1485         nb_used = RTE_MIN(nb_used, nb_pkts);
1486         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1487
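             /*
              * Read the used ring entries only after the used index has been
              * observed; the barrier matters only with weak memory ordering.
              */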
1488         virtio_rmb(hw->weak_barriers);
1489
1490         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1491
1492         nb_enqueued = 0;
1493         seg_num = 1;
1494         seg_res = 0;
1495         hdr_size = hw->vtnet_hdr_size;
1496
1497         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1498
1499         for (i = 0; i < num; i++) {
1500                 struct virtio_net_hdr_mrg_rxbuf *header;
1501
1502                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1503                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1504
1505                 rxm = rcv_pkts[i];
1506
1507                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1508                         PMD_RX_LOG(ERR, "Packet drop");
1509                         nb_enqueued++;
1510                         virtio_discard_rxbuf_inorder(vq, rxm);
1511                         rxvq->stats.errors++;
1512                         continue;
1513                 }
1514
1515                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1516                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1517                          - hdr_size);
1518
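                     /*
                      * With mergeable Rx buffers negotiated, num_buffers is the
                      * number of descriptors the device used for this packet;
                      * treat 0 defensively as a single buffer.
                      */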
1519                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1520                         seg_num = header->num_buffers;
1521                         if (seg_num == 0)
1522                                 seg_num = 1;
1523                 } else {
1524                         seg_num = 1;
1525                 }
1526
1527                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1528                 rxm->nb_segs = seg_num;
1529                 rxm->ol_flags = 0;
1530                 rxm->vlan_tci = 0;
1531                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1532                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1533
1534                 rxm->port = rxvq->port_id;
1535
1536                 rx_pkts[nb_rx] = rxm;
1537                 prev = rxm;
1538
1539                 if (vq->hw->has_rx_offload &&
1540                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1541                         virtio_discard_rxbuf_inorder(vq, rxm);
1542                         rxvq->stats.errors++;
1543                         continue;
1544                 }
1545
1546                 if (hw->vlan_strip)
1547                         rte_vlan_strip(rx_pkts[nb_rx]);
1548
1549                 seg_res = seg_num - 1;
1550
1551                 /* Merge remaining segments */
1552                 while (seg_res != 0 && i < (num - 1)) {
1553                         i++;
1554
1555                         rxm = rcv_pkts[i];
1556                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1557                         rxm->pkt_len = (uint32_t)(len[i]);
1558                         rxm->data_len = (uint16_t)(len[i]);
1559
1560                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1561
1562                         prev->next = rxm;
1563                         prev = rxm;
1564                         seg_res -= 1;
1565                 }
1566
1567                 if (!seg_res) {
1568                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1569                         nb_rx++;
1570                 }
1571         }
1572
1573         /* The last packet may still need segments merged from later descriptors */
1574         while (seg_res != 0) {
1575                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1576                                         VIRTIO_MBUF_BURST_SZ);
1577
1578                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1579                         virtio_rmb(hw->weak_barriers);
1580                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1581                                                            rcv_cnt);
1582                         uint16_t extra_idx = 0;
1583
1584                         rcv_cnt = num;
1585                         while (extra_idx < rcv_cnt) {
1586                                 rxm = rcv_pkts[extra_idx];
1587                                 rxm->data_off =
1588                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1589                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1590                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1591                                 prev->next = rxm;
1592                                 prev = rxm;
1593                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1594                                 extra_idx += 1;
1595                         }
1596                         seg_res -= rcv_cnt;
1597
1598                         if (!seg_res) {
1599                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1600                                 nb_rx++;
1601                         }
1602                 } else {
1603                         PMD_RX_LOG(ERR,
1604                                         "Not enough segments for packet.");
1605                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1606                         rxvq->stats.errors++;
1607                         break;
1608                 }
1609         }
1610
1611         rxvq->stats.packets += nb_rx;
1612
1613         /* Allocate new mbufs to refill the used descriptors */
1614
1615         if (likely(!virtqueue_full(vq))) {
1616                 /* free_cnt may include mrg descs */
1617                 uint16_t free_cnt = vq->vq_free_cnt;
1618                 struct rte_mbuf *new_pkts[free_cnt];
1619
1620                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1621                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1622                                         free_cnt);
1623                         if (unlikely(error)) {
1624                                 for (i = 0; i < free_cnt; i++)
1625                                         rte_pktmbuf_free(new_pkts[i]);
1626                         }
1627                         nb_enqueued += free_cnt;
1628                 } else {
1629                         struct rte_eth_dev *dev =
1630                                 &rte_eth_devices[rxvq->port_id];
1631                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1632                 }
1633         }
1634
1635         if (likely(nb_enqueued)) {
1636                 vq_update_avail_idx(vq);
1637
1638                 if (unlikely(virtqueue_kick_prepare(vq))) {
1639                         virtqueue_notify(vq);
1640                         PMD_RX_LOG(DEBUG, "Notified");
1641                 }
1642         }
1643
1644         return nb_rx;
1645 }
1646
1647 uint16_t
1648 virtio_recv_mergeable_pkts(void *rx_queue,
1649                         struct rte_mbuf **rx_pkts,
1650                         uint16_t nb_pkts)
1651 {
1652         struct virtnet_rx *rxvq = rx_queue;
1653         struct virtqueue *vq = rxvq->vq;
1654         struct virtio_hw *hw = vq->hw;
1655         struct rte_mbuf *rxm;
1656         struct rte_mbuf *prev = NULL;
1657         uint16_t nb_used, num, nb_rx = 0;
1658         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1659         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1660         int error;
1661         uint32_t nb_enqueued = 0;
1662         uint32_t seg_num = 0;
1663         uint32_t seg_res = 0;
1664         uint32_t hdr_size = hw->vtnet_hdr_size;
1665         int32_t i;
1666
1667         if (unlikely(hw->started == 0))
1668                 return nb_rx;
1669
1670         nb_used = VIRTQUEUE_NUSED(vq);
1671
1672         virtio_rmb(hw->weak_barriers);
1673
1674         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1675
1676         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1677         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1678                 num = VIRTIO_MBUF_BURST_SZ;
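             /*
              * As in the other receive paths, end the burst on a descriptor
              * cacheline boundary.
              */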
1679         if (likely(num > DESC_PER_CACHELINE))
1680                 num = num - ((vq->vq_used_cons_idx + num) %
1681                                 DESC_PER_CACHELINE);
1682
1683
1684         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1685
1686         for (i = 0; i < num; i++) {
1687                 struct virtio_net_hdr_mrg_rxbuf *header;
1688
1689                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1690                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1691
1692                 rxm = rcv_pkts[i];
1693
1694                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1695                         PMD_RX_LOG(ERR, "Packet drop");
1696                         nb_enqueued++;
1697                         virtio_discard_rxbuf(vq, rxm);
1698                         rxvq->stats.errors++;
1699                         continue;
1700                 }
1701
1702                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1703                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1704                          - hdr_size);
1705                 seg_num = header->num_buffers;
1706                 if (seg_num == 0)
1707                         seg_num = 1;
1708
1709                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1710                 rxm->nb_segs = seg_num;
1711                 rxm->ol_flags = 0;
1712                 rxm->vlan_tci = 0;
1713                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1714                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1715
1716                 rxm->port = rxvq->port_id;
1717
1718                 rx_pkts[nb_rx] = rxm;
1719                 prev = rxm;
1720
1721                 if (hw->has_rx_offload &&
1722                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1723                         virtio_discard_rxbuf(vq, rxm);
1724                         rxvq->stats.errors++;
1725                         continue;
1726                 }
1727
1728                 if (hw->vlan_strip)
1729                         rte_vlan_strip(rx_pkts[nb_rx]);
1730
1731                 seg_res = seg_num - 1;
1732
1733                 /* Merge remaining segments */
1734                 while (seg_res != 0 && i < (num - 1)) {
1735                         i++;
1736
1737                         rxm = rcv_pkts[i];
1738                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1739                         rxm->pkt_len = (uint32_t)(len[i]);
1740                         rxm->data_len = (uint16_t)(len[i]);
1741
1742                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1743
1744                         prev->next = rxm;
1745                         prev = rxm;
1746                         seg_res -= 1;
1747                 }
1748
1749                 if (!seg_res) {
1750                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1751                         nb_rx++;
1752                 }
1753         }
1754
1755         /* The last packet may still need segments merged from later descriptors */
1756         while (seg_res != 0) {
1757                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1758                                         VIRTIO_MBUF_BURST_SZ);
1759
1760                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1761                         virtio_rmb(hw->weak_barriers);
1762                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1763                                                            rcv_cnt);
1764                         uint16_t extra_idx = 0;
1765
1766                         rcv_cnt = num;
1767                         while (extra_idx < rcv_cnt) {
1768                                 rxm = rcv_pkts[extra_idx];
1769                                 rxm->data_off =
1770                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1771                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1772                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1773                                 prev->next = rxm;
1774                                 prev = rxm;
1775                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1776                                 extra_idx += 1;
1777                         }
1778                         seg_res -= rcv_cnt;
1779
1780                         if (!seg_res) {
1781                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1782                                 nb_rx++;
1783                         }
1784                 } else {
1785                         PMD_RX_LOG(ERR,
1786                                         "Not enough segments for packet.");
1787                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1788                         rxvq->stats.errors++;
1789                         break;
1790                 }
1791         }
1792
1793         rxvq->stats.packets += nb_rx;
1794
1795         /* Allocate new mbufs to refill the used descriptors */
1796         if (likely(!virtqueue_full(vq))) {
1797                 /* free_cnt may include mrg descs */
1798                 uint16_t free_cnt = vq->vq_free_cnt;
1799                 struct rte_mbuf *new_pkts[free_cnt];
1800
1801                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1802                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1803                                         free_cnt);
1804                         if (unlikely(error)) {
1805                                 for (i = 0; i < free_cnt; i++)
1806                                         rte_pktmbuf_free(new_pkts[i]);
1807                         }
1808                         nb_enqueued += free_cnt;
1809                 } else {
1810                         struct rte_eth_dev *dev =
1811                                 &rte_eth_devices[rxvq->port_id];
1812                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1813                 }
1814         }
1815
1816         if (likely(nb_enqueued)) {
1817                 vq_update_avail_idx(vq);
1818
1819                 if (unlikely(virtqueue_kick_prepare(vq))) {
1820                         virtqueue_notify(vq);
1821                         PMD_RX_LOG(DEBUG, "Notified");
1822                 }
1823         }
1824
1825         return nb_rx;
1826 }
1827
1828 uint16_t
1829 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1830                         struct rte_mbuf **rx_pkts,
1831                         uint16_t nb_pkts)
1832 {
1833         struct virtnet_rx *rxvq = rx_queue;
1834         struct virtqueue *vq = rxvq->vq;
1835         struct virtio_hw *hw = vq->hw;
1836         struct rte_mbuf *rxm;
1837         struct rte_mbuf *prev = NULL;
1838         uint16_t num, nb_rx = 0;
1839         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1840         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1841         uint32_t nb_enqueued = 0;
1842         uint32_t seg_num = 0;
1843         uint32_t seg_res = 0;
1844         uint32_t hdr_size = hw->vtnet_hdr_size;
1845         int32_t i;
1846         int error;
1847
1848         if (unlikely(hw->started == 0))
1849                 return nb_rx;
1850
1851
1852         num = nb_pkts;
1853         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1854                 num = VIRTIO_MBUF_BURST_SZ;
1855         if (likely(num > DESC_PER_CACHELINE))
1856                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1857
1858         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1859
1860         for (i = 0; i < num; i++) {
1861                 struct virtio_net_hdr_mrg_rxbuf *header;
1862
1863                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1864                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1865
1866                 rxm = rcv_pkts[i];
1867
1868                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1869                         PMD_RX_LOG(ERR, "Packet drop");
1870                         nb_enqueued++;
1871                         virtio_discard_rxbuf(vq, rxm);
1872                         rxvq->stats.errors++;
1873                         continue;
1874                 }
1875
1876                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1877                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1878                 seg_num = header->num_buffers;
1879
1880                 if (seg_num == 0)
1881                         seg_num = 1;
1882
1883                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1884                 rxm->nb_segs = seg_num;
1885                 rxm->ol_flags = 0;
1886                 rxm->vlan_tci = 0;
1887                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1888                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1889
1890                 rxm->port = rxvq->port_id;
1891                 rx_pkts[nb_rx] = rxm;
1892                 prev = rxm;
1893
1894                 if (hw->has_rx_offload &&
1895                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1896                         virtio_discard_rxbuf(vq, rxm);
1897                         rxvq->stats.errors++;
1898                         continue;
1899                 }
1900
1901                 if (hw->vlan_strip)
1902                         rte_vlan_strip(rx_pkts[nb_rx]);
1903
1904                 seg_res = seg_num - 1;
1905
1906                 /* Merge remaining segments */
1907                 while (seg_res != 0 && i < (num - 1)) {
1908                         i++;
1909
1910                         rxm = rcv_pkts[i];
1911                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1912                         rxm->pkt_len = (uint32_t)(len[i]);
1913                         rxm->data_len = (uint16_t)(len[i]);
1914
1915                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1916
1917                         prev->next = rxm;
1918                         prev = rxm;
1919                         seg_res -= 1;
1920                 }
1921
1922                 if (!seg_res) {
1923                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1924                         nb_rx++;
1925                 }
1926         }
1927
1928         /* The last packet may still need segments merged from later descriptors */
1929         while (seg_res != 0) {
1930                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1931                                         VIRTIO_MBUF_BURST_SZ);
1932                 uint16_t extra_idx = 0;
1933
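                     /*
                      * The packed ring has no used index to poll, so dequeue
                      * directly; a zero return means the remaining segments of
                      * this packet have not been made available yet.
                      */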
1934                 rcv_cnt = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1935                                 len, rcv_cnt);
1936                 if (unlikely(rcv_cnt == 0)) {
1937                         PMD_RX_LOG(ERR, "Not enough segments for packet.");
1938                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1939                         rxvq->stats.errors++;
1940                         break;
1941                 }
1942
1943                 while (extra_idx < rcv_cnt) {
1944                         rxm = rcv_pkts[extra_idx];
1945
1946                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1947                         rxm->pkt_len = (uint32_t)(len[extra_idx]);
1948                         rxm->data_len = (uint16_t)(len[extra_idx]);
1949
1950                         prev->next = rxm;
1951                         prev = rxm;
1952                         rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1953                         extra_idx += 1;
1954                 }
1955                 seg_res -= rcv_cnt;
1956                 if (!seg_res) {
1957                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1958                         nb_rx++;
1959                 }
1960         }
1961
1962         rxvq->stats.packets += nb_rx;
1963
1964         /* Allocate new mbufs to refill the used descriptors */
1965         if (likely(!virtqueue_full(vq))) {
1966                 /* free_cnt may include mrg descs */
1967                 uint16_t free_cnt = vq->vq_free_cnt;
1968                 struct rte_mbuf *new_pkts[free_cnt];
1969
1970                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1971                         error = virtqueue_enqueue_recv_refill_packed(vq,
1972                                         new_pkts, free_cnt);
1973                         if (unlikely(error)) {
1974                                 for (i = 0; i < free_cnt; i++)
1975                                         rte_pktmbuf_free(new_pkts[i]);
1976                         }
1977                         nb_enqueued += free_cnt;
1978                 } else {
1979                         struct rte_eth_dev *dev =
1980                                 &rte_eth_devices[rxvq->port_id];
1981                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1982                 }
1983         }
1984
1985         if (likely(nb_enqueued)) {
1986                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1987                         virtqueue_notify(vq);
1988                         PMD_RX_LOG(DEBUG, "Notified");
1989                 }
1990         }
1991
1992         return nb_rx;
1993 }
1994
1995 uint16_t
1996 virtio_xmit_pkts_prepare(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
1997                         uint16_t nb_pkts)
1998 {
1999         uint16_t nb_tx;
2000         int error;
2001
2002         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2003                 struct rte_mbuf *m = tx_pkts[nb_tx];
2004
2005 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
2006                 error = rte_validate_tx_offload(m);
2007                 if (unlikely(error)) {
2008                         rte_errno = -error;
2009                         break;
2010                 }
2011 #endif
2012
2013                 /* Do VLAN tag insertion */
2014                 if (unlikely(m->ol_flags & PKT_TX_VLAN_PKT)) {
2015                         error = rte_vlan_insert(&m);
2016                         /* rte_vlan_insert() may change pointer
2017                          * even in the case of failure
2018                          */
2019                         tx_pkts[nb_tx] = m;
2020
2021                         if (unlikely(error)) {
2022                                 rte_errno = -error;
2023                                 break;
2024                         }
2025                 }
2026
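                     /*
                      * Checksum offload expects the L4 checksum field to be
                      * pre-seeded with the pseudo-header checksum; this helper
                      * writes it in place.
                      */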
2027                 error = rte_net_intel_cksum_prepare(m);
2028                 if (unlikely(error)) {
2029                         rte_errno = -error;
2030                         break;
2031                 }
2032
2033                 if (m->ol_flags & PKT_TX_TCP_SEG)
2034                         virtio_tso_fix_cksum(m);
2035         }
2036
2037         return nb_tx;
2038 }
2039
2040 uint16_t
2041 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
2042                         uint16_t nb_pkts)
2043 {
2044         struct virtnet_tx *txvq = tx_queue;
2045         struct virtqueue *vq = txvq->vq;
2046         struct virtio_hw *hw = vq->hw;
2047         uint16_t hdr_size = hw->vtnet_hdr_size;
2048         uint16_t nb_tx = 0;
2049         bool in_order = hw->use_inorder_tx;
2050
2051         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2052                 return nb_tx;
2053
2054         if (unlikely(nb_pkts < 1))
2055                 return nb_pkts;
2056
2057         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2058
2059         if (nb_pkts > vq->vq_free_cnt)
2060                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
2061                                            in_order);
2062
2063         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2064                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2065                 int can_push = 0, slots, need;
2066
2067                 /* optimize ring usage */
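                     /*
                      * If the mbuf is unshared, direct, single-segment and has
                      * headroom for the virtio-net header, the header can be
                      * prepended in place and one descriptor is enough.
                      */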
2068                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2069                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2070                     rte_mbuf_refcnt_read(txm) == 1 &&
2071                     RTE_MBUF_DIRECT(txm) &&
2072                     txm->nb_segs == 1 &&
2073                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2074                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2075                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2076                         can_push = 1;
2077
2078                 /* How many main ring entries are needed for this Tx?
2079                  * any_layout => number of segments
2080                  * default    => number of segments + 1
2081                  */
2082                 slots = txm->nb_segs + !can_push;
2083                 need = slots - vq->vq_free_cnt;
2084
2085                 /* A positive value means more vring descriptors must be freed */
2086                 if (unlikely(need > 0)) {
2087                         virtio_xmit_cleanup_packed(vq, need, in_order);
2088                         need = slots - vq->vq_free_cnt;
2089                         if (unlikely(need > 0)) {
2090                                 PMD_TX_LOG(ERR,
2091                                            "No free tx descriptors to transmit");
2092                                 break;
2093                         }
2094                 }
2095
2096                 /* Enqueue Packet buffers */
2097                 if (can_push)
2098                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2099                 else
2100                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2101                                                       in_order);
2102
2103                 virtio_update_packet_stats(&txvq->stats, txm);
2104         }
2105
2106         txvq->stats.packets += nb_tx;
2107
2108         if (likely(nb_tx)) {
2109                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2110                         virtqueue_notify(vq);
2111                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2112                 }
2113         }
2114
2115         return nb_tx;
2116 }
2117
2118 uint16_t
2119 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2120 {
2121         struct virtnet_tx *txvq = tx_queue;
2122         struct virtqueue *vq = txvq->vq;
2123         struct virtio_hw *hw = vq->hw;
2124         uint16_t hdr_size = hw->vtnet_hdr_size;
2125         uint16_t nb_used, nb_tx = 0;
2126
2127         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2128                 return nb_tx;
2129
2130         if (unlikely(nb_pkts < 1))
2131                 return nb_pkts;
2132
2133         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2134         nb_used = VIRTQUEUE_NUSED(vq);
2135
2136         virtio_rmb(hw->weak_barriers);
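             /*
              * Reclaim completed Tx buffers when the ring is getting close to
              * running out of free descriptors.
              */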
2137         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2138                 virtio_xmit_cleanup(vq, nb_used);
2139
2140         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2141                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2142                 int can_push = 0, use_indirect = 0, slots, need;
2143
2144                 /* optimize ring usage */
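                     /*
                      * Prefer prepending the virtio-net header into the mbuf
                      * headroom so a single descriptor suffices; otherwise fall
                      * back to an indirect descriptor table when the feature is
                      * negotiated and the segment chain is short enough.
                      */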
2145                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2146                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2147                     rte_mbuf_refcnt_read(txm) == 1 &&
2148                     RTE_MBUF_DIRECT(txm) &&
2149                     txm->nb_segs == 1 &&
2150                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2151                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2152                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2153                         can_push = 1;
2154                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2155                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2156                         use_indirect = 1;
2157
2158                 /* How many main ring entries are needed for this Tx?
2159                  * any_layout => number of segments
2160                  * indirect   => 1
2161                  * default    => number of segments + 1
2162                  */
2163                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2164                 need = slots - vq->vq_free_cnt;
2165
2166                 /* A positive value means more vring descriptors must be freed */
2167                 if (unlikely(need > 0)) {
2168                         nb_used = VIRTQUEUE_NUSED(vq);
2169                         virtio_rmb(hw->weak_barriers);
2170                         need = RTE_MIN(need, (int)nb_used);
2171
2172                         virtio_xmit_cleanup(vq, need);
2173                         need = slots - vq->vq_free_cnt;
2174                         if (unlikely(need > 0)) {
2175                                 PMD_TX_LOG(ERR,
2176                                            "No free tx descriptors to transmit");
2177                                 break;
2178                         }
2179                 }
2180
2181                 /* Enqueue Packet buffers */
2182                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2183                         can_push, 0);
2184
2185                 virtio_update_packet_stats(&txvq->stats, txm);
2186         }
2187
2188         txvq->stats.packets += nb_tx;
2189
2190         if (likely(nb_tx)) {
2191                 vq_update_avail_idx(vq);
2192
2193                 if (unlikely(virtqueue_kick_prepare(vq))) {
2194                         virtqueue_notify(vq);
2195                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2196                 }
2197         }
2198
2199         return nb_tx;
2200 }
2201
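     /*
      * Reclaim up to 'need' used descriptors from the in-order Tx ring and
      * return how many of the requested descriptors are still missing
      * (zero or less once enough have been freed).
      */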
2202 static __rte_always_inline int
2203 virtio_xmit_try_cleanup_inorder(struct virtqueue *vq, uint16_t need)
2204 {
2205         uint16_t nb_used, nb_clean, nb_descs;
2206         struct virtio_hw *hw = vq->hw;
2207
2208         nb_descs = vq->vq_free_cnt + need;
2209         nb_used = VIRTQUEUE_NUSED(vq);
2210         virtio_rmb(hw->weak_barriers);
2211         nb_clean = RTE_MIN(need, (int)nb_used);
2212
2213         virtio_xmit_cleanup_inorder(vq, nb_clean);
2214
2215         return nb_descs - vq->vq_free_cnt;
2216 }
2217
2218 uint16_t
2219 virtio_xmit_pkts_inorder(void *tx_queue,
2220                         struct rte_mbuf **tx_pkts,
2221                         uint16_t nb_pkts)
2222 {
2223         struct virtnet_tx *txvq = tx_queue;
2224         struct virtqueue *vq = txvq->vq;
2225         struct virtio_hw *hw = vq->hw;
2226         uint16_t hdr_size = hw->vtnet_hdr_size;
2227         uint16_t nb_used, nb_tx = 0, nb_inorder_pkts = 0;
2228         struct rte_mbuf *inorder_pkts[nb_pkts];
2229         int need;
2230
2231         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2232                 return nb_tx;
2233
2234         if (unlikely(nb_pkts < 1))
2235                 return nb_pkts;
2236
2237         VIRTQUEUE_DUMP(vq);
2238         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2239         nb_used = VIRTQUEUE_NUSED(vq);
2240
2241         virtio_rmb(hw->weak_barriers);
2242         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2243                 virtio_xmit_cleanup_inorder(vq, nb_used);
2244
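             /*
              * Packets that can carry the virtio-net header in their own
              * headroom are batched in inorder_pkts and enqueued back-to-back;
              * any other packet flushes the batch and is sent with a separate
              * header descriptor.
              */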
2245         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2246                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2247                 int slots;
2248
2249                 /* optimize ring usage */
2250                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2251                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2252                      rte_mbuf_refcnt_read(txm) == 1 &&
2253                      RTE_MBUF_DIRECT(txm) &&
2254                      txm->nb_segs == 1 &&
2255                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2256                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2257                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2258                         inorder_pkts[nb_inorder_pkts] = txm;
2259                         nb_inorder_pkts++;
2260
2261                         continue;
2262                 }
2263
2264                 if (nb_inorder_pkts) {
2265                         need = nb_inorder_pkts - vq->vq_free_cnt;
2266                         if (unlikely(need > 0)) {
2267                                 need = virtio_xmit_try_cleanup_inorder(vq,
2268                                                                        need);
2269                                 if (unlikely(need > 0)) {
2270                                         PMD_TX_LOG(ERR,
2271                                                 "No free tx descriptors to "
2272                                                 "transmit");
2273                                         break;
2274                                 }
2275                         }
2276                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2277                                                         nb_inorder_pkts);
2278                         nb_inorder_pkts = 0;
2279                 }
2280
2281                 slots = txm->nb_segs + 1;
2282                 need = slots - vq->vq_free_cnt;
2283                 if (unlikely(need > 0)) {
2284                         need = virtio_xmit_try_cleanup_inorder(vq, slots);
2285
2286                         if (unlikely(need > 0)) {
2287                                 PMD_TX_LOG(ERR,
2288                                         "No free tx descriptors to transmit");
2289                                 break;
2290                         }
2291                 }
2292                 /* Enqueue Packet buffers */
2293                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2294
2295                 virtio_update_packet_stats(&txvq->stats, txm);
2296         }
2297
2298         /* Flush the remaining batched in-order packets */
2299         if (nb_inorder_pkts) {
2300                 need = nb_inorder_pkts - vq->vq_free_cnt;
2301                 if (unlikely(need > 0)) {
2302                         need = virtio_xmit_try_cleanup_inorder(vq,
2303                                                                   need);
2304                         if (unlikely(need > 0)) {
2305                                 PMD_TX_LOG(ERR,
2306                                         "No free tx descriptors to transmit");
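                                     /* Send only what fits; drop the shortfall
                                      * from the reported Tx count.
                                      */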
2307                                 nb_inorder_pkts = vq->vq_free_cnt;
2308                                 nb_tx -= need;
2309                         }
2310                 }
2311
2312                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2313                                                 nb_inorder_pkts);
2314         }
2315
2316         txvq->stats.packets += nb_tx;
2317
2318         if (likely(nb_tx)) {
2319                 vq_update_avail_idx(vq);
2320
2321                 if (unlikely(virtqueue_kick_prepare(vq))) {
2322                         virtqueue_notify(vq);
2323                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2324                 }
2325         }
2326
2327         VIRTQUEUE_DUMP(vq);
2328
2329         return nb_tx;
2330 }