net/virtio: fix mbuf data and packet length mismatch
[dpdk.git] / drivers / net / virtio / virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
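/*
 * Return "num" descriptors to the free list of an in-order virtqueue.
 * In-order mode frees descriptors contiguously, so only the free counter
 * and the tail index need to be updated.
 */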
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * newly freed chain. If the virtqueue was completely used, then
79          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
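/* Return the descriptors attached to buffer "id" to a packed ring. */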
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
109 void
110 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
111 {
112         uint32_t s = mbuf->pkt_len;
113         struct rte_ether_addr *ea;
114
115         stats->bytes += s;
116
117         if (s == 64) {
118                 stats->size_bins[1]++;
119         } else if (s > 64 && s < 1024) {
120                 uint32_t bin;
121
122                 /* count leading zeros to index the power-of-two size bin */
123                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
124                 stats->size_bins[bin]++;
125         } else {
126                 if (s < 64)
127                         stats->size_bins[0]++;
128                 else if (s < 1519)
129                         stats->size_bins[6]++;
130                 else
131                         stats->size_bins[7]++;
132         }
133
134         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
135         if (rte_is_multicast_ether_addr(ea)) {
136                 if (rte_is_broadcast_ether_addr(ea))
137                         stats->broadcast++;
138                 else
139                         stats->multicast++;
140         }
141 }
142
143 static inline void
144 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
145 {
146         VIRTIO_DUMP_PACKET(m, m->data_len);
147
148         virtio_update_packet_stats(&rxvq->stats, m);
149 }
150
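/*
 * Dequeue up to "num" received buffers from a packed virtqueue.
 * Stops at the first descriptor not yet marked used by the device and
 * returns the number of mbufs written to rx_pkts[].
 */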
151 static uint16_t
152 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
153                                   struct rte_mbuf **rx_pkts,
154                                   uint32_t *len,
155                                   uint16_t num)
156 {
157         struct rte_mbuf *cookie;
158         uint16_t used_idx;
159         uint16_t id;
160         struct vring_packed_desc *desc;
161         uint16_t i;
162
163         desc = vq->vq_packed.ring.desc;
164
165         for (i = 0; i < num; i++) {
166                 used_idx = vq->vq_used_cons_idx;
167                 if (!desc_is_used(&desc[used_idx], vq))
168                         return i;
169                 virtio_rmb(vq->hw->weak_barriers);
170                 len[i] = desc[used_idx].len;
171                 id = desc[used_idx].id;
172                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
173                 if (unlikely(cookie == NULL)) {
174                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
175                                 vq->vq_used_cons_idx);
176                         break;
177                 }
178                 rte_prefetch0(cookie);
179                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
180                 rx_pkts[i] = cookie;
181
182                 vq->vq_free_cnt++;
183                 vq->vq_used_cons_idx++;
184                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
185                         vq->vq_used_cons_idx -= vq->vq_nentries;
186                         vq->vq_packed.used_wrap_counter ^= 1;
187                 }
188         }
189
190         return i;
191 }
192
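/*
 * Dequeue up to "num" received buffers from a split virtqueue, freeing
 * the matching descriptor chains. Returns the number of mbufs written
 * to rx_pkts[].
 */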
193 static uint16_t
194 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
195                            uint32_t *len, uint16_t num)
196 {
197         struct vring_used_elem *uep;
198         struct rte_mbuf *cookie;
199         uint16_t used_idx, desc_idx;
200         uint16_t i;
201
202         /* Caller ensures that at least "num" used entries are available */
203         for (i = 0; i < num ; i++) {
204                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
205                 uep = &vq->vq_split.ring.used->ring[used_idx];
206                 desc_idx = (uint16_t) uep->id;
207                 len[i] = uep->len;
208                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
209
210                 if (unlikely(cookie == NULL)) {
211                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
212                                 vq->vq_used_cons_idx);
213                         break;
214                 }
215
216                 rte_prefetch0(cookie);
217                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
218                 rx_pkts[i]  = cookie;
219                 vq->vq_used_cons_idx++;
220                 vq_ring_free_chain(vq, desc_idx);
221                 vq->vq_descx[desc_idx].cookie = NULL;
222         }
223
224         return i;
225 }
226
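/*
 * In-order variant of the split-ring dequeue: the used index equals the
 * descriptor index, so the descriptors can be released in a single batch
 * once the loop is done.
 */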
227 static uint16_t
228 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
229                         struct rte_mbuf **rx_pkts,
230                         uint32_t *len,
231                         uint16_t num)
232 {
233         struct vring_used_elem *uep;
234         struct rte_mbuf *cookie;
235         uint16_t used_idx = 0;
236         uint16_t i;
237
238         if (unlikely(num == 0))
239                 return 0;
240
241         for (i = 0; i < num; i++) {
242                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
243                 /* Desc idx same as used idx */
244                 uep = &vq->vq_split.ring.used->ring[used_idx];
245                 len[i] = uep->len;
246                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
247
248                 if (unlikely(cookie == NULL)) {
249                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
250                                 vq->vq_used_cons_idx);
251                         break;
252                 }
253
254                 rte_prefetch0(cookie);
255                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
256                 rx_pkts[i]  = cookie;
257                 vq->vq_used_cons_idx++;
258                 vq->vq_descx[used_idx].cookie = NULL;
259         }
260
261         vq_ring_free_inorder(vq, used_idx, i);
262         return i;
263 }
264
265 #ifndef DEFAULT_TX_FREE_THRESH
266 #define DEFAULT_TX_FREE_THRESH 32
267 #endif
268
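/*
 * Free completed Tx buffers of a packed ring used in in-order mode:
 * walk the batches of descriptors up to the id reported as used by the
 * device, releasing the attached mbufs along the way.
 */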
269 static void
270 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
271 {
272         uint16_t used_idx, id, curr_id, free_cnt = 0;
273         uint16_t size = vq->vq_nentries;
274         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
275         struct vq_desc_extra *dxp;
276
277         used_idx = vq->vq_used_cons_idx;
278         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
279                 virtio_rmb(vq->hw->weak_barriers);
280                 id = desc[used_idx].id;
281                 do {
282                         curr_id = used_idx;
283                         dxp = &vq->vq_descx[used_idx];
284                         used_idx += dxp->ndescs;
285                         free_cnt += dxp->ndescs;
286                         num -= dxp->ndescs;
287                         if (used_idx >= size) {
288                                 used_idx -= size;
289                                 vq->vq_packed.used_wrap_counter ^= 1;
290                         }
291                         if (dxp->cookie != NULL) {
292                                 rte_pktmbuf_free(dxp->cookie);
293                                 dxp->cookie = NULL;
294                         }
295                 } while (curr_id != id);
296         }
297         vq->vq_used_cons_idx = used_idx;
298         vq->vq_free_cnt += free_cnt;
299 }
300
301 static void
302 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
303 {
304         uint16_t used_idx, id;
305         uint16_t size = vq->vq_nentries;
306         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
307         struct vq_desc_extra *dxp;
308
309         used_idx = vq->vq_used_cons_idx;
310         while (num-- && desc_is_used(&desc[used_idx], vq)) {
311                 virtio_rmb(vq->hw->weak_barriers);
312                 id = desc[used_idx].id;
313                 dxp = &vq->vq_descx[id];
314                 vq->vq_used_cons_idx += dxp->ndescs;
315                 if (vq->vq_used_cons_idx >= size) {
316                         vq->vq_used_cons_idx -= size;
317                         vq->vq_packed.used_wrap_counter ^= 1;
318                 }
319                 vq_ring_free_id_packed(vq, id);
320                 if (dxp->cookie != NULL) {
321                         rte_pktmbuf_free(dxp->cookie);
322                         dxp->cookie = NULL;
323                 }
324                 used_idx = vq->vq_used_cons_idx;
325         }
326 }
327
328 /* Cleanup from completed transmits. */
329 static inline void
330 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
331 {
332         if (in_order)
333                 virtio_xmit_cleanup_inorder_packed(vq, num);
334         else
335                 virtio_xmit_cleanup_normal_packed(vq, num);
336 }
337
338 static void
339 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
340 {
341         uint16_t i, used_idx, desc_idx;
342         for (i = 0; i < num; i++) {
343                 struct vring_used_elem *uep;
344                 struct vq_desc_extra *dxp;
345
346                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
347                 uep = &vq->vq_split.ring.used->ring[used_idx];
348
349                 desc_idx = (uint16_t) uep->id;
350                 dxp = &vq->vq_descx[desc_idx];
351                 vq->vq_used_cons_idx++;
352                 vq_ring_free_chain(vq, desc_idx);
353
354                 if (dxp->cookie != NULL) {
355                         rte_pktmbuf_free(dxp->cookie);
356                         dxp->cookie = NULL;
357                 }
358         }
359 }
360
361 /* Cleanup from completed inorder transmits. */
362 static __rte_always_inline void
363 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
364 {
365         uint16_t i, idx = vq->vq_used_cons_idx;
366         int16_t free_cnt = 0;
367         struct vq_desc_extra *dxp = NULL;
368
369         if (unlikely(num == 0))
370                 return;
371
372         for (i = 0; i < num; i++) {
373                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
374                 free_cnt += dxp->ndescs;
375                 if (dxp->cookie != NULL) {
376                         rte_pktmbuf_free(dxp->cookie);
377                         dxp->cookie = NULL;
378                 }
379         }
380
381         vq->vq_free_cnt += free_cnt;
382         vq->vq_used_cons_idx = idx;
383 }
384
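/*
 * Refill an in-order Rx virtqueue with "num" empty mbufs. Each descriptor
 * covers the mbuf data room plus space for the virtio-net header kept in
 * the headroom.
 */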
385 static inline int
386 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
387                         struct rte_mbuf **cookies,
388                         uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp;
393         uint16_t head_idx, idx, i = 0;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
401         start_dp = vq->vq_split.ring.desc;
402
403         while (i < num) {
404                 idx = head_idx & (vq->vq_nentries - 1);
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookies[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
411                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                                 cookies[i]->buf_len -
414                                 RTE_PKTMBUF_HEADROOM +
415                                 hw->vtnet_hdr_size;
416                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
417
418                 vq_update_avail_ring(vq, idx);
419                 head_idx++;
420                 i++;
421         }
422
423         vq->vq_desc_head_idx += num;
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425         return 0;
426 }
427
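/*
 * Split-ring Rx refill: take descriptors from the free chain, point them
 * at empty mbufs and expose them on the avail ring.
 */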
428 static inline int
429 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
430                                 uint16_t num)
431 {
432         struct vq_desc_extra *dxp;
433         struct virtio_hw *hw = vq->hw;
434         struct vring_desc *start_dp = vq->vq_split.ring.desc;
435         uint16_t idx, i;
436
437         if (unlikely(vq->vq_free_cnt == 0))
438                 return -ENOSPC;
439         if (unlikely(vq->vq_free_cnt < num))
440                 return -EMSGSIZE;
441
442         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
443                 return -EFAULT;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_desc_head_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr =
452                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
453                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
454                 start_dp[idx].len =
455                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
456                         hw->vtnet_hdr_size;
457                 start_dp[idx].flags = VRING_DESC_F_WRITE;
458                 vq->vq_desc_head_idx = start_dp[idx].next;
459                 vq_update_avail_ring(vq, idx);
460                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
461                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
462                         break;
463                 }
464         }
465
466         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
467
468         return 0;
469 }
470
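/*
 * Packed-ring Rx refill. The descriptor flags are written last, behind a
 * write barrier, so the device cannot see a descriptor before its address
 * and length are valid.
 */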
471 static inline int
472 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
473                                      struct rte_mbuf **cookie, uint16_t num)
474 {
475         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
476         uint16_t flags = vq->vq_packed.cached_flags;
477         struct virtio_hw *hw = vq->hw;
478         struct vq_desc_extra *dxp;
479         uint16_t idx;
480         int i;
481
482         if (unlikely(vq->vq_free_cnt == 0))
483                 return -ENOSPC;
484         if (unlikely(vq->vq_free_cnt < num))
485                 return -EMSGSIZE;
486
487         for (i = 0; i < num; i++) {
488                 idx = vq->vq_avail_idx;
489                 dxp = &vq->vq_descx[idx];
490                 dxp->cookie = (void *)cookie[i];
491                 dxp->ndescs = 1;
492
493                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
494                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
495                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
496                                         + hw->vtnet_hdr_size;
497
498                 vq->vq_desc_head_idx = dxp->next;
499                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
500                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
501                 virtio_wmb(hw->weak_barriers);
502                 start_dp[idx].flags = flags;
503                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
504                         vq->vq_avail_idx -= vq->vq_nentries;
505                         vq->vq_packed.cached_flags ^=
506                                 VRING_PACKED_DESC_F_AVAIL_USED;
507                         flags = vq->vq_packed.cached_flags;
508                 }
509         }
510         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
511         return 0;
512 }
513
514 /* When doing TSO, the IP length is not included in the pseudo header
515  * checksum of the packet given to the PMD, but virtio expects it to
516  * be included.
517  */
518 static void
519 virtio_tso_fix_cksum(struct rte_mbuf *m)
520 {
521         /* common case: header is not fragmented */
522         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
523                         m->l4_len)) {
524                 struct rte_ipv4_hdr *iph;
525                 struct rte_ipv6_hdr *ip6h;
526                 struct rte_tcp_hdr *th;
527                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
528                 uint32_t tmp;
529
530                 iph = rte_pktmbuf_mtod_offset(m,
531                                         struct rte_ipv4_hdr *, m->l2_len);
532                 th = RTE_PTR_ADD(iph, m->l3_len);
533                 if ((iph->version_ihl >> 4) == 4) {
534                         iph->hdr_checksum = 0;
535                         iph->hdr_checksum = rte_ipv4_cksum(iph);
536                         ip_len = iph->total_length;
537                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
538                                 m->l3_len);
539                 } else {
540                         ip6h = (struct rte_ipv6_hdr *)iph;
541                         ip_paylen = ip6h->payload_len;
542                 }
543
544                 /* calculate the new phdr checksum not including ip_paylen */
545                 prev_cksum = th->cksum;
546                 tmp = prev_cksum;
547                 tmp += ip_paylen;
548                 tmp = (tmp & 0xffff) + (tmp >> 16);
549                 new_cksum = tmp;
550
551                 /* replace it in the packet */
552                 th->cksum = new_cksum;
553         }
554 }
555
556
557 /* avoid the write operation when it is not needed, to lessen cache issues */
558 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
559         if ((var) != (val))                     \
560                 (var) = (val);                  \
561 } while (0)
562
563 #define virtqueue_clear_net_hdr(_hdr) do {              \
564         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
565         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
566         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
567         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
568         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
569         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
570 } while (0)
571
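/*
 * Translate the mbuf Tx offload requests (L4 checksum, TSO) into the
 * virtio-net header read by the device. Header fields are only written
 * when their value actually changes, to limit cache traffic.
 */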
572 static inline void
573 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
574                         struct rte_mbuf *cookie,
575                         bool offload)
576 {
577         if (offload) {
578                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
579                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
580
581                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
582                 case PKT_TX_UDP_CKSUM:
583                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
584                         hdr->csum_offset = offsetof(struct rte_udp_hdr,
585                                 dgram_cksum);
586                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
587                         break;
588
589                 case PKT_TX_TCP_CKSUM:
590                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
591                         hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
592                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
593                         break;
594
595                 default:
596                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
597                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
598                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
599                         break;
600                 }
601
602                 /* TCP Segmentation Offload */
603                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
604                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
605                                 VIRTIO_NET_HDR_GSO_TCPV6 :
606                                 VIRTIO_NET_HDR_GSO_TCPV4;
607                         hdr->gso_size = cookie->tso_segsz;
608                         hdr->hdr_len =
609                                 cookie->l2_len +
610                                 cookie->l3_len +
611                                 cookie->l4_len;
612                 } else {
613                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
614                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
615                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
616                 }
617         }
618 }
619
620 static inline void
621 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
622                         struct rte_mbuf **cookies,
623                         uint16_t num)
624 {
625         struct vq_desc_extra *dxp;
626         struct virtqueue *vq = txvq->vq;
627         struct vring_desc *start_dp;
628         struct virtio_net_hdr *hdr;
629         uint16_t idx;
630         uint16_t head_size = vq->hw->vtnet_hdr_size;
631         uint16_t i = 0;
632
633         idx = vq->vq_desc_head_idx;
634         start_dp = vq->vq_split.ring.desc;
635
636         while (i < num) {
637                 idx = idx & (vq->vq_nentries - 1);
638                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
639                 dxp->cookie = (void *)cookies[i];
640                 dxp->ndescs = 1;
641                 virtio_update_packet_stats(&txvq->stats, cookies[i]);
642
643                 hdr = (struct virtio_net_hdr *)((char *)cookies[i]->buf_addr +
644                         cookies[i]->data_off - head_size);
645
646                 /* if offload disabled, hdr is not zeroed yet, do it now */
647                 if (!vq->hw->has_tx_offload)
648                         virtqueue_clear_net_hdr(hdr);
649                 else
650                         virtqueue_xmit_offload(hdr, cookies[i], true);
651
652                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
653                 start_dp[idx].len   = cookies[i]->data_len + head_size;
654                 start_dp[idx].flags = 0;
655
656
657                 vq_update_avail_ring(vq, idx);
658
659                 idx++;
660                 i++;
661         }
662
663         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
664         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
665 }
666
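/*
 * Fast Tx path for a single-segment mbuf on a packed ring: the virtio-net
 * header is placed in the mbuf headroom and the whole frame fits in one
 * descriptor.
 */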
667 static inline void
668 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
669                                    struct rte_mbuf *cookie,
670                                    int in_order)
671 {
672         struct virtqueue *vq = txvq->vq;
673         struct vring_packed_desc *dp;
674         struct vq_desc_extra *dxp;
675         uint16_t idx, id, flags;
676         uint16_t head_size = vq->hw->vtnet_hdr_size;
677         struct virtio_net_hdr *hdr;
678
679         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
680         idx = vq->vq_avail_idx;
681         dp = &vq->vq_packed.ring.desc[idx];
682
683         dxp = &vq->vq_descx[id];
684         dxp->ndescs = 1;
685         dxp->cookie = cookie;
686
687         flags = vq->vq_packed.cached_flags;
688
689         /* the headroom can hold the header, checked by caller */
690         hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
691                 cookie->data_off - head_size);
692
693         /* if offload disabled, hdr is not zeroed yet, do it now */
694         if (!vq->hw->has_tx_offload)
695                 virtqueue_clear_net_hdr(hdr);
696         else
697                 virtqueue_xmit_offload(hdr, cookie, true);
698
699         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
700         dp->len  = cookie->data_len + head_size;
701         dp->id   = id;
702
703         if (++vq->vq_avail_idx >= vq->vq_nentries) {
704                 vq->vq_avail_idx -= vq->vq_nentries;
705                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
706         }
707
708         vq->vq_free_cnt--;
709
710         if (!in_order) {
711                 vq->vq_desc_head_idx = dxp->next;
712                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
713                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
714         }
715
716         virtio_wmb(vq->hw->weak_barriers);
717         dp->flags = flags;
718 }
719
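/*
 * General packed-ring Tx enqueue: the virtio-net header is either pushed
 * into the mbuf headroom (can_push) or stored in the reserved header
 * region, then one descriptor is filled per mbuf segment. The head
 * descriptor flags are written last, behind a write barrier.
 */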
720 static inline void
721 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
722                               uint16_t needed, int can_push, int in_order)
723 {
724         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
725         struct vq_desc_extra *dxp;
726         struct virtqueue *vq = txvq->vq;
727         struct vring_packed_desc *start_dp, *head_dp;
728         uint16_t idx, id, head_idx, head_flags;
729         uint16_t head_size = vq->hw->vtnet_hdr_size;
730         struct virtio_net_hdr *hdr;
731         uint16_t prev;
732         bool prepend_header = false;
733
734         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
735
736         dxp = &vq->vq_descx[id];
737         dxp->ndescs = needed;
738         dxp->cookie = cookie;
739
740         head_idx = vq->vq_avail_idx;
741         idx = head_idx;
742         prev = head_idx;
743         start_dp = vq->vq_packed.ring.desc;
744
745         head_dp = &vq->vq_packed.ring.desc[idx];
746         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
747         head_flags |= vq->vq_packed.cached_flags;
748
749         if (can_push) {
750                 /* the headroom can hold the header, checked by caller */
751                 hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
752                         cookie->data_off - head_size);
753                 prepend_header = true;
754
755                 /* if offload disabled, it is not zeroed below, do it now */
756                 if (!vq->hw->has_tx_offload)
757                         virtqueue_clear_net_hdr(hdr);
758         } else {
759                 /* setup first tx ring slot to point to header
760                  * stored in reserved region.
761                  */
762                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
763                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
764                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
765                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
766                 idx++;
767                 if (idx >= vq->vq_nentries) {
768                         idx -= vq->vq_nentries;
769                         vq->vq_packed.cached_flags ^=
770                                 VRING_PACKED_DESC_F_AVAIL_USED;
771                 }
772         }
773
774         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
775
776         do {
777                 uint16_t flags;
778
779                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
780                 start_dp[idx].len  = cookie->data_len;
781                 if (prepend_header) {
782                         start_dp[idx].len += head_size;
783                         prepend_header = false;
784                 }
785
786                 if (likely(idx != head_idx)) {
787                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
788                         flags |= vq->vq_packed.cached_flags;
789                         start_dp[idx].flags = flags;
790                 }
791                 prev = idx;
792                 idx++;
793                 if (idx >= vq->vq_nentries) {
794                         idx -= vq->vq_nentries;
795                         vq->vq_packed.cached_flags ^=
796                                 VRING_PACKED_DESC_F_AVAIL_USED;
797                 }
798         } while ((cookie = cookie->next) != NULL);
799
800         start_dp[prev].id = id;
801
802         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
803         vq->vq_avail_idx = idx;
804
805         if (!in_order) {
806                 vq->vq_desc_head_idx = dxp->next;
807                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
808                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
809         }
810
811         virtio_wmb(vq->hw->weak_barriers);
812         head_dp->flags = head_flags;
813 }
814
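/*
 * Split-ring Tx enqueue: the virtio-net header is pushed into the mbuf
 * headroom, described through an indirect descriptor table, or given a
 * descriptor of its own, followed by one descriptor per mbuf segment.
 */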
815 static inline void
816 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
817                         uint16_t needed, int use_indirect, int can_push,
818                         int in_order)
819 {
820         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
821         struct vq_desc_extra *dxp;
822         struct virtqueue *vq = txvq->vq;
823         struct vring_desc *start_dp;
824         uint16_t seg_num = cookie->nb_segs;
825         uint16_t head_idx, idx;
826         uint16_t head_size = vq->hw->vtnet_hdr_size;
827         bool prepend_header = false;
828         struct virtio_net_hdr *hdr;
829
830         head_idx = vq->vq_desc_head_idx;
831         idx = head_idx;
832         if (in_order)
833                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
834         else
835                 dxp = &vq->vq_descx[idx];
836         dxp->cookie = (void *)cookie;
837         dxp->ndescs = needed;
838
839         start_dp = vq->vq_split.ring.desc;
840
841         if (can_push) {
842                 /* the headroom can hold the header, checked by caller */
843                 hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
844                         cookie->data_off - head_size);
845                 prepend_header = true;
846
847                 /* if offload disabled, it is not zeroed below, do it now */
848                 if (!vq->hw->has_tx_offload)
849                         virtqueue_clear_net_hdr(hdr);
850         } else if (use_indirect) {
851                 /* setup tx ring slot to point to indirect
852                  * descriptor list stored in reserved region.
853                  *
854                  * the first slot in indirect ring is already preset
855                  * to point to the header in reserved region
856                  */
857                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
858                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
859                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
860                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
861                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
862
863                 /* loop below will fill in rest of the indirect elements */
864                 start_dp = txr[idx].tx_indir;
865                 idx = 1;
866         } else {
867                 /* setup first tx ring slot to point to header
868                  * stored in reserved region.
869                  */
870                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
871                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
872                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
873                 start_dp[idx].flags = VRING_DESC_F_NEXT;
874                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
875
876                 idx = start_dp[idx].next;
877         }
878
879         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
880
881         do {
882                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
883                 start_dp[idx].len   = cookie->data_len;
884                 if (prepend_header) {
885                         start_dp[idx].len += head_size;
886                         prepend_header = false;
887                 }
888                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
889                 idx = start_dp[idx].next;
890         } while ((cookie = cookie->next) != NULL);
891
892         if (use_indirect)
893                 idx = vq->vq_split.ring.desc[head_idx].next;
894
895         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
896
897         vq->vq_desc_head_idx = idx;
898         vq_update_avail_ring(vq, head_idx);
899
900         if (!in_order) {
901                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
902                         vq->vq_desc_tail_idx = idx;
903         }
904 }
905
906 void
907 virtio_dev_cq_start(struct rte_eth_dev *dev)
908 {
909         struct virtio_hw *hw = dev->data->dev_private;
910
911         if (hw->cvq && hw->cvq->vq) {
912                 rte_spinlock_init(&hw->cvq->lock);
913                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
914         }
915 }
916
917 int
918 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
919                         uint16_t queue_idx,
920                         uint16_t nb_desc,
921                         unsigned int socket_id __rte_unused,
922                         const struct rte_eth_rxconf *rx_conf __rte_unused,
923                         struct rte_mempool *mp)
924 {
925         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
926         struct virtio_hw *hw = dev->data->dev_private;
927         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
928         struct virtnet_rx *rxvq;
929
930         PMD_INIT_FUNC_TRACE();
931
932         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
933                 nb_desc = vq->vq_nentries;
934         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
935
936         rxvq = &vq->rxq;
937         rxvq->queue_id = queue_idx;
938         rxvq->mpool = mp;
939         dev->data->rx_queues[queue_idx] = rxvq;
940
941         return 0;
942 }
943
944 int
945 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
946 {
947         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
948         struct virtio_hw *hw = dev->data->dev_private;
949         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
950         struct virtnet_rx *rxvq = &vq->rxq;
951         struct rte_mbuf *m;
952         uint16_t desc_idx;
953         int error, nbufs, i;
954
955         PMD_INIT_FUNC_TRACE();
956
957         /* Allocate blank mbufs for each Rx descriptor */
958         nbufs = 0;
959
960         if (hw->use_simple_rx) {
961                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
962                      desc_idx++) {
963                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
964                         vq->vq_split.ring.desc[desc_idx].flags =
965                                 VRING_DESC_F_WRITE;
966                 }
967
968                 virtio_rxq_vec_setup(rxvq);
969         }
970
971         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
972         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
973              desc_idx++) {
974                 vq->sw_ring[vq->vq_nentries + desc_idx] =
975                         &rxvq->fake_mbuf;
976         }
977
978         if (hw->use_simple_rx) {
979                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
980                         virtio_rxq_rearm_vec(rxvq);
981                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
982                 }
983         } else if (hw->use_inorder_rx) {
984                 if ((!virtqueue_full(vq))) {
985                         uint16_t free_cnt = vq->vq_free_cnt;
986                         struct rte_mbuf *pkts[free_cnt];
987
988                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
989                                 free_cnt)) {
990                                 error = virtqueue_enqueue_refill_inorder(vq,
991                                                 pkts,
992                                                 free_cnt);
993                                 if (unlikely(error)) {
994                                         for (i = 0; i < free_cnt; i++)
995                                                 rte_pktmbuf_free(pkts[i]);
996                                 }
997                         }
998
999                         nbufs += free_cnt;
1000                         vq_update_avail_idx(vq);
1001                 }
1002         } else {
1003                 while (!virtqueue_full(vq)) {
1004                         m = rte_mbuf_raw_alloc(rxvq->mpool);
1005                         if (m == NULL)
1006                                 break;
1007
1008                         /* Enqueue allocated buffers */
1009                         if (vtpci_packed_queue(vq->hw))
1010                                 error = virtqueue_enqueue_recv_refill_packed(vq,
1011                                                 &m, 1);
1012                         else
1013                                 error = virtqueue_enqueue_recv_refill(vq,
1014                                                 &m, 1);
1015                         if (error) {
1016                                 rte_pktmbuf_free(m);
1017                                 break;
1018                         }
1019                         nbufs++;
1020                 }
1021
1022                 if (!vtpci_packed_queue(vq->hw))
1023                         vq_update_avail_idx(vq);
1024         }
1025
1026         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
1027
1028         VIRTQUEUE_DUMP(vq);
1029
1030         return 0;
1031 }
1032
1033 /*
1034  * struct rte_eth_dev *dev: Ethernet device whose Tx queue is being set up
1035  * uint16_t nb_desc: Defaults to the value read from config space
1036  * unsigned int socket_id: Used to allocate the memzone
1037  * const struct rte_eth_txconf *tx_conf: Used to set up the Tx engine
1038  * uint16_t queue_idx: Just used as an index in the device Tx queue list
1039  */
1040 int
1041 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1042                         uint16_t queue_idx,
1043                         uint16_t nb_desc,
1044                         unsigned int socket_id __rte_unused,
1045                         const struct rte_eth_txconf *tx_conf)
1046 {
1047         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1048         struct virtio_hw *hw = dev->data->dev_private;
1049         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1050         struct virtnet_tx *txvq;
1051         uint16_t tx_free_thresh;
1052
1053         PMD_INIT_FUNC_TRACE();
1054
1055         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1056                 nb_desc = vq->vq_nentries;
1057         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1058
1059         txvq = &vq->txq;
1060         txvq->queue_id = queue_idx;
1061
1062         tx_free_thresh = tx_conf->tx_free_thresh;
1063         if (tx_free_thresh == 0)
1064                 tx_free_thresh =
1065                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1066
1067         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1068                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1069                         "number of TX entries minus 3 (%u)."
1070                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1071                         vq->vq_nentries - 3,
1072                         tx_free_thresh, dev->data->port_id, queue_idx);
1073                 return -EINVAL;
1074         }
1075
1076         vq->vq_free_thresh = tx_free_thresh;
1077
1078         dev->data->tx_queues[queue_idx] = txvq;
1079         return 0;
1080 }
1081
1082 int
1083 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1084                                 uint16_t queue_idx)
1085 {
1086         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1087         struct virtio_hw *hw = dev->data->dev_private;
1088         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1089
1090         PMD_INIT_FUNC_TRACE();
1091
1092         if (!vtpci_packed_queue(hw)) {
1093                 if (hw->use_inorder_tx)
1094                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1095         }
1096
1097         VIRTQUEUE_DUMP(vq);
1098
1099         return 0;
1100 }
1101
1102 static inline void
1103 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1104 {
1105         int error;
1106         /*
1107          * Requeue the discarded mbuf. This should always be
1108          * successful since it was just dequeued.
1109          */
1110         if (vtpci_packed_queue(vq->hw))
1111                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1112         else
1113                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1114
1115         if (unlikely(error)) {
1116                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1117                 rte_pktmbuf_free(m);
1118         }
1119 }
1120
1121 static inline void
1122 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1123 {
1124         int error;
1125
1126         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1127         if (unlikely(error)) {
1128                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1129                 rte_pktmbuf_free(m);
1130         }
1131 }
1132
1133 /* Optionally fill offload information in structure */
1134 static inline int
1135 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1136 {
1137         struct rte_net_hdr_lens hdr_lens;
1138         uint32_t hdrlen, ptype;
1139         int l4_supported = 0;
1140
1141         /* nothing to do */
1142         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1143                 return 0;
1144
1145         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1146
1147         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1148         m->packet_type = ptype;
1149         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1150             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1151             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1152                 l4_supported = 1;
1153
1154         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1155                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1156                 if (hdr->csum_start <= hdrlen && l4_supported) {
1157                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1158                 } else {
1159                         /* Unknown proto or tunnel, do sw cksum. We can assume
1160                          * the cksum field is in the first segment since the
1161                          * buffers we provided to the host are large enough.
1162                          * In case of SCTP, this will be wrong since it's a CRC
1163                          * but there's nothing we can do.
1164                          */
1165                         uint16_t csum = 0, off;
1166
1167                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1168                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1169                                 &csum);
1170                         if (likely(csum != 0xffff))
1171                                 csum = ~csum;
1172                         off = hdr->csum_offset + hdr->csum_start;
1173                         if (rte_pktmbuf_data_len(m) >= off + 1)
1174                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1175                                         off) = csum;
1176                 }
1177         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1178                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1179         }
1180
1181         /* GSO request, save required information in mbuf */
1182         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1183                 /* Check unsupported modes */
1184                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1185                     (hdr->gso_size == 0)) {
1186                         return -EINVAL;
1187                 }
1188
1189                 /* Update MSS length in mbuf */
1190                 m->tso_segsz = hdr->gso_size;
1191                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1192                         case VIRTIO_NET_HDR_GSO_TCPV4:
1193                         case VIRTIO_NET_HDR_GSO_TCPV6:
1194                                 m->ol_flags |= PKT_RX_LRO |
1195                                         PKT_RX_L4_CKSUM_NONE;
1196                                 break;
1197                         default:
1198                                 return -EINVAL;
1199                 }
1200         }
1201
1202         return 0;
1203 }
1204
1205 #define VIRTIO_MBUF_BURST_SZ 64
1206 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
1207 uint16_t
1208 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1209 {
1210         struct virtnet_rx *rxvq = rx_queue;
1211         struct virtqueue *vq = rxvq->vq;
1212         struct virtio_hw *hw = vq->hw;
1213         struct rte_mbuf *rxm;
1214         uint16_t nb_used, num, nb_rx;
1215         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1216         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1217         int error;
1218         uint32_t i, nb_enqueued;
1219         uint32_t hdr_size;
1220         struct virtio_net_hdr *hdr;
1221
1222         nb_rx = 0;
1223         if (unlikely(hw->started == 0))
1224                 return nb_rx;
1225
1226         nb_used = VIRTQUEUE_NUSED(vq);
1227
1228         virtio_rmb(hw->weak_barriers);
1229
1230         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1231         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1232                 num = VIRTIO_MBUF_BURST_SZ;
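        /* trim the burst so it ends on a descriptor cache-line boundary,
         * limiting the cache lines shared with the device on the next call
         */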
1233         if (likely(num > DESC_PER_CACHELINE))
1234                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1235
1236         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1237         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1238
1239         nb_enqueued = 0;
1240         hdr_size = hw->vtnet_hdr_size;
1241
1242         for (i = 0; i < num ; i++) {
1243                 rxm = rcv_pkts[i];
1244
1245                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1246
1247                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1248                         PMD_RX_LOG(ERR, "Packet drop");
1249                         nb_enqueued++;
1250                         virtio_discard_rxbuf(vq, rxm);
1251                         rxvq->stats.errors++;
1252                         continue;
1253                 }
1254
1255                 rxm->port = rxvq->port_id;
1256                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1257                 rxm->ol_flags = 0;
1258                 rxm->vlan_tci = 0;
1259
1260                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1261                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1262
1263                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1264                         RTE_PKTMBUF_HEADROOM - hdr_size);
1265
1266                 if (hw->vlan_strip)
1267                         rte_vlan_strip(rxm);
1268
1269                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1270                         virtio_discard_rxbuf(vq, rxm);
1271                         rxvq->stats.errors++;
1272                         continue;
1273                 }
1274
1275                 virtio_rx_stats_updated(rxvq, rxm);
1276
1277                 rx_pkts[nb_rx++] = rxm;
1278         }
1279
1280         rxvq->stats.packets += nb_rx;
1281
1282         /* Allocate new mbuf for the used descriptor */
1283         if (likely(!virtqueue_full(vq))) {
1284                 uint16_t free_cnt = vq->vq_free_cnt;
1285                 struct rte_mbuf *new_pkts[free_cnt];
1286
1287                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1288                                                 free_cnt) == 0)) {
1289                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1290                                         free_cnt);
1291                         if (unlikely(error)) {
1292                                 for (i = 0; i < free_cnt; i++)
1293                                         rte_pktmbuf_free(new_pkts[i]);
1294                         }
1295                         nb_enqueued += free_cnt;
1296                 } else {
1297                         struct rte_eth_dev *dev =
1298                                 &rte_eth_devices[rxvq->port_id];
1299                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1300                 }
1301         }
1302
1303         if (likely(nb_enqueued)) {
1304                 vq_update_avail_idx(vq);
1305
1306                 if (unlikely(virtqueue_kick_prepare(vq))) {
1307                         virtqueue_notify(vq);
1308                         PMD_RX_LOG(DEBUG, "Notified");
1309                 }
1310         }
1311
1312         return nb_rx;
1313 }
1314
1315 uint16_t
1316 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1317                         uint16_t nb_pkts)
1318 {
1319         struct virtnet_rx *rxvq = rx_queue;
1320         struct virtqueue *vq = rxvq->vq;
1321         struct virtio_hw *hw = vq->hw;
1322         struct rte_mbuf *rxm;
1323         uint16_t num, nb_rx;
1324         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1325         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1326         int error;
1327         uint32_t i, nb_enqueued;
1328         uint32_t hdr_size;
1329         struct virtio_net_hdr *hdr;
1330
1331         nb_rx = 0;
1332         if (unlikely(hw->started == 0))
1333                 return nb_rx;
1334
1335         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1336         if (likely(num > DESC_PER_CACHELINE))
1337                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1338
1339         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1340         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1341
1342         nb_enqueued = 0;
1343         hdr_size = hw->vtnet_hdr_size;
1344
1345         for (i = 0; i < num; i++) {
1346                 rxm = rcv_pkts[i];
1347
1348                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1349
1350                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1351                         PMD_RX_LOG(ERR, "Packet drop");
1352                         nb_enqueued++;
1353                         virtio_discard_rxbuf(vq, rxm);
1354                         rxvq->stats.errors++;
1355                         continue;
1356                 }
1357
1358                 rxm->port = rxvq->port_id;
1359                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1360                 rxm->ol_flags = 0;
1361                 rxm->vlan_tci = 0;
1362
1363                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1364                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1365
1366                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1367                         RTE_PKTMBUF_HEADROOM - hdr_size);
1368
1369                 if (hw->vlan_strip)
1370                         rte_vlan_strip(rxm);
1371
1372                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1373                         virtio_discard_rxbuf(vq, rxm);
1374                         rxvq->stats.errors++;
1375                         continue;
1376                 }
1377
1378                 virtio_rx_stats_updated(rxvq, rxm);
1379
1380                 rx_pkts[nb_rx++] = rxm;
1381         }
1382
1383         rxvq->stats.packets += nb_rx;
1384
1385         /* Allocate new mbuf for the used descriptor */
1386         if (likely(!virtqueue_full(vq))) {
1387                 uint16_t free_cnt = vq->vq_free_cnt;
1388                 struct rte_mbuf *new_pkts[free_cnt];
1389
1390                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1391                                                 free_cnt) == 0)) {
1392                         error = virtqueue_enqueue_recv_refill_packed(vq,
1393                                         new_pkts, free_cnt);
1394                         if (unlikely(error)) {
1395                                 for (i = 0; i < free_cnt; i++)
1396                                         rte_pktmbuf_free(new_pkts[i]);
1397                         }
1398                         nb_enqueued += free_cnt;
1399                 } else {
1400                         struct rte_eth_dev *dev =
1401                                 &rte_eth_devices[rxvq->port_id];
1402                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1403                 }
1404         }
1405
1406         if (likely(nb_enqueued)) {
1407                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1408                         virtqueue_notify(vq);
1409                         PMD_RX_LOG(DEBUG, "Notified");
1410                 }
1411         }
1412
1413         return nb_rx;
1414 }
1415
1416
1417 uint16_t
1418 virtio_recv_pkts_inorder(void *rx_queue,
1419                         struct rte_mbuf **rx_pkts,
1420                         uint16_t nb_pkts)
1421 {
1422         struct virtnet_rx *rxvq = rx_queue;
1423         struct virtqueue *vq = rxvq->vq;
1424         struct virtio_hw *hw = vq->hw;
1425         struct rte_mbuf *rxm;
1426         struct rte_mbuf *prev = NULL;
1427         uint16_t nb_used, num, nb_rx;
1428         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1429         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1430         int error;
1431         uint32_t nb_enqueued;
1432         uint32_t seg_num;
1433         uint32_t seg_res;
1434         uint32_t hdr_size;
1435         int32_t i;
1436
1437         nb_rx = 0;
1438         if (unlikely(hw->started == 0))
1439                 return nb_rx;
1440
1441         nb_used = VIRTQUEUE_NUSED(vq);
1442         nb_used = RTE_MIN(nb_used, nb_pkts);
1443         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1444
1445         virtio_rmb(hw->weak_barriers);
1446
1447         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1448
1449         nb_enqueued = 0;
1450         seg_num = 1;
1451         seg_res = 0;
1452         hdr_size = hw->vtnet_hdr_size;
1453
1454         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1455
1456         for (i = 0; i < num; i++) {
1457                 struct virtio_net_hdr_mrg_rxbuf *header;
1458
1459                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1460                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1461
1462                 rxm = rcv_pkts[i];
1463
1464                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1465                         PMD_RX_LOG(ERR, "Packet drop");
1466                         nb_enqueued++;
1467                         virtio_discard_rxbuf_inorder(vq, rxm);
1468                         rxvq->stats.errors++;
1469                         continue;
1470                 }
1471
1472                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1473                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1474                          - hdr_size);
1475
1476                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1477                         seg_num = header->num_buffers;
1478                         if (seg_num == 0)
1479                                 seg_num = 1;
1480                 } else {
1481                         seg_num = 1;
1482                 }
1483
1484                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1485                 rxm->nb_segs = seg_num;
1486                 rxm->ol_flags = 0;
1487                 rxm->vlan_tci = 0;
1488                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1489                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1490
1491                 rxm->port = rxvq->port_id;
1492
1493                 rx_pkts[nb_rx] = rxm;
1494                 prev = rxm;
1495
1496                 if (vq->hw->has_rx_offload &&
1497                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1498                         virtio_discard_rxbuf_inorder(vq, rxm);
1499                         rxvq->stats.errors++;
1500                         continue;
1501                 }
1502
1503                 if (hw->vlan_strip)
1504                         rte_vlan_strip(rx_pkts[nb_rx]);
1505
1506                 seg_res = seg_num - 1;
1507
1508                 /* Merge remaining segments */
1509                 while (seg_res != 0 && i < (num - 1)) {
1510                         i++;
1511
1512                         rxm = rcv_pkts[i];
1513                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1514                         rxm->pkt_len = (uint32_t)(len[i]);
1515                         rxm->data_len = (uint16_t)(len[i]);
1516
1517                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1518
1519                         prev->next = rxm;
1520                         prev = rxm;
1521                         seg_res -= 1;
1522                 }
1523
1524                 if (!seg_res) {
1525                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1526                         nb_rx++;
1527                 }
1528         }
1529
1530         /* The last packet still needs its remaining segments merged */
1531         while (seg_res != 0) {
1532                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1533                                         VIRTIO_MBUF_BURST_SZ);
1534
1535                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1536                         virtio_rmb(hw->weak_barriers);
1537                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1538                                                            rcv_cnt);
1539                         uint16_t extra_idx = 0;
1540
1541                         rcv_cnt = num;
1542                         while (extra_idx < rcv_cnt) {
1543                                 rxm = rcv_pkts[extra_idx];
1544                                 rxm->data_off =
1545                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1546                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1547                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1548                                 prev->next = rxm;
1549                                 prev = rxm;
1550                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1551                                 extra_idx += 1;
1552                         }
1553                         seg_res -= rcv_cnt;
1554
1555                         if (!seg_res) {
1556                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1557                                 nb_rx++;
1558                         }
1559                 } else {
1560                         PMD_RX_LOG(ERR,
1561                                         "Not enough segments for packet.");
1562                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1563                         rxvq->stats.errors++;
1564                         break;
1565                 }
1566         }
1567
1568         rxvq->stats.packets += nb_rx;
1569
1570         /* Allocate new mbufs for the used descriptors */
1571
1572         if (likely(!virtqueue_full(vq))) {
1573                 /* free_cnt may include mrg descs */
1574                 uint16_t free_cnt = vq->vq_free_cnt;
1575                 struct rte_mbuf *new_pkts[free_cnt];
1576
1577                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1578                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1579                                         free_cnt);
1580                         if (unlikely(error)) {
1581                                 for (i = 0; i < free_cnt; i++)
1582                                         rte_pktmbuf_free(new_pkts[i]);
1583                         }
1584                         nb_enqueued += free_cnt;
1585                 } else {
1586                         struct rte_eth_dev *dev =
1587                                 &rte_eth_devices[rxvq->port_id];
1588                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1589                 }
1590         }
1591
1592         if (likely(nb_enqueued)) {
1593                 vq_update_avail_idx(vq);
1594
1595                 if (unlikely(virtqueue_kick_prepare(vq))) {
1596                         virtqueue_notify(vq);
1597                         PMD_RX_LOG(DEBUG, "Notified");
1598                 }
1599         }
1600
1601         return nb_rx;
1602 }
1603
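/*
 * Receive burst for the split ring with mergeable Rx buffers: each packet
 * may span header->num_buffers descriptors, which are chained back into a
 * single multi-segment mbuf before being returned to the caller.
 */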
1604 uint16_t
1605 virtio_recv_mergeable_pkts(void *rx_queue,
1606                         struct rte_mbuf **rx_pkts,
1607                         uint16_t nb_pkts)
1608 {
1609         struct virtnet_rx *rxvq = rx_queue;
1610         struct virtqueue *vq = rxvq->vq;
1611         struct virtio_hw *hw = vq->hw;
1612         struct rte_mbuf *rxm;
1613         struct rte_mbuf *prev = NULL;
1614         uint16_t nb_used, num, nb_rx = 0;
1615         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1616         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1617         int error;
1618         uint32_t nb_enqueued = 0;
1619         uint32_t seg_num = 0;
1620         uint32_t seg_res = 0;
1621         uint32_t hdr_size = hw->vtnet_hdr_size;
1622         int32_t i;
1623
1624         if (unlikely(hw->started == 0))
1625                 return nb_rx;
1626
1627         nb_used = VIRTQUEUE_NUSED(vq);
1628
1629         virtio_rmb(hw->weak_barriers);
1630
1631         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1632
1633         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1634         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1635                 num = VIRTIO_MBUF_BURST_SZ;
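        /*
         * Trim the burst so it ends at a used-ring index that is a multiple
         * of DESC_PER_CACHELINE; this keeps the next dequeue from sharing a
         * partially consumed cache line with the device.
         */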
1636         if (likely(num > DESC_PER_CACHELINE))
1637                 num = num - ((vq->vq_used_cons_idx + num) %
1638                                 DESC_PER_CACHELINE);
1639
1640
1641         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1642
1643         for (i = 0; i < num; i++) {
1644                 struct virtio_net_hdr_mrg_rxbuf *header;
1645
1646                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1647                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1648
1649                 rxm = rcv_pkts[i];
1650
1651                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1652                         PMD_RX_LOG(ERR, "Packet drop");
1653                         nb_enqueued++;
1654                         virtio_discard_rxbuf(vq, rxm);
1655                         rxvq->stats.errors++;
1656                         continue;
1657                 }
1658
1659                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1660                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1661                          - hdr_size);
1662                 seg_num = header->num_buffers;
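                /* Defensively treat a num_buffers of zero as a single buffer */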
1663                 if (seg_num == 0)
1664                         seg_num = 1;
1665
1666                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1667                 rxm->nb_segs = seg_num;
1668                 rxm->ol_flags = 0;
1669                 rxm->vlan_tci = 0;
1670                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1671                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1672
1673                 rxm->port = rxvq->port_id;
1674
1675                 rx_pkts[nb_rx] = rxm;
1676                 prev = rxm;
1677
1678                 if (hw->has_rx_offload &&
1679                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1680                         virtio_discard_rxbuf(vq, rxm);
1681                         rxvq->stats.errors++;
1682                         continue;
1683                 }
1684
1685                 if (hw->vlan_strip)
1686                         rte_vlan_strip(rx_pkts[nb_rx]);
1687
1688                 seg_res = seg_num - 1;
1689
1690                 /* Merge remaining segments */
1691                 while (seg_res != 0 && i < (num - 1)) {
1692                         i++;
1693
1694                         rxm = rcv_pkts[i];
1695                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1696                         rxm->pkt_len = (uint32_t)(len[i]);
1697                         rxm->data_len = (uint16_t)(len[i]);
1698
1699                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1700
1701                         prev->next = rxm;
1702                         prev = rxm;
1703                         seg_res -= 1;
1704                 }
1705
1706                 if (!seg_res) {
1707                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1708                         nb_rx++;
1709                 }
1710         }
1711
1712         /* The last packet still needs its remaining segments merged */
1713         while (seg_res != 0) {
1714                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1715                                         VIRTIO_MBUF_BURST_SZ);
1716
1717                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1718                         virtio_rmb(hw->weak_barriers);
1719                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1720                                                            rcv_cnt);
1721                         uint16_t extra_idx = 0;
1722
1723                         rcv_cnt = num;
1724                         while (extra_idx < rcv_cnt) {
1725                                 rxm = rcv_pkts[extra_idx];
1726                                 rxm->data_off =
1727                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1728                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1729                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1730                                 prev->next = rxm;
1731                                 prev = rxm;
1732                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1733                                 extra_idx += 1;
1734                         }
1735                         seg_res -= rcv_cnt;
1736
1737                         if (!seg_res) {
1738                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1739                                 nb_rx++;
1740                         }
1741                 } else {
1742                         PMD_RX_LOG(ERR,
1743                                         "Not enough segments for packet.");
1744                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1745                         rxvq->stats.errors++;
1746                         break;
1747                 }
1748         }
1749
1750         rxvq->stats.packets += nb_rx;
1751
1752         /* Allocate new mbufs for the used descriptors */
1753         if (likely(!virtqueue_full(vq))) {
1754                 /* free_cnt may include mrg descs */
1755                 uint16_t free_cnt = vq->vq_free_cnt;
1756                 struct rte_mbuf *new_pkts[free_cnt];
1757
1758                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1759                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1760                                         free_cnt);
1761                         if (unlikely(error)) {
1762                                 for (i = 0; i < free_cnt; i++)
1763                                         rte_pktmbuf_free(new_pkts[i]);
1764                         }
1765                         nb_enqueued += free_cnt;
1766                 } else {
1767                         struct rte_eth_dev *dev =
1768                                 &rte_eth_devices[rxvq->port_id];
1769                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1770                 }
1771         }
1772
1773         if (likely(nb_enqueued)) {
1774                 vq_update_avail_idx(vq);
1775
1776                 if (unlikely(virtqueue_kick_prepare(vq))) {
1777                         virtqueue_notify(vq);
1778                         PMD_RX_LOG(DEBUG, "Notified");
1779                 }
1780         }
1781
1782         return nb_rx;
1783 }
1784
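/*
 * Receive burst for the packed ring with mergeable Rx buffers. The logic
 * mirrors the split-ring mergeable path, but uses the packed-ring dequeue,
 * refill and kick primitives.
 */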
1785 uint16_t
1786 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1787                         struct rte_mbuf **rx_pkts,
1788                         uint16_t nb_pkts)
1789 {
1790         struct virtnet_rx *rxvq = rx_queue;
1791         struct virtqueue *vq = rxvq->vq;
1792         struct virtio_hw *hw = vq->hw;
1793         struct rte_mbuf *rxm;
1794         struct rte_mbuf *prev = NULL;
1795         uint16_t num, nb_rx = 0;
1796         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1797         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1798         uint32_t nb_enqueued = 0;
1799         uint32_t seg_num = 0;
1800         uint32_t seg_res = 0;
1801         uint32_t hdr_size = hw->vtnet_hdr_size;
1802         int32_t i;
1803         int error;
1804
1805         if (unlikely(hw->started == 0))
1806                 return nb_rx;
1807
1808
1809         num = nb_pkts;
1810         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1811                 num = VIRTIO_MBUF_BURST_SZ;
1812         if (likely(num > DESC_PER_CACHELINE))
1813                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1814
1815         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1816
1817         for (i = 0; i < num; i++) {
1818                 struct virtio_net_hdr_mrg_rxbuf *header;
1819
1820                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1821                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1822
1823                 rxm = rcv_pkts[i];
1824
1825                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1826                         PMD_RX_LOG(ERR, "Packet drop");
1827                         nb_enqueued++;
1828                         virtio_discard_rxbuf(vq, rxm);
1829                         rxvq->stats.errors++;
1830                         continue;
1831                 }
1832
1833                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1834                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1835                 seg_num = header->num_buffers;
1836
1837                 if (seg_num == 0)
1838                         seg_num = 1;
1839
1840                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1841                 rxm->nb_segs = seg_num;
1842                 rxm->ol_flags = 0;
1843                 rxm->vlan_tci = 0;
1844                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1845                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1846
1847                 rxm->port = rxvq->port_id;
1848                 rx_pkts[nb_rx] = rxm;
1849                 prev = rxm;
1850
1851                 if (hw->has_rx_offload &&
1852                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1853                         virtio_discard_rxbuf(vq, rxm);
1854                         rxvq->stats.errors++;
1855                         continue;
1856                 }
1857
1858                 if (hw->vlan_strip)
1859                         rte_vlan_strip(rx_pkts[nb_rx]);
1860
1861                 seg_res = seg_num - 1;
1862
1863                 /* Merge remaining segments */
1864                 while (seg_res != 0 && i < (num - 1)) {
1865                         i++;
1866
1867                         rxm = rcv_pkts[i];
1868                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1869                         rxm->pkt_len = (uint32_t)(len[i]);
1870                         rxm->data_len = (uint16_t)(len[i]);
1871
1872                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1873
1874                         prev->next = rxm;
1875                         prev = rxm;
1876                         seg_res -= 1;
1877                 }
1878
1879                 if (!seg_res) {
1880                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1881                         nb_rx++;
1882                 }
1883         }
1884
1885         /* The last packet still needs its remaining segments merged */
1886         while (seg_res != 0) {
1887                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1888                                         VIRTIO_MBUF_BURST_SZ);
1889                 uint16_t extra_idx = 0;
1890
1891                 rcv_cnt = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1892                                 len, rcv_cnt);
1893                 if (unlikely(rcv_cnt == 0)) {
1894                         PMD_RX_LOG(ERR, "Not enough segments for packet.");
1895                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1896                         rxvq->stats.errors++;
1897                         break;
1898                 }
1899
1900                 while (extra_idx < rcv_cnt) {
1901                         rxm = rcv_pkts[extra_idx];
1902
1903                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1904                         rxm->pkt_len = (uint32_t)(len[extra_idx]);
1905                         rxm->data_len = (uint16_t)(len[extra_idx]);
1906
1907                         prev->next = rxm;
1908                         prev = rxm;
1909                         rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1910                         extra_idx += 1;
1911                 }
1912                 seg_res -= rcv_cnt;
1913                 if (!seg_res) {
1914                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1915                         nb_rx++;
1916                 }
1917         }
1918
1919         rxvq->stats.packets += nb_rx;
1920
1921         /* Allocate new mbufs for the used descriptors */
1922         if (likely(!virtqueue_full(vq))) {
1923                 /* free_cnt may include mrg descs */
1924                 uint16_t free_cnt = vq->vq_free_cnt;
1925                 struct rte_mbuf *new_pkts[free_cnt];
1926
1927                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1928                         error = virtqueue_enqueue_recv_refill_packed(vq,
1929                                         new_pkts, free_cnt);
1930                         if (unlikely(error)) {
1931                                 for (i = 0; i < free_cnt; i++)
1932                                         rte_pktmbuf_free(new_pkts[i]);
1933                         }
1934                         nb_enqueued += free_cnt;
1935                 } else {
1936                         struct rte_eth_dev *dev =
1937                                 &rte_eth_devices[rxvq->port_id];
1938                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1939                 }
1940         }
1941
1942         if (likely(nb_enqueued)) {
1943                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1944                         virtqueue_notify(vq);
1945                         PMD_RX_LOG(DEBUG, "Notified");
1946                 }
1947         }
1948
1949         return nb_rx;
1950 }
1951
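/*
 * Tx prepare callback: performs software VLAN insertion when requested,
 * prepares checksums for the offloads carried in ol_flags and fixes up the
 * TCP checksum for TSO packets. Stops at the first failing packet and
 * reports the error through rte_errno.
 */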
1952 uint16_t
1953 virtio_xmit_pkts_prepare(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
1954                         uint16_t nb_pkts)
1955 {
1956         uint16_t nb_tx;
1957         int error;
1958
1959         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1960                 struct rte_mbuf *m = tx_pkts[nb_tx];
1961
1962 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1963                 error = rte_validate_tx_offload(m);
1964                 if (unlikely(error)) {
1965                         rte_errno = -error;
1966                         break;
1967                 }
1968 #endif
1969
1970                 /* Do VLAN tag insertion */
1971                 if (unlikely(m->ol_flags & PKT_TX_VLAN_PKT)) {
1972                         error = rte_vlan_insert(&m);
1973                         /* rte_vlan_insert() may change pointer
1974                          * even in the case of failure
1975                          */
1976                         tx_pkts[nb_tx] = m;
1977
1978                         if (unlikely(error)) {
1979                                 rte_errno = -error;
1980                                 break;
1981                         }
1982                 }
1983
1984                 error = rte_net_intel_cksum_prepare(m);
1985                 if (unlikely(error)) {
1986                         rte_errno = -error;
1987                         break;
1988                 }
1989
1990                 if (m->ol_flags & PKT_TX_TCP_SEG)
1991                         virtio_tso_fix_cksum(m);
1992         }
1993
1994         return nb_tx;
1995 }
1996
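/*
 * Transmit burst for the packed ring. Completed descriptors are reclaimed
 * on demand and, when the buffer layout allows it, the virtio-net header is
 * pushed into the mbuf headroom so no separate header descriptor is needed.
 */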
1997 uint16_t
1998 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1999                         uint16_t nb_pkts)
2000 {
2001         struct virtnet_tx *txvq = tx_queue;
2002         struct virtqueue *vq = txvq->vq;
2003         struct virtio_hw *hw = vq->hw;
2004         uint16_t hdr_size = hw->vtnet_hdr_size;
2005         uint16_t nb_tx = 0;
2006         bool in_order = hw->use_inorder_tx;
2007
2008         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2009                 return nb_tx;
2010
2011         if (unlikely(nb_pkts < 1))
2012                 return nb_pkts;
2013
2014         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2015
2016         if (nb_pkts > vq->vq_free_cnt)
2017                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
2018                                            in_order);
2019
2020         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2021                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2022                 int can_push = 0, slots, need;
2023
2024                 /* Optimize ring usage: push the net header into the mbuf headroom when possible */
2025                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2026                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2027                     rte_mbuf_refcnt_read(txm) == 1 &&
2028                     RTE_MBUF_DIRECT(txm) &&
2029                     txm->nb_segs == 1 &&
2030                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2031                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2032                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2033                         can_push = 1;
2034
2035                 /* How many main ring entries are needed for this Tx?
2036                  * any_layout => number of segments
2037                  * default    => number of segments + 1
2038                  */
2039                 slots = txm->nb_segs + !can_push;
2040                 need = slots - vq->vq_free_cnt;
2041
2042                 /* A positive value indicates that free vring descriptors are needed */
2043                 if (unlikely(need > 0)) {
2044                         virtio_xmit_cleanup_packed(vq, need, in_order);
2045                         need = slots - vq->vq_free_cnt;
2046                         if (unlikely(need > 0)) {
2047                                 PMD_TX_LOG(ERR,
2048                                            "No free tx descriptors to transmit");
2049                                 break;
2050                         }
2051                 }
2052
2053                 /* Enqueue Packet buffers */
2054                 if (can_push)
2055                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2056                 else
2057                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2058                                                       in_order);
2059
2060                 virtio_update_packet_stats(&txvq->stats, txm);
2061         }
2062
2063         txvq->stats.packets += nb_tx;
2064
2065         if (likely(nb_tx)) {
2066                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2067                         virtqueue_notify(vq);
2068                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2069                 }
2070         }
2071
2072         return nb_tx;
2073 }
2074
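/*
 * Transmit burst for the split ring. Depending on the negotiated features,
 * the virtio-net header is either pushed into the mbuf headroom, placed in
 * an indirect descriptor table, or given its own slot in the ring.
 */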
2075 uint16_t
2076 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2077 {
2078         struct virtnet_tx *txvq = tx_queue;
2079         struct virtqueue *vq = txvq->vq;
2080         struct virtio_hw *hw = vq->hw;
2081         uint16_t hdr_size = hw->vtnet_hdr_size;
2082         uint16_t nb_used, nb_tx = 0;
2083
2084         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2085                 return nb_tx;
2086
2087         if (unlikely(nb_pkts < 1))
2088                 return nb_pkts;
2089
2090         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2091         nb_used = VIRTQUEUE_NUSED(vq);
2092
2093         virtio_rmb(hw->weak_barriers);
2094         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2095                 virtio_xmit_cleanup(vq, nb_used);
2096
2097         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2098                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2099                 int can_push = 0, use_indirect = 0, slots, need;
2100
2101                 /* Optimize ring usage: push the header into headroom, else use an indirect descriptor */
2102                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2103                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2104                     rte_mbuf_refcnt_read(txm) == 1 &&
2105                     RTE_MBUF_DIRECT(txm) &&
2106                     txm->nb_segs == 1 &&
2107                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2108                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2109                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2110                         can_push = 1;
2111                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2112                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2113                         use_indirect = 1;
2114
2115                 /* How many main ring entries are needed for this Tx?
2116                  * any_layout => number of segments
2117                  * indirect   => 1
2118                  * default    => number of segments + 1
2119                  */
2120                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2121                 need = slots - vq->vq_free_cnt;
2122
2123                 /* A positive value indicates that free vring descriptors are needed */
2124                 if (unlikely(need > 0)) {
2125                         nb_used = VIRTQUEUE_NUSED(vq);
2126                         virtio_rmb(hw->weak_barriers);
2127                         need = RTE_MIN(need, (int)nb_used);
2128
2129                         virtio_xmit_cleanup(vq, need);
2130                         need = slots - vq->vq_free_cnt;
2131                         if (unlikely(need > 0)) {
2132                                 PMD_TX_LOG(ERR,
2133                                            "No free tx descriptors to transmit");
2134                                 break;
2135                         }
2136                 }
2137
2138                 /* Enqueue Packet buffers */
2139                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2140                         can_push, 0);
2141
2142                 virtio_update_packet_stats(&txvq->stats, txm);
2143         }
2144
2145         txvq->stats.packets += nb_tx;
2146
2147         if (likely(nb_tx)) {
2148                 vq_update_avail_idx(vq);
2149
2150                 if (unlikely(virtqueue_kick_prepare(vq))) {
2151                         virtqueue_notify(vq);
2152                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2153                 }
2154         }
2155
2156         return nb_tx;
2157 }
2158
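/*
 * Try to free up to 'need' descriptors by cleaning completed in-order
 * transmissions. Returns how many of the requested descriptors are still
 * missing; a value <= 0 means enough descriptors are now free.
 */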
2159 static __rte_always_inline int
2160 virtio_xmit_try_cleanup_inorder(struct virtqueue *vq, uint16_t need)
2161 {
2162         uint16_t nb_used, nb_clean, nb_descs;
2163         struct virtio_hw *hw = vq->hw;
2164
2165         nb_descs = vq->vq_free_cnt + need;
2166         nb_used = VIRTQUEUE_NUSED(vq);
2167         virtio_rmb(hw->weak_barriers);
2168         nb_clean = RTE_MIN(need, (int)nb_used);
2169
2170         virtio_xmit_cleanup_inorder(vq, nb_clean);
2171
2172         return nb_descs - vq->vq_free_cnt;
2173 }
2174
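/*
 * Transmit burst for the split ring when the in-order feature is negotiated.
 * Packets whose header fits in the mbuf headroom are batched and enqueued
 * together through the in-order fast path; all other packets take the
 * generic enqueue with a separate header descriptor.
 */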
2175 uint16_t
2176 virtio_xmit_pkts_inorder(void *tx_queue,
2177                         struct rte_mbuf **tx_pkts,
2178                         uint16_t nb_pkts)
2179 {
2180         struct virtnet_tx *txvq = tx_queue;
2181         struct virtqueue *vq = txvq->vq;
2182         struct virtio_hw *hw = vq->hw;
2183         uint16_t hdr_size = hw->vtnet_hdr_size;
2184         uint16_t nb_used, nb_tx = 0, nb_inorder_pkts = 0;
2185         struct rte_mbuf *inorder_pkts[nb_pkts];
2186         int need;
2187
2188         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2189                 return nb_tx;
2190
2191         if (unlikely(nb_pkts < 1))
2192                 return nb_pkts;
2193
2194         VIRTQUEUE_DUMP(vq);
2195         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2196         nb_used = VIRTQUEUE_NUSED(vq);
2197
2198         virtio_rmb(hw->weak_barriers);
2199         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2200                 virtio_xmit_cleanup_inorder(vq, nb_used);
2201
2202         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2203                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2204                 int slots;
2205
2206                 /* Optimize ring usage: batch packets whose header can be pushed into the mbuf headroom */
2207                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2208                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2209                      rte_mbuf_refcnt_read(txm) == 1 &&
2210                      RTE_MBUF_DIRECT(txm) &&
2211                      txm->nb_segs == 1 &&
2212                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2213                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2214                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2215                         inorder_pkts[nb_inorder_pkts] = txm;
2216                         nb_inorder_pkts++;
2217
2218                         continue;
2219                 }
2220
2221                 if (nb_inorder_pkts) {
2222                         need = nb_inorder_pkts - vq->vq_free_cnt;
2223                         if (unlikely(need > 0)) {
2224                                 need = virtio_xmit_try_cleanup_inorder(vq,
2225                                                                        need);
2226                                 if (unlikely(need > 0)) {
2227                                         PMD_TX_LOG(ERR,
2228                                                 "No free tx descriptors to "
2229                                                 "transmit");
2230                                         break;
2231                                 }
2232                         }
2233                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2234                                                         nb_inorder_pkts);
2235                         nb_inorder_pkts = 0;
2236                 }
2237
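                /* Header cannot be pushed: account for one extra slot for the
                 * separate virtio-net header descriptor.
                 */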
2238                 slots = txm->nb_segs + 1;
2239                 need = slots - vq->vq_free_cnt;
2240                 if (unlikely(need > 0)) {
2241                         need = virtio_xmit_try_cleanup_inorder(vq, slots);
2242
2243                         if (unlikely(need > 0)) {
2244                                 PMD_TX_LOG(ERR,
2245                                         "No free tx descriptors to transmit");
2246                                 break;
2247                         }
2248                 }
2249                 /* Enqueue Packet buffers */
2250                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2251
2252                 virtio_update_packet_stats(&txvq->stats, txm);
2253         }
2254
2255         /* Transmit the remaining batch of in-order packets */
2256         if (nb_inorder_pkts) {
2257                 need = nb_inorder_pkts - vq->vq_free_cnt;
2258                 if (unlikely(need > 0)) {
2259                         need = virtio_xmit_try_cleanup_inorder(vq,
2260                                                                   need);
2261                         if (unlikely(need > 0)) {
2262                                 PMD_TX_LOG(ERR,
2263                                         "No free tx descriptors to transmit");
2264                                 nb_inorder_pkts = vq->vq_free_cnt;
2265                                 nb_tx -= need;
2266                         }
2267                 }
2268
2269                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2270                                                 nb_inorder_pkts);
2271         }
2272
2273         txvq->stats.packets += nb_tx;
2274
2275         if (likely(nb_tx)) {
2276                 vq_update_avail_idx(vq);
2277
2278                 if (unlikely(virtqueue_kick_prepare(vq))) {
2279                         virtqueue_notify(vq);
2280                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2281                 }
2282         }
2283
2284         VIRTQUEUE_DUMP(vq);
2285
2286         return nb_tx;
2287 }