net/virtio: improve perf via one-way barriers on used flag
[dpdk.git] / drivers / net / virtio / virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
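/* Check whether at least 'offset' used descriptors are pending on the Rx virtqueue. */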
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
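/* Return 'num' in-order descriptors to the free pool and update the tail index. */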
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
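/* Walk a (possibly chained) descriptor starting at desc_idx and link it back onto the split-ring free list. */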
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of the
78          * newly freed chain. If the virtqueue was completely used, then the
79          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
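/* Return the descriptors tracked by extra-data slot 'id' to the packed-ring free pool. */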
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
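/* Update the byte count, packet-size histogram and multicast/broadcast counters for one mbuf. */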
109 void
110 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
111 {
112         uint32_t s = mbuf->pkt_len;
113         struct rte_ether_addr *ea;
114
115         stats->bytes += s;
116
117         if (s == 64) {
118                 stats->size_bins[1]++;
119         } else if (s > 64 && s < 1024) {
120                 uint32_t bin;
121
122                 /* count leading zeros, and offset into the correct bin */
123                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
124                 stats->size_bins[bin]++;
125         } else {
126                 if (s < 64)
127                         stats->size_bins[0]++;
128                 else if (s < 1519)
129                         stats->size_bins[6]++;
130                 else
131                         stats->size_bins[7]++;
132         }
133
134         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
135         if (rte_is_multicast_ether_addr(ea)) {
136                 if (rte_is_broadcast_ether_addr(ea))
137                         stats->broadcast++;
138                 else
139                         stats->multicast++;
140         }
141 }
142
143 static inline void
144 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
145 {
146         VIRTIO_DUMP_PACKET(m, m->data_len);
147
148         virtio_update_packet_stats(&rxvq->stats, m);
149 }
150
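/* Dequeue up to 'num' used descriptors from a packed Rx ring; returns the number of mbufs harvested. */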
151 static uint16_t
152 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
153                                   struct rte_mbuf **rx_pkts,
154                                   uint32_t *len,
155                                   uint16_t num)
156 {
157         struct rte_mbuf *cookie;
158         uint16_t used_idx;
159         uint16_t id;
160         struct vring_packed_desc *desc;
161         uint16_t i;
162
163         desc = vq->vq_packed.ring.desc;
164
165         for (i = 0; i < num; i++) {
166                 used_idx = vq->vq_used_cons_idx;
167                 /* desc_is_used has a load-acquire or rte_cio_rmb inside
168                  * and waits for a used descriptor in the virtqueue.
169                  */
170                 if (!desc_is_used(&desc[used_idx], vq))
171                         return i;
172                 len[i] = desc[used_idx].len;
173                 id = desc[used_idx].id;
174                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
175                 if (unlikely(cookie == NULL)) {
176                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
177                                 vq->vq_used_cons_idx);
178                         break;
179                 }
180                 rte_prefetch0(cookie);
181                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
182                 rx_pkts[i] = cookie;
183
184                 vq->vq_free_cnt++;
185                 vq->vq_used_cons_idx++;
186                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
187                         vq->vq_used_cons_idx -= vq->vq_nentries;
188                         vq->vq_packed.used_wrap_counter ^= 1;
189                 }
190         }
191
192         return i;
193 }
194
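/* Dequeue up to 'num' used descriptors from a split Rx ring and free their descriptor chains. */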
195 static uint16_t
196 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
197                            uint32_t *len, uint16_t num)
198 {
199         struct vring_used_elem *uep;
200         struct rte_mbuf *cookie;
201         uint16_t used_idx, desc_idx;
202         uint16_t i;
203
204         /*  Caller does the check */
205         for (i = 0; i < num ; i++) {
206                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
207                 uep = &vq->vq_split.ring.used->ring[used_idx];
208                 desc_idx = (uint16_t) uep->id;
209                 len[i] = uep->len;
210                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
211
212                 if (unlikely(cookie == NULL)) {
213                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
214                                 vq->vq_used_cons_idx);
215                         break;
216                 }
217
218                 rte_prefetch0(cookie);
219                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
220                 rx_pkts[i]  = cookie;
221                 vq->vq_used_cons_idx++;
222                 vq_ring_free_chain(vq, desc_idx);
223                 vq->vq_descx[desc_idx].cookie = NULL;
224         }
225
226         return i;
227 }
228
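/* In-order variant of the split-ring Rx dequeue: the descriptor index equals the used index. */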
229 static uint16_t
230 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
231                         struct rte_mbuf **rx_pkts,
232                         uint32_t *len,
233                         uint16_t num)
234 {
235         struct vring_used_elem *uep;
236         struct rte_mbuf *cookie;
237         uint16_t used_idx = 0;
238         uint16_t i;
239
240         if (unlikely(num == 0))
241                 return 0;
242
243         for (i = 0; i < num; i++) {
244                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
245                 /* Desc idx same as used idx */
246                 uep = &vq->vq_split.ring.used->ring[used_idx];
247                 len[i] = uep->len;
248                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
249
250                 if (unlikely(cookie == NULL)) {
251                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
252                                 vq->vq_used_cons_idx);
253                         break;
254                 }
255
256                 rte_prefetch0(cookie);
257                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
258                 rx_pkts[i]  = cookie;
259                 vq->vq_used_cons_idx++;
260                 vq->vq_descx[used_idx].cookie = NULL;
261         }
262
263         vq_ring_free_inorder(vq, used_idx, i);
264         return i;
265 }
266
267 #ifndef DEFAULT_TX_FREE_THRESH
268 #define DEFAULT_TX_FREE_THRESH 32
269 #endif
270
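/* Reclaim up to 'num' completed Tx descriptors from a packed ring used in order, freeing the transmitted mbufs. */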
271 static void
272 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
273 {
274         uint16_t used_idx, id, curr_id, free_cnt = 0;
275         uint16_t size = vq->vq_nentries;
276         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
277         struct vq_desc_extra *dxp;
278
279         used_idx = vq->vq_used_cons_idx;
280         /* desc_is_used has a load-acquire or rte_cio_rmb inside
281          * and waits for a used descriptor in the virtqueue.
282          */
283         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
284                 id = desc[used_idx].id;
285                 do {
286                         curr_id = used_idx;
287                         dxp = &vq->vq_descx[used_idx];
288                         used_idx += dxp->ndescs;
289                         free_cnt += dxp->ndescs;
290                         num -= dxp->ndescs;
291                         if (used_idx >= size) {
292                                 used_idx -= size;
293                                 vq->vq_packed.used_wrap_counter ^= 1;
294                         }
295                         if (dxp->cookie != NULL) {
296                                 rte_pktmbuf_free(dxp->cookie);
297                                 dxp->cookie = NULL;
298                         }
299                 } while (curr_id != id);
300         }
301         vq->vq_used_cons_idx = used_idx;
302         vq->vq_free_cnt += free_cnt;
303 }
304
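/* Reclaim completed Tx descriptors from a packed ring when in-order completion is not negotiated. */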
305 static void
306 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
307 {
308         uint16_t used_idx, id;
309         uint16_t size = vq->vq_nentries;
310         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
311         struct vq_desc_extra *dxp;
312
313         used_idx = vq->vq_used_cons_idx;
314         /* desc_is_used has a load-acquire or rte_cio_rmb inside
315          * and waits for a used descriptor in the virtqueue.
316          */
317         while (num-- && desc_is_used(&desc[used_idx], vq)) {
318                 id = desc[used_idx].id;
319                 dxp = &vq->vq_descx[id];
320                 vq->vq_used_cons_idx += dxp->ndescs;
321                 if (vq->vq_used_cons_idx >= size) {
322                         vq->vq_used_cons_idx -= size;
323                         vq->vq_packed.used_wrap_counter ^= 1;
324                 }
325                 vq_ring_free_id_packed(vq, id);
326                 if (dxp->cookie != NULL) {
327                         rte_pktmbuf_free(dxp->cookie);
328                         dxp->cookie = NULL;
329                 }
330                 used_idx = vq->vq_used_cons_idx;
331         }
332 }
333
334 /* Cleanup from completed transmits. */
335 static inline void
336 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
337 {
338         if (in_order)
339                 virtio_xmit_cleanup_inorder_packed(vq, num);
340         else
341                 virtio_xmit_cleanup_normal_packed(vq, num);
342 }
343
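/* Reclaim 'num' completed Tx descriptors from a split ring and free their mbufs. */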
344 static void
345 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
346 {
347         uint16_t i, used_idx, desc_idx;
348         for (i = 0; i < num; i++) {
349                 struct vring_used_elem *uep;
350                 struct vq_desc_extra *dxp;
351
352                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
353                 uep = &vq->vq_split.ring.used->ring[used_idx];
354
355                 desc_idx = (uint16_t) uep->id;
356                 dxp = &vq->vq_descx[desc_idx];
357                 vq->vq_used_cons_idx++;
358                 vq_ring_free_chain(vq, desc_idx);
359
360                 if (dxp->cookie != NULL) {
361                         rte_pktmbuf_free(dxp->cookie);
362                         dxp->cookie = NULL;
363                 }
364         }
365 }
366
367 /* Cleanup from completed inorder transmits. */
368 static __rte_always_inline void
369 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
370 {
371         uint16_t i, idx = vq->vq_used_cons_idx;
372         int16_t free_cnt = 0;
373         struct vq_desc_extra *dxp = NULL;
374
375         if (unlikely(num == 0))
376                 return;
377
378         for (i = 0; i < num; i++) {
379                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
380                 free_cnt += dxp->ndescs;
381                 if (dxp->cookie != NULL) {
382                         rte_pktmbuf_free(dxp->cookie);
383                         dxp->cookie = NULL;
384                 }
385         }
386
387         vq->vq_free_cnt += free_cnt;
388         vq->vq_used_cons_idx = idx;
389 }
390
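/* Refill 'num' Rx descriptors of an in-order split ring with the given mbufs, starting at the current head. */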
391 static inline int
392 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
393                         struct rte_mbuf **cookies,
394                         uint16_t num)
395 {
396         struct vq_desc_extra *dxp;
397         struct virtio_hw *hw = vq->hw;
398         struct vring_desc *start_dp;
399         uint16_t head_idx, idx, i = 0;
400
401         if (unlikely(vq->vq_free_cnt == 0))
402                 return -ENOSPC;
403         if (unlikely(vq->vq_free_cnt < num))
404                 return -EMSGSIZE;
405
406         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
407         start_dp = vq->vq_split.ring.desc;
408
409         while (i < num) {
410                 idx = head_idx & (vq->vq_nentries - 1);
411                 dxp = &vq->vq_descx[idx];
412                 dxp->cookie = (void *)cookies[i];
413                 dxp->ndescs = 1;
414
415                 start_dp[idx].addr =
416                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
417                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
418                 start_dp[idx].len =
419                                 cookies[i]->buf_len -
420                                 RTE_PKTMBUF_HEADROOM +
421                                 hw->vtnet_hdr_size;
422                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
423
424                 vq_update_avail_ring(vq, idx);
425                 head_idx++;
426                 i++;
427         }
428
429         vq->vq_desc_head_idx += num;
430         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
431         return 0;
432 }
433
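/* Refill split-ring Rx descriptors with 'num' mbufs, following the free descriptor chain. */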
434 static inline int
435 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
436                                 uint16_t num)
437 {
438         struct vq_desc_extra *dxp;
439         struct virtio_hw *hw = vq->hw;
440         struct vring_desc *start_dp = vq->vq_split.ring.desc;
441         uint16_t idx, i;
442
443         if (unlikely(vq->vq_free_cnt == 0))
444                 return -ENOSPC;
445         if (unlikely(vq->vq_free_cnt < num))
446                 return -EMSGSIZE;
447
448         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
449                 return -EFAULT;
450
451         for (i = 0; i < num; i++) {
452                 idx = vq->vq_desc_head_idx;
453                 dxp = &vq->vq_descx[idx];
454                 dxp->cookie = (void *)cookie[i];
455                 dxp->ndescs = 1;
456
457                 start_dp[idx].addr =
458                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
459                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
460                 start_dp[idx].len =
461                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
462                         hw->vtnet_hdr_size;
463                 start_dp[idx].flags = VRING_DESC_F_WRITE;
464                 vq->vq_desc_head_idx = start_dp[idx].next;
465                 vq_update_avail_ring(vq, idx);
466                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
467                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
468                         break;
469                 }
470         }
471
472         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
473
474         return 0;
475 }
476
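/* Refill packed-ring Rx descriptors with 'num' mbufs and publish them with the cached avail/used flags. */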
477 static inline int
478 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
479                                      struct rte_mbuf **cookie, uint16_t num)
480 {
481         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
482         uint16_t flags = vq->vq_packed.cached_flags;
483         struct virtio_hw *hw = vq->hw;
484         struct vq_desc_extra *dxp;
485         uint16_t idx;
486         int i;
487
488         if (unlikely(vq->vq_free_cnt == 0))
489                 return -ENOSPC;
490         if (unlikely(vq->vq_free_cnt < num))
491                 return -EMSGSIZE;
492
493         for (i = 0; i < num; i++) {
494                 idx = vq->vq_avail_idx;
495                 dxp = &vq->vq_descx[idx];
496                 dxp->cookie = (void *)cookie[i];
497                 dxp->ndescs = 1;
498
499                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
500                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
501                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
502                                         + hw->vtnet_hdr_size;
503
504                 vq->vq_desc_head_idx = dxp->next;
505                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
506                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
507
508                 virtqueue_store_flags_packed(&start_dp[idx], flags,
509                                              hw->weak_barriers);
510
511                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
512                         vq->vq_avail_idx -= vq->vq_nentries;
513                         vq->vq_packed.cached_flags ^=
514                                 VRING_PACKED_DESC_F_AVAIL_USED;
515                         flags = vq->vq_packed.cached_flags;
516                 }
517         }
518         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
519         return 0;
520 }
521
522 /* When doing TSO, the IP length is not included in the pseudo header
523  * checksum of the packet given to the PMD, but virtio expects it to be
524  * included.
525  */
526 static void
527 virtio_tso_fix_cksum(struct rte_mbuf *m)
528 {
529         /* common case: header is not fragmented */
530         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
531                         m->l4_len)) {
532                 struct rte_ipv4_hdr *iph;
533                 struct rte_ipv6_hdr *ip6h;
534                 struct rte_tcp_hdr *th;
535                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
536                 uint32_t tmp;
537
538                 iph = rte_pktmbuf_mtod_offset(m,
539                                         struct rte_ipv4_hdr *, m->l2_len);
540                 th = RTE_PTR_ADD(iph, m->l3_len);
541                 if ((iph->version_ihl >> 4) == 4) {
542                         iph->hdr_checksum = 0;
543                         iph->hdr_checksum = rte_ipv4_cksum(iph);
544                         ip_len = iph->total_length;
545                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
546                                 m->l3_len);
547                 } else {
548                         ip6h = (struct rte_ipv6_hdr *)iph;
549                         ip_paylen = ip6h->payload_len;
550                 }
551
552                 /* calculate the new phdr checksum, this time including ip_paylen */
553                 prev_cksum = th->cksum;
554                 tmp = prev_cksum;
555                 tmp += ip_paylen;
556                 tmp = (tmp & 0xffff) + (tmp >> 16);
557                 new_cksum = tmp;
558
559                 /* replace it in the packet */
560                 th->cksum = new_cksum;
561         }
562 }
563
564
565 /* avoid the write operation when it is not needed, to lessen cache issues */
566 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
567         if ((var) != (val))                     \
568                 (var) = (val);                  \
569 } while (0)
570
571 #define virtqueue_clear_net_hdr(_hdr) do {              \
572         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
573         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
574         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
575         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
576         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
577         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
578 } while (0)
579
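/* Translate mbuf Tx offload flags (checksum, TSO) into the virtio net header. */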
580 static inline void
581 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
582                         struct rte_mbuf *cookie,
583                         bool offload)
584 {
585         if (offload) {
586                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
587                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
588
589                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
590                 case PKT_TX_UDP_CKSUM:
591                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
592                         hdr->csum_offset = offsetof(struct rte_udp_hdr,
593                                 dgram_cksum);
594                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
595                         break;
596
597                 case PKT_TX_TCP_CKSUM:
598                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
599                         hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
600                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
601                         break;
602
603                 default:
604                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
605                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
606                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
607                         break;
608                 }
609
610                 /* TCP Segmentation Offload */
611                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
612                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
613                                 VIRTIO_NET_HDR_GSO_TCPV6 :
614                                 VIRTIO_NET_HDR_GSO_TCPV4;
615                         hdr->gso_size = cookie->tso_segsz;
616                         hdr->hdr_len =
617                                 cookie->l2_len +
618                                 cookie->l3_len +
619                                 cookie->l4_len;
620                 } else {
621                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
622                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
623                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
624                 }
625         }
626 }
627
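/* Enqueue single-segment mbufs on an in-order split Tx ring, pushing the net header into the mbuf headroom. */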
628 static inline void
629 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
630                         struct rte_mbuf **cookies,
631                         uint16_t num)
632 {
633         struct vq_desc_extra *dxp;
634         struct virtqueue *vq = txvq->vq;
635         struct vring_desc *start_dp;
636         struct virtio_net_hdr *hdr;
637         uint16_t idx;
638         uint16_t head_size = vq->hw->vtnet_hdr_size;
639         uint16_t i = 0;
640
641         idx = vq->vq_desc_head_idx;
642         start_dp = vq->vq_split.ring.desc;
643
644         while (i < num) {
645                 idx = idx & (vq->vq_nentries - 1);
646                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
647                 dxp->cookie = (void *)cookies[i];
648                 dxp->ndescs = 1;
649                 virtio_update_packet_stats(&txvq->stats, cookies[i]);
650
651                 hdr = (struct virtio_net_hdr *)((char *)cookies[i]->buf_addr +
652                         cookies[i]->data_off - head_size);
653
654                 /* if offload disabled, hdr is not zeroed yet, do it now */
655                 if (!vq->hw->has_tx_offload)
656                         virtqueue_clear_net_hdr(hdr);
657                 else
658                         virtqueue_xmit_offload(hdr, cookies[i], true);
659
660                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
661                 start_dp[idx].len   = cookies[i]->data_len + head_size;
662                 start_dp[idx].flags = 0;
663
664
665                 vq_update_avail_ring(vq, idx);
666
667                 idx++;
668                 i++;
669         }
670
671         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
672         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
673 }
674
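/* Fast path for a single-descriptor packet on a packed Tx ring, with the net header pushed into the headroom. */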
675 static inline void
676 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
677                                    struct rte_mbuf *cookie,
678                                    int in_order)
679 {
680         struct virtqueue *vq = txvq->vq;
681         struct vring_packed_desc *dp;
682         struct vq_desc_extra *dxp;
683         uint16_t idx, id, flags;
684         uint16_t head_size = vq->hw->vtnet_hdr_size;
685         struct virtio_net_hdr *hdr;
686
687         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
688         idx = vq->vq_avail_idx;
689         dp = &vq->vq_packed.ring.desc[idx];
690
691         dxp = &vq->vq_descx[id];
692         dxp->ndescs = 1;
693         dxp->cookie = cookie;
694
695         flags = vq->vq_packed.cached_flags;
696
697         /* prepend cannot fail, checked by caller */
698         hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
699                 cookie->data_off - head_size);
700
701         /* if offload disabled, hdr is not zeroed yet, do it now */
702         if (!vq->hw->has_tx_offload)
703                 virtqueue_clear_net_hdr(hdr);
704         else
705                 virtqueue_xmit_offload(hdr, cookie, true);
706
707         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
708         dp->len  = cookie->data_len + head_size;
709         dp->id   = id;
710
711         if (++vq->vq_avail_idx >= vq->vq_nentries) {
712                 vq->vq_avail_idx -= vq->vq_nentries;
713                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
714         }
715
716         vq->vq_free_cnt--;
717
718         if (!in_order) {
719                 vq->vq_desc_head_idx = dxp->next;
720                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
721                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
722         }
723
724         virtqueue_store_flags_packed(dp, flags, vq->hw->weak_barriers);
725 }
726
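/* General packed-ring Tx enqueue: handles multi-segment mbufs and a separately stored net header when it cannot be pushed. */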
727 static inline void
728 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
729                               uint16_t needed, int can_push, int in_order)
730 {
731         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
732         struct vq_desc_extra *dxp;
733         struct virtqueue *vq = txvq->vq;
734         struct vring_packed_desc *start_dp, *head_dp;
735         uint16_t idx, id, head_idx, head_flags;
736         uint16_t head_size = vq->hw->vtnet_hdr_size;
737         struct virtio_net_hdr *hdr;
738         uint16_t prev;
739         bool prepend_header = false;
740
741         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
742
743         dxp = &vq->vq_descx[id];
744         dxp->ndescs = needed;
745         dxp->cookie = cookie;
746
747         head_idx = vq->vq_avail_idx;
748         idx = head_idx;
749         prev = head_idx;
750         start_dp = vq->vq_packed.ring.desc;
751
752         head_dp = &vq->vq_packed.ring.desc[idx];
753         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
754         head_flags |= vq->vq_packed.cached_flags;
755
756         if (can_push) {
757                 /* prepend cannot fail, checked by caller */
758                 hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
759                         cookie->data_off - head_size);
760                 prepend_header = true;
761
762                 /* if offload disabled, it is not zeroed below, do it now */
763                 if (!vq->hw->has_tx_offload)
764                         virtqueue_clear_net_hdr(hdr);
765         } else {
766                 /* setup first tx ring slot to point to header
767                  * stored in reserved region.
768                  */
769                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
770                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
771                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
772                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
773                 idx++;
774                 if (idx >= vq->vq_nentries) {
775                         idx -= vq->vq_nentries;
776                         vq->vq_packed.cached_flags ^=
777                                 VRING_PACKED_DESC_F_AVAIL_USED;
778                 }
779         }
780
781         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
782
783         do {
784                 uint16_t flags;
785
786                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
787                 start_dp[idx].len  = cookie->data_len;
788                 if (prepend_header) {
789                         start_dp[idx].len += head_size;
790                         prepend_header = false;
791                 }
792
793                 if (likely(idx != head_idx)) {
794                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
795                         flags |= vq->vq_packed.cached_flags;
796                         start_dp[idx].flags = flags;
797                 }
798                 prev = idx;
799                 idx++;
800                 if (idx >= vq->vq_nentries) {
801                         idx -= vq->vq_nentries;
802                         vq->vq_packed.cached_flags ^=
803                                 VRING_PACKED_DESC_F_AVAIL_USED;
804                 }
805         } while ((cookie = cookie->next) != NULL);
806
807         start_dp[prev].id = id;
808
809         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
810         vq->vq_avail_idx = idx;
811
812         if (!in_order) {
813                 vq->vq_desc_head_idx = dxp->next;
814                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
815                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
816         }
817
818         virtqueue_store_flags_packed(head_dp, head_flags,
819                                      vq->hw->weak_barriers);
820 }
821
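/* General split-ring Tx enqueue: supports pushed, indirect, and chained descriptor layouts. */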
822 static inline void
823 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
824                         uint16_t needed, int use_indirect, int can_push,
825                         int in_order)
826 {
827         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
828         struct vq_desc_extra *dxp;
829         struct virtqueue *vq = txvq->vq;
830         struct vring_desc *start_dp;
831         uint16_t seg_num = cookie->nb_segs;
832         uint16_t head_idx, idx;
833         uint16_t head_size = vq->hw->vtnet_hdr_size;
834         bool prepend_header = false;
835         struct virtio_net_hdr *hdr;
836
837         head_idx = vq->vq_desc_head_idx;
838         idx = head_idx;
839         if (in_order)
840                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
841         else
842                 dxp = &vq->vq_descx[idx];
843         dxp->cookie = (void *)cookie;
844         dxp->ndescs = needed;
845
846         start_dp = vq->vq_split.ring.desc;
847
848         if (can_push) {
849                 /* prepend cannot fail, checked by caller */
850                 hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
851                         cookie->data_off - head_size);
852                 prepend_header = true;
853
854                 /* if offload disabled, it is not zeroed below, do it now */
855                 if (!vq->hw->has_tx_offload)
856                         virtqueue_clear_net_hdr(hdr);
857         } else if (use_indirect) {
858                 /* setup tx ring slot to point to indirect
859                  * descriptor list stored in reserved region.
860                  *
861                  * the first slot in indirect ring is already preset
862                  * to point to the header in reserved region
863                  */
864                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
865                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
866                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
867                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
868                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
869
870                 /* loop below will fill in rest of the indirect elements */
871                 start_dp = txr[idx].tx_indir;
872                 idx = 1;
873         } else {
874                 /* setup first tx ring slot to point to header
875                  * stored in reserved region.
876                  */
877                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
878                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
879                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
880                 start_dp[idx].flags = VRING_DESC_F_NEXT;
881                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
882
883                 idx = start_dp[idx].next;
884         }
885
886         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
887
888         do {
889                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
890                 start_dp[idx].len   = cookie->data_len;
891                 if (prepend_header) {
892                         start_dp[idx].len += head_size;
893                         prepend_header = false;
894                 }
895                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
896                 idx = start_dp[idx].next;
897         } while ((cookie = cookie->next) != NULL);
898
899         if (use_indirect)
900                 idx = vq->vq_split.ring.desc[head_idx].next;
901
902         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
903
904         vq->vq_desc_head_idx = idx;
905         vq_update_avail_ring(vq, head_idx);
906
907         if (!in_order) {
908                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
909                         vq->vq_desc_tail_idx = idx;
910         }
911 }
912
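/* Initialize the control virtqueue lock and dump its state when the device starts. */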
913 void
914 virtio_dev_cq_start(struct rte_eth_dev *dev)
915 {
916         struct virtio_hw *hw = dev->data->dev_private;
917
918         if (hw->cvq && hw->cvq->vq) {
919                 rte_spinlock_init(&hw->cvq->lock);
920                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
921         }
922 }
923
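/* Ethdev Rx queue setup: clamp the descriptor count and bind the pre-allocated virtqueue and mempool to the queue index. */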
924 int
925 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
926                         uint16_t queue_idx,
927                         uint16_t nb_desc,
928                         unsigned int socket_id __rte_unused,
929                         const struct rte_eth_rxconf *rx_conf __rte_unused,
930                         struct rte_mempool *mp)
931 {
932         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
933         struct virtio_hw *hw = dev->data->dev_private;
934         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
935         struct virtnet_rx *rxvq;
936
937         PMD_INIT_FUNC_TRACE();
938
939         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
940                 nb_desc = vq->vq_nentries;
941         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
942
943         rxvq = &vq->rxq;
944         rxvq->queue_id = queue_idx;
945         rxvq->mpool = mp;
946         dev->data->rx_queues[queue_idx] = rxvq;
947
948         return 0;
949 }
950
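/* Second stage of Rx queue setup: fill the ring with receive mbufs using the simple, in-order, or default path. */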
951 int
952 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
953 {
954         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
955         struct virtio_hw *hw = dev->data->dev_private;
956         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
957         struct virtnet_rx *rxvq = &vq->rxq;
958         struct rte_mbuf *m;
959         uint16_t desc_idx;
960         int error, nbufs, i;
961
962         PMD_INIT_FUNC_TRACE();
963
964         /* Allocate blank mbufs for each Rx descriptor */
965         nbufs = 0;
966
967         if (hw->use_simple_rx) {
968                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
969                      desc_idx++) {
970                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
971                         vq->vq_split.ring.desc[desc_idx].flags =
972                                 VRING_DESC_F_WRITE;
973                 }
974
975                 virtio_rxq_vec_setup(rxvq);
976         }
977
978         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
979         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
980              desc_idx++) {
981                 vq->sw_ring[vq->vq_nentries + desc_idx] =
982                         &rxvq->fake_mbuf;
983         }
984
985         if (hw->use_simple_rx) {
986                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
987                         virtio_rxq_rearm_vec(rxvq);
988                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
989                 }
990         } else if (hw->use_inorder_rx) {
991                 if ((!virtqueue_full(vq))) {
992                         uint16_t free_cnt = vq->vq_free_cnt;
993                         struct rte_mbuf *pkts[free_cnt];
994
995                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
996                                 free_cnt)) {
997                                 error = virtqueue_enqueue_refill_inorder(vq,
998                                                 pkts,
999                                                 free_cnt);
1000                                 if (unlikely(error)) {
1001                                         for (i = 0; i < free_cnt; i++)
1002                                                 rte_pktmbuf_free(pkts[i]);
1003                                 }
1004                         }
1005
1006                         nbufs += free_cnt;
1007                         vq_update_avail_idx(vq);
1008                 }
1009         } else {
1010                 while (!virtqueue_full(vq)) {
1011                         m = rte_mbuf_raw_alloc(rxvq->mpool);
1012                         if (m == NULL)
1013                                 break;
1014
1015                         /* Enqueue allocated buffers */
1016                         if (vtpci_packed_queue(vq->hw))
1017                                 error = virtqueue_enqueue_recv_refill_packed(vq,
1018                                                 &m, 1);
1019                         else
1020                                 error = virtqueue_enqueue_recv_refill(vq,
1021                                                 &m, 1);
1022                         if (error) {
1023                                 rte_pktmbuf_free(m);
1024                                 break;
1025                         }
1026                         nbufs++;
1027                 }
1028
1029                 if (!vtpci_packed_queue(vq->hw))
1030                         vq_update_avail_idx(vq);
1031         }
1032
1033         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
1034
1035         VIRTQUEUE_DUMP(vq);
1036
1037         return 0;
1038 }
1039
1040 /*
1041  * struct rte_eth_dev *dev: Used to update dev
1042  * uint16_t nb_desc: Defaults to values read from config space
1043  * unsigned int socket_id: Used to allocate memzone
1044  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
1045  * uint16_t queue_idx: Just used as an index in dev txq list
1046  */
1047 int
1048 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1049                         uint16_t queue_idx,
1050                         uint16_t nb_desc,
1051                         unsigned int socket_id __rte_unused,
1052                         const struct rte_eth_txconf *tx_conf)
1053 {
1054         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1055         struct virtio_hw *hw = dev->data->dev_private;
1056         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1057         struct virtnet_tx *txvq;
1058         uint16_t tx_free_thresh;
1059
1060         PMD_INIT_FUNC_TRACE();
1061
1062         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1063                 nb_desc = vq->vq_nentries;
1064         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1065
1066         txvq = &vq->txq;
1067         txvq->queue_id = queue_idx;
1068
1069         tx_free_thresh = tx_conf->tx_free_thresh;
1070         if (tx_free_thresh == 0)
1071                 tx_free_thresh =
1072                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1073
1074         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1075                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1076                         "number of TX entries minus 3 (%u)."
1077                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1078                         vq->vq_nentries - 3,
1079                         tx_free_thresh, dev->data->port_id, queue_idx);
1080                 return -EINVAL;
1081         }
1082
1083         vq->vq_free_thresh = tx_free_thresh;
1084
1085         dev->data->tx_queues[queue_idx] = txvq;
1086         return 0;
1087 }
1088
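/* Second stage of Tx queue setup: for in-order split rings, chain the last descriptor back to index 0. */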
1089 int
1090 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1091                                 uint16_t queue_idx)
1092 {
1093         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1094         struct virtio_hw *hw = dev->data->dev_private;
1095         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1096
1097         PMD_INIT_FUNC_TRACE();
1098
1099         if (!vtpci_packed_queue(hw)) {
1100                 if (hw->use_inorder_tx)
1101                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1102         }
1103
1104         VIRTQUEUE_DUMP(vq);
1105
1106         return 0;
1107 }
1108
1109 static inline void
1110 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1111 {
1112         int error;
1113         /*
1114          * Requeue the discarded mbuf. This should always be
1115          * successful since it was just dequeued.
1116          */
1117         if (vtpci_packed_queue(vq->hw))
1118                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1119         else
1120                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1121
1122         if (unlikely(error)) {
1123                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1124                 rte_pktmbuf_free(m);
1125         }
1126 }
1127
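/* Requeue a discarded mbuf on an in-order ring, freeing it if the requeue fails. */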
1128 static inline void
1129 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1130 {
1131         int error;
1132
1133         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1134         if (unlikely(error)) {
1135                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1136                 rte_pktmbuf_free(m);
1137         }
1138 }
1139
1140 /* Optionally fill offload information in structure */
1141 static inline int
1142 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1143 {
1144         struct rte_net_hdr_lens hdr_lens;
1145         uint32_t hdrlen, ptype;
1146         int l4_supported = 0;
1147
1148         /* nothing to do */
1149         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1150                 return 0;
1151
1152         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1153
1154         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1155         m->packet_type = ptype;
1156         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1157             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1158             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1159                 l4_supported = 1;
1160
1161         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1162                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1163                 if (hdr->csum_start <= hdrlen && l4_supported) {
1164                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1165                 } else {
1166                         /* Unknown proto or tunnel, do sw cksum. We can assume
1167                          * the cksum field is in the first segment since the
1168                          * buffers we provided to the host are large enough.
1169                          * In case of SCTP, this will be wrong since it's a CRC
1170                          * but there's nothing we can do.
1171                          */
1172                         uint16_t csum = 0, off;
1173
1174                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1175                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1176                                 &csum);
1177                         if (likely(csum != 0xffff))
1178                                 csum = ~csum;
1179                         off = hdr->csum_offset + hdr->csum_start;
1180                         if (rte_pktmbuf_data_len(m) >= off + 1)
1181                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1182                                         off) = csum;
1183                 }
1184         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1185                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1186         }
1187
1188         /* GSO request, save required information in mbuf */
1189         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1190                 /* Check unsupported modes */
1191                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1192                     (hdr->gso_size == 0)) {
1193                         return -EINVAL;
1194                 }
1195
1196                 /* Update MSS length in mbuf */
1197                 m->tso_segsz = hdr->gso_size;
1198                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1199                         case VIRTIO_NET_HDR_GSO_TCPV4:
1200                         case VIRTIO_NET_HDR_GSO_TCPV6:
1201                                 m->ol_flags |= PKT_RX_LRO | \
1202                                         PKT_RX_L4_CKSUM_NONE;
1203                                 break;
1204                         default:
1205                                 return -EINVAL;
1206                 }
1207         }
1208
1209         return 0;
1210 }
1211
1212 #define VIRTIO_MBUF_BURST_SZ 64
1213 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
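/* Receive burst for the split ring without mergeable Rx buffers. */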
1214 uint16_t
1215 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1216 {
1217         struct virtnet_rx *rxvq = rx_queue;
1218         struct virtqueue *vq = rxvq->vq;
1219         struct virtio_hw *hw = vq->hw;
1220         struct rte_mbuf *rxm;
1221         uint16_t nb_used, num, nb_rx;
1222         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1223         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1224         int error;
1225         uint32_t i, nb_enqueued;
1226         uint32_t hdr_size;
1227         struct virtio_net_hdr *hdr;
1228
1229         nb_rx = 0;
1230         if (unlikely(hw->started == 0))
1231                 return nb_rx;
1232
1233         nb_used = VIRTQUEUE_NUSED(vq);
1234
1235         virtio_rmb(hw->weak_barriers);
1236
1237         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1238         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1239                 num = VIRTIO_MBUF_BURST_SZ;
1240         if (likely(num > DESC_PER_CACHELINE))
1241                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1242
1243         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1244         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1245
1246         nb_enqueued = 0;
1247         hdr_size = hw->vtnet_hdr_size;
1248
1249         for (i = 0; i < num ; i++) {
1250                 rxm = rcv_pkts[i];
1251
1252                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1253
1254                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1255                         PMD_RX_LOG(ERR, "Packet drop");
1256                         nb_enqueued++;
1257                         virtio_discard_rxbuf(vq, rxm);
1258                         rxvq->stats.errors++;
1259                         continue;
1260                 }
1261
1262                 rxm->port = rxvq->port_id;
1263                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1264                 rxm->ol_flags = 0;
1265                 rxm->vlan_tci = 0;
1266
1267                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1268                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1269
1270                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1271                         RTE_PKTMBUF_HEADROOM - hdr_size);
1272
1273                 if (hw->vlan_strip)
1274                         rte_vlan_strip(rxm);
1275
1276                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1277                         virtio_discard_rxbuf(vq, rxm);
1278                         rxvq->stats.errors++;
1279                         continue;
1280                 }
1281
1282                 virtio_rx_stats_updated(rxvq, rxm);
1283
1284                 rx_pkts[nb_rx++] = rxm;
1285         }
1286
1287         rxvq->stats.packets += nb_rx;
1288
1289         /* Allocate new mbuf for the used descriptor */
1290         if (likely(!virtqueue_full(vq))) {
1291                 uint16_t free_cnt = vq->vq_free_cnt;
1292                 struct rte_mbuf *new_pkts[free_cnt];
1293
1294                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1295                                                 free_cnt) == 0)) {
1296                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1297                                         free_cnt);
1298                         if (unlikely(error)) {
1299                                 for (i = 0; i < free_cnt; i++)
1300                                         rte_pktmbuf_free(new_pkts[i]);
1301                         }
1302                         nb_enqueued += free_cnt;
1303                 } else {
1304                         struct rte_eth_dev *dev =
1305                                 &rte_eth_devices[rxvq->port_id];
1306                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1307                 }
1308         }
1309
1310         if (likely(nb_enqueued)) {
1311                 vq_update_avail_idx(vq);
1312
1313                 if (unlikely(virtqueue_kick_prepare(vq))) {
1314                         virtqueue_notify(vq);
1315                         PMD_RX_LOG(DEBUG, "Notified");
1316                 }
1317         }
1318
1319         return nb_rx;
1320 }
1321
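/* Receive burst for the packed ring without mergeable Rx buffers. */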
1322 uint16_t
1323 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1324                         uint16_t nb_pkts)
1325 {
1326         struct virtnet_rx *rxvq = rx_queue;
1327         struct virtqueue *vq = rxvq->vq;
1328         struct virtio_hw *hw = vq->hw;
1329         struct rte_mbuf *rxm;
1330         uint16_t num, nb_rx;
1331         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1332         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1333         int error;
1334         uint32_t i, nb_enqueued;
1335         uint32_t hdr_size;
1336         struct virtio_net_hdr *hdr;
1337
1338         nb_rx = 0;
1339         if (unlikely(hw->started == 0))
1340                 return nb_rx;
1341
1342         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1343         if (likely(num > DESC_PER_CACHELINE))
1344                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1345
1346         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1347         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1348
1349         nb_enqueued = 0;
1350         hdr_size = hw->vtnet_hdr_size;
1351
1352         for (i = 0; i < num; i++) {
1353                 rxm = rcv_pkts[i];
1354
1355                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1356
1357                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1358                         PMD_RX_LOG(ERR, "Packet drop");
1359                         nb_enqueued++;
1360                         virtio_discard_rxbuf(vq, rxm);
1361                         rxvq->stats.errors++;
1362                         continue;
1363                 }
1364
1365                 rxm->port = rxvq->port_id;
1366                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1367                 rxm->ol_flags = 0;
1368                 rxm->vlan_tci = 0;
1369
1370                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1371                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1372
1373                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1374                         RTE_PKTMBUF_HEADROOM - hdr_size);
1375
1376                 if (hw->vlan_strip)
1377                         rte_vlan_strip(rxm);
1378
1379                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1380                         virtio_discard_rxbuf(vq, rxm);
1381                         rxvq->stats.errors++;
1382                         continue;
1383                 }
1384
1385                 virtio_rx_stats_updated(rxvq, rxm);
1386
1387                 rx_pkts[nb_rx++] = rxm;
1388         }
1389
1390         rxvq->stats.packets += nb_rx;
1391
1392         /* Allocate new mbuf for the used descriptor */
1393         if (likely(!virtqueue_full(vq))) {
1394                 uint16_t free_cnt = vq->vq_free_cnt;
1395                 struct rte_mbuf *new_pkts[free_cnt];
1396
1397                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1398                                                 free_cnt) == 0)) {
1399                         error = virtqueue_enqueue_recv_refill_packed(vq,
1400                                         new_pkts, free_cnt);
1401                         if (unlikely(error)) {
1402                                 for (i = 0; i < free_cnt; i++)
1403                                         rte_pktmbuf_free(new_pkts[i]);
1404                         }
1405                         nb_enqueued += free_cnt;
1406                 } else {
1407                         struct rte_eth_dev *dev =
1408                                 &rte_eth_devices[rxvq->port_id];
1409                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1410                 }
1411         }
1412
1413         if (likely(nb_enqueued)) {
1414                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1415                         virtqueue_notify(vq);
1416                         PMD_RX_LOG(DEBUG, "Notified");
1417                 }
1418         }
1419
1420         return nb_rx;
1421 }
1422
1423
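/* Receive burst for the in-order split ring; merges chained segments when mergeable Rx buffers are negotiated. */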
1424 uint16_t
1425 virtio_recv_pkts_inorder(void *rx_queue,
1426                         struct rte_mbuf **rx_pkts,
1427                         uint16_t nb_pkts)
1428 {
1429         struct virtnet_rx *rxvq = rx_queue;
1430         struct virtqueue *vq = rxvq->vq;
1431         struct virtio_hw *hw = vq->hw;
1432         struct rte_mbuf *rxm;
1433         struct rte_mbuf *prev = NULL;
1434         uint16_t nb_used, num, nb_rx;
1435         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1436         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1437         int error;
1438         uint32_t nb_enqueued;
1439         uint32_t seg_num;
1440         uint32_t seg_res;
1441         uint32_t hdr_size;
1442         int32_t i;
1443
1444         nb_rx = 0;
1445         if (unlikely(hw->started == 0))
1446                 return nb_rx;
1447
1448         nb_used = VIRTQUEUE_NUSED(vq);
1449         nb_used = RTE_MIN(nb_used, nb_pkts);
1450         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1451
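        /* Order the used index read above before the used ring reads below. */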
1452         virtio_rmb(hw->weak_barriers);
1453
1454         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1455
1456         nb_enqueued = 0;
1457         seg_num = 1;
1458         seg_res = 0;
1459         hdr_size = hw->vtnet_hdr_size;
1460
1461         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1462
1463         for (i = 0; i < num; i++) {
1464                 struct virtio_net_hdr_mrg_rxbuf *header;
1465
1466                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1467                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1468
1469                 rxm = rcv_pkts[i];
1470
1471                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1472                         PMD_RX_LOG(ERR, "Packet drop");
1473                         nb_enqueued++;
1474                         virtio_discard_rxbuf_inorder(vq, rxm);
1475                         rxvq->stats.errors++;
1476                         continue;
1477                 }
1478
1479                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1480                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1481                          - hdr_size);
1482
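                /* With mergeable RX buffers the header's num_buffers field
                 * tells how many descriptors the device used for this packet;
                 * without the feature every packet fits in a single buffer.
                 */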
1483                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1484                         seg_num = header->num_buffers;
1485                         if (seg_num == 0)
1486                                 seg_num = 1;
1487                 } else {
1488                         seg_num = 1;
1489                 }
1490
1491                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1492                 rxm->nb_segs = seg_num;
1493                 rxm->ol_flags = 0;
1494                 rxm->vlan_tci = 0;
1495                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1496                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1497
1498                 rxm->port = rxvq->port_id;
1499
1500                 rx_pkts[nb_rx] = rxm;
1501                 prev = rxm;
1502
1503                 if (vq->hw->has_rx_offload &&
1504                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1505                         virtio_discard_rxbuf_inorder(vq, rxm);
1506                         rxvq->stats.errors++;
1507                         continue;
1508                 }
1509
1510                 if (hw->vlan_strip)
1511                         rte_vlan_strip(rx_pkts[nb_rx]);
1512
1513                 seg_res = seg_num - 1;
1514
1515                 /* Merge remaining segments */
1516                 while (seg_res != 0 && i < (num - 1)) {
1517                         i++;
1518
1519                         rxm = rcv_pkts[i];
1520                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1521                         rxm->pkt_len = (uint32_t)(len[i]);
1522                         rxm->data_len = (uint16_t)(len[i]);
1523
1524                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1525
1526                         prev->next = rxm;
1527                         prev = rxm;
1528                         seg_res -= 1;
1529                 }
1530
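                /* The packet is complete only if all of its segments were
                 * dequeued in this burst; otherwise its tail is finished in
                 * the loop below.
                 */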
1531                 if (!seg_res) {
1532                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1533                         nb_rx++;
1534                 }
1535         }
1536
1537         /* The last packet still needs its remaining segments merged */
1538         while (seg_res != 0) {
1539                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1540                                         VIRTIO_MBUF_BURST_SZ);
1541
1542                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1543                         virtio_rmb(hw->weak_barriers);
1544                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1545                                                            rcv_cnt);
1546                         uint16_t extra_idx = 0;
1547
1548                         rcv_cnt = num;
1549                         while (extra_idx < rcv_cnt) {
1550                                 rxm = rcv_pkts[extra_idx];
1551                                 rxm->data_off =
1552                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1553                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1554                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1555                                 prev->next = rxm;
1556                                 prev = rxm;
1557                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1558                                 extra_idx += 1;
1559                         }
1560                         seg_res -= rcv_cnt;
1561
1562                         if (!seg_res) {
1563                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1564                                 nb_rx++;
1565                         }
1566                 } else {
1567                         PMD_RX_LOG(ERR,
1568                                         "Not enough segments for packet.");
1569                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1570                         rxvq->stats.errors++;
1571                         break;
1572                 }
1573         }
1574
1575         rxvq->stats.packets += nb_rx;
1576
1577         /* Allocate new mbufs to refill the used descriptors */
1578
1579         if (likely(!virtqueue_full(vq))) {
1580                 /* free_cnt may include mrg descs */
1581                 uint16_t free_cnt = vq->vq_free_cnt;
1582                 struct rte_mbuf *new_pkts[free_cnt];
1583
1584                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1585                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1586                                         free_cnt);
1587                         if (unlikely(error)) {
1588                                 for (i = 0; i < free_cnt; i++)
1589                                         rte_pktmbuf_free(new_pkts[i]);
1590                         }
1591                         nb_enqueued += free_cnt;
1592                 } else {
1593                         struct rte_eth_dev *dev =
1594                                 &rte_eth_devices[rxvq->port_id];
1595                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1596                 }
1597         }
1598
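        /* Publish the refilled buffers by updating the avail index, then
         * notify the device only if it has not suppressed notifications.
         */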
1599         if (likely(nb_enqueued)) {
1600                 vq_update_avail_idx(vq);
1601
1602                 if (unlikely(virtqueue_kick_prepare(vq))) {
1603                         virtqueue_notify(vq);
1604                         PMD_RX_LOG(DEBUG, "Notified");
1605                 }
1606         }
1607
1608         return nb_rx;
1609 }
1610
1611 uint16_t
1612 virtio_recv_mergeable_pkts(void *rx_queue,
1613                         struct rte_mbuf **rx_pkts,
1614                         uint16_t nb_pkts)
1615 {
1616         struct virtnet_rx *rxvq = rx_queue;
1617         struct virtqueue *vq = rxvq->vq;
1618         struct virtio_hw *hw = vq->hw;
1619         struct rte_mbuf *rxm;
1620         struct rte_mbuf *prev = NULL;
1621         uint16_t nb_used, num, nb_rx = 0;
1622         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1623         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1624         int error;
1625         uint32_t nb_enqueued = 0;
1626         uint32_t seg_num = 0;
1627         uint32_t seg_res = 0;
1628         uint32_t hdr_size = hw->vtnet_hdr_size;
1629         int32_t i;
1630
1631         if (unlikely(hw->started == 0))
1632                 return nb_rx;
1633
1634         nb_used = VIRTQUEUE_NUSED(vq);
1635
1636         virtio_rmb(hw->weak_barriers);
1637
1638         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1639
1640         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1641         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1642                 num = VIRTIO_MBUF_BURST_SZ;
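        /* Trim the burst so the dequeue stops on a used-ring index that is a
         * multiple of DESC_PER_CACHELINE, keeping the next burst from sharing
         * a partially consumed cache line with the device.
         */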
1643         if (likely(num > DESC_PER_CACHELINE))
1644                 num = num - ((vq->vq_used_cons_idx + num) %
1645                                 DESC_PER_CACHELINE);
1646
1647
1648         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1649
1650         for (i = 0; i < num; i++) {
1651                 struct virtio_net_hdr_mrg_rxbuf *header;
1652
1653                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1654                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1655
1656                 rxm = rcv_pkts[i];
1657
1658                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1659                         PMD_RX_LOG(ERR, "Packet drop");
1660                         nb_enqueued++;
1661                         virtio_discard_rxbuf(vq, rxm);
1662                         rxvq->stats.errors++;
1663                         continue;
1664                 }
1665
1666                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1667                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1668                          - hdr_size);
1669                 seg_num = header->num_buffers;
1670                 if (seg_num == 0)
1671                         seg_num = 1;
1672
1673                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1674                 rxm->nb_segs = seg_num;
1675                 rxm->ol_flags = 0;
1676                 rxm->vlan_tci = 0;
1677                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1678                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1679
1680                 rxm->port = rxvq->port_id;
1681
1682                 rx_pkts[nb_rx] = rxm;
1683                 prev = rxm;
1684
1685                 if (hw->has_rx_offload &&
1686                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1687                         virtio_discard_rxbuf(vq, rxm);
1688                         rxvq->stats.errors++;
1689                         continue;
1690                 }
1691
1692                 if (hw->vlan_strip)
1693                         rte_vlan_strip(rx_pkts[nb_rx]);
1694
1695                 seg_res = seg_num - 1;
1696
1697                 /* Merge remaining segments */
1698                 while (seg_res != 0 && i < (num - 1)) {
1699                         i++;
1700
1701                         rxm = rcv_pkts[i];
1702                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1703                         rxm->pkt_len = (uint32_t)(len[i]);
1704                         rxm->data_len = (uint16_t)(len[i]);
1705
1706                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1707
1708                         prev->next = rxm;
1709                         prev = rxm;
1710                         seg_res -= 1;
1711                 }
1712
1713                 if (!seg_res) {
1714                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1715                         nb_rx++;
1716                 }
1717         }
1718
1719         /* The last packet still needs its remaining segments merged */
1720         while (seg_res != 0) {
1721                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1722                                         VIRTIO_MBUF_BURST_SZ);
1723
1724                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1725                         virtio_rmb(hw->weak_barriers);
1726                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1727                                                            rcv_cnt);
1728                         uint16_t extra_idx = 0;
1729
1730                         rcv_cnt = num;
1731                         while (extra_idx < rcv_cnt) {
1732                                 rxm = rcv_pkts[extra_idx];
1733                                 rxm->data_off =
1734                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1735                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1736                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1737                                 prev->next = rxm;
1738                                 prev = rxm;
1739                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1740                                 extra_idx += 1;
1741                         }
1742                         seg_res -= rcv_cnt;
1743
1744                         if (!seg_res) {
1745                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1746                                 nb_rx++;
1747                         }
1748                 } else {
1749                         PMD_RX_LOG(ERR,
1750                                         "Not enough segments for packet.");
1751                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1752                         rxvq->stats.errors++;
1753                         break;
1754                 }
1755         }
1756
1757         rxvq->stats.packets += nb_rx;
1758
1759         /* Allocate new mbufs to refill the used descriptors */
1760         if (likely(!virtqueue_full(vq))) {
1761                 /* free_cnt may include mrg descs */
1762                 uint16_t free_cnt = vq->vq_free_cnt;
1763                 struct rte_mbuf *new_pkts[free_cnt];
1764
1765                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1766                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1767                                         free_cnt);
1768                         if (unlikely(error)) {
1769                                 for (i = 0; i < free_cnt; i++)
1770                                         rte_pktmbuf_free(new_pkts[i]);
1771                         }
1772                         nb_enqueued += free_cnt;
1773                 } else {
1774                         struct rte_eth_dev *dev =
1775                                 &rte_eth_devices[rxvq->port_id];
1776                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1777                 }
1778         }
1779
1780         if (likely(nb_enqueued)) {
1781                 vq_update_avail_idx(vq);
1782
1783                 if (unlikely(virtqueue_kick_prepare(vq))) {
1784                         virtqueue_notify(vq);
1785                         PMD_RX_LOG(DEBUG, "Notified");
1786                 }
1787         }
1788
1789         return nb_rx;
1790 }
1791
1792 uint16_t
1793 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1794                         struct rte_mbuf **rx_pkts,
1795                         uint16_t nb_pkts)
1796 {
1797         struct virtnet_rx *rxvq = rx_queue;
1798         struct virtqueue *vq = rxvq->vq;
1799         struct virtio_hw *hw = vq->hw;
1800         struct rte_mbuf *rxm;
1801         struct rte_mbuf *prev = NULL;
1802         uint16_t num, nb_rx = 0;
1803         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1804         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1805         uint32_t nb_enqueued = 0;
1806         uint32_t seg_num = 0;
1807         uint32_t seg_res = 0;
1808         uint32_t hdr_size = hw->vtnet_hdr_size;
1809         int32_t i;
1810         int error;
1811
1812         if (unlikely(hw->started == 0))
1813                 return nb_rx;
1814
1815
1816         num = nb_pkts;
1817         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1818                 num = VIRTIO_MBUF_BURST_SZ;
1819         if (likely(num > DESC_PER_CACHELINE))
1820                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1821
1822         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1823
1824         for (i = 0; i < num; i++) {
1825                 struct virtio_net_hdr_mrg_rxbuf *header;
1826
1827                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1828                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1829
1830                 rxm = rcv_pkts[i];
1831
1832                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1833                         PMD_RX_LOG(ERR, "Packet drop");
1834                         nb_enqueued++;
1835                         virtio_discard_rxbuf(vq, rxm);
1836                         rxvq->stats.errors++;
1837                         continue;
1838                 }
1839
1840                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1841                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1842                 seg_num = header->num_buffers;
1843
1844                 if (seg_num == 0)
1845                         seg_num = 1;
1846
1847                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1848                 rxm->nb_segs = seg_num;
1849                 rxm->ol_flags = 0;
1850                 rxm->vlan_tci = 0;
1851                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1852                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1853
1854                 rxm->port = rxvq->port_id;
1855                 rx_pkts[nb_rx] = rxm;
1856                 prev = rxm;
1857
1858                 if (hw->has_rx_offload &&
1859                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1860                         virtio_discard_rxbuf(vq, rxm);
1861                         rxvq->stats.errors++;
1862                         continue;
1863                 }
1864
1865                 if (hw->vlan_strip)
1866                         rte_vlan_strip(rx_pkts[nb_rx]);
1867
1868                 seg_res = seg_num - 1;
1869
1870                 /* Merge remaining segments */
1871                 while (seg_res != 0 && i < (num - 1)) {
1872                         i++;
1873
1874                         rxm = rcv_pkts[i];
1875                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1876                         rxm->pkt_len = (uint32_t)(len[i]);
1877                         rxm->data_len = (uint16_t)(len[i]);
1878
1879                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1880
1881                         prev->next = rxm;
1882                         prev = rxm;
1883                         seg_res -= 1;
1884                 }
1885
1886                 if (!seg_res) {
1887                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1888                         nb_rx++;
1889                 }
1890         }
1891
1892         /* The last packet still needs its remaining segments merged */
1893         while (seg_res != 0) {
1894                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1895                                         VIRTIO_MBUF_BURST_SZ);
1896                 uint16_t extra_idx = 0;
1897
1898                 rcv_cnt = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1899                                 len, rcv_cnt);
1900                 if (unlikely(rcv_cnt == 0)) {
1901                         PMD_RX_LOG(ERR, "Not enough segments for packet.");
1902                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1903                         rxvq->stats.errors++;
1904                         break;
1905                 }
1906
1907                 while (extra_idx < rcv_cnt) {
1908                         rxm = rcv_pkts[extra_idx];
1909
1910                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1911                         rxm->pkt_len = (uint32_t)(len[extra_idx]);
1912                         rxm->data_len = (uint16_t)(len[extra_idx]);
1913
1914                         prev->next = rxm;
1915                         prev = rxm;
1916                         rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1917                         extra_idx += 1;
1918                 }
1919                 seg_res -= rcv_cnt;
1920                 if (!seg_res) {
1921                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1922                         nb_rx++;
1923                 }
1924         }
1925
1926         rxvq->stats.packets += nb_rx;
1927
1928         /* Allocate new mbufs to refill the used descriptors */
1929         if (likely(!virtqueue_full(vq))) {
1930                 /* free_cnt may include mrg descs */
1931                 uint16_t free_cnt = vq->vq_free_cnt;
1932                 struct rte_mbuf *new_pkts[free_cnt];
1933
1934                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1935                         error = virtqueue_enqueue_recv_refill_packed(vq,
1936                                         new_pkts, free_cnt);
1937                         if (unlikely(error)) {
1938                                 for (i = 0; i < free_cnt; i++)
1939                                         rte_pktmbuf_free(new_pkts[i]);
1940                         }
1941                         nb_enqueued += free_cnt;
1942                 } else {
1943                         struct rte_eth_dev *dev =
1944                                 &rte_eth_devices[rxvq->port_id];
1945                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1946                 }
1947         }
1948
1949         if (likely(nb_enqueued)) {
1950                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1951                         virtqueue_notify(vq);
1952                         PMD_RX_LOG(DEBUG, "Notified");
1953                 }
1954         }
1955
1956         return nb_rx;
1957 }
1958
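/* Tx prepare hook: validates requested offloads, inserts the VLAN tag in
 * software when asked to, and fixes up checksum/TSO fields before the
 * packets reach the transmit routine.  A minimal usage sketch on the
 * application side (illustrative only, not part of this driver):
 *
 *     uint16_t n = rte_eth_tx_prepare(port_id, queue_id, pkts, nb);
 *     if (n < nb)
 *             handle_failure(pkts[n]);   // hypothetical handler; see rte_errno
 *     n = rte_eth_tx_burst(port_id, queue_id, pkts, n);
 */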
1959 uint16_t
1960 virtio_xmit_pkts_prepare(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
1961                         uint16_t nb_pkts)
1962 {
1963         uint16_t nb_tx;
1964         int error;
1965
1966         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1967                 struct rte_mbuf *m = tx_pkts[nb_tx];
1968
1969 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1970                 error = rte_validate_tx_offload(m);
1971                 if (unlikely(error)) {
1972                         rte_errno = -error;
1973                         break;
1974                 }
1975 #endif
1976
1977                 /* Do VLAN tag insertion */
1978                 if (unlikely(m->ol_flags & PKT_TX_VLAN_PKT)) {
1979                         error = rte_vlan_insert(&m);
1980                         /* rte_vlan_insert() may change the mbuf pointer
1981                          * even in the case of failure
1982                          */
1983                         tx_pkts[nb_tx] = m;
1984
1985                         if (unlikely(error)) {
1986                                 rte_errno = -error;
1987                                 break;
1988                         }
1989                 }
1990
1991                 error = rte_net_intel_cksum_prepare(m);
1992                 if (unlikely(error)) {
1993                         rte_errno = -error;
1994                         break;
1995                 }
1996
1997                 if (m->ol_flags & PKT_TX_TCP_SEG)
1998                         virtio_tso_fix_cksum(m);
1999         }
2000
2001         return nb_tx;
2002 }
2003
2004 uint16_t
2005 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
2006                         uint16_t nb_pkts)
2007 {
2008         struct virtnet_tx *txvq = tx_queue;
2009         struct virtqueue *vq = txvq->vq;
2010         struct virtio_hw *hw = vq->hw;
2011         uint16_t hdr_size = hw->vtnet_hdr_size;
2012         uint16_t nb_tx = 0;
2013         bool in_order = hw->use_inorder_tx;
2014
2015         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2016                 return nb_tx;
2017
2018         if (unlikely(nb_pkts < 1))
2019                 return nb_pkts;
2020
2021         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2022
2023         if (nb_pkts > vq->vq_free_cnt)
2024                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
2025                                            in_order);
2026
2027         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2028                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2029                 int can_push = 0, slots, need;
2030
2031                 /* optimize ring usage */
2032                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2033                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2034                     rte_mbuf_refcnt_read(txm) == 1 &&
2035                     RTE_MBUF_DIRECT(txm) &&
2036                     txm->nb_segs == 1 &&
2037                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2038                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2039                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2040                         can_push = 1;
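                /* With can_push the virtio-net header is written into the mbuf
                 * headroom, so header and data share a single descriptor.
                 */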
2041
2042                 /* How many main ring entries are needed for this Tx?
2043                  * any_layout => number of segments
2044                  * default    => number of segments + 1
2045                  */
2046                 slots = txm->nb_segs + !can_push;
2047                 need = slots - vq->vq_free_cnt;
2048
2049                 /* A positive value means we must reclaim used descriptors first */
2050                 if (unlikely(need > 0)) {
2051                         virtio_xmit_cleanup_packed(vq, need, in_order);
2052                         need = slots - vq->vq_free_cnt;
2053                         if (unlikely(need > 0)) {
2054                                 PMD_TX_LOG(ERR,
2055                                            "No free tx descriptors to transmit");
2056                                 break;
2057                         }
2058                 }
2059
2060                 /* Enqueue Packet buffers */
2061                 if (can_push)
2062                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2063                 else
2064                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2065                                                       in_order);
2066
2067                 virtio_update_packet_stats(&txvq->stats, txm);
2068         }
2069
2070         txvq->stats.packets += nb_tx;
2071
2072         if (likely(nb_tx)) {
2073                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2074                         virtqueue_notify(vq);
2075                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2076                 }
2077         }
2078
2079         return nb_tx;
2080 }
2081
2082 uint16_t
2083 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2084 {
2085         struct virtnet_tx *txvq = tx_queue;
2086         struct virtqueue *vq = txvq->vq;
2087         struct virtio_hw *hw = vq->hw;
2088         uint16_t hdr_size = hw->vtnet_hdr_size;
2089         uint16_t nb_used, nb_tx = 0;
2090
2091         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2092                 return nb_tx;
2093
2094         if (unlikely(nb_pkts < 1))
2095                 return nb_pkts;
2096
2097         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2098         nb_used = VIRTQUEUE_NUSED(vq);
2099
2100         virtio_rmb(hw->weak_barriers);
2101         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2102                 virtio_xmit_cleanup(vq, nb_used);
2103
2104         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2105                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2106                 int can_push = 0, use_indirect = 0, slots, need;
2107
2108                 /* optimize ring usage */
2109                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2110                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2111                     rte_mbuf_refcnt_read(txm) == 1 &&
2112                     RTE_MBUF_DIRECT(txm) &&
2113                     txm->nb_segs == 1 &&
2114                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2115                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2116                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2117                         can_push = 1;
2118                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2119                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2120                         use_indirect = 1;
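                /* With use_indirect the whole chain is described by a single
                 * ring slot that points to an indirect descriptor table.
                 */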
2121
2122                 /* How many main ring entries are needed for this Tx?
2123                  * any_layout => number of segments
2124                  * indirect   => 1
2125                  * default    => number of segments + 1
2126                  */
2127                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2128                 need = slots - vq->vq_free_cnt;
2129
2130                 /* A positive value means we must reclaim used descriptors first */
2131                 if (unlikely(need > 0)) {
2132                         nb_used = VIRTQUEUE_NUSED(vq);
2133                         virtio_rmb(hw->weak_barriers);
2134                         need = RTE_MIN(need, (int)nb_used);
2135
2136                         virtio_xmit_cleanup(vq, need);
2137                         need = slots - vq->vq_free_cnt;
2138                         if (unlikely(need > 0)) {
2139                                 PMD_TX_LOG(ERR,
2140                                            "No free tx descriptors to transmit");
2141                                 break;
2142                         }
2143                 }
2144
2145                 /* Enqueue Packet buffers */
2146                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2147                         can_push, 0);
2148
2149                 virtio_update_packet_stats(&txvq->stats, txm);
2150         }
2151
2152         txvq->stats.packets += nb_tx;
2153
2154         if (likely(nb_tx)) {
2155                 vq_update_avail_idx(vq);
2156
2157                 if (unlikely(virtqueue_kick_prepare(vq))) {
2158                         virtqueue_notify(vq);
2159                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2160                 }
2161         }
2162
2163         return nb_tx;
2164 }
2165
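/* Reclaim used descriptors so that at least 'need' slots become free.
 * Returns the number of slots still missing after cleanup; a value less
 * than or equal to zero means enough descriptors are now available.
 */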
2166 static __rte_always_inline int
2167 virtio_xmit_try_cleanup_inorder(struct virtqueue *vq, uint16_t need)
2168 {
2169         uint16_t nb_used, nb_clean, nb_descs;
2170         struct virtio_hw *hw = vq->hw;
2171
2172         nb_descs = vq->vq_free_cnt + need;
2173         nb_used = VIRTQUEUE_NUSED(vq);
2174         virtio_rmb(hw->weak_barriers);
2175         nb_clean = RTE_MIN(need, (int)nb_used);
2176
2177         virtio_xmit_cleanup_inorder(vq, nb_clean);
2178
2179         return nb_descs - vq->vq_free_cnt;
2180 }
2181
2182 uint16_t
2183 virtio_xmit_pkts_inorder(void *tx_queue,
2184                         struct rte_mbuf **tx_pkts,
2185                         uint16_t nb_pkts)
2186 {
2187         struct virtnet_tx *txvq = tx_queue;
2188         struct virtqueue *vq = txvq->vq;
2189         struct virtio_hw *hw = vq->hw;
2190         uint16_t hdr_size = hw->vtnet_hdr_size;
2191         uint16_t nb_used, nb_tx = 0, nb_inorder_pkts = 0;
2192         struct rte_mbuf *inorder_pkts[nb_pkts];
2193         int need;
2194
2195         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2196                 return nb_tx;
2197
2198         if (unlikely(nb_pkts < 1))
2199                 return nb_pkts;
2200
2201         VIRTQUEUE_DUMP(vq);
2202         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2203         nb_used = VIRTQUEUE_NUSED(vq);
2204
2205         virtio_rmb(hw->weak_barriers);
2206         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2207                 virtio_xmit_cleanup_inorder(vq, nb_used);
2208
2209         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2210                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2211                 int slots;
2212
2213                 /* optimize ring usage */
2214                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2215                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2216                      rte_mbuf_refcnt_read(txm) == 1 &&
2217                      RTE_MBUF_DIRECT(txm) &&
2218                      txm->nb_segs == 1 &&
2219                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2220                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2221                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2222                         inorder_pkts[nb_inorder_pkts] = txm;
2223                         nb_inorder_pkts++;
2224
2225                         continue;
2226                 }
2227
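                /* A packet that cannot use the pushed-header fast path ends
                 * the current batch: flush the batched mbufs first so packets
                 * are submitted in their original order.
                 */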
2228                 if (nb_inorder_pkts) {
2229                         need = nb_inorder_pkts - vq->vq_free_cnt;
2230                         if (unlikely(need > 0)) {
2231                                 need = virtio_xmit_try_cleanup_inorder(vq,
2232                                                                        need);
2233                                 if (unlikely(need > 0)) {
2234                                         PMD_TX_LOG(ERR,
2235                                                 "No free tx descriptors to "
2236                                                 "transmit");
2237                                         break;
2238                                 }
2239                         }
2240                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2241                                                         nb_inorder_pkts);
2242                         nb_inorder_pkts = 0;
2243                 }
2244
2245                 slots = txm->nb_segs + 1;
2246                 need = slots - vq->vq_free_cnt;
2247                 if (unlikely(need > 0)) {
2248                         need = virtio_xmit_try_cleanup_inorder(vq, slots);
2249
2250                         if (unlikely(need > 0)) {
2251                                 PMD_TX_LOG(ERR,
2252                                         "No free tx descriptors to transmit");
2253                                 break;
2254                         }
2255                 }
2256                 /* Enqueue Packet buffers */
2257                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2258
2259                 virtio_update_packet_stats(&txvq->stats, txm);
2260         }
2261
2262         /* Transmit all inorder packets */
2263         if (nb_inorder_pkts) {
2264                 need = nb_inorder_pkts - vq->vq_free_cnt;
2265                 if (unlikely(need > 0)) {
2266                         need = virtio_xmit_try_cleanup_inorder(vq,
2267                                                                   need);
2268                         if (unlikely(need > 0)) {
2269                                 PMD_TX_LOG(ERR,
2270                                         "No free tx descriptors to transmit");
2271                                 nb_inorder_pkts = vq->vq_free_cnt;
2272                                 nb_tx -= need;
2273                         }
2274                 }
2275
2276                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2277                                                 nb_inorder_pkts);
2278         }
2279
2280         txvq->stats.packets += nb_tx;
2281
2282         if (likely(nb_tx)) {
2283                 vq_update_avail_idx(vq);
2284
2285                 if (unlikely(virtqueue_kick_prepare(vq))) {
2286                         virtqueue_notify(vq);
2287                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2288                 }
2289         }
2290
2291         VIRTQUEUE_DUMP(vq);
2292
2293         return nb_tx;
2294 }