net/virtio: improve perf via one-way barrier on avail flag
drivers/net/virtio/virtio_rxtx.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * the newly freed chain. If the virtqueue was completely used, then
79          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
109 void
110 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
111 {
112         uint32_t s = mbuf->pkt_len;
113         struct rte_ether_addr *ea;
114
115         stats->bytes += s;
116
117         if (s == 64) {
118                 stats->size_bins[1]++;
119         } else if (s > 64 && s < 1024) {
120                 uint32_t bin;
121
122                 /* count zeros, and offset into correct bin */
123                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
124                 stats->size_bins[bin]++;
125         } else {
126                 if (s < 64)
127                         stats->size_bins[0]++;
128                 else if (s < 1519)
129                         stats->size_bins[6]++;
130                 else
131                         stats->size_bins[7]++;
132         }
133
134         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
135         if (rte_is_multicast_ether_addr(ea)) {
136                 if (rte_is_broadcast_ether_addr(ea))
137                         stats->broadcast++;
138                 else
139                         stats->multicast++;
140         }
141 }
142
143 static inline void
144 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
145 {
146         VIRTIO_DUMP_PACKET(m, m->data_len);
147
148         virtio_update_packet_stats(&rxvq->stats, m);
149 }
150
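/*
 * Dequeue up to 'num' received buffers from a packed virtqueue. A used
 * descriptor is detected with desc_is_used() against the current used
 * wrap counter; the read barrier that follows keeps the len/id loads
 * from being reordered before that check. The wrap counter is toggled
 * whenever vq_used_cons_idx wraps around the ring.
 */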
151 static uint16_t
152 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
153                                   struct rte_mbuf **rx_pkts,
154                                   uint32_t *len,
155                                   uint16_t num)
156 {
157         struct rte_mbuf *cookie;
158         uint16_t used_idx;
159         uint16_t id;
160         struct vring_packed_desc *desc;
161         uint16_t i;
162
163         desc = vq->vq_packed.ring.desc;
164
165         for (i = 0; i < num; i++) {
166                 used_idx = vq->vq_used_cons_idx;
167                 if (!desc_is_used(&desc[used_idx], vq))
168                         return i;
169                 virtio_rmb(vq->hw->weak_barriers);
170                 len[i] = desc[used_idx].len;
171                 id = desc[used_idx].id;
172                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
173                 if (unlikely(cookie == NULL)) {
174                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
175                                 vq->vq_used_cons_idx);
176                         break;
177                 }
178                 rte_prefetch0(cookie);
179                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
180                 rx_pkts[i] = cookie;
181
182                 vq->vq_free_cnt++;
183                 vq->vq_used_cons_idx++;
184                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
185                         vq->vq_used_cons_idx -= vq->vq_nentries;
186                         vq->vq_packed.used_wrap_counter ^= 1;
187                 }
188         }
189
190         return i;
191 }
192
193 static uint16_t
194 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
195                            uint32_t *len, uint16_t num)
196 {
197         struct vring_used_elem *uep;
198         struct rte_mbuf *cookie;
199         uint16_t used_idx, desc_idx;
200         uint16_t i;
201
202         /* Caller ensures 'num' does not exceed the number of used entries */
203         for (i = 0; i < num ; i++) {
204                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
205                 uep = &vq->vq_split.ring.used->ring[used_idx];
206                 desc_idx = (uint16_t) uep->id;
207                 len[i] = uep->len;
208                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
209
210                 if (unlikely(cookie == NULL)) {
211                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
212                                 vq->vq_used_cons_idx);
213                         break;
214                 }
215
216                 rte_prefetch0(cookie);
217                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
218                 rx_pkts[i]  = cookie;
219                 vq->vq_used_cons_idx++;
220                 vq_ring_free_chain(vq, desc_idx);
221                 vq->vq_descx[desc_idx].cookie = NULL;
222         }
223
224         return i;
225 }
226
227 static uint16_t
228 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
229                         struct rte_mbuf **rx_pkts,
230                         uint32_t *len,
231                         uint16_t num)
232 {
233         struct vring_used_elem *uep;
234         struct rte_mbuf *cookie;
235         uint16_t used_idx = 0;
236         uint16_t i;
237
238         if (unlikely(num == 0))
239                 return 0;
240
241         for (i = 0; i < num; i++) {
242                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
243                 /* Desc idx same as used idx */
244                 uep = &vq->vq_split.ring.used->ring[used_idx];
245                 len[i] = uep->len;
246                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
247
248                 if (unlikely(cookie == NULL)) {
249                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
250                                 vq->vq_used_cons_idx);
251                         break;
252                 }
253
254                 rte_prefetch0(cookie);
255                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
256                 rx_pkts[i]  = cookie;
257                 vq->vq_used_cons_idx++;
258                 vq->vq_descx[used_idx].cookie = NULL;
259         }
260
261         vq_ring_free_inorder(vq, used_idx, i);
262         return i;
263 }
264
265 #ifndef DEFAULT_TX_FREE_THRESH
266 #define DEFAULT_TX_FREE_THRESH 32
267 #endif
268
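/*
 * Reclaim completed Tx descriptors from a packed ring used in order:
 * chains are walked by their ndescs count until the id reported by the
 * device is reached, the attached mbufs are freed and the slots are
 * returned to vq_free_cnt.
 */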
269 static void
270 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
271 {
272         uint16_t used_idx, id, curr_id, free_cnt = 0;
273         uint16_t size = vq->vq_nentries;
274         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
275         struct vq_desc_extra *dxp;
276
277         used_idx = vq->vq_used_cons_idx;
278         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
279                 virtio_rmb(vq->hw->weak_barriers);
280                 id = desc[used_idx].id;
281                 do {
282                         curr_id = used_idx;
283                         dxp = &vq->vq_descx[used_idx];
284                         used_idx += dxp->ndescs;
285                         free_cnt += dxp->ndescs;
286                         num -= dxp->ndescs;
287                         if (used_idx >= size) {
288                                 used_idx -= size;
289                                 vq->vq_packed.used_wrap_counter ^= 1;
290                         }
291                         if (dxp->cookie != NULL) {
292                                 rte_pktmbuf_free(dxp->cookie);
293                                 dxp->cookie = NULL;
294                         }
295                 } while (curr_id != id);
296         }
297         vq->vq_used_cons_idx = used_idx;
298         vq->vq_free_cnt += free_cnt;
299 }
300
301 static void
302 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
303 {
304         uint16_t used_idx, id;
305         uint16_t size = vq->vq_nentries;
306         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
307         struct vq_desc_extra *dxp;
308
309         used_idx = vq->vq_used_cons_idx;
310         while (num-- && desc_is_used(&desc[used_idx], vq)) {
311                 virtio_rmb(vq->hw->weak_barriers);
312                 id = desc[used_idx].id;
313                 dxp = &vq->vq_descx[id];
314                 vq->vq_used_cons_idx += dxp->ndescs;
315                 if (vq->vq_used_cons_idx >= size) {
316                         vq->vq_used_cons_idx -= size;
317                         vq->vq_packed.used_wrap_counter ^= 1;
318                 }
319                 vq_ring_free_id_packed(vq, id);
320                 if (dxp->cookie != NULL) {
321                         rte_pktmbuf_free(dxp->cookie);
322                         dxp->cookie = NULL;
323                 }
324                 used_idx = vq->vq_used_cons_idx;
325         }
326 }
327
328 /* Cleanup from completed transmits. */
329 static inline void
330 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
331 {
332         if (in_order)
333                 virtio_xmit_cleanup_inorder_packed(vq, num);
334         else
335                 virtio_xmit_cleanup_normal_packed(vq, num);
336 }
337
338 static void
339 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
340 {
341         uint16_t i, used_idx, desc_idx;
342         for (i = 0; i < num; i++) {
343                 struct vring_used_elem *uep;
344                 struct vq_desc_extra *dxp;
345
346                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
347                 uep = &vq->vq_split.ring.used->ring[used_idx];
348
349                 desc_idx = (uint16_t) uep->id;
350                 dxp = &vq->vq_descx[desc_idx];
351                 vq->vq_used_cons_idx++;
352                 vq_ring_free_chain(vq, desc_idx);
353
354                 if (dxp->cookie != NULL) {
355                         rte_pktmbuf_free(dxp->cookie);
356                         dxp->cookie = NULL;
357                 }
358         }
359 }
360
361 /* Cleanup from completed inorder transmits. */
362 static __rte_always_inline void
363 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
364 {
365         uint16_t i, idx = vq->vq_used_cons_idx;
366         int16_t free_cnt = 0;
367         struct vq_desc_extra *dxp = NULL;
368
369         if (unlikely(num == 0))
370                 return;
371
372         for (i = 0; i < num; i++) {
373                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
374                 free_cnt += dxp->ndescs;
375                 if (dxp->cookie != NULL) {
376                         rte_pktmbuf_free(dxp->cookie);
377                         dxp->cookie = NULL;
378                 }
379         }
380
381         vq->vq_free_cnt += free_cnt;
382         vq->vq_used_cons_idx = idx;
383 }
384
385 static inline int
386 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
387                         struct rte_mbuf **cookies,
388                         uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp;
393         uint16_t head_idx, idx, i = 0;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
401         start_dp = vq->vq_split.ring.desc;
402
403         while (i < num) {
404                 idx = head_idx & (vq->vq_nentries - 1);
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookies[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
411                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                                 cookies[i]->buf_len -
414                                 RTE_PKTMBUF_HEADROOM +
415                                 hw->vtnet_hdr_size;
416                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
417
418                 vq_update_avail_ring(vq, idx);
419                 head_idx++;
420                 i++;
421         }
422
423         vq->vq_desc_head_idx += num;
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425         return 0;
426 }
427
428 static inline int
429 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
430                                 uint16_t num)
431 {
432         struct vq_desc_extra *dxp;
433         struct virtio_hw *hw = vq->hw;
434         struct vring_desc *start_dp = vq->vq_split.ring.desc;
435         uint16_t idx, i;
436
437         if (unlikely(vq->vq_free_cnt == 0))
438                 return -ENOSPC;
439         if (unlikely(vq->vq_free_cnt < num))
440                 return -EMSGSIZE;
441
442         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
443                 return -EFAULT;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_desc_head_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr =
452                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
453                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
454                 start_dp[idx].len =
455                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
456                         hw->vtnet_hdr_size;
457                 start_dp[idx].flags = VRING_DESC_F_WRITE;
458                 vq->vq_desc_head_idx = start_dp[idx].next;
459                 vq_update_avail_ring(vq, idx);
460                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
461                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
462                         break;
463                 }
464         }
465
466         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
467
468         return 0;
469 }
470
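/*
 * Post 'num' receive buffers on a packed virtqueue. Address and length
 * are filled in first; the descriptor flags (carrying the avail/used
 * bits from vq_packed.cached_flags) are written last through
 * virtqueue_store_flags_packed(), so the device cannot see a descriptor
 * whose flags are valid before its body is. cached_flags is toggled
 * each time vq_avail_idx wraps.
 */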
471 static inline int
472 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
473                                      struct rte_mbuf **cookie, uint16_t num)
474 {
475         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
476         uint16_t flags = vq->vq_packed.cached_flags;
477         struct virtio_hw *hw = vq->hw;
478         struct vq_desc_extra *dxp;
479         uint16_t idx;
480         int i;
481
482         if (unlikely(vq->vq_free_cnt == 0))
483                 return -ENOSPC;
484         if (unlikely(vq->vq_free_cnt < num))
485                 return -EMSGSIZE;
486
487         for (i = 0; i < num; i++) {
488                 idx = vq->vq_avail_idx;
489                 dxp = &vq->vq_descx[idx];
490                 dxp->cookie = (void *)cookie[i];
491                 dxp->ndescs = 1;
492
493                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
494                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
495                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
496                                         + hw->vtnet_hdr_size;
497
498                 vq->vq_desc_head_idx = dxp->next;
499                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
500                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
501
502                 virtqueue_store_flags_packed(&start_dp[idx], flags,
503                                              hw->weak_barriers);
504
505                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
506                         vq->vq_avail_idx -= vq->vq_nentries;
507                         vq->vq_packed.cached_flags ^=
508                                 VRING_PACKED_DESC_F_AVAIL_USED;
509                         flags = vq->vq_packed.cached_flags;
510                 }
511         }
512         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
513         return 0;
514 }
515
516 /* When doing TSO, the IP length is not included in the pseudo-header
517  * checksum of the packet given to the PMD, but virtio expects it to be
518  * included; adjust the TCP checksum here to account for it.
519  */
520 static void
521 virtio_tso_fix_cksum(struct rte_mbuf *m)
522 {
523         /* common case: header is not fragmented */
524         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
525                         m->l4_len)) {
526                 struct rte_ipv4_hdr *iph;
527                 struct rte_ipv6_hdr *ip6h;
528                 struct rte_tcp_hdr *th;
529                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
530                 uint32_t tmp;
531
532                 iph = rte_pktmbuf_mtod_offset(m,
533                                         struct rte_ipv4_hdr *, m->l2_len);
534                 th = RTE_PTR_ADD(iph, m->l3_len);
535                 if ((iph->version_ihl >> 4) == 4) {
536                         iph->hdr_checksum = 0;
537                         iph->hdr_checksum = rte_ipv4_cksum(iph);
538                         ip_len = iph->total_length;
539                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
540                                 m->l3_len);
541                 } else {
542                         ip6h = (struct rte_ipv6_hdr *)iph;
543                         ip_paylen = ip6h->payload_len;
544                 }
545
546                 /* add ip_paylen to the pseudo-header checksum already in the packet */
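                /* For example, if th->cksum is 0xfe12 and ip_paylen is
                 * 0x0200, tmp = 0x10012 and folding the carry back in
                 * gives 0x0013: the ones'-complement sum now includes
                 * the payload length, as virtio expects for TSO.
                 */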
547                 prev_cksum = th->cksum;
548                 tmp = prev_cksum;
549                 tmp += ip_paylen;
550                 tmp = (tmp & 0xffff) + (tmp >> 16);
551                 new_cksum = tmp;
552
553                 /* replace it in the packet */
554                 th->cksum = new_cksum;
555         }
556 }
557
558
559 /* avoid write operations when unnecessary, to lessen cache issues */
560 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
561         if ((var) != (val))                     \
562                 (var) = (val);                  \
563 } while (0)
564
565 #define virtqueue_clear_net_hdr(_hdr) do {              \
566         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
567         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
568         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
569         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
570         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
571         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
572 } while (0)
573
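/*
 * Translate the mbuf Tx offload requests (PKT_TX_*_CKSUM, PKT_TX_TCP_SEG)
 * into the virtio_net_hdr the device expects: csum_start/csum_offset plus
 * VIRTIO_NET_HDR_F_NEEDS_CSUM for checksum offload, and gso_type/gso_size/
 * hdr_len for TSO. Unused fields are reset through ASSIGN_UNLESS_EQUAL()
 * so the header cache line is not dirtied needlessly.
 */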
574 static inline void
575 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
576                         struct rte_mbuf *cookie,
577                         bool offload)
578 {
579         if (offload) {
580                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
581                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
582
583                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
584                 case PKT_TX_UDP_CKSUM:
585                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
586                         hdr->csum_offset = offsetof(struct rte_udp_hdr,
587                                 dgram_cksum);
588                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
589                         break;
590
591                 case PKT_TX_TCP_CKSUM:
592                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
593                         hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
594                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
595                         break;
596
597                 default:
598                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
599                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
600                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
601                         break;
602                 }
603
604                 /* TCP Segmentation Offload */
605                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
606                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
607                                 VIRTIO_NET_HDR_GSO_TCPV6 :
608                                 VIRTIO_NET_HDR_GSO_TCPV4;
609                         hdr->gso_size = cookie->tso_segsz;
610                         hdr->hdr_len =
611                                 cookie->l2_len +
612                                 cookie->l3_len +
613                                 cookie->l4_len;
614                 } else {
615                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
616                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
617                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
618                 }
619         }
620 }
621
622 static inline void
623 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
624                         struct rte_mbuf **cookies,
625                         uint16_t num)
626 {
627         struct vq_desc_extra *dxp;
628         struct virtqueue *vq = txvq->vq;
629         struct vring_desc *start_dp;
630         struct virtio_net_hdr *hdr;
631         uint16_t idx;
632         uint16_t head_size = vq->hw->vtnet_hdr_size;
633         uint16_t i = 0;
634
635         idx = vq->vq_desc_head_idx;
636         start_dp = vq->vq_split.ring.desc;
637
638         while (i < num) {
639                 idx = idx & (vq->vq_nentries - 1);
640                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
641                 dxp->cookie = (void *)cookies[i];
642                 dxp->ndescs = 1;
643                 virtio_update_packet_stats(&txvq->stats, cookies[i]);
644
645                 hdr = (struct virtio_net_hdr *)((char *)cookies[i]->buf_addr +
646                         cookies[i]->data_off - head_size);
647
648                 /* if offload disabled, hdr is not zeroed yet, do it now */
649                 if (!vq->hw->has_tx_offload)
650                         virtqueue_clear_net_hdr(hdr);
651                 else
652                         virtqueue_xmit_offload(hdr, cookies[i], true);
653
654                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
655                 start_dp[idx].len   = cookies[i]->data_len + head_size;
656                 start_dp[idx].flags = 0;
657
658
659                 vq_update_avail_ring(vq, idx);
660
661                 idx++;
662                 i++;
663         }
664
665         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
666         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
667 }
668
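/*
 * Fast path for a single-descriptor Tx on a packed ring: the virtio net
 * header is pushed into the mbuf headroom so packet and header share one
 * descriptor. addr/len/id are filled in first and the flags are written
 * last via virtqueue_store_flags_packed().
 */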
669 static inline void
670 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
671                                    struct rte_mbuf *cookie,
672                                    int in_order)
673 {
674         struct virtqueue *vq = txvq->vq;
675         struct vring_packed_desc *dp;
676         struct vq_desc_extra *dxp;
677         uint16_t idx, id, flags;
678         uint16_t head_size = vq->hw->vtnet_hdr_size;
679         struct virtio_net_hdr *hdr;
680
681         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
682         idx = vq->vq_avail_idx;
683         dp = &vq->vq_packed.ring.desc[idx];
684
685         dxp = &vq->vq_descx[id];
686         dxp->ndescs = 1;
687         dxp->cookie = cookie;
688
689         flags = vq->vq_packed.cached_flags;
690
691         /* prepend cannot fail, checked by caller */
692         hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
693                 cookie->data_off - head_size);
694
695         /* if offload disabled, hdr is not zeroed yet, do it now */
696         if (!vq->hw->has_tx_offload)
697                 virtqueue_clear_net_hdr(hdr);
698         else
699                 virtqueue_xmit_offload(hdr, cookie, true);
700
701         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
702         dp->len  = cookie->data_len + head_size;
703         dp->id   = id;
704
705         if (++vq->vq_avail_idx >= vq->vq_nentries) {
706                 vq->vq_avail_idx -= vq->vq_nentries;
707                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
708         }
709
710         vq->vq_free_cnt--;
711
712         if (!in_order) {
713                 vq->vq_desc_head_idx = dxp->next;
714                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
715                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
716         }
717
718         virtqueue_store_flags_packed(dp, flags, vq->hw->weak_barriers);
719 }
720
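/*
 * General Tx enqueue for a packed ring. The virtio net header is either
 * pushed into the mbuf headroom (can_push) or placed in the per-queue
 * reserved region; the remaining descriptors carry the mbuf segments.
 * The head descriptor's flags are written only after the whole chain is
 * set up, which is presumably where the one-way barrier on the avail
 * flag named in this patch comes into play for weak-barrier platforms.
 */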
721 static inline void
722 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
723                               uint16_t needed, int can_push, int in_order)
724 {
725         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
726         struct vq_desc_extra *dxp;
727         struct virtqueue *vq = txvq->vq;
728         struct vring_packed_desc *start_dp, *head_dp;
729         uint16_t idx, id, head_idx, head_flags;
730         uint16_t head_size = vq->hw->vtnet_hdr_size;
731         struct virtio_net_hdr *hdr;
732         uint16_t prev;
733         bool prepend_header = false;
734
735         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
736
737         dxp = &vq->vq_descx[id];
738         dxp->ndescs = needed;
739         dxp->cookie = cookie;
740
741         head_idx = vq->vq_avail_idx;
742         idx = head_idx;
743         prev = head_idx;
744         start_dp = vq->vq_packed.ring.desc;
745
746         head_dp = &vq->vq_packed.ring.desc[idx];
747         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
748         head_flags |= vq->vq_packed.cached_flags;
749
750         if (can_push) {
751                 /* prepend cannot fail, checked by caller */
752                 hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
753                         cookie->data_off - head_size);
754                 prepend_header = true;
755
756                 /* if offload disabled, it is not zeroed below, do it now */
757                 if (!vq->hw->has_tx_offload)
758                         virtqueue_clear_net_hdr(hdr);
759         } else {
760                 /* setup first tx ring slot to point to header
761                  * stored in reserved region.
762                  */
763                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
764                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
765                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
766                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
767                 idx++;
768                 if (idx >= vq->vq_nentries) {
769                         idx -= vq->vq_nentries;
770                         vq->vq_packed.cached_flags ^=
771                                 VRING_PACKED_DESC_F_AVAIL_USED;
772                 }
773         }
774
775         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
776
777         do {
778                 uint16_t flags;
779
780                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
781                 start_dp[idx].len  = cookie->data_len;
782                 if (prepend_header) {
783                         start_dp[idx].len += head_size;
784                         prepend_header = false;
785                 }
786
787                 if (likely(idx != head_idx)) {
788                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
789                         flags |= vq->vq_packed.cached_flags;
790                         start_dp[idx].flags = flags;
791                 }
792                 prev = idx;
793                 idx++;
794                 if (idx >= vq->vq_nentries) {
795                         idx -= vq->vq_nentries;
796                         vq->vq_packed.cached_flags ^=
797                                 VRING_PACKED_DESC_F_AVAIL_USED;
798                 }
799         } while ((cookie = cookie->next) != NULL);
800
801         start_dp[prev].id = id;
802
803         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
804         vq->vq_avail_idx = idx;
805
806         if (!in_order) {
807                 vq->vq_desc_head_idx = dxp->next;
808                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
809                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
810         }
811
812         virtqueue_store_flags_packed(head_dp, head_flags,
813                                      vq->hw->weak_barriers);
814 }
815
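/*
 * Tx enqueue for a split ring. Depending on the negotiated features the
 * virtio net header is pushed into the mbuf headroom (can_push), described
 * through an indirect descriptor table in the reserved region
 * (use_indirect), or given its own descriptor chained in front of the data
 * descriptors. The avail ring is updated with the head index once the
 * chain is complete.
 */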
816 static inline void
817 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
818                         uint16_t needed, int use_indirect, int can_push,
819                         int in_order)
820 {
821         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
822         struct vq_desc_extra *dxp;
823         struct virtqueue *vq = txvq->vq;
824         struct vring_desc *start_dp;
825         uint16_t seg_num = cookie->nb_segs;
826         uint16_t head_idx, idx;
827         uint16_t head_size = vq->hw->vtnet_hdr_size;
828         bool prepend_header = false;
829         struct virtio_net_hdr *hdr;
830
831         head_idx = vq->vq_desc_head_idx;
832         idx = head_idx;
833         if (in_order)
834                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
835         else
836                 dxp = &vq->vq_descx[idx];
837         dxp->cookie = (void *)cookie;
838         dxp->ndescs = needed;
839
840         start_dp = vq->vq_split.ring.desc;
841
842         if (can_push) {
843                 /* prepend cannot fail, checked by caller */
844                 hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
845                         cookie->data_off - head_size);
846                 prepend_header = true;
847
848                 /* if offload disabled, it is not zeroed below, do it now */
849                 if (!vq->hw->has_tx_offload)
850                         virtqueue_clear_net_hdr(hdr);
851         } else if (use_indirect) {
852                 /* setup tx ring slot to point to indirect
853                  * descriptor list stored in reserved region.
854                  *
855                  * the first slot in indirect ring is already preset
856                  * to point to the header in reserved region
857                  */
858                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
859                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
860                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
861                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
862                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
863
864                 /* loop below will fill in rest of the indirect elements */
865                 start_dp = txr[idx].tx_indir;
866                 idx = 1;
867         } else {
868                 /* setup first tx ring slot to point to header
869                  * stored in reserved region.
870                  */
871                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
872                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
873                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
874                 start_dp[idx].flags = VRING_DESC_F_NEXT;
875                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
876
877                 idx = start_dp[idx].next;
878         }
879
880         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
881
882         do {
883                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
884                 start_dp[idx].len   = cookie->data_len;
885                 if (prepend_header) {
886                         start_dp[idx].len += head_size;
887                         prepend_header = false;
888                 }
889                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
890                 idx = start_dp[idx].next;
891         } while ((cookie = cookie->next) != NULL);
892
893         if (use_indirect)
894                 idx = vq->vq_split.ring.desc[head_idx].next;
895
896         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
897
898         vq->vq_desc_head_idx = idx;
899         vq_update_avail_ring(vq, head_idx);
900
901         if (!in_order) {
902                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
903                         vq->vq_desc_tail_idx = idx;
904         }
905 }
906
907 void
908 virtio_dev_cq_start(struct rte_eth_dev *dev)
909 {
910         struct virtio_hw *hw = dev->data->dev_private;
911
912         if (hw->cvq && hw->cvq->vq) {
913                 rte_spinlock_init(&hw->cvq->lock);
914                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
915         }
916 }
917
918 int
919 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
920                         uint16_t queue_idx,
921                         uint16_t nb_desc,
922                         unsigned int socket_id __rte_unused,
923                         const struct rte_eth_rxconf *rx_conf __rte_unused,
924                         struct rte_mempool *mp)
925 {
926         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
927         struct virtio_hw *hw = dev->data->dev_private;
928         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
929         struct virtnet_rx *rxvq;
930
931         PMD_INIT_FUNC_TRACE();
932
933         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
934                 nb_desc = vq->vq_nentries;
935         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
936
937         rxvq = &vq->rxq;
938         rxvq->queue_id = queue_idx;
939         rxvq->mpool = mp;
940         dev->data->rx_queues[queue_idx] = rxvq;
941
942         return 0;
943 }
944
945 int
946 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
947 {
948         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
949         struct virtio_hw *hw = dev->data->dev_private;
950         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
951         struct virtnet_rx *rxvq = &vq->rxq;
952         struct rte_mbuf *m;
953         uint16_t desc_idx;
954         int error, nbufs, i;
955
956         PMD_INIT_FUNC_TRACE();
957
958         /* Allocate blank mbufs for each rx descriptor */
959         nbufs = 0;
960
961         if (hw->use_simple_rx) {
962                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
963                      desc_idx++) {
964                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
965                         vq->vq_split.ring.desc[desc_idx].flags =
966                                 VRING_DESC_F_WRITE;
967                 }
968
969                 virtio_rxq_vec_setup(rxvq);
970         }
971
972         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
973         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
974              desc_idx++) {
975                 vq->sw_ring[vq->vq_nentries + desc_idx] =
976                         &rxvq->fake_mbuf;
977         }
978
979         if (hw->use_simple_rx) {
980                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
981                         virtio_rxq_rearm_vec(rxvq);
982                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
983                 }
984         } else if (hw->use_inorder_rx) {
985                 if ((!virtqueue_full(vq))) {
986                         uint16_t free_cnt = vq->vq_free_cnt;
987                         struct rte_mbuf *pkts[free_cnt];
988
989                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
990                                 free_cnt)) {
991                                 error = virtqueue_enqueue_refill_inorder(vq,
992                                                 pkts,
993                                                 free_cnt);
994                                 if (unlikely(error)) {
995                                         for (i = 0; i < free_cnt; i++)
996                                                 rte_pktmbuf_free(pkts[i]);
997                                 }
998                         }
999
1000                         nbufs += free_cnt;
1001                         vq_update_avail_idx(vq);
1002                 }
1003         } else {
1004                 while (!virtqueue_full(vq)) {
1005                         m = rte_mbuf_raw_alloc(rxvq->mpool);
1006                         if (m == NULL)
1007                                 break;
1008
1009                         /* Enqueue allocated buffers */
1010                         if (vtpci_packed_queue(vq->hw))
1011                                 error = virtqueue_enqueue_recv_refill_packed(vq,
1012                                                 &m, 1);
1013                         else
1014                                 error = virtqueue_enqueue_recv_refill(vq,
1015                                                 &m, 1);
1016                         if (error) {
1017                                 rte_pktmbuf_free(m);
1018                                 break;
1019                         }
1020                         nbufs++;
1021                 }
1022
1023                 if (!vtpci_packed_queue(vq->hw))
1024                         vq_update_avail_idx(vq);
1025         }
1026
1027         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
1028
1029         VIRTQUEUE_DUMP(vq);
1030
1031         return 0;
1032 }
1033
1034 /*
1035  * struct rte_eth_dev *dev: used to store the queue in the device txq list
1036  * uint16_t nb_desc: defaults to the ring size read from config space if 0
1037  * unsigned int socket_id: used to allocate the memzone (unused here)
1038  * const struct rte_eth_txconf *tx_conf: used to set up the tx engine
1039  * uint16_t queue_idx: only used as an index into the dev txq list
1040  */
1041 int
1042 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1043                         uint16_t queue_idx,
1044                         uint16_t nb_desc,
1045                         unsigned int socket_id __rte_unused,
1046                         const struct rte_eth_txconf *tx_conf)
1047 {
1048         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1049         struct virtio_hw *hw = dev->data->dev_private;
1050         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1051         struct virtnet_tx *txvq;
1052         uint16_t tx_free_thresh;
1053
1054         PMD_INIT_FUNC_TRACE();
1055
1056         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1057                 nb_desc = vq->vq_nentries;
1058         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1059
1060         txvq = &vq->txq;
1061         txvq->queue_id = queue_idx;
1062
1063         tx_free_thresh = tx_conf->tx_free_thresh;
1064         if (tx_free_thresh == 0)
1065                 tx_free_thresh =
1066                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1067
1068         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1069                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1070                         "number of TX entries minus 3 (%u)."
1071                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1072                         vq->vq_nentries - 3,
1073                         tx_free_thresh, dev->data->port_id, queue_idx);
1074                 return -EINVAL;
1075         }
1076
1077         vq->vq_free_thresh = tx_free_thresh;
1078
1079         dev->data->tx_queues[queue_idx] = txvq;
1080         return 0;
1081 }
1082
1083 int
1084 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1085                                 uint16_t queue_idx)
1086 {
1087         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1088         struct virtio_hw *hw = dev->data->dev_private;
1089         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1090
1091         PMD_INIT_FUNC_TRACE();
1092
1093         if (!vtpci_packed_queue(hw)) {
1094                 if (hw->use_inorder_tx)
1095                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1096         }
1097
1098         VIRTQUEUE_DUMP(vq);
1099
1100         return 0;
1101 }
1102
1103 static inline void
1104 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1105 {
1106         int error;
1107         /*
1108          * Requeue the discarded mbuf. This should always be
1109          * successful since it was just dequeued.
1110          */
1111         if (vtpci_packed_queue(vq->hw))
1112                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1113         else
1114                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1115
1116         if (unlikely(error)) {
1117                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1118                 rte_pktmbuf_free(m);
1119         }
1120 }
1121
1122 static inline void
1123 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1124 {
1125         int error;
1126
1127         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1128         if (unlikely(error)) {
1129                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1130                 rte_pktmbuf_free(m);
1131         }
1132 }
1133
1134 /* Optionally fill offload information in structure */
1135 static inline int
1136 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1137 {
1138         struct rte_net_hdr_lens hdr_lens;
1139         uint32_t hdrlen, ptype;
1140         int l4_supported = 0;
1141
1142         /* nothing to do */
1143         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1144                 return 0;
1145
1146         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1147
1148         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1149         m->packet_type = ptype;
1150         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1151             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1152             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1153                 l4_supported = 1;
1154
1155         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1156                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1157                 if (hdr->csum_start <= hdrlen && l4_supported) {
1158                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1159                 } else {
1160                         /* Unknown proto or tunnel, do sw cksum. We can assume
1161                          * the cksum field is in the first segment since the
1162                          * buffers we provided to the host are large enough.
1163                          * In case of SCTP, this will be wrong since it's a CRC
1164                          * but there's nothing we can do.
1165                          */
1166                         uint16_t csum = 0, off;
1167
1168                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1169                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1170                                 &csum);
1171                         if (likely(csum != 0xffff))
1172                                 csum = ~csum;
1173                         off = hdr->csum_offset + hdr->csum_start;
1174                         if (rte_pktmbuf_data_len(m) >= off + 1)
1175                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1176                                         off) = csum;
1177                 }
1178         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1179                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1180         }
1181
1182         /* GSO request, save required information in mbuf */
1183         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1184                 /* Check unsupported modes */
1185                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1186                     (hdr->gso_size == 0)) {
1187                         return -EINVAL;
1188                 }
1189
1190                 /* Update MSS length in mbuf */
1191                 m->tso_segsz = hdr->gso_size;
1192                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1193                         case VIRTIO_NET_HDR_GSO_TCPV4:
1194                         case VIRTIO_NET_HDR_GSO_TCPV6:
1195                                 m->ol_flags |= PKT_RX_LRO | \
1196                                         PKT_RX_L4_CKSUM_NONE;
1197                                 break;
1198                         default:
1199                                 return -EINVAL;
1200                 }
1201         }
1202
1203         return 0;
1204 }
1205
1206 #define VIRTIO_MBUF_BURST_SZ 64
1207 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
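/*
 * Split-ring receive burst for the non-mergeable case: one used descriptor
 * is one packet. The burst is clipped to VIRTIO_MBUF_BURST_SZ and trimmed
 * so the dequeue ends on a descriptor cache-line boundary, freed slots are
 * refilled with newly allocated mbufs, and the device is notified only if
 * it asked to be kicked.
 */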
1208 uint16_t
1209 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1210 {
1211         struct virtnet_rx *rxvq = rx_queue;
1212         struct virtqueue *vq = rxvq->vq;
1213         struct virtio_hw *hw = vq->hw;
1214         struct rte_mbuf *rxm;
1215         uint16_t nb_used, num, nb_rx;
1216         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1217         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1218         int error;
1219         uint32_t i, nb_enqueued;
1220         uint32_t hdr_size;
1221         struct virtio_net_hdr *hdr;
1222
1223         nb_rx = 0;
1224         if (unlikely(hw->started == 0))
1225                 return nb_rx;
1226
1227         nb_used = VIRTQUEUE_NUSED(vq);
1228
1229         virtio_rmb(hw->weak_barriers);
1230
1231         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1232         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1233                 num = VIRTIO_MBUF_BURST_SZ;
1234         if (likely(num > DESC_PER_CACHELINE))
1235                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1236
1237         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1238         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1239
1240         nb_enqueued = 0;
1241         hdr_size = hw->vtnet_hdr_size;
1242
1243         for (i = 0; i < num ; i++) {
1244                 rxm = rcv_pkts[i];
1245
1246                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1247
1248                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1249                         PMD_RX_LOG(ERR, "Packet drop");
1250                         nb_enqueued++;
1251                         virtio_discard_rxbuf(vq, rxm);
1252                         rxvq->stats.errors++;
1253                         continue;
1254                 }
1255
1256                 rxm->port = rxvq->port_id;
1257                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1258                 rxm->ol_flags = 0;
1259                 rxm->vlan_tci = 0;
1260
1261                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1262                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1263
1264                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1265                         RTE_PKTMBUF_HEADROOM - hdr_size);
1266
1267                 if (hw->vlan_strip)
1268                         rte_vlan_strip(rxm);
1269
1270                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1271                         virtio_discard_rxbuf(vq, rxm);
1272                         rxvq->stats.errors++;
1273                         continue;
1274                 }
1275
1276                 virtio_rx_stats_updated(rxvq, rxm);
1277
1278                 rx_pkts[nb_rx++] = rxm;
1279         }
1280
1281         rxvq->stats.packets += nb_rx;
1282
1283         /* Allocate new mbufs for the used descriptors */
1284         if (likely(!virtqueue_full(vq))) {
1285                 uint16_t free_cnt = vq->vq_free_cnt;
1286                 struct rte_mbuf *new_pkts[free_cnt];
1287
1288                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1289                                                 free_cnt) == 0)) {
1290                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1291                                         free_cnt);
1292                         if (unlikely(error)) {
1293                                 for (i = 0; i < free_cnt; i++)
1294                                         rte_pktmbuf_free(new_pkts[i]);
1295                         }
1296                         nb_enqueued += free_cnt;
1297                 } else {
1298                         struct rte_eth_dev *dev =
1299                                 &rte_eth_devices[rxvq->port_id];
1300                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1301                 }
1302         }
1303
1304         if (likely(nb_enqueued)) {
1305                 vq_update_avail_idx(vq);
1306
1307                 if (unlikely(virtqueue_kick_prepare(vq))) {
1308                         virtqueue_notify(vq);
1309                         PMD_RX_LOG(DEBUG, "Notified");
1310                 }
1311         }
1312
1313         return nb_rx;
1314 }
1315
1316 uint16_t
1317 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1318                         uint16_t nb_pkts)
1319 {
1320         struct virtnet_rx *rxvq = rx_queue;
1321         struct virtqueue *vq = rxvq->vq;
1322         struct virtio_hw *hw = vq->hw;
1323         struct rte_mbuf *rxm;
1324         uint16_t num, nb_rx;
1325         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1326         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1327         int error;
1328         uint32_t i, nb_enqueued;
1329         uint32_t hdr_size;
1330         struct virtio_net_hdr *hdr;
1331
1332         nb_rx = 0;
1333         if (unlikely(hw->started == 0))
1334                 return nb_rx;
1335
1336         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1337         if (likely(num > DESC_PER_CACHELINE))
1338                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1339
1340         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1341         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1342
1343         nb_enqueued = 0;
1344         hdr_size = hw->vtnet_hdr_size;
1345
1346         for (i = 0; i < num; i++) {
1347                 rxm = rcv_pkts[i];
1348
1349                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1350
1351                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1352                         PMD_RX_LOG(ERR, "Packet drop");
1353                         nb_enqueued++;
1354                         virtio_discard_rxbuf(vq, rxm);
1355                         rxvq->stats.errors++;
1356                         continue;
1357                 }
1358
1359                 rxm->port = rxvq->port_id;
1360                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1361                 rxm->ol_flags = 0;
1362                 rxm->vlan_tci = 0;
1363
1364                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1365                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1366
1367                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1368                         RTE_PKTMBUF_HEADROOM - hdr_size);
1369
1370                 if (hw->vlan_strip)
1371                         rte_vlan_strip(rxm);
1372
1373                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1374                         virtio_discard_rxbuf(vq, rxm);
1375                         rxvq->stats.errors++;
1376                         continue;
1377                 }
1378
1379                 virtio_rx_stats_updated(rxvq, rxm);
1380
1381                 rx_pkts[nb_rx++] = rxm;
1382         }
1383
1384         rxvq->stats.packets += nb_rx;
1385
1386         /* Allocate new mbufs for the used descriptors */
1387         if (likely(!virtqueue_full(vq))) {
1388                 uint16_t free_cnt = vq->vq_free_cnt;
1389                 struct rte_mbuf *new_pkts[free_cnt];
1390
1391                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1392                                                 free_cnt) == 0)) {
1393                         error = virtqueue_enqueue_recv_refill_packed(vq,
1394                                         new_pkts, free_cnt);
1395                         if (unlikely(error)) {
1396                                 for (i = 0; i < free_cnt; i++)
1397                                         rte_pktmbuf_free(new_pkts[i]);
1398                         }
1399                         nb_enqueued += free_cnt;
1400                 } else {
1401                         struct rte_eth_dev *dev =
1402                                 &rte_eth_devices[rxvq->port_id];
1403                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1404                 }
1405         }
1406
1407         if (likely(nb_enqueued)) {
1408                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1409                         virtqueue_notify(vq);
1410                         PMD_RX_LOG(DEBUG, "Notified");
1411                 }
1412         }
1413
1414         return nb_rx;
1415 }
1416
1417
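/*
 * In-order receive burst (used when the in-order feature has been
 * negotiated): descriptors are consumed strictly in ring order, so the
 * used index doubles as the descriptor index. With mergeable buffers the
 * header's num_buffers field tells how many descriptors form one packet,
 * and the extra segments are chained onto the first mbuf.
 */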
1418 uint16_t
1419 virtio_recv_pkts_inorder(void *rx_queue,
1420                         struct rte_mbuf **rx_pkts,
1421                         uint16_t nb_pkts)
1422 {
1423         struct virtnet_rx *rxvq = rx_queue;
1424         struct virtqueue *vq = rxvq->vq;
1425         struct virtio_hw *hw = vq->hw;
1426         struct rte_mbuf *rxm;
1427         struct rte_mbuf *prev = NULL;
1428         uint16_t nb_used, num, nb_rx;
1429         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1430         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1431         int error;
1432         uint32_t nb_enqueued;
1433         uint32_t seg_num;
1434         uint32_t seg_res;
1435         uint32_t hdr_size;
1436         int32_t i;
1437
1438         nb_rx = 0;
1439         if (unlikely(hw->started == 0))
1440                 return nb_rx;
1441
1442         nb_used = VIRTQUEUE_NUSED(vq);
1443         nb_used = RTE_MIN(nb_used, nb_pkts);
1444         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1445
1446         virtio_rmb(hw->weak_barriers);
1447
1448         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1449
1450         nb_enqueued = 0;
1451         seg_num = 1;
1452         seg_res = 0;
1453         hdr_size = hw->vtnet_hdr_size;
1454
1455         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1456
1457         for (i = 0; i < num; i++) {
1458                 struct virtio_net_hdr_mrg_rxbuf *header;
1459
1460                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1461                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1462
1463                 rxm = rcv_pkts[i];
1464
1465                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1466                         PMD_RX_LOG(ERR, "Packet drop");
1467                         nb_enqueued++;
1468                         virtio_discard_rxbuf_inorder(vq, rxm);
1469                         rxvq->stats.errors++;
1470                         continue;
1471                 }
1472
1473                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1474                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1475                          - hdr_size);
1476
1477                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1478                         seg_num = header->num_buffers;
1479                         if (seg_num == 0)
1480                                 seg_num = 1;
1481                 } else {
1482                         seg_num = 1;
1483                 }
1484
1485                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1486                 rxm->nb_segs = seg_num;
1487                 rxm->ol_flags = 0;
1488                 rxm->vlan_tci = 0;
1489                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1490                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1491
1492                 rxm->port = rxvq->port_id;
1493
1494                 rx_pkts[nb_rx] = rxm;
1495                 prev = rxm;
1496
1497                 if (vq->hw->has_rx_offload &&
1498                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1499                         virtio_discard_rxbuf_inorder(vq, rxm);
1500                         rxvq->stats.errors++;
1501                         continue;
1502                 }
1503
1504                 if (hw->vlan_strip)
1505                         rte_vlan_strip(rx_pkts[nb_rx]);
1506
1507                 seg_res = seg_num - 1;
1508
1509                 /* Merge remaining segments */
1510                 while (seg_res != 0 && i < (num - 1)) {
1511                         i++;
1512
1513                         rxm = rcv_pkts[i];
1514                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1515                         rxm->pkt_len = (uint32_t)(len[i]);
1516                         rxm->data_len = (uint16_t)(len[i]);
1517
1518                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1519
1520                         prev->next = rxm;
1521                         prev = rxm;
1522                         seg_res -= 1;
1523                 }
1524
1525                 if (!seg_res) {
1526                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1527                         nb_rx++;
1528                 }
1529         }
1530
1531         /* The last packet may still need remaining segments merged */
1532         while (seg_res != 0) {
1533                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1534                                         VIRTIO_MBUF_BURST_SZ);
1535
1536                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1537                         virtio_rmb(hw->weak_barriers);
1538                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1539                                                            rcv_cnt);
1540                         uint16_t extra_idx = 0;
1541
1542                         rcv_cnt = num;
1543                         while (extra_idx < rcv_cnt) {
1544                                 rxm = rcv_pkts[extra_idx];
1545                                 rxm->data_off =
1546                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1547                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1548                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1549                                 prev->next = rxm;
1550                                 prev = rxm;
1551                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1552                                 extra_idx += 1;
1553                         }
1554                         seg_res -= rcv_cnt;
1555
1556                         if (!seg_res) {
1557                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1558                                 nb_rx++;
1559                         }
1560                 } else {
1561                         PMD_RX_LOG(ERR,
1562                                         "Not enough segments for packet.");
1563                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1564                         rxvq->stats.errors++;
1565                         break;
1566                 }
1567         }
1568
1569         rxvq->stats.packets += nb_rx;
1570
1571         /* Allocate new mbufs for the used descriptors */
1573         if (likely(!virtqueue_full(vq))) {
1574                 /* free_cnt may include mrg descs */
1575                 uint16_t free_cnt = vq->vq_free_cnt;
1576                 struct rte_mbuf *new_pkts[free_cnt];
1577
1578                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1579                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1580                                         free_cnt);
1581                         if (unlikely(error)) {
1582                                 for (i = 0; i < free_cnt; i++)
1583                                         rte_pktmbuf_free(new_pkts[i]);
1584                         }
1585                         nb_enqueued += free_cnt;
1586                 } else {
1587                         struct rte_eth_dev *dev =
1588                                 &rte_eth_devices[rxvq->port_id];
1589                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1590                 }
1591         }
1592
1593         if (likely(nb_enqueued)) {
1594                 vq_update_avail_idx(vq);
1595
1596                 if (unlikely(virtqueue_kick_prepare(vq))) {
1597                         virtqueue_notify(vq);
1598                         PMD_RX_LOG(DEBUG, "Notified");
1599                 }
1600         }
1601
1602         return nb_rx;
1603 }
1604
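/*
 * Receive burst on a split virtqueue with mergeable Rx buffers
 * (VIRTIO_NET_F_MRG_RXBUF): packets spanning several descriptors are
 * chained into a single multi-segment mbuf before being returned.
 */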
1605 uint16_t
1606 virtio_recv_mergeable_pkts(void *rx_queue,
1607                         struct rte_mbuf **rx_pkts,
1608                         uint16_t nb_pkts)
1609 {
1610         struct virtnet_rx *rxvq = rx_queue;
1611         struct virtqueue *vq = rxvq->vq;
1612         struct virtio_hw *hw = vq->hw;
1613         struct rte_mbuf *rxm;
1614         struct rte_mbuf *prev = NULL;
1615         uint16_t nb_used, num, nb_rx = 0;
1616         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1617         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1618         int error;
1619         uint32_t nb_enqueued = 0;
1620         uint32_t seg_num = 0;
1621         uint32_t seg_res = 0;
1622         uint32_t hdr_size = hw->vtnet_hdr_size;
1623         int32_t i;
1624
1625         if (unlikely(hw->started == 0))
1626                 return nb_rx;
1627
1628         nb_used = VIRTQUEUE_NUSED(vq);
1629
1630         virtio_rmb(hw->weak_barriers);
1631
1632         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1633
1634         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1635         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1636                 num = VIRTIO_MBUF_BURST_SZ;
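        /* Trim the burst so descriptor processing ends on a cache-line boundary */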
1637         if (likely(num > DESC_PER_CACHELINE))
1638                 num = num - ((vq->vq_used_cons_idx + num) %
1639                                 DESC_PER_CACHELINE);
1640
1641
1642         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1643
1644         for (i = 0; i < num; i++) {
1645                 struct virtio_net_hdr_mrg_rxbuf *header;
1646
1647                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1648                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1649
1650                 rxm = rcv_pkts[i];
1651
1652                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1653                         PMD_RX_LOG(ERR, "Packet drop");
1654                         nb_enqueued++;
1655                         virtio_discard_rxbuf(vq, rxm);
1656                         rxvq->stats.errors++;
1657                         continue;
1658                 }
1659
1660                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1661                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1662                          - hdr_size);
1663                 seg_num = header->num_buffers;
1664                 if (seg_num == 0)
1665                         seg_num = 1;
1666
1667                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1668                 rxm->nb_segs = seg_num;
1669                 rxm->ol_flags = 0;
1670                 rxm->vlan_tci = 0;
1671                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1672                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1673
1674                 rxm->port = rxvq->port_id;
1675
1676                 rx_pkts[nb_rx] = rxm;
1677                 prev = rxm;
1678
1679                 if (hw->has_rx_offload &&
1680                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1681                         virtio_discard_rxbuf(vq, rxm);
1682                         rxvq->stats.errors++;
1683                         continue;
1684                 }
1685
1686                 if (hw->vlan_strip)
1687                         rte_vlan_strip(rx_pkts[nb_rx]);
1688
1689                 seg_res = seg_num - 1;
1690
1691                 /* Merge remaining segments */
1692                 while (seg_res != 0 && i < (num - 1)) {
1693                         i++;
1694
1695                         rxm = rcv_pkts[i];
1696                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1697                         rxm->pkt_len = (uint32_t)(len[i]);
1698                         rxm->data_len = (uint16_t)(len[i]);
1699
1700                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1701
1702                         prev->next = rxm;
1703                         prev = rxm;
1704                         seg_res -= 1;
1705                 }
1706
1707                 if (!seg_res) {
1708                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1709                         nb_rx++;
1710                 }
1711         }
1712
1713         /* The last packet may still need remaining segments merged */
1714         while (seg_res != 0) {
1715                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1716                                         VIRTIO_MBUF_BURST_SZ);
1717
1718                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1719                         virtio_rmb(hw->weak_barriers);
1720                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1721                                                            rcv_cnt);
1722                         uint16_t extra_idx = 0;
1723
1724                         rcv_cnt = num;
1725                         while (extra_idx < rcv_cnt) {
1726                                 rxm = rcv_pkts[extra_idx];
1727                                 rxm->data_off =
1728                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1729                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1730                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1731                                 prev->next = rxm;
1732                                 prev = rxm;
1733                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1734                                 extra_idx += 1;
1735                         }
1736                         seg_res -= rcv_cnt;
1737
1738                         if (!seg_res) {
1739                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1740                                 nb_rx++;
1741                         }
1742                 } else {
1743                         PMD_RX_LOG(ERR,
1744                                         "Not enough segments for packet.");
1745                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1746                         rxvq->stats.errors++;
1747                         break;
1748                 }
1749         }
1750
1751         rxvq->stats.packets += nb_rx;
1752
1753         /* Allocate new mbufs for the used descriptors */
1754         if (likely(!virtqueue_full(vq))) {
1755                 /* free_cnt may include mrg descs */
1756                 uint16_t free_cnt = vq->vq_free_cnt;
1757                 struct rte_mbuf *new_pkts[free_cnt];
1758
1759                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1760                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1761                                         free_cnt);
1762                         if (unlikely(error)) {
1763                                 for (i = 0; i < free_cnt; i++)
1764                                         rte_pktmbuf_free(new_pkts[i]);
1765                         }
1766                         nb_enqueued += free_cnt;
1767                 } else {
1768                         struct rte_eth_dev *dev =
1769                                 &rte_eth_devices[rxvq->port_id];
1770                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1771                 }
1772         }
1773
1774         if (likely(nb_enqueued)) {
1775                 vq_update_avail_idx(vq);
1776
1777                 if (unlikely(virtqueue_kick_prepare(vq))) {
1778                         virtqueue_notify(vq);
1779                         PMD_RX_LOG(DEBUG, "Notified");
1780                 }
1781         }
1782
1783         return nb_rx;
1784 }
1785
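/*
 * Receive burst on a packed virtqueue with mergeable Rx buffers.
 * Follows the same flow as virtio_recv_mergeable_pkts() but uses the
 * packed-ring dequeue, refill and kick-prepare helpers.
 */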
1786 uint16_t
1787 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1788                         struct rte_mbuf **rx_pkts,
1789                         uint16_t nb_pkts)
1790 {
1791         struct virtnet_rx *rxvq = rx_queue;
1792         struct virtqueue *vq = rxvq->vq;
1793         struct virtio_hw *hw = vq->hw;
1794         struct rte_mbuf *rxm;
1795         struct rte_mbuf *prev = NULL;
1796         uint16_t num, nb_rx = 0;
1797         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1798         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1799         uint32_t nb_enqueued = 0;
1800         uint32_t seg_num = 0;
1801         uint32_t seg_res = 0;
1802         uint32_t hdr_size = hw->vtnet_hdr_size;
1803         int32_t i;
1804         int error;
1805
1806         if (unlikely(hw->started == 0))
1807                 return nb_rx;
1808
1809
1810         num = nb_pkts;
1811         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1812                 num = VIRTIO_MBUF_BURST_SZ;
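        /* Trim the burst so descriptor processing ends on a cache-line boundary */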
1813         if (likely(num > DESC_PER_CACHELINE))
1814                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1815
1816         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1817
1818         for (i = 0; i < num; i++) {
1819                 struct virtio_net_hdr_mrg_rxbuf *header;
1820
1821                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1822                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1823
1824                 rxm = rcv_pkts[i];
1825
1826                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1827                         PMD_RX_LOG(ERR, "Packet drop");
1828                         nb_enqueued++;
1829                         virtio_discard_rxbuf(vq, rxm);
1830                         rxvq->stats.errors++;
1831                         continue;
1832                 }
1833
1834                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1835                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1836                 seg_num = header->num_buffers;
1837
1838                 if (seg_num == 0)
1839                         seg_num = 1;
1840
1841                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1842                 rxm->nb_segs = seg_num;
1843                 rxm->ol_flags = 0;
1844                 rxm->vlan_tci = 0;
1845                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1846                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1847
1848                 rxm->port = rxvq->port_id;
1849                 rx_pkts[nb_rx] = rxm;
1850                 prev = rxm;
1851
1852                 if (hw->has_rx_offload &&
1853                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1854                         virtio_discard_rxbuf(vq, rxm);
1855                         rxvq->stats.errors++;
1856                         continue;
1857                 }
1858
1859                 if (hw->vlan_strip)
1860                         rte_vlan_strip(rx_pkts[nb_rx]);
1861
1862                 seg_res = seg_num - 1;
1863
1864                 /* Merge remaining segments */
1865                 while (seg_res != 0 && i < (num - 1)) {
1866                         i++;
1867
1868                         rxm = rcv_pkts[i];
1869                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1870                         rxm->pkt_len = (uint32_t)(len[i]);
1871                         rxm->data_len = (uint16_t)(len[i]);
1872
1873                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1874
1875                         prev->next = rxm;
1876                         prev = rxm;
1877                         seg_res -= 1;
1878                 }
1879
1880                 if (!seg_res) {
1881                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1882                         nb_rx++;
1883                 }
1884         }
1885
1886         /* The last packet may still need remaining segments merged */
1887         while (seg_res != 0) {
1888                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1889                                         VIRTIO_MBUF_BURST_SZ);
1890                 uint16_t extra_idx = 0;
1891
1892                 rcv_cnt = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1893                                 len, rcv_cnt);
1894                 if (unlikely(rcv_cnt == 0)) {
1895                         PMD_RX_LOG(ERR, "Not enough segments for packet.");
1896                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1897                         rxvq->stats.errors++;
1898                         break;
1899                 }
1900
1901                 while (extra_idx < rcv_cnt) {
1902                         rxm = rcv_pkts[extra_idx];
1903
1904                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1905                         rxm->pkt_len = (uint32_t)(len[extra_idx]);
1906                         rxm->data_len = (uint16_t)(len[extra_idx]);
1907
1908                         prev->next = rxm;
1909                         prev = rxm;
1910                         rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1911                         extra_idx += 1;
1912                 }
1913                 seg_res -= rcv_cnt;
1914                 if (!seg_res) {
1915                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1916                         nb_rx++;
1917                 }
1918         }
1919
1920         rxvq->stats.packets += nb_rx;
1921
1922         /* Allocate new mbufs for the used descriptors */
1923         if (likely(!virtqueue_full(vq))) {
1924                 /* free_cnt may include mrg descs */
1925                 uint16_t free_cnt = vq->vq_free_cnt;
1926                 struct rte_mbuf *new_pkts[free_cnt];
1927
1928                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1929                         error = virtqueue_enqueue_recv_refill_packed(vq,
1930                                         new_pkts, free_cnt);
1931                         if (unlikely(error)) {
1932                                 for (i = 0; i < free_cnt; i++)
1933                                         rte_pktmbuf_free(new_pkts[i]);
1934                         }
1935                         nb_enqueued += free_cnt;
1936                 } else {
1937                         struct rte_eth_dev *dev =
1938                                 &rte_eth_devices[rxvq->port_id];
1939                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1940                 }
1941         }
1942
1943         if (likely(nb_enqueued)) {
1944                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1945                         virtqueue_notify(vq);
1946                         PMD_RX_LOG(DEBUG, "Notified");
1947                 }
1948         }
1949
1950         return nb_rx;
1951 }
1952
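/*
 * Tx prepare callback shared by the transmit paths below: validates the
 * requested offloads (debug builds only), performs software VLAN
 * insertion and prepares checksum/TSO fields before transmission.
 */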
1953 uint16_t
1954 virtio_xmit_pkts_prepare(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
1955                         uint16_t nb_pkts)
1956 {
1957         uint16_t nb_tx;
1958         int error;
1959
1960         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1961                 struct rte_mbuf *m = tx_pkts[nb_tx];
1962
1963 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1964                 error = rte_validate_tx_offload(m);
1965                 if (unlikely(error)) {
1966                         rte_errno = -error;
1967                         break;
1968                 }
1969 #endif
1970
1971                 /* Do VLAN tag insertion */
1972                 if (unlikely(m->ol_flags & PKT_TX_VLAN_PKT)) {
1973                         error = rte_vlan_insert(&m);
1974                         /* rte_vlan_insert() may change pointer
1975                          * even in the case of failure
1976                          */
1977                         tx_pkts[nb_tx] = m;
1978
1979                         if (unlikely(error)) {
1980                                 rte_errno = -error;
1981                                 break;
1982                         }
1983                 }
1984
1985                 error = rte_net_intel_cksum_prepare(m);
1986                 if (unlikely(error)) {
1987                         rte_errno = -error;
1988                         break;
1989                 }
1990
1991                 if (m->ol_flags & PKT_TX_TCP_SEG)
1992                         virtio_tso_fix_cksum(m);
1993         }
1994
1995         return nb_tx;
1996 }
1997
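/*
 * Transmit burst on a packed virtqueue. Used descriptors are reclaimed
 * on demand; single-segment mbufs with enough headroom carry the
 * virtio-net header inline (can_push), others use an extra slot for it.
 */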
1998 uint16_t
1999 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
2000                         uint16_t nb_pkts)
2001 {
2002         struct virtnet_tx *txvq = tx_queue;
2003         struct virtqueue *vq = txvq->vq;
2004         struct virtio_hw *hw = vq->hw;
2005         uint16_t hdr_size = hw->vtnet_hdr_size;
2006         uint16_t nb_tx = 0;
2007         bool in_order = hw->use_inorder_tx;
2008
2009         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2010                 return nb_tx;
2011
2012         if (unlikely(nb_pkts < 1))
2013                 return nb_pkts;
2014
2015         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2016
2017         if (nb_pkts > vq->vq_free_cnt)
2018                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
2019                                            in_order);
2020
2021         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2022                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2023                 int can_push = 0, slots, need;
2024
2025                 /* Optimize ring usage: push the header into the headroom */
2026                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2027                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2028                     rte_mbuf_refcnt_read(txm) == 1 &&
2029                     RTE_MBUF_DIRECT(txm) &&
2030                     txm->nb_segs == 1 &&
2031                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2032                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2033                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2034                         can_push = 1;
2035
2036                 /* How many main ring entries are needed for this Tx?
2037                  * any_layout => number of segments
2038                  * default    => number of segments + 1
2039                  */
2040                 slots = txm->nb_segs + !can_push;
2041                 need = slots - vq->vq_free_cnt;
2042
2043                 /* A positive value indicates we need more free vring descriptors */
2044                 if (unlikely(need > 0)) {
2045                         virtio_xmit_cleanup_packed(vq, need, in_order);
2046                         need = slots - vq->vq_free_cnt;
2047                         if (unlikely(need > 0)) {
2048                                 PMD_TX_LOG(ERR,
2049                                            "No free tx descriptors to transmit");
2050                                 break;
2051                         }
2052                 }
2053
2054                 /* Enqueue Packet buffers */
2055                 if (can_push)
2056                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2057                 else
2058                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2059                                                       in_order);
2060
2061                 virtio_update_packet_stats(&txvq->stats, txm);
2062         }
2063
2064         txvq->stats.packets += nb_tx;
2065
2066         if (likely(nb_tx)) {
2067                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2068                         virtqueue_notify(vq);
2069                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2070                 }
2071         }
2072
2073         return nb_tx;
2074 }
2075
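/*
 * Transmit burst on a split virtqueue. Prefers pushing the virtio-net
 * header into the mbuf headroom, falls back to indirect descriptors for
 * multi-segment packets, and reclaims used descriptors as needed.
 */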
2076 uint16_t
2077 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2078 {
2079         struct virtnet_tx *txvq = tx_queue;
2080         struct virtqueue *vq = txvq->vq;
2081         struct virtio_hw *hw = vq->hw;
2082         uint16_t hdr_size = hw->vtnet_hdr_size;
2083         uint16_t nb_used, nb_tx = 0;
2084
2085         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2086                 return nb_tx;
2087
2088         if (unlikely(nb_pkts < 1))
2089                 return nb_pkts;
2090
2091         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2092         nb_used = VIRTQUEUE_NUSED(vq);
2093
2094         virtio_rmb(hw->weak_barriers);
2095         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2096                 virtio_xmit_cleanup(vq, nb_used);
2097
2098         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2099                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2100                 int can_push = 0, use_indirect = 0, slots, need;
2101
2102                 /* Optimize ring usage: header push first, else indirect descs */
2103                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2104                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2105                     rte_mbuf_refcnt_read(txm) == 1 &&
2106                     RTE_MBUF_DIRECT(txm) &&
2107                     txm->nb_segs == 1 &&
2108                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2109                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2110                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2111                         can_push = 1;
2112                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2113                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2114                         use_indirect = 1;
2115
2116                 /* How many main ring entries are needed for this Tx?
2117                  * any_layout => number of segments
2118                  * indirect   => 1
2119                  * default    => number of segments + 1
2120                  */
2121                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2122                 need = slots - vq->vq_free_cnt;
2123
2124                 /* A positive value indicates we need more free vring descriptors */
2125                 if (unlikely(need > 0)) {
2126                         nb_used = VIRTQUEUE_NUSED(vq);
2127                         virtio_rmb(hw->weak_barriers);
2128                         need = RTE_MIN(need, (int)nb_used);
2129
2130                         virtio_xmit_cleanup(vq, need);
2131                         need = slots - vq->vq_free_cnt;
2132                         if (unlikely(need > 0)) {
2133                                 PMD_TX_LOG(ERR,
2134                                            "No free tx descriptors to transmit");
2135                                 break;
2136                         }
2137                 }
2138
2139                 /* Enqueue Packet buffers */
2140                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2141                         can_push, 0);
2142
2143                 virtio_update_packet_stats(&txvq->stats, txm);
2144         }
2145
2146         txvq->stats.packets += nb_tx;
2147
2148         if (likely(nb_tx)) {
2149                 vq_update_avail_idx(vq);
2150
2151                 if (unlikely(virtqueue_kick_prepare(vq))) {
2152                         virtqueue_notify(vq);
2153                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2154                 }
2155         }
2156
2157         return nb_tx;
2158 }
2159
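/*
 * Try to free enough in-flight descriptors to satisfy 'need' and return
 * the remaining shortfall (<= 0 when enough descriptors are now free).
 */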
2160 static __rte_always_inline int
2161 virtio_xmit_try_cleanup_inorder(struct virtqueue *vq, uint16_t need)
2162 {
2163         uint16_t nb_used, nb_clean, nb_descs;
2164         struct virtio_hw *hw = vq->hw;
2165
2166         nb_descs = vq->vq_free_cnt + need;
2167         nb_used = VIRTQUEUE_NUSED(vq);
2168         virtio_rmb(hw->weak_barriers);
2169         nb_clean = RTE_MIN(need, (int)nb_used);
2170
2171         virtio_xmit_cleanup_inorder(vq, nb_clean);
2172
2173         return nb_descs - vq->vq_free_cnt;
2174 }
2175
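/*
 * Transmit burst on a split virtqueue with VIRTIO_F_IN_ORDER: packets
 * that can carry the virtio-net header in their headroom are batched and
 * enqueued together, while the remaining packets take the generic
 * per-packet enqueue path.
 */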
2176 uint16_t
2177 virtio_xmit_pkts_inorder(void *tx_queue,
2178                         struct rte_mbuf **tx_pkts,
2179                         uint16_t nb_pkts)
2180 {
2181         struct virtnet_tx *txvq = tx_queue;
2182         struct virtqueue *vq = txvq->vq;
2183         struct virtio_hw *hw = vq->hw;
2184         uint16_t hdr_size = hw->vtnet_hdr_size;
2185         uint16_t nb_used, nb_tx = 0, nb_inorder_pkts = 0;
2186         struct rte_mbuf *inorder_pkts[nb_pkts];
2187         int need;
2188
2189         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2190                 return nb_tx;
2191
2192         if (unlikely(nb_pkts < 1))
2193                 return nb_pkts;
2194
2195         VIRTQUEUE_DUMP(vq);
2196         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2197         nb_used = VIRTQUEUE_NUSED(vq);
2198
2199         virtio_rmb(hw->weak_barriers);
2200         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2201                 virtio_xmit_cleanup_inorder(vq, nb_used);
2202
2203         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2204                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2205                 int slots;
2206
2207                 /* Optimize ring usage: batch packets that can push the header */
2208                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2209                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2210                      rte_mbuf_refcnt_read(txm) == 1 &&
2211                      RTE_MBUF_DIRECT(txm) &&
2212                      txm->nb_segs == 1 &&
2213                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2214                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2215                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2216                         inorder_pkts[nb_inorder_pkts] = txm;
2217                         nb_inorder_pkts++;
2218
2219                         continue;
2220                 }
2221
2222                 if (nb_inorder_pkts) {
2223                         need = nb_inorder_pkts - vq->vq_free_cnt;
2224                         if (unlikely(need > 0)) {
2225                                 need = virtio_xmit_try_cleanup_inorder(vq,
2226                                                                        need);
2227                                 if (unlikely(need > 0)) {
2228                                         PMD_TX_LOG(ERR,
2229                                                 "No free tx descriptors to "
2230                                                 "transmit");
2231                                         break;
2232                                 }
2233                         }
2234                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2235                                                         nb_inorder_pkts);
2236                         nb_inorder_pkts = 0;
2237                 }
2238
2239                 slots = txm->nb_segs + 1;
2240                 need = slots - vq->vq_free_cnt;
2241                 if (unlikely(need > 0)) {
2242                         need = virtio_xmit_try_cleanup_inorder(vq, slots);
2243
2244                         if (unlikely(need > 0)) {
2245                                 PMD_TX_LOG(ERR,
2246                                         "No free tx descriptors to transmit");
2247                                 break;
2248                         }
2249                 }
2250                 /* Enqueue Packet buffers */
2251                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2252
2253                 virtio_update_packet_stats(&txvq->stats, txm);
2254         }
2255
2256         /* Transmit any remaining batched in-order packets */
2257         if (nb_inorder_pkts) {
2258                 need = nb_inorder_pkts - vq->vq_free_cnt;
2259                 if (unlikely(need > 0)) {
2260                         need = virtio_xmit_try_cleanup_inorder(vq,
2261                                                                   need);
2262                         if (unlikely(need > 0)) {
2263                                 PMD_TX_LOG(ERR,
2264                                         "No free tx descriptors to transmit");
2265                                 nb_inorder_pkts = vq->vq_free_cnt;
2266                                 nb_tx -= need;
2267                         }
2268                 }
2269
2270                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2271                                                 nb_inorder_pkts);
2272         }
2273
2274         txvq->stats.packets += nb_tx;
2275
2276         if (likely(nb_tx)) {
2277                 vq_update_avail_idx(vq);
2278
2279                 if (unlikely(virtqueue_kick_prepare(vq))) {
2280                         virtqueue_notify(vq);
2281                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2282                 }
2283         }
2284
2285         VIRTQUEUE_DUMP(vq);
2286
2287         return nb_tx;
2288 }