net/virtio: enable in-order feature if negotiated
[dpdk.git] / drivers/net/virtio/virtio_rxtx.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"
#include "virtio_rxtx_simple.h"
#include "virtio_ring.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
        struct virtnet_rx *rxvq = rxq;
        struct virtqueue *vq = rxvq->vq;

        return VIRTQUEUE_NUSED(vq) >= offset;
}

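/* Free @num descriptors up to and including @desc_idx. With
 * VIRTIO_F_IN_ORDER, descriptors are consumed in ring order, so only
 * the free counter and the tail index need to be updated.
 */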
void
vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
{
        vq->vq_free_cnt += num;
        vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
}

void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
        struct vring_desc *dp, *dp_tail;
        struct vq_desc_extra *dxp;
        uint16_t desc_idx_last = desc_idx;

        dp  = &vq->vq_split.ring.desc[desc_idx];
        dxp = &vq->vq_descx[desc_idx];
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
        if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
                while (dp->flags & VRING_DESC_F_NEXT) {
                        desc_idx_last = dp->next;
                        dp = &vq->vq_split.ring.desc[dp->next];
                }
        }
        dxp->ndescs = 0;

        /*
         * We must append the existing free chain, if any, to the end of
         * the newly freed chain. If the virtqueue was completely used,
         * then head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
         */
        if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
                vq->vq_desc_head_idx = desc_idx;
        } else {
                dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
                dp_tail->next = desc_idx;
        }

        vq->vq_desc_tail_idx = desc_idx_last;
        dp->next = VQ_RING_DESC_CHAIN_END;
}

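/* Return the descriptors of buffer @id on a packed virtqueue to the
 * free list, linking them behind the current tail via vq_descx[].next.
 */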
static void
vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
{
        struct vq_desc_extra *dxp;

        dxp = &vq->vq_descx[id];
        vq->vq_free_cnt += dxp->ndescs;

        if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
                vq->vq_desc_head_idx = id;
        else
                vq->vq_descx[vq->vq_desc_tail_idx].next = id;

        vq->vq_desc_tail_idx = id;
        dxp->next = VQ_RING_DESC_CHAIN_END;
}

void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
        uint32_t s = mbuf->pkt_len;
        struct rte_ether_addr *ea;

        stats->bytes += s;

        if (s == 64) {
                stats->size_bins[1]++;
        } else if (s > 64 && s < 1024) {
                uint32_t bin;

                /* count leading zeros to offset into the correct bin */
                bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
                stats->size_bins[bin]++;
        } else {
                if (s < 64)
                        stats->size_bins[0]++;
                else if (s < 1519)
                        stats->size_bins[6]++;
                else
                        stats->size_bins[7]++;
        }

        ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
        if (rte_is_multicast_ether_addr(ea)) {
                if (rte_is_broadcast_ether_addr(ea))
                        stats->broadcast++;
                else
                        stats->multicast++;
        }
}

static inline void
virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
{
        VIRTIO_DUMP_PACKET(m, m->data_len);

        virtio_update_packet_stats(&rxvq->stats, m);
}

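/* Dequeue up to @num used buffers from a packed virtqueue. Stops as
 * soon as a descriptor is not yet used; returns the count actually
 * dequeued, with buffer lengths stored in @len.
 */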
static uint16_t
virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
                                  struct rte_mbuf **rx_pkts,
                                  uint32_t *len,
                                  uint16_t num)
{
        struct rte_mbuf *cookie;
        uint16_t used_idx;
        uint16_t id;
        struct vring_packed_desc *desc;
        uint16_t i;

        desc = vq->vq_packed.ring.desc;

        for (i = 0; i < num; i++) {
                used_idx = vq->vq_used_cons_idx;
                /* desc_is_used has a load-acquire or rte_cio_rmb inside,
                 * and waits for the used descriptor in the virtqueue.
                 */
                if (!desc_is_used(&desc[used_idx], vq))
                        return i;
                len[i] = desc[used_idx].len;
                id = desc[used_idx].id;
                cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
                if (unlikely(cookie == NULL)) {
                        PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
                                vq->vq_used_cons_idx);
                        break;
                }
                rte_prefetch0(cookie);
                rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
                rx_pkts[i] = cookie;

                vq->vq_free_cnt++;
                vq->vq_used_cons_idx++;
                if (vq->vq_used_cons_idx >= vq->vq_nentries) {
                        vq->vq_used_cons_idx -= vq->vq_nentries;
                        vq->vq_packed.used_wrap_counter ^= 1;
                }
        }

        return i;
}

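/* Dequeue up to @num used buffers from a split virtqueue, freeing the
 * descriptor chain of each as it goes.
 */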
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
                           uint32_t *len, uint16_t num)
{
        struct vring_used_elem *uep;
        struct rte_mbuf *cookie;
        uint16_t used_idx, desc_idx;
        uint16_t i;

        /* Caller does the check */
        for (i = 0; i < num; i++) {
                used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
                uep = &vq->vq_split.ring.used->ring[used_idx];
                desc_idx = (uint16_t) uep->id;
                len[i] = uep->len;
                cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

                if (unlikely(cookie == NULL)) {
                        PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
                                vq->vq_used_cons_idx);
                        break;
                }

                rte_prefetch0(cookie);
                rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
                rx_pkts[i] = cookie;
                vq->vq_used_cons_idx++;
                vq_ring_free_chain(vq, desc_idx);
                vq->vq_descx[desc_idx].cookie = NULL;
        }

        return i;
}

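/* In-order variant of the split-ring dequeue: the descriptor index
 * equals the used-ring index, so the consumed descriptors can be freed
 * in one batch with vq_ring_free_inorder().
 */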
static uint16_t
virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
                        struct rte_mbuf **rx_pkts,
                        uint32_t *len,
                        uint16_t num)
{
        struct vring_used_elem *uep;
        struct rte_mbuf *cookie;
        uint16_t used_idx = 0;
        uint16_t i;

        if (unlikely(num == 0))
                return 0;

        for (i = 0; i < num; i++) {
                used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
                /* Desc idx same as used idx */
                uep = &vq->vq_split.ring.used->ring[used_idx];
                len[i] = uep->len;
                cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;

                if (unlikely(cookie == NULL)) {
                        PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
                                vq->vq_used_cons_idx);
                        break;
                }

                rte_prefetch0(cookie);
                rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
                rx_pkts[i] = cookie;
                vq->vq_used_cons_idx++;
                vq->vq_descx[used_idx].cookie = NULL;
        }

        vq_ring_free_inorder(vq, used_idx, i);
        return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

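/* Cleanup from completed in-order transmits on a packed virtqueue.
 * A buffer may span several descriptors (dxp->ndescs), so walk the
 * used entries until the id of the last used descriptor is reached.
 */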
static void
virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
{
        uint16_t used_idx, id, curr_id, free_cnt = 0;
        uint16_t size = vq->vq_nentries;
        struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
        struct vq_desc_extra *dxp;

        used_idx = vq->vq_used_cons_idx;
        /* desc_is_used has a load-acquire or rte_cio_rmb inside,
         * and waits for the used descriptor in the virtqueue.
         */
        while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
                id = desc[used_idx].id;
                do {
                        curr_id = used_idx;
                        dxp = &vq->vq_descx[used_idx];
                        used_idx += dxp->ndescs;
                        free_cnt += dxp->ndescs;
                        num -= dxp->ndescs;
                        if (used_idx >= size) {
                                used_idx -= size;
                                vq->vq_packed.used_wrap_counter ^= 1;
                        }
                        if (dxp->cookie != NULL) {
                                rte_pktmbuf_free(dxp->cookie);
                                dxp->cookie = NULL;
                        }
                } while (curr_id != id);
        }
        vq->vq_used_cons_idx = used_idx;
        vq->vq_free_cnt += free_cnt;
}

static void
virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
{
        uint16_t used_idx, id;
        uint16_t size = vq->vq_nentries;
        struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
        struct vq_desc_extra *dxp;

        used_idx = vq->vq_used_cons_idx;
        /* desc_is_used has a load-acquire or rte_cio_rmb inside,
         * and waits for the used descriptor in the virtqueue.
         */
        while (num-- && desc_is_used(&desc[used_idx], vq)) {
                id = desc[used_idx].id;
                dxp = &vq->vq_descx[id];
                vq->vq_used_cons_idx += dxp->ndescs;
                if (vq->vq_used_cons_idx >= size) {
                        vq->vq_used_cons_idx -= size;
                        vq->vq_packed.used_wrap_counter ^= 1;
                }
                vq_ring_free_id_packed(vq, id);
                if (dxp->cookie != NULL) {
                        rte_pktmbuf_free(dxp->cookie);
                        dxp->cookie = NULL;
                }
                used_idx = vq->vq_used_cons_idx;
        }
}

/* Cleanup from completed transmits. */
static inline void
virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
{
        if (in_order)
                virtio_xmit_cleanup_inorder_packed(vq, num);
        else
                virtio_xmit_cleanup_normal_packed(vq, num);
}

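/* Cleanup from completed transmits on a split virtqueue. */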
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
        uint16_t i, used_idx, desc_idx;
        for (i = 0; i < num; i++) {
                struct vring_used_elem *uep;
                struct vq_desc_extra *dxp;

                used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
                uep = &vq->vq_split.ring.used->ring[used_idx];

                desc_idx = (uint16_t) uep->id;
                dxp = &vq->vq_descx[desc_idx];
                vq->vq_used_cons_idx++;
                vq_ring_free_chain(vq, desc_idx);

                if (dxp->cookie != NULL) {
                        rte_pktmbuf_free(dxp->cookie);
                        dxp->cookie = NULL;
                }
        }
}

/* Cleanup from completed inorder transmits. */
static __rte_always_inline void
virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
{
        uint16_t i, idx = vq->vq_used_cons_idx;
        int16_t free_cnt = 0;
        struct vq_desc_extra *dxp = NULL;

        if (unlikely(num == 0))
                return;

        for (i = 0; i < num; i++) {
                dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
                free_cnt += dxp->ndescs;
                if (dxp->cookie != NULL) {
                        rte_pktmbuf_free(dxp->cookie);
                        dxp->cookie = NULL;
                }
        }

        vq->vq_free_cnt += free_cnt;
        vq->vq_used_cons_idx = idx;
}

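/* Refill a split virtqueue with Rx mbufs in ring order. Each buffer
 * takes one device-writable descriptor, with room left in the mbuf
 * headroom for the virtio-net header.
 */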
static inline int
virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
                        struct rte_mbuf **cookies,
                        uint16_t num)
{
        struct vq_desc_extra *dxp;
        struct virtio_hw *hw = vq->hw;
        struct vring_desc *start_dp;
        uint16_t head_idx, idx, i = 0;

        if (unlikely(vq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(vq->vq_free_cnt < num))
                return -EMSGSIZE;

        head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
        start_dp = vq->vq_split.ring.desc;

        while (i < num) {
                idx = head_idx & (vq->vq_nentries - 1);
                dxp = &vq->vq_descx[idx];
                dxp->cookie = (void *)cookies[i];
                dxp->ndescs = 1;

                start_dp[idx].addr =
                                VIRTIO_MBUF_ADDR(cookies[i], vq) +
                                RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
                start_dp[idx].len =
                                cookies[i]->buf_len -
                                RTE_PKTMBUF_HEADROOM +
                                hw->vtnet_hdr_size;
                start_dp[idx].flags = VRING_DESC_F_WRITE;

                vq_update_avail_ring(vq, idx);
                head_idx++;
                i++;
        }

        vq->vq_desc_head_idx += num;
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
        return 0;
}

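/* Refill a split virtqueue with Rx mbufs, following the descriptor
 * free chain starting at vq_desc_head_idx.
 */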
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
                                uint16_t num)
{
        struct vq_desc_extra *dxp;
        struct virtio_hw *hw = vq->hw;
        struct vring_desc *start_dp = vq->vq_split.ring.desc;
        uint16_t idx, i;

        if (unlikely(vq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(vq->vq_free_cnt < num))
                return -EMSGSIZE;

        if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
                return -EFAULT;

        for (i = 0; i < num; i++) {
                idx = vq->vq_desc_head_idx;
                dxp = &vq->vq_descx[idx];
                dxp->cookie = (void *)cookie[i];
                dxp->ndescs = 1;

                start_dp[idx].addr =
                        VIRTIO_MBUF_ADDR(cookie[i], vq) +
                        RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
                start_dp[idx].len =
                        cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
                        hw->vtnet_hdr_size;
                start_dp[idx].flags = VRING_DESC_F_WRITE;
                vq->vq_desc_head_idx = start_dp[idx].next;
                vq_update_avail_ring(vq, idx);
                if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
                        vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
                        break;
                }
        }

        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);

        return 0;
}

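/* Refill a packed virtqueue with Rx mbufs. The descriptor flags are
 * written last (virtqueue_store_flags_packed), so the device cannot
 * see a descriptor before its address and length are set.
 */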
static inline int
virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
                                     struct rte_mbuf **cookie, uint16_t num)
{
        struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
        uint16_t flags = vq->vq_packed.cached_flags;
        struct virtio_hw *hw = vq->hw;
        struct vq_desc_extra *dxp;
        uint16_t idx;
        int i;

        if (unlikely(vq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(vq->vq_free_cnt < num))
                return -EMSGSIZE;

        for (i = 0; i < num; i++) {
                idx = vq->vq_avail_idx;
                dxp = &vq->vq_descx[idx];
                dxp->cookie = (void *)cookie[i];
                dxp->ndescs = 1;

                start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
                                RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
                start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
                                        + hw->vtnet_hdr_size;

                vq->vq_desc_head_idx = dxp->next;
                if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                        vq->vq_desc_tail_idx = vq->vq_desc_head_idx;

                virtqueue_store_flags_packed(&start_dp[idx], flags,
                                             hw->weak_barriers);

                if (++vq->vq_avail_idx >= vq->vq_nentries) {
                        vq->vq_avail_idx -= vq->vq_nentries;
                        vq->vq_packed.cached_flags ^=
                                VRING_PACKED_DESC_F_AVAIL_USED;
                        flags = vq->vq_packed.cached_flags;
                }
        }
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
        return 0;
}

/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
        /* common case: header is not fragmented */
        if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
                        m->l4_len)) {
                struct rte_ipv4_hdr *iph;
                struct rte_ipv6_hdr *ip6h;
                struct rte_tcp_hdr *th;
                uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
                uint32_t tmp;

                iph = rte_pktmbuf_mtod_offset(m,
                                        struct rte_ipv4_hdr *, m->l2_len);
                th = RTE_PTR_ADD(iph, m->l3_len);
                if ((iph->version_ihl >> 4) == 4) {
                        iph->hdr_checksum = 0;
                        iph->hdr_checksum = rte_ipv4_cksum(iph);
                        ip_len = iph->total_length;
                        ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
                                m->l3_len);
                } else {
                        ip6h = (struct rte_ipv6_hdr *)iph;
                        ip_paylen = ip6h->payload_len;
                }

                /* calculate the new phdr checksum not including ip_paylen */
                prev_cksum = th->cksum;
                tmp = prev_cksum;
                tmp += ip_paylen;
                tmp = (tmp & 0xffff) + (tmp >> 16);
                new_cksum = tmp;

                /* replace it in the packet */
                th->cksum = new_cksum;
        }
}

/* avoid a write operation when it is not needed, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {      \
        if ((var) != (val))                     \
                (var) = (val);                  \
} while (0)

#define virtqueue_clear_net_hdr(_hdr) do {              \
        ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
        ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
        ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
        ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
        ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
        ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
} while (0)

static inline void
virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
                        struct rte_mbuf *cookie,
                        bool offload)
{
        if (offload) {
                if (cookie->ol_flags & PKT_TX_TCP_SEG)
                        cookie->ol_flags |= PKT_TX_TCP_CKSUM;

                switch (cookie->ol_flags & PKT_TX_L4_MASK) {
                case PKT_TX_UDP_CKSUM:
                        hdr->csum_start = cookie->l2_len + cookie->l3_len;
                        hdr->csum_offset = offsetof(struct rte_udp_hdr,
                                dgram_cksum);
                        hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
                        break;

                case PKT_TX_TCP_CKSUM:
                        hdr->csum_start = cookie->l2_len + cookie->l3_len;
                        hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
                        hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
                        break;

                default:
                        ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
                        break;
                }

                /* TCP Segmentation Offload */
                if (cookie->ol_flags & PKT_TX_TCP_SEG) {
                        hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
                                VIRTIO_NET_HDR_GSO_TCPV6 :
                                VIRTIO_NET_HDR_GSO_TCPV4;
                        hdr->gso_size = cookie->tso_segsz;
                        hdr->hdr_len =
                                cookie->l2_len +
                                cookie->l3_len +
                                cookie->l4_len;
                } else {
                        ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
                }
        }
}

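/* Enqueue @num single-segment mbufs for in-order transmit on a split
 * virtqueue; the virtio-net header is prepended into the mbuf headroom,
 * so each packet takes exactly one descriptor.
 */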
static inline void
virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
                        struct rte_mbuf **cookies,
                        uint16_t num)
{
        struct vq_desc_extra *dxp;
        struct virtqueue *vq = txvq->vq;
        struct vring_desc *start_dp;
        struct virtio_net_hdr *hdr;
        uint16_t idx;
        int16_t head_size = vq->hw->vtnet_hdr_size;
        uint16_t i = 0;

        idx = vq->vq_desc_head_idx;
        start_dp = vq->vq_split.ring.desc;

        while (i < num) {
                idx = idx & (vq->vq_nentries - 1);
                dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
                dxp->cookie = (void *)cookies[i];
                dxp->ndescs = 1;
                virtio_update_packet_stats(&txvq->stats, cookies[i]);

                hdr = rte_pktmbuf_mtod_offset(cookies[i],
                                struct virtio_net_hdr *, -head_size);

                /* if offload disabled, hdr is not zeroed yet, do it now */
                if (!vq->hw->has_tx_offload)
                        virtqueue_clear_net_hdr(hdr);
                else
                        virtqueue_xmit_offload(hdr, cookies[i], true);

                start_dp[idx].addr  =
                        VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq) - head_size;
                start_dp[idx].len   = cookies[i]->data_len + head_size;
                start_dp[idx].flags = 0;

                vq_update_avail_ring(vq, idx);

                idx++;
                i++;
        }

        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
        vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
}

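/* Fast transmit path for a packed virtqueue: a single-segment mbuf
 * with the virtio-net header prepended into the headroom needs only
 * one descriptor.
 */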
static inline void
virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
                                   struct rte_mbuf *cookie,
                                   int in_order)
{
        struct virtqueue *vq = txvq->vq;
        struct vring_packed_desc *dp;
        struct vq_desc_extra *dxp;
        uint16_t idx, id, flags;
        int16_t head_size = vq->hw->vtnet_hdr_size;
        struct virtio_net_hdr *hdr;

        id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
        idx = vq->vq_avail_idx;
        dp = &vq->vq_packed.ring.desc[idx];

        dxp = &vq->vq_descx[id];
        dxp->ndescs = 1;
        dxp->cookie = cookie;

        flags = vq->vq_packed.cached_flags;

        /* prepend cannot fail, checked by caller */
        hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
                                      -head_size);

        /* if offload disabled, hdr is not zeroed yet, do it now */
        if (!vq->hw->has_tx_offload)
                virtqueue_clear_net_hdr(hdr);
        else
                virtqueue_xmit_offload(hdr, cookie, true);

        dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq) - head_size;
        dp->len  = cookie->data_len + head_size;
        dp->id   = id;

        if (++vq->vq_avail_idx >= vq->vq_nentries) {
                vq->vq_avail_idx -= vq->vq_nentries;
                vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
        }

        vq->vq_free_cnt--;

        if (!in_order) {
                vq->vq_desc_head_idx = dxp->next;
                if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                        vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
        }

        virtqueue_store_flags_packed(dp, flags, vq->hw->weak_barriers);
}

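/* General transmit path for a packed virtqueue: handles multi-segment
 * mbufs and, when the header cannot be prepended (can_push == 0), a
 * separate descriptor pointing at the header kept in the reserved
 * region. The head descriptor's flags are written last to publish the
 * whole chain to the device.
 */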
static inline void
virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
                              uint16_t needed, int can_push, int in_order)
{
        struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
        struct vq_desc_extra *dxp;
        struct virtqueue *vq = txvq->vq;
        struct vring_packed_desc *start_dp, *head_dp;
        uint16_t idx, id, head_idx, head_flags;
        int16_t head_size = vq->hw->vtnet_hdr_size;
        struct virtio_net_hdr *hdr;
        uint16_t prev;
        bool prepend_header = false;

        id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;

        dxp = &vq->vq_descx[id];
        dxp->ndescs = needed;
        dxp->cookie = cookie;

        head_idx = vq->vq_avail_idx;
        idx = head_idx;
        prev = head_idx;
        start_dp = vq->vq_packed.ring.desc;

        head_dp = &vq->vq_packed.ring.desc[idx];
        head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
        head_flags |= vq->vq_packed.cached_flags;

        if (can_push) {
                /* prepend cannot fail, checked by caller */
                hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
                                              -head_size);
                prepend_header = true;

                /* if offload disabled, it is not zeroed below, do it now */
                if (!vq->hw->has_tx_offload)
                        virtqueue_clear_net_hdr(hdr);
        } else {
                /* setup first tx ring slot to point to header
                 * stored in reserved region.
                 */
                start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
                        RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
                start_dp[idx].len   = vq->hw->vtnet_hdr_size;
                hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
                idx++;
                if (idx >= vq->vq_nentries) {
                        idx -= vq->vq_nentries;
                        vq->vq_packed.cached_flags ^=
                                VRING_PACKED_DESC_F_AVAIL_USED;
                }
        }

        virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);

        do {
                uint16_t flags;

                start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
                start_dp[idx].len  = cookie->data_len;
                if (prepend_header) {
                        start_dp[idx].addr -= head_size;
                        start_dp[idx].len += head_size;
                        prepend_header = false;
                }

                if (likely(idx != head_idx)) {
                        flags = cookie->next ? VRING_DESC_F_NEXT : 0;
                        flags |= vq->vq_packed.cached_flags;
                        start_dp[idx].flags = flags;
                }
                prev = idx;
                idx++;
                if (idx >= vq->vq_nentries) {
                        idx -= vq->vq_nentries;
                        vq->vq_packed.cached_flags ^=
                                VRING_PACKED_DESC_F_AVAIL_USED;
                }
        } while ((cookie = cookie->next) != NULL);

        start_dp[prev].id = id;

        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
        vq->vq_avail_idx = idx;

        if (!in_order) {
                vq->vq_desc_head_idx = dxp->next;
                if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                        vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
        }

        virtqueue_store_flags_packed(head_dp, head_flags,
                                     vq->hw->weak_barriers);
}

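/* General transmit path for a split virtqueue. Depending on the
 * negotiated features, the header is prepended into the mbuf headroom
 * (can_push), placed in an indirect descriptor list (use_indirect), or
 * chained as a separate descriptor in the reserved region.
 */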
static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
                        uint16_t needed, int use_indirect, int can_push,
                        int in_order)
{
        struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
        struct vq_desc_extra *dxp;
        struct virtqueue *vq = txvq->vq;
        struct vring_desc *start_dp;
        uint16_t seg_num = cookie->nb_segs;
        uint16_t head_idx, idx;
        int16_t head_size = vq->hw->vtnet_hdr_size;
        bool prepend_header = false;
        struct virtio_net_hdr *hdr;

        head_idx = vq->vq_desc_head_idx;
        idx = head_idx;
        if (in_order)
                dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
        else
                dxp = &vq->vq_descx[idx];
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

        start_dp = vq->vq_split.ring.desc;

        if (can_push) {
                /* prepend cannot fail, checked by caller */
                hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
                                              -head_size);
                prepend_header = true;

                /* if offload disabled, it is not zeroed below, do it now */
                if (!vq->hw->has_tx_offload)
                        virtqueue_clear_net_hdr(hdr);
        } else if (use_indirect) {
                /* setup tx ring slot to point to indirect
                 * descriptor list stored in reserved region.
                 *
                 * the first slot in indirect ring is already preset
                 * to point to the header in reserved region
                 */
                start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
                        RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
                start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
                start_dp[idx].flags = VRING_DESC_F_INDIRECT;
                hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

                /* loop below will fill in rest of the indirect elements */
                start_dp = txr[idx].tx_indir;
                idx = 1;
        } else {
                /* setup first tx ring slot to point to header
                 * stored in reserved region.
                 */
                start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
                        RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
                start_dp[idx].len   = vq->hw->vtnet_hdr_size;
                start_dp[idx].flags = VRING_DESC_F_NEXT;
                hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

                idx = start_dp[idx].next;
        }

        virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);

        do {
                start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
                start_dp[idx].len   = cookie->data_len;
                if (prepend_header) {
                        start_dp[idx].addr -= head_size;
                        start_dp[idx].len += head_size;
                        prepend_header = false;
                }
                start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
                idx = start_dp[idx].next;
        } while ((cookie = cookie->next) != NULL);

        if (use_indirect)
                idx = vq->vq_split.ring.desc[head_idx].next;

        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);

        vq->vq_desc_head_idx = idx;
        vq_update_avail_ring(vq, head_idx);

        if (!in_order) {
                if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                        vq->vq_desc_tail_idx = idx;
        }
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (hw->cvq && hw->cvq->vq) {
                rte_spinlock_init(&hw->cvq->lock);
                VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
        }
}

int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id __rte_unused,
                        const struct rte_eth_rxconf *rx_conf,
                        struct rte_mempool *mp)
{
        uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
        struct virtnet_rx *rxvq;
        uint16_t rx_free_thresh;

        PMD_INIT_FUNC_TRACE();

        if (rx_conf->rx_deferred_start) {
                PMD_INIT_LOG(ERR, "Rx deferred start is not supported");
                return -EINVAL;
        }

        rx_free_thresh = rx_conf->rx_free_thresh;
        if (rx_free_thresh == 0)
                rx_free_thresh =
                        RTE_MIN(vq->vq_nentries / 4, DEFAULT_RX_FREE_THRESH);

        if (rx_free_thresh & 0x3) {
                RTE_LOG(ERR, PMD, "rx_free_thresh must be a multiple of four."
                        " (rx_free_thresh=%u port=%u queue=%u)\n",
                        rx_free_thresh, dev->data->port_id, queue_idx);
                return -EINVAL;
        }

        if (rx_free_thresh >= vq->vq_nentries) {
                RTE_LOG(ERR, PMD, "rx_free_thresh must be less than the "
                        "number of RX entries (%u)."
                        " (rx_free_thresh=%u port=%u queue=%u)\n",
                        vq->vq_nentries,
                        rx_free_thresh, dev->data->port_id, queue_idx);
                return -EINVAL;
        }
        vq->vq_free_thresh = rx_free_thresh;

        if (nb_desc == 0 || nb_desc > vq->vq_nentries)
                nb_desc = vq->vq_nentries;
        vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

        rxvq = &vq->rxq;
        rxvq->queue_id = queue_idx;
        rxvq->mpool = mp;
        dev->data->rx_queues[queue_idx] = rxvq;

        return 0;
}

int
virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
{
        uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
        struct virtnet_rx *rxvq = &vq->rxq;
        struct rte_mbuf *m;
        uint16_t desc_idx;
        int error, nbufs, i;
        bool in_order = vtpci_with_feature(hw, VIRTIO_F_IN_ORDER);

        PMD_INIT_FUNC_TRACE();

        /* Allocate blank mbufs for each Rx descriptor */
        nbufs = 0;

        if (hw->use_simple_rx) {
                for (desc_idx = 0; desc_idx < vq->vq_nentries;
                     desc_idx++) {
                        vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
                        vq->vq_split.ring.desc[desc_idx].flags =
                                VRING_DESC_F_WRITE;
                }

                virtio_rxq_vec_setup(rxvq);
        }

        memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
        for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
             desc_idx++) {
                vq->sw_ring[vq->vq_nentries + desc_idx] =
                        &rxvq->fake_mbuf;
        }

        if (hw->use_simple_rx) {
                while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
                        virtio_rxq_rearm_vec(rxvq);
                        nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
                }
        } else if (!vtpci_packed_queue(vq->hw) && in_order) {
                if ((!virtqueue_full(vq))) {
                        uint16_t free_cnt = vq->vq_free_cnt;
                        struct rte_mbuf *pkts[free_cnt];

                        if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
                                free_cnt)) {
                                error = virtqueue_enqueue_refill_inorder(vq,
                                                pkts,
                                                free_cnt);
                                if (unlikely(error)) {
                                        for (i = 0; i < free_cnt; i++)
                                                rte_pktmbuf_free(pkts[i]);
                                }
                        }

                        nbufs += free_cnt;
                        vq_update_avail_idx(vq);
                }
        } else {
                while (!virtqueue_full(vq)) {
                        m = rte_mbuf_raw_alloc(rxvq->mpool);
                        if (m == NULL)
                                break;

                        /* Enqueue allocated buffers */
                        if (vtpci_packed_queue(vq->hw))
                                error = virtqueue_enqueue_recv_refill_packed(vq,
                                                &m, 1);
                        else
                                error = virtqueue_enqueue_recv_refill(vq,
                                                &m, 1);
                        if (error) {
                                rte_pktmbuf_free(m);
                                break;
                        }
                        nbufs++;
                }

                if (!vtpci_packed_queue(vq->hw))
                        vq_update_avail_idx(vq);
        }

        PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

        VIRTQUEUE_DUMP(vq);

        return 0;
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id __rte_unused,
                        const struct rte_eth_txconf *tx_conf)
{
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
        struct virtnet_tx *txvq;
        uint16_t tx_free_thresh;

        PMD_INIT_FUNC_TRACE();

        if (tx_conf->tx_deferred_start) {
                PMD_INIT_LOG(ERR, "Tx deferred start is not supported");
                return -EINVAL;
        }

        if (nb_desc == 0 || nb_desc > vq->vq_nentries)
                nb_desc = vq->vq_nentries;
        vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

        txvq = &vq->txq;
        txvq->queue_id = queue_idx;

        tx_free_thresh = tx_conf->tx_free_thresh;
        if (tx_free_thresh == 0)
                tx_free_thresh =
                        RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

        if (tx_free_thresh >= (vq->vq_nentries - 3)) {
                PMD_DRV_LOG(ERR, "tx_free_thresh must be less than the "
                        "number of TX entries minus 3 (%u)."
                        " (tx_free_thresh=%u port=%u queue=%u)\n",
                        vq->vq_nentries - 3,
                        tx_free_thresh, dev->data->port_id, queue_idx);
                return -EINVAL;
        }

        vq->vq_free_thresh = tx_free_thresh;

        dev->data->tx_queues[queue_idx] = txvq;
        return 0;
}

int
virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
                                uint16_t queue_idx)
{
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtqueue *vq = hw->vqs[vtpci_queue_idx];

        PMD_INIT_FUNC_TRACE();

        if (!vtpci_packed_queue(hw)) {
                if (vtpci_with_feature(hw, VIRTIO_F_IN_ORDER))
                        vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
        }

        VIRTQUEUE_DUMP(vq);

        return 0;
}

static inline void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
        int error;
        /*
         * Requeue the discarded mbuf. This should always be
         * successful since it was just dequeued.
         */
        if (vtpci_packed_queue(vq->hw))
                error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
        else
                error = virtqueue_enqueue_recv_refill(vq, &m, 1);

        if (unlikely(error)) {
                PMD_DRV_LOG(ERR, "cannot requeue discarded mbuf");
                rte_pktmbuf_free(m);
        }
}

static inline void
virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
{
        int error;

        error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
        if (unlikely(error)) {
                PMD_DRV_LOG(ERR, "cannot requeue discarded mbuf");
                rte_pktmbuf_free(m);
        }
}

/* Optionally fill offload information in structure */
static inline int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
        struct rte_net_hdr_lens hdr_lens;
        uint32_t hdrlen, ptype;
        int l4_supported = 0;

        /* nothing to do */
        if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
                return 0;

        m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

        ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
        m->packet_type = ptype;
        if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
            (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
            (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
                l4_supported = 1;

        if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
                hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
                if (hdr->csum_start <= hdrlen && l4_supported) {
                        m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
                } else {
                        /* Unknown proto or tunnel, do sw cksum. We can assume
                         * the cksum field is in the first segment since the
                         * buffers we provided to the host are large enough.
                         * In case of SCTP, this will be wrong since it's a CRC
                         * but there's nothing we can do.
                         */
                        uint16_t csum = 0, off;

                        rte_raw_cksum_mbuf(m, hdr->csum_start,
                                rte_pktmbuf_pkt_len(m) - hdr->csum_start,
                                &csum);
                        if (likely(csum != 0xffff))
                                csum = ~csum;
                        off = hdr->csum_offset + hdr->csum_start;
                        if (rte_pktmbuf_data_len(m) >= off + 1)
                                *rte_pktmbuf_mtod_offset(m, uint16_t *,
                                        off) = csum;
                }
        } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
                m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
        }

        /* GSO request, save required information in mbuf */
        if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
                /* Check unsupported modes */
                if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
                    (hdr->gso_size == 0)) {
                        return -EINVAL;
                }

                /* Update MSS length in mbuf */
                m->tso_segsz = hdr->gso_size;
                switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
                        case VIRTIO_NET_HDR_GSO_TCPV4:
                        case VIRTIO_NET_HDR_GSO_TCPV6:
                                m->ol_flags |= PKT_RX_LRO |
                                        PKT_RX_L4_CKSUM_NONE;
                                break;
                        default:
                                return -EINVAL;
                }
        }

        return 0;
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
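/* Receive bursts are trimmed so that vq_used_cons_idx finishes on a
 * descriptor cache-line boundary (DESC_PER_CACHELINE), letting the
 * next burst start on a fresh cache line of the ring.
 */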
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct virtnet_rx *rxvq = rx_queue;
        struct virtqueue *vq = rxvq->vq;
        struct virtio_hw *hw = vq->hw;
        struct rte_mbuf *rxm;
        uint16_t nb_used, num, nb_rx;
        uint32_t len[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
        int error;
        uint32_t i, nb_enqueued;
        uint32_t hdr_size;
        struct virtio_net_hdr *hdr;

        nb_rx = 0;
        if (unlikely(hw->started == 0))
                return nb_rx;

        nb_used = VIRTQUEUE_NUSED(vq);

        virtio_rmb(hw->weak_barriers);

        num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
        if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
                num = VIRTIO_MBUF_BURST_SZ;
        if (likely(num > DESC_PER_CACHELINE))
                num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

        num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
        PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

        nb_enqueued = 0;
        hdr_size = hw->vtnet_hdr_size;

        for (i = 0; i < num; i++) {
                rxm = rcv_pkts[i];

                PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

                if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
                        PMD_RX_LOG(ERR, "Packet drop");
                        nb_enqueued++;
                        virtio_discard_rxbuf(vq, rxm);
                        rxvq->stats.errors++;
                        continue;
                }

                rxm->port = rxvq->port_id;
                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->ol_flags = 0;
                rxm->vlan_tci = 0;

                rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
                rxm->data_len = (uint16_t)(len[i] - hdr_size);

                hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
                        RTE_PKTMBUF_HEADROOM - hdr_size);

                if (hw->vlan_strip)
                        rte_vlan_strip(rxm);

                if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
                        virtio_discard_rxbuf(vq, rxm);
                        rxvq->stats.errors++;
                        continue;
                }

                virtio_rx_stats_updated(rxvq, rxm);

                rx_pkts[nb_rx++] = rxm;
        }

        rxvq->stats.packets += nb_rx;

        /* Allocate new mbuf for the used descriptor */
        if (likely(!virtqueue_full(vq))) {
                uint16_t free_cnt = vq->vq_free_cnt;
                struct rte_mbuf *new_pkts[free_cnt];

                if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
                                                free_cnt) == 0)) {
                        error = virtqueue_enqueue_recv_refill(vq, new_pkts,
                                        free_cnt);
                        if (unlikely(error)) {
                                for (i = 0; i < free_cnt; i++)
                                        rte_pktmbuf_free(new_pkts[i]);
                        }
                        nb_enqueued += free_cnt;
                } else {
                        struct rte_eth_dev *dev =
                                &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed += free_cnt;
                }
        }

        if (likely(nb_enqueued)) {
                vq_update_avail_idx(vq);

                if (unlikely(virtqueue_kick_prepare(vq))) {
                        virtqueue_notify(vq);
                        PMD_RX_LOG(DEBUG, "Notified");
                }
        }

        return nb_rx;
}

1359 uint16_t
1360 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1361                         uint16_t nb_pkts)
1362 {
1363         struct virtnet_rx *rxvq = rx_queue;
1364         struct virtqueue *vq = rxvq->vq;
1365         struct virtio_hw *hw = vq->hw;
1366         struct rte_mbuf *rxm;
1367         uint16_t num, nb_rx;
1368         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1369         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1370         int error;
1371         uint32_t i, nb_enqueued;
1372         uint32_t hdr_size;
1373         struct virtio_net_hdr *hdr;
1374
1375         nb_rx = 0;
1376         if (unlikely(hw->started == 0))
1377                 return nb_rx;
1378
1379         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1380         if (likely(num > DESC_PER_CACHELINE))
1381                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1382
1383         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1384         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1385
1386         nb_enqueued = 0;
1387         hdr_size = hw->vtnet_hdr_size;
1388
1389         for (i = 0; i < num; i++) {
1390                 rxm = rcv_pkts[i];
1391
1392                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1393
1394                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1395                         PMD_RX_LOG(ERR, "Packet drop");
1396                         nb_enqueued++;
1397                         virtio_discard_rxbuf(vq, rxm);
1398                         rxvq->stats.errors++;
1399                         continue;
1400                 }
1401
1402                 rxm->port = rxvq->port_id;
1403                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1404                 rxm->ol_flags = 0;
1405                 rxm->vlan_tci = 0;
1406
1407                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1408                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1409
1410                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1411                         RTE_PKTMBUF_HEADROOM - hdr_size);
1412
1413                 if (hw->vlan_strip)
1414                         rte_vlan_strip(rxm);
1415
1416                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1417                         virtio_discard_rxbuf(vq, rxm);
1418                         rxvq->stats.errors++;
1419                         continue;
1420                 }
1421
1422                 virtio_rx_stats_updated(rxvq, rxm);
1423
1424                 rx_pkts[nb_rx++] = rxm;
1425         }
1426
1427         rxvq->stats.packets += nb_rx;
1428
1429         /* Allocate new mbuf for the used descriptor */
1430         if (likely(!virtqueue_full(vq))) {
1431                 uint16_t free_cnt = vq->vq_free_cnt;
1432                 struct rte_mbuf *new_pkts[free_cnt];
1433
1434                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1435                                                 free_cnt) == 0)) {
1436                         error = virtqueue_enqueue_recv_refill_packed(vq,
1437                                         new_pkts, free_cnt);
1438                         if (unlikely(error)) {
1439                                 for (i = 0; i < free_cnt; i++)
1440                                         rte_pktmbuf_free(new_pkts[i]);
1441                         }
1442                         nb_enqueued += free_cnt;
1443                 } else {
1444                         struct rte_eth_dev *dev =
1445                                 &rte_eth_devices[rxvq->port_id];
1446                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1447                 }
1448         }
1449
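        /*
         * The packed ring has no avail index to publish: availability is
         * signalled through the descriptor flags themselves, so only the
         * notification-suppression check remains before kicking.
         */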
1450         if (likely(nb_enqueued)) {
1451                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1452                         virtqueue_notify(vq);
1453                         PMD_RX_LOG(DEBUG, "Notified");
1454                 }
1455         }
1456
1457         return nb_rx;
1458 }
1459
1460
1461 uint16_t
1462 virtio_recv_pkts_inorder(void *rx_queue,
1463                         struct rte_mbuf **rx_pkts,
1464                         uint16_t nb_pkts)
1465 {
1466         struct virtnet_rx *rxvq = rx_queue;
1467         struct virtqueue *vq = rxvq->vq;
1468         struct virtio_hw *hw = vq->hw;
1469         struct rte_mbuf *rxm;
1470         struct rte_mbuf *prev = NULL;
1471         uint16_t nb_used, num, nb_rx;
1472         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1473         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1474         int error;
1475         uint32_t nb_enqueued;
1476         uint32_t seg_num;
1477         uint32_t seg_res;
1478         uint32_t hdr_size;
1479         int32_t i;
1480
1481         nb_rx = 0;
1482         if (unlikely(hw->started == 0))
1483                 return nb_rx;
1484
1485         nb_used = VIRTQUEUE_NUSED(vq);
1486         nb_used = RTE_MIN(nb_used, nb_pkts);
1487         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1488
1489         virtio_rmb(hw->weak_barriers);
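        /*
         * Order the read of the used index above before the reads of the
         * used ring entries below; weak_barriers selects an SMP barrier
         * for software backends rather than a heavier I/O barrier.
         */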
1490
1491         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1492
1493         nb_enqueued = 0;
1494         seg_num = 1;
1495         seg_res = 0;
1496         hdr_size = hw->vtnet_hdr_size;
1497
1498         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1499
1500         for (i = 0; i < num; i++) {
1501                 struct virtio_net_hdr_mrg_rxbuf *header;
1502
1503                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1504                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1505
1506                 rxm = rcv_pkts[i];
1507
1508                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1509                         PMD_RX_LOG(ERR, "Packet drop");
1510                         nb_enqueued++;
1511                         virtio_discard_rxbuf_inorder(vq, rxm);
1512                         rxvq->stats.errors++;
1513                         continue;
1514                 }
1515
1516                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1517                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1518                          - hdr_size);
1519
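                /*
                 * With VIRTIO_NET_F_MRG_RXBUF, num_buffers is how many
                 * receive buffers the device used for this packet; a
                 * (spec-violating) value of 0 is treated as 1 defensively.
                 */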
1520                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1521                         seg_num = header->num_buffers;
1522                         if (seg_num == 0)
1523                                 seg_num = 1;
1524                 } else {
1525                         seg_num = 1;
1526                 }
1527
1528                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1529                 rxm->nb_segs = seg_num;
1530                 rxm->ol_flags = 0;
1531                 rxm->vlan_tci = 0;
1532                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1533                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1534
1535                 rxm->port = rxvq->port_id;
1536
1537                 rx_pkts[nb_rx] = rxm;
1538                 prev = rxm;
1539
1540                 if (vq->hw->has_rx_offload &&
1541                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1542                         virtio_discard_rxbuf_inorder(vq, rxm);
1543                         rxvq->stats.errors++;
1544                         continue;
1545                 }
1546
1547                 if (hw->vlan_strip)
1548                         rte_vlan_strip(rx_pkts[nb_rx]);
1549
1550                 seg_res = seg_num - 1;
1551
1552                 /* Merge remaining segments */
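                /*
                 * Only the first buffer of a mergeable packet carries a
                 * virtio-net header; the continuation buffers are pure
                 * data, so they are chained onto the head mbuf and only
                 * pkt_len of the head is grown.
                 */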
1553                 while (seg_res != 0 && i < (num - 1)) {
1554                         i++;
1555
1556                         rxm = rcv_pkts[i];
1557                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1558                         rxm->pkt_len = (uint32_t)(len[i]);
1559                         rxm->data_len = (uint16_t)(len[i]);
1560
1561                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1562
1563                         prev->next = rxm;
1564                         prev = rxm;
1565                         seg_res -= 1;
1566                 }
1567
1568                 if (!seg_res) {
1569                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1570                         nb_rx++;
1571                 }
1572         }
1573
1574         /* The last packet may still need more segments merged */
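        /*
         * A packet's buffers can extend beyond the burst dequeued above;
         * keep pulling used buffers until the chain is complete, or drop
         * the partial packet if the device has not yet made enough of
         * them available.
         */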
1575         while (seg_res != 0) {
1576                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1577                                         VIRTIO_MBUF_BURST_SZ);
1578
1579                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1580                         virtio_rmb(hw->weak_barriers);
1581                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1582                                                            rcv_cnt);
1583                         uint16_t extra_idx = 0;
1584
1585                         rcv_cnt = num;
1586                         while (extra_idx < rcv_cnt) {
1587                                 rxm = rcv_pkts[extra_idx];
1588                                 rxm->data_off =
1589                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1590                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1591                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1592                                 prev->next = rxm;
1593                                 prev = rxm;
1594                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1595                                 extra_idx += 1;
1596                         }
1597                         seg_res -= rcv_cnt;
1598
1599                         if (!seg_res) {
1600                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1601                                 nb_rx++;
1602                         }
1603                 } else {
1604                         PMD_RX_LOG(ERR,
1605                                         "Not enough segments for packet.");
1606                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1607                         rxvq->stats.errors++;
1608                         break;
1609                 }
1610         }
1611
1612         rxvq->stats.packets += nb_rx;
1613
1614         /* Allocate new mbufs to refill the used descriptors */
1615
1616         if (likely(!virtqueue_full(vq))) {
1617                 /* free_cnt may include mrg descs */
1618                 uint16_t free_cnt = vq->vq_free_cnt;
1619                 struct rte_mbuf *new_pkts[free_cnt];
1620
1621                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1622                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1623                                         free_cnt);
1624                         if (unlikely(error)) {
1625                                 for (i = 0; i < free_cnt; i++)
1626                                         rte_pktmbuf_free(new_pkts[i]);
1627                         }
1628                         nb_enqueued += free_cnt;
1629                 } else {
1630                         struct rte_eth_dev *dev =
1631                                 &rte_eth_devices[rxvq->port_id];
1632                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1633                 }
1634         }
1635
1636         if (likely(nb_enqueued)) {
1637                 vq_update_avail_idx(vq);
1638
1639                 if (unlikely(virtqueue_kick_prepare(vq))) {
1640                         virtqueue_notify(vq);
1641                         PMD_RX_LOG(DEBUG, "Notified");
1642                 }
1643         }
1644
1645         return nb_rx;
1646 }
1647
1648 uint16_t
1649 virtio_recv_mergeable_pkts(void *rx_queue,
1650                         struct rte_mbuf **rx_pkts,
1651                         uint16_t nb_pkts)
1652 {
1653         struct virtnet_rx *rxvq = rx_queue;
1654         struct virtqueue *vq = rxvq->vq;
1655         struct virtio_hw *hw = vq->hw;
1656         struct rte_mbuf *rxm;
1657         struct rte_mbuf *prev = NULL;
1658         uint16_t nb_used, num, nb_rx = 0;
1659         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1660         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1661         int error;
1662         uint32_t nb_enqueued = 0;
1663         uint32_t seg_num = 0;
1664         uint32_t seg_res = 0;
1665         uint32_t hdr_size = hw->vtnet_hdr_size;
1666         int32_t i;
1667
1668         if (unlikely(hw->started == 0))
1669                 return nb_rx;
1670
1671         nb_used = VIRTQUEUE_NUSED(vq);
1672
1673         virtio_rmb(hw->weak_barriers);
1674
1675         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1676
1677         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1678         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1679                 num = VIRTIO_MBUF_BURST_SZ;
1680         if (likely(num > DESC_PER_CACHELINE))
1681                 num = num - ((vq->vq_used_cons_idx + num) %
1682                                 DESC_PER_CACHELINE);
1683
1684
1685         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1686
1687         for (i = 0; i < num; i++) {
1688                 struct virtio_net_hdr_mrg_rxbuf *header;
1689
1690                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1691                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1692
1693                 rxm = rcv_pkts[i];
1694
1695                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1696                         PMD_RX_LOG(ERR, "Packet drop");
1697                         nb_enqueued++;
1698                         virtio_discard_rxbuf(vq, rxm);
1699                         rxvq->stats.errors++;
1700                         continue;
1701                 }
1702
1703                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1704                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1705                          - hdr_size);
1706                 seg_num = header->num_buffers;
1707                 if (seg_num == 0)
1708                         seg_num = 1;
1709
1710                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1711                 rxm->nb_segs = seg_num;
1712                 rxm->ol_flags = 0;
1713                 rxm->vlan_tci = 0;
1714                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1715                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1716
1717                 rxm->port = rxvq->port_id;
1718
1719                 rx_pkts[nb_rx] = rxm;
1720                 prev = rxm;
1721
1722                 if (hw->has_rx_offload &&
1723                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1724                         virtio_discard_rxbuf(vq, rxm);
1725                         rxvq->stats.errors++;
1726                         continue;
1727                 }
1728
1729                 if (hw->vlan_strip)
1730                         rte_vlan_strip(rx_pkts[nb_rx]);
1731
1732                 seg_res = seg_num - 1;
1733
1734                 /* Merge remaining segments */
1735                 while (seg_res != 0 && i < (num - 1)) {
1736                         i++;
1737
1738                         rxm = rcv_pkts[i];
1739                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1740                         rxm->pkt_len = (uint32_t)(len[i]);
1741                         rxm->data_len = (uint16_t)(len[i]);
1742
1743                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1744
1745                         prev->next = rxm;
1746                         prev = rxm;
1747                         seg_res -= 1;
1748                 }
1749
1750                 if (!seg_res) {
1751                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1752                         nb_rx++;
1753                 }
1754         }
1755
1756         /* The last packet may still need more segments merged */
1757         while (seg_res != 0) {
1758                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1759                                         VIRTIO_MBUF_BURST_SZ);
1760
1761                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1762                         virtio_rmb(hw->weak_barriers);
1763                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1764                                                            rcv_cnt);
1765                         uint16_t extra_idx = 0;
1766
1767                         rcv_cnt = num;
1768                         while (extra_idx < rcv_cnt) {
1769                                 rxm = rcv_pkts[extra_idx];
1770                                 rxm->data_off =
1771                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1772                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1773                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1774                                 prev->next = rxm;
1775                                 prev = rxm;
1776                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1777                                 extra_idx += 1;
1778                         }
1779                         seg_res -= rcv_cnt;
1780
1781                         if (!seg_res) {
1782                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1783                                 nb_rx++;
1784                         }
1785                 } else {
1786                         PMD_RX_LOG(ERR,
1787                                         "Not enough segments for packet.");
1788                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1789                         rxvq->stats.errors++;
1790                         break;
1791                 }
1792         }
1793
1794         rxvq->stats.packets += nb_rx;
1795
1796         /* Allocate new mbufs to refill the used descriptors */
1797         if (likely(!virtqueue_full(vq))) {
1798                 /* free_cnt may include mrg descs */
1799                 uint16_t free_cnt = vq->vq_free_cnt;
1800                 struct rte_mbuf *new_pkts[free_cnt];
1801
1802                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1803                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1804                                         free_cnt);
1805                         if (unlikely(error)) {
1806                                 for (i = 0; i < free_cnt; i++)
1807                                         rte_pktmbuf_free(new_pkts[i]);
1808                         }
1809                         nb_enqueued += free_cnt;
1810                 } else {
1811                         struct rte_eth_dev *dev =
1812                                 &rte_eth_devices[rxvq->port_id];
1813                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1814                 }
1815         }
1816
1817         if (likely(nb_enqueued)) {
1818                 vq_update_avail_idx(vq);
1819
1820                 if (unlikely(virtqueue_kick_prepare(vq))) {
1821                         virtqueue_notify(vq);
1822                         PMD_RX_LOG(DEBUG, "Notified");
1823                 }
1824         }
1825
1826         return nb_rx;
1827 }
1828
1829 uint16_t
1830 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1831                         struct rte_mbuf **rx_pkts,
1832                         uint16_t nb_pkts)
1833 {
1834         struct virtnet_rx *rxvq = rx_queue;
1835         struct virtqueue *vq = rxvq->vq;
1836         struct virtio_hw *hw = vq->hw;
1837         struct rte_mbuf *rxm;
1838         struct rte_mbuf *prev = NULL;
1839         uint16_t num, nb_rx = 0;
1840         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1841         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1842         uint32_t nb_enqueued = 0;
1843         uint32_t seg_num = 0;
1844         uint32_t seg_res = 0;
1845         uint32_t hdr_size = hw->vtnet_hdr_size;
1846         int32_t i;
1847         int error;
1848
1849         if (unlikely(hw->started == 0))
1850                 return nb_rx;
1851
1852
1853         num = nb_pkts;
1854         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1855                 num = VIRTIO_MBUF_BURST_SZ;
1856         if (likely(num > DESC_PER_CACHELINE))
1857                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1858
1859         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1860
1861         for (i = 0; i < num; i++) {
1862                 struct virtio_net_hdr_mrg_rxbuf *header;
1863
1864                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1865                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1866
1867                 rxm = rcv_pkts[i];
1868
1869                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1870                         PMD_RX_LOG(ERR, "Packet drop");
1871                         nb_enqueued++;
1872                         virtio_discard_rxbuf(vq, rxm);
1873                         rxvq->stats.errors++;
1874                         continue;
1875                 }
1876
1877                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1878                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1879                 seg_num = header->num_buffers;
1880
1881                 if (seg_num == 0)
1882                         seg_num = 1;
1883
1884                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1885                 rxm->nb_segs = seg_num;
1886                 rxm->ol_flags = 0;
1887                 rxm->vlan_tci = 0;
1888                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1889                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1890
1891                 rxm->port = rxvq->port_id;
1892                 rx_pkts[nb_rx] = rxm;
1893                 prev = rxm;
1894
1895                 if (hw->has_rx_offload &&
1896                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1897                         virtio_discard_rxbuf(vq, rxm);
1898                         rxvq->stats.errors++;
1899                         continue;
1900                 }
1901
1902                 if (hw->vlan_strip)
1903                         rte_vlan_strip(rx_pkts[nb_rx]);
1904
1905                 seg_res = seg_num - 1;
1906
1907                 /* Merge remaining segments */
1908                 while (seg_res != 0 && i < (num - 1)) {
1909                         i++;
1910
1911                         rxm = rcv_pkts[i];
1912                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1913                         rxm->pkt_len = (uint32_t)(len[i]);
1914                         rxm->data_len = (uint16_t)(len[i]);
1915
1916                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1917
1918                         prev->next = rxm;
1919                         prev = rxm;
1920                         seg_res -= 1;
1921                 }
1922
1923                 if (!seg_res) {
1924                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1925                         nb_rx++;
1926                 }
1927         }
1928
1929         /* The last packet may still need more segments merged */
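        /*
         * There is no separate used index to poll on the packed ring;
         * dequeue directly and treat a zero return as the device not yet
         * having completed the remaining buffers.
         */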
1930         while (seg_res != 0) {
1931                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1932                                         VIRTIO_MBUF_BURST_SZ);
1933                 uint16_t extra_idx = 0;
1934
1935                 rcv_cnt = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1936                                 len, rcv_cnt);
1937                 if (unlikely(rcv_cnt == 0)) {
1938                         PMD_RX_LOG(ERR, "Not enough segments for packet.");
1939                         rte_pktmbuf_free(rx_pkts[nb_rx]);
1940                         rxvq->stats.errors++;
1941                         break;
1942                 }
1943
1944                 while (extra_idx < rcv_cnt) {
1945                         rxm = rcv_pkts[extra_idx];
1946
1947                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1948                         rxm->pkt_len = (uint32_t)(len[extra_idx]);
1949                         rxm->data_len = (uint16_t)(len[extra_idx]);
1950
1951                         prev->next = rxm;
1952                         prev = rxm;
1953                         rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1954                         extra_idx += 1;
1955                 }
1956                 seg_res -= rcv_cnt;
1957                 if (!seg_res) {
1958                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1959                         nb_rx++;
1960                 }
1961         }
1962
1963         rxvq->stats.packets += nb_rx;
1964
1965         /* Allocate new mbufs to refill the used descriptors */
1966         if (likely(!virtqueue_full(vq))) {
1967                 /* free_cnt may include mrg descs */
1968                 uint16_t free_cnt = vq->vq_free_cnt;
1969                 struct rte_mbuf *new_pkts[free_cnt];
1970
1971                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1972                         error = virtqueue_enqueue_recv_refill_packed(vq,
1973                                         new_pkts, free_cnt);
1974                         if (unlikely(error)) {
1975                                 for (i = 0; i < free_cnt; i++)
1976                                         rte_pktmbuf_free(new_pkts[i]);
1977                         }
1978                         nb_enqueued += free_cnt;
1979                 } else {
1980                         struct rte_eth_dev *dev =
1981                                 &rte_eth_devices[rxvq->port_id];
1982                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1983                 }
1984         }
1985
1986         if (likely(nb_enqueued)) {
1987                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1988                         virtqueue_notify(vq);
1989                         PMD_RX_LOG(DEBUG, "Notified");
1990                 }
1991         }
1992
1993         return nb_rx;
1994 }
1995
1996 uint16_t
1997 virtio_xmit_pkts_prepare(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
1998                         uint16_t nb_pkts)
1999 {
2000         uint16_t nb_tx;
2001         int error;
2002
2003         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2004                 struct rte_mbuf *m = tx_pkts[nb_tx];
2005
2006 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
2007                 error = rte_validate_tx_offload(m);
2008                 if (unlikely(error)) {
2009                         rte_errno = -error;
2010                         break;
2011                 }
2012 #endif
2013
2014                 /* Do VLAN tag insertion */
2015                 if (unlikely(m->ol_flags & PKT_TX_VLAN_PKT)) {
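                        /*
                         * virtio offers no Tx VLAN-insertion offload, so
                         * the tag is spliced into the frame in software.
                         */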
2016                         error = rte_vlan_insert(&m);
2017                         /* rte_vlan_insert() may change the mbuf pointer
2018                          * even in the case of failure
2019                          */
2020                         tx_pkts[nb_tx] = m;
2021
2022                         if (unlikely(error)) {
2023                                 rte_errno = -error;
2024                                 break;
2025                         }
2026                 }
2027
2028                 error = rte_net_intel_cksum_prepare(m);
2029                 if (unlikely(error)) {
2030                         rte_errno = -error;
2031                         break;
2032                 }
2033
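                /*
                 * rte_net_intel_cksum_prepare() leaves the Intel-style
                 * pseudo-header checksum, which excludes the payload
                 * length; virtio expects the length included, so TSO
                 * packets get their checksum adjusted below.
                 */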
2034                 if (m->ol_flags & PKT_TX_TCP_SEG)
2035                         virtio_tso_fix_cksum(m);
2036         }
2037
2038         return nb_tx;
2039 }
2040
2041 uint16_t
2042 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
2043                         uint16_t nb_pkts)
2044 {
2045         struct virtnet_tx *txvq = tx_queue;
2046         struct virtqueue *vq = txvq->vq;
2047         struct virtio_hw *hw = vq->hw;
2048         uint16_t hdr_size = hw->vtnet_hdr_size;
2049         uint16_t nb_tx = 0;
2050         bool in_order = vtpci_with_feature(hw, VIRTIO_F_IN_ORDER);
2051
2052         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2053                 return nb_tx;
2054
2055         if (unlikely(nb_pkts < 1))
2056                 return nb_pkts;
2057
2058         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2059
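        /*
         * Reclaim completed descriptors up front if the burst would not
         * fit; with IN_ORDER the device uses descriptors in ring order,
         * so the shortfall can be freed as a single batch.
         */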
2060         if (nb_pkts > vq->vq_free_cnt)
2061                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
2062                                            in_order);
2063
2064         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2065                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2066                 int can_push = 0, slots, need;
2067
2068                 /* optimize ring usage */
2069                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2070                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2071                     rte_mbuf_refcnt_read(txm) == 1 &&
2072                     RTE_MBUF_DIRECT(txm) &&
2073                     txm->nb_segs == 1 &&
2074                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2075                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2076                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2077                         can_push = 1;
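                /*
                 * All conditions above must hold to prepend the virtio-net
                 * header into the mbuf headroom: the layout must allow the
                 * header to share a buffer with data (ANY_LAYOUT or
                 * VERSION_1), the mbuf must be safely writable (direct,
                 * refcnt 1) and single-segment, with enough suitably
                 * aligned headroom for the header itself.
                 */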
2078
2079                 /* How many main ring entries are needed for this Tx?
2080                  * any_layout => number of segments
2081                  * default    => number of segments + 1
2082                  */
2083                 slots = txm->nb_segs + !can_push;
2084                 need = slots - vq->vq_free_cnt;
2085
2086                 /* A positive value indicates we need more free vring descriptors */
2087                 if (unlikely(need > 0)) {
2088                         virtio_xmit_cleanup_packed(vq, need, in_order);
2089                         need = slots - vq->vq_free_cnt;
2090                         if (unlikely(need > 0)) {
2091                                 PMD_TX_LOG(ERR,
2092                                            "No free tx descriptors to transmit");
2093                                 break;
2094                         }
2095                 }
2096
2097                 /* Enqueue packet buffers */
2098                 if (can_push)
2099                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2100                 else
2101                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2102                                                       in_order);
2103
2104                 virtio_update_packet_stats(&txvq->stats, txm);
2105         }
2106
2107         txvq->stats.packets += nb_tx;
2108
2109         if (likely(nb_tx)) {
2110                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2111                         virtqueue_notify(vq);
2112                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2113                 }
2114         }
2115
2116         return nb_tx;
2117 }
2118
2119 uint16_t
2120 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2121 {
2122         struct virtnet_tx *txvq = tx_queue;
2123         struct virtqueue *vq = txvq->vq;
2124         struct virtio_hw *hw = vq->hw;
2125         uint16_t hdr_size = hw->vtnet_hdr_size;
2126         uint16_t nb_used, nb_tx = 0;
2127
2128         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2129                 return nb_tx;
2130
2131         if (unlikely(nb_pkts < 1))
2132                 return nb_pkts;
2133
2134         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2135         nb_used = VIRTQUEUE_NUSED(vq);
2136
2137         virtio_rmb(hw->weak_barriers);
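        /*
         * Reclaim transmitted buffers lazily: only once the used count
         * climbs past the free threshold, so the cleanup cost is
         * amortized across bursts.
         */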
2138         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2139                 virtio_xmit_cleanup(vq, nb_used);
2140
2141         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2142                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2143                 int can_push = 0, use_indirect = 0, slots, need;
2144
2145                 /* optimize ring usage */
2146                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2147                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2148                     rte_mbuf_refcnt_read(txm) == 1 &&
2149                     RTE_MBUF_DIRECT(txm) &&
2150                     txm->nb_segs == 1 &&
2151                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2152                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2153                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2154                         can_push = 1;
2155                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2156                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2157                         use_indirect = 1;
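                /*
                 * An indirect descriptor spends a single ring slot on a
                 * pointer to a separate descriptor table, so even a
                 * multi-segment packet consumes only one ring entry.
                 */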
2158
2159                 /* How many main ring entries are needed for this Tx?
2160                  * any_layout => number of segments
2161                  * indirect   => 1
2162                  * default    => number of segments + 1
2163                  */
2164                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2165                 need = slots - vq->vq_free_cnt;
2166
2167                 /* A positive value indicates we need more free vring descriptors */
2168                 if (unlikely(need > 0)) {
2169                         nb_used = VIRTQUEUE_NUSED(vq);
2170                         virtio_rmb(hw->weak_barriers);
2171                         need = RTE_MIN(need, (int)nb_used);
2172
2173                         virtio_xmit_cleanup(vq, need);
2174                         need = slots - vq->vq_free_cnt;
2175                         if (unlikely(need > 0)) {
2176                                 PMD_TX_LOG(ERR,
2177                                            "No free tx descriptors to transmit");
2178                                 break;
2179                         }
2180                 }
2181
2182                 /* Enqueue packet buffers */
2183                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2184                         can_push, 0);
2185
2186                 virtio_update_packet_stats(&txvq->stats, txm);
2187         }
2188
2189         txvq->stats.packets += nb_tx;
2190
2191         if (likely(nb_tx)) {
2192                 vq_update_avail_idx(vq);
2193
2194                 if (unlikely(virtqueue_kick_prepare(vq))) {
2195                         virtqueue_notify(vq);
2196                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2197                 }
2198         }
2199
2200         return nb_tx;
2201 }
2202
2203 static __rte_always_inline int
2204 virtio_xmit_try_cleanup_inorder(struct virtqueue *vq, uint16_t need)
2205 {
2206         uint16_t nb_used, nb_clean, nb_descs;
2207         struct virtio_hw *hw = vq->hw;
2208
2209         nb_descs = vq->vq_free_cnt + need;
2210         nb_used = VIRTQUEUE_NUSED(vq);
2211         virtio_rmb(hw->weak_barriers);
2212         nb_clean = RTE_MIN(need, (int)nb_used);
2213
2214         virtio_xmit_cleanup_inorder(vq, nb_clean);
2215
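        /* Return the remaining shortfall: <= 0 means enough descriptors
         * were freed to satisfy the request.
         */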
2216         return nb_descs - vq->vq_free_cnt;
2217 }
2218
2219 uint16_t
2220 virtio_xmit_pkts_inorder(void *tx_queue,
2221                         struct rte_mbuf **tx_pkts,
2222                         uint16_t nb_pkts)
2223 {
2224         struct virtnet_tx *txvq = tx_queue;
2225         struct virtqueue *vq = txvq->vq;
2226         struct virtio_hw *hw = vq->hw;
2227         uint16_t hdr_size = hw->vtnet_hdr_size;
2228         uint16_t nb_used, nb_tx = 0, nb_inorder_pkts = 0;
2229         struct rte_mbuf *inorder_pkts[nb_pkts];
2230         int need;
2231
2232         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2233                 return nb_tx;
2234
2235         if (unlikely(nb_pkts < 1))
2236                 return nb_pkts;
2237
2238         VIRTQUEUE_DUMP(vq);
2239         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2240         nb_used = VIRTQUEUE_NUSED(vq);
2241
2242         virtio_rmb(hw->weak_barriers);
2243         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2244                 virtio_xmit_cleanup_inorder(vq, nb_used);
2245
2246         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2247                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2248                 int slots;
2249
2250                 /* optimize ring usage */
2251                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2252                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2253                      rte_mbuf_refcnt_read(txm) == 1 &&
2254                      RTE_MBUF_DIRECT(txm) &&
2255                      txm->nb_segs == 1 &&
2256                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2257                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2258                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2259                         inorder_pkts[nb_inorder_pkts] = txm;
2260                         nb_inorder_pkts++;
2261
2262                         continue;
2263                 }
2264
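                /*
                 * This packet cannot take the single-descriptor fast path,
                 * so flush the batch collected so far first: descriptors
                 * must be enqueued in packet order for IN_ORDER to hold.
                 */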
2265                 if (nb_inorder_pkts) {
2266                         need = nb_inorder_pkts - vq->vq_free_cnt;
2267                         if (unlikely(need > 0)) {
2268                                 need = virtio_xmit_try_cleanup_inorder(vq,
2269                                                                        need);
2270                                 if (unlikely(need > 0)) {
2271                                         PMD_TX_LOG(ERR,
2272                                                 "No free tx descriptors to "
2273                                                 "transmit");
2274                                         break;
2275                                 }
2276                         }
2277                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2278                                                         nb_inorder_pkts);
2279                         nb_inorder_pkts = 0;
2280                 }
2281
2282                 slots = txm->nb_segs + 1;
2283                 need = slots - vq->vq_free_cnt;
2284                 if (unlikely(need > 0)) {
2285                         need = virtio_xmit_try_cleanup_inorder(vq, slots);
2286
2287                         if (unlikely(need > 0)) {
2288                                 PMD_TX_LOG(ERR,
2289                                         "No free tx descriptors to transmit");
2290                                 break;
2291                         }
2292                 }
2293                 /* Enqueue packet buffers */
2294                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2295
2296                 virtio_update_packet_stats(&txvq->stats, txm);
2297         }
2298
2299         /* Transmit all inorder packets */
2300         if (nb_inorder_pkts) {
2301                 need = nb_inorder_pkts - vq->vq_free_cnt;
2302                 if (unlikely(need > 0)) {
2303                         need = virtio_xmit_try_cleanup_inorder(vq,
2304                                                                   need);
2305                         if (unlikely(need > 0)) {
2306                                 PMD_TX_LOG(ERR,
2307                                         "No free tx descriptors to transmit");
2308                                 nb_inorder_pkts = vq->vq_free_cnt;
2309                                 nb_tx -= need;
2310                         }
2311                 }
2312
2313                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2314                                                 nb_inorder_pkts);
2315         }
2316
2317         txvq->stats.packets += nb_tx;
2318
2319         if (likely(nb_tx)) {
2320                 vq_update_avail_idx(vq);
2321
2322                 if (unlikely(virtqueue_kick_prepare(vq))) {
2323                         virtqueue_notify(vq);
2324                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2325                 }
2326         }
2327
2328         VIRTQUEUE_DUMP(vq);
2329
2330         return nb_tx;
2331 }