net/virtio: remove useless pointer checks
drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
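/*
 * Return 'num' descriptors to the free pool of an in-order virtqueue.
 * Descriptors are consumed sequentially in this mode, so only the free
 * counter and the tail index need to be updated.
 */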
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
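/*
 * Return the split-ring descriptor chain starting at desc_idx to the free
 * list. A direct chain is walked to find its tail; an indirect descriptor
 * is returned as a single entry. The freed chain is then linked after the
 * current free-list tail.
 */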
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * the newly freed chain. If the virtqueue was completely used,
79          * then the head would be VQ_RING_DESC_CHAIN_END.
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
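/*
 * Dequeue up to 'num' received mbufs from a packed virtqueue. A descriptor
 * counts as used when its avail/used flag bits match the ring's wrap
 * counter, which is toggled every time the consumer index wraps around.
 */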
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->vq_packed.ring.desc;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->vq_packed.used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
150
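/*
 * Dequeue up to 'num' received mbufs from a split virtqueue by walking the
 * used ring, freeing each completed descriptor chain along the way.
 */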
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_split.ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_split.ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
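/*
 * Reclaim completed TX descriptors from a packed ring used in in-order
 * mode. The device may report a whole batch with a single used id, so the
 * inner loop frees every entry up to and including that id.
 */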
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->vq_packed.used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
258
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->vq_packed.used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
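/*
 * Reclaim 'num' completed TX descriptor chains from a split ring and free
 * the transmitted mbufs attached to them.
 */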
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_split.ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
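/*
 * Refill an in-order RX virtqueue with 'num' receive buffers. Each mbuf
 * becomes one write-only descriptor whose address is rewound by the
 * virtio-net header size, so the device writes the header just ahead of
 * the packet data.
 */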
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_split.ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
385
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_split.ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
428
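/*
 * Refill a packed RX virtqueue with 'num' receive buffers. The descriptor
 * flags are written last, behind a write barrier, so the device never sees
 * a descriptor whose address and length are not yet valid.
 */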
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
434         uint16_t flags = vq->vq_packed.cached_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->vq_packed.cached_flags ^=
464                                 VRING_PACKED_DESC_F_AVAIL_USED;
465                         flags = vq->vq_packed.cached_flags;
466                 }
467         }
468         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
469         return 0;
470 }
471
472 /* When doing TSO, the IP length is not included in the pseudo header
473  * checksum of the packet given to the PMD, but for virtio it is
474  * expected.
475  */
476 static void
477 virtio_tso_fix_cksum(struct rte_mbuf *m)
478 {
479         /* common case: header is not fragmented */
480         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
481                         m->l4_len)) {
482                 struct rte_ipv4_hdr *iph;
483                 struct rte_ipv6_hdr *ip6h;
484                 struct rte_tcp_hdr *th;
485                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
486                 uint32_t tmp;
487
488                 iph = rte_pktmbuf_mtod_offset(m,
489                                         struct rte_ipv4_hdr *, m->l2_len);
490                 th = RTE_PTR_ADD(iph, m->l3_len);
491                 if ((iph->version_ihl >> 4) == 4) {
492                         iph->hdr_checksum = 0;
493                         iph->hdr_checksum = rte_ipv4_cksum(iph);
494                         ip_len = iph->total_length;
495                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
496                                 m->l3_len);
497                 } else {
498                         ip6h = (struct rte_ipv6_hdr *)iph;
499                         ip_paylen = ip6h->payload_len;
500                 }
501
502                 /* calculate the new phdr checksum not including ip_paylen */
503                 prev_cksum = th->cksum;
504                 tmp = prev_cksum;
505                 tmp += ip_paylen;
506                 tmp = (tmp & 0xffff) + (tmp >> 16);
507                 new_cksum = tmp;
508
509                 /* replace it in the packet */
510                 th->cksum = new_cksum;
511         }
512 }
513
514
515 /* skip the write when the value is already set, to lessen cache issues */
516 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
517         if ((var) != (val))                     \
518                 (var) = (val);                  \
519 } while (0)
520
521 #define virtqueue_clear_net_hdr(_hdr) do {              \
522         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
523         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
524         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
525         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
526         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
527         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
528 } while (0)
529
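/*
 * Translate mbuf TX offload flags into the virtio-net header: checksum
 * start/offset for L4 checksum offload and GSO type/size for TSO. Unused
 * fields are cleared only when they differ from zero, to limit cache
 * traffic (see ASSIGN_UNLESS_EQUAL above).
 */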
530 static inline void
531 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
532                         struct rte_mbuf *cookie,
533                         bool offload)
534 {
535         if (offload) {
536                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
537                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
538
539                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
540                 case PKT_TX_UDP_CKSUM:
541                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
542                         hdr->csum_offset = offsetof(struct rte_udp_hdr,
543                                 dgram_cksum);
544                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
545                         break;
546
547                 case PKT_TX_TCP_CKSUM:
548                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
549                         hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
550                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
551                         break;
552
553                 default:
554                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
555                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
556                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
557                         break;
558                 }
559
560                 /* TCP Segmentation Offload */
561                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
562                         virtio_tso_fix_cksum(cookie);
563                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
564                                 VIRTIO_NET_HDR_GSO_TCPV6 :
565                                 VIRTIO_NET_HDR_GSO_TCPV4;
566                         hdr->gso_size = cookie->tso_segsz;
567                         hdr->hdr_len =
568                                 cookie->l2_len +
569                                 cookie->l3_len +
570                                 cookie->l4_len;
571                 } else {
572                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
573                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
574                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
575                 }
576         }
577 }
578
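/*
 * Fast TX path for in-order split rings and single-segment mbufs: the
 * virtio-net header is pushed into the mbuf headroom and each packet uses
 * exactly one descriptor, taken sequentially from the ring.
 */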
579 static inline void
580 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
581                         struct rte_mbuf **cookies,
582                         uint16_t num)
583 {
584         struct vq_desc_extra *dxp;
585         struct virtqueue *vq = txvq->vq;
586         struct vring_desc *start_dp;
587         struct virtio_net_hdr *hdr;
588         uint16_t idx;
589         uint16_t head_size = vq->hw->vtnet_hdr_size;
590         uint16_t i = 0;
591
592         idx = vq->vq_desc_head_idx;
593         start_dp = vq->vq_split.ring.desc;
594
595         while (i < num) {
596                 idx = idx & (vq->vq_nentries - 1);
597                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
598                 dxp->cookie = (void *)cookies[i];
599                 dxp->ndescs = 1;
600
601                 hdr = (struct virtio_net_hdr *)
602                         rte_pktmbuf_prepend(cookies[i], head_size);
603                 cookies[i]->pkt_len -= head_size;
604
605                 /* if offload disabled, hdr is not zeroed yet, do it now */
606                 if (!vq->hw->has_tx_offload)
607                         virtqueue_clear_net_hdr(hdr);
608                 else
609                         virtqueue_xmit_offload(hdr, cookies[i], true);
610
611                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
612                 start_dp[idx].len   = cookies[i]->data_len;
613                 start_dp[idx].flags = 0;
614
615                 vq_update_avail_ring(vq, idx);
616
617                 idx++;
618                 i++;
619         }
620
621         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
622         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
623 }
624
625 static inline void
626 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
627                                    struct rte_mbuf *cookie,
628                                    int in_order)
629 {
630         struct virtqueue *vq = txvq->vq;
631         struct vring_packed_desc *dp;
632         struct vq_desc_extra *dxp;
633         uint16_t idx, id, flags;
634         uint16_t head_size = vq->hw->vtnet_hdr_size;
635         struct virtio_net_hdr *hdr;
636
637         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
638         idx = vq->vq_avail_idx;
639         dp = &vq->vq_packed.ring.desc[idx];
640
641         dxp = &vq->vq_descx[id];
642         dxp->ndescs = 1;
643         dxp->cookie = cookie;
644
645         flags = vq->vq_packed.cached_flags;
646
647         /* prepend cannot fail, checked by caller */
648         hdr = (struct virtio_net_hdr *)
649                 rte_pktmbuf_prepend(cookie, head_size);
650         cookie->pkt_len -= head_size;
651
652         /* if offload disabled, hdr is not zeroed yet, do it now */
653         if (!vq->hw->has_tx_offload)
654                 virtqueue_clear_net_hdr(hdr);
655         else
656                 virtqueue_xmit_offload(hdr, cookie, true);
657
658         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
659         dp->len  = cookie->data_len;
660         dp->id   = id;
661
662         if (++vq->vq_avail_idx >= vq->vq_nentries) {
663                 vq->vq_avail_idx -= vq->vq_nentries;
664                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
665         }
666
667         vq->vq_free_cnt--;
668
669         if (!in_order) {
670                 vq->vq_desc_head_idx = dxp->next;
671                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
672                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
673         }
674
675         virtio_wmb(vq->hw->weak_barriers);
676         dp->flags = flags;
677 }
678
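/*
 * Enqueue one TX mbuf chain on a packed ring. The virtio-net header is
 * either pushed into the mbuf headroom (can_push) or taken from the
 * per-queue reserved region. The head descriptor flags are written last,
 * behind a write barrier, publishing the whole chain to the device at once.
 */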
679 static inline void
680 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
681                               uint16_t needed, int can_push, int in_order)
682 {
683         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
684         struct vq_desc_extra *dxp;
685         struct virtqueue *vq = txvq->vq;
686         struct vring_packed_desc *start_dp, *head_dp;
687         uint16_t idx, id, head_idx, head_flags;
688         uint16_t head_size = vq->hw->vtnet_hdr_size;
689         struct virtio_net_hdr *hdr;
690         uint16_t prev;
691
692         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
693
694         dxp = &vq->vq_descx[id];
695         dxp->ndescs = needed;
696         dxp->cookie = cookie;
697
698         head_idx = vq->vq_avail_idx;
699         idx = head_idx;
700         prev = head_idx;
701         start_dp = vq->vq_packed.ring.desc;
702
703         head_dp = &vq->vq_packed.ring.desc[idx];
704         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
705         head_flags |= vq->vq_packed.cached_flags;
706
707         if (can_push) {
708                 /* prepend cannot fail, checked by caller */
709                 hdr = (struct virtio_net_hdr *)
710                         rte_pktmbuf_prepend(cookie, head_size);
711                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
712                  * which is not wanted here; subtract it to restore the real size.
713                  */
714                 cookie->pkt_len -= head_size;
715
716                 /* if offload disabled, it is not zeroed below, do it now */
717                 if (!vq->hw->has_tx_offload)
718                         virtqueue_clear_net_hdr(hdr);
719         } else {
720                 /* setup first tx ring slot to point to header
721                  * stored in reserved region.
722                  */
723                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
724                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
725                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
726                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
727                 idx++;
728                 if (idx >= vq->vq_nentries) {
729                         idx -= vq->vq_nentries;
730                         vq->vq_packed.cached_flags ^=
731                                 VRING_PACKED_DESC_F_AVAIL_USED;
732                 }
733         }
734
735         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
736
737         do {
738                 uint16_t flags;
739
740                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
741                 start_dp[idx].len  = cookie->data_len;
742                 if (likely(idx != head_idx)) {
743                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
744                         flags |= vq->vq_packed.cached_flags;
745                         start_dp[idx].flags = flags;
746                 }
747                 prev = idx;
748                 idx++;
749                 if (idx >= vq->vq_nentries) {
750                         idx -= vq->vq_nentries;
751                         vq->vq_packed.cached_flags ^=
752                                 VRING_PACKED_DESC_F_AVAIL_USED;
753                 }
754         } while ((cookie = cookie->next) != NULL);
755
756         start_dp[prev].id = id;
757
758         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
759         vq->vq_avail_idx = idx;
760
761         if (!in_order) {
762                 vq->vq_desc_head_idx = dxp->next;
763                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
764                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
765         }
766
767         virtio_wmb(vq->hw->weak_barriers);
768         head_dp->flags = head_flags;
769 }
770
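/*
 * Enqueue one TX mbuf chain on a split ring. Depending on the negotiated
 * features, the virtio-net header is pushed into the mbuf headroom, placed
 * in an indirect descriptor table, or given the first slot of a direct
 * descriptor chain.
 */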
771 static inline void
772 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
773                         uint16_t needed, int use_indirect, int can_push,
774                         int in_order)
775 {
776         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
777         struct vq_desc_extra *dxp;
778         struct virtqueue *vq = txvq->vq;
779         struct vring_desc *start_dp;
780         uint16_t seg_num = cookie->nb_segs;
781         uint16_t head_idx, idx;
782         uint16_t head_size = vq->hw->vtnet_hdr_size;
783         struct virtio_net_hdr *hdr;
784
785         head_idx = vq->vq_desc_head_idx;
786         idx = head_idx;
787         if (in_order)
788                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
789         else
790                 dxp = &vq->vq_descx[idx];
791         dxp->cookie = (void *)cookie;
792         dxp->ndescs = needed;
793
794         start_dp = vq->vq_split.ring.desc;
795
796         if (can_push) {
797                 /* prepend cannot fail, checked by caller */
798                 hdr = (struct virtio_net_hdr *)
799                         rte_pktmbuf_prepend(cookie, head_size);
800                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
801                  * which is not wanted here; subtract it to restore the real size.
802                  */
803                 cookie->pkt_len -= head_size;
804
805                 /* if offload disabled, it is not zeroed below, do it now */
806                 if (!vq->hw->has_tx_offload)
807                         virtqueue_clear_net_hdr(hdr);
808         } else if (use_indirect) {
809                 /* setup tx ring slot to point to indirect
810                  * descriptor list stored in reserved region.
811                  *
812                  * the first slot in indirect ring is already preset
813                  * to point to the header in reserved region
814                  */
815                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
816                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
817                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
818                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
819                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
820
821                 /* loop below will fill in rest of the indirect elements */
822                 start_dp = txr[idx].tx_indir;
823                 idx = 1;
824         } else {
825                 /* setup first tx ring slot to point to header
826                  * stored in reserved region.
827                  */
828                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
829                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
830                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
831                 start_dp[idx].flags = VRING_DESC_F_NEXT;
832                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
833
834                 idx = start_dp[idx].next;
835         }
836
837         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
838
839         do {
840                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
841                 start_dp[idx].len   = cookie->data_len;
842                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
843                 idx = start_dp[idx].next;
844         } while ((cookie = cookie->next) != NULL);
845
846         if (use_indirect)
847                 idx = vq->vq_split.ring.desc[head_idx].next;
848
849         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
850
851         vq->vq_desc_head_idx = idx;
852         vq_update_avail_ring(vq, head_idx);
853
854         if (!in_order) {
855                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
856                         vq->vq_desc_tail_idx = idx;
857         }
858 }
859
860 void
861 virtio_dev_cq_start(struct rte_eth_dev *dev)
862 {
863         struct virtio_hw *hw = dev->data->dev_private;
864
865         if (hw->cvq && hw->cvq->vq) {
866                 rte_spinlock_init(&hw->cvq->lock);
867                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
868         }
869 }
870
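/*
 * RX queue setup. RX and TX virtqueues are interleaved, so ethdev RX queue
 * N maps to virtqueue 2 * N + VTNET_SQ_RQ_QUEUE_IDX. The virtqueue itself
 * was already allocated during device initialization; here only the usable
 * ring size, the queue id and the mbuf pool are recorded.
 */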
871 int
872 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
873                         uint16_t queue_idx,
874                         uint16_t nb_desc,
875                         unsigned int socket_id __rte_unused,
876                         const struct rte_eth_rxconf *rx_conf __rte_unused,
877                         struct rte_mempool *mp)
878 {
879         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
880         struct virtio_hw *hw = dev->data->dev_private;
881         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
882         struct virtnet_rx *rxvq;
883
884         PMD_INIT_FUNC_TRACE();
885
886         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
887                 nb_desc = vq->vq_nentries;
888         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
889
890         rxvq = &vq->rxq;
891         rxvq->queue_id = queue_idx;
892         rxvq->mpool = mp;
893         dev->data->rx_queues[queue_idx] = rxvq;
894
895         return 0;
896 }
897
898 int
899 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
900 {
901         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
902         struct virtio_hw *hw = dev->data->dev_private;
903         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
904         struct virtnet_rx *rxvq = &vq->rxq;
905         struct rte_mbuf *m;
906         uint16_t desc_idx;
907         int error, nbufs, i;
908
909         PMD_INIT_FUNC_TRACE();
910
911         /* Allocate blank mbufs for each rx descriptor */
912         nbufs = 0;
913
914         if (hw->use_simple_rx) {
915                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
916                      desc_idx++) {
917                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
918                         vq->vq_split.ring.desc[desc_idx].flags =
919                                 VRING_DESC_F_WRITE;
920                 }
921
922                 virtio_rxq_vec_setup(rxvq);
923         }
924
925         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
926         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
927              desc_idx++) {
928                 vq->sw_ring[vq->vq_nentries + desc_idx] =
929                         &rxvq->fake_mbuf;
930         }
931
932         if (hw->use_simple_rx) {
933                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
934                         virtio_rxq_rearm_vec(rxvq);
935                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
936                 }
937         } else if (hw->use_inorder_rx) {
938                 if ((!virtqueue_full(vq))) {
939                         uint16_t free_cnt = vq->vq_free_cnt;
940                         struct rte_mbuf *pkts[free_cnt];
941
942                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
943                                 free_cnt)) {
944                                 error = virtqueue_enqueue_refill_inorder(vq,
945                                                 pkts,
946                                                 free_cnt);
947                                 if (unlikely(error)) {
948                                         for (i = 0; i < free_cnt; i++)
949                                                 rte_pktmbuf_free(pkts[i]);
950                                 }
951                         }
952
953                         nbufs += free_cnt;
954                         vq_update_avail_idx(vq);
955                 }
956         } else {
957                 while (!virtqueue_full(vq)) {
958                         m = rte_mbuf_raw_alloc(rxvq->mpool);
959                         if (m == NULL)
960                                 break;
961
962                         /* Enqueue allocated buffers */
963                         if (vtpci_packed_queue(vq->hw))
964                                 error = virtqueue_enqueue_recv_refill_packed(vq,
965                                                 &m, 1);
966                         else
967                                 error = virtqueue_enqueue_recv_refill(vq,
968                                                 &m, 1);
969                         if (error) {
970                                 rte_pktmbuf_free(m);
971                                 break;
972                         }
973                         nbufs++;
974                 }
975
976                 if (!vtpci_packed_queue(vq->hw))
977                         vq_update_avail_idx(vq);
978         }
979
980         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
981
982         VIRTQUEUE_DUMP(vq);
983
984         return 0;
985 }
986
987 /*
988  * struct rte_eth_dev *dev: the device whose TX queue is being set up
989  * uint16_t queue_idx: index of this queue in the device TX queue list
990  * uint16_t nb_desc: if 0, defaults to the ring size read from config space
991  * unsigned int socket_id: intended for memzone allocation (unused here)
992  * const struct rte_eth_txconf *tx_conf: provides the TX free threshold
993  */
994 int
995 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
996                         uint16_t queue_idx,
997                         uint16_t nb_desc,
998                         unsigned int socket_id __rte_unused,
999                         const struct rte_eth_txconf *tx_conf)
1000 {
1001         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1002         struct virtio_hw *hw = dev->data->dev_private;
1003         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1004         struct virtnet_tx *txvq;
1005         uint16_t tx_free_thresh;
1006
1007         PMD_INIT_FUNC_TRACE();
1008
1009         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1010                 nb_desc = vq->vq_nentries;
1011         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1012
1013         txvq = &vq->txq;
1014         txvq->queue_id = queue_idx;
1015
1016         tx_free_thresh = tx_conf->tx_free_thresh;
1017         if (tx_free_thresh == 0)
1018                 tx_free_thresh =
1019                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1020
1021         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1022                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1023                         "number of TX entries minus 3 (%u)."
1024                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1025                         vq->vq_nentries - 3,
1026                         tx_free_thresh, dev->data->port_id, queue_idx);
1027                 return -EINVAL;
1028         }
1029
1030         vq->vq_free_thresh = tx_free_thresh;
1031
1032         dev->data->tx_queues[queue_idx] = txvq;
1033         return 0;
1034 }
1035
1036 int
1037 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1038                                 uint16_t queue_idx)
1039 {
1040         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1041         struct virtio_hw *hw = dev->data->dev_private;
1042         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1043
1044         PMD_INIT_FUNC_TRACE();
1045
1046         if (!vtpci_packed_queue(hw)) {
1047                 if (hw->use_inorder_tx)
1048                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1049         }
1050
1051         VIRTQUEUE_DUMP(vq);
1052
1053         return 0;
1054 }
1055
1056 static inline void
1057 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1058 {
1059         int error;
1060         /*
1061          * Requeue the discarded mbuf. This should always be
1062          * successful since it was just dequeued.
1063          */
1064         if (vtpci_packed_queue(vq->hw))
1065                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1066         else
1067                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1068
1069         if (unlikely(error)) {
1070                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1071                 rte_pktmbuf_free(m);
1072         }
1073 }
1074
1075 static inline void
1076 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1077 {
1078         int error;
1079
1080         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1081         if (unlikely(error)) {
1082                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1083                 rte_pktmbuf_free(m);
1084         }
1085 }
1086
1087 static inline void
1088 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1089 {
1090         uint32_t s = mbuf->pkt_len;
1091         struct rte_ether_addr *ea;
1092
1093         stats->bytes += s;
1094
1095         if (s == 64) {
1096                 stats->size_bins[1]++;
1097         } else if (s > 64 && s < 1024) {
1098                 uint32_t bin;
1099
1100                 /* use the leading-zero count to pick the correct size bin */
1101                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1102                 stats->size_bins[bin]++;
1103         } else {
1104                 if (s < 64)
1105                         stats->size_bins[0]++;
1106                 else if (s < 1519)
1107                         stats->size_bins[6]++;
1108                 else
1109                         stats->size_bins[7]++;
1110         }
1111
1112         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
1113         if (rte_is_multicast_ether_addr(ea)) {
1114                 if (rte_is_broadcast_ether_addr(ea))
1115                         stats->broadcast++;
1116                 else
1117                         stats->multicast++;
1118         }
1119 }
1120
1121 static inline void
1122 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1123 {
1124         VIRTIO_DUMP_PACKET(m, m->data_len);
1125
1126         virtio_update_packet_stats(&rxvq->stats, m);
1127 }
1128
1129 /* Optionally fill RX offload information in the mbuf, based on the virtio net header */
1130 static inline int
1131 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1132 {
1133         struct rte_net_hdr_lens hdr_lens;
1134         uint32_t hdrlen, ptype;
1135         int l4_supported = 0;
1136
1137         /* nothing to do */
1138         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1139                 return 0;
1140
1141         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1142
1143         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1144         m->packet_type = ptype;
1145         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1146             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1147             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1148                 l4_supported = 1;
1149
1150         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1151                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1152                 if (hdr->csum_start <= hdrlen && l4_supported) {
1153                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1154                 } else {
1155                         /* Unknown proto or tunnel, do sw cksum. We can assume
1156                          * the cksum field is in the first segment since the
1157                          * buffers we provided to the host are large enough.
1158                          * In case of SCTP, this will be wrong since it's a CRC
1159                          * but there's nothing we can do.
1160                          */
1161                         uint16_t csum = 0, off;
1162
1163                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1164                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1165                                 &csum);
1166                         if (likely(csum != 0xffff))
1167                                 csum = ~csum;
1168                         off = hdr->csum_offset + hdr->csum_start;
1169                         if (rte_pktmbuf_data_len(m) >= off + 1)
1170                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1171                                         off) = csum;
1172                 }
1173         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1174                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1175         }
1176
1177         /* GSO request, save required information in mbuf */
1178         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1179                 /* Check unsupported modes */
1180                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1181                     (hdr->gso_size == 0)) {
1182                         return -EINVAL;
1183                 }
1184
1185                 /* Update MSS length in mbuf */
1186                 m->tso_segsz = hdr->gso_size;
1187                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1188                         case VIRTIO_NET_HDR_GSO_TCPV4:
1189                         case VIRTIO_NET_HDR_GSO_TCPV6:
1190                                 m->ol_flags |= PKT_RX_LRO | \
1191                                         PKT_RX_L4_CKSUM_NONE;
1192                                 break;
1193                         default:
1194                                 return -EINVAL;
1195                 }
1196         }
1197
1198         return 0;
1199 }
1200
1201 #define VIRTIO_MBUF_BURST_SZ 64
1202 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
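/*
 * Split-ring receive burst. The burst size is trimmed so that descriptor
 * consumption stops on a cache-line boundary (DESC_PER_CACHELINE), avoiding
 * partially processed descriptor cache lines across calls.
 */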
1203 uint16_t
1204 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1205 {
1206         struct virtnet_rx *rxvq = rx_queue;
1207         struct virtqueue *vq = rxvq->vq;
1208         struct virtio_hw *hw = vq->hw;
1209         struct rte_mbuf *rxm;
1210         uint16_t nb_used, num, nb_rx;
1211         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1212         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1213         int error;
1214         uint32_t i, nb_enqueued;
1215         uint32_t hdr_size;
1216         struct virtio_net_hdr *hdr;
1217
1218         nb_rx = 0;
1219         if (unlikely(hw->started == 0))
1220                 return nb_rx;
1221
1222         nb_used = VIRTQUEUE_NUSED(vq);
1223
1224         virtio_rmb(hw->weak_barriers);
1225
1226         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1227         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1228                 num = VIRTIO_MBUF_BURST_SZ;
1229         if (likely(num > DESC_PER_CACHELINE))
1230                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1231
1232         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1233         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1234
1235         nb_enqueued = 0;
1236         hdr_size = hw->vtnet_hdr_size;
1237
1238         for (i = 0; i < num ; i++) {
1239                 rxm = rcv_pkts[i];
1240
1241                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1242
1243                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1244                         PMD_RX_LOG(ERR, "Packet drop");
1245                         nb_enqueued++;
1246                         virtio_discard_rxbuf(vq, rxm);
1247                         rxvq->stats.errors++;
1248                         continue;
1249                 }
1250
1251                 rxm->port = rxvq->port_id;
1252                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1253                 rxm->ol_flags = 0;
1254                 rxm->vlan_tci = 0;
1255
1256                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1257                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1258
1259                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1260                         RTE_PKTMBUF_HEADROOM - hdr_size);
1261
1262                 if (hw->vlan_strip)
1263                         rte_vlan_strip(rxm);
1264
1265                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1266                         virtio_discard_rxbuf(vq, rxm);
1267                         rxvq->stats.errors++;
1268                         continue;
1269                 }
1270
1271                 virtio_rx_stats_updated(rxvq, rxm);
1272
1273                 rx_pkts[nb_rx++] = rxm;
1274         }
1275
1276         rxvq->stats.packets += nb_rx;
1277
1278         /* Allocate new mbufs to refill the used descriptors */
1279         if (likely(!virtqueue_full(vq))) {
1280                 uint16_t free_cnt = vq->vq_free_cnt;
1281                 struct rte_mbuf *new_pkts[free_cnt];
1282
1283                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1284                                                 free_cnt) == 0)) {
1285                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1286                                         free_cnt);
1287                         if (unlikely(error)) {
1288                                 for (i = 0; i < free_cnt; i++)
1289                                         rte_pktmbuf_free(new_pkts[i]);
1290                         }
1291                         nb_enqueued += free_cnt;
1292                 } else {
1293                         struct rte_eth_dev *dev =
1294                                 &rte_eth_devices[rxvq->port_id];
1295                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1296                 }
1297         }
1298
1299         if (likely(nb_enqueued)) {
1300                 vq_update_avail_idx(vq);
1301
1302                 if (unlikely(virtqueue_kick_prepare(vq))) {
1303                         virtqueue_notify(vq);
1304                         PMD_RX_LOG(DEBUG, "Notified");
1305                 }
1306         }
1307
1308         return nb_rx;
1309 }
1310
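/*
 * Packed-ring receive burst. Same flow as virtio_recv_pkts(), but buffers
 * are dequeued from and refilled into the packed descriptor ring, so no
 * separate avail index update is needed.
 */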
1311 uint16_t
1312 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1313                         uint16_t nb_pkts)
1314 {
1315         struct virtnet_rx *rxvq = rx_queue;
1316         struct virtqueue *vq = rxvq->vq;
1317         struct virtio_hw *hw = vq->hw;
1318         struct rte_mbuf *rxm;
1319         uint16_t num, nb_rx;
1320         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1321         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1322         int error;
1323         uint32_t i, nb_enqueued;
1324         uint32_t hdr_size;
1325         struct virtio_net_hdr *hdr;
1326
1327         nb_rx = 0;
1328         if (unlikely(hw->started == 0))
1329                 return nb_rx;
1330
1331         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1332         if (likely(num > DESC_PER_CACHELINE))
1333                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1334
1335         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1336         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1337
1338         nb_enqueued = 0;
1339         hdr_size = hw->vtnet_hdr_size;
1340
1341         for (i = 0; i < num; i++) {
1342                 rxm = rcv_pkts[i];
1343
1344                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1345
1346                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1347                         PMD_RX_LOG(ERR, "Packet drop");
1348                         nb_enqueued++;
1349                         virtio_discard_rxbuf(vq, rxm);
1350                         rxvq->stats.errors++;
1351                         continue;
1352                 }
1353
1354                 rxm->port = rxvq->port_id;
1355                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1356                 rxm->ol_flags = 0;
1357                 rxm->vlan_tci = 0;
1358
1359                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1360                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1361
1362                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1363                         RTE_PKTMBUF_HEADROOM - hdr_size);
1364
1365                 if (hw->vlan_strip)
1366                         rte_vlan_strip(rxm);
1367
1368                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1369                         virtio_discard_rxbuf(vq, rxm);
1370                         rxvq->stats.errors++;
1371                         continue;
1372                 }
1373
1374                 virtio_rx_stats_updated(rxvq, rxm);
1375
1376                 rx_pkts[nb_rx++] = rxm;
1377         }
1378
1379         rxvq->stats.packets += nb_rx;
1380
1381         /* Allocate new mbufs to refill the used descriptors */
1382         if (likely(!virtqueue_full(vq))) {
1383                 uint16_t free_cnt = vq->vq_free_cnt;
1384                 struct rte_mbuf *new_pkts[free_cnt];
1385
1386                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1387                                                 free_cnt) == 0)) {
1388                         error = virtqueue_enqueue_recv_refill_packed(vq,
1389                                         new_pkts, free_cnt);
1390                         if (unlikely(error)) {
1391                                 for (i = 0; i < free_cnt; i++)
1392                                         rte_pktmbuf_free(new_pkts[i]);
1393                         }
1394                         nb_enqueued += free_cnt;
1395                 } else {
1396                         struct rte_eth_dev *dev =
1397                                 &rte_eth_devices[rxvq->port_id];
1398                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1399                 }
1400         }
1401
1402         if (likely(nb_enqueued)) {
1403                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1404                         virtqueue_notify(vq);
1405                         PMD_RX_LOG(DEBUG, "Notified");
1406                 }
1407         }
1408
1409         return nb_rx;
1410 }
1411
1412
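/*
 * In-order receive burst with mergeable buffer support: the first buffer
 * of a packet carries num_buffers in its header, and the following buffers
 * are chained onto the same mbuf until every segment has been collected.
 */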
1413 uint16_t
1414 virtio_recv_pkts_inorder(void *rx_queue,
1415                         struct rte_mbuf **rx_pkts,
1416                         uint16_t nb_pkts)
1417 {
1418         struct virtnet_rx *rxvq = rx_queue;
1419         struct virtqueue *vq = rxvq->vq;
1420         struct virtio_hw *hw = vq->hw;
1421         struct rte_mbuf *rxm;
1422         struct rte_mbuf *prev = NULL;
1423         uint16_t nb_used, num, nb_rx;
1424         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1425         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1426         int error;
1427         uint32_t nb_enqueued;
1428         uint32_t seg_num;
1429         uint32_t seg_res;
1430         uint32_t hdr_size;
1431         int32_t i;
1432
1433         nb_rx = 0;
1434         if (unlikely(hw->started == 0))
1435                 return nb_rx;
1436
1437         nb_used = VIRTQUEUE_NUSED(vq);
1438         nb_used = RTE_MIN(nb_used, nb_pkts);
1439         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1440
1441         virtio_rmb(hw->weak_barriers);
1442
1443         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1444
1445         nb_enqueued = 0;
1446         seg_num = 1;
1447         seg_res = 0;
1448         hdr_size = hw->vtnet_hdr_size;
1449
1450         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1451
1452         for (i = 0; i < num; i++) {
1453                 struct virtio_net_hdr_mrg_rxbuf *header;
1454
1455                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1456                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1457
1458                 rxm = rcv_pkts[i];
1459
1460                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1461                         PMD_RX_LOG(ERR, "Packet drop");
1462                         nb_enqueued++;
1463                         virtio_discard_rxbuf_inorder(vq, rxm);
1464                         rxvq->stats.errors++;
1465                         continue;
1466                 }
1467
1468                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1469                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1470                          - hdr_size);
1471
1472                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1473                         seg_num = header->num_buffers;
1474                         if (seg_num == 0)
1475                                 seg_num = 1;
1476                 } else {
1477                         seg_num = 1;
1478                 }
1479
1480                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1481                 rxm->nb_segs = seg_num;
1482                 rxm->ol_flags = 0;
1483                 rxm->vlan_tci = 0;
1484                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1485                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1486
1487                 rxm->port = rxvq->port_id;
1488
1489                 rx_pkts[nb_rx] = rxm;
1490                 prev = rxm;
1491
1492                 if (vq->hw->has_rx_offload &&
1493                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1494                         virtio_discard_rxbuf_inorder(vq, rxm);
1495                         rxvq->stats.errors++;
1496                         continue;
1497                 }
1498
1499                 if (hw->vlan_strip)
1500                         rte_vlan_strip(rx_pkts[nb_rx]);
1501
1502                 seg_res = seg_num - 1;
1503
1504                 /* Merge remaining segments */
1505                 while (seg_res != 0 && i < (num - 1)) {
1506                         i++;
1507
1508                         rxm = rcv_pkts[i];
1509                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1510                         rxm->pkt_len = (uint32_t)(len[i]);
1511                         rxm->data_len = (uint16_t)(len[i]);
1512
1513                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1514
1515                         prev->next = rxm;
1516                         prev = rxm;
1517                         seg_res -= 1;
1518                 }
1519
1520                 if (!seg_res) {
1521                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1522                         nb_rx++;
1523                 }
1524         }
1525
1526         /* The last packet may still need remaining segments merged in */
1527         while (seg_res != 0) {
1528                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1529                                         VIRTIO_MBUF_BURST_SZ);
1530
1531                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1532                         virtio_rmb(hw->weak_barriers);
1533                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1534                                                            rcv_cnt);
1535                         uint16_t extra_idx = 0;
1536
1537                         rcv_cnt = num;
1538                         while (extra_idx < rcv_cnt) {
1539                                 rxm = rcv_pkts[extra_idx];
1540                                 rxm->data_off =
1541                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1542                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1543                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1544                                 prev->next = rxm;
1545                                 prev = rxm;
1546                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1547                                 extra_idx += 1;
1548                         }
1549                         seg_res -= rcv_cnt;
1550
1551                         if (!seg_res) {
1552                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1553                                 nb_rx++;
1554                         }
1555                 } else {
1556                         PMD_RX_LOG(ERR,
1557                                         "Not enough segments for packet.");
1558                         virtio_discard_rxbuf_inorder(vq, prev);
1559                         rxvq->stats.errors++;
1560                         break;
1561                 }
1562         }
1563
1564         rxvq->stats.packets += nb_rx;
1565
1566         /* Allocate new mbufs to refill the used descriptors */
1567
1568         if (likely(!virtqueue_full(vq))) {
1569                 /* free_cnt may include mrg descs */
1570                 uint16_t free_cnt = vq->vq_free_cnt;
1571                 struct rte_mbuf *new_pkts[free_cnt];
1572
1573                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1574                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1575                                         free_cnt);
1576                         if (unlikely(error)) {
1577                                 for (i = 0; i < free_cnt; i++)
1578                                         rte_pktmbuf_free(new_pkts[i]);
1579                         }
1580                         nb_enqueued += free_cnt;
1581                 } else {
1582                         struct rte_eth_dev *dev =
1583                                 &rte_eth_devices[rxvq->port_id];
1584                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1585                 }
1586         }
1587
1588         if (likely(nb_enqueued)) {
1589                 vq_update_avail_idx(vq);
1590
1591                 if (unlikely(virtqueue_kick_prepare(vq))) {
1592                         virtqueue_notify(vq);
1593                         PMD_RX_LOG(DEBUG, "Notified");
1594                 }
1595         }
1596
1597         return nb_rx;
1598 }
1599
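/*
 * Split-ring receive path used when VIRTIO_NET_F_MRG_RXBUF is negotiated:
 * a packet may span several receive buffers and the num_buffers field of
 * the virtio-net header says how many.  The loops below stitch those
 * buffers into one multi-segment mbuf.
 *
 * Hedged sketch, not driver code, of how a caller can walk such a chain;
 * "m" stands for one mbuf returned by rte_eth_rx_burst():
 *
 *     struct rte_mbuf *seg;
 *     uint32_t bytes = 0;
 *
 *     for (seg = m; seg != NULL; seg = seg->next)
 *             bytes += seg->data_len;
 *     // bytes now equals m->pkt_len and m->nb_segs segments were visited
 */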
1600 uint16_t
1601 virtio_recv_mergeable_pkts(void *rx_queue,
1602                         struct rte_mbuf **rx_pkts,
1603                         uint16_t nb_pkts)
1604 {
1605         struct virtnet_rx *rxvq = rx_queue;
1606         struct virtqueue *vq = rxvq->vq;
1607         struct virtio_hw *hw = vq->hw;
1608         struct rte_mbuf *rxm;
1609         struct rte_mbuf *prev = NULL;
1610         uint16_t nb_used, num, nb_rx = 0;
1611         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1612         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1613         int error;
1614         uint32_t nb_enqueued = 0;
1615         uint32_t seg_num = 0;
1616         uint32_t seg_res = 0;
1617         uint32_t hdr_size = hw->vtnet_hdr_size;
1618         int32_t i;
1619
1620         if (unlikely(hw->started == 0))
1621                 return nb_rx;
1622
1623         nb_used = VIRTQUEUE_NUSED(vq);
1624
1625         virtio_rmb(hw->weak_barriers);
1626
1627         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1628
1629         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1630         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1631                 num = VIRTIO_MBUF_BURST_SZ;
1632         if (likely(num > DESC_PER_CACHELINE))
1633                 num = num - ((vq->vq_used_cons_idx + num) %
1634                                 DESC_PER_CACHELINE);
1635
1636
1637         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1638
1639         for (i = 0; i < num; i++) {
1640                 struct virtio_net_hdr_mrg_rxbuf *header;
1641
1642                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1643                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1644
1645                 rxm = rcv_pkts[i];
1646
1647                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1648                         PMD_RX_LOG(ERR, "Packet drop");
1649                         nb_enqueued++;
1650                         virtio_discard_rxbuf(vq, rxm);
1651                         rxvq->stats.errors++;
1652                         continue;
1653                 }
1654
1655                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1656                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1657                          - hdr_size);
1658                 seg_num = header->num_buffers;
1659                 if (seg_num == 0)
1660                         seg_num = 1;
1661
1662                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1663                 rxm->nb_segs = seg_num;
1664                 rxm->ol_flags = 0;
1665                 rxm->vlan_tci = 0;
1666                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1667                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1668
1669                 rxm->port = rxvq->port_id;
1670
1671                 rx_pkts[nb_rx] = rxm;
1672                 prev = rxm;
1673
1674                 if (hw->has_rx_offload &&
1675                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1676                         virtio_discard_rxbuf(vq, rxm);
1677                         rxvq->stats.errors++;
1678                         continue;
1679                 }
1680
1681                 if (hw->vlan_strip)
1682                         rte_vlan_strip(rx_pkts[nb_rx]);
1683
1684                 seg_res = seg_num - 1;
1685
1686                 /* Merge remaining segments */
1687                 while (seg_res != 0 && i < (num - 1)) {
1688                         i++;
1689
1690                         rxm = rcv_pkts[i];
1691                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1692                         rxm->pkt_len = (uint32_t)(len[i]);
1693                         rxm->data_len = (uint16_t)(len[i]);
1694
1695                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1696
1697                         prev->next = rxm;
1698                         prev = rxm;
1699                         seg_res -= 1;
1700                 }
1701
1702                 if (!seg_res) {
1703                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1704                         nb_rx++;
1705                 }
1706         }
1707
1708         /* The last packet may still need remaining segments merged in */
1709         while (seg_res != 0) {
1710                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1711                                         VIRTIO_MBUF_BURST_SZ);
1712
1713                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1714                         virtio_rmb(hw->weak_barriers);
1715                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1716                                                            rcv_cnt);
1717                         uint16_t extra_idx = 0;
1718
1719                         rcv_cnt = num;
1720                         while (extra_idx < rcv_cnt) {
1721                                 rxm = rcv_pkts[extra_idx];
1722                                 rxm->data_off =
1723                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1724                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1725                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1726                                 prev->next = rxm;
1727                                 prev = rxm;
1728                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1729                                 extra_idx += 1;
1730                         }
1731                         seg_res -= rcv_cnt;
1732
1733                         if (!seg_res) {
1734                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1735                                 nb_rx++;
1736                         }
1737                 } else {
1738                         PMD_RX_LOG(ERR,
1739                                         "Not enough segments for packet.");
1740                         virtio_discard_rxbuf(vq, prev);
1741                         rxvq->stats.errors++;
1742                         break;
1743                 }
1744         }
1745
1746         rxvq->stats.packets += nb_rx;
1747
1748         /* Allocate new mbufs to refill the used descriptors */
1749         if (likely(!virtqueue_full(vq))) {
1750                 /* free_cnt may include mrg descs */
1751                 uint16_t free_cnt = vq->vq_free_cnt;
1752                 struct rte_mbuf *new_pkts[free_cnt];
1753
1754                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1755                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1756                                         free_cnt);
1757                         if (unlikely(error)) {
1758                                 for (i = 0; i < free_cnt; i++)
1759                                         rte_pktmbuf_free(new_pkts[i]);
1760                         }
1761                         nb_enqueued += free_cnt;
1762                 } else {
1763                         struct rte_eth_dev *dev =
1764                                 &rte_eth_devices[rxvq->port_id];
1765                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1766                 }
1767         }
1768
1769         if (likely(nb_enqueued)) {
1770                 vq_update_avail_idx(vq);
1771
1772                 if (unlikely(virtqueue_kick_prepare(vq))) {
1773                         virtqueue_notify(vq);
1774                         PMD_RX_LOG(DEBUG, "Notified");
1775                 }
1776         }
1777
1778         return nb_rx;
1779 }
1780
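/*
 * Same mergeable-buffer handling as above, but for the packed virtqueue
 * layout: there is no avail index to publish, so the refill path only
 * notifies the device when virtqueue_kick_prepare_packed() requests it.
 *
 * When the refill fails because the mempool is empty, the shortfall is
 * accounted in rx_mbuf_alloc_failed, which ethdev reports as rx_nombuf.
 * Hedged, application-side example; port_id and handle_mempool_pressure()
 * are assumptions, not part of this driver:
 *
 *     struct rte_eth_stats stats;
 *
 *     if (rte_eth_stats_get(port_id, &stats) == 0 && stats.rx_nombuf != 0)
 *             handle_mempool_pressure();
 */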
1781 uint16_t
1782 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1783                         struct rte_mbuf **rx_pkts,
1784                         uint16_t nb_pkts)
1785 {
1786         struct virtnet_rx *rxvq = rx_queue;
1787         struct virtqueue *vq = rxvq->vq;
1788         struct virtio_hw *hw = vq->hw;
1789         struct rte_mbuf *rxm;
1790         struct rte_mbuf *prev = NULL;
1791         uint16_t num, nb_rx = 0;
1792         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1793         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1794         uint32_t nb_enqueued = 0;
1795         uint32_t seg_num = 0;
1796         uint32_t seg_res = 0;
1797         uint32_t hdr_size = hw->vtnet_hdr_size;
1798         int32_t i;
1799         int error;
1800
1801         if (unlikely(hw->started == 0))
1802                 return nb_rx;
1803
1804
1805         num = nb_pkts;
1806         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1807                 num = VIRTIO_MBUF_BURST_SZ;
1808         if (likely(num > DESC_PER_CACHELINE))
1809                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1810
1811         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1812
1813         for (i = 0; i < num; i++) {
1814                 struct virtio_net_hdr_mrg_rxbuf *header;
1815
1816                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1817                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1818
1819                 rxm = rcv_pkts[i];
1820
1821                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1822                         PMD_RX_LOG(ERR, "Packet drop");
1823                         nb_enqueued++;
1824                         virtio_discard_rxbuf(vq, rxm);
1825                         rxvq->stats.errors++;
1826                         continue;
1827                 }
1828
1829                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1830                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1831                 seg_num = header->num_buffers;
1832
1833                 if (seg_num == 0)
1834                         seg_num = 1;
1835
1836                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1837                 rxm->nb_segs = seg_num;
1838                 rxm->ol_flags = 0;
1839                 rxm->vlan_tci = 0;
1840                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1841                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1842
1843                 rxm->port = rxvq->port_id;
1844                 rx_pkts[nb_rx] = rxm;
1845                 prev = rxm;
1846
1847                 if (hw->has_rx_offload &&
1848                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1849                         virtio_discard_rxbuf(vq, rxm);
1850                         rxvq->stats.errors++;
1851                         continue;
1852                 }
1853
1854                 if (hw->vlan_strip)
1855                         rte_vlan_strip(rx_pkts[nb_rx]);
1856
1857                 seg_res = seg_num - 1;
1858
1859                 /* Merge remaining segments */
1860                 while (seg_res != 0 && i < (num - 1)) {
1861                         i++;
1862
1863                         rxm = rcv_pkts[i];
1864                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1865                         rxm->pkt_len = (uint32_t)(len[i]);
1866                         rxm->data_len = (uint16_t)(len[i]);
1867
1868                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1869
1870                         prev->next = rxm;
1871                         prev = rxm;
1872                         seg_res -= 1;
1873                 }
1874
1875                 if (!seg_res) {
1876                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1877                         nb_rx++;
1878                 }
1879         }
1880
1881         /* The last packet may still need remaining segments merged in */
1882         while (seg_res != 0) {
1883                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1884                                         VIRTIO_MBUF_BURST_SZ);
1885                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1886                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1887                                         len, rcv_cnt);
1888                         uint16_t extra_idx = 0;
1889
1890                         rcv_cnt = num;
1891
1892                         while (extra_idx < rcv_cnt) {
1893                                 rxm = rcv_pkts[extra_idx];
1894
1895                                 rxm->data_off =
1896                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1897                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1898                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1899
1900                                 prev->next = rxm;
1901                                 prev = rxm;
1902                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1903                                 extra_idx += 1;
1904                         }
1905                         seg_res -= rcv_cnt;
1906                         if (!seg_res) {
1907                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1908                                 nb_rx++;
1909                         }
1910                 } else {
1911                         PMD_RX_LOG(ERR,
1912                                         "Not enough segments for packet.");
1913                         virtio_discard_rxbuf(vq, prev);
1914                         rxvq->stats.errors++;
1915                         break;
1916                 }
1917         }
1918
1919         rxvq->stats.packets += nb_rx;
1920
1921         /* Allocate new mbufs to refill the used descriptors */
1922         if (likely(!virtqueue_full(vq))) {
1923                 /* free_cnt may include mrg descs */
1924                 uint16_t free_cnt = vq->vq_free_cnt;
1925                 struct rte_mbuf *new_pkts[free_cnt];
1926
1927                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1928                         error = virtqueue_enqueue_recv_refill_packed(vq,
1929                                         new_pkts, free_cnt);
1930                         if (unlikely(error)) {
1931                                 for (i = 0; i < free_cnt; i++)
1932                                         rte_pktmbuf_free(new_pkts[i]);
1933                         }
1934                         nb_enqueued += free_cnt;
1935                 } else {
1936                         struct rte_eth_dev *dev =
1937                                 &rte_eth_devices[rxvq->port_id];
1938                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1939                 }
1940         }
1941
1942         if (likely(nb_enqueued)) {
1943                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1944                         virtqueue_notify(vq);
1945                         PMD_RX_LOG(DEBUG, "Notified");
1946                 }
1947         }
1948
1949         return nb_rx;
1950 }
1951
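/*
 * Packed-ring transmit path.  The "can_push" test below checks whether the
 * virtio-net header can be written straight into the mbuf headroom (plus an
 * alignment check on the data pointer); if so, the packet needs exactly
 * nb_segs descriptors instead of nb_segs + 1.
 *
 * Hedged sketch that mirrors part of that test, not a substitute for it;
 * "m" and "hdr_size" are assumptions for the example.  With the default
 * RTE_PKTMBUF_HEADROOM of 128 bytes the headroom condition normally holds.
 *
 *     int pushable = rte_mbuf_refcnt_read(m) == 1 &&
 *                    RTE_MBUF_DIRECT(m) &&
 *                    m->nb_segs == 1 &&
 *                    rte_pktmbuf_headroom(m) >= hdr_size;
 */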
1952 uint16_t
1953 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1954                         uint16_t nb_pkts)
1955 {
1956         struct virtnet_tx *txvq = tx_queue;
1957         struct virtqueue *vq = txvq->vq;
1958         struct virtio_hw *hw = vq->hw;
1959         uint16_t hdr_size = hw->vtnet_hdr_size;
1960         uint16_t nb_tx = 0;
1961         bool in_order = hw->use_inorder_tx;
1962         int error;
1963
1964         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1965                 return nb_tx;
1966
1967         if (unlikely(nb_pkts < 1))
1968                 return nb_pkts;
1969
1970         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1971
1972         if (nb_pkts > vq->vq_free_cnt)
1973                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1974                                            in_order);
1975
1976         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1977                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1978                 int can_push = 0, slots, need;
1979
1980                 /* Do VLAN tag insertion */
1981                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1982                         error = rte_vlan_insert(&txm);
1983                         if (unlikely(error)) {
1984                                 rte_pktmbuf_free(txm);
1985                                 continue;
1986                         }
1987                         /* vlan_insert may add a header mbuf */
1988                         tx_pkts[nb_tx] = txm;
1989                 }
1990
1991                 /* optimize ring usage */
1992                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1993                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1994                     rte_mbuf_refcnt_read(txm) == 1 &&
1995                     RTE_MBUF_DIRECT(txm) &&
1996                     txm->nb_segs == 1 &&
1997                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1998                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1999                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2000                         can_push = 1;
2001
2002                 /* How many main ring entries are needed for this Tx?
2003                  * any_layout => number of segments
2004                  * default    => number of segments + 1
2005                  */
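                /* For example, a 2-segment mbuf needs 2 slots when the
                 * header can be pushed into the headroom (can_push), and
                 * 3 slots otherwise.
                 */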
2006                 slots = txm->nb_segs + !can_push;
2007                 need = slots - vq->vq_free_cnt;
2008
2009                 /* A positive value indicates more free vring descriptors are needed */
2010                 if (unlikely(need > 0)) {
2011                         virtio_xmit_cleanup_packed(vq, need, in_order);
2012                         need = slots - vq->vq_free_cnt;
2013                         if (unlikely(need > 0)) {
2014                                 PMD_TX_LOG(ERR,
2015                                            "No free tx descriptors to transmit");
2016                                 break;
2017                         }
2018                 }
2019
2020                 /* Enqueue Packet buffers */
2021                 if (can_push)
2022                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2023                 else
2024                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2025                                                       in_order);
2026
2027                 virtio_update_packet_stats(&txvq->stats, txm);
2028         }
2029
2030         txvq->stats.packets += nb_tx;
2031
2032         if (likely(nb_tx)) {
2033                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2034                         virtqueue_notify(vq);
2035                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2036                 }
2037         }
2038
2039         return nb_tx;
2040 }
2041
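/*
 * Split-ring transmit path.  Depending on the negotiated features a packet
 * consumes nb_segs descriptors (header pushed into the headroom), a single
 * descriptor (indirect table) or nb_segs + 1 descriptors.  When the ring
 * runs out of free descriptors the burst stops early, so callers retry with
 * the unsent tail.
 *
 * Hedged usage sketch from the application side; port_id, queue_id, pkts
 * and nb_pkts are assumptions for the example:
 *
 *     uint16_t sent = 0;
 *
 *     while (sent < nb_pkts)
 *             sent += rte_eth_tx_burst(port_id, queue_id,
 *                                      &pkts[sent], nb_pkts - sent);
 */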
2042 uint16_t
2043 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2044 {
2045         struct virtnet_tx *txvq = tx_queue;
2046         struct virtqueue *vq = txvq->vq;
2047         struct virtio_hw *hw = vq->hw;
2048         uint16_t hdr_size = hw->vtnet_hdr_size;
2049         uint16_t nb_used, nb_tx = 0;
2050         int error;
2051
2052         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2053                 return nb_tx;
2054
2055         if (unlikely(nb_pkts < 1))
2056                 return nb_pkts;
2057
2058         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2059         nb_used = VIRTQUEUE_NUSED(vq);
2060
2061         virtio_rmb(hw->weak_barriers);
2062         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2063                 virtio_xmit_cleanup(vq, nb_used);
2064
2065         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2066                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2067                 int can_push = 0, use_indirect = 0, slots, need;
2068
2069                 /* Do VLAN tag insertion */
2070                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2071                         error = rte_vlan_insert(&txm);
2072                         if (unlikely(error)) {
2073                                 rte_pktmbuf_free(txm);
2074                                 continue;
2075                         }
2076                         /* vlan_insert may add a header mbuf */
2077                         tx_pkts[nb_tx] = txm;
2078                 }
2079
2080                 /* optimize ring usage */
2081                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2082                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2083                     rte_mbuf_refcnt_read(txm) == 1 &&
2084                     RTE_MBUF_DIRECT(txm) &&
2085                     txm->nb_segs == 1 &&
2086                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2087                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2088                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2089                         can_push = 1;
2090                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2091                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2092                         use_indirect = 1;
2093
2094                 /* How many main ring entries are needed for this Tx?
2095                  * any_layout => number of segments
2096                  * indirect   => 1
2097                  * default    => number of segments + 1
2098                  */
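                /* For example, a 2-segment mbuf needs 2 slots with
                 * any_layout, 1 slot with an indirect table, and 3 slots
                 * in the default layout.
                 */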
2099                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2100                 need = slots - vq->vq_free_cnt;
2101
2102                 /* A positive value indicates more free vring descriptors are needed */
2103                 if (unlikely(need > 0)) {
2104                         nb_used = VIRTQUEUE_NUSED(vq);
2105                         virtio_rmb(hw->weak_barriers);
2106                         need = RTE_MIN(need, (int)nb_used);
2107
2108                         virtio_xmit_cleanup(vq, need);
2109                         need = slots - vq->vq_free_cnt;
2110                         if (unlikely(need > 0)) {
2111                                 PMD_TX_LOG(ERR,
2112                                            "No free tx descriptors to transmit");
2113                                 break;
2114                         }
2115                 }
2116
2117                 /* Enqueue Packet buffers */
2118                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2119                         can_push, 0);
2120
2121                 virtio_update_packet_stats(&txvq->stats, txm);
2122         }
2123
2124         txvq->stats.packets += nb_tx;
2125
2126         if (likely(nb_tx)) {
2127                 vq_update_avail_idx(vq);
2128
2129                 if (unlikely(virtqueue_kick_prepare(vq))) {
2130                         virtqueue_notify(vq);
2131                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2132                 }
2133         }
2134
2135         return nb_tx;
2136 }
2137
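/*
 * In-order transmit path.  Packets whose header fits in the mbuf headroom
 * are collected in inorder_pkts[] and flushed with a single
 * virtqueue_enqueue_xmit_inorder() call; anything else falls back to the
 * regular per-packet enqueue.
 *
 * The cleanup threshold used in this function comes from vq->vq_free_thresh,
 * which the driver derives from the tx_free_thresh given at queue setup.
 * Hedged example of overriding it; port_id, queue_id, nb_txd and socket_id
 * are assumptions:
 *
 *     struct rte_eth_dev_info dev_info;
 *     struct rte_eth_txconf txconf;
 *
 *     rte_eth_dev_info_get(port_id, &dev_info);
 *     txconf = dev_info.default_txconf;
 *     txconf.tx_free_thresh = 32;
 *     rte_eth_tx_queue_setup(port_id, queue_id, nb_txd, socket_id, &txconf);
 */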
2138 uint16_t
2139 virtio_xmit_pkts_inorder(void *tx_queue,
2140                         struct rte_mbuf **tx_pkts,
2141                         uint16_t nb_pkts)
2142 {
2143         struct virtnet_tx *txvq = tx_queue;
2144         struct virtqueue *vq = txvq->vq;
2145         struct virtio_hw *hw = vq->hw;
2146         uint16_t hdr_size = hw->vtnet_hdr_size;
2147         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2148         struct rte_mbuf *inorder_pkts[nb_pkts];
2149         int error;
2150
2151         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2152                 return nb_tx;
2153
2154         if (unlikely(nb_pkts < 1))
2155                 return nb_pkts;
2156
2157         VIRTQUEUE_DUMP(vq);
2158         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2159         nb_used = VIRTQUEUE_NUSED(vq);
2160
2161         virtio_rmb(hw->weak_barriers);
2162         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2163                 virtio_xmit_cleanup_inorder(vq, nb_used);
2164
2165         if (unlikely(!vq->vq_free_cnt))
2166                 virtio_xmit_cleanup_inorder(vq, nb_used);
2167
2168         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2169
2170         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2171                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2172                 int slots, need;
2173
2174                 /* Do VLAN tag insertion */
2175                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2176                         error = rte_vlan_insert(&txm);
2177                         if (unlikely(error)) {
2178                                 rte_pktmbuf_free(txm);
2179                                 continue;
2180                         }
2181                         /* vlan_insert may add a header mbuf */
2182                         tx_pkts[nb_tx] = txm;
2183                 }
2184
2185                 /* optimize ring usage */
2186                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2187                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2188                      rte_mbuf_refcnt_read(txm) == 1 &&
2189                      RTE_MBUF_DIRECT(txm) &&
2190                      txm->nb_segs == 1 &&
2191                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2192                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2193                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2194                         inorder_pkts[nb_inorder_pkts] = txm;
2195                         nb_inorder_pkts++;
2196
2197                         virtio_update_packet_stats(&txvq->stats, txm);
2198                         continue;
2199                 }
2200
2201                 if (nb_inorder_pkts) {
2202                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2203                                                         nb_inorder_pkts);
2204                         nb_inorder_pkts = 0;
2205                 }
2206
2207                 slots = txm->nb_segs + 1;
2208                 need = slots - vq->vq_free_cnt;
2209                 if (unlikely(need > 0)) {
2210                         nb_used = VIRTQUEUE_NUSED(vq);
2211                         virtio_rmb(hw->weak_barriers);
2212                         need = RTE_MIN(need, (int)nb_used);
2213
2214                         virtio_xmit_cleanup_inorder(vq, need);
2215
2216                         need = slots - vq->vq_free_cnt;
2217
2218                         if (unlikely(need > 0)) {
2219                                 PMD_TX_LOG(ERR,
2220                                         "No free tx descriptors to transmit");
2221                                 break;
2222                         }
2223                 }
2224                 /* Enqueue Packet buffers */
2225                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2226
2227                 virtio_update_packet_stats(&txvq->stats, txm);
2228         }
2229
2230         /* Transmit any remaining batched in-order packets */
2231         if (nb_inorder_pkts)
2232                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2233                                                 nb_inorder_pkts);
2234
2235         txvq->stats.packets += nb_tx;
2236
2237         if (likely(nb_tx)) {
2238                 vq_update_avail_idx(vq);
2239
2240                 if (unlikely(virtqueue_kick_prepare(vq))) {
2241                         virtqueue_notify(vq);
2242                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2243                 }
2244         }
2245
2246         VIRTQUEUE_DUMP(vq);
2247
2248         return nb_tx;
2249 }
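/*
 * All burst handlers above share the same ethdev contract: the receive
 * functions return how many packets were written to rx_pkts[], the transmit
 * functions return how many packets were enqueued, and any mbuf that was
 * not transmitted remains owned by the caller.  Hedged, minimal forwarding
 * loop built on that contract; rx_port, tx_port and the burst size of 32
 * are assumptions:
 *
 *     struct rte_mbuf *burst[32];
 *     uint16_t nb_rx, nb_tx;
 *
 *     nb_rx = rte_eth_rx_burst(rx_port, 0, burst, 32);
 *     nb_tx = rte_eth_tx_burst(tx_port, 0, burst, nb_rx);
 *     while (nb_tx < nb_rx)
 *             rte_pktmbuf_free(burst[nb_tx++]);
 */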