1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
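/* Report whether at least 'offset' used entries are pending on the RX virtqueue. */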
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
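/*
 * Return 'num' descriptors to an in-order split queue. Descriptors are
 * recycled contiguously, so only the free count and tail index are updated.
 */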
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
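/* Walk the descriptor chain at 'desc_idx' and link it back onto the free list. */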
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * newly freed chain. If the virtqueue was completely used, then
79          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
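/* Return the descriptor slot 'id' of a packed queue to the free list. */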
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
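/*
 * Dequeue up to 'num' used descriptors from a packed RX queue, returning
 * the mbufs and their lengths.
 */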
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->vq_packed.ring.desc;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->vq_packed.used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
150
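/*
 * Dequeue up to 'num' used descriptors from a split RX queue and free the
 * corresponding descriptor chains.
 */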
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_split.ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
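/*
 * Dequeue from an in-order split RX queue: the descriptor index equals the
 * used-ring index, so the consumed slots are released in one batch at the end.
 */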
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_split.ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
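/*
 * Free completed TX descriptors of an in-order packed queue; a single used
 * id may complete a whole batch of buffers.
 */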
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->vq_packed.used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
258
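/* Free completed TX descriptors of a packed queue when in-order is not negotiated. */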
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->vq_packed.used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
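/* Cleanup from completed transmits on a split queue. */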
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_split.ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
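/*
 * Refill an in-order split RX queue with 'num' mbufs, one descriptor per
 * buffer, consumed sequentially from the head index.
 */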
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_split.ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
385
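/* Refill a split RX queue with 'num' mbufs, following the descriptor free chain. */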
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_split.ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
428
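/*
 * Refill a packed RX queue with 'num' mbufs. The avail/used flags of each
 * descriptor are written last, after a write barrier.
 */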
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
434         uint16_t flags = vq->vq_packed.cached_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->vq_packed.cached_flags ^=
464                                 VRING_PACKED_DESC_F_AVAIL_USED;
465                         flags = vq->vq_packed.cached_flags;
466                 }
467         }
468         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
469         return 0;
470 }
471
472 /* When doing TSO, the IP payload length is not included in the pseudo
473  * header checksum of the packet given to the PMD, but virtio expects
474  * it to be.
475  */
476 static void
477 virtio_tso_fix_cksum(struct rte_mbuf *m)
478 {
479         /* common case: header is not fragmented */
480         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
481                         m->l4_len)) {
482                 struct rte_ipv4_hdr *iph;
483                 struct rte_ipv6_hdr *ip6h;
484                 struct rte_tcp_hdr *th;
485                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
486                 uint32_t tmp;
487
488                 iph = rte_pktmbuf_mtod_offset(m,
489                                         struct rte_ipv4_hdr *, m->l2_len);
490                 th = RTE_PTR_ADD(iph, m->l3_len);
491                 if ((iph->version_ihl >> 4) == 4) {
492                         iph->hdr_checksum = 0;
493                         iph->hdr_checksum = rte_ipv4_cksum(iph);
494                         ip_len = iph->total_length;
495                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
496                                 m->l3_len);
497                 } else {
498                         ip6h = (struct rte_ipv6_hdr *)iph;
499                         ip_paylen = ip6h->payload_len;
500                 }
501
502                 /* calculate the new phdr checksum not including ip_paylen */
503                 prev_cksum = th->cksum;
504                 tmp = prev_cksum;
505                 tmp += ip_paylen;
506                 tmp = (tmp & 0xffff) + (tmp >> 16);
507                 new_cksum = tmp;
508
509                 /* replace it in the packet */
510                 th->cksum = new_cksum;
511         }
512 }
513
514
515 /* avoid unnecessary write operations, to lessen cache issues */
516 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
517         if ((var) != (val))                     \
518                 (var) = (val);                  \
519 } while (0)
520
521 #define virtqueue_clear_net_hdr(_hdr) do {              \
522         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
523         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
524         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
525         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
526         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
527         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
528 } while (0)
529
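/*
 * Fill the virtio net header from the mbuf TX offload flags (L4 checksum,
 * TSO); a no-op when TX offloads are disabled.
 */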
530 static inline void
531 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
532                         struct rte_mbuf *cookie,
533                         bool offload)
534 {
535         if (offload) {
536                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
537                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
538
539                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
540                 case PKT_TX_UDP_CKSUM:
541                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
542                         hdr->csum_offset = offsetof(struct rte_udp_hdr,
543                                 dgram_cksum);
544                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
545                         break;
546
547                 case PKT_TX_TCP_CKSUM:
548                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
549                         hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
550                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
551                         break;
552
553                 default:
554                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
555                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
556                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
557                         break;
558                 }
559
560                 /* TCP Segmentation Offload */
561                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
562                         virtio_tso_fix_cksum(cookie);
563                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
564                                 VIRTIO_NET_HDR_GSO_TCPV6 :
565                                 VIRTIO_NET_HDR_GSO_TCPV4;
566                         hdr->gso_size = cookie->tso_segsz;
567                         hdr->hdr_len =
568                                 cookie->l2_len +
569                                 cookie->l3_len +
570                                 cookie->l4_len;
571                 } else {
572                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
573                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
574                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
575                 }
576         }
577 }
578
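/*
 * Enqueue single-segment packets on an in-order split TX queue, pushing the
 * virtio net header into the mbuf headroom.
 */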
579 static inline void
580 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
581                         struct rte_mbuf **cookies,
582                         uint16_t num)
583 {
584         struct vq_desc_extra *dxp;
585         struct virtqueue *vq = txvq->vq;
586         struct vring_desc *start_dp;
587         struct virtio_net_hdr *hdr;
588         uint16_t idx;
589         uint16_t head_size = vq->hw->vtnet_hdr_size;
590         uint16_t i = 0;
591
592         idx = vq->vq_desc_head_idx;
593         start_dp = vq->vq_split.ring.desc;
594
595         while (i < num) {
596                 idx = idx & (vq->vq_nentries - 1);
597                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
598                 dxp->cookie = (void *)cookies[i];
599                 dxp->ndescs = 1;
600
601                 hdr = (struct virtio_net_hdr *)
602                         rte_pktmbuf_prepend(cookies[i], head_size);
603                 cookies[i]->pkt_len -= head_size;
604
605                 /* if offload disabled, hdr is not zeroed yet, do it now */
606                 if (!vq->hw->has_tx_offload)
607                         virtqueue_clear_net_hdr(hdr);
608                 else
609                         virtqueue_xmit_offload(hdr, cookies[i], true);
610
611                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
612                 start_dp[idx].len   = cookies[i]->data_len;
613                 start_dp[idx].flags = 0;
614
615                 vq_update_avail_ring(vq, idx);
616
617                 idx++;
618                 i++;
619         }
620
621         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
622         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
623 }
624
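/* Fast TX path for a packed queue: one descriptor, header pushed into the mbuf headroom. */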
625 static inline void
626 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
627                                    struct rte_mbuf *cookie,
628                                    int in_order)
629 {
630         struct virtqueue *vq = txvq->vq;
631         struct vring_packed_desc *dp;
632         struct vq_desc_extra *dxp;
633         uint16_t idx, id, flags;
634         uint16_t head_size = vq->hw->vtnet_hdr_size;
635         struct virtio_net_hdr *hdr;
636
637         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
638         idx = vq->vq_avail_idx;
639         dp = &vq->vq_packed.ring.desc[idx];
640
641         dxp = &vq->vq_descx[id];
642         dxp->ndescs = 1;
643         dxp->cookie = cookie;
644
645         flags = vq->vq_packed.cached_flags;
646
647         /* prepend cannot fail, checked by caller */
648         hdr = (struct virtio_net_hdr *)
649                 rte_pktmbuf_prepend(cookie, head_size);
650         cookie->pkt_len -= head_size;
651
652         /* if offload disabled, hdr is not zeroed yet, do it now */
653         if (!vq->hw->has_tx_offload)
654                 virtqueue_clear_net_hdr(hdr);
655         else
656                 virtqueue_xmit_offload(hdr, cookie, true);
657
658         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
659         dp->len  = cookie->data_len;
660         dp->id   = id;
661
662         if (++vq->vq_avail_idx >= vq->vq_nentries) {
663                 vq->vq_avail_idx -= vq->vq_nentries;
664                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
665         }
666
667         vq->vq_free_cnt--;
668
669         if (!in_order) {
670                 vq->vq_desc_head_idx = dxp->next;
671                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
672                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
673         }
674
675         virtio_wmb(vq->hw->weak_barriers);
676         dp->flags = flags;
677 }
678
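/*
 * General TX path for a packed queue: one descriptor per segment, plus one
 * for the header when it cannot be pushed into the mbuf. The head descriptor
 * flags are written last so the whole chain is exposed at once.
 */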
679 static inline void
680 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
681                               uint16_t needed, int can_push, int in_order)
682 {
683         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
684         struct vq_desc_extra *dxp;
685         struct virtqueue *vq = txvq->vq;
686         struct vring_packed_desc *start_dp, *head_dp;
687         uint16_t idx, id, head_idx, head_flags;
688         uint16_t head_size = vq->hw->vtnet_hdr_size;
689         struct virtio_net_hdr *hdr;
690         uint16_t prev;
691
692         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
693
694         dxp = &vq->vq_descx[id];
695         dxp->ndescs = needed;
696         dxp->cookie = cookie;
697
698         head_idx = vq->vq_avail_idx;
699         idx = head_idx;
700         prev = head_idx;
701         start_dp = vq->vq_packed.ring.desc;
702
703         head_dp = &vq->vq_packed.ring.desc[idx];
704         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
705         head_flags |= vq->vq_packed.cached_flags;
706
707         if (can_push) {
708                 /* prepend cannot fail, checked by caller */
709                 hdr = (struct virtio_net_hdr *)
710                         rte_pktmbuf_prepend(cookie, head_size);
711                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length;
712                  * the subtraction below restores the correct pkt size.
713                  */
714                 cookie->pkt_len -= head_size;
715
716                 /* if offload disabled, it is not zeroed below, do it now */
717                 if (!vq->hw->has_tx_offload)
718                         virtqueue_clear_net_hdr(hdr);
719         } else {
720                 /* setup first tx ring slot to point to header
721                  * stored in reserved region.
722                  */
723                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
724                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
725                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
726                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
727                 idx++;
728                 if (idx >= vq->vq_nentries) {
729                         idx -= vq->vq_nentries;
730                         vq->vq_packed.cached_flags ^=
731                                 VRING_PACKED_DESC_F_AVAIL_USED;
732                 }
733         }
734
735         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
736
737         do {
738                 uint16_t flags;
739
740                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
741                 start_dp[idx].len  = cookie->data_len;
742                 if (likely(idx != head_idx)) {
743                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
744                         flags |= vq->vq_packed.cached_flags;
745                         start_dp[idx].flags = flags;
746                 }
747                 prev = idx;
748                 idx++;
749                 if (idx >= vq->vq_nentries) {
750                         idx -= vq->vq_nentries;
751                         vq->vq_packed.cached_flags ^=
752                                 VRING_PACKED_DESC_F_AVAIL_USED;
753                 }
754         } while ((cookie = cookie->next) != NULL);
755
756         start_dp[prev].id = id;
757
758         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
759         vq->vq_avail_idx = idx;
760
761         if (!in_order) {
762                 vq->vq_desc_head_idx = dxp->next;
763                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
764                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
765         }
766
767         virtio_wmb(vq->hw->weak_barriers);
768         head_dp->flags = head_flags;
769 }
770
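/*
 * General TX path for a split queue: the header is either pushed into the
 * mbuf, placed in an indirect descriptor table, or given its own descriptor.
 */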
771 static inline void
772 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
773                         uint16_t needed, int use_indirect, int can_push,
774                         int in_order)
775 {
776         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
777         struct vq_desc_extra *dxp;
778         struct virtqueue *vq = txvq->vq;
779         struct vring_desc *start_dp;
780         uint16_t seg_num = cookie->nb_segs;
781         uint16_t head_idx, idx;
782         uint16_t head_size = vq->hw->vtnet_hdr_size;
783         struct virtio_net_hdr *hdr;
784
785         head_idx = vq->vq_desc_head_idx;
786         idx = head_idx;
787         if (in_order)
788                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
789         else
790                 dxp = &vq->vq_descx[idx];
791         dxp->cookie = (void *)cookie;
792         dxp->ndescs = needed;
793
794         start_dp = vq->vq_split.ring.desc;
795
796         if (can_push) {
797                 /* prepend cannot fail, checked by caller */
798                 hdr = (struct virtio_net_hdr *)
799                         rte_pktmbuf_prepend(cookie, head_size);
800                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length;
801                  * the subtraction below restores the correct pkt size.
802                  */
803                 cookie->pkt_len -= head_size;
804
805                 /* if offload disabled, it is not zeroed below, do it now */
806                 if (!vq->hw->has_tx_offload)
807                         virtqueue_clear_net_hdr(hdr);
808         } else if (use_indirect) {
809                 /* setup tx ring slot to point to indirect
810                  * descriptor list stored in reserved region.
811                  *
812                  * the first slot in indirect ring is already preset
813                  * to point to the header in reserved region
814                  */
815                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
816                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
817                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
818                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
819                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
820
821                 /* loop below will fill in rest of the indirect elements */
822                 start_dp = txr[idx].tx_indir;
823                 idx = 1;
824         } else {
825                 /* setup first tx ring slot to point to header
826                  * stored in reserved region.
827                  */
828                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
829                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
830                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
831                 start_dp[idx].flags = VRING_DESC_F_NEXT;
832                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
833
834                 idx = start_dp[idx].next;
835         }
836
837         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
838
839         do {
840                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
841                 start_dp[idx].len   = cookie->data_len;
842                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
843                 idx = start_dp[idx].next;
844         } while ((cookie = cookie->next) != NULL);
845
846         if (use_indirect)
847                 idx = vq->vq_split.ring.desc[head_idx].next;
848
849         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
850
851         vq->vq_desc_head_idx = idx;
852         vq_update_avail_ring(vq, head_idx);
853
854         if (!in_order) {
855                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
856                         vq->vq_desc_tail_idx = idx;
857         }
858 }
859
860 void
861 virtio_dev_cq_start(struct rte_eth_dev *dev)
862 {
863         struct virtio_hw *hw = dev->data->dev_private;
864
865         if (hw->cvq && hw->cvq->vq) {
866                 rte_spinlock_init(&hw->cvq->lock);
867                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
868         }
869 }
870
871 int
872 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
873                         uint16_t queue_idx,
874                         uint16_t nb_desc,
875                         unsigned int socket_id __rte_unused,
876                         const struct rte_eth_rxconf *rx_conf __rte_unused,
877                         struct rte_mempool *mp)
878 {
879         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
880         struct virtio_hw *hw = dev->data->dev_private;
881         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
882         struct virtnet_rx *rxvq;
883
884         PMD_INIT_FUNC_TRACE();
885
886         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
887                 nb_desc = vq->vq_nentries;
888         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
889
890         rxvq = &vq->rxq;
891         rxvq->queue_id = queue_idx;
892         rxvq->mpool = mp;
893         dev->data->rx_queues[queue_idx] = rxvq;
894
895         return 0;
896 }
897
898 int
899 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
900 {
901         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
902         struct virtio_hw *hw = dev->data->dev_private;
903         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
904         struct virtnet_rx *rxvq = &vq->rxq;
905         struct rte_mbuf *m;
906         uint16_t desc_idx;
907         int error, nbufs, i;
908
909         PMD_INIT_FUNC_TRACE();
910
911         /* Allocate blank mbufs for each rx descriptor */
912         nbufs = 0;
913
914         if (hw->use_simple_rx) {
915                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
916                      desc_idx++) {
917                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
918                         vq->vq_split.ring.desc[desc_idx].flags =
919                                 VRING_DESC_F_WRITE;
920                 }
921
922                 virtio_rxq_vec_setup(rxvq);
923         }
924
925         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
926         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
927              desc_idx++) {
928                 vq->sw_ring[vq->vq_nentries + desc_idx] =
929                         &rxvq->fake_mbuf;
930         }
931
932         if (hw->use_simple_rx) {
933                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
934                         virtio_rxq_rearm_vec(rxvq);
935                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
936                 }
937         } else if (hw->use_inorder_rx) {
938                 if ((!virtqueue_full(vq))) {
939                         uint16_t free_cnt = vq->vq_free_cnt;
940                         struct rte_mbuf *pkts[free_cnt];
941
942                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
943                                 free_cnt)) {
944                                 error = virtqueue_enqueue_refill_inorder(vq,
945                                                 pkts,
946                                                 free_cnt);
947                                 if (unlikely(error)) {
948                                         for (i = 0; i < free_cnt; i++)
949                                                 rte_pktmbuf_free(pkts[i]);
950                                 }
951                         }
952
953                         nbufs += free_cnt;
954                         vq_update_avail_idx(vq);
955                 }
956         } else {
957                 while (!virtqueue_full(vq)) {
958                         m = rte_mbuf_raw_alloc(rxvq->mpool);
959                         if (m == NULL)
960                                 break;
961
962                         /* Enqueue allocated buffers */
963                         if (vtpci_packed_queue(vq->hw))
964                                 error = virtqueue_enqueue_recv_refill_packed(vq,
965                                                 &m, 1);
966                         else
967                                 error = virtqueue_enqueue_recv_refill(vq,
968                                                 &m, 1);
969                         if (error) {
970                                 rte_pktmbuf_free(m);
971                                 break;
972                         }
973                         nbufs++;
974                 }
975
976                 if (!vtpci_packed_queue(vq->hw))
977                         vq_update_avail_idx(vq);
978         }
979
980         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
981
982         VIRTQUEUE_DUMP(vq);
983
984         return 0;
985 }
986
987 /*
988  * struct rte_eth_dev *dev: device whose TX queue is being set up
989  * uint16_t queue_idx: index of the queue in the device TX queue list
990  * uint16_t nb_desc: defaults to the value read from config space
991  * unsigned int socket_id: used to allocate the memzone
992  * const struct rte_eth_txconf *tx_conf: used to set up the TX engine
993  */
994 int
995 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
996                         uint16_t queue_idx,
997                         uint16_t nb_desc,
998                         unsigned int socket_id __rte_unused,
999                         const struct rte_eth_txconf *tx_conf)
1000 {
1001         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1002         struct virtio_hw *hw = dev->data->dev_private;
1003         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1004         struct virtnet_tx *txvq;
1005         uint16_t tx_free_thresh;
1006
1007         PMD_INIT_FUNC_TRACE();
1008
1009         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1010                 nb_desc = vq->vq_nentries;
1011         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1012
1013         txvq = &vq->txq;
1014         txvq->queue_id = queue_idx;
1015
1016         tx_free_thresh = tx_conf->tx_free_thresh;
1017         if (tx_free_thresh == 0)
1018                 tx_free_thresh =
1019                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1020
1021         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1022                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1023                         "number of TX entries minus 3 (%u)."
1024                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1025                         vq->vq_nentries - 3,
1026                         tx_free_thresh, dev->data->port_id, queue_idx);
1027                 return -EINVAL;
1028         }
1029
1030         vq->vq_free_thresh = tx_free_thresh;
1031
1032         dev->data->tx_queues[queue_idx] = txvq;
1033         return 0;
1034 }
1035
1036 int
1037 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1038                                 uint16_t queue_idx)
1039 {
1040         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1041         struct virtio_hw *hw = dev->data->dev_private;
1042         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1043
1044         PMD_INIT_FUNC_TRACE();
1045
1046         if (!vtpci_packed_queue(hw)) {
1047                 if (hw->use_inorder_tx)
1048                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1049         }
1050
1051         VIRTQUEUE_DUMP(vq);
1052
1053         return 0;
1054 }
1055
1056 static inline void
1057 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1058 {
1059         int error;
1060         /*
1061          * Requeue the discarded mbuf. This should always be
1062          * successful since it was just dequeued.
1063          */
1064         if (vtpci_packed_queue(vq->hw))
1065                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1066         else
1067                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1068
1069         if (unlikely(error)) {
1070                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1071                 rte_pktmbuf_free(m);
1072         }
1073 }
1074
1075 static inline void
1076 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1077 {
1078         int error;
1079
1080         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1081         if (unlikely(error)) {
1082                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1083                 rte_pktmbuf_free(m);
1084         }
1085 }
1086
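/* Update the per-queue byte, size-histogram and multicast/broadcast counters. */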
1087 static inline void
1088 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1089 {
1090         uint32_t s = mbuf->pkt_len;
1091         struct rte_ether_addr *ea;
1092
1093         stats->bytes += s;
1094
1095         if (s == 64) {
1096                 stats->size_bins[1]++;
1097         } else if (s > 64 && s < 1024) {
1098                 uint32_t bin;
1099
1100                 /* use the highest set bit to pick the power-of-two size bin */
1101                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1102                 stats->size_bins[bin]++;
1103         } else {
1104                 if (s < 64)
1105                         stats->size_bins[0]++;
1106                 else if (s < 1519)
1107                         stats->size_bins[6]++;
1108                 else
1109                         stats->size_bins[7]++;
1110         }
1111
1112         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
1113         if (rte_is_multicast_ether_addr(ea)) {
1114                 if (rte_is_broadcast_ether_addr(ea))
1115                         stats->broadcast++;
1116                 else
1117                         stats->multicast++;
1118         }
1119 }
1120
1121 static inline void
1122 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1123 {
1124         VIRTIO_DUMP_PACKET(m, m->data_len);
1125
1126         virtio_update_packet_stats(&rxvq->stats, m);
1127 }
1128
1129 /* Optionally fill offload information in structure */
1130 static inline int
1131 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1132 {
1133         struct rte_net_hdr_lens hdr_lens;
1134         uint32_t hdrlen, ptype;
1135         int l4_supported = 0;
1136
1137         /* nothing to do */
1138         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1139                 return 0;
1140
1141         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1142
1143         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1144         m->packet_type = ptype;
1145         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1146             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1147             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1148                 l4_supported = 1;
1149
1150         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1151                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1152                 if (hdr->csum_start <= hdrlen && l4_supported) {
1153                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1154                 } else {
1155                         /* Unknown proto or tunnel, do sw cksum. We can assume
1156                          * the cksum field is in the first segment since the
1157                          * buffers we provided to the host are large enough.
1158                          * In case of SCTP, this will be wrong since it's a CRC
1159                          * but there's nothing we can do.
1160                          */
1161                         uint16_t csum = 0, off;
1162
1163                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1164                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1165                                 &csum);
1166                         if (likely(csum != 0xffff))
1167                                 csum = ~csum;
1168                         off = hdr->csum_offset + hdr->csum_start;
1169                         if (rte_pktmbuf_data_len(m) >= off + 1)
1170                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1171                                         off) = csum;
1172                 }
1173         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1174                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1175         }
1176
1177         /* GSO request, save required information in mbuf */
1178         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1179                 /* Check unsupported modes */
1180                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1181                     (hdr->gso_size == 0)) {
1182                         return -EINVAL;
1183                 }
1184
1185                 /* Update MSS length in mbuf */
1186                 m->tso_segsz = hdr->gso_size;
1187                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1188                         case VIRTIO_NET_HDR_GSO_TCPV4:
1189                         case VIRTIO_NET_HDR_GSO_TCPV6:
1190                                 m->ol_flags |= PKT_RX_LRO | \
1191                                         PKT_RX_L4_CKSUM_NONE;
1192                                 break;
1193                         default:
1194                                 return -EINVAL;
1195                 }
1196         }
1197
1198         return 0;
1199 }
1200
1201 #define VIRTIO_MBUF_BURST_SZ 64
1202 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
1203 uint16_t
1204 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1205 {
1206         struct virtnet_rx *rxvq = rx_queue;
1207         struct virtqueue *vq = rxvq->vq;
1208         struct virtio_hw *hw = vq->hw;
1209         struct rte_mbuf *rxm;
1210         uint16_t nb_used, num, nb_rx;
1211         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1212         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1213         int error;
1214         uint32_t i, nb_enqueued;
1215         uint32_t hdr_size;
1216         struct virtio_net_hdr *hdr;
1217
1218         nb_rx = 0;
1219         if (unlikely(hw->started == 0))
1220                 return nb_rx;
1221
1222         nb_used = VIRTQUEUE_NUSED(vq);
1223
1224         virtio_rmb(hw->weak_barriers);
1225
1226         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1227         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1228                 num = VIRTIO_MBUF_BURST_SZ;
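        /* End the burst on a descriptor cache line boundary so the
         * next burst starts cache aligned.
         */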
1229         if (likely(num > DESC_PER_CACHELINE))
1230                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1231
1232         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1233         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1234
1235         nb_enqueued = 0;
1236         hdr_size = hw->vtnet_hdr_size;
1237
1238         for (i = 0; i < num ; i++) {
1239                 rxm = rcv_pkts[i];
1240
1241                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1242
1243                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1244                         PMD_RX_LOG(ERR, "Packet drop");
1245                         nb_enqueued++;
1246                         virtio_discard_rxbuf(vq, rxm);
1247                         rxvq->stats.errors++;
1248                         continue;
1249                 }
1250
1251                 rxm->port = rxvq->port_id;
1252                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1253                 rxm->ol_flags = 0;
1254                 rxm->vlan_tci = 0;
1255
1256                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1257                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1258
1259                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1260                         RTE_PKTMBUF_HEADROOM - hdr_size);
1261
1262                 if (hw->vlan_strip)
1263                         rte_vlan_strip(rxm);
1264
1265                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1266                         virtio_discard_rxbuf(vq, rxm);
1267                         rxvq->stats.errors++;
1268                         continue;
1269                 }
1270
1271                 virtio_rx_stats_updated(rxvq, rxm);
1272
1273                 rx_pkts[nb_rx++] = rxm;
1274         }
1275
1276         rxvq->stats.packets += nb_rx;
1277
1278         /* Allocate new mbuf for the used descriptor */
1279         if (likely(!virtqueue_full(vq))) {
1280                 uint16_t free_cnt = vq->vq_free_cnt;
1281                 struct rte_mbuf *new_pkts[free_cnt];
1282
1283                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1284                                                 free_cnt) == 0)) {
1285                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1286                                         free_cnt);
1287                         if (unlikely(error)) {
1288                                 for (i = 0; i < free_cnt; i++)
1289                                         rte_pktmbuf_free(new_pkts[i]);
1290                         }
1291                         nb_enqueued += free_cnt;
1292                 } else {
1293                         struct rte_eth_dev *dev =
1294                                 &rte_eth_devices[rxvq->port_id];
1295                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1296                 }
1297         }
1298
1299         if (likely(nb_enqueued)) {
1300                 vq_update_avail_idx(vq);
1301
1302                 if (unlikely(virtqueue_kick_prepare(vq))) {
1303                         virtqueue_notify(vq);
1304                         PMD_RX_LOG(DEBUG, "Notified");
1305                 }
1306         }
1307
1308         return nb_rx;
1309 }
1310
1311 uint16_t
1312 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1313                         uint16_t nb_pkts)
1314 {
1315         struct virtnet_rx *rxvq = rx_queue;
1316         struct virtqueue *vq = rxvq->vq;
1317         struct virtio_hw *hw = vq->hw;
1318         struct rte_mbuf *rxm;
1319         uint16_t num, nb_rx;
1320         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1321         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1322         int error;
1323         uint32_t i, nb_enqueued;
1324         uint32_t hdr_size;
1325         struct virtio_net_hdr *hdr;
1326
1327         nb_rx = 0;
1328         if (unlikely(hw->started == 0))
1329                 return nb_rx;
1330
1331         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1332         if (likely(num > DESC_PER_CACHELINE))
1333                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1334
1335         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1336         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1337
1338         nb_enqueued = 0;
1339         hdr_size = hw->vtnet_hdr_size;
1340
1341         for (i = 0; i < num; i++) {
1342                 rxm = rcv_pkts[i];
1343
1344                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1345
1346                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1347                         PMD_RX_LOG(ERR, "Packet drop");
1348                         nb_enqueued++;
1349                         virtio_discard_rxbuf(vq, rxm);
1350                         rxvq->stats.errors++;
1351                         continue;
1352                 }
1353
1354                 rxm->port = rxvq->port_id;
1355                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1356                 rxm->ol_flags = 0;
1357                 rxm->vlan_tci = 0;
1358
1359                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1360                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1361
1362                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1363                         RTE_PKTMBUF_HEADROOM - hdr_size);
1364
1365                 if (hw->vlan_strip)
1366                         rte_vlan_strip(rxm);
1367
1368                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1369                         virtio_discard_rxbuf(vq, rxm);
1370                         rxvq->stats.errors++;
1371                         continue;
1372                 }
1373
1374                 virtio_rx_stats_updated(rxvq, rxm);
1375
1376                 rx_pkts[nb_rx++] = rxm;
1377         }
1378
1379         rxvq->stats.packets += nb_rx;
1380
1381         /* Allocate new mbuf for the used descriptor */
1382         if (likely(!virtqueue_full(vq))) {
1383                 uint16_t free_cnt = vq->vq_free_cnt;
1384                 struct rte_mbuf *new_pkts[free_cnt];
1385
1386                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1387                                                 free_cnt) == 0)) {
1388                         error = virtqueue_enqueue_recv_refill_packed(vq,
1389                                         new_pkts, free_cnt);
1390                         if (unlikely(error)) {
1391                                 for (i = 0; i < free_cnt; i++)
1392                                         rte_pktmbuf_free(new_pkts[i]);
1393                         }
1394                         nb_enqueued += free_cnt;
1395                 } else {
1396                         struct rte_eth_dev *dev =
1397                                 &rte_eth_devices[rxvq->port_id];
1398                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1399                 }
1400         }
1401
1402         if (likely(nb_enqueued)) {
1403                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1404                         virtqueue_notify(vq);
1405                         PMD_RX_LOG(DEBUG, "Notified");
1406                 }
1407         }
1408
1409         return nb_rx;
1410 }
1411
1412
1413 uint16_t
1414 virtio_recv_pkts_inorder(void *rx_queue,
1415                         struct rte_mbuf **rx_pkts,
1416                         uint16_t nb_pkts)
1417 {
1418         struct virtnet_rx *rxvq = rx_queue;
1419         struct virtqueue *vq = rxvq->vq;
1420         struct virtio_hw *hw = vq->hw;
1421         struct rte_mbuf *rxm;
1422         struct rte_mbuf *prev = NULL;
1423         uint16_t nb_used, num, nb_rx;
1424         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1425         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1426         int error;
1427         uint32_t nb_enqueued;
1428         uint32_t seg_num;
1429         uint32_t seg_res;
1430         uint32_t hdr_size;
1431         int32_t i;
1432
1433         nb_rx = 0;
1434         if (unlikely(hw->started == 0))
1435                 return nb_rx;
1436
1437         nb_used = VIRTQUEUE_NUSED(vq);
1438         nb_used = RTE_MIN(nb_used, nb_pkts);
1439         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1440
1441         virtio_rmb(hw->weak_barriers);
1442
1443         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1444
1445         nb_enqueued = 0;
1446         seg_num = 1;
1447         seg_res = 0;
1448         hdr_size = hw->vtnet_hdr_size;
1449
1450         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1451
1452         for (i = 0; i < num; i++) {
1453                 struct virtio_net_hdr_mrg_rxbuf *header;
1454
1455                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1456                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1457
1458                 rxm = rcv_pkts[i];
1459
1460                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1461                         PMD_RX_LOG(ERR, "Packet drop");
1462                         nb_enqueued++;
1463                         virtio_discard_rxbuf_inorder(vq, rxm);
1464                         rxvq->stats.errors++;
1465                         continue;
1466                 }
1467
1468                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1469                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1470                          - hdr_size);
1471
1472                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1473                         seg_num = header->num_buffers;
1474                         if (seg_num == 0)
1475                                 seg_num = 1;
1476                 } else {
1477                         seg_num = 1;
1478                 }
1479
1480                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1481                 rxm->nb_segs = seg_num;
1482                 rxm->ol_flags = 0;
1483                 rxm->vlan_tci = 0;
1484                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1485                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1486
1487                 rxm->port = rxvq->port_id;
1488
1489                 rx_pkts[nb_rx] = rxm;
1490                 prev = rxm;
1491
1492                 if (vq->hw->has_rx_offload &&
1493                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1494                         virtio_discard_rxbuf_inorder(vq, rxm);
1495                         rxvq->stats.errors++;
1496                         continue;
1497                 }
1498
1499                 if (hw->vlan_strip)
1500                         rte_vlan_strip(rx_pkts[nb_rx]);
1501
1502                 seg_res = seg_num - 1;
1503
1504                 /* Merge remaining segments */
1505                 while (seg_res != 0 && i < (num - 1)) {
1506                         i++;
1507
1508                         rxm = rcv_pkts[i];
1509                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1510                         rxm->pkt_len = (uint32_t)(len[i]);
1511                         rxm->data_len = (uint16_t)(len[i]);
1512
1513                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1514
1515                         if (prev)
1516                                 prev->next = rxm;
1517
1518                         prev = rxm;
1519                         seg_res -= 1;
1520                 }
1521
1522                 if (!seg_res) {
1523                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1524                         nb_rx++;
1525                 }
1526         }
1527
1528         /* The last packet may still have segments left to merge */
1529         while (seg_res != 0) {
1530                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1531                                         VIRTIO_MBUF_BURST_SZ);
1532
1533                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1534                         virtio_rmb(hw->weak_barriers);
1535                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1536                                                            rcv_cnt);
1537                         uint16_t extra_idx = 0;
1538
1539                         rcv_cnt = num;
1540                         while (extra_idx < rcv_cnt) {
1541                                 rxm = rcv_pkts[extra_idx];
1542                                 rxm->data_off =
1543                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1544                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1545                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1546                                 prev->next = rxm;
1547                                 prev = rxm;
1548                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1549                                 extra_idx += 1;
1550                         }
1551                         seg_res -= rcv_cnt;
1552
1553                         if (!seg_res) {
1554                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1555                                 nb_rx++;
1556                         }
1557                 } else {
1558                         PMD_RX_LOG(ERR,
1559                                         "Not enough segments for packet.");
1560                         virtio_discard_rxbuf_inorder(vq, prev);
1561                         rxvq->stats.errors++;
1562                         break;
1563                 }
1564         }
1565
1566         rxvq->stats.packets += nb_rx;
1567
1568         /* Allocate new mbufs for the used descriptors */
1570         if (likely(!virtqueue_full(vq))) {
1571                 /* free_cnt may include mrg descs */
1572                 uint16_t free_cnt = vq->vq_free_cnt;
1573                 struct rte_mbuf *new_pkts[free_cnt];
1574
1575                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1576                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1577                                         free_cnt);
1578                         if (unlikely(error)) {
1579                                 for (i = 0; i < free_cnt; i++)
1580                                         rte_pktmbuf_free(new_pkts[i]);
1581                         }
1582                         nb_enqueued += free_cnt;
1583                 } else {
1584                         struct rte_eth_dev *dev =
1585                                 &rte_eth_devices[rxvq->port_id];
1586                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1587                 }
1588         }
1589
1590         if (likely(nb_enqueued)) {
1591                 vq_update_avail_idx(vq);
1592
1593                 if (unlikely(virtqueue_kick_prepare(vq))) {
1594                         virtqueue_notify(vq);
1595                         PMD_RX_LOG(DEBUG, "Notified");
1596                 }
1597         }
1598
1599         return nb_rx;
1600 }
1601
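/*
 * Rx burst for split virtqueues with VIRTIO_NET_F_MRG_RXBUF: a received
 * packet may span several descriptors, so extra buffers are chained onto
 * the head mbuf until the num_buffers count from the virtio-net header
 * has been consumed.
 */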
1602 uint16_t
1603 virtio_recv_mergeable_pkts(void *rx_queue,
1604                         struct rte_mbuf **rx_pkts,
1605                         uint16_t nb_pkts)
1606 {
1607         struct virtnet_rx *rxvq = rx_queue;
1608         struct virtqueue *vq = rxvq->vq;
1609         struct virtio_hw *hw = vq->hw;
1610         struct rte_mbuf *rxm;
1611         struct rte_mbuf *prev = NULL;
1612         uint16_t nb_used, num, nb_rx = 0;
1613         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1614         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1615         int error;
1616         uint32_t nb_enqueued = 0;
1617         uint32_t seg_num = 0;
1618         uint32_t seg_res = 0;
1619         uint32_t hdr_size = hw->vtnet_hdr_size;
1620         int32_t i;
1621
1622         if (unlikely(hw->started == 0))
1623                 return nb_rx;
1624
1625         nb_used = VIRTQUEUE_NUSED(vq);
1626
1627         virtio_rmb(hw->weak_barriers);
1628
1629         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1630
1631         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1632         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1633                 num = VIRTIO_MBUF_BURST_SZ;
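        /*
         * Trim the burst so that (vq_used_cons_idx + num) is a multiple of
         * DESC_PER_CACHELINE: used ring entries are then always consumed in
         * whole cachelines.
         */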
1634         if (likely(num > DESC_PER_CACHELINE))
1635                 num = num - ((vq->vq_used_cons_idx + num) %
1636                                 DESC_PER_CACHELINE);
1637
1639         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1640
1641         for (i = 0; i < num; i++) {
1642                 struct virtio_net_hdr_mrg_rxbuf *header;
1643
1644                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1645                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1646
1647                 rxm = rcv_pkts[i];
1648
1649                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1650                         PMD_RX_LOG(ERR, "Packet drop");
1651                         nb_enqueued++;
1652                         virtio_discard_rxbuf(vq, rxm);
1653                         rxvq->stats.errors++;
1654                         continue;
1655                 }
1656
1657                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1658                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1659                          - hdr_size);
1660                 seg_num = header->num_buffers;
1661                 if (seg_num == 0)
1662                         seg_num = 1;
1663
1664                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1665                 rxm->nb_segs = seg_num;
1666                 rxm->ol_flags = 0;
1667                 rxm->vlan_tci = 0;
1668                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1669                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1670
1671                 rxm->port = rxvq->port_id;
1672
1673                 rx_pkts[nb_rx] = rxm;
1674                 prev = rxm;
1675
1676                 if (hw->has_rx_offload &&
1677                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1678                         virtio_discard_rxbuf(vq, rxm);
1679                         rxvq->stats.errors++;
1680                         continue;
1681                 }
1682
1683                 if (hw->vlan_strip)
1684                         rte_vlan_strip(rx_pkts[nb_rx]);
1685
1686                 seg_res = seg_num - 1;
1687
1688                 /* Merge remaining segments */
1689                 while (seg_res != 0 && i < (num - 1)) {
1690                         i++;
1691
1692                         rxm = rcv_pkts[i];
1693                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1694                         rxm->pkt_len = (uint32_t)(len[i]);
1695                         rxm->data_len = (uint16_t)(len[i]);
1696
1697                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1698
1699                         if (prev)
1700                                 prev->next = rxm;
1701
1702                         prev = rxm;
1703                         seg_res -= 1;
1704                 }
1705
1706                 if (!seg_res) {
1707                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1708                         nb_rx++;
1709                 }
1710         }
1711
1712         /* The last packet may still have segments left to merge */
1713         while (seg_res != 0) {
1714                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1715                                         VIRTIO_MBUF_BURST_SZ);
1716
1717                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1718                         virtio_rmb(hw->weak_barriers);
1719                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1720                                                            rcv_cnt);
1721                         uint16_t extra_idx = 0;
1722
1723                         rcv_cnt = num;
1724                         while (extra_idx < rcv_cnt) {
1725                                 rxm = rcv_pkts[extra_idx];
1726                                 rxm->data_off =
1727                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1728                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1729                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1730                                 prev->next = rxm;
1731                                 prev = rxm;
1732                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1733                                 extra_idx += 1;
1734                         }
1735                         seg_res -= rcv_cnt;
1736
1737                         if (!seg_res) {
1738                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1739                                 nb_rx++;
1740                         }
1741                 } else {
1742                         PMD_RX_LOG(ERR,
1743                                         "Not enough segments for packet.");
1744                         virtio_discard_rxbuf(vq, prev);
1745                         rxvq->stats.errors++;
1746                         break;
1747                 }
1748         }
1749
1750         rxvq->stats.packets += nb_rx;
1751
1752         /* Allocate new mbufs for the used descriptors */
1753         if (likely(!virtqueue_full(vq))) {
1754                 /* free_cnt may include mrg descs */
1755                 uint16_t free_cnt = vq->vq_free_cnt;
1756                 struct rte_mbuf *new_pkts[free_cnt];
1757
1758                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1759                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1760                                         free_cnt);
1761                         if (unlikely(error)) {
1762                                 for (i = 0; i < free_cnt; i++)
1763                                         rte_pktmbuf_free(new_pkts[i]);
1764                         }
1765                         nb_enqueued += free_cnt;
1766                 } else {
1767                         struct rte_eth_dev *dev =
1768                                 &rte_eth_devices[rxvq->port_id];
1769                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1770                 }
1771         }
1772
1773         if (likely(nb_enqueued)) {
1774                 vq_update_avail_idx(vq);
1775
1776                 if (unlikely(virtqueue_kick_prepare(vq))) {
1777                         virtqueue_notify(vq);
1778                         PMD_RX_LOG(DEBUG, "Notified");
1779                 }
1780         }
1781
1782         return nb_rx;
1783 }
1784
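/*
 * Rx burst for packed virtqueues with VIRTIO_NET_F_MRG_RXBUF.  Segment
 * merging works as in the split-ring variant; only the dequeue and refill
 * helpers differ, since completion is tracked by the packed descriptor
 * flags instead of a used ring.
 */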
1785 uint16_t
1786 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1787                         struct rte_mbuf **rx_pkts,
1788                         uint16_t nb_pkts)
1789 {
1790         struct virtnet_rx *rxvq = rx_queue;
1791         struct virtqueue *vq = rxvq->vq;
1792         struct virtio_hw *hw = vq->hw;
1793         struct rte_mbuf *rxm;
1794         struct rte_mbuf *prev = NULL;
1795         uint16_t num, nb_rx = 0;
1796         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1797         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1798         uint32_t nb_enqueued = 0;
1799         uint32_t seg_num = 0;
1800         uint32_t seg_res = 0;
1801         uint32_t hdr_size = hw->vtnet_hdr_size;
1802         int32_t i;
1803         int error;
1804
1805         if (unlikely(hw->started == 0))
1806                 return nb_rx;
1807
1809         num = nb_pkts;
1810         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1811                 num = VIRTIO_MBUF_BURST_SZ;
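        /*
         * As in the split mergeable path, trim the burst so descriptor
         * consumption ends on a cacheline boundary.
         */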
1812         if (likely(num > DESC_PER_CACHELINE))
1813                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1814
1815         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1816
1817         for (i = 0; i < num; i++) {
1818                 struct virtio_net_hdr_mrg_rxbuf *header;
1819
1820                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1821                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1822
1823                 rxm = rcv_pkts[i];
1824
1825                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1826                         PMD_RX_LOG(ERR, "Packet drop");
1827                         nb_enqueued++;
1828                         virtio_discard_rxbuf(vq, rxm);
1829                         rxvq->stats.errors++;
1830                         continue;
1831                 }
1832
1833                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1834                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1835                 seg_num = header->num_buffers;
1836
1837                 if (seg_num == 0)
1838                         seg_num = 1;
1839
1840                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1841                 rxm->nb_segs = seg_num;
1842                 rxm->ol_flags = 0;
1843                 rxm->vlan_tci = 0;
1844                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1845                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1846
1847                 rxm->port = rxvq->port_id;
1848                 rx_pkts[nb_rx] = rxm;
1849                 prev = rxm;
1850
1851                 if (hw->has_rx_offload &&
1852                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1853                         virtio_discard_rxbuf(vq, rxm);
1854                         rxvq->stats.errors++;
1855                         continue;
1856                 }
1857
1858                 if (hw->vlan_strip)
1859                         rte_vlan_strip(rx_pkts[nb_rx]);
1860
1861                 seg_res = seg_num - 1;
1862
1863                 /* Merge remaining segments */
1864                 while (seg_res != 0 && i < (num - 1)) {
1865                         i++;
1866
1867                         rxm = rcv_pkts[i];
1868                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1869                         rxm->pkt_len = (uint32_t)(len[i]);
1870                         rxm->data_len = (uint16_t)(len[i]);
1871
1872                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1874
1875                         if (prev)
1876                                 prev->next = rxm;
1877
1878                         prev = rxm;
1879                         seg_res -= 1;
1880                 }
1881
1882                 if (!seg_res) {
1883                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1884                         nb_rx++;
1885                 }
1886         }
1887
1888         /* The last packet may still have segments left to merge */
1889         while (seg_res != 0) {
1890                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1891                                         VIRTIO_MBUF_BURST_SZ);
1892                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1893                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1894                                         len, rcv_cnt);
1895                         uint16_t extra_idx = 0;
1896
1897                         rcv_cnt = num;
1898
1899                         while (extra_idx < rcv_cnt) {
1900                                 rxm = rcv_pkts[extra_idx];
1901
1902                                 rxm->data_off =
1903                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1904                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1905                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1906
1907                                 prev->next = rxm;
1908                                 prev = rxm;
1909                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1911                                 extra_idx += 1;
1912                         }
1913                         seg_res -= rcv_cnt;
1914                         if (!seg_res) {
1915                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1916                                 nb_rx++;
1917                         }
1918                 } else {
1919                         PMD_RX_LOG(ERR,
1920                                         "Not enough segments for packet.");
1921                         if (prev)
1922                                 virtio_discard_rxbuf(vq, prev);
1923                         rxvq->stats.errors++;
1924                         break;
1925                 }
1926         }
1927
1928         rxvq->stats.packets += nb_rx;
1929
1930         /* Allocate new mbufs for the used descriptors */
1931         if (likely(!virtqueue_full(vq))) {
1932                 /* free_cnt may include mrg descs */
1933                 uint16_t free_cnt = vq->vq_free_cnt;
1934                 struct rte_mbuf *new_pkts[free_cnt];
1935
1936                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1937                         error = virtqueue_enqueue_recv_refill_packed(vq,
1938                                         new_pkts, free_cnt);
1939                         if (unlikely(error)) {
1940                                 for (i = 0; i < free_cnt; i++)
1941                                         rte_pktmbuf_free(new_pkts[i]);
1942                         }
1943                         nb_enqueued += free_cnt;
1944                 } else {
1945                         struct rte_eth_dev *dev =
1946                                 &rte_eth_devices[rxvq->port_id];
1947                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1948                 }
1949         }
1950
1951         if (likely(nb_enqueued)) {
1952                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1953                         virtqueue_notify(vq);
1954                         PMD_RX_LOG(DEBUG, "Notified");
1955                 }
1956         }
1957
1958         return nb_rx;
1959 }
1960
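/*
 * Tx burst for packed virtqueues.  When the virtio-net header can be
 * pushed into the mbuf headroom (any-layout or version 1 negotiated,
 * single direct segment, refcnt of 1, enough aligned headroom), the fast
 * enqueue path is used; otherwise one descriptor per segment plus one
 * extra slot for the header is consumed.
 */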
1961 uint16_t
1962 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1963                         uint16_t nb_pkts)
1964 {
1965         struct virtnet_tx *txvq = tx_queue;
1966         struct virtqueue *vq = txvq->vq;
1967         struct virtio_hw *hw = vq->hw;
1968         uint16_t hdr_size = hw->vtnet_hdr_size;
1969         uint16_t nb_tx = 0;
1970         bool in_order = hw->use_inorder_tx;
1971         int error;
1972
1973         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1974                 return nb_tx;
1975
1976         if (unlikely(nb_pkts < 1))
1977                 return nb_pkts;
1978
1979         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1980
1981         if (nb_pkts > vq->vq_free_cnt)
1982                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1983                                            in_order);
1984
1985         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1986                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1987                 int can_push = 0, slots, need;
1988
1989                 /* Do VLAN tag insertion */
1990                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1991                         error = rte_vlan_insert(&txm);
1992                         if (unlikely(error)) {
1993                                 rte_pktmbuf_free(txm);
1994                                 continue;
1995                         }
1996                         /* vlan_insert may add a header mbuf */
1997                         tx_pkts[nb_tx] = txm;
1998                 }
1999
2000                 /* optimize ring usage */
2001                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2002                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2003                     rte_mbuf_refcnt_read(txm) == 1 &&
2004                     RTE_MBUF_DIRECT(txm) &&
2005                     txm->nb_segs == 1 &&
2006                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2007                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2008                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2009                         can_push = 1;
2010
2011                 /* How many main ring entries are needed for this Tx?
2012                  * any_layout => number of segments
2013                  * default    => number of segments + 1
2014                  */
2015                 slots = txm->nb_segs + !can_push;
2016                 need = slots - vq->vq_free_cnt;
2017
2018                 /* A positive value indicates more free vring descriptors are needed */
2019                 if (unlikely(need > 0)) {
2020                         virtio_xmit_cleanup_packed(vq, need, in_order);
2021                         need = slots - vq->vq_free_cnt;
2022                         if (unlikely(need > 0)) {
2023                                 PMD_TX_LOG(ERR,
2024                                            "No free tx descriptors to transmit");
2025                                 break;
2026                         }
2027                 }
2028
2029                 /* Enqueue Packet buffers */
2030                 if (can_push)
2031                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2032                 else
2033                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2034                                                       in_order);
2035
2036                 virtio_update_packet_stats(&txvq->stats, txm);
2037         }
2038
2039         txvq->stats.packets += nb_tx;
2040
2041         if (likely(nb_tx)) {
2042                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2043                         virtqueue_notify(vq);
2044                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2045                 }
2046         }
2047
2048         return nb_tx;
2049 }
2050
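/*
 * Default Tx burst for split virtqueues.  Depending on the negotiated
 * features a packet is sent either with the header pushed into the mbuf
 * headroom, through a single indirect descriptor, or as a chain of
 * nb_segs + 1 descriptors.
 */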
2051 uint16_t
2052 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2053 {
2054         struct virtnet_tx *txvq = tx_queue;
2055         struct virtqueue *vq = txvq->vq;
2056         struct virtio_hw *hw = vq->hw;
2057         uint16_t hdr_size = hw->vtnet_hdr_size;
2058         uint16_t nb_used, nb_tx = 0;
2059         int error;
2060
2061         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2062                 return nb_tx;
2063
2064         if (unlikely(nb_pkts < 1))
2065                 return nb_pkts;
2066
2067         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2068         nb_used = VIRTQUEUE_NUSED(vq);
2069
2070         virtio_rmb(hw->weak_barriers);
2071         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2072                 virtio_xmit_cleanup(vq, nb_used);
2073
2074         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2075                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2076                 int can_push = 0, use_indirect = 0, slots, need;
2077
2078                 /* Do VLAN tag insertion */
2079                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2080                         error = rte_vlan_insert(&txm);
2081                         if (unlikely(error)) {
2082                                 rte_pktmbuf_free(txm);
2083                                 continue;
2084                         }
2085                         /* vlan_insert may add a header mbuf */
2086                         tx_pkts[nb_tx] = txm;
2087                 }
2088
2089                 /* optimize ring usage */
2090                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2091                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2092                     rte_mbuf_refcnt_read(txm) == 1 &&
2093                     RTE_MBUF_DIRECT(txm) &&
2094                     txm->nb_segs == 1 &&
2095                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2096                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2097                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2098                         can_push = 1;
2099                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2100                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2101                         use_indirect = 1;
2102
2103                 /* How many main ring entries are needed for this Tx?
2104                  * any_layout => number of segments
2105                  * indirect   => 1
2106                  * default    => number of segments + 1
2107                  */
2108                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2109                 need = slots - vq->vq_free_cnt;
2110
2111                 /* A positive value indicates more free vring descriptors are needed */
2112                 if (unlikely(need > 0)) {
2113                         nb_used = VIRTQUEUE_NUSED(vq);
2114                         virtio_rmb(hw->weak_barriers);
2115                         need = RTE_MIN(need, (int)nb_used);
2116
2117                         virtio_xmit_cleanup(vq, need);
2118                         need = slots - vq->vq_free_cnt;
2119                         if (unlikely(need > 0)) {
2120                                 PMD_TX_LOG(ERR,
2121                                            "No free tx descriptors to transmit");
2122                                 break;
2123                         }
2124                 }
2125
2126                 /* Enqueue Packet buffers */
2127                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2128                         can_push, 0);
2129
2130                 virtio_update_packet_stats(&txvq->stats, txm);
2131         }
2132
2133         txvq->stats.packets += nb_tx;
2134
2135         if (likely(nb_tx)) {
2136                 vq_update_avail_idx(vq);
2137
2138                 if (unlikely(virtqueue_kick_prepare(vq))) {
2139                         virtqueue_notify(vq);
2140                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2141                 }
2142         }
2143
2144         return nb_tx;
2145 }
2146
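/*
 * Tx burst for split virtqueues negotiated with VIRTIO_F_IN_ORDER.
 * Packets that allow the header to be pushed into the mbuf headroom are
 * collected and enqueued in batches of one descriptor each; the rest go
 * through the regular enqueue path with nb_segs + 1 descriptors.
 */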
2147 uint16_t
2148 virtio_xmit_pkts_inorder(void *tx_queue,
2149                         struct rte_mbuf **tx_pkts,
2150                         uint16_t nb_pkts)
2151 {
2152         struct virtnet_tx *txvq = tx_queue;
2153         struct virtqueue *vq = txvq->vq;
2154         struct virtio_hw *hw = vq->hw;
2155         uint16_t hdr_size = hw->vtnet_hdr_size;
2156         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2157         struct rte_mbuf *inorder_pkts[nb_pkts];
2158         int error;
2159
2160         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2161                 return nb_tx;
2162
2163         if (unlikely(nb_pkts < 1))
2164                 return nb_pkts;
2165
2166         VIRTQUEUE_DUMP(vq);
2167         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2168         nb_used = VIRTQUEUE_NUSED(vq);
2169
2170         virtio_rmb(hw->weak_barriers);
2171         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2172                 virtio_xmit_cleanup_inorder(vq, nb_used);
2173
2174         if (unlikely(!vq->vq_free_cnt))
2175                 virtio_xmit_cleanup_inorder(vq, nb_used);
2176
2177         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2178
2179         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2180                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2181                 int slots, need;
2182
2183                 /* Do VLAN tag insertion */
2184                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2185                         error = rte_vlan_insert(&txm);
2186                         if (unlikely(error)) {
2187                                 rte_pktmbuf_free(txm);
2188                                 continue;
2189                         }
2190                         /* vlan_insert may add a header mbuf */
2191                         tx_pkts[nb_tx] = txm;
2192                 }
2193
2194                 /* optimize ring usage */
2195                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2196                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2197                      rte_mbuf_refcnt_read(txm) == 1 &&
2198                      RTE_MBUF_DIRECT(txm) &&
2199                      txm->nb_segs == 1 &&
2200                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2201                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2202                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2203                         inorder_pkts[nb_inorder_pkts] = txm;
2204                         nb_inorder_pkts++;
2205
2206                         virtio_update_packet_stats(&txvq->stats, txm);
2207                         continue;
2208                 }
2209
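                /*
                 * Flush the batch of header-pushed packets first so that
                 * descriptor order matches packet order before this packet
                 * takes the regular enqueue path.
                 */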
2210                 if (nb_inorder_pkts) {
2211                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2212                                                         nb_inorder_pkts);
2213                         nb_inorder_pkts = 0;
2214                 }
2215
2216                 slots = txm->nb_segs + 1;
2217                 need = slots - vq->vq_free_cnt;
2218                 if (unlikely(need > 0)) {
2219                         nb_used = VIRTQUEUE_NUSED(vq);
2220                         virtio_rmb(hw->weak_barriers);
2221                         need = RTE_MIN(need, (int)nb_used);
2222
2223                         virtio_xmit_cleanup_inorder(vq, need);
2224
2225                         need = slots - vq->vq_free_cnt;
2226
2227                         if (unlikely(need > 0)) {
2228                                 PMD_TX_LOG(ERR,
2229                                         "No free tx descriptors to transmit");
2230                                 break;
2231                         }
2232                 }
2233                 /* Enqueue Packet buffers */
2234                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2235
2236                 virtio_update_packet_stats(&txvq->stats, txm);
2237         }
2238
2239         /* Transmit all inorder packets */
2240         if (nb_inorder_pkts)
2241                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2242                                                 nb_inorder_pkts);
2243
2244         txvq->stats.packets += nb_tx;
2245
2246         if (likely(nb_tx)) {
2247                 vq_update_avail_idx(vq);
2248
2249                 if (unlikely(virtqueue_kick_prepare(vq))) {
2250                         virtqueue_notify(vq);
2251                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2252                 }
2253         }
2254
2255         VIRTQUEUE_DUMP(vq);
2256
2257         return nb_tx;
2258 }