net/virtio: optimize Tx enqueue for packed ring
[dpdk.git] drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * the newly freed chain. If the virtqueue was completely used,
79          * then the head would be VQ_RING_DESC_CHAIN_END.
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
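/*
 * Example of the free-list bookkeeping above (indices are illustrative):
 * freeing the chain 3 -> 4 -> 5 while the current free list ends at
 * descriptor 7 results in desc[7].next = 3, vq_desc_tail_idx = 5 and
 * desc[5].next = VQ_RING_DESC_CHAIN_END, i.e. the freed chain is appended
 * to the tail of the existing free list.
 */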
91
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->ring_packed.desc_packed;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
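/*
 * Note on the packed-ring dequeue above: desc_is_used() compares the
 * descriptor's AVAIL/USED flag bits against the ring's used_wrap_counter,
 * so no separate used ring is consulted. The consumer index simply wraps
 * modulo vq_nentries and toggles the wrap counter, and the read barrier
 * after the check keeps the len/id loads from being reordered ahead of
 * the flags check on weakly ordered CPUs.
 */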
150
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
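/*
 * Note on the in-order cleanup above: with VIRTIO_F_IN_ORDER the device
 * may mark only the last descriptor of a batch as used, reporting the
 * buffer id of that last buffer. The inner loop therefore walks the ring
 * in submission order, freeing every pending cookie until the reported
 * id is reached, which reclaims the whole batch at once.
 */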
258
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
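/*
 * Layout used by the refill above: each descriptor points at the mbuf's
 * headroom minus the virtio-net header size, so the device writes the
 * header immediately in front of the packet data. For illustration, with
 * the default 128-byte RTE_PKTMBUF_HEADROOM and a 12-byte header
 * (mergeable buffers / modern device) the descriptor covers
 * buf_addr + 116 for buf_len - 116 bytes; with a 10-byte legacy header
 * the offset is 118 instead.
 */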
385
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
428
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
434         uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->avail_wrap_counter ^= 1;
464                         vq->avail_used_flags =
465                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
466                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
467                         flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
468                 }
469         }
470         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
471         return 0;
472 }
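/*
 * Note on the packed-ring refill above: the descriptor becomes visible to
 * the device only through the final flags store, which carries the current
 * AVAIL/USED wrap bits, so the write barrier before it guarantees that
 * addr/len are observable first. The wrap bits are recomputed each time
 * vq_avail_idx wraps past vq_nentries.
 */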
473
474 /* When doing TSO, the IP payload length is not included in the pseudo
475  * header checksum of the packet given to the PMD, but virtio expects it
476  * to be included.
477  */
478 static void
479 virtio_tso_fix_cksum(struct rte_mbuf *m)
480 {
481         /* common case: header is not fragmented */
482         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
483                         m->l4_len)) {
484                 struct ipv4_hdr *iph;
485                 struct ipv6_hdr *ip6h;
486                 struct tcp_hdr *th;
487                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
488                 uint32_t tmp;
489
490                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
491                 th = RTE_PTR_ADD(iph, m->l3_len);
492                 if ((iph->version_ihl >> 4) == 4) {
493                         iph->hdr_checksum = 0;
494                         iph->hdr_checksum = rte_ipv4_cksum(iph);
495                         ip_len = iph->total_length;
496                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
497                                 m->l3_len);
498                 } else {
499                         ip6h = (struct ipv6_hdr *)iph;
500                         ip_paylen = ip6h->payload_len;
501                 }
502
503                 /* add the ip_paylen that was left out of the phdr checksum */
504                 prev_cksum = th->cksum;
505                 tmp = prev_cksum;
506                 tmp += ip_paylen;
507                 tmp = (tmp & 0xffff) + (tmp >> 16);
508                 new_cksum = tmp;
509
510                 /* replace it in the packet */
511                 th->cksum = new_cksum;
512         }
513 }
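/*
 * Worked example for the ones'-complement adjustment above (illustrative
 * 16-bit values): with prev_cksum = 0xfff0 and ip_paylen = 0x0020 the sum
 * is 0x10010; folding the carry gives 0x0010 + 0x1 = 0x0011, which becomes
 * the new pseudo-header checksum with the payload length included.
 */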
514
515
516 /* avoid the write operation when it is not needed, to lessen cache issues */
517 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
518         if ((var) != (val))                     \
519                 (var) = (val);                  \
520 } while (0)
521
522 #define virtqueue_clear_net_hdr(_hdr) do {              \
523         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
524         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
525         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
526         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
527         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
528         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
529 } while (0)
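/*
 * The conditional stores above matter because the virtio-net header (in
 * the mbuf headroom for the can_push case, or in the reserved tx region
 * otherwise) is rewritten for every transmit; skipping stores whose value
 * is already correct avoids dirtying the cache line when offloads are not
 * in use.
 */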
530
531 static inline void
532 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
533                         struct rte_mbuf *cookie,
534                         bool offload)
535 {
536         if (offload) {
537                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
538                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
539
540                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
541                 case PKT_TX_UDP_CKSUM:
542                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
543                         hdr->csum_offset = offsetof(struct udp_hdr,
544                                 dgram_cksum);
545                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
546                         break;
547
548                 case PKT_TX_TCP_CKSUM:
549                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
550                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
551                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
552                         break;
553
554                 default:
555                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
556                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
557                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
558                         break;
559                 }
560
561                 /* TCP Segmentation Offload */
562                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
563                         virtio_tso_fix_cksum(cookie);
564                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
565                                 VIRTIO_NET_HDR_GSO_TCPV6 :
566                                 VIRTIO_NET_HDR_GSO_TCPV4;
567                         hdr->gso_size = cookie->tso_segsz;
568                         hdr->hdr_len =
569                                 cookie->l2_len +
570                                 cookie->l3_len +
571                                 cookie->l4_len;
572                 } else {
573                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
574                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
575                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
576                 }
577         }
578 }
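/*
 * Checksum offload example for the mapping above (illustrative lengths):
 * a TCP packet with l2_len = 14 and l3_len = 20 gets csum_start = 34 and
 * csum_offset = offsetof(struct tcp_hdr, cksum) = 16, i.e. the device is
 * asked to checksum from the TCP header onwards and store the result
 * 16 bytes into it, as signalled by VIRTIO_NET_HDR_F_NEEDS_CSUM.
 */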
579
580 static inline void
581 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
582                         struct rte_mbuf **cookies,
583                         uint16_t num)
584 {
585         struct vq_desc_extra *dxp;
586         struct virtqueue *vq = txvq->vq;
587         struct vring_desc *start_dp;
588         struct virtio_net_hdr *hdr;
589         uint16_t idx;
590         uint16_t head_size = vq->hw->vtnet_hdr_size;
591         uint16_t i = 0;
592
593         idx = vq->vq_desc_head_idx;
594         start_dp = vq->vq_ring.desc;
595
596         while (i < num) {
597                 idx = idx & (vq->vq_nentries - 1);
598                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
599                 dxp->cookie = (void *)cookies[i];
600                 dxp->ndescs = 1;
601
602                 hdr = (struct virtio_net_hdr *)
603                         rte_pktmbuf_prepend(cookies[i], head_size);
604                 cookies[i]->pkt_len -= head_size;
605
606                 /* if offload disabled, hdr is not zeroed yet, do it now */
607                 if (!vq->hw->has_tx_offload)
608                         virtqueue_clear_net_hdr(hdr);
609                 else
610                         virtqueue_xmit_offload(hdr, cookies[i], true);
611
612                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
613                 start_dp[idx].len   = cookies[i]->data_len;
614                 start_dp[idx].flags = 0;
615
616                 vq_update_avail_ring(vq, idx);
617
618                 idx++;
619                 i++;
620                 }
621
622         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
623         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
624 }
625
626 static inline void
627 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
628                                    struct rte_mbuf *cookie,
629                                    int in_order)
630 {
631         struct virtqueue *vq = txvq->vq;
632         struct vring_packed_desc *dp;
633         struct vq_desc_extra *dxp;
634         uint16_t idx, id, flags;
635         uint16_t head_size = vq->hw->vtnet_hdr_size;
636         struct virtio_net_hdr *hdr;
637
638         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
639         idx = vq->vq_avail_idx;
640         dp = &vq->ring_packed.desc_packed[idx];
641
642         dxp = &vq->vq_descx[id];
643         dxp->ndescs = 1;
644         dxp->cookie = cookie;
645
646         flags = vq->avail_used_flags;
647
648         /* prepend cannot fail, checked by caller */
649         hdr = (struct virtio_net_hdr *)
650                 rte_pktmbuf_prepend(cookie, head_size);
651         cookie->pkt_len -= head_size;
652
653         /* if offload disabled, hdr is not zeroed yet, do it now */
654         if (!vq->hw->has_tx_offload)
655                 virtqueue_clear_net_hdr(hdr);
656         else
657                 virtqueue_xmit_offload(hdr, cookie, true);
658
659         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
660         dp->len  = cookie->data_len;
661         dp->id   = id;
662
663         if (++vq->vq_avail_idx >= vq->vq_nentries) {
664                 vq->vq_avail_idx -= vq->vq_nentries;
665                 vq->avail_wrap_counter ^= 1;
666                 vq->avail_used_flags ^=
667                         VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
668         }
669
670         vq->vq_free_cnt--;
671
672         if (!in_order) {
673                 vq->vq_desc_head_idx = dxp->next;
674                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
675                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
676         }
677
678         virtio_wmb(vq->hw->weak_barriers);
679         dp->flags = flags;
680 }
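/*
 * This "fast" path covers the common single-segment, can_push case with
 * exactly one descriptor: the net header is pushed into the mbuf headroom,
 * id/addr/len are filled first, and the flags word (carrying the
 * AVAIL/USED wrap bits) is stored last, after the write barrier, so the
 * device never observes a partially written descriptor.
 */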
681
682 static inline void
683 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
684                               uint16_t needed, int can_push, int in_order)
685 {
686         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
687         struct vq_desc_extra *dxp;
688         struct virtqueue *vq = txvq->vq;
689         struct vring_packed_desc *start_dp, *head_dp;
690         uint16_t idx, id, head_idx, head_flags;
691         uint16_t head_size = vq->hw->vtnet_hdr_size;
692         struct virtio_net_hdr *hdr;
693         uint16_t prev;
694
695         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
696
697         dxp = &vq->vq_descx[id];
698         dxp->ndescs = needed;
699         dxp->cookie = cookie;
700
701         head_idx = vq->vq_avail_idx;
702         idx = head_idx;
703         prev = head_idx;
704         start_dp = vq->ring_packed.desc_packed;
705
706         head_dp = &vq->ring_packed.desc_packed[idx];
707         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
708         head_flags |= vq->avail_used_flags;
709
710         if (can_push) {
711                 /* prepend cannot fail, checked by caller */
712                 hdr = (struct virtio_net_hdr *)
713                         rte_pktmbuf_prepend(cookie, head_size);
714                 /* rte_pktmbuf_prepend() counts the hdr size in the pkt length,
715                  * which is wrong; the subtraction below restores the correct size.
716                  */
717                 cookie->pkt_len -= head_size;
718
719                 /* if offload disabled, it is not zeroed below, do it now */
720                 if (!vq->hw->has_tx_offload)
721                         virtqueue_clear_net_hdr(hdr);
722         } else {
723                 /* setup first tx ring slot to point to header
724                  * stored in reserved region.
725                  */
726                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
727                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
728                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
729                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
730                 idx++;
731                 if (idx >= vq->vq_nentries) {
732                         idx -= vq->vq_nentries;
733                         vq->avail_wrap_counter ^= 1;
734                         vq->avail_used_flags =
735                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
736                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
737                 }
738         }
739
740         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
741
742         do {
743                 uint16_t flags;
744
745                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
746                 start_dp[idx].len  = cookie->data_len;
747                 if (likely(idx != head_idx)) {
748                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
749                         flags |= vq->avail_used_flags;
750                         start_dp[idx].flags = flags;
751                 }
752                 prev = idx;
753                 idx++;
754                 if (idx >= vq->vq_nentries) {
755                         idx -= vq->vq_nentries;
756                         vq->avail_wrap_counter ^= 1;
757                         vq->avail_used_flags =
758                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
759                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
760                 }
761         } while ((cookie = cookie->next) != NULL);
762
763         start_dp[prev].id = id;
764
765         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
766         vq->vq_avail_idx = idx;
767
768         if (!in_order) {
769                 vq->vq_desc_head_idx = dxp->next;
770                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
771                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
772         }
773
774         virtio_wmb(vq->hw->weak_barriers);
775         head_dp->flags = head_flags;
776 }
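/*
 * For the multi-descriptor path above the same visibility rule applies to
 * the whole chain: every descriptor except the head gets its flags as it
 * is filled, while the head descriptor's flags (head_flags) are written
 * only at the very end, after the barrier, publishing the chain atomically
 * from the device's point of view.
 */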
777
778 static inline void
779 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
780                         uint16_t needed, int use_indirect, int can_push,
781                         int in_order)
782 {
783         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
784         struct vq_desc_extra *dxp;
785         struct virtqueue *vq = txvq->vq;
786         struct vring_desc *start_dp;
787         uint16_t seg_num = cookie->nb_segs;
788         uint16_t head_idx, idx;
789         uint16_t head_size = vq->hw->vtnet_hdr_size;
790         struct virtio_net_hdr *hdr;
791
792         head_idx = vq->vq_desc_head_idx;
793         idx = head_idx;
794         if (in_order)
795                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
796         else
797                 dxp = &vq->vq_descx[idx];
798         dxp->cookie = (void *)cookie;
799         dxp->ndescs = needed;
800
801         start_dp = vq->vq_ring.desc;
802
803         if (can_push) {
804                 /* prepend cannot fail, checked by caller */
805                 hdr = (struct virtio_net_hdr *)
806                         rte_pktmbuf_prepend(cookie, head_size);
807                 /* rte_pktmbuf_prepend() counts the hdr size in the pkt length,
808                  * which is wrong; the subtraction below restores the correct size.
809                  */
810                 cookie->pkt_len -= head_size;
811
812                 /* if offload disabled, it is not zeroed below, do it now */
813                 if (!vq->hw->has_tx_offload)
814                         virtqueue_clear_net_hdr(hdr);
815         } else if (use_indirect) {
816                 /* setup tx ring slot to point to indirect
817                  * descriptor list stored in reserved region.
818                  *
819                  * the first slot in indirect ring is already preset
820                  * to point to the header in reserved region
821                  */
822                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
823                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
824                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
825                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
826                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
827
828                 /* loop below will fill in rest of the indirect elements */
829                 start_dp = txr[idx].tx_indir;
830                 idx = 1;
831         } else {
832                 /* setup first tx ring slot to point to header
833                  * stored in reserved region.
834                  */
835                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
836                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
837                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
838                 start_dp[idx].flags = VRING_DESC_F_NEXT;
839                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
840
841                 idx = start_dp[idx].next;
842         }
843
844         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
845
846         do {
847                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
848                 start_dp[idx].len   = cookie->data_len;
849                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
850                 idx = start_dp[idx].next;
851         } while ((cookie = cookie->next) != NULL);
852
853         if (use_indirect)
854                 idx = vq->vq_ring.desc[head_idx].next;
855
856         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
857
858         vq->vq_desc_head_idx = idx;
859         vq_update_avail_ring(vq, head_idx);
860
861         if (!in_order) {
862                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
863                         vq->vq_desc_tail_idx = idx;
864         }
865 }
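/*
 * The split-ring enqueue above supports three layouts: can_push prepends
 * the net header into the mbuf headroom and uses one descriptor per
 * segment; use_indirect consumes a single ring slot that points at the
 * per-slot indirect table in the reserved header memzone; otherwise the
 * first slot carries only the header from the reserved region and the
 * data segments are chained behind it with VRING_DESC_F_NEXT.
 */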
866
867 void
868 virtio_dev_cq_start(struct rte_eth_dev *dev)
869 {
870         struct virtio_hw *hw = dev->data->dev_private;
871
872         if (hw->cvq && hw->cvq->vq) {
873                 rte_spinlock_init(&hw->cvq->lock);
874                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
875         }
876 }
877
878 int
879 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
880                         uint16_t queue_idx,
881                         uint16_t nb_desc,
882                         unsigned int socket_id __rte_unused,
883                         const struct rte_eth_rxconf *rx_conf __rte_unused,
884                         struct rte_mempool *mp)
885 {
886         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
887         struct virtio_hw *hw = dev->data->dev_private;
888         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
889         struct virtnet_rx *rxvq;
890
891         PMD_INIT_FUNC_TRACE();
892
893         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
894                 nb_desc = vq->vq_nentries;
895         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
896
897         rxvq = &vq->rxq;
898         rxvq->queue_id = queue_idx;
899         rxvq->mpool = mp;
900         if (rxvq->mpool == NULL) {
901                 rte_exit(EXIT_FAILURE,
902                         "Cannot allocate mbufs for rx virtqueue");
903         }
904
905         dev->data->rx_queues[queue_idx] = rxvq;
906
907         return 0;
908 }
909
910 int
911 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
912 {
913         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
914         struct virtio_hw *hw = dev->data->dev_private;
915         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
916         struct virtnet_rx *rxvq = &vq->rxq;
917         struct rte_mbuf *m;
918         uint16_t desc_idx;
919         int error, nbufs, i;
920
921         PMD_INIT_FUNC_TRACE();
922
923         /* Allocate blank mbufs for each rx descriptor */
924         nbufs = 0;
925
926         if (hw->use_simple_rx) {
927                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
928                      desc_idx++) {
929                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
930                         vq->vq_ring.desc[desc_idx].flags =
931                                 VRING_DESC_F_WRITE;
932                 }
933
934                 virtio_rxq_vec_setup(rxvq);
935         }
936
937         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
938         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
939              desc_idx++) {
940                 vq->sw_ring[vq->vq_nentries + desc_idx] =
941                         &rxvq->fake_mbuf;
942         }
943
944         if (hw->use_simple_rx) {
945                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
946                         virtio_rxq_rearm_vec(rxvq);
947                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
948                 }
949         } else if (hw->use_inorder_rx) {
950                 if ((!virtqueue_full(vq))) {
951                         uint16_t free_cnt = vq->vq_free_cnt;
952                         struct rte_mbuf *pkts[free_cnt];
953
954                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
955                                 free_cnt)) {
956                                 error = virtqueue_enqueue_refill_inorder(vq,
957                                                 pkts,
958                                                 free_cnt);
959                                 if (unlikely(error)) {
960                                         for (i = 0; i < free_cnt; i++)
961                                                 rte_pktmbuf_free(pkts[i]);
962                                 }
963                         }
964
965                         nbufs += free_cnt;
966                         vq_update_avail_idx(vq);
967                 }
968         } else {
969                 while (!virtqueue_full(vq)) {
970                         m = rte_mbuf_raw_alloc(rxvq->mpool);
971                         if (m == NULL)
972                                 break;
973
974                         /* Enqueue allocated buffers */
975                         if (vtpci_packed_queue(vq->hw))
976                                 error = virtqueue_enqueue_recv_refill_packed(vq,
977                                                 &m, 1);
978                         else
979                                 error = virtqueue_enqueue_recv_refill(vq,
980                                                 &m, 1);
981                         if (error) {
982                                 rte_pktmbuf_free(m);
983                                 break;
984                         }
985                         nbufs++;
986                 }
987
988                 if (!vtpci_packed_queue(vq->hw))
989                         vq_update_avail_idx(vq);
990         }
991
992         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
993
994         VIRTQUEUE_DUMP(vq);
995
996         return 0;
997 }
998
999 /*
1000  * struct rte_eth_dev *dev: Used to update dev
1001  * uint16_t nb_desc: Defaults to values read from config space
1002  * unsigned int socket_id: Used to allocate memzone
1003  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
1004  * uint16_t queue_idx: Just used as an index in dev txq list
1005  */
1006 int
1007 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1008                         uint16_t queue_idx,
1009                         uint16_t nb_desc,
1010                         unsigned int socket_id __rte_unused,
1011                         const struct rte_eth_txconf *tx_conf)
1012 {
1013         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1014         struct virtio_hw *hw = dev->data->dev_private;
1015         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1016         struct virtnet_tx *txvq;
1017         uint16_t tx_free_thresh;
1018
1019         PMD_INIT_FUNC_TRACE();
1020
1021         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1022                 nb_desc = vq->vq_nentries;
1023         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1024
1025         txvq = &vq->txq;
1026         txvq->queue_id = queue_idx;
1027
1028         tx_free_thresh = tx_conf->tx_free_thresh;
1029         if (tx_free_thresh == 0)
1030                 tx_free_thresh =
1031                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1032
1033         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1034                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1035                         "number of TX entries minus 3 (%u)."
1036                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1037                         vq->vq_nentries - 3,
1038                         tx_free_thresh, dev->data->port_id, queue_idx);
1039                 return -EINVAL;
1040         }
1041
1042         vq->vq_free_thresh = tx_free_thresh;
1043
1044         dev->data->tx_queues[queue_idx] = txvq;
1045         return 0;
1046 }
1047
1048 int
1049 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1050                                 uint16_t queue_idx)
1051 {
1052         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1053         struct virtio_hw *hw = dev->data->dev_private;
1054         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1055
1056         PMD_INIT_FUNC_TRACE();
1057
1058         if (!vtpci_packed_queue(hw)) {
1059                 if (hw->use_inorder_tx)
1060                         vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
1061         }
1062
1063         VIRTQUEUE_DUMP(vq);
1064
1065         return 0;
1066 }
1067
1068 static inline void
1069 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1070 {
1071         int error;
1072         /*
1073          * Requeue the discarded mbuf. This should always be
1074          * successful since it was just dequeued.
1075          */
1076         if (vtpci_packed_queue(vq->hw))
1077                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1078         else
1079                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1080
1081         if (unlikely(error)) {
1082                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1083                 rte_pktmbuf_free(m);
1084         }
1085 }
1086
1087 static inline void
1088 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1089 {
1090         int error;
1091
1092         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1093         if (unlikely(error)) {
1094                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1095                 rte_pktmbuf_free(m);
1096         }
1097 }
1098
1099 static inline void
1100 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1101 {
1102         uint32_t s = mbuf->pkt_len;
1103         struct ether_addr *ea;
1104
1105         stats->bytes += s;
1106
1107         if (s == 64) {
1108                 stats->size_bins[1]++;
1109         } else if (s > 64 && s < 1024) {
1110                 uint32_t bin;
1111
1112                 /* count leading zeros to offset into the correct bin */
1113                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1114                 stats->size_bins[bin]++;
1115         } else {
1116                 if (s < 64)
1117                         stats->size_bins[0]++;
1118                 else if (s < 1519)
1119                         stats->size_bins[6]++;
1120                 else if (s >= 1519)
1121                         stats->size_bins[7]++;
1122         }
1123
1124         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
1125         if (is_multicast_ether_addr(ea)) {
1126                 if (is_broadcast_ether_addr(ea))
1127                         stats->broadcast++;
1128                 else
1129                         stats->multicast++;
1130         }
1131 }
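/*
 * Size-bin example for the histogram above (illustrative): a 300-byte
 * packet has its highest set bit at position 8, so
 * bin = 32 - __builtin_clz(300) - 5 = 32 - 23 - 5 = 4, i.e. the 256-511
 * byte bucket; bins 0, 1, 6 and 7 handle <64, ==64, 1024-1518 and >=1519
 * bytes respectively.
 */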
1132
1133 static inline void
1134 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1135 {
1136         VIRTIO_DUMP_PACKET(m, m->data_len);
1137
1138         virtio_update_packet_stats(&rxvq->stats, m);
1139 }
1140
1141 /* Optionally fill offload information in structure */
1142 static inline int
1143 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1144 {
1145         struct rte_net_hdr_lens hdr_lens;
1146         uint32_t hdrlen, ptype;
1147         int l4_supported = 0;
1148
1149         /* nothing to do */
1150         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1151                 return 0;
1152
1153         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1154
1155         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1156         m->packet_type = ptype;
1157         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1158             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1159             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1160                 l4_supported = 1;
1161
1162         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1163                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1164                 if (hdr->csum_start <= hdrlen && l4_supported) {
1165                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1166                 } else {
1167                         /* Unknown proto or tunnel, do sw cksum. We can assume
1168                          * the cksum field is in the first segment since the
1169                          * buffers we provided to the host are large enough.
1170                          * In case of SCTP, this will be wrong since it's a CRC
1171                          * but there's nothing we can do.
1172                          */
1173                         uint16_t csum = 0, off;
1174
1175                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1176                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1177                                 &csum);
1178                         if (likely(csum != 0xffff))
1179                                 csum = ~csum;
1180                         off = hdr->csum_offset + hdr->csum_start;
1181                         if (rte_pktmbuf_data_len(m) >= off + 1)
1182                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1183                                         off) = csum;
1184                 }
1185         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1186                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1187         }
1188
1189         /* GSO request, save required information in mbuf */
1190         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1191                 /* Check unsupported modes */
1192                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1193                     (hdr->gso_size == 0)) {
1194                         return -EINVAL;
1195                 }
1196
1197                 /* Update mss in mbuf */
1198                 m->tso_segsz = hdr->gso_size;
1199                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1200                         case VIRTIO_NET_HDR_GSO_TCPV4:
1201                         case VIRTIO_NET_HDR_GSO_TCPV6:
1202                                 m->ol_flags |= PKT_RX_LRO | \
1203                                         PKT_RX_L4_CKSUM_NONE;
1204                                 break;
1205                         default:
1206                                 return -EINVAL;
1207                 }
1208         }
1209
1210         return 0;
1211 }
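/*
 * Note on the Rx offload parsing above: with VIRTIO_NET_HDR_F_NEEDS_CSUM
 * the device left only a partial (pseudo-header) checksum, so for
 * unrecognised protocols the driver completes it in software via
 * rte_raw_cksum_mbuf(); DATA_VALID maps to PKT_RX_L4_CKSUM_GOOD, and GSO
 * information is carried over as LRO metadata in tso_segsz.
 */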
1212
1213 #define VIRTIO_MBUF_BURST_SZ 64
1214 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
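/*
 * With 16-byte vring descriptors and a 64-byte cache line this evaluates
 * to 4. The receive paths below trim the burst size so that
 * vq_used_cons_idx ends on a multiple of DESC_PER_CACHELINE; e.g. a
 * request for 32 buffers starting at index 3 is cut to 29, so the next
 * burst begins on a fresh cache line.
 */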
1215 uint16_t
1216 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1217 {
1218         struct virtnet_rx *rxvq = rx_queue;
1219         struct virtqueue *vq = rxvq->vq;
1220         struct virtio_hw *hw = vq->hw;
1221         struct rte_mbuf *rxm, *new_mbuf;
1222         uint16_t nb_used, num, nb_rx;
1223         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1224         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1225         int error;
1226         uint32_t i, nb_enqueued;
1227         uint32_t hdr_size;
1228         struct virtio_net_hdr *hdr;
1229
1230         nb_rx = 0;
1231         if (unlikely(hw->started == 0))
1232                 return nb_rx;
1233
1234         nb_used = VIRTQUEUE_NUSED(vq);
1235
1236         virtio_rmb(hw->weak_barriers);
1237
1238         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1239         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1240                 num = VIRTIO_MBUF_BURST_SZ;
1241         if (likely(num > DESC_PER_CACHELINE))
1242                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1243
1244         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1245         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1246
1247         nb_enqueued = 0;
1248         hdr_size = hw->vtnet_hdr_size;
1249
1250         for (i = 0; i < num ; i++) {
1251                 rxm = rcv_pkts[i];
1252
1253                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1254
1255                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1256                         PMD_RX_LOG(ERR, "Packet drop");
1257                         nb_enqueued++;
1258                         virtio_discard_rxbuf(vq, rxm);
1259                         rxvq->stats.errors++;
1260                         continue;
1261                 }
1262
1263                 rxm->port = rxvq->port_id;
1264                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1265                 rxm->ol_flags = 0;
1266                 rxm->vlan_tci = 0;
1267
1268                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1269                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1270
1271                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1272                         RTE_PKTMBUF_HEADROOM - hdr_size);
1273
1274                 if (hw->vlan_strip)
1275                         rte_vlan_strip(rxm);
1276
1277                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1278                         virtio_discard_rxbuf(vq, rxm);
1279                         rxvq->stats.errors++;
1280                         continue;
1281                 }
1282
1283                 virtio_rx_stats_updated(rxvq, rxm);
1284
1285                 rx_pkts[nb_rx++] = rxm;
1286         }
1287
1288         rxvq->stats.packets += nb_rx;
1289
1290         /* Allocate new mbuf for the used descriptor */
1291         while (likely(!virtqueue_full(vq))) {
1292                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1293                 if (unlikely(new_mbuf == NULL)) {
1294                         struct rte_eth_dev *dev
1295                                 = &rte_eth_devices[rxvq->port_id];
1296                         dev->data->rx_mbuf_alloc_failed++;
1297                         break;
1298                 }
1299                 error = virtqueue_enqueue_recv_refill(vq, &new_mbuf, 1);
1300                 if (unlikely(error)) {
1301                         rte_pktmbuf_free(new_mbuf);
1302                         break;
1303                 }
1304                 nb_enqueued++;
1305         }
1306
1307         if (likely(nb_enqueued)) {
1308                 vq_update_avail_idx(vq);
1309
1310                 if (unlikely(virtqueue_kick_prepare(vq))) {
1311                         virtqueue_notify(vq);
1312                         PMD_RX_LOG(DEBUG, "Notified");
1313                 }
1314         }
1315
1316         return nb_rx;
1317 }
1318
1319 uint16_t
1320 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1321                         uint16_t nb_pkts)
1322 {
1323         struct virtnet_rx *rxvq = rx_queue;
1324         struct virtqueue *vq = rxvq->vq;
1325         struct virtio_hw *hw = vq->hw;
1326         struct rte_mbuf *rxm, *new_mbuf;
1327         uint16_t num, nb_rx;
1328         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1329         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1330         int error;
1331         uint32_t i, nb_enqueued;
1332         uint32_t hdr_size;
1333         struct virtio_net_hdr *hdr;
1334
1335         nb_rx = 0;
1336         if (unlikely(hw->started == 0))
1337                 return nb_rx;
1338
1339         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1340         if (likely(num > DESC_PER_CACHELINE))
1341                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1342
1343         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1344         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1345
1346         nb_enqueued = 0;
1347         hdr_size = hw->vtnet_hdr_size;
1348
1349         for (i = 0; i < num; i++) {
1350                 rxm = rcv_pkts[i];
1351
1352                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1353
1354                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1355                         PMD_RX_LOG(ERR, "Packet drop");
1356                         nb_enqueued++;
1357                         virtio_discard_rxbuf(vq, rxm);
1358                         rxvq->stats.errors++;
1359                         continue;
1360                 }
1361
1362                 rxm->port = rxvq->port_id;
1363                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1364                 rxm->ol_flags = 0;
1365                 rxm->vlan_tci = 0;
1366
1367                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1368                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1369
1370                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1371                         RTE_PKTMBUF_HEADROOM - hdr_size);
1372
1373                 if (hw->vlan_strip)
1374                         rte_vlan_strip(rxm);
1375
1376                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1377                         virtio_discard_rxbuf(vq, rxm);
1378                         rxvq->stats.errors++;
1379                         continue;
1380                 }
1381
1382                 virtio_rx_stats_updated(rxvq, rxm);
1383
1384                 rx_pkts[nb_rx++] = rxm;
1385         }
1386
1387         rxvq->stats.packets += nb_rx;
1388
1389         /* Allocate new mbuf for the used descriptor */
1390         while (likely(!virtqueue_full(vq))) {
1391                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1392                 if (unlikely(new_mbuf == NULL)) {
1393                         struct rte_eth_dev *dev =
1394                                 &rte_eth_devices[rxvq->port_id];
1395                         dev->data->rx_mbuf_alloc_failed++;
1396                         break;
1397                 }
1398                 error = virtqueue_enqueue_recv_refill_packed(vq, &new_mbuf, 1);
1399                 if (unlikely(error)) {
1400                         rte_pktmbuf_free(new_mbuf);
1401                         break;
1402                 }
1403                 nb_enqueued++;
1404         }
1405
1406         if (likely(nb_enqueued)) {
1407                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1408                         virtqueue_notify(vq);
1409                         PMD_RX_LOG(DEBUG, "Notified");
1410                 }
1411         }
1412
1413         return nb_rx;
1414 }
1415
1416
1417 uint16_t
1418 virtio_recv_pkts_inorder(void *rx_queue,
1419                         struct rte_mbuf **rx_pkts,
1420                         uint16_t nb_pkts)
1421 {
1422         struct virtnet_rx *rxvq = rx_queue;
1423         struct virtqueue *vq = rxvq->vq;
1424         struct virtio_hw *hw = vq->hw;
1425         struct rte_mbuf *rxm;
1426         struct rte_mbuf *prev;
1427         uint16_t nb_used, num, nb_rx;
1428         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1429         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1430         int error;
1431         uint32_t nb_enqueued;
1432         uint32_t seg_num;
1433         uint32_t seg_res;
1434         uint32_t hdr_size;
1435         int32_t i;
1436
1437         nb_rx = 0;
1438         if (unlikely(hw->started == 0))
1439                 return nb_rx;
1440
1441         nb_used = VIRTQUEUE_NUSED(vq);
1442         nb_used = RTE_MIN(nb_used, nb_pkts);
1443         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1444
1445         virtio_rmb(hw->weak_barriers);
1446
1447         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1448
1449         nb_enqueued = 0;
1450         seg_num = 1;
1451         seg_res = 0;
1452         hdr_size = hw->vtnet_hdr_size;
1453
1454         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1455
1456         for (i = 0; i < num; i++) {
1457                 struct virtio_net_hdr_mrg_rxbuf *header;
1458
1459                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1460                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1461
1462                 rxm = rcv_pkts[i];
1463
1464                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1465                         PMD_RX_LOG(ERR, "Packet drop");
1466                         nb_enqueued++;
1467                         virtio_discard_rxbuf_inorder(vq, rxm);
1468                         rxvq->stats.errors++;
1469                         continue;
1470                 }
1471
1472                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1473                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1474                          - hdr_size);
1475
1476                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1477                         seg_num = header->num_buffers;
1478                         if (seg_num == 0)
1479                                 seg_num = 1;
1480                 } else {
1481                         seg_num = 1;
1482                 }
1483
1484                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1485                 rxm->nb_segs = seg_num;
1486                 rxm->ol_flags = 0;
1487                 rxm->vlan_tci = 0;
1488                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1489                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1490
1491                 rxm->port = rxvq->port_id;
1492
1493                 rx_pkts[nb_rx] = rxm;
1494                 prev = rxm;
1495
1496                 if (vq->hw->has_rx_offload &&
1497                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1498                         virtio_discard_rxbuf_inorder(vq, rxm);
1499                         rxvq->stats.errors++;
1500                         continue;
1501                 }
1502
1503                 if (hw->vlan_strip)
1504                         rte_vlan_strip(rx_pkts[nb_rx]);
1505
1506                 seg_res = seg_num - 1;
1507
1508                 /* Merge remaining segments */
1509                 while (seg_res != 0 && i < (num - 1)) {
1510                         i++;
1511
1512                         rxm = rcv_pkts[i];
1513                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1514                         rxm->pkt_len = (uint32_t)(len[i]);
1515                         rxm->data_len = (uint16_t)(len[i]);
1516
1517                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1518                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1519
1520                         if (prev)
1521                                 prev->next = rxm;
1522
1523                         prev = rxm;
1524                         seg_res -= 1;
1525                 }
1526
1527                 if (!seg_res) {
1528                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1529                         nb_rx++;
1530                 }
1531         }
1532
1533         /* The last packet may still need its remaining segments merged */
1534         while (seg_res != 0) {
1535                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1536                                         VIRTIO_MBUF_BURST_SZ);
1537
1538                 prev = rcv_pkts[nb_rx];
1539                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1540                         virtio_rmb(hw->weak_barriers);
1541                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1542                                                            rcv_cnt);
1543                         uint16_t extra_idx = 0;
1544
1545                         rcv_cnt = num;
1546                         while (extra_idx < rcv_cnt) {
1547                                 rxm = rcv_pkts[extra_idx];
1548                                 rxm->data_off =
1549                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1550                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1551                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1552                                 prev->next = rxm;
1553                                 prev = rxm;
1554                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1555                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1556                                 extra_idx += 1;
1557                         }
1558                         seg_res -= rcv_cnt;
1559
1560                         if (!seg_res) {
1561                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1562                                 nb_rx++;
1563                         }
1564                 } else {
1565                         PMD_RX_LOG(ERR,
1566                                         "Not enough segments for packet.");
1567                         virtio_discard_rxbuf_inorder(vq, prev);
1568                         rxvq->stats.errors++;
1569                         break;
1570                 }
1571         }
1572
1573         rxvq->stats.packets += nb_rx;
1574
1575         /* Allocate new mbufs for the used descriptors */
1576
1577         if (likely(!virtqueue_full(vq))) {
1578                 /* free_cnt may include mrg descs */
1579                 uint16_t free_cnt = vq->vq_free_cnt;
1580                 struct rte_mbuf *new_pkts[free_cnt];
1581
1582                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1583                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1584                                         free_cnt);
1585                         if (unlikely(error)) {
1586                                 for (i = 0; i < free_cnt; i++)
1587                                         rte_pktmbuf_free(new_pkts[i]);
1588                         }
1589                         nb_enqueued += free_cnt;
1590                 } else {
1591                         struct rte_eth_dev *dev =
1592                                 &rte_eth_devices[rxvq->port_id];
1593                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1594                 }
1595         }
1596
1597         if (likely(nb_enqueued)) {
1598                 vq_update_avail_idx(vq);
1599
1600                 if (unlikely(virtqueue_kick_prepare(vq))) {
1601                         virtqueue_notify(vq);
1602                         PMD_RX_LOG(DEBUG, "Notified");
1603                 }
1604         }
1605
1606         return nb_rx;
1607 }
1608
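/*
 * Receive burst for split virtqueues with mergeable Rx buffers
 * (VIRTIO_NET_F_MRG_RXBUF): a packet may span several descriptors, and the
 * mbuf chain is rebuilt from the num_buffers count in the virtio-net header.
 */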
1609 uint16_t
1610 virtio_recv_mergeable_pkts(void *rx_queue,
1611                         struct rte_mbuf **rx_pkts,
1612                         uint16_t nb_pkts)
1613 {
1614         struct virtnet_rx *rxvq = rx_queue;
1615         struct virtqueue *vq = rxvq->vq;
1616         struct virtio_hw *hw = vq->hw;
1617         struct rte_mbuf *rxm;
1618         struct rte_mbuf *prev;
1619         uint16_t nb_used, num, nb_rx = 0;
1620         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1621         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1622         int error;
1623         uint32_t nb_enqueued = 0;
1624         uint32_t seg_num = 0;
1625         uint32_t seg_res = 0;
1626         uint32_t hdr_size = hw->vtnet_hdr_size;
1627         int32_t i;
1628
1629         if (unlikely(hw->started == 0))
1630                 return nb_rx;
1631
1632         nb_used = VIRTQUEUE_NUSED(vq);
1633
1634         virtio_rmb(hw->weak_barriers);
1635
1636         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1637
1638         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1639         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1640                 num = VIRTIO_MBUF_BURST_SZ;
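        /* Trim the burst so the used consumer index ends on a descriptor
         * cache-line boundary; this should keep a cache line of used ring
         * entries from being split across two polls.
         */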
1641         if (likely(num > DESC_PER_CACHELINE))
1642                 num = num - ((vq->vq_used_cons_idx + num) %
1643                                 DESC_PER_CACHELINE);
1644
1646         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1647
1648         for (i = 0; i < num; i++) {
1649                 struct virtio_net_hdr_mrg_rxbuf *header;
1650
1651                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1652                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1653
1654                 rxm = rcv_pkts[i];
1655
1656                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1657                         PMD_RX_LOG(ERR, "Packet drop");
1658                         nb_enqueued++;
1659                         virtio_discard_rxbuf(vq, rxm);
1660                         rxvq->stats.errors++;
1661                         continue;
1662                 }
1663
1664                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1665                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1666                          - hdr_size);
1667                 seg_num = header->num_buffers;
1668                 if (seg_num == 0)
1669                         seg_num = 1;
1670
1671                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1672                 rxm->nb_segs = seg_num;
1673                 rxm->ol_flags = 0;
1674                 rxm->vlan_tci = 0;
1675                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1676                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1677
1678                 rxm->port = rxvq->port_id;
1679
1680                 rx_pkts[nb_rx] = rxm;
1681                 prev = rxm;
1682
1683                 if (hw->has_rx_offload &&
1684                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1685                         virtio_discard_rxbuf(vq, rxm);
1686                         rxvq->stats.errors++;
1687                         continue;
1688                 }
1689
1690                 if (hw->vlan_strip)
1691                         rte_vlan_strip(rx_pkts[nb_rx]);
1692
1693                 seg_res = seg_num - 1;
1694
1695                 /* Merge remaining segments */
1696                 while (seg_res != 0 && i < (num - 1)) {
1697                         i++;
1698
1699                         rxm = rcv_pkts[i];
1700                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1701                         rxm->pkt_len = (uint32_t)(len[i]);
1702                         rxm->data_len = (uint16_t)(len[i]);
1703
1704                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1705                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1706
1707                         if (prev)
1708                                 prev->next = rxm;
1709
1710                         prev = rxm;
1711                         seg_res -= 1;
1712                 }
1713
1714                 if (!seg_res) {
1715                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1716                         nb_rx++;
1717                 }
1718         }
1719
1720         /* The last packet may still need its remaining segments merged */
1721         while (seg_res != 0) {
1722                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1723                                         VIRTIO_MBUF_BURST_SZ);
1724
1725                 prev = rcv_pkts[nb_rx];
1726                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1727                         virtio_rmb(hw->weak_barriers);
1728                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1729                                                            rcv_cnt);
1730                         uint16_t extra_idx = 0;
1731
1732                         rcv_cnt = num;
1733                         while (extra_idx < rcv_cnt) {
1734                                 rxm = rcv_pkts[extra_idx];
1735                                 rxm->data_off =
1736                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1737                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1738                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1739                                 prev->next = rxm;
1740                                 prev = rxm;
1741                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1742                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1743                                 extra_idx += 1;
1744                         }
1745                         seg_res -= rcv_cnt;
1746
1747                         if (!seg_res) {
1748                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1749                                 nb_rx++;
1750                         }
1751                 } else {
1752                         PMD_RX_LOG(ERR,
1753                                         "Not enough segments for packet.");
1754                         virtio_discard_rxbuf(vq, prev);
1755                         rxvq->stats.errors++;
1756                         break;
1757                 }
1758         }
1759
1760         rxvq->stats.packets += nb_rx;
1761
1762         /* Allocate new mbufs for the used descriptors */
1763         if (likely(!virtqueue_full(vq))) {
1764                 /* free_cnt may include mrg descs */
1765                 uint16_t free_cnt = vq->vq_free_cnt;
1766                 struct rte_mbuf *new_pkts[free_cnt];
1767
1768                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1769                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1770                                         free_cnt);
1771                         if (unlikely(error)) {
1772                                 for (i = 0; i < free_cnt; i++)
1773                                         rte_pktmbuf_free(new_pkts[i]);
1774                         }
1775                         nb_enqueued += free_cnt;
1776                 } else {
1777                         struct rte_eth_dev *dev =
1778                                 &rte_eth_devices[rxvq->port_id];
1779                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1780                 }
1781         }
1782
1783         if (likely(nb_enqueued)) {
1784                 vq_update_avail_idx(vq);
1785
1786                 if (unlikely(virtqueue_kick_prepare(vq))) {
1787                         virtqueue_notify(vq);
1788                         PMD_RX_LOG(DEBUG, "Notified");
1789                 }
1790         }
1791
1792         return nb_rx;
1793 }
1794
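/*
 * Receive burst for packed virtqueues with mergeable Rx buffers: same
 * segment reassembly as the split-ring variant, but descriptors are dequeued
 * and refilled through the packed-ring helpers and the device is notified
 * only when the packed kick conditions are met.
 */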
1795 uint16_t
1796 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1797                         struct rte_mbuf **rx_pkts,
1798                         uint16_t nb_pkts)
1799 {
1800         struct virtnet_rx *rxvq = rx_queue;
1801         struct virtqueue *vq = rxvq->vq;
1802         struct virtio_hw *hw = vq->hw;
1803         struct rte_mbuf *rxm;
1804         struct rte_mbuf *prev = NULL;
1805         uint16_t num, nb_rx = 0;
1806         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1807         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1808         uint32_t nb_enqueued = 0;
1809         uint32_t seg_num = 0;
1810         uint32_t seg_res = 0;
1811         uint32_t hdr_size = hw->vtnet_hdr_size;
1812         int32_t i;
1813         int error;
1814
1815         if (unlikely(hw->started == 0))
1816                 return nb_rx;
1817
1819         num = nb_pkts;
1820         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1821                 num = VIRTIO_MBUF_BURST_SZ;
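        /* As in the split-ring path, end the burst on a descriptor
         * cache-line boundary.
         */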
1822         if (likely(num > DESC_PER_CACHELINE))
1823                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1824
1825         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1826
1827         for (i = 0; i < num; i++) {
1828                 struct virtio_net_hdr_mrg_rxbuf *header;
1829
1830                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1831                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1832
1833                 rxm = rcv_pkts[i];
1834
1835                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1836                         PMD_RX_LOG(ERR, "Packet drop");
1837                         nb_enqueued++;
1838                         virtio_discard_rxbuf(vq, rxm);
1839                         rxvq->stats.errors++;
1840                         continue;
1841                 }
1842
1843                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1844                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1845                 seg_num = header->num_buffers;
1846
1847                 if (seg_num == 0)
1848                         seg_num = 1;
1849
1850                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1851                 rxm->nb_segs = seg_num;
1852                 rxm->ol_flags = 0;
1853                 rxm->vlan_tci = 0;
1854                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1855                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1856
1857                 rxm->port = rxvq->port_id;
1858                 rx_pkts[nb_rx] = rxm;
1859                 prev = rxm;
1860
1861                 if (hw->has_rx_offload &&
1862                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1863                         virtio_discard_rxbuf(vq, rxm);
1864                         rxvq->stats.errors++;
1865                         continue;
1866                 }
1867
1868                 if (hw->vlan_strip)
1869                         rte_vlan_strip(rx_pkts[nb_rx]);
1870
1871                 seg_res = seg_num - 1;
1872
1873                 /* Merge remaining segments */
1874                 while (seg_res != 0 && i < (num - 1)) {
1875                         i++;
1876
1877                         rxm = rcv_pkts[i];
1878                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1879                         rxm->pkt_len = (uint32_t)(len[i]);
1880                         rxm->data_len = (uint16_t)(len[i]);
1881
1882                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1883                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1884
1885                         if (prev)
1886                                 prev->next = rxm;
1887
1888                         prev = rxm;
1889                         seg_res -= 1;
1890                 }
1891
1892                 if (!seg_res) {
1893                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1894                         nb_rx++;
1895                 }
1896         }
1897
1898         /* The last packet may still need its remaining segments merged */
1899         while (seg_res != 0) {
1900                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1901                                         VIRTIO_MBUF_BURST_SZ);
1902                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1903                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1904                                         len, rcv_cnt);
1905                         uint16_t extra_idx = 0;
1906
1907                         rcv_cnt = num;
1908
1909                         while (extra_idx < rcv_cnt) {
1910                                 rxm = rcv_pkts[extra_idx];
1911
1912                                 rxm->data_off =
1913                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1914                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1915                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1916
1917                                 prev->next = rxm;
1918                                 prev = rxm;
1919                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1920                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1921                                 extra_idx += 1;
1922                         }
1923                         seg_res -= rcv_cnt;
1924                         if (!seg_res) {
1925                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1926                                 nb_rx++;
1927                         }
1928                 } else {
1929                         PMD_RX_LOG(ERR,
1930                                         "Not enough segments for packet.");
1931                         if (prev)
1932                                 virtio_discard_rxbuf(vq, prev);
1933                         rxvq->stats.errors++;
1934                         break;
1935                 }
1936         }
1937
1938         rxvq->stats.packets += nb_rx;
1939
1940         /* Allocate new mbufs for the used descriptors */
1941         if (likely(!virtqueue_full(vq))) {
1942                 /* free_cnt may include mrg descs */
1943                 uint16_t free_cnt = vq->vq_free_cnt;
1944                 struct rte_mbuf *new_pkts[free_cnt];
1945
1946                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1947                         error = virtqueue_enqueue_recv_refill_packed(vq,
1948                                         new_pkts, free_cnt);
1949                         if (unlikely(error)) {
1950                                 for (i = 0; i < free_cnt; i++)
1951                                         rte_pktmbuf_free(new_pkts[i]);
1952                         }
1953                         nb_enqueued += free_cnt;
1954                 } else {
1955                         struct rte_eth_dev *dev =
1956                                 &rte_eth_devices[rxvq->port_id];
1957                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1958                 }
1959         }
1960
1961         if (likely(nb_enqueued)) {
1962                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1963                         virtqueue_notify(vq);
1964                         PMD_RX_LOG(DEBUG, "Notified");
1965                 }
1966         }
1967
1968         return nb_rx;
1969 }
1970
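/*
 * Transmit burst for packed virtqueues. Packets whose headroom can absorb
 * the virtio-net header take the fast path
 * (virtqueue_enqueue_xmit_packed_fast); everything else goes through the
 * generic packed enqueue, reclaiming used descriptors on demand.
 */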
1971 uint16_t
1972 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1973                         uint16_t nb_pkts)
1974 {
1975         struct virtnet_tx *txvq = tx_queue;
1976         struct virtqueue *vq = txvq->vq;
1977         struct virtio_hw *hw = vq->hw;
1978         uint16_t hdr_size = hw->vtnet_hdr_size;
1979         uint16_t nb_tx = 0;
1980         bool in_order = hw->use_inorder_tx;
1981         int error;
1982
1983         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1984                 return nb_tx;
1985
1986         if (unlikely(nb_pkts < 1))
1987                 return nb_pkts;
1988
1989         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1990
1991         if (nb_pkts > vq->vq_free_cnt)
1992                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1993                                            in_order);
1994
1995         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1996                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1997                 int can_push = 0, slots, need;
1998
1999                 /* Do VLAN tag insertion */
2000                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2001                         error = rte_vlan_insert(&txm);
2002                         if (unlikely(error)) {
2003                                 rte_pktmbuf_free(txm);
2004                                 continue;
2005                         }
2006                 }
2007
2008                 /* optimize ring usage */
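                /* The header can be pushed into the mbuf headroom only when
                 * the device accepts any descriptor layout and the mbuf is a
                 * single, unshared, direct segment with enough aligned
                 * headroom for the virtio-net header.
                 */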
2009                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2010                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2011                     rte_mbuf_refcnt_read(txm) == 1 &&
2012                     RTE_MBUF_DIRECT(txm) &&
2013                     txm->nb_segs == 1 &&
2014                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2015                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2016                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2017                         can_push = 1;
2018
2019                 /* How many main ring entries are needed for this Tx?
2020                  * any_layout => number of segments
2021                  * default    => number of segments + 1
2022                  */
2023                 slots = txm->nb_segs + !can_push;
2024                 need = slots - vq->vq_free_cnt;
2025
2026                 /* A positive value indicates that more free vring descriptors are needed */
2027                 if (unlikely(need > 0)) {
2028                         virtio_xmit_cleanup_packed(vq, need, in_order);
2029                         need = slots - vq->vq_free_cnt;
2030                         if (unlikely(need > 0)) {
2031                                 PMD_TX_LOG(ERR,
2032                                            "No free tx descriptors to transmit");
2033                                 break;
2034                         }
2035                 }
2036
2037                 /* Enqueue Packet buffers */
2038                 if (can_push)
2039                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2040                 else
2041                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2042                                                       in_order);
2043
2044                 virtio_update_packet_stats(&txvq->stats, txm);
2045         }
2046
2047         txvq->stats.packets += nb_tx;
2048
2049         if (likely(nb_tx)) {
2050                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2051                         virtqueue_notify(vq);
2052                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2053                 }
2054         }
2055
2056         return nb_tx;
2057 }
2058
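/*
 * Transmit burst for split virtqueues. Depending on the negotiated features,
 * a packet is sent either with the header pushed into its headroom, through
 * a single indirect descriptor, or as a descriptor chain plus a separate
 * header descriptor.
 */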
2059 uint16_t
2060 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2061 {
2062         struct virtnet_tx *txvq = tx_queue;
2063         struct virtqueue *vq = txvq->vq;
2064         struct virtio_hw *hw = vq->hw;
2065         uint16_t hdr_size = hw->vtnet_hdr_size;
2066         uint16_t nb_used, nb_tx = 0;
2067         int error;
2068
2069         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2070                 return nb_tx;
2071
2072         if (unlikely(nb_pkts < 1))
2073                 return nb_pkts;
2074
2075         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2076         nb_used = VIRTQUEUE_NUSED(vq);
2077
2078         virtio_rmb(hw->weak_barriers);
2079         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2080                 virtio_xmit_cleanup(vq, nb_used);
2081
2082         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2083                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2084                 int can_push = 0, use_indirect = 0, slots, need;
2085
2086                 /* Do VLAN tag insertion */
2087                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2088                         error = rte_vlan_insert(&txm);
2089                         if (unlikely(error)) {
2090                                 rte_pktmbuf_free(txm);
2091                                 continue;
2092                         }
2093                 }
2094
2095                 /* optimize ring usage */
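                /* Prefer pushing the header into the headroom of a single
                 * direct, unshared mbuf; otherwise fall back to an indirect
                 * descriptor when the feature is negotiated and the segment
                 * count fits.
                 */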
2096                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2097                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2098                     rte_mbuf_refcnt_read(txm) == 1 &&
2099                     RTE_MBUF_DIRECT(txm) &&
2100                     txm->nb_segs == 1 &&
2101                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2102                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2103                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2104                         can_push = 1;
2105                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2106                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2107                         use_indirect = 1;
2108
2109                 /* How many main ring entries are needed for this Tx?
2110                  * any_layout => number of segments
2111                  * indirect   => 1
2112                  * default    => number of segments + 1
2113                  */
2114                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2115                 need = slots - vq->vq_free_cnt;
2116
2117                 /* A positive value indicates that more free vring descriptors are needed */
2118                 if (unlikely(need > 0)) {
2119                         nb_used = VIRTQUEUE_NUSED(vq);
2120                         virtio_rmb(hw->weak_barriers);
2121                         need = RTE_MIN(need, (int)nb_used);
2122
2123                         virtio_xmit_cleanup(vq, need);
2124                         need = slots - vq->vq_free_cnt;
2125                         if (unlikely(need > 0)) {
2126                                 PMD_TX_LOG(ERR,
2127                                            "No free tx descriptors to transmit");
2128                                 break;
2129                         }
2130                 }
2131
2132                 /* Enqueue Packet buffers */
2133                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2134                         can_push, 0);
2135
2136                 virtio_update_packet_stats(&txvq->stats, txm);
2137         }
2138
2139         txvq->stats.packets += nb_tx;
2140
2141         if (likely(nb_tx)) {
2142                 vq_update_avail_idx(vq);
2143
2144                 if (unlikely(virtqueue_kick_prepare(vq))) {
2145                         virtqueue_notify(vq);
2146                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2147                 }
2148         }
2149
2150         return nb_tx;
2151 }
2152
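/*
 * Transmit burst for split virtqueues using the in-order feature. Packets
 * that can carry the virtio-net header in their headroom are collected and
 * submitted in batches; the remaining packets take the generic enqueue path
 * one at a time.
 */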
2153 uint16_t
2154 virtio_xmit_pkts_inorder(void *tx_queue,
2155                         struct rte_mbuf **tx_pkts,
2156                         uint16_t nb_pkts)
2157 {
2158         struct virtnet_tx *txvq = tx_queue;
2159         struct virtqueue *vq = txvq->vq;
2160         struct virtio_hw *hw = vq->hw;
2161         uint16_t hdr_size = hw->vtnet_hdr_size;
2162         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2163         struct rte_mbuf *inorder_pkts[nb_pkts];
2164         int error;
2165
2166         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2167                 return nb_tx;
2168
2169         if (unlikely(nb_pkts < 1))
2170                 return nb_pkts;
2171
2172         VIRTQUEUE_DUMP(vq);
2173         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2174         nb_used = VIRTQUEUE_NUSED(vq);
2175
2176         virtio_rmb(hw->weak_barriers);
2177         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2178                 virtio_xmit_cleanup_inorder(vq, nb_used);
2179
2180         if (unlikely(!vq->vq_free_cnt))
2181                 virtio_xmit_cleanup_inorder(vq, nb_used);
2182
2183         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2184
2185         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2186                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2187                 int slots, need;
2188
2189                 /* Do VLAN tag insertion */
2190                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2191                         error = rte_vlan_insert(&txm);
2192                         if (unlikely(error)) {
2193                                 rte_pktmbuf_free(txm);
2194                                 continue;
2195                         }
2196                 }
2197
2198                 /* optimize ring usage */
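                /* Batch headroom-capable mbufs here and flush them together
                 * before any packet that needs the generic path (and once
                 * more after the loop).
                 */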
2199                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2200                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2201                      rte_mbuf_refcnt_read(txm) == 1 &&
2202                      RTE_MBUF_DIRECT(txm) &&
2203                      txm->nb_segs == 1 &&
2204                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2205                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2206                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2207                         inorder_pkts[nb_inorder_pkts] = txm;
2208                         nb_inorder_pkts++;
2209
2210                         virtio_update_packet_stats(&txvq->stats, txm);
2211                         continue;
2212                 }
2213
2214                 if (nb_inorder_pkts) {
2215                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2216                                                         nb_inorder_pkts);
2217                         nb_inorder_pkts = 0;
2218                 }
2219
2220                 slots = txm->nb_segs + 1;
2221                 need = slots - vq->vq_free_cnt;
2222                 if (unlikely(need > 0)) {
2223                         nb_used = VIRTQUEUE_NUSED(vq);
2224                         virtio_rmb(hw->weak_barriers);
2225                         need = RTE_MIN(need, (int)nb_used);
2226
2227                         virtio_xmit_cleanup_inorder(vq, need);
2228
2229                         need = slots - vq->vq_free_cnt;
2230
2231                         if (unlikely(need > 0)) {
2232                                 PMD_TX_LOG(ERR,
2233                                         "No free tx descriptors to transmit");
2234                                 break;
2235                         }
2236                 }
2237                 /* Enqueue Packet buffers */
2238                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2239
2240                 virtio_update_packet_stats(&txvq->stats, txm);
2241         }
2242
2243         /* Transmit all inorder packets */
2244         if (nb_inorder_pkts)
2245                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2246                                                 nb_inorder_pkts);
2247
2248         txvq->stats.packets += nb_tx;
2249
2250         if (likely(nb_tx)) {
2251                 vq_update_avail_idx(vq);
2252
2253                 if (unlikely(virtqueue_kick_prepare(vq))) {
2254                         virtqueue_notify(vq);
2255                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2256                 }
2257         }
2258
2259         VIRTQUEUE_DUMP(vq);
2260
2261         return nb_tx;
2262 }