net/virtio: fix in-order Tx path for packed ring
[dpdk.git] / drivers / net / virtio / virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
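/*
 * In-order free: with VIRTIO_F_IN_ORDER the device consumes descriptors
 * in ring order, so releasing a run of descriptors only has to bump the
 * free counter and record the last freed slot as the new tail.
 */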
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * the newly freed chain. If the virtqueue was completely used,
79          * then head would be VQ_RING_DESC_CHAIN_END.
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
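/*
 * Packed ring free by buffer id: packed descriptors are not chained
 * through the ring itself, so the driver keeps its own free list in
 * vq_descx[].next and appends the returned id to it here.
 */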
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
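/*
 * Packed ring receive: a descriptor is complete when its AVAIL and USED
 * flag bits both match the queue's used_wrap_counter (desc_is_used()).
 * The wrap counter is toggled every time the consumer index wraps past
 * the end of the ring.
 */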
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->ring_packed.desc_packed;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
150
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
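/*
 * In-order Tx cleanup for the packed ring: the device may report a single
 * used element whose id covers a whole batch of buffers completed in
 * order, so walk vq_descx[] from the current consumed index and release
 * every entry (and its mbuf) up to and including the reported id.
 */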
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
258
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
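/*
 * In-order Rx refill: one mbuf per descriptor, filled into consecutive
 * slots starting at the current head. The buffer address is rewound by
 * the virtio-net header size so the device writes the header into the
 * mbuf headroom, immediately before the packet data.
 */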
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
385
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
428
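/*
 * Packed ring Rx refill: addr/len are written first, then a write
 * barrier, and only then the flags word carrying the current AVAIL/USED
 * wrap bits, which is what actually hands the descriptor to the device.
 */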
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
434         uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->avail_wrap_counter ^= 1;
464                         vq->avail_used_flags =
465                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
466                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
467                         flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
468                 }
469         }
470         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
471         return 0;
472 }
473
474 /* When doing TSO, the IP payload length is not included in the pseudo
475  * header checksum of the packet given to the PMD, but virtio expects it
476  * to be included.
477  */
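/*
 * The fix below is a plain ones'-complement addition of ip_paylen to the
 * existing pseudo header checksum. Purely as an illustration: a checksum
 * of 0xabcd combined with an ip_paylen of 0x0100 yields 0xaccd (any carry
 * out of the 16-bit sum is folded back in before storing).
 */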
478 static void
479 virtio_tso_fix_cksum(struct rte_mbuf *m)
480 {
481         /* common case: header is not fragmented */
482         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
483                         m->l4_len)) {
484                 struct ipv4_hdr *iph;
485                 struct ipv6_hdr *ip6h;
486                 struct tcp_hdr *th;
487                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
488                 uint32_t tmp;
489
490                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
491                 th = RTE_PTR_ADD(iph, m->l3_len);
492                 if ((iph->version_ihl >> 4) == 4) {
493                         iph->hdr_checksum = 0;
494                         iph->hdr_checksum = rte_ipv4_cksum(iph);
495                         ip_len = iph->total_length;
496                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
497                                 m->l3_len);
498                 } else {
499                         ip6h = (struct ipv6_hdr *)iph;
500                         ip_paylen = ip6h->payload_len;
501                 }
502
503                 /* add ip_paylen into the phdr checksum computed without it */
504                 prev_cksum = th->cksum;
505                 tmp = prev_cksum;
506                 tmp += ip_paylen;
507                 tmp = (tmp & 0xffff) + (tmp >> 16);
508                 new_cksum = tmp;
509
510                 /* replace it in the packet */
511                 th->cksum = new_cksum;
512         }
513 }
514
515
516 /* avoid unneeded write operations, to lessen cache issues */
517 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
518         if ((var) != (val))                     \
519                 (var) = (val);                  \
520 } while (0)
521
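/*
 * Translate mbuf Tx offload requests into virtio-net header fields:
 * L4 checksum offload becomes csum_start/csum_offset plus NEEDS_CSUM,
 * and TSO becomes the gso_type/gso_size/hdr_len triple. Fields that are
 * not needed are zeroed, but only when they are not already zero, to
 * avoid needless cache writes.
 */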
522 static inline void
523 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
524                         struct rte_mbuf *cookie,
525                         bool offload)
526 {
527         if (offload) {
528                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
529                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
530
531                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
532                 case PKT_TX_UDP_CKSUM:
533                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
534                         hdr->csum_offset = offsetof(struct udp_hdr,
535                                 dgram_cksum);
536                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
537                         break;
538
539                 case PKT_TX_TCP_CKSUM:
540                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
541                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
542                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
543                         break;
544
545                 default:
546                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
547                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
548                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
549                         break;
550                 }
551
552                 /* TCP Segmentation Offload */
553                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
554                         virtio_tso_fix_cksum(cookie);
555                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
556                                 VIRTIO_NET_HDR_GSO_TCPV6 :
557                                 VIRTIO_NET_HDR_GSO_TCPV4;
558                         hdr->gso_size = cookie->tso_segsz;
559                         hdr->hdr_len =
560                                 cookie->l2_len +
561                                 cookie->l3_len +
562                                 cookie->l4_len;
563                 } else {
564                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
565                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
566                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
567                 }
568         }
569 }
570
571 static inline void
572 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
573                         struct rte_mbuf **cookies,
574                         uint16_t num)
575 {
576         struct vq_desc_extra *dxp;
577         struct virtqueue *vq = txvq->vq;
578         struct vring_desc *start_dp;
579         struct virtio_net_hdr *hdr;
580         uint16_t idx;
581         uint16_t head_size = vq->hw->vtnet_hdr_size;
582         uint16_t i = 0;
583
584         idx = vq->vq_desc_head_idx;
585         start_dp = vq->vq_ring.desc;
586
587         while (i < num) {
588                 idx = idx & (vq->vq_nentries - 1);
589                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
590                 dxp->cookie = (void *)cookies[i];
591                 dxp->ndescs = 1;
592
593                 hdr = (struct virtio_net_hdr *)
594                         rte_pktmbuf_prepend(cookies[i], head_size);
595                 cookies[i]->pkt_len -= head_size;
596
597                 /* if offload is disabled, the header is not zeroed below, do it now */
598                 if (!vq->hw->has_tx_offload) {
599                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
600                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
601                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
602                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
603                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
604                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
605                 }
606
607                 virtqueue_xmit_offload(hdr, cookies[i],
608                                 vq->hw->has_tx_offload);
609
610                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
611                 start_dp[idx].len   = cookies[i]->data_len;
612                 start_dp[idx].flags = 0;
613
614                 vq_update_avail_ring(vq, idx);
615
616                 idx++;
617                 i++;
618         };
619
620         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
621         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
622 }
623
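/*
 * Packed ring transmit. The buffer id is stored only in the last
 * descriptor of the chain; for in-order queues it is taken from the
 * avail index rather than from the descriptor free list. The head
 * descriptor's flags are written last, after a write barrier, so the
 * device can never observe a partially built chain.
 */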
624 static inline void
625 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
626                               uint16_t needed, int can_push, int in_order)
627 {
628         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
629         struct vq_desc_extra *dxp;
630         struct virtqueue *vq = txvq->vq;
631         struct vring_packed_desc *start_dp, *head_dp;
632         uint16_t idx, id, head_idx, head_flags;
633         uint16_t head_size = vq->hw->vtnet_hdr_size;
634         struct virtio_net_hdr *hdr;
635         uint16_t prev;
636
637         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
638
639         dxp = &vq->vq_descx[id];
640         dxp->ndescs = needed;
641         dxp->cookie = cookie;
642
643         head_idx = vq->vq_avail_idx;
644         idx = head_idx;
645         prev = head_idx;
646         start_dp = vq->ring_packed.desc_packed;
647
648         head_dp = &vq->ring_packed.desc_packed[idx];
649         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
650         head_flags |= vq->avail_used_flags;
651
652         if (can_push) {
653                 /* prepend cannot fail, checked by caller */
654                 hdr = (struct virtio_net_hdr *)
655                         rte_pktmbuf_prepend(cookie, head_size);
656                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
657                  * which is not wanted here; the subtraction below restores the pkt size.
658                  */
659                 cookie->pkt_len -= head_size;
660
661                 /* if offload is disabled, the header is not zeroed below, do it now */
662                 if (!vq->hw->has_tx_offload) {
663                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
664                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
665                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
666                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
667                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
668                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
669                 }
670         } else {
671                 /* setup first tx ring slot to point to header
672                  * stored in reserved region.
673                  */
674                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
675                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
676                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
677                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
678                 idx++;
679                 if (idx >= vq->vq_nentries) {
680                         idx -= vq->vq_nentries;
681                         vq->avail_wrap_counter ^= 1;
682                         vq->avail_used_flags =
683                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
684                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
685                 }
686         }
687
688         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
689
690         do {
691                 uint16_t flags;
692
693                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
694                 start_dp[idx].len  = cookie->data_len;
695                 if (likely(idx != head_idx)) {
696                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
697                         flags |= vq->avail_used_flags;
698                         start_dp[idx].flags = flags;
699                 }
700                 prev = idx;
701                 idx++;
702                 if (idx >= vq->vq_nentries) {
703                         idx -= vq->vq_nentries;
704                         vq->avail_wrap_counter ^= 1;
705                         vq->avail_used_flags =
706                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
707                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
708                 }
709         } while ((cookie = cookie->next) != NULL);
710
711         start_dp[prev].id = id;
712
713         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
714         vq->vq_avail_idx = idx;
715
716         if (!in_order) {
717                 vq->vq_desc_head_idx = dxp->next;
718                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
719                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
720         }
721
722         virtio_wmb(vq->hw->weak_barriers);
723         head_dp->flags = head_flags;
724 }
725
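/*
 * Split ring transmit. Three layouts are possible: the virtio-net header
 * is prepended into the mbuf headroom (can_push), the whole chain is
 * described by an indirect table in the reserved header memzone
 * (use_indirect), or the header gets a ring slot of its own.
 */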
726 static inline void
727 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
728                         uint16_t needed, int use_indirect, int can_push,
729                         int in_order)
730 {
731         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
732         struct vq_desc_extra *dxp;
733         struct virtqueue *vq = txvq->vq;
734         struct vring_desc *start_dp;
735         uint16_t seg_num = cookie->nb_segs;
736         uint16_t head_idx, idx;
737         uint16_t head_size = vq->hw->vtnet_hdr_size;
738         struct virtio_net_hdr *hdr;
739
740         head_idx = vq->vq_desc_head_idx;
741         idx = head_idx;
742         if (in_order)
743                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
744         else
745                 dxp = &vq->vq_descx[idx];
746         dxp->cookie = (void *)cookie;
747         dxp->ndescs = needed;
748
749         start_dp = vq->vq_ring.desc;
750
751         if (can_push) {
752                 /* prepend cannot fail, checked by caller */
753                 hdr = (struct virtio_net_hdr *)
754                         rte_pktmbuf_prepend(cookie, head_size);
755                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
756                  * which is not wanted here; the subtraction below restores the pkt size.
757                  */
758                 cookie->pkt_len -= head_size;
759
760                 /* if offload is disabled, the header is not zeroed below, do it now */
761                 if (!vq->hw->has_tx_offload) {
762                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
763                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
764                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
765                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
766                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
767                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
768                 }
769         } else if (use_indirect) {
770                 /* setup tx ring slot to point to indirect
771                  * descriptor list stored in reserved region.
772                  *
773                  * the first slot in indirect ring is already preset
774                  * to point to the header in reserved region
775                  */
776                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
777                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
778                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
779                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
780                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
781
782                 /* loop below will fill in rest of the indirect elements */
783                 start_dp = txr[idx].tx_indir;
784                 idx = 1;
785         } else {
786                 /* setup first tx ring slot to point to header
787                  * stored in reserved region.
788                  */
789                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
790                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
791                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
792                 start_dp[idx].flags = VRING_DESC_F_NEXT;
793                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
794
795                 idx = start_dp[idx].next;
796         }
797
798         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
799
800         do {
801                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
802                 start_dp[idx].len   = cookie->data_len;
803                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
804                 idx = start_dp[idx].next;
805         } while ((cookie = cookie->next) != NULL);
806
807         if (use_indirect)
808                 idx = vq->vq_ring.desc[head_idx].next;
809
810         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
811
812         vq->vq_desc_head_idx = idx;
813         vq_update_avail_ring(vq, head_idx);
814
815         if (!in_order) {
816                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
817                         vq->vq_desc_tail_idx = idx;
818         }
819 }
820
821 void
822 virtio_dev_cq_start(struct rte_eth_dev *dev)
823 {
824         struct virtio_hw *hw = dev->data->dev_private;
825
826         if (hw->cvq && hw->cvq->vq) {
827                 rte_spinlock_init(&hw->cvq->lock);
828                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
829         }
830 }
831
832 int
833 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
834                         uint16_t queue_idx,
835                         uint16_t nb_desc,
836                         unsigned int socket_id __rte_unused,
837                         const struct rte_eth_rxconf *rx_conf __rte_unused,
838                         struct rte_mempool *mp)
839 {
840         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
841         struct virtio_hw *hw = dev->data->dev_private;
842         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
843         struct virtnet_rx *rxvq;
844
845         PMD_INIT_FUNC_TRACE();
846
847         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
848                 nb_desc = vq->vq_nentries;
849         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
850
851         rxvq = &vq->rxq;
852         rxvq->queue_id = queue_idx;
853         rxvq->mpool = mp;
854         if (rxvq->mpool == NULL) {
855                 rte_exit(EXIT_FAILURE,
856                         "Cannot allocate mbufs for rx virtqueue");
857         }
858
859         dev->data->rx_queues[queue_idx] = rxvq;
860
861         return 0;
862 }
863
864 int
865 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
866 {
867         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
868         struct virtio_hw *hw = dev->data->dev_private;
869         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
870         struct virtnet_rx *rxvq = &vq->rxq;
871         struct rte_mbuf *m;
872         uint16_t desc_idx;
873         int error, nbufs, i;
874
875         PMD_INIT_FUNC_TRACE();
876
877         /* Allocate blank mbufs for each rx descriptor */
878         nbufs = 0;
879
880         if (hw->use_simple_rx) {
881                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
882                      desc_idx++) {
883                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
884                         vq->vq_ring.desc[desc_idx].flags =
885                                 VRING_DESC_F_WRITE;
886                 }
887
888                 virtio_rxq_vec_setup(rxvq);
889         }
890
891         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
892         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
893              desc_idx++) {
894                 vq->sw_ring[vq->vq_nentries + desc_idx] =
895                         &rxvq->fake_mbuf;
896         }
897
898         if (hw->use_simple_rx) {
899                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
900                         virtio_rxq_rearm_vec(rxvq);
901                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
902                 }
903         } else if (hw->use_inorder_rx) {
904                 if ((!virtqueue_full(vq))) {
905                         uint16_t free_cnt = vq->vq_free_cnt;
906                         struct rte_mbuf *pkts[free_cnt];
907
908                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
909                                 free_cnt)) {
910                                 error = virtqueue_enqueue_refill_inorder(vq,
911                                                 pkts,
912                                                 free_cnt);
913                                 if (unlikely(error)) {
914                                         for (i = 0; i < free_cnt; i++)
915                                                 rte_pktmbuf_free(pkts[i]);
916                                 }
917                         }
918
919                         nbufs += free_cnt;
920                         vq_update_avail_idx(vq);
921                 }
922         } else {
923                 while (!virtqueue_full(vq)) {
924                         m = rte_mbuf_raw_alloc(rxvq->mpool);
925                         if (m == NULL)
926                                 break;
927
928                         /* Enqueue allocated buffers */
929                         if (vtpci_packed_queue(vq->hw))
930                                 error = virtqueue_enqueue_recv_refill_packed(vq,
931                                                 &m, 1);
932                         else
933                                 error = virtqueue_enqueue_recv_refill(vq,
934                                                 &m, 1);
935                         if (error) {
936                                 rte_pktmbuf_free(m);
937                                 break;
938                         }
939                         nbufs++;
940                 }
941
942                 if (!vtpci_packed_queue(vq->hw))
943                         vq_update_avail_idx(vq);
944         }
945
946         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
947
948         VIRTQUEUE_DUMP(vq);
949
950         return 0;
951 }
952
953 /*
954  * struct rte_eth_dev *dev: Used to update dev
955  * uint16_t nb_desc: Defaults to values read from config space
956  * unsigned int socket_id: Used to allocate memzone
957  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
958  * uint16_t queue_idx: Just used as an index in dev txq list
959  */
960 int
961 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
962                         uint16_t queue_idx,
963                         uint16_t nb_desc,
964                         unsigned int socket_id __rte_unused,
965                         const struct rte_eth_txconf *tx_conf)
966 {
967         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
968         struct virtio_hw *hw = dev->data->dev_private;
969         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
970         struct virtnet_tx *txvq;
971         uint16_t tx_free_thresh;
972
973         PMD_INIT_FUNC_TRACE();
974
975         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
976                 nb_desc = vq->vq_nentries;
977         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
978
979         txvq = &vq->txq;
980         txvq->queue_id = queue_idx;
981
982         tx_free_thresh = tx_conf->tx_free_thresh;
983         if (tx_free_thresh == 0)
984                 tx_free_thresh =
985                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
986
987         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
988                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
989                         "number of TX entries minus 3 (%u)."
990                         " (tx_free_thresh=%u port=%u queue=%u)\n",
991                         vq->vq_nentries - 3,
992                         tx_free_thresh, dev->data->port_id, queue_idx);
993                 return -EINVAL;
994         }
995
996         vq->vq_free_thresh = tx_free_thresh;
997
998         dev->data->tx_queues[queue_idx] = txvq;
999         return 0;
1000 }
1001
1002 int
1003 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1004                                 uint16_t queue_idx)
1005 {
1006         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1007         struct virtio_hw *hw = dev->data->dev_private;
1008         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1009
1010         PMD_INIT_FUNC_TRACE();
1011
1012         if (!vtpci_packed_queue(hw)) {
1013                 if (hw->use_inorder_tx)
1014                         vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
1015         }
1016
1017         VIRTQUEUE_DUMP(vq);
1018
1019         return 0;
1020 }
1021
1022 static inline void
1023 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1024 {
1025         int error;
1026         /*
1027          * Requeue the discarded mbuf. This should always be
1028          * successful since it was just dequeued.
1029          */
1030         if (vtpci_packed_queue(vq->hw))
1031                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1032         else
1033                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1034
1035         if (unlikely(error)) {
1036                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1037                 rte_pktmbuf_free(m);
1038         }
1039 }
1040
1041 static inline void
1042 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1043 {
1044         int error;
1045
1046         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1047         if (unlikely(error)) {
1048                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1049                 rte_pktmbuf_free(m);
1050         }
1051 }
1052
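/*
 * Per-packet statistics: byte counters, size bins (64, 65-127, ...,
 * 1024-1518, 1519+) derived from the most significant bit of the length,
 * and broadcast/multicast counters based on the destination MAC.
 */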
1053 static inline void
1054 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1055 {
1056         uint32_t s = mbuf->pkt_len;
1057         struct ether_addr *ea;
1058
1059         stats->bytes += s;
1060
1061         if (s == 64) {
1062                 stats->size_bins[1]++;
1063         } else if (s > 64 && s < 1024) {
1064                 uint32_t bin;
1065
1066                 /* count leading zeros to index the correct size bin */
1067                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1068                 stats->size_bins[bin]++;
1069         } else {
1070                 if (s < 64)
1071                         stats->size_bins[0]++;
1072                 else if (s < 1519)
1073                         stats->size_bins[6]++;
1074                 else if (s >= 1519)
1075                         stats->size_bins[7]++;
1076         }
1077
1078         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
1079         if (is_multicast_ether_addr(ea)) {
1080                 if (is_broadcast_ether_addr(ea))
1081                         stats->broadcast++;
1082                 else
1083                         stats->multicast++;
1084         }
1085 }
1086
1087 static inline void
1088 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1089 {
1090         VIRTIO_DUMP_PACKET(m, m->data_len);
1091
1092         virtio_update_packet_stats(&rxvq->stats, m);
1093 }
1094
1095 /* Optionally fill offload information in structure */
1096 static inline int
1097 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1098 {
1099         struct rte_net_hdr_lens hdr_lens;
1100         uint32_t hdrlen, ptype;
1101         int l4_supported = 0;
1102
1103         /* nothing to do */
1104         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1105                 return 0;
1106
1107         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1108
1109         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1110         m->packet_type = ptype;
1111         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1112             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1113             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1114                 l4_supported = 1;
1115
1116         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1117                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1118                 if (hdr->csum_start <= hdrlen && l4_supported) {
1119                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1120                 } else {
1121                         /* Unknown proto or tunnel, do sw cksum. We can assume
1122                          * the cksum field is in the first segment since the
1123                          * buffers we provided to the host are large enough.
1124                          * In case of SCTP, this will be wrong since it's a CRC
1125                          * but there's nothing we can do.
1126                          */
1127                         uint16_t csum = 0, off;
1128
1129                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1130                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1131                                 &csum);
1132                         if (likely(csum != 0xffff))
1133                                 csum = ~csum;
1134                         off = hdr->csum_offset + hdr->csum_start;
1135                         if (rte_pktmbuf_data_len(m) >= off + 1)
1136                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1137                                         off) = csum;
1138                 }
1139         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1140                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1141         }
1142
1143         /* GSO request, save required information in mbuf */
1144         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1145                 /* Check unsupported modes */
1146                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1147                     (hdr->gso_size == 0)) {
1148                         return -EINVAL;
1149                 }
1150
1151                 /* Update mss length in mbuf */
1152                 m->tso_segsz = hdr->gso_size;
1153                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1154                         case VIRTIO_NET_HDR_GSO_TCPV4:
1155                         case VIRTIO_NET_HDR_GSO_TCPV6:
1156                                 m->ol_flags |= PKT_RX_LRO | \
1157                                         PKT_RX_L4_CKSUM_NONE;
1158                                 break;
1159                         default:
1160                                 return -EINVAL;
1161                 }
1162         }
1163
1164         return 0;
1165 }
1166
1167 #define VIRTIO_MBUF_BURST_SZ 64
1168 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
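/*
 * Burst sizes below are trimmed so that the used consumer index ends up
 * aligned to a cache line worth of descriptors (DESC_PER_CACHELINE),
 * keeping a burst from leaving a partially consumed cache line behind.
 */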
1169 uint16_t
1170 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1171 {
1172         struct virtnet_rx *rxvq = rx_queue;
1173         struct virtqueue *vq = rxvq->vq;
1174         struct virtio_hw *hw = vq->hw;
1175         struct rte_mbuf *rxm, *new_mbuf;
1176         uint16_t nb_used, num, nb_rx;
1177         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1178         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1179         int error;
1180         uint32_t i, nb_enqueued;
1181         uint32_t hdr_size;
1182         struct virtio_net_hdr *hdr;
1183
1184         nb_rx = 0;
1185         if (unlikely(hw->started == 0))
1186                 return nb_rx;
1187
1188         nb_used = VIRTQUEUE_NUSED(vq);
1189
1190         virtio_rmb(hw->weak_barriers);
1191
1192         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1193         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1194                 num = VIRTIO_MBUF_BURST_SZ;
1195         if (likely(num > DESC_PER_CACHELINE))
1196                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1197
1198         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1199         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1200
1201         nb_enqueued = 0;
1202         hdr_size = hw->vtnet_hdr_size;
1203
1204         for (i = 0; i < num ; i++) {
1205                 rxm = rcv_pkts[i];
1206
1207                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1208
1209                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1210                         PMD_RX_LOG(ERR, "Packet drop");
1211                         nb_enqueued++;
1212                         virtio_discard_rxbuf(vq, rxm);
1213                         rxvq->stats.errors++;
1214                         continue;
1215                 }
1216
1217                 rxm->port = rxvq->port_id;
1218                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1219                 rxm->ol_flags = 0;
1220                 rxm->vlan_tci = 0;
1221
1222                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1223                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1224
1225                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1226                         RTE_PKTMBUF_HEADROOM - hdr_size);
1227
1228                 if (hw->vlan_strip)
1229                         rte_vlan_strip(rxm);
1230
1231                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1232                         virtio_discard_rxbuf(vq, rxm);
1233                         rxvq->stats.errors++;
1234                         continue;
1235                 }
1236
1237                 virtio_rx_stats_updated(rxvq, rxm);
1238
1239                 rx_pkts[nb_rx++] = rxm;
1240         }
1241
1242         rxvq->stats.packets += nb_rx;
1243
1244         /* Allocate new mbuf for the used descriptor */
1245         while (likely(!virtqueue_full(vq))) {
1246                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1247                 if (unlikely(new_mbuf == NULL)) {
1248                         struct rte_eth_dev *dev
1249                                 = &rte_eth_devices[rxvq->port_id];
1250                         dev->data->rx_mbuf_alloc_failed++;
1251                         break;
1252                 }
1253                 error = virtqueue_enqueue_recv_refill(vq, &new_mbuf, 1);
1254                 if (unlikely(error)) {
1255                         rte_pktmbuf_free(new_mbuf);
1256                         break;
1257                 }
1258                 nb_enqueued++;
1259         }
1260
1261         if (likely(nb_enqueued)) {
1262                 vq_update_avail_idx(vq);
1263
1264                 if (unlikely(virtqueue_kick_prepare(vq))) {
1265                         virtqueue_notify(vq);
1266                         PMD_RX_LOG(DEBUG, "Notified");
1267                 }
1268         }
1269
1270         return nb_rx;
1271 }
1272
1273 uint16_t
1274 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1275                         uint16_t nb_pkts)
1276 {
1277         struct virtnet_rx *rxvq = rx_queue;
1278         struct virtqueue *vq = rxvq->vq;
1279         struct virtio_hw *hw = vq->hw;
1280         struct rte_mbuf *rxm, *new_mbuf;
1281         uint16_t num, nb_rx;
1282         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1283         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1284         int error;
1285         uint32_t i, nb_enqueued;
1286         uint32_t hdr_size;
1287         struct virtio_net_hdr *hdr;
1288
1289         nb_rx = 0;
1290         if (unlikely(hw->started == 0))
1291                 return nb_rx;
1292
1293         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1294         if (likely(num > DESC_PER_CACHELINE))
1295                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1296
1297         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1298         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1299
1300         nb_enqueued = 0;
1301         hdr_size = hw->vtnet_hdr_size;
1302
1303         for (i = 0; i < num; i++) {
1304                 rxm = rcv_pkts[i];
1305
1306                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1307
1308                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1309                         PMD_RX_LOG(ERR, "Packet drop");
1310                         nb_enqueued++;
1311                         virtio_discard_rxbuf(vq, rxm);
1312                         rxvq->stats.errors++;
1313                         continue;
1314                 }
1315
1316                 rxm->port = rxvq->port_id;
1317                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1318                 rxm->ol_flags = 0;
1319                 rxm->vlan_tci = 0;
1320
1321                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1322                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1323
1324                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1325                         RTE_PKTMBUF_HEADROOM - hdr_size);
1326
1327                 if (hw->vlan_strip)
1328                         rte_vlan_strip(rxm);
1329
1330                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1331                         virtio_discard_rxbuf(vq, rxm);
1332                         rxvq->stats.errors++;
1333                         continue;
1334                 }
1335
1336                 virtio_rx_stats_updated(rxvq, rxm);
1337
1338                 rx_pkts[nb_rx++] = rxm;
1339         }
1340
1341         rxvq->stats.packets += nb_rx;
1342
1343         /* Allocate new mbuf for the used descriptor */
1344         while (likely(!virtqueue_full(vq))) {
1345                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1346                 if (unlikely(new_mbuf == NULL)) {
1347                         struct rte_eth_dev *dev =
1348                                 &rte_eth_devices[rxvq->port_id];
1349                         dev->data->rx_mbuf_alloc_failed++;
1350                         break;
1351                 }
1352                 error = virtqueue_enqueue_recv_refill_packed(vq, &new_mbuf, 1);
1353                 if (unlikely(error)) {
1354                         rte_pktmbuf_free(new_mbuf);
1355                         break;
1356                 }
1357                 nb_enqueued++;
1358         }
1359
1360         if (likely(nb_enqueued)) {
1361                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1362                         virtqueue_notify(vq);
1363                         PMD_RX_LOG(DEBUG, "Notified");
1364                 }
1365         }
1366
1367         return nb_rx;
1368 }
1369
1370
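/*
 * In-order receive with mergeable buffers: the first buffer of a packet
 * carries num_buffers in its virtio_net_hdr_mrg_rxbuf header, and the
 * remaining buffers are chained onto the head mbuf, dequeuing further
 * descriptors if the tail of the packet has not been used yet.
 */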
1371 uint16_t
1372 virtio_recv_pkts_inorder(void *rx_queue,
1373                         struct rte_mbuf **rx_pkts,
1374                         uint16_t nb_pkts)
1375 {
1376         struct virtnet_rx *rxvq = rx_queue;
1377         struct virtqueue *vq = rxvq->vq;
1378         struct virtio_hw *hw = vq->hw;
1379         struct rte_mbuf *rxm;
1380         struct rte_mbuf *prev;
1381         uint16_t nb_used, num, nb_rx;
1382         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1383         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1384         int error;
1385         uint32_t nb_enqueued;
1386         uint32_t seg_num;
1387         uint32_t seg_res;
1388         uint32_t hdr_size;
1389         int32_t i;
1390
1391         nb_rx = 0;
1392         if (unlikely(hw->started == 0))
1393                 return nb_rx;
1394
1395         nb_used = VIRTQUEUE_NUSED(vq);
1396         nb_used = RTE_MIN(nb_used, nb_pkts);
1397         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1398
1399         virtio_rmb(hw->weak_barriers);
1400
1401         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1402
1403         nb_enqueued = 0;
1404         seg_num = 1;
1405         seg_res = 0;
1406         hdr_size = hw->vtnet_hdr_size;
1407
1408         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1409
1410         for (i = 0; i < num; i++) {
1411                 struct virtio_net_hdr_mrg_rxbuf *header;
1412
1413                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1414                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1415
1416                 rxm = rcv_pkts[i];
1417
1418                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1419                         PMD_RX_LOG(ERR, "Packet drop");
1420                         nb_enqueued++;
1421                         virtio_discard_rxbuf_inorder(vq, rxm);
1422                         rxvq->stats.errors++;
1423                         continue;
1424                 }
1425
1426                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1427                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1428                          - hdr_size);
1429
1430                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1431                         seg_num = header->num_buffers;
1432                         if (seg_num == 0)
1433                                 seg_num = 1;
1434                 } else {
1435                         seg_num = 1;
1436                 }
1437
1438                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1439                 rxm->nb_segs = seg_num;
1440                 rxm->ol_flags = 0;
1441                 rxm->vlan_tci = 0;
1442                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1443                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1444
1445                 rxm->port = rxvq->port_id;
1446
1447                 rx_pkts[nb_rx] = rxm;
1448                 prev = rxm;
1449
1450                 if (hw->has_rx_offload &&
1451                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1452                         virtio_discard_rxbuf_inorder(vq, rxm);
1453                         rxvq->stats.errors++;
1454                         continue;
1455                 }
1456
1457                 if (hw->vlan_strip)
1458                         rte_vlan_strip(rx_pkts[nb_rx]);
1459
1460                 seg_res = seg_num - 1;
1461
1462                 /* Merge remaining segments */
1463                 while (seg_res != 0 && i < (num - 1)) {
1464                         i++;
1465
1466                         rxm = rcv_pkts[i];
1467                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1468                         rxm->pkt_len = (uint32_t)(len[i]);
1469                         rxm->data_len = (uint16_t)(len[i]);
1470
1471                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1472                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1473
1474                         if (prev)
1475                                 prev->next = rxm;
1476
1477                         prev = rxm;
1478                         seg_res -= 1;
1479                 }
1480
1481                 if (!seg_res) {
1482                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1483                         nb_rx++;
1484                 }
1485         }
1486
1487         /* The last packet still needs its remaining segments merged */
1488         while (seg_res != 0) {
1489                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1490                                         VIRTIO_MBUF_BURST_SZ);
1491
1492                 prev = rcv_pkts[nb_rx];
1493                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1494                         virtio_rmb(hw->weak_barriers);
1495                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1496                                                            rcv_cnt);
1497                         uint16_t extra_idx = 0;
1498
1499                         rcv_cnt = num;
1500                         while (extra_idx < rcv_cnt) {
1501                                 rxm = rcv_pkts[extra_idx];
1502                                 rxm->data_off =
1503                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1504                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1505                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1506                                 prev->next = rxm;
1507                                 prev = rxm;
1508                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1509                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1510                                 extra_idx += 1;
1511                         }
1512                         seg_res -= rcv_cnt;
1513
1514                         if (!seg_res) {
1515                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1516                                 nb_rx++;
1517                         }
1518                 } else {
1519                         PMD_RX_LOG(ERR,
1520                                         "Not enough segments for packet.");
1521                         virtio_discard_rxbuf_inorder(vq, prev);
1522                         rxvq->stats.errors++;
1523                         break;
1524                 }
1525         }
1526
1527         rxvq->stats.packets += nb_rx;
1528
1529         /* Allocate new mbufs to refill the used descriptors */
1530
1531         if (likely(!virtqueue_full(vq))) {
1532                 /* free_cnt may include mrg descs */
1533                 uint16_t free_cnt = vq->vq_free_cnt;
1534                 struct rte_mbuf *new_pkts[free_cnt];
1535
1536                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1537                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1538                                         free_cnt);
1539                         if (unlikely(error)) {
1540                                 for (i = 0; i < free_cnt; i++)
1541                                         rte_pktmbuf_free(new_pkts[i]);
1542                         }
1543                         nb_enqueued += free_cnt;
1544                 } else {
1545                         struct rte_eth_dev *dev =
1546                                 &rte_eth_devices[rxvq->port_id];
1547                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1548                 }
1549         }
1550
1551         if (likely(nb_enqueued)) {
1552                 vq_update_avail_idx(vq);
1553
1554                 if (unlikely(virtqueue_kick_prepare(vq))) {
1555                         virtqueue_notify(vq);
1556                         PMD_RX_LOG(DEBUG, "Notified");
1557                 }
1558         }
1559
1560         return nb_rx;
1561 }
1562
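/*
 * Rx burst function for the split ring with mergeable Rx buffers.
 * A received packet may span several descriptors; header->num_buffers gives
 * the count. Segments already present in the current dequeue are chained
 * immediately, any remainder is collected by follow-up dequeues, and the
 * ring is refilled and the host notified once the burst is done.
 */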
1563 uint16_t
1564 virtio_recv_mergeable_pkts(void *rx_queue,
1565                         struct rte_mbuf **rx_pkts,
1566                         uint16_t nb_pkts)
1567 {
1568         struct virtnet_rx *rxvq = rx_queue;
1569         struct virtqueue *vq = rxvq->vq;
1570         struct virtio_hw *hw = vq->hw;
1571         struct rte_mbuf *rxm;
1572         struct rte_mbuf *prev;
1573         uint16_t nb_used, num, nb_rx = 0;
1574         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1575         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1576         int error;
1577         uint32_t nb_enqueued = 0;
1578         uint32_t seg_num = 0;
1579         uint32_t seg_res = 0;
1580         uint32_t hdr_size = hw->vtnet_hdr_size;
1581         int32_t i;
1582
1583         if (unlikely(hw->started == 0))
1584                 return nb_rx;
1585
1586         nb_used = VIRTQUEUE_NUSED(vq);
1587
1588         virtio_rmb(hw->weak_barriers);
1589
1590         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1591
1592         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1593         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1594                 num = VIRTIO_MBUF_BURST_SZ;
1595         if (likely(num > DESC_PER_CACHELINE))
1596                 num = num - ((vq->vq_used_cons_idx + num) %
1597                                 DESC_PER_CACHELINE);
1598
1599
1600         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1601
1602         for (i = 0; i < num; i++) {
1603                 struct virtio_net_hdr_mrg_rxbuf *header;
1604
1605                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1606                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1607
1608                 rxm = rcv_pkts[i];
1609
1610                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1611                         PMD_RX_LOG(ERR, "Packet drop");
1612                         nb_enqueued++;
1613                         virtio_discard_rxbuf(vq, rxm);
1614                         rxvq->stats.errors++;
1615                         continue;
1616                 }
1617
1618                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1619                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1620                          - hdr_size);
1621                 seg_num = header->num_buffers;
1622                 if (seg_num == 0)
1623                         seg_num = 1;
1624
1625                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1626                 rxm->nb_segs = seg_num;
1627                 rxm->ol_flags = 0;
1628                 rxm->vlan_tci = 0;
1629                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1630                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1631
1632                 rxm->port = rxvq->port_id;
1633
1634                 rx_pkts[nb_rx] = rxm;
1635                 prev = rxm;
1636
1637                 if (hw->has_rx_offload &&
1638                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1639                         virtio_discard_rxbuf(vq, rxm);
1640                         rxvq->stats.errors++;
1641                         continue;
1642                 }
1643
1644                 if (hw->vlan_strip)
1645                         rte_vlan_strip(rx_pkts[nb_rx]);
1646
1647                 seg_res = seg_num - 1;
1648
1649                 /* Merge remaining segments */
1650                 while (seg_res != 0 && i < (num - 1)) {
1651                         i++;
1652
1653                         rxm = rcv_pkts[i];
1654                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1655                         rxm->pkt_len = (uint32_t)(len[i]);
1656                         rxm->data_len = (uint16_t)(len[i]);
1657
1658                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1659                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1660
1661                         if (prev)
1662                                 prev->next = rxm;
1663
1664                         prev = rxm;
1665                         seg_res -= 1;
1666                 }
1667
1668                 if (!seg_res) {
1669                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1670                         nb_rx++;
1671                 }
1672         }
1673
1674         /* The last packet still needs its remaining segments merged */
1675         while (seg_res != 0) {
1676                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1677                                         VIRTIO_MBUF_BURST_SZ);
1678
1679                 prev = rcv_pkts[nb_rx];
1680                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1681                         virtio_rmb(hw->weak_barriers);
1682                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1683                                                            rcv_cnt);
1684                         uint16_t extra_idx = 0;
1685
1686                         rcv_cnt = num;
1687                         while (extra_idx < rcv_cnt) {
1688                                 rxm = rcv_pkts[extra_idx];
1689                                 rxm->data_off =
1690                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1691                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1692                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1693                                 prev->next = rxm;
1694                                 prev = rxm;
1695                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1696                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1697                                 extra_idx += 1;
1698                         }
1699                         seg_res -= rcv_cnt;
1700
1701                         if (!seg_res) {
1702                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1703                                 nb_rx++;
1704                         }
1705                 } else {
1706                         PMD_RX_LOG(ERR,
1707                                         "Not enough segments for packet.");
1708                         virtio_discard_rxbuf(vq, prev);
1709                         rxvq->stats.errors++;
1710                         break;
1711                 }
1712         }
1713
1714         rxvq->stats.packets += nb_rx;
1715
1716         /* Allocate new mbufs to refill the used descriptors */
1717         if (likely(!virtqueue_full(vq))) {
1718                 /* free_cnt may include mrg descs */
1719                 uint16_t free_cnt = vq->vq_free_cnt;
1720                 struct rte_mbuf *new_pkts[free_cnt];
1721
1722                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1723                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1724                                         free_cnt);
1725                         if (unlikely(error)) {
1726                                 for (i = 0; i < free_cnt; i++)
1727                                         rte_pktmbuf_free(new_pkts[i]);
1728                         }
1729                         nb_enqueued += free_cnt;
1730                 } else {
1731                         struct rte_eth_dev *dev =
1732                                 &rte_eth_devices[rxvq->port_id];
1733                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1734                 }
1735         }
1736
1737         if (likely(nb_enqueued)) {
1738                 vq_update_avail_idx(vq);
1739
1740                 if (unlikely(virtqueue_kick_prepare(vq))) {
1741                         virtqueue_notify(vq);
1742                         PMD_RX_LOG(DEBUG, "Notified");
1743                 }
1744         }
1745
1746         return nb_rx;
1747 }
1748
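/*
 * Rx burst function for the packed ring with mergeable Rx buffers.
 * Follows the same segment-merging scheme as the split-ring variant, but
 * uses the packed-ring dequeue and refill helpers and kicks the host via
 * virtqueue_kick_prepare_packed().
 */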
1749 uint16_t
1750 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1751                         struct rte_mbuf **rx_pkts,
1752                         uint16_t nb_pkts)
1753 {
1754         struct virtnet_rx *rxvq = rx_queue;
1755         struct virtqueue *vq = rxvq->vq;
1756         struct virtio_hw *hw = vq->hw;
1757         struct rte_mbuf *rxm;
1758         struct rte_mbuf *prev = NULL;
1759         uint16_t num, nb_rx = 0;
1760         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1761         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1762         uint32_t nb_enqueued = 0;
1763         uint32_t seg_num = 0;
1764         uint32_t seg_res = 0;
1765         uint32_t hdr_size = hw->vtnet_hdr_size;
1766         int32_t i;
1767         int error;
1768
1769         if (unlikely(hw->started == 0))
1770                 return nb_rx;
1771
1772
1773         num = nb_pkts;
1774         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1775                 num = VIRTIO_MBUF_BURST_SZ;
1776         if (likely(num > DESC_PER_CACHELINE))
1777                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1778
1779         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1780
1781         for (i = 0; i < num; i++) {
1782                 struct virtio_net_hdr_mrg_rxbuf *header;
1783
1784                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1785                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1786
1787                 rxm = rcv_pkts[i];
1788
1789                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1790                         PMD_RX_LOG(ERR, "Packet drop");
1791                         nb_enqueued++;
1792                         virtio_discard_rxbuf(vq, rxm);
1793                         rxvq->stats.errors++;
1794                         continue;
1795                 }
1796
1797                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1798                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1799                 seg_num = header->num_buffers;
1800
1801                 if (seg_num == 0)
1802                         seg_num = 1;
1803
1804                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1805                 rxm->nb_segs = seg_num;
1806                 rxm->ol_flags = 0;
1807                 rxm->vlan_tci = 0;
1808                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1809                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1810
1811                 rxm->port = rxvq->port_id;
1812                 rx_pkts[nb_rx] = rxm;
1813                 prev = rxm;
1814
1815                 if (hw->has_rx_offload &&
1816                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1817                         virtio_discard_rxbuf(vq, rxm);
1818                         rxvq->stats.errors++;
1819                         continue;
1820                 }
1821
1822                 if (hw->vlan_strip)
1823                         rte_vlan_strip(rx_pkts[nb_rx]);
1824
1825                 seg_res = seg_num - 1;
1826
1827                 /* Merge remaining segments */
1828                 while (seg_res != 0 && i < (num - 1)) {
1829                         i++;
1830
1831                         rxm = rcv_pkts[i];
1832                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1833                         rxm->pkt_len = (uint32_t)(len[i]);
1834                         rxm->data_len = (uint16_t)(len[i]);
1835
1836                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1837                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1838
1839                         if (prev)
1840                                 prev->next = rxm;
1841
1842                         prev = rxm;
1843                         seg_res -= 1;
1844                 }
1845
1846                 if (!seg_res) {
1847                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1848                         nb_rx++;
1849                 }
1850         }
1851
1852         /* The last packet still needs its remaining segments merged */
1853         while (seg_res != 0) {
1854                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1855                                         VIRTIO_MBUF_BURST_SZ);
1856                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1857                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1858                                         len, rcv_cnt);
1859                         uint16_t extra_idx = 0;
1860
1861                         rcv_cnt = num;
1862
1863                         while (extra_idx < rcv_cnt) {
1864                                 rxm = rcv_pkts[extra_idx];
1865
1866                                 rxm->data_off =
1867                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1868                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1869                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1870
1871                                 prev->next = rxm;
1872                                 prev = rxm;
1873                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1874                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1875                                 extra_idx += 1;
1876                         }
1877                         seg_res -= rcv_cnt;
1878                         if (!seg_res) {
1879                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1880                                 nb_rx++;
1881                         }
1882                 } else {
1883                         PMD_RX_LOG(ERR,
1884                                         "Not enough segments for packet.");
1885                         if (prev)
1886                                 virtio_discard_rxbuf(vq, prev);
1887                         rxvq->stats.errors++;
1888                         break;
1889                 }
1890         }
1891
1892         rxvq->stats.packets += nb_rx;
1893
1894         /* Allocate new mbufs to refill the used descriptors */
1895         if (likely(!virtqueue_full(vq))) {
1896                 /* free_cnt may include mrg descs */
1897                 uint16_t free_cnt = vq->vq_free_cnt;
1898                 struct rte_mbuf *new_pkts[free_cnt];
1899
1900                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1901                         error = virtqueue_enqueue_recv_refill_packed(vq,
1902                                         new_pkts, free_cnt);
1903                         if (unlikely(error)) {
1904                                 for (i = 0; i < free_cnt; i++)
1905                                         rte_pktmbuf_free(new_pkts[i]);
1906                         }
1907                         nb_enqueued += free_cnt;
1908                 } else {
1909                         struct rte_eth_dev *dev =
1910                                 &rte_eth_devices[rxvq->port_id];
1911                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1912                 }
1913         }
1914
1915         if (likely(nb_enqueued)) {
1916                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1917                         virtqueue_notify(vq);
1918                         PMD_RX_LOG(DEBUG, "Notified");
1919                 }
1920         }
1921
1922         return nb_rx;
1923 }
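
/*
 * Illustrative sketch only, not part of the driver: applications never call
 * the Rx burst functions above directly. The PMD installs one of them as the
 * device's rx_pkt_burst callback, which is reached through
 * rte_eth_rx_burst(). Assuming port_id and queue_id were configured by the
 * application and process_pkt() stands for application code, a minimal
 * polling loop could look like:
 *
 *         struct rte_mbuf *pkts[32];
 *         uint16_t nb, k;
 *
 *         nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *         for (k = 0; k < nb; k++) {
 *                 process_pkt(pkts[k]);
 *                 rte_pktmbuf_free(pkts[k]);
 *         }
 */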
1924
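/*
 * Tx burst function for the packed ring. Used descriptors are reclaimed on
 * demand (respecting in-order completion when that mode is in use), the
 * virtio-net header is pushed into the mbuf headroom whenever the buffer
 * layout allows it, and the host is notified once at the end of the burst
 * if it requested a kick.
 */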
1925 uint16_t
1926 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1927                         uint16_t nb_pkts)
1928 {
1929         struct virtnet_tx *txvq = tx_queue;
1930         struct virtqueue *vq = txvq->vq;
1931         struct virtio_hw *hw = vq->hw;
1932         uint16_t hdr_size = hw->vtnet_hdr_size;
1933         uint16_t nb_tx = 0;
1934         bool in_order = hw->use_inorder_tx;
1935         int error;
1936
1937         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1938                 return nb_tx;
1939
1940         if (unlikely(nb_pkts < 1))
1941                 return nb_pkts;
1942
1943         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1944
1945         if (nb_pkts > vq->vq_free_cnt)
1946                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1947                                            in_order);
1948
1949         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1950                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1951                 int can_push = 0, slots, need;
1952
1953                 /* Do VLAN tag insertion */
1954                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1955                         error = rte_vlan_insert(&txm);
1956                         if (unlikely(error)) {
1957                                 rte_pktmbuf_free(txm);
1958                                 continue;
1959                         }
1960                 }
1961
1962                 /* optimize ring usage */
1963                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1964                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1965                     rte_mbuf_refcnt_read(txm) == 1 &&
1966                     RTE_MBUF_DIRECT(txm) &&
1967                     txm->nb_segs == 1 &&
1968                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1969                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1970                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1971                         can_push = 1;
1972
1973                 /* How many main ring entries are needed for this Tx?
1974                  * any_layout => number of segments
1975                  * default    => number of segments + 1
1976                  */
1977                 slots = txm->nb_segs + !can_push;
1978                 need = slots - vq->vq_free_cnt;
1979
1980                 /* A positive value indicates more free vring descriptors are needed */
1981                 if (unlikely(need > 0)) {
1982                         virtio_xmit_cleanup_packed(vq, need, in_order);
1983                         need = slots - vq->vq_free_cnt;
1984                         if (unlikely(need > 0)) {
1985                                 PMD_TX_LOG(ERR,
1986                                            "No free tx descriptors to transmit");
1987                                 break;
1988                         }
1989                 }
1990
1991                 /* Enqueue Packet buffers */
1992                 virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
1993                                               in_order);
1994
1995                 virtio_update_packet_stats(&txvq->stats, txm);
1996         }
1997
1998         txvq->stats.packets += nb_tx;
1999
2000         if (likely(nb_tx)) {
2001                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2002                         virtqueue_notify(vq);
2003                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2004                 }
2005         }
2006
2007         return nb_tx;
2008 }
2009
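/*
 * Tx burst function for the split ring. For each mbuf the cheapest layout is
 * chosen: push the virtio-net header into the headroom, use an indirect
 * descriptor table, or fall back to one extra slot for the header. Used
 * descriptors are reclaimed when the ring runs short of free entries, and
 * the host is notified after the burst.
 */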
2010 uint16_t
2011 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2012 {
2013         struct virtnet_tx *txvq = tx_queue;
2014         struct virtqueue *vq = txvq->vq;
2015         struct virtio_hw *hw = vq->hw;
2016         uint16_t hdr_size = hw->vtnet_hdr_size;
2017         uint16_t nb_used, nb_tx = 0;
2018         int error;
2019
2020         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2021                 return nb_tx;
2022
2023         if (unlikely(nb_pkts < 1))
2024                 return nb_pkts;
2025
2026         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2027         nb_used = VIRTQUEUE_NUSED(vq);
2028
2029         virtio_rmb(hw->weak_barriers);
2030         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2031                 virtio_xmit_cleanup(vq, nb_used);
2032
2033         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2034                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2035                 int can_push = 0, use_indirect = 0, slots, need;
2036
2037                 /* Do VLAN tag insertion */
2038                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2039                         error = rte_vlan_insert(&txm);
2040                         if (unlikely(error)) {
2041                                 rte_pktmbuf_free(txm);
2042                                 continue;
2043                         }
2044                 }
2045
2046                 /* optimize ring usage */
2047                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2048                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2049                     rte_mbuf_refcnt_read(txm) == 1 &&
2050                     RTE_MBUF_DIRECT(txm) &&
2051                     txm->nb_segs == 1 &&
2052                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2053                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2054                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2055                         can_push = 1;
2056                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2057                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2058                         use_indirect = 1;
2059
2060                 /* How many main ring entries are needed for this Tx?
2061                  * any_layout => number of segments
2062                  * indirect   => 1
2063                  * default    => number of segments + 1
2064                  */
2065                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2066                 need = slots - vq->vq_free_cnt;
2067
2068                 /* A positive value indicates more free vring descriptors are needed */
2069                 if (unlikely(need > 0)) {
2070                         nb_used = VIRTQUEUE_NUSED(vq);
2071                         virtio_rmb(hw->weak_barriers);
2072                         need = RTE_MIN(need, (int)nb_used);
2073
2074                         virtio_xmit_cleanup(vq, need);
2075                         need = slots - vq->vq_free_cnt;
2076                         if (unlikely(need > 0)) {
2077                                 PMD_TX_LOG(ERR,
2078                                            "No free tx descriptors to transmit");
2079                                 break;
2080                         }
2081                 }
2082
2083                 /* Enqueue Packet buffers */
2084                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2085                         can_push, 0);
2086
2087                 virtio_update_packet_stats(&txvq->stats, txm);
2088         }
2089
2090         txvq->stats.packets += nb_tx;
2091
2092         if (likely(nb_tx)) {
2093                 vq_update_avail_idx(vq);
2094
2095                 if (unlikely(virtqueue_kick_prepare(vq))) {
2096                         virtqueue_notify(vq);
2097                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2098                 }
2099         }
2100
2101         return nb_tx;
2102 }
2103
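/*
 * Tx burst function for the split ring when the in-order feature has been
 * negotiated. Mbufs that allow the header to be pushed into their headroom
 * are batched and enqueued together with virtqueue_enqueue_xmit_inorder();
 * all other mbufs go through the regular per-packet enqueue path. Completed
 * descriptors are reclaimed in order before and, if needed, during the burst.
 */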
2104 uint16_t
2105 virtio_xmit_pkts_inorder(void *tx_queue,
2106                         struct rte_mbuf **tx_pkts,
2107                         uint16_t nb_pkts)
2108 {
2109         struct virtnet_tx *txvq = tx_queue;
2110         struct virtqueue *vq = txvq->vq;
2111         struct virtio_hw *hw = vq->hw;
2112         uint16_t hdr_size = hw->vtnet_hdr_size;
2113         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2114         struct rte_mbuf *inorder_pkts[nb_pkts];
2115         int error;
2116
2117         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2118                 return nb_tx;
2119
2120         if (unlikely(nb_pkts < 1))
2121                 return nb_pkts;
2122
2123         VIRTQUEUE_DUMP(vq);
2124         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2125         nb_used = VIRTQUEUE_NUSED(vq);
2126
2127         virtio_rmb(hw->weak_barriers);
2128         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2129                 virtio_xmit_cleanup_inorder(vq, nb_used);
2130
2131         if (unlikely(!vq->vq_free_cnt))
2132                 virtio_xmit_cleanup_inorder(vq, nb_used);
2133
2134         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2135
2136         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2137                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2138                 int slots, need;
2139
2140                 /* Do VLAN tag insertion */
2141                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2142                         error = rte_vlan_insert(&txm);
2143                         if (unlikely(error)) {
2144                                 rte_pktmbuf_free(txm);
2145                                 continue;
2146                         }
2147                 }
2148
2149                 /* optimize ring usage */
2150                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2151                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2152                      rte_mbuf_refcnt_read(txm) == 1 &&
2153                      RTE_MBUF_DIRECT(txm) &&
2154                      txm->nb_segs == 1 &&
2155                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2156                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2157                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2158                         inorder_pkts[nb_inorder_pkts] = txm;
2159                         nb_inorder_pkts++;
2160
2161                         virtio_update_packet_stats(&txvq->stats, txm);
2162                         continue;
2163                 }
2164
2165                 if (nb_inorder_pkts) {
2166                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2167                                                         nb_inorder_pkts);
2168                         nb_inorder_pkts = 0;
2169                 }
2170
2171                 slots = txm->nb_segs + 1;
2172                 need = slots - vq->vq_free_cnt;
2173                 if (unlikely(need > 0)) {
2174                         nb_used = VIRTQUEUE_NUSED(vq);
2175                         virtio_rmb(hw->weak_barriers);
2176                         need = RTE_MIN(need, (int)nb_used);
2177
2178                         virtio_xmit_cleanup_inorder(vq, need);
2179
2180                         need = slots - vq->vq_free_cnt;
2181
2182                         if (unlikely(need > 0)) {
2183                                 PMD_TX_LOG(ERR,
2184                                         "No free tx descriptors to transmit");
2185                                 break;
2186                         }
2187                 }
2188                 /* Enqueue Packet buffers */
2189                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2190
2191                 virtio_update_packet_stats(&txvq->stats, txm);
2192         }
2193
2194         /* Transmit all inorder packets */
2195         if (nb_inorder_pkts)
2196                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2197                                                 nb_inorder_pkts);
2198
2199         txvq->stats.packets += nb_tx;
2200
2201         if (likely(nb_tx)) {
2202                 vq_update_avail_idx(vq);
2203
2204                 if (unlikely(virtqueue_kick_prepare(vq))) {
2205                         virtqueue_notify(vq);
2206                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2207                 }
2208         }
2209
2210         VIRTQUEUE_DUMP(vq);
2211
2212         return nb_tx;
2213 }
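
/*
 * Illustrative sketch only, not part of the driver: as on the Rx side, the
 * Tx burst functions above are reached through rte_eth_tx_burst() once the
 * PMD has installed one of them as the device's tx_pkt_burst callback.
 * Assuming the application already filled an array of nb mbufs, any unsent
 * packets remain owned by the caller and must be freed or retried:
 *
 *         uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb);
 *
 *         for (; sent < nb; sent++)
 *                 rte_pktmbuf_free(pkts[sent]);
 */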