net/virtio: introduce helper for clearing net header
[dpdk.git] drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * the newly freed chain. If the virtqueue was completely used,
79          * then head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
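/* Return the descriptors held by the given buffer id to the packed ring free list. */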
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
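/* Dequeue up to num used buffers from a packed ring: fill rx_pkts[] and len[]
 * and return the number of buffers actually dequeued.
 */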
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->ring_packed.desc_packed;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
150
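/* Dequeue up to num used buffers from a split ring and free their descriptor chains. */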
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
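/* In-order split ring dequeue: the descriptor index equals the used ring index,
 * so the descriptors are released in one batch at the end.
 */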
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
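/* Cleanup from completed in-order transmits on a packed ring. */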
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
258
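/* Cleanup from completed out-of-order transmits on a packed ring. */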
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
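/* Cleanup from completed transmits on a split ring. */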
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
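/* Refill Rx descriptors of an in-order split ring, one descriptor per mbuf.
 * The buffer address is rewound by the header size so the device can also
 * write the virtio-net header into the mbuf headroom.
 */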
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
385
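/* Refill Rx descriptors of a split ring by walking the free descriptor chain. */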
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
428
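/* Refill Rx descriptors of a packed ring. The descriptor flags are written last,
 * after a write barrier, so the device never sees a partially initialized descriptor.
 */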
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->ring_packed.desc_packed;
434         uint16_t flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->avail_wrap_counter ^= 1;
464                         vq->avail_used_flags =
465                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
466                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
467                         flags = VRING_DESC_F_WRITE | vq->avail_used_flags;
468                 }
469         }
470         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
471         return 0;
472 }
473
474 /* When doing TSO, the IP length is not included in the pseudo header
475  * checksum of the packet given to the PMD, but for virtio it is
476  * expected.
477  */
478 static void
479 virtio_tso_fix_cksum(struct rte_mbuf *m)
480 {
481         /* common case: header is not fragmented */
482         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
483                         m->l4_len)) {
484                 struct ipv4_hdr *iph;
485                 struct ipv6_hdr *ip6h;
486                 struct tcp_hdr *th;
487                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
488                 uint32_t tmp;
489
490                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
491                 th = RTE_PTR_ADD(iph, m->l3_len);
492                 if ((iph->version_ihl >> 4) == 4) {
493                         iph->hdr_checksum = 0;
494                         iph->hdr_checksum = rte_ipv4_cksum(iph);
495                         ip_len = iph->total_length;
496                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
497                                 m->l3_len);
498                 } else {
499                         ip6h = (struct ipv6_hdr *)iph;
500                         ip_paylen = ip6h->payload_len;
501                 }
502
503                 /* calculate the new phdr checksum not including ip_paylen */
504                 prev_cksum = th->cksum;
505                 tmp = prev_cksum;
506                 tmp += ip_paylen;
507                 tmp = (tmp & 0xffff) + (tmp >> 16);
508                 new_cksum = tmp;
509
510                 /* replace it in the packet */
511                 th->cksum = new_cksum;
512         }
513 }
514
515
516 /* avoid the write operation when it is not needed, to lessen cache issues */
517 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
518         if ((var) != (val))                     \
519                 (var) = (val);                  \
520 } while (0)
521
522 #define virtqueue_clear_net_hdr(_hdr) do {              \
523         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
524         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
525         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
526         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
527         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
528         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
529 } while (0)
530
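/* Translate the mbuf Tx offload flags (L4 checksum, TSO) into virtio-net
 * header fields when offload is enabled.
 */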
531 static inline void
532 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
533                         struct rte_mbuf *cookie,
534                         bool offload)
535 {
536         if (offload) {
537                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
538                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
539
540                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
541                 case PKT_TX_UDP_CKSUM:
542                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
543                         hdr->csum_offset = offsetof(struct udp_hdr,
544                                 dgram_cksum);
545                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
546                         break;
547
548                 case PKT_TX_TCP_CKSUM:
549                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
550                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
551                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
552                         break;
553
554                 default:
555                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
556                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
557                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
558                         break;
559                 }
560
561                 /* TCP Segmentation Offload */
562                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
563                         virtio_tso_fix_cksum(cookie);
564                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
565                                 VIRTIO_NET_HDR_GSO_TCPV6 :
566                                 VIRTIO_NET_HDR_GSO_TCPV4;
567                         hdr->gso_size = cookie->tso_segsz;
568                         hdr->hdr_len =
569                                 cookie->l2_len +
570                                 cookie->l3_len +
571                                 cookie->l4_len;
572                 } else {
573                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
574                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
575                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
576                 }
577         }
578 }
579
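/* Enqueue single-segment Tx mbufs on an in-order split ring, pushing the
 * virtio-net header into the mbuf headroom (one descriptor per packet).
 */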
580 static inline void
581 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
582                         struct rte_mbuf **cookies,
583                         uint16_t num)
584 {
585         struct vq_desc_extra *dxp;
586         struct virtqueue *vq = txvq->vq;
587         struct vring_desc *start_dp;
588         struct virtio_net_hdr *hdr;
589         uint16_t idx;
590         uint16_t head_size = vq->hw->vtnet_hdr_size;
591         uint16_t i = 0;
592
593         idx = vq->vq_desc_head_idx;
594         start_dp = vq->vq_ring.desc;
595
596         while (i < num) {
597                 idx = idx & (vq->vq_nentries - 1);
598                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
599                 dxp->cookie = (void *)cookies[i];
600                 dxp->ndescs = 1;
601
602                 hdr = (struct virtio_net_hdr *)
603                         rte_pktmbuf_prepend(cookies[i], head_size);
604                 cookies[i]->pkt_len -= head_size;
605
606                 /* if offload disabled, hdr is not zeroed yet, do it now */
607                 if (!vq->hw->has_tx_offload)
608                         virtqueue_clear_net_hdr(hdr);
609                 else
610                         virtqueue_xmit_offload(hdr, cookies[i], true);
611
612                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
613                 start_dp[idx].len   = cookies[i]->data_len;
614                 start_dp[idx].flags = 0;
615
616                 vq_update_avail_ring(vq, idx);
617
618                 idx++;
619                 i++;
620         };
621
622         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
623         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
624 }
625
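/* Enqueue one (possibly multi-segment) Tx mbuf on a packed ring. The header is
 * either pushed into the mbuf headroom (can_push) or placed in the reserved
 * header region; the head descriptor flags are written last, after a write barrier.
 */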
626 static inline void
627 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
628                               uint16_t needed, int can_push, int in_order)
629 {
630         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
631         struct vq_desc_extra *dxp;
632         struct virtqueue *vq = txvq->vq;
633         struct vring_packed_desc *start_dp, *head_dp;
634         uint16_t idx, id, head_idx, head_flags;
635         uint16_t head_size = vq->hw->vtnet_hdr_size;
636         struct virtio_net_hdr *hdr;
637         uint16_t prev;
638
639         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
640
641         dxp = &vq->vq_descx[id];
642         dxp->ndescs = needed;
643         dxp->cookie = cookie;
644
645         head_idx = vq->vq_avail_idx;
646         idx = head_idx;
647         prev = head_idx;
648         start_dp = vq->ring_packed.desc_packed;
649
650         head_dp = &vq->ring_packed.desc_packed[idx];
651         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
652         head_flags |= vq->avail_used_flags;
653
654         if (can_push) {
655                 /* prepend cannot fail, checked by caller */
656                 hdr = (struct virtio_net_hdr *)
657                         rte_pktmbuf_prepend(cookie, head_size);
658                 /* rte_pktmbuf_prepend() adds the header size to the packet length,
659                  * which is not wanted here. The subtraction below restores the pkt size.
660                  */
661                 cookie->pkt_len -= head_size;
662
663                 /* if offload disabled, it is not zeroed below, do it now */
664                 if (!vq->hw->has_tx_offload)
665                         virtqueue_clear_net_hdr(hdr);
666         } else {
667                 /* setup first tx ring slot to point to header
668                  * stored in reserved region.
669                  */
670                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
671                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
672                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
673                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
674                 idx++;
675                 if (idx >= vq->vq_nentries) {
676                         idx -= vq->vq_nentries;
677                         vq->avail_wrap_counter ^= 1;
678                         vq->avail_used_flags =
679                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
680                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
681                 }
682         }
683
684         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
685
686         do {
687                 uint16_t flags;
688
689                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
690                 start_dp[idx].len  = cookie->data_len;
691                 if (likely(idx != head_idx)) {
692                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
693                         flags |= vq->avail_used_flags;
694                         start_dp[idx].flags = flags;
695                 }
696                 prev = idx;
697                 idx++;
698                 if (idx >= vq->vq_nentries) {
699                         idx -= vq->vq_nentries;
700                         vq->avail_wrap_counter ^= 1;
701                         vq->avail_used_flags =
702                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
703                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
704                 }
705         } while ((cookie = cookie->next) != NULL);
706
707         start_dp[prev].id = id;
708
709         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
710         vq->vq_avail_idx = idx;
711
712         if (!in_order) {
713                 vq->vq_desc_head_idx = dxp->next;
714                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
715                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
716         }
717
718         virtio_wmb(vq->hw->weak_barriers);
719         head_dp->flags = head_flags;
720 }
721
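/* Enqueue one Tx mbuf on a split ring. The virtio-net header is either pushed
 * into the mbuf headroom, referenced from an indirect descriptor table, or
 * placed in the reserved header region and chained to the data descriptors.
 */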
722 static inline void
723 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
724                         uint16_t needed, int use_indirect, int can_push,
725                         int in_order)
726 {
727         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
728         struct vq_desc_extra *dxp;
729         struct virtqueue *vq = txvq->vq;
730         struct vring_desc *start_dp;
731         uint16_t seg_num = cookie->nb_segs;
732         uint16_t head_idx, idx;
733         uint16_t head_size = vq->hw->vtnet_hdr_size;
734         struct virtio_net_hdr *hdr;
735
736         head_idx = vq->vq_desc_head_idx;
737         idx = head_idx;
738         if (in_order)
739                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
740         else
741                 dxp = &vq->vq_descx[idx];
742         dxp->cookie = (void *)cookie;
743         dxp->ndescs = needed;
744
745         start_dp = vq->vq_ring.desc;
746
747         if (can_push) {
748                 /* prepend cannot fail, checked by caller */
749                 hdr = (struct virtio_net_hdr *)
750                         rte_pktmbuf_prepend(cookie, head_size);
751                 /* rte_pktmbuf_prepend() adds the header size to the packet length,
752                  * which is not wanted here. The subtraction below restores the pkt size.
753                  */
754                 cookie->pkt_len -= head_size;
755
756                 /* if offload disabled, it is not zeroed below, do it now */
757                 if (!vq->hw->has_tx_offload)
758                         virtqueue_clear_net_hdr(hdr);
759         } else if (use_indirect) {
760                 /* setup tx ring slot to point to indirect
761                  * descriptor list stored in reserved region.
762                  *
763                  * the first slot in indirect ring is already preset
764                  * to point to the header in reserved region
765                  */
766                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
767                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
768                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
769                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
770                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
771
772                 /* loop below will fill in rest of the indirect elements */
773                 start_dp = txr[idx].tx_indir;
774                 idx = 1;
775         } else {
776                 /* setup first tx ring slot to point to header
777                  * stored in reserved region.
778                  */
779                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
780                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
781                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
782                 start_dp[idx].flags = VRING_DESC_F_NEXT;
783                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
784
785                 idx = start_dp[idx].next;
786         }
787
788         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
789
790         do {
791                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
792                 start_dp[idx].len   = cookie->data_len;
793                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
794                 idx = start_dp[idx].next;
795         } while ((cookie = cookie->next) != NULL);
796
797         if (use_indirect)
798                 idx = vq->vq_ring.desc[head_idx].next;
799
800         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
801
802         vq->vq_desc_head_idx = idx;
803         vq_update_avail_ring(vq, head_idx);
804
805         if (!in_order) {
806                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
807                         vq->vq_desc_tail_idx = idx;
808         }
809 }
810
811 void
812 virtio_dev_cq_start(struct rte_eth_dev *dev)
813 {
814         struct virtio_hw *hw = dev->data->dev_private;
815
816         if (hw->cvq && hw->cvq->vq) {
817                 rte_spinlock_init(&hw->cvq->lock);
818                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
819         }
820 }
821
822 int
823 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
824                         uint16_t queue_idx,
825                         uint16_t nb_desc,
826                         unsigned int socket_id __rte_unused,
827                         const struct rte_eth_rxconf *rx_conf __rte_unused,
828                         struct rte_mempool *mp)
829 {
830         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
831         struct virtio_hw *hw = dev->data->dev_private;
832         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
833         struct virtnet_rx *rxvq;
834
835         PMD_INIT_FUNC_TRACE();
836
837         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
838                 nb_desc = vq->vq_nentries;
839         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
840
841         rxvq = &vq->rxq;
842         rxvq->queue_id = queue_idx;
843         rxvq->mpool = mp;
844         if (rxvq->mpool == NULL) {
845                 rte_exit(EXIT_FAILURE,
846                         "Cannot allocate mbufs for rx virtqueue");
847         }
848
849         dev->data->rx_queues[queue_idx] = rxvq;
850
851         return 0;
852 }
853
854 int
855 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
856 {
857         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
858         struct virtio_hw *hw = dev->data->dev_private;
859         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
860         struct virtnet_rx *rxvq = &vq->rxq;
861         struct rte_mbuf *m;
862         uint16_t desc_idx;
863         int error, nbufs, i;
864
865         PMD_INIT_FUNC_TRACE();
866
867         /* Allocate blank mbufs for each rx descriptor */
868         nbufs = 0;
869
870         if (hw->use_simple_rx) {
871                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
872                      desc_idx++) {
873                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
874                         vq->vq_ring.desc[desc_idx].flags =
875                                 VRING_DESC_F_WRITE;
876                 }
877
878                 virtio_rxq_vec_setup(rxvq);
879         }
880
881         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
882         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
883              desc_idx++) {
884                 vq->sw_ring[vq->vq_nentries + desc_idx] =
885                         &rxvq->fake_mbuf;
886         }
887
888         if (hw->use_simple_rx) {
889                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
890                         virtio_rxq_rearm_vec(rxvq);
891                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
892                 }
893         } else if (hw->use_inorder_rx) {
894                 if ((!virtqueue_full(vq))) {
895                         uint16_t free_cnt = vq->vq_free_cnt;
896                         struct rte_mbuf *pkts[free_cnt];
897
898                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
899                                 free_cnt)) {
900                                 error = virtqueue_enqueue_refill_inorder(vq,
901                                                 pkts,
902                                                 free_cnt);
903                                 if (unlikely(error)) {
904                                         for (i = 0; i < free_cnt; i++)
905                                                 rte_pktmbuf_free(pkts[i]);
906                                 }
907                         }
908
909                         nbufs += free_cnt;
910                         vq_update_avail_idx(vq);
911                 }
912         } else {
913                 while (!virtqueue_full(vq)) {
914                         m = rte_mbuf_raw_alloc(rxvq->mpool);
915                         if (m == NULL)
916                                 break;
917
918                         /* Enqueue allocated buffers */
919                         if (vtpci_packed_queue(vq->hw))
920                                 error = virtqueue_enqueue_recv_refill_packed(vq,
921                                                 &m, 1);
922                         else
923                                 error = virtqueue_enqueue_recv_refill(vq,
924                                                 &m, 1);
925                         if (error) {
926                                 rte_pktmbuf_free(m);
927                                 break;
928                         }
929                         nbufs++;
930                 }
931
932                 if (!vtpci_packed_queue(vq->hw))
933                         vq_update_avail_idx(vq);
934         }
935
936         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
937
938         VIRTQUEUE_DUMP(vq);
939
940         return 0;
941 }
942
943 /*
944  * struct rte_eth_dev *dev: Used to update dev
945  * uint16_t nb_desc: Defaults to values read from config space
946  * unsigned int socket_id: Used to allocate memzone
947  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
948  * uint16_t queue_idx: Just used as an index in dev txq list
949  */
950 int
951 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
952                         uint16_t queue_idx,
953                         uint16_t nb_desc,
954                         unsigned int socket_id __rte_unused,
955                         const struct rte_eth_txconf *tx_conf)
956 {
957         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
958         struct virtio_hw *hw = dev->data->dev_private;
959         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
960         struct virtnet_tx *txvq;
961         uint16_t tx_free_thresh;
962
963         PMD_INIT_FUNC_TRACE();
964
965         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
966                 nb_desc = vq->vq_nentries;
967         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
968
969         txvq = &vq->txq;
970         txvq->queue_id = queue_idx;
971
972         tx_free_thresh = tx_conf->tx_free_thresh;
973         if (tx_free_thresh == 0)
974                 tx_free_thresh =
975                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
976
977         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
978                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
979                         "number of TX entries minus 3 (%u)."
980                         " (tx_free_thresh=%u port=%u queue=%u)\n",
981                         vq->vq_nentries - 3,
982                         tx_free_thresh, dev->data->port_id, queue_idx);
983                 return -EINVAL;
984         }
985
986         vq->vq_free_thresh = tx_free_thresh;
987
988         dev->data->tx_queues[queue_idx] = txvq;
989         return 0;
990 }
991
992 int
993 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
994                                 uint16_t queue_idx)
995 {
996         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
997         struct virtio_hw *hw = dev->data->dev_private;
998         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
999
1000         PMD_INIT_FUNC_TRACE();
1001
1002         if (!vtpci_packed_queue(hw)) {
1003                 if (hw->use_inorder_tx)
1004                         vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
1005         }
1006
1007         VIRTQUEUE_DUMP(vq);
1008
1009         return 0;
1010 }
1011
1012 static inline void
1013 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1014 {
1015         int error;
1016         /*
1017          * Requeue the discarded mbuf. This should always be
1018          * successful since it was just dequeued.
1019          */
1020         if (vtpci_packed_queue(vq->hw))
1021                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1022         else
1023                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1024
1025         if (unlikely(error)) {
1026                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1027                 rte_pktmbuf_free(m);
1028         }
1029 }
1030
1031 static inline void
1032 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1033 {
1034         int error;
1035
1036         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1037         if (unlikely(error)) {
1038                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1039                 rte_pktmbuf_free(m);
1040         }
1041 }
1042
1043 static inline void
1044 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1045 {
1046         uint32_t s = mbuf->pkt_len;
1047         struct ether_addr *ea;
1048
1049         stats->bytes += s;
1050
1051         if (s == 64) {
1052                 stats->size_bins[1]++;
1053         } else if (s > 64 && s < 1024) {
1054                 uint32_t bin;
1055
1056                 /* count zeros, and offset into correct bin */
1057                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1058                 stats->size_bins[bin]++;
1059         } else {
1060                 if (s < 64)
1061                         stats->size_bins[0]++;
1062                 else if (s < 1519)
1063                         stats->size_bins[6]++;
1064                 else if (s >= 1519)
1065                         stats->size_bins[7]++;
1066         }
1067
1068         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
1069         if (is_multicast_ether_addr(ea)) {
1070                 if (is_broadcast_ether_addr(ea))
1071                         stats->broadcast++;
1072                 else
1073                         stats->multicast++;
1074         }
1075 }
1076
1077 static inline void
1078 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1079 {
1080         VIRTIO_DUMP_PACKET(m, m->data_len);
1081
1082         virtio_update_packet_stats(&rxvq->stats, m);
1083 }
1084
1085 /* Optionally fill offload information in structure */
1086 static inline int
1087 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1088 {
1089         struct rte_net_hdr_lens hdr_lens;
1090         uint32_t hdrlen, ptype;
1091         int l4_supported = 0;
1092
1093         /* nothing to do */
1094         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1095                 return 0;
1096
1097         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1098
1099         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1100         m->packet_type = ptype;
1101         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1102             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1103             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1104                 l4_supported = 1;
1105
1106         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1107                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1108                 if (hdr->csum_start <= hdrlen && l4_supported) {
1109                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1110                 } else {
1111                         /* Unknown proto or tunnel, do sw cksum. We can assume
1112                          * the cksum field is in the first segment since the
1113                          * buffers we provided to the host are large enough.
1114                          * In case of SCTP, this will be wrong since it's a CRC
1115                          * but there's nothing we can do.
1116                          */
1117                         uint16_t csum = 0, off;
1118
1119                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1120                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1121                                 &csum);
1122                         if (likely(csum != 0xffff))
1123                                 csum = ~csum;
1124                         off = hdr->csum_offset + hdr->csum_start;
1125                         if (rte_pktmbuf_data_len(m) >= off + 1)
1126                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1127                                         off) = csum;
1128                 }
1129         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1130                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1131         }
1132
1133         /* GSO request, save required information in mbuf */
1134         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1135                 /* Check unsupported modes */
1136                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1137                     (hdr->gso_size == 0)) {
1138                         return -EINVAL;
1139                 }
1140
1141                 /* Update mss lengths in mbuf */
1142                 m->tso_segsz = hdr->gso_size;
1143                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1144                         case VIRTIO_NET_HDR_GSO_TCPV4:
1145                         case VIRTIO_NET_HDR_GSO_TCPV6:
1146                                 m->ol_flags |= PKT_RX_LRO | \
1147                                         PKT_RX_L4_CKSUM_NONE;
1148                                 break;
1149                         default:
1150                                 return -EINVAL;
1151                 }
1152         }
1153
1154         return 0;
1155 }
1156
1157 #define VIRTIO_MBUF_BURST_SZ 64
1158 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
1159 uint16_t
1160 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1161 {
1162         struct virtnet_rx *rxvq = rx_queue;
1163         struct virtqueue *vq = rxvq->vq;
1164         struct virtio_hw *hw = vq->hw;
1165         struct rte_mbuf *rxm, *new_mbuf;
1166         uint16_t nb_used, num, nb_rx;
1167         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1168         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1169         int error;
1170         uint32_t i, nb_enqueued;
1171         uint32_t hdr_size;
1172         struct virtio_net_hdr *hdr;
1173
1174         nb_rx = 0;
1175         if (unlikely(hw->started == 0))
1176                 return nb_rx;
1177
1178         nb_used = VIRTQUEUE_NUSED(vq);
1179
1180         virtio_rmb(hw->weak_barriers);
1181
1182         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1183         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1184                 num = VIRTIO_MBUF_BURST_SZ;
1185         if (likely(num > DESC_PER_CACHELINE))
1186                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1187
1188         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1189         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1190
1191         nb_enqueued = 0;
1192         hdr_size = hw->vtnet_hdr_size;
1193
1194         for (i = 0; i < num ; i++) {
1195                 rxm = rcv_pkts[i];
1196
1197                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1198
1199                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1200                         PMD_RX_LOG(ERR, "Packet drop");
1201                         nb_enqueued++;
1202                         virtio_discard_rxbuf(vq, rxm);
1203                         rxvq->stats.errors++;
1204                         continue;
1205                 }
1206
1207                 rxm->port = rxvq->port_id;
1208                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1209                 rxm->ol_flags = 0;
1210                 rxm->vlan_tci = 0;
1211
1212                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1213                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1214
1215                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1216                         RTE_PKTMBUF_HEADROOM - hdr_size);
1217
1218                 if (hw->vlan_strip)
1219                         rte_vlan_strip(rxm);
1220
1221                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1222                         virtio_discard_rxbuf(vq, rxm);
1223                         rxvq->stats.errors++;
1224                         continue;
1225                 }
1226
1227                 virtio_rx_stats_updated(rxvq, rxm);
1228
1229                 rx_pkts[nb_rx++] = rxm;
1230         }
1231
1232         rxvq->stats.packets += nb_rx;
1233
1234         /* Allocate new mbuf for the used descriptor */
1235         while (likely(!virtqueue_full(vq))) {
1236                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1237                 if (unlikely(new_mbuf == NULL)) {
1238                         struct rte_eth_dev *dev
1239                                 = &rte_eth_devices[rxvq->port_id];
1240                         dev->data->rx_mbuf_alloc_failed++;
1241                         break;
1242                 }
1243                 error = virtqueue_enqueue_recv_refill(vq, &new_mbuf, 1);
1244                 if (unlikely(error)) {
1245                         rte_pktmbuf_free(new_mbuf);
1246                         break;
1247                 }
1248                 nb_enqueued++;
1249         }
1250
1251         if (likely(nb_enqueued)) {
1252                 vq_update_avail_idx(vq);
1253
1254                 if (unlikely(virtqueue_kick_prepare(vq))) {
1255                         virtqueue_notify(vq);
1256                         PMD_RX_LOG(DEBUG, "Notified");
1257                 }
1258         }
1259
1260         return nb_rx;
1261 }
1262
1263 uint16_t
1264 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1265                         uint16_t nb_pkts)
1266 {
1267         struct virtnet_rx *rxvq = rx_queue;
1268         struct virtqueue *vq = rxvq->vq;
1269         struct virtio_hw *hw = vq->hw;
1270         struct rte_mbuf *rxm, *new_mbuf;
1271         uint16_t num, nb_rx;
1272         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1273         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1274         int error;
1275         uint32_t i, nb_enqueued;
1276         uint32_t hdr_size;
1277         struct virtio_net_hdr *hdr;
1278
1279         nb_rx = 0;
1280         if (unlikely(hw->started == 0))
1281                 return nb_rx;
1282
1283         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1284         if (likely(num > DESC_PER_CACHELINE))
1285                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1286
1287         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1288         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1289
1290         nb_enqueued = 0;
1291         hdr_size = hw->vtnet_hdr_size;
1292
1293         for (i = 0; i < num; i++) {
1294                 rxm = rcv_pkts[i];
1295
1296                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1297
1298                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1299                         PMD_RX_LOG(ERR, "Packet drop");
1300                         nb_enqueued++;
1301                         virtio_discard_rxbuf(vq, rxm);
1302                         rxvq->stats.errors++;
1303                         continue;
1304                 }
1305
1306                 rxm->port = rxvq->port_id;
1307                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1308                 rxm->ol_flags = 0;
1309                 rxm->vlan_tci = 0;
1310
1311                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1312                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1313
1314                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1315                         RTE_PKTMBUF_HEADROOM - hdr_size);
1316
1317                 if (hw->vlan_strip)
1318                         rte_vlan_strip(rxm);
1319
1320                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1321                         virtio_discard_rxbuf(vq, rxm);
1322                         rxvq->stats.errors++;
1323                         continue;
1324                 }
1325
1326                 virtio_rx_stats_updated(rxvq, rxm);
1327
1328                 rx_pkts[nb_rx++] = rxm;
1329         }
1330
1331         rxvq->stats.packets += nb_rx;
1332
1333         /* Allocate new mbuf for the used descriptor */
1334         while (likely(!virtqueue_full(vq))) {
1335                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1336                 if (unlikely(new_mbuf == NULL)) {
1337                         struct rte_eth_dev *dev =
1338                                 &rte_eth_devices[rxvq->port_id];
1339                         dev->data->rx_mbuf_alloc_failed++;
1340                         break;
1341                 }
1342                 error = virtqueue_enqueue_recv_refill_packed(vq, &new_mbuf, 1);
1343                 if (unlikely(error)) {
1344                         rte_pktmbuf_free(new_mbuf);
1345                         break;
1346                 }
1347                 nb_enqueued++;
1348         }
1349
1350         if (likely(nb_enqueued)) {
1351                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1352                         virtqueue_notify(vq);
1353                         PMD_RX_LOG(DEBUG, "Notified");
1354                 }
1355         }
1356
1357         return nb_rx;
1358 }
1359
1360
1361 uint16_t
1362 virtio_recv_pkts_inorder(void *rx_queue,
1363                         struct rte_mbuf **rx_pkts,
1364                         uint16_t nb_pkts)
1365 {
1366         struct virtnet_rx *rxvq = rx_queue;
1367         struct virtqueue *vq = rxvq->vq;
1368         struct virtio_hw *hw = vq->hw;
1369         struct rte_mbuf *rxm;
1370         struct rte_mbuf *prev;
1371         uint16_t nb_used, num, nb_rx;
1372         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1373         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1374         int error;
1375         uint32_t nb_enqueued;
1376         uint32_t seg_num;
1377         uint32_t seg_res;
1378         uint32_t hdr_size;
1379         int32_t i;
1380
1381         nb_rx = 0;
1382         if (unlikely(hw->started == 0))
1383                 return nb_rx;
1384
1385         nb_used = VIRTQUEUE_NUSED(vq);
1386         nb_used = RTE_MIN(nb_used, nb_pkts);
1387         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1388
1389         virtio_rmb(hw->weak_barriers);
1390
1391         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1392
1393         nb_enqueued = 0;
1394         seg_num = 1;
1395         seg_res = 0;
1396         hdr_size = hw->vtnet_hdr_size;
1397
1398         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1399
1400         for (i = 0; i < num; i++) {
1401                 struct virtio_net_hdr_mrg_rxbuf *header;
1402
1403                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1404                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1405
1406                 rxm = rcv_pkts[i];
1407
1408                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1409                         PMD_RX_LOG(ERR, "Packet drop");
1410                         nb_enqueued++;
1411                         virtio_discard_rxbuf_inorder(vq, rxm);
1412                         rxvq->stats.errors++;
1413                         continue;
1414                 }
1415
1416                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1417                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1418                          - hdr_size);
1419
1420                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1421                         seg_num = header->num_buffers;
1422                         if (seg_num == 0)
1423                                 seg_num = 1;
1424                 } else {
1425                         seg_num = 1;
1426                 }
1427
1428                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1429                 rxm->nb_segs = seg_num;
1430                 rxm->ol_flags = 0;
1431                 rxm->vlan_tci = 0;
1432                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1433                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1434
1435                 rxm->port = rxvq->port_id;
1436
1437                 rx_pkts[nb_rx] = rxm;
1438                 prev = rxm;
1439
1440                 if (vq->hw->has_rx_offload &&
1441                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1442                         virtio_discard_rxbuf_inorder(vq, rxm);
1443                         rxvq->stats.errors++;
1444                         continue;
1445                 }
1446
1447                 if (hw->vlan_strip)
1448                         rte_vlan_strip(rx_pkts[nb_rx]);
1449
1450                 seg_res = seg_num - 1;
1451
1452                 /* Merge remaining segments */
1453                 while (seg_res != 0 && i < (num - 1)) {
1454                         i++;
1455
1456                         rxm = rcv_pkts[i];
1457                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1458                         rxm->pkt_len = (uint32_t)(len[i]);
1459                         rxm->data_len = (uint16_t)(len[i]);
1460
1461                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1462                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1463
1464                         if (prev)
1465                                 prev->next = rxm;
1466
1467                         prev = rxm;
1468                         seg_res -= 1;
1469                 }
1470
1471                 if (!seg_res) {
1472                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1473                         nb_rx++;
1474                 }
1475         }
1476
1477         /* The last packet may still need its segments merged */
1478         while (seg_res != 0) {
1479                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1480                                         VIRTIO_MBUF_BURST_SZ);
1481
1482                 prev = rcv_pkts[nb_rx];
1483                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1484                         virtio_rmb(hw->weak_barriers);
1485                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1486                                                            rcv_cnt);
1487                         uint16_t extra_idx = 0;
1488
1489                         rcv_cnt = num;
1490                         while (extra_idx < rcv_cnt) {
1491                                 rxm = rcv_pkts[extra_idx];
1492                                 rxm->data_off =
1493                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1494                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1495                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1496                                 prev->next = rxm;
1497                                 prev = rxm;
1498                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1499                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1500                                 extra_idx += 1;
1501                         }
1502                         seg_res -= rcv_cnt;
1503
1504                         if (!seg_res) {
1505                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1506                                 nb_rx++;
1507                         }
1508                 } else {
1509                         PMD_RX_LOG(ERR,
1510                                         "No enough segments for packet.");
1511                         virtio_discard_rxbuf_inorder(vq, prev);
1512                         rxvq->stats.errors++;
1513                         break;
1514                 }
1515         }
1516
1517         rxvq->stats.packets += nb_rx;
1518
1519         /* Allocate new mbufs to refill the used descriptors */
1520
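             /*
              * rte_pktmbuf_alloc_bulk() is all-or-nothing: it returns 0 with
              * free_cnt mbufs allocated, or a negative value with none, so a
              * failed refill only bumps the port's rx_mbuf_alloc_failed
              * counter and leaves the ring untouched.
              */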
1521         if (likely(!virtqueue_full(vq))) {
1522                 /* free_cnt may include mrg descs */
1523                 uint16_t free_cnt = vq->vq_free_cnt;
1524                 struct rte_mbuf *new_pkts[free_cnt];
1525
1526                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1527                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1528                                         free_cnt);
1529                         if (unlikely(error)) {
1530                                 for (i = 0; i < free_cnt; i++)
1531                                         rte_pktmbuf_free(new_pkts[i]);
1532                         }
1533                         nb_enqueued += free_cnt;
1534                 } else {
1535                         struct rte_eth_dev *dev =
1536                                 &rte_eth_devices[rxvq->port_id];
1537                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1538                 }
1539         }
1540
1541         if (likely(nb_enqueued)) {
1542                 vq_update_avail_idx(vq);
1543
1544                 if (unlikely(virtqueue_kick_prepare(vq))) {
1545                         virtqueue_notify(vq);
1546                         PMD_RX_LOG(DEBUG, "Notified");
1547                 }
1548         }
1549
1550         return nb_rx;
1551 }
1552
1553 uint16_t
1554 virtio_recv_mergeable_pkts(void *rx_queue,
1555                         struct rte_mbuf **rx_pkts,
1556                         uint16_t nb_pkts)
1557 {
1558         struct virtnet_rx *rxvq = rx_queue;
1559         struct virtqueue *vq = rxvq->vq;
1560         struct virtio_hw *hw = vq->hw;
1561         struct rte_mbuf *rxm;
1562         struct rte_mbuf *prev;
1563         uint16_t nb_used, num, nb_rx = 0;
1564         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1565         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1566         int error;
1567         uint32_t nb_enqueued = 0;
1568         uint32_t seg_num = 0;
1569         uint32_t seg_res = 0;
1570         uint32_t hdr_size = hw->vtnet_hdr_size;
1571         int32_t i;
1572
1573         if (unlikely(hw->started == 0))
1574                 return nb_rx;
1575
1576         nb_used = VIRTQUEUE_NUSED(vq);
1577
1578         virtio_rmb(hw->weak_barriers);
1579
1580         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1581
1582         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1583         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1584                 num = VIRTIO_MBUF_BURST_SZ;
1585         if (likely(num > DESC_PER_CACHELINE))
1586                 num = num - ((vq->vq_used_cons_idx + num) %
1587                                 DESC_PER_CACHELINE);
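             /*
              * The trim above makes the burst stop on a DESC_PER_CACHELINE
              * boundary, presumably so ring accesses stay cacheline-aligned.
              * Hypothetical example, assuming DESC_PER_CACHELINE == 8: with
              * vq_used_cons_idx == 5 and num == 32, (5 + 32) % 8 == 5, so
              * num becomes 27 and the consumer index ends the burst at 32.
              */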
1588
1589
1590         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1591
1592         for (i = 0; i < num; i++) {
1593                 struct virtio_net_hdr_mrg_rxbuf *header;
1594
1595                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1596                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1597
1598                 rxm = rcv_pkts[i];
1599
1600                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1601                         PMD_RX_LOG(ERR, "Packet drop");
1602                         nb_enqueued++;
1603                         virtio_discard_rxbuf(vq, rxm);
1604                         rxvq->stats.errors++;
1605                         continue;
1606                 }
1607
1608                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1609                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1610                          - hdr_size);
1611                 seg_num = header->num_buffers;
1612                 if (seg_num == 0)
1613                         seg_num = 1;
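                     /*
                      * With VIRTIO_NET_F_MRG_RXBUF the device reports in
                      * num_buffers how many receive buffers this packet
                      * spans; the remaining seg_num - 1 buffers are chained
                      * onto this first mbuf by the merge loops below.
                      */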
1614
1615                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1616                 rxm->nb_segs = seg_num;
1617                 rxm->ol_flags = 0;
1618                 rxm->vlan_tci = 0;
1619                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1620                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1621
1622                 rxm->port = rxvq->port_id;
1623
1624                 rx_pkts[nb_rx] = rxm;
1625                 prev = rxm;
1626
1627                 if (hw->has_rx_offload &&
1628                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1629                         virtio_discard_rxbuf(vq, rxm);
1630                         rxvq->stats.errors++;
1631                         continue;
1632                 }
1633
1634                 if (hw->vlan_strip)
1635                         rte_vlan_strip(rx_pkts[nb_rx]);
1636
1637                 seg_res = seg_num - 1;
1638
1639                 /* Merge remaining segments */
1640                 while (seg_res != 0 && i < (num - 1)) {
1641                         i++;
1642
1643                         rxm = rcv_pkts[i];
1644                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1645                         rxm->pkt_len = (uint32_t)(len[i]);
1646                         rxm->data_len = (uint16_t)(len[i]);
1647
1648                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1649                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1650
1651                         if (prev)
1652                                 prev->next = rxm;
1653
1654                         prev = rxm;
1655                         seg_res -= 1;
1656                 }
1657
1658                 if (!seg_res) {
1659                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1660                         nb_rx++;
1661                 }
1662         }
1663
1664         /* The last packet may still need its segments merged */
1665         while (seg_res != 0) {
1666                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1667                                         VIRTIO_MBUF_BURST_SZ);
1668
1669                 prev = rcv_pkts[nb_rx];
1670                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1671                         virtio_rmb(hw->weak_barriers);
1672                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1673                                                            rcv_cnt);
1674                         uint16_t extra_idx = 0;
1675
1676                         rcv_cnt = num;
1677                         while (extra_idx < rcv_cnt) {
1678                                 rxm = rcv_pkts[extra_idx];
1679                                 rxm->data_off =
1680                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1681                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1682                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1683                                 prev->next = rxm;
1684                                 prev = rxm;
1685                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1686                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1687                                 extra_idx += 1;
1688                         }
1689                         seg_res -= rcv_cnt;
1690
1691                         if (!seg_res) {
1692                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1693                                 nb_rx++;
1694                         }
1695                 } else {
1696                         PMD_RX_LOG(ERR,
1697                                         "No enough segments for packet.");
1698                         virtio_discard_rxbuf(vq, prev);
1699                         rxvq->stats.errors++;
1700                         break;
1701                 }
1702         }
1703
1704         rxvq->stats.packets += nb_rx;
1705
1706         /* Allocate new mbufs to refill the used descriptors */
1707         if (likely(!virtqueue_full(vq))) {
1708                 /* free_cnt may include mrg descs */
1709                 uint16_t free_cnt = vq->vq_free_cnt;
1710                 struct rte_mbuf *new_pkts[free_cnt];
1711
1712                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1713                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1714                                         free_cnt);
1715                         if (unlikely(error)) {
1716                                 for (i = 0; i < free_cnt; i++)
1717                                         rte_pktmbuf_free(new_pkts[i]);
1718                         }
1719                         nb_enqueued += free_cnt;
1720                 } else {
1721                         struct rte_eth_dev *dev =
1722                                 &rte_eth_devices[rxvq->port_id];
1723                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1724                 }
1725         }
1726
1727         if (likely(nb_enqueued)) {
1728                 vq_update_avail_idx(vq);
1729
1730                 if (unlikely(virtqueue_kick_prepare(vq))) {
1731                         virtqueue_notify(vq);
1732                         PMD_RX_LOG(DEBUG, "Notified");
1733                 }
1734         }
1735
1736         return nb_rx;
1737 }
1738
1739 uint16_t
1740 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1741                         struct rte_mbuf **rx_pkts,
1742                         uint16_t nb_pkts)
1743 {
1744         struct virtnet_rx *rxvq = rx_queue;
1745         struct virtqueue *vq = rxvq->vq;
1746         struct virtio_hw *hw = vq->hw;
1747         struct rte_mbuf *rxm;
1748         struct rte_mbuf *prev = NULL;
1749         uint16_t num, nb_rx = 0;
1750         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1751         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1752         uint32_t nb_enqueued = 0;
1753         uint32_t seg_num = 0;
1754         uint32_t seg_res = 0;
1755         uint32_t hdr_size = hw->vtnet_hdr_size;
1756         int32_t i;
1757         int error;
1758
1759         if (unlikely(hw->started == 0))
1760                 return nb_rx;
1761
1762
1763         num = nb_pkts;
1764         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1765                 num = VIRTIO_MBUF_BURST_SZ;
1766         if (likely(num > DESC_PER_CACHELINE))
1767                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1768
1769         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1770
1771         for (i = 0; i < num; i++) {
1772                 struct virtio_net_hdr_mrg_rxbuf *header;
1773
1774                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1775                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1776
1777                 rxm = rcv_pkts[i];
1778
1779                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1780                         PMD_RX_LOG(ERR, "Packet drop");
1781                         nb_enqueued++;
1782                         virtio_discard_rxbuf(vq, rxm);
1783                         rxvq->stats.errors++;
1784                         continue;
1785                 }
1786
1787                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1788                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1789                 seg_num = header->num_buffers;
1790
1791                 if (seg_num == 0)
1792                         seg_num = 1;
1793
1794                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1795                 rxm->nb_segs = seg_num;
1796                 rxm->ol_flags = 0;
1797                 rxm->vlan_tci = 0;
1798                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1799                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1800
1801                 rxm->port = rxvq->port_id;
1802                 rx_pkts[nb_rx] = rxm;
1803                 prev = rxm;
1804
1805                 if (hw->has_rx_offload &&
1806                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1807                         virtio_discard_rxbuf(vq, rxm);
1808                         rxvq->stats.errors++;
1809                         continue;
1810                 }
1811
1812                 if (hw->vlan_strip)
1813                         rte_vlan_strip(rx_pkts[nb_rx]);
1814
1815                 seg_res = seg_num - 1;
1816
1817                 /* Merge remaining segments */
1818                 while (seg_res != 0 && i < (num - 1)) {
1819                         i++;
1820
1821                         rxm = rcv_pkts[i];
1822                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1823                         rxm->pkt_len = (uint32_t)(len[i]);
1824                         rxm->data_len = (uint16_t)(len[i]);
1825
1826                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1827                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1828
1829                         if (prev)
1830                                 prev->next = rxm;
1831
1832                         prev = rxm;
1833                         seg_res -= 1;
1834                 }
1835
1836                 if (!seg_res) {
1837                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1838                         nb_rx++;
1839                 }
1840         }
1841
1842         /* The last packet may still need its segments merged */
1843         while (seg_res != 0) {
1844                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1845                                         VIRTIO_MBUF_BURST_SZ);
1846                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1847                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1848                                         len, rcv_cnt);
1849                         uint16_t extra_idx = 0;
1850
1851                         rcv_cnt = num;
1852
1853                         while (extra_idx < rcv_cnt) {
1854                                 rxm = rcv_pkts[extra_idx];
1855
1856                                 rxm->data_off =
1857                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1858                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1859                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1860
1861                                 prev->next = rxm;
1862                                 prev = rxm;
1863                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1864                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1865                                 extra_idx += 1;
1866                         }
1867                         seg_res -= rcv_cnt;
1868                         if (!seg_res) {
1869                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1870                                 nb_rx++;
1871                         }
1872                 } else {
1873                         PMD_RX_LOG(ERR,
1874                                         "No enough segments for packet.");
1875                         if (prev)
1876                                 virtio_discard_rxbuf(vq, prev);
1877                         rxvq->stats.errors++;
1878                         break;
1879                 }
1880         }
1881
1882         rxvq->stats.packets += nb_rx;
1883
1884         /* Allocate new mbufs to refill the used descriptors */
1885         if (likely(!virtqueue_full(vq))) {
1886                 /* free_cnt may include mrg descs */
1887                 uint16_t free_cnt = vq->vq_free_cnt;
1888                 struct rte_mbuf *new_pkts[free_cnt];
1889
1890                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1891                         error = virtqueue_enqueue_recv_refill_packed(vq,
1892                                         new_pkts, free_cnt);
1893                         if (unlikely(error)) {
1894                                 for (i = 0; i < free_cnt; i++)
1895                                         rte_pktmbuf_free(new_pkts[i]);
1896                         }
1897                         nb_enqueued += free_cnt;
1898                 } else {
1899                         struct rte_eth_dev *dev =
1900                                 &rte_eth_devices[rxvq->port_id];
1901                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1902                 }
1903         }
1904
1905         if (likely(nb_enqueued)) {
1906                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1907                         virtqueue_notify(vq);
1908                         PMD_RX_LOG(DEBUG, "Notified");
1909                 }
1910         }
1911
1912         return nb_rx;
1913 }
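
/*
 * Illustrative sketch only, not part of the driver: the receive functions
 * above are installed as a port's rx_pkt_burst callback and are reached
 * through rte_eth_rx_burst().  example_rx_drain, port_id and queue_id are
 * hypothetical names.
 */
#if 0
static void example_rx_drain(uint16_t port_id, uint16_t queue_id)
{
        struct rte_mbuf *burst[32];
        uint16_t i, nb;

        /* Dequeue up to 32 packets; the PMD rx burst callback does the work. */
        nb = rte_eth_rx_burst(port_id, queue_id, burst, 32);

        for (i = 0; i < nb; i++)
                rte_pktmbuf_free(burst[i]); /* an application would process these */
}
#endif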
1914
1915 uint16_t
1916 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1917                         uint16_t nb_pkts)
1918 {
1919         struct virtnet_tx *txvq = tx_queue;
1920         struct virtqueue *vq = txvq->vq;
1921         struct virtio_hw *hw = vq->hw;
1922         uint16_t hdr_size = hw->vtnet_hdr_size;
1923         uint16_t nb_tx = 0;
1924         bool in_order = hw->use_inorder_tx;
1925         int error;
1926
1927         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1928                 return nb_tx;
1929
1930         if (unlikely(nb_pkts < 1))
1931                 return nb_pkts;
1932
1933         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1934
1935         if (nb_pkts > vq->vq_free_cnt)
1936                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1937                                            in_order);
1938
1939         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1940                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1941                 int can_push = 0, slots, need;
1942
1943                 /* Do VLAN tag insertion */
1944                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1945                         error = rte_vlan_insert(&txm);
1946                         if (unlikely(error)) {
1947                                 rte_pktmbuf_free(txm);
1948                                 continue;
1949                         }
1950                 }
1951
1952                 /* optimize ring usage */
1953                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1954                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1955                     rte_mbuf_refcnt_read(txm) == 1 &&
1956                     RTE_MBUF_DIRECT(txm) &&
1957                     txm->nb_segs == 1 &&
1958                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1959                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1960                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1961                         can_push = 1;
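                     /*
                      * can_push means the virtio-net header can be prepended
                      * in the mbuf headroom (single, direct, unshared segment
                      * with enough suitably aligned headroom), so no separate
                      * header descriptor is needed.
                      */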
1962
1963                 /* How many main ring entries are needed for this Tx?
1964                  * any_layout => number of segments
1965                  * default    => number of segments + 1
1966                  */
1967                 slots = txm->nb_segs + !can_push;
1968                 need = slots - vq->vq_free_cnt;
1969
1970                 /* A positive value means free vring descriptors are needed */
1971                 if (unlikely(need > 0)) {
1972                         virtio_xmit_cleanup_packed(vq, need, in_order);
1973                         need = slots - vq->vq_free_cnt;
1974                         if (unlikely(need > 0)) {
1975                                 PMD_TX_LOG(ERR,
1976                                            "No free tx descriptors to transmit");
1977                                 break;
1978                         }
1979                 }
1980
1981                 /* Enqueue Packet buffers */
1982                 virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push,
1983                                               in_order);
1984
1985                 virtio_update_packet_stats(&txvq->stats, txm);
1986         }
1987
1988         txvq->stats.packets += nb_tx;
1989
1990         if (likely(nb_tx)) {
1991                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1992                         virtqueue_notify(vq);
1993                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1994                 }
1995         }
1996
1997         return nb_tx;
1998 }
1999
2000 uint16_t
2001 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2002 {
2003         struct virtnet_tx *txvq = tx_queue;
2004         struct virtqueue *vq = txvq->vq;
2005         struct virtio_hw *hw = vq->hw;
2006         uint16_t hdr_size = hw->vtnet_hdr_size;
2007         uint16_t nb_used, nb_tx = 0;
2008         int error;
2009
2010         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2011                 return nb_tx;
2012
2013         if (unlikely(nb_pkts < 1))
2014                 return nb_pkts;
2015
2016         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2017         nb_used = VIRTQUEUE_NUSED(vq);
2018
2019         virtio_rmb(hw->weak_barriers);
2020         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2021                 virtio_xmit_cleanup(vq, nb_used);
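             /*
              * The cleanup above reclaims completed descriptors once the used
              * count crosses the free threshold, so the loop below rarely has
              * to clean up on demand.
              */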
2022
2023         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2024                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2025                 int can_push = 0, use_indirect = 0, slots, need;
2026
2027                 /* Do VLAN tag insertion */
2028                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2029                         error = rte_vlan_insert(&txm);
2030                         if (unlikely(error)) {
2031                                 rte_pktmbuf_free(txm);
2032                                 continue;
2033                         }
2034                 }
2035
2036                 /* optimize ring usage */
2037                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2038                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2039                     rte_mbuf_refcnt_read(txm) == 1 &&
2040                     RTE_MBUF_DIRECT(txm) &&
2041                     txm->nb_segs == 1 &&
2042                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2043                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2044                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2045                         can_push = 1;
2046                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2047                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2048                         use_indirect = 1;
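                     /*
                      * With indirect descriptors the whole chain lives in a
                      * separate indirect table, so only one main-ring slot is
                      * consumed.  For illustration, a hypothetical 3-segment
                      * mbuf needs 1 slot when use_indirect is set and 4
                      * (header + 3 data) otherwise, since can_push only
                      * applies to single-segment mbufs here.
                      */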
2049
2050                 /* How many main ring entries are needed for this Tx?
2051                  * any_layout => number of segments
2052                  * indirect   => 1
2053                  * default    => number of segments + 1
2054                  */
2055                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2056                 need = slots - vq->vq_free_cnt;
2057
2058                 /* A positive value means free vring descriptors are needed */
2059                 if (unlikely(need > 0)) {
2060                         nb_used = VIRTQUEUE_NUSED(vq);
2061                         virtio_rmb(hw->weak_barriers);
2062                         need = RTE_MIN(need, (int)nb_used);
2063
2064                         virtio_xmit_cleanup(vq, need);
2065                         need = slots - vq->vq_free_cnt;
2066                         if (unlikely(need > 0)) {
2067                                 PMD_TX_LOG(ERR,
2068                                            "No free tx descriptors to transmit");
2069                                 break;
2070                         }
2071                 }
2072
2073                 /* Enqueue Packet buffers */
2074                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2075                         can_push, 0);
2076
2077                 virtio_update_packet_stats(&txvq->stats, txm);
2078         }
2079
2080         txvq->stats.packets += nb_tx;
2081
2082         if (likely(nb_tx)) {
2083                 vq_update_avail_idx(vq);
2084
2085                 if (unlikely(virtqueue_kick_prepare(vq))) {
2086                         virtqueue_notify(vq);
2087                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2088                 }
2089         }
2090
2091         return nb_tx;
2092 }
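
/*
 * Illustrative sketch only, not part of the driver: the transmit functions
 * above are installed as a port's tx_pkt_burst callback and are reached
 * through rte_eth_tx_burst().  example_tx_drain, port_id and queue_id are
 * hypothetical names.
 */
#if 0
static void example_tx_drain(uint16_t port_id, uint16_t queue_id,
                             struct rte_mbuf **burst, uint16_t n)
{
        uint16_t sent = 0, done;

        /* Retry while the queue keeps accepting packets. */
        while (sent < n) {
                done = rte_eth_tx_burst(port_id, queue_id,
                                        burst + sent, n - sent);
                if (done == 0)
                        break;
                sent += done;
        }

        /* Free whatever the queue could not take. */
        while (sent < n)
                rte_pktmbuf_free(burst[sent++]);
}
#endif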
2093
2094 uint16_t
2095 virtio_xmit_pkts_inorder(void *tx_queue,
2096                         struct rte_mbuf **tx_pkts,
2097                         uint16_t nb_pkts)
2098 {
2099         struct virtnet_tx *txvq = tx_queue;
2100         struct virtqueue *vq = txvq->vq;
2101         struct virtio_hw *hw = vq->hw;
2102         uint16_t hdr_size = hw->vtnet_hdr_size;
2103         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2104         struct rte_mbuf *inorder_pkts[nb_pkts];
2105         int error;
2106
2107         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2108                 return nb_tx;
2109
2110         if (unlikely(nb_pkts < 1))
2111                 return nb_pkts;
2112
2113         VIRTQUEUE_DUMP(vq);
2114         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2115         nb_used = VIRTQUEUE_NUSED(vq);
2116
2117         virtio_rmb(hw->weak_barriers);
2118         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2119                 virtio_xmit_cleanup_inorder(vq, nb_used);
2120
2121         if (unlikely(!vq->vq_free_cnt))
2122                 virtio_xmit_cleanup_inorder(vq, nb_used);
2123
2124         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2125
2126         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2127                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2128                 int slots, need;
2129
2130                 /* Do VLAN tag insertion */
2131                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2132                         error = rte_vlan_insert(&txm);
2133                         if (unlikely(error)) {
2134                                 rte_pktmbuf_free(txm);
2135                                 continue;
2136                         }
2137                 }
2138
2139                 /* optimize ring usage */
2140                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2141                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2142                      rte_mbuf_refcnt_read(txm) == 1 &&
2143                      RTE_MBUF_DIRECT(txm) &&
2144                      txm->nb_segs == 1 &&
2145                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2146                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2147                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2148                         inorder_pkts[nb_inorder_pkts] = txm;
2149                         nb_inorder_pkts++;
2150
2151                         virtio_update_packet_stats(&txvq->stats, txm);
2152                         continue;
2153                 }
2154
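                     /*
                      * This packet cannot carry the header in its headroom,
                      * so it is sent on its own; the batch of headroom-capable
                      * packets collected so far is flushed first, keeping
                      * descriptor usage strictly in ring order.
                      */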
2155                 if (nb_inorder_pkts) {
2156                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2157                                                         nb_inorder_pkts);
2158                         nb_inorder_pkts = 0;
2159                 }
2160
2161                 slots = txm->nb_segs + 1;
2162                 need = slots - vq->vq_free_cnt;
2163                 if (unlikely(need > 0)) {
2164                         nb_used = VIRTQUEUE_NUSED(vq);
2165                         virtio_rmb(hw->weak_barriers);
2166                         need = RTE_MIN(need, (int)nb_used);
2167
2168                         virtio_xmit_cleanup_inorder(vq, need);
2169
2170                         need = slots - vq->vq_free_cnt;
2171
2172                         if (unlikely(need > 0)) {
2173                                 PMD_TX_LOG(ERR,
2174                                         "No free tx descriptors to transmit");
2175                                 break;
2176                         }
2177                 }
2178                 /* Enqueue Packet buffers */
2179                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2180
2181                 virtio_update_packet_stats(&txvq->stats, txm);
2182         }
2183
2184         /* Transmit all inorder packets */
2185         if (nb_inorder_pkts)
2186                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2187                                                 nb_inorder_pkts);
2188
2189         txvq->stats.packets += nb_tx;
2190
2191         if (likely(nb_tx)) {
2192                 vq_update_avail_idx(vq);
2193
2194                 if (unlikely(virtqueue_kick_prepare(vq))) {
2195                         virtqueue_notify(vq);
2196                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2197                 }
2198         }
2199
2200         VIRTQUEUE_DUMP(vq);
2201
2202         return nb_tx;
2203 }