net: add rte prefix to ether structures
[dpdk.git] / drivers / net / virtio / virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
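/*
 * Illustrative sketch, not part of the driver: virtio_dev_rx_queue_done()
 * backs the ethdev rx_descriptor_done callback, so an application can poll
 * Rx completion through the generic API. The helper name below is
 * hypothetical and the guard macro only exists so the example is never
 * built.
 */
#ifdef VIRTIO_RXTX_EXAMPLES
static int
example_rx_ready(uint16_t port_id, uint16_t queue_id, uint16_t offset)
{
        /* returns 1 once at least "offset" used descriptors are pending */
        return rte_eth_rx_descriptor_done(port_id, queue_id, offset);
}
#endif /* VIRTIO_RXTX_EXAMPLES */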
50
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * newly freed chain. If the virtqueue was completely used, then
79          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
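/*
 * Illustrative sketch, not part of the driver: on the split ring the free
 * descriptors form a singly linked list threaded through desc[].next,
 * headed by vq_desc_head_idx, ended by vq_desc_tail_idx and terminated
 * with VQ_RING_DESC_CHAIN_END. The hypothetical helper below only walks
 * that list and is guarded out of any real build.
 */
#ifdef VIRTIO_RXTX_EXAMPLES
static uint16_t
example_count_free_chain(struct virtqueue *vq)
{
        uint16_t idx = vq->vq_desc_head_idx;
        uint16_t cnt = 0;

        while (idx != VQ_RING_DESC_CHAIN_END && cnt < vq->vq_nentries) {
                cnt++;
                idx = vq->vq_split.ring.desc[idx].next;
        }
        /* on the non in-order path this should match vq->vq_free_cnt */
        return cnt;
}
#endif /* VIRTIO_RXTX_EXAMPLES */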
91
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->vq_packed.ring.desc;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->vq_packed.used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
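/*
 * Illustrative sketch, not part of the driver: desc_is_used() (virtqueue.h)
 * decides whether a packed descriptor has been returned by the device by
 * comparing the AVAIL/USED bits in desc->flags with the queue's
 * used_wrap_counter; a descriptor is "used" when both bits equal the
 * current wrap counter. A minimal restatement of that test, using the
 * VIRTIO 1.1 bit positions directly, could look like this (guarded out of
 * any real build):
 */
#ifdef VIRTIO_RXTX_EXAMPLES
static inline int
example_packed_desc_is_used(const struct vring_packed_desc *dp,
                            uint16_t used_wrap_counter)
{
        uint16_t flags = dp->flags;
        int avail = !!(flags & (1 << 7));       /* AVAIL bit, VIRTIO 1.1 */
        int used = !!(flags & (1 << 15));       /* USED bit, VIRTIO 1.1 */

        return avail == used && used == used_wrap_counter;
}
#endif /* VIRTIO_RXTX_EXAMPLES */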
150
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /* Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_split.ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_split.ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->vq_packed.used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
258
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->vq_packed.used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_split.ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_split.ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
385
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_split.ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
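/*
 * Note on the refill address arithmetic above (and in the in-order variant):
 * the descriptor is pointed hw->vtnet_hdr_size bytes before the mbuf data
 * area, so the device writes the virtio-net header into the tail of the
 * mbuf headroom and the packet payload lands exactly at
 * data_off = RTE_PKTMBUF_HEADROOM:
 *
 *   buf_addr                    buf_addr + RTE_PKTMBUF_HEADROOM (data_off)
 *   |<--------- headroom --------->|<--------- packet data --------- ...
 *               |<-- vtnet_hdr --->|
 *               ^
 *               desc.addr (desc.len runs to the end of the buffer)
 */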
428
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
434         uint16_t flags = vq->vq_packed.cached_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->vq_packed.cached_flags ^=
464                                 VRING_PACKED_DESC_F_AVAIL_USED;
465                         flags = vq->vq_packed.cached_flags;
466                 }
467         }
468         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
469         return 0;
470 }
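/*
 * Note on the packed-ring refill above: addr/len and the shadow state in
 * vq_descx[] are written first, then virtio_wmb() orders those stores
 * before the flags write that actually hands the descriptor to the device.
 * When vq_avail_idx wraps past vq_nentries, cached_flags has both the
 * AVAIL and USED bits flipped (VRING_PACKED_DESC_F_AVAIL_USED) so that the
 * descriptors of the next lap are published with the new wrap state.
 */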
471
472 /* When doing TSO, the IP length is not included in the pseudo header
473  * checksum of the packet given to the PMD, but for virtio it is
474  * expected.
475  */
476 static void
477 virtio_tso_fix_cksum(struct rte_mbuf *m)
478 {
479         /* common case: header is not fragmented */
480         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
481                         m->l4_len)) {
482                 struct ipv4_hdr *iph;
483                 struct ipv6_hdr *ip6h;
484                 struct tcp_hdr *th;
485                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
486                 uint32_t tmp;
487
488                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
489                 th = RTE_PTR_ADD(iph, m->l3_len);
490                 if ((iph->version_ihl >> 4) == 4) {
491                         iph->hdr_checksum = 0;
492                         iph->hdr_checksum = rte_ipv4_cksum(iph);
493                         ip_len = iph->total_length;
494                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
495                                 m->l3_len);
496                 } else {
497                         ip6h = (struct ipv6_hdr *)iph;
498                         ip_paylen = ip6h->payload_len;
499                 }
500
501                 /* calculate the new phdr checksum not including ip_paylen */
502                 prev_cksum = th->cksum;
503                 tmp = prev_cksum;
504                 tmp += ip_paylen;
505                 tmp = (tmp & 0xffff) + (tmp >> 16);
506                 new_cksum = tmp;
507
508                 /* replace it in the packet */
509                 th->cksum = new_cksum;
510         }
511 }
512
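/*
 * Illustrative sketch, not part of the driver: virtio_tso_fix_cksum() above
 * adds the big-endian IP payload length back into the TCP pseudo-header
 * checksum using one's-complement arithmetic (add, then fold the carry into
 * the low 16 bits). The hypothetical helper below isolates that step; e.g.
 * prev_cksum = 0xfff0 and ip_paylen = 0x0020 give 0x10010 -> 0x0011.
 */
#ifdef VIRTIO_RXTX_EXAMPLES
static inline uint16_t
example_cksum_add(uint16_t prev_cksum, uint16_t ip_paylen)
{
        uint32_t tmp = (uint32_t)prev_cksum + ip_paylen;

        tmp = (tmp & 0xffff) + (tmp >> 16);
        return (uint16_t)tmp;
}
#endif /* VIRTIO_RXTX_EXAMPLES */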
513
514 /* avoid the write operation when it is not needed, to lessen cache issues */
515 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
516         if ((var) != (val))                     \
517                 (var) = (val);                  \
518 } while (0)
519
520 #define virtqueue_clear_net_hdr(_hdr) do {              \
521         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
522         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
523         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
524         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
525         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
526         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
527 } while (0)
528
529 static inline void
530 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
531                         struct rte_mbuf *cookie,
532                         bool offload)
533 {
534         if (offload) {
535                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
536                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
537
538                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
539                 case PKT_TX_UDP_CKSUM:
540                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
541                         hdr->csum_offset = offsetof(struct udp_hdr,
542                                 dgram_cksum);
543                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
544                         break;
545
546                 case PKT_TX_TCP_CKSUM:
547                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
548                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
549                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
550                         break;
551
552                 default:
553                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
554                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
555                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
556                         break;
557                 }
558
559                 /* TCP Segmentation Offload */
560                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
561                         virtio_tso_fix_cksum(cookie);
562                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
563                                 VIRTIO_NET_HDR_GSO_TCPV6 :
564                                 VIRTIO_NET_HDR_GSO_TCPV4;
565                         hdr->gso_size = cookie->tso_segsz;
566                         hdr->hdr_len =
567                                 cookie->l2_len +
568                                 cookie->l3_len +
569                                 cookie->l4_len;
570                 } else {
571                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
572                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
573                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
574                 }
575         }
576 }
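/*
 * Illustrative sketch, not part of the driver: for the checksum path above
 * to trigger, the application must request the offload per mbuf before
 * calling rte_eth_tx_burst(). A minimal TCP/IPv4 request (hypothetical
 * helper name, guarded out of any real build):
 */
#ifdef VIRTIO_RXTX_EXAMPLES
static void
example_request_tcp_cksum(struct rte_mbuf *m)
{
        m->l2_len = sizeof(struct rte_ether_hdr);
        m->l3_len = sizeof(struct ipv4_hdr);
        m->ol_flags |= PKT_TX_IPV4 | PKT_TX_TCP_CKSUM;
        /*
         * virtqueue_xmit_offload() then fills the virtio-net header with
         * csum_start = l2_len + l3_len, csum_offset = offsetof(struct
         * tcp_hdr, cksum) and flags = VIRTIO_NET_HDR_F_NEEDS_CSUM.
         */
}
#endif /* VIRTIO_RXTX_EXAMPLES */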
577
578 static inline void
579 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
580                         struct rte_mbuf **cookies,
581                         uint16_t num)
582 {
583         struct vq_desc_extra *dxp;
584         struct virtqueue *vq = txvq->vq;
585         struct vring_desc *start_dp;
586         struct virtio_net_hdr *hdr;
587         uint16_t idx;
588         uint16_t head_size = vq->hw->vtnet_hdr_size;
589         uint16_t i = 0;
590
591         idx = vq->vq_desc_head_idx;
592         start_dp = vq->vq_split.ring.desc;
593
594         while (i < num) {
595                 idx = idx & (vq->vq_nentries - 1);
596                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
597                 dxp->cookie = (void *)cookies[i];
598                 dxp->ndescs = 1;
599
600                 hdr = (struct virtio_net_hdr *)
601                         rte_pktmbuf_prepend(cookies[i], head_size);
602                 cookies[i]->pkt_len -= head_size;
603
604                 /* if offload disabled, hdr is not zeroed yet, do it now */
605                 if (!vq->hw->has_tx_offload)
606                         virtqueue_clear_net_hdr(hdr);
607                 else
608                         virtqueue_xmit_offload(hdr, cookies[i], true);
609
610                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
611                 start_dp[idx].len   = cookies[i]->data_len;
612                 start_dp[idx].flags = 0;
613
614                 vq_update_avail_ring(vq, idx);
615
616                 idx++;
617                 i++;
618         }
619
620         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
621         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
622 }
623
624 static inline void
625 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
626                                    struct rte_mbuf *cookie,
627                                    int in_order)
628 {
629         struct virtqueue *vq = txvq->vq;
630         struct vring_packed_desc *dp;
631         struct vq_desc_extra *dxp;
632         uint16_t idx, id, flags;
633         uint16_t head_size = vq->hw->vtnet_hdr_size;
634         struct virtio_net_hdr *hdr;
635
636         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
637         idx = vq->vq_avail_idx;
638         dp = &vq->vq_packed.ring.desc[idx];
639
640         dxp = &vq->vq_descx[id];
641         dxp->ndescs = 1;
642         dxp->cookie = cookie;
643
644         flags = vq->vq_packed.cached_flags;
645
646         /* prepend cannot fail, checked by caller */
647         hdr = (struct virtio_net_hdr *)
648                 rte_pktmbuf_prepend(cookie, head_size);
649         cookie->pkt_len -= head_size;
650
651         /* if offload disabled, hdr is not zeroed yet, do it now */
652         if (!vq->hw->has_tx_offload)
653                 virtqueue_clear_net_hdr(hdr);
654         else
655                 virtqueue_xmit_offload(hdr, cookie, true);
656
657         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
658         dp->len  = cookie->data_len;
659         dp->id   = id;
660
661         if (++vq->vq_avail_idx >= vq->vq_nentries) {
662                 vq->vq_avail_idx -= vq->vq_nentries;
663                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
664         }
665
666         vq->vq_free_cnt--;
667
668         if (!in_order) {
669                 vq->vq_desc_head_idx = dxp->next;
670                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
671                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
672         }
673
674         virtio_wmb(vq->hw->weak_barriers);
675         dp->flags = flags;
676 }
677
678 static inline void
679 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
680                               uint16_t needed, int can_push, int in_order)
681 {
682         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
683         struct vq_desc_extra *dxp;
684         struct virtqueue *vq = txvq->vq;
685         struct vring_packed_desc *start_dp, *head_dp;
686         uint16_t idx, id, head_idx, head_flags;
687         uint16_t head_size = vq->hw->vtnet_hdr_size;
688         struct virtio_net_hdr *hdr;
689         uint16_t prev;
690
691         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
692
693         dxp = &vq->vq_descx[id];
694         dxp->ndescs = needed;
695         dxp->cookie = cookie;
696
697         head_idx = vq->vq_avail_idx;
698         idx = head_idx;
699         prev = head_idx;
700         start_dp = vq->vq_packed.ring.desc;
701
702         head_dp = &vq->vq_packed.ring.desc[idx];
703         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
704         head_flags |= vq->vq_packed.cached_flags;
705
706         if (can_push) {
707                 /* prepend cannot fail, checked by caller */
708                 hdr = (struct virtio_net_hdr *)
709                         rte_pktmbuf_prepend(cookie, head_size);
710                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
711                  * which is not wanted here; the subtraction below restores it.
712                  */
713                 cookie->pkt_len -= head_size;
714
715                 /* if offload disabled, it is not zeroed below, do it now */
716                 if (!vq->hw->has_tx_offload)
717                         virtqueue_clear_net_hdr(hdr);
718         } else {
719                 /* setup first tx ring slot to point to header
720                  * stored in reserved region.
721                  */
722                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
723                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
724                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
725                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
726                 idx++;
727                 if (idx >= vq->vq_nentries) {
728                         idx -= vq->vq_nentries;
729                         vq->vq_packed.cached_flags ^=
730                                 VRING_PACKED_DESC_F_AVAIL_USED;
731                 }
732         }
733
734         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
735
736         do {
737                 uint16_t flags;
738
739                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
740                 start_dp[idx].len  = cookie->data_len;
741                 if (likely(idx != head_idx)) {
742                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
743                         flags |= vq->vq_packed.cached_flags;
744                         start_dp[idx].flags = flags;
745                 }
746                 prev = idx;
747                 idx++;
748                 if (idx >= vq->vq_nentries) {
749                         idx -= vq->vq_nentries;
750                         vq->vq_packed.cached_flags ^=
751                                 VRING_PACKED_DESC_F_AVAIL_USED;
752                 }
753         } while ((cookie = cookie->next) != NULL);
754
755         start_dp[prev].id = id;
756
757         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
758         vq->vq_avail_idx = idx;
759
760         if (!in_order) {
761                 vq->vq_desc_head_idx = dxp->next;
762                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
763                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
764         }
765
766         virtio_wmb(vq->hw->weak_barriers);
767         head_dp->flags = head_flags;
768 }
769
770 static inline void
771 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
772                         uint16_t needed, int use_indirect, int can_push,
773                         int in_order)
774 {
775         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
776         struct vq_desc_extra *dxp;
777         struct virtqueue *vq = txvq->vq;
778         struct vring_desc *start_dp;
779         uint16_t seg_num = cookie->nb_segs;
780         uint16_t head_idx, idx;
781         uint16_t head_size = vq->hw->vtnet_hdr_size;
782         struct virtio_net_hdr *hdr;
783
784         head_idx = vq->vq_desc_head_idx;
785         idx = head_idx;
786         if (in_order)
787                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
788         else
789                 dxp = &vq->vq_descx[idx];
790         dxp->cookie = (void *)cookie;
791         dxp->ndescs = needed;
792
793         start_dp = vq->vq_split.ring.desc;
794
795         if (can_push) {
796                 /* prepend cannot fail, checked by caller */
797                 hdr = (struct virtio_net_hdr *)
798                         rte_pktmbuf_prepend(cookie, head_size);
799                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
800                  * which is not wanted here; the subtraction below restores it.
801                  */
802                 cookie->pkt_len -= head_size;
803
804                 /* if offload disabled, it is not zeroed below, do it now */
805                 if (!vq->hw->has_tx_offload)
806                         virtqueue_clear_net_hdr(hdr);
807         } else if (use_indirect) {
808                 /* setup tx ring slot to point to indirect
809                  * descriptor list stored in reserved region.
810                  *
811                  * the first slot in indirect ring is already preset
812                  * to point to the header in reserved region
813                  */
814                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
815                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
816                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
817                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
818                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
819
820                 /* loop below will fill in rest of the indirect elements */
821                 start_dp = txr[idx].tx_indir;
822                 idx = 1;
823         } else {
824                 /* setup first tx ring slot to point to header
825                  * stored in reserved region.
826                  */
827                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
828                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
829                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
830                 start_dp[idx].flags = VRING_DESC_F_NEXT;
831                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
832
833                 idx = start_dp[idx].next;
834         }
835
836         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
837
838         do {
839                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
840                 start_dp[idx].len   = cookie->data_len;
841                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
842                 idx = start_dp[idx].next;
843         } while ((cookie = cookie->next) != NULL);
844
845         if (use_indirect)
846                 idx = vq->vq_split.ring.desc[head_idx].next;
847
848         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
849
850         vq->vq_desc_head_idx = idx;
851         vq_update_avail_ring(vq, head_idx);
852
853         if (!in_order) {
854                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
855                         vq->vq_desc_tail_idx = idx;
856         }
857 }
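/*
 * Note on the three transmit layouts implemented above: with can_push the
 * virtio-net header is prepended into the mbuf headroom, so only the data
 * segments consume descriptors; with use_indirect a single ring slot points
 * at the per-slot indirect table in the reserved virtio_tx_region, whose
 * first element is already preset to the header; otherwise the header kept
 * in the reserved region takes one extra descriptor chained in front of the
 * data segments.
 */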
858
859 void
860 virtio_dev_cq_start(struct rte_eth_dev *dev)
861 {
862         struct virtio_hw *hw = dev->data->dev_private;
863
864         if (hw->cvq && hw->cvq->vq) {
865                 rte_spinlock_init(&hw->cvq->lock);
866                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
867         }
868 }
869
870 int
871 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
872                         uint16_t queue_idx,
873                         uint16_t nb_desc,
874                         unsigned int socket_id __rte_unused,
875                         const struct rte_eth_rxconf *rx_conf __rte_unused,
876                         struct rte_mempool *mp)
877 {
878         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
879         struct virtio_hw *hw = dev->data->dev_private;
880         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
881         struct virtnet_rx *rxvq;
882
883         PMD_INIT_FUNC_TRACE();
884
885         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
886                 nb_desc = vq->vq_nentries;
887         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
888
889         rxvq = &vq->rxq;
890         rxvq->queue_id = queue_idx;
891         rxvq->mpool = mp;
892         if (rxvq->mpool == NULL) {
893                 rte_exit(EXIT_FAILURE,
894                         "Cannot allocate mbufs for rx virtqueue");
895         }
896
897         dev->data->rx_queues[queue_idx] = rxvq;
898
899         return 0;
900 }
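/*
 * Illustrative sketch, not part of the driver: this function is reached
 * through the generic ethdev API, and nb_desc is clamped above to the
 * virtqueue size negotiated with the device. Hypothetical helper, guarded
 * out of any real build:
 */
#ifdef VIRTIO_RXTX_EXAMPLES
static int
example_rx_queue_setup(uint16_t port_id, uint16_t queue_id,
                       struct rte_mempool *mp)
{
        /* 256 descriptors, NUMA socket 0, default rxconf */
        return rte_eth_rx_queue_setup(port_id, queue_id, 256, 0, NULL, mp);
}
#endif /* VIRTIO_RXTX_EXAMPLES */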
901
902 int
903 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
904 {
905         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
906         struct virtio_hw *hw = dev->data->dev_private;
907         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
908         struct virtnet_rx *rxvq = &vq->rxq;
909         struct rte_mbuf *m;
910         uint16_t desc_idx;
911         int error, nbufs, i;
912
913         PMD_INIT_FUNC_TRACE();
914
915         /* Allocate blank mbufs for each rx descriptor */
916         nbufs = 0;
917
918         if (hw->use_simple_rx) {
919                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
920                      desc_idx++) {
921                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
922                         vq->vq_split.ring.desc[desc_idx].flags =
923                                 VRING_DESC_F_WRITE;
924                 }
925
926                 virtio_rxq_vec_setup(rxvq);
927         }
928
929         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
930         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
931              desc_idx++) {
932                 vq->sw_ring[vq->vq_nentries + desc_idx] =
933                         &rxvq->fake_mbuf;
934         }
935
936         if (hw->use_simple_rx) {
937                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
938                         virtio_rxq_rearm_vec(rxvq);
939                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
940                 }
941         } else if (hw->use_inorder_rx) {
942                 if ((!virtqueue_full(vq))) {
943                         uint16_t free_cnt = vq->vq_free_cnt;
944                         struct rte_mbuf *pkts[free_cnt];
945
946                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
947                                 free_cnt)) {
948                                 error = virtqueue_enqueue_refill_inorder(vq,
949                                                 pkts,
950                                                 free_cnt);
951                                 if (unlikely(error)) {
952                                         for (i = 0; i < free_cnt; i++)
953                                                 rte_pktmbuf_free(pkts[i]);
954                                 }
955                         }
956
957                         nbufs += free_cnt;
958                         vq_update_avail_idx(vq);
959                 }
960         } else {
961                 while (!virtqueue_full(vq)) {
962                         m = rte_mbuf_raw_alloc(rxvq->mpool);
963                         if (m == NULL)
964                                 break;
965
966                         /* Enqueue allocated buffers */
967                         if (vtpci_packed_queue(vq->hw))
968                                 error = virtqueue_enqueue_recv_refill_packed(vq,
969                                                 &m, 1);
970                         else
971                                 error = virtqueue_enqueue_recv_refill(vq,
972                                                 &m, 1);
973                         if (error) {
974                                 rte_pktmbuf_free(m);
975                                 break;
976                         }
977                         nbufs++;
978                 }
979
980                 if (!vtpci_packed_queue(vq->hw))
981                         vq_update_avail_idx(vq);
982         }
983
984         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
985
986         VIRTQUEUE_DUMP(vq);
987
988         return 0;
989 }
990
991 /*
992  * struct rte_eth_dev *dev: Used to update dev
993  * uint16_t nb_desc: Defaults to values read from config space
994  * unsigned int socket_id: Used to allocate memzone
995  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
996  * uint16_t queue_idx: Just used as an index in dev txq list
997  */
998 int
999 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1000                         uint16_t queue_idx,
1001                         uint16_t nb_desc,
1002                         unsigned int socket_id __rte_unused,
1003                         const struct rte_eth_txconf *tx_conf)
1004 {
1005         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1006         struct virtio_hw *hw = dev->data->dev_private;
1007         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1008         struct virtnet_tx *txvq;
1009         uint16_t tx_free_thresh;
1010
1011         PMD_INIT_FUNC_TRACE();
1012
1013         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1014                 nb_desc = vq->vq_nentries;
1015         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1016
1017         txvq = &vq->txq;
1018         txvq->queue_id = queue_idx;
1019
1020         tx_free_thresh = tx_conf->tx_free_thresh;
1021         if (tx_free_thresh == 0)
1022                 tx_free_thresh =
1023                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1024
1025         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1026                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1027                         "number of TX entries minus 3 (%u)."
1028                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1029                         vq->vq_nentries - 3,
1030                         tx_free_thresh, dev->data->port_id, queue_idx);
1031                 return -EINVAL;
1032         }
1033
1034         vq->vq_free_thresh = tx_free_thresh;
1035
1036         dev->data->tx_queues[queue_idx] = txvq;
1037         return 0;
1038 }
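/*
 * Illustrative sketch, not part of the driver: tx_free_thresh reaches this
 * function through the ethdev txconf; leaving it at 0 lets the PMD pick
 * RTE_MIN(vq_nentries / 4, DEFAULT_TX_FREE_THRESH) as computed above.
 * Hypothetical helper, guarded out of any real build:
 */
#ifdef VIRTIO_RXTX_EXAMPLES
static int
example_tx_queue_setup(uint16_t port_id, uint16_t queue_id)
{
        struct rte_eth_txconf txconf = {
                .tx_free_thresh = 0,    /* 0 selects the PMD default above */
        };

        return rte_eth_tx_queue_setup(port_id, queue_id, 256, 0, &txconf);
}
#endif /* VIRTIO_RXTX_EXAMPLES */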
1039
1040 int
1041 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1042                                 uint16_t queue_idx)
1043 {
1044         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1045         struct virtio_hw *hw = dev->data->dev_private;
1046         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1047
1048         PMD_INIT_FUNC_TRACE();
1049
1050         if (!vtpci_packed_queue(hw)) {
1051                 if (hw->use_inorder_tx)
1052                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1053         }
1054
1055         VIRTQUEUE_DUMP(vq);
1056
1057         return 0;
1058 }
1059
1060 static inline void
1061 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1062 {
1063         int error;
1064         /*
1065          * Requeue the discarded mbuf. This should always be
1066          * successful since it was just dequeued.
1067          */
1068         if (vtpci_packed_queue(vq->hw))
1069                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1070         else
1071                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1072
1073         if (unlikely(error)) {
1074                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1075                 rte_pktmbuf_free(m);
1076         }
1077 }
1078
1079 static inline void
1080 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1081 {
1082         int error;
1083
1084         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1085         if (unlikely(error)) {
1086                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1087                 rte_pktmbuf_free(m);
1088         }
1089 }
1090
1091 static inline void
1092 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1093 {
1094         uint32_t s = mbuf->pkt_len;
1095         struct rte_ether_addr *ea;
1096
1097         stats->bytes += s;
1098
1099         if (s == 64) {
1100                 stats->size_bins[1]++;
1101         } else if (s > 64 && s < 1024) {
1102                 uint32_t bin;
1103
1104                 /* count leading zeros to offset into the correct bin */
1105                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1106                 stats->size_bins[bin]++;
1107         } else {
1108                 if (s < 64)
1109                         stats->size_bins[0]++;
1110                 else if (s < 1519)
1111                         stats->size_bins[6]++;
1112                 else
1113                         stats->size_bins[7]++;
1114         }
1115
1116         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
1117         if (is_multicast_ether_addr(ea)) {
1118                 if (is_broadcast_ether_addr(ea))
1119                         stats->broadcast++;
1120                 else
1121                         stats->multicast++;
1122         }
1123 }
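/*
 * Note on the size-bin arithmetic above: for 64 < s < 1024 the bin index is
 * derived from the position of the highest set bit, e.g. s = 512 gives
 * sizeof(s) * 8 = 32, __builtin_clz(512) = 22, bin = 32 - 22 - 5 = 5, so
 * size_bins[5] counts 512..1023 byte packets. Bins 0, 1, 6 and 7 are set
 * explicitly for s < 64, s == 64, 1024..1518 and >= 1519 respectively.
 */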
1124
1125 static inline void
1126 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1127 {
1128         VIRTIO_DUMP_PACKET(m, m->data_len);
1129
1130         virtio_update_packet_stats(&rxvq->stats, m);
1131 }
1132
1133 /* Optionally fill offload information in structure */
1134 static inline int
1135 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1136 {
1137         struct rte_net_hdr_lens hdr_lens;
1138         uint32_t hdrlen, ptype;
1139         int l4_supported = 0;
1140
1141         /* nothing to do */
1142         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1143                 return 0;
1144
1145         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1146
1147         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1148         m->packet_type = ptype;
1149         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1150             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1151             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1152                 l4_supported = 1;
1153
1154         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1155                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1156                 if (hdr->csum_start <= hdrlen && l4_supported) {
1157                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1158                 } else {
1159                         /* Unknown proto or tunnel, do sw cksum. We can assume
1160                          * the cksum field is in the first segment since the
1161                          * buffers we provided to the host are large enough.
1162                          * In case of SCTP, this will be wrong since it's a CRC
1163                          * but there's nothing we can do.
1164                          */
1165                         uint16_t csum = 0, off;
1166
1167                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1168                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1169                                 &csum);
1170                         if (likely(csum != 0xffff))
1171                                 csum = ~csum;
1172                         off = hdr->csum_offset + hdr->csum_start;
1173                         if (rte_pktmbuf_data_len(m) >= off + 1)
1174                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1175                                         off) = csum;
1176                 }
1177         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1178                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1179         }
1180
1181         /* GSO request, save required information in mbuf */
1182         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1183                 /* Check unsupported modes */
1184                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1185                     (hdr->gso_size == 0)) {
1186                         return -EINVAL;
1187                 }
1188
1189                 /* Update MSS length in mbuf */
1190                 m->tso_segsz = hdr->gso_size;
1191                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1192                         case VIRTIO_NET_HDR_GSO_TCPV4:
1193                         case VIRTIO_NET_HDR_GSO_TCPV6:
1194                                 m->ol_flags |= PKT_RX_LRO | \
1195                                         PKT_RX_L4_CKSUM_NONE;
1196                                 break;
1197                         default:
1198                                 return -EINVAL;
1199                 }
1200         }
1201
1202         return 0;
1203 }
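/*
 * Illustrative sketch, not part of the driver: an application consuming
 * these mbufs should accept PKT_RX_L4_CKSUM_NONE as well as _GOOD, since
 * NONE is what this path reports for VIRTIO_NET_HDR_F_NEEDS_CSUM packets
 * whose checksum field is not finalized but whose payload is valid.
 * Hypothetical helper, guarded out of any real build:
 */
#ifdef VIRTIO_RXTX_EXAMPLES
static inline int
example_l4_cksum_ok(const struct rte_mbuf *m)
{
        uint64_t st = m->ol_flags & PKT_RX_L4_CKSUM_MASK;

        return st == PKT_RX_L4_CKSUM_GOOD || st == PKT_RX_L4_CKSUM_NONE;
}
#endif /* VIRTIO_RXTX_EXAMPLES */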
1204
1205 #define VIRTIO_MBUF_BURST_SZ 64
1206 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
1207 uint16_t
1208 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1209 {
1210         struct virtnet_rx *rxvq = rx_queue;
1211         struct virtqueue *vq = rxvq->vq;
1212         struct virtio_hw *hw = vq->hw;
1213         struct rte_mbuf *rxm;
1214         uint16_t nb_used, num, nb_rx;
1215         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1216         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1217         int error;
1218         uint32_t i, nb_enqueued;
1219         uint32_t hdr_size;
1220         struct virtio_net_hdr *hdr;
1221
1222         nb_rx = 0;
1223         if (unlikely(hw->started == 0))
1224                 return nb_rx;
1225
1226         nb_used = VIRTQUEUE_NUSED(vq);
1227
1228         virtio_rmb(hw->weak_barriers);
1229
1230         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1231         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1232                 num = VIRTIO_MBUF_BURST_SZ;
1233         if (likely(num > DESC_PER_CACHELINE))
1234                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1235
1236         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1237         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1238
1239         nb_enqueued = 0;
1240         hdr_size = hw->vtnet_hdr_size;
1241
1242         for (i = 0; i < num ; i++) {
1243                 rxm = rcv_pkts[i];
1244
1245                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1246
1247                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1248                         PMD_RX_LOG(ERR, "Packet drop");
1249                         nb_enqueued++;
1250                         virtio_discard_rxbuf(vq, rxm);
1251                         rxvq->stats.errors++;
1252                         continue;
1253                 }
1254
1255                 rxm->port = rxvq->port_id;
1256                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1257                 rxm->ol_flags = 0;
1258                 rxm->vlan_tci = 0;
1259
1260                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1261                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1262
1263                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1264                         RTE_PKTMBUF_HEADROOM - hdr_size);
1265
1266                 if (hw->vlan_strip)
1267                         rte_vlan_strip(rxm);
1268
1269                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1270                         virtio_discard_rxbuf(vq, rxm);
1271                         rxvq->stats.errors++;
1272                         continue;
1273                 }
1274
1275                 virtio_rx_stats_updated(rxvq, rxm);
1276
1277                 rx_pkts[nb_rx++] = rxm;
1278         }
1279
1280         rxvq->stats.packets += nb_rx;
1281
1282         /* Allocate new mbuf for the used descriptor */
1283         if (likely(!virtqueue_full(vq))) {
1284                 uint16_t free_cnt = vq->vq_free_cnt;
1285                 struct rte_mbuf *new_pkts[free_cnt];
1286
1287                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1288                                                 free_cnt) == 0)) {
1289                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1290                                         free_cnt);
1291                         if (unlikely(error)) {
1292                                 for (i = 0; i < free_cnt; i++)
1293                                         rte_pktmbuf_free(new_pkts[i]);
1294                         }
1295                         nb_enqueued += free_cnt;
1296                 } else {
1297                         struct rte_eth_dev *dev =
1298                                 &rte_eth_devices[rxvq->port_id];
1299                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1300                 }
1301         }
1302
1303         if (likely(nb_enqueued)) {
1304                 vq_update_avail_idx(vq);
1305
1306                 if (unlikely(virtqueue_kick_prepare(vq))) {
1307                         virtqueue_notify(vq);
1308                         PMD_RX_LOG(DEBUG, "Notified");
1309                 }
1310         }
1311
1312         return nb_rx;
1313 }
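/*
 * Illustrative sketch, not part of the driver: virtio_recv_pkts() (and the
 * packed/in-order variants) sit behind rte_eth_rx_burst(). A minimal poll
 * loop, with a hypothetical helper name and guarded out of any real build:
 */
#ifdef VIRTIO_RXTX_EXAMPLES
static void
example_rx_poll(uint16_t port_id, uint16_t queue_id)
{
        struct rte_mbuf *pkts[VIRTIO_MBUF_BURST_SZ];
        uint16_t i, nb;

        nb = rte_eth_rx_burst(port_id, queue_id, pkts, VIRTIO_MBUF_BURST_SZ);
        for (i = 0; i < nb; i++)
                rte_pktmbuf_free(pkts[i]);      /* a real app would process the mbuf */
}
#endif /* VIRTIO_RXTX_EXAMPLES */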
1314
1315 uint16_t
1316 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1317                         uint16_t nb_pkts)
1318 {
1319         struct virtnet_rx *rxvq = rx_queue;
1320         struct virtqueue *vq = rxvq->vq;
1321         struct virtio_hw *hw = vq->hw;
1322         struct rte_mbuf *rxm;
1323         uint16_t num, nb_rx;
1324         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1325         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1326         int error;
1327         uint32_t i, nb_enqueued;
1328         uint32_t hdr_size;
1329         struct virtio_net_hdr *hdr;
1330
1331         nb_rx = 0;
1332         if (unlikely(hw->started == 0))
1333                 return nb_rx;
1334
1335         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1336         if (likely(num > DESC_PER_CACHELINE))
1337                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1338
1339         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1340         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1341
1342         nb_enqueued = 0;
1343         hdr_size = hw->vtnet_hdr_size;
1344
1345         for (i = 0; i < num; i++) {
1346                 rxm = rcv_pkts[i];
1347
1348                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1349
1350                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1351                         PMD_RX_LOG(ERR, "Packet drop");
1352                         nb_enqueued++;
1353                         virtio_discard_rxbuf(vq, rxm);
1354                         rxvq->stats.errors++;
1355                         continue;
1356                 }
1357
1358                 rxm->port = rxvq->port_id;
1359                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1360                 rxm->ol_flags = 0;
1361                 rxm->vlan_tci = 0;
1362
1363                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1364                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1365
1366                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1367                         RTE_PKTMBUF_HEADROOM - hdr_size);
1368
1369                 if (hw->vlan_strip)
1370                         rte_vlan_strip(rxm);
1371
1372                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1373                         virtio_discard_rxbuf(vq, rxm);
1374                         rxvq->stats.errors++;
1375                         continue;
1376                 }
1377
1378                 virtio_rx_stats_updated(rxvq, rxm);
1379
1380                 rx_pkts[nb_rx++] = rxm;
1381         }
1382
1383         rxvq->stats.packets += nb_rx;
1384
1385         /* Allocate new mbuf for the used descriptor */
1386         if (likely(!virtqueue_full(vq))) {
1387                 uint16_t free_cnt = vq->vq_free_cnt;
1388                 struct rte_mbuf *new_pkts[free_cnt];
1389
1390                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1391                                                 free_cnt) == 0)) {
1392                         error = virtqueue_enqueue_recv_refill_packed(vq,
1393                                         new_pkts, free_cnt);
1394                         if (unlikely(error)) {
1395                                 for (i = 0; i < free_cnt; i++)
1396                                         rte_pktmbuf_free(new_pkts[i]);
1397                         }
1398                         nb_enqueued += free_cnt;
1399                 } else {
1400                         struct rte_eth_dev *dev =
1401                                 &rte_eth_devices[rxvq->port_id];
1402                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1403                 }
1404         }
1405
1406         if (likely(nb_enqueued)) {
1407                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1408                         virtqueue_notify(vq);
1409                         PMD_RX_LOG(DEBUG, "Notified");
1410                 }
1411         }
1412
1413         return nb_rx;
1414 }
1415
1416
1417 uint16_t
1418 virtio_recv_pkts_inorder(void *rx_queue,
1419                         struct rte_mbuf **rx_pkts,
1420                         uint16_t nb_pkts)
1421 {
1422         struct virtnet_rx *rxvq = rx_queue;
1423         struct virtqueue *vq = rxvq->vq;
1424         struct virtio_hw *hw = vq->hw;
1425         struct rte_mbuf *rxm;
1426         struct rte_mbuf *prev;
1427         uint16_t nb_used, num, nb_rx;
1428         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1429         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1430         int error;
1431         uint32_t nb_enqueued;
1432         uint32_t seg_num;
1433         uint32_t seg_res;
1434         uint32_t hdr_size;
1435         int32_t i;
1436
1437         nb_rx = 0;
1438         if (unlikely(hw->started == 0))
1439                 return nb_rx;
1440
1441         nb_used = VIRTQUEUE_NUSED(vq);
1442         nb_used = RTE_MIN(nb_used, nb_pkts);
1443         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1444
1445         virtio_rmb(hw->weak_barriers);
1446
1447         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1448
1449         nb_enqueued = 0;
1450         seg_num = 1;
1451         seg_res = 0;
1452         hdr_size = hw->vtnet_hdr_size;
1453
1454         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1455
1456         for (i = 0; i < num; i++) {
1457                 struct virtio_net_hdr_mrg_rxbuf *header;
1458
1459                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1460                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1461
1462                 rxm = rcv_pkts[i];
1463
1464                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1465                         PMD_RX_LOG(ERR, "Packet drop");
1466                         nb_enqueued++;
1467                         virtio_discard_rxbuf_inorder(vq, rxm);
1468                         rxvq->stats.errors++;
1469                         continue;
1470                 }
1471
1472                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1473                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1474                          - hdr_size);
1475
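                /*
                 * With mergeable Rx buffers the header reports how many
                 * descriptors carry this packet; otherwise a packet always
                 * fits in a single buffer.
                 */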
1476                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1477                         seg_num = header->num_buffers;
1478                         if (seg_num == 0)
1479                                 seg_num = 1;
1480                 } else {
1481                         seg_num = 1;
1482                 }
1483
1484                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1485                 rxm->nb_segs = seg_num;
1486                 rxm->ol_flags = 0;
1487                 rxm->vlan_tci = 0;
1488                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1489                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1490
1491                 rxm->port = rxvq->port_id;
1492
1493                 rx_pkts[nb_rx] = rxm;
1494                 prev = rxm;
1495
1496                 if (hw->has_rx_offload &&
1497                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1498                         virtio_discard_rxbuf_inorder(vq, rxm);
1499                         rxvq->stats.errors++;
1500                         continue;
1501                 }
1502
1503                 if (hw->vlan_strip)
1504                         rte_vlan_strip(rx_pkts[nb_rx]);
1505
1506                 seg_res = seg_num - 1;
1507
1508                 /* Merge remaining segments */
1509                 while (seg_res != 0 && i < (num - 1)) {
1510                         i++;
1511
1512                         rxm = rcv_pkts[i];
1513                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1514                         rxm->pkt_len = (uint32_t)(len[i]);
1515                         rxm->data_len = (uint16_t)(len[i]);
1516
1517                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1518                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1519
1520                         if (prev)
1521                                 prev->next = rxm;
1522
1523                         prev = rxm;
1524                         seg_res -= 1;
1525                 }
1526
1527                 if (!seg_res) {
1528                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1529                         nb_rx++;
1530                 }
1531         }
1532
1533         /* The last packet may still need its remaining segments merged */
1534         while (seg_res != 0) {
1535                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1536                                         VIRTIO_MBUF_BURST_SZ);
1537
1538                 prev = rcv_pkts[nb_rx];
1539                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1540                         virtio_rmb(hw->weak_barriers);
1541                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1542                                                            rcv_cnt);
1543                         uint16_t extra_idx = 0;
1544
1545                         rcv_cnt = num;
1546                         while (extra_idx < rcv_cnt) {
1547                                 rxm = rcv_pkts[extra_idx];
1548                                 rxm->data_off =
1549                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1550                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1551                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1552                                 prev->next = rxm;
1553                                 prev = rxm;
1554                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1555                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1556                                 extra_idx += 1;
1557                         }
1558                         seg_res -= rcv_cnt;
1559
1560                         if (!seg_res) {
1561                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1562                                 nb_rx++;
1563                         }
1564                 } else {
1565                         PMD_RX_LOG(ERR,
1566                                         "Not enough segments for packet.");
1567                         virtio_discard_rxbuf_inorder(vq, prev);
1568                         rxvq->stats.errors++;
1569                         break;
1570                 }
1571         }
1572
1573         rxvq->stats.packets += nb_rx;
1574
1575         /* Allocate new mbufs to refill the used descriptors */
1576
1577         if (likely(!virtqueue_full(vq))) {
1578                 /* free_cnt may include mrg descs */
1579                 uint16_t free_cnt = vq->vq_free_cnt;
1580                 struct rte_mbuf *new_pkts[free_cnt];
1581
1582                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1583                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1584                                         free_cnt);
1585                         if (unlikely(error)) {
1586                                 for (i = 0; i < free_cnt; i++)
1587                                         rte_pktmbuf_free(new_pkts[i]);
1588                         }
1589                         nb_enqueued += free_cnt;
1590                 } else {
1591                         struct rte_eth_dev *dev =
1592                                 &rte_eth_devices[rxvq->port_id];
1593                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1594                 }
1595         }
1596
1597         if (likely(nb_enqueued)) {
1598                 vq_update_avail_idx(vq);
1599
1600                 if (unlikely(virtqueue_kick_prepare(vq))) {
1601                         virtqueue_notify(vq);
1602                         PMD_RX_LOG(DEBUG, "Notified");
1603                 }
1604         }
1605
1606         return nb_rx;
1607 }
1608
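/*
 * Receive burst for split rings with mergeable Rx buffers: a packet may
 * span several descriptors, so its segments are linked into one mbuf
 * chain before the ring is refilled and the device is notified.
 */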
1609 uint16_t
1610 virtio_recv_mergeable_pkts(void *rx_queue,
1611                         struct rte_mbuf **rx_pkts,
1612                         uint16_t nb_pkts)
1613 {
1614         struct virtnet_rx *rxvq = rx_queue;
1615         struct virtqueue *vq = rxvq->vq;
1616         struct virtio_hw *hw = vq->hw;
1617         struct rte_mbuf *rxm;
1618         struct rte_mbuf *prev;
1619         uint16_t nb_used, num, nb_rx = 0;
1620         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1621         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1622         int error;
1623         uint32_t nb_enqueued = 0;
1624         uint32_t seg_num = 0;
1625         uint32_t seg_res = 0;
1626         uint32_t hdr_size = hw->vtnet_hdr_size;
1627         int32_t i;
1628
1629         if (unlikely(hw->started == 0))
1630                 return nb_rx;
1631
1632         nb_used = VIRTQUEUE_NUSED(vq);
1633
1634         virtio_rmb(hw->weak_barriers);
1635
1636         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1637
1638         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1639         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1640                 num = VIRTIO_MBUF_BURST_SZ;
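        /*
         * Trim the burst so consumption of the used ring stops on a
         * DESC_PER_CACHELINE boundary.
         */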
1641         if (likely(num > DESC_PER_CACHELINE))
1642                 num = num - ((vq->vq_used_cons_idx + num) %
1643                                 DESC_PER_CACHELINE);
1644
1645
1646         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1647
1648         for (i = 0; i < num; i++) {
1649                 struct virtio_net_hdr_mrg_rxbuf *header;
1650
1651                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1652                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1653
1654                 rxm = rcv_pkts[i];
1655
1656                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1657                         PMD_RX_LOG(ERR, "Packet drop");
1658                         nb_enqueued++;
1659                         virtio_discard_rxbuf(vq, rxm);
1660                         rxvq->stats.errors++;
1661                         continue;
1662                 }
1663
1664                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1665                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1666                          - hdr_size);
1667                 seg_num = header->num_buffers;
1668                 if (seg_num == 0)
1669                         seg_num = 1;
1670
1671                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1672                 rxm->nb_segs = seg_num;
1673                 rxm->ol_flags = 0;
1674                 rxm->vlan_tci = 0;
1675                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1676                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1677
1678                 rxm->port = rxvq->port_id;
1679
1680                 rx_pkts[nb_rx] = rxm;
1681                 prev = rxm;
1682
1683                 if (hw->has_rx_offload &&
1684                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1685                         virtio_discard_rxbuf(vq, rxm);
1686                         rxvq->stats.errors++;
1687                         continue;
1688                 }
1689
1690                 if (hw->vlan_strip)
1691                         rte_vlan_strip(rx_pkts[nb_rx]);
1692
1693                 seg_res = seg_num - 1;
1694
1695                 /* Merge remaining segments */
1696                 while (seg_res != 0 && i < (num - 1)) {
1697                         i++;
1698
1699                         rxm = rcv_pkts[i];
1700                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1701                         rxm->pkt_len = (uint32_t)(len[i]);
1702                         rxm->data_len = (uint16_t)(len[i]);
1703
1704                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1705                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1706
1707                         if (prev)
1708                                 prev->next = rxm;
1709
1710                         prev = rxm;
1711                         seg_res -= 1;
1712                 }
1713
1714                 if (!seg_res) {
1715                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1716                         nb_rx++;
1717                 }
1718         }
1719
1720         /* The last packet may still need its remaining segments merged */
1721         while (seg_res != 0) {
1722                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1723                                         VIRTIO_MBUF_BURST_SZ);
1724
1725                 prev = rcv_pkts[nb_rx];
1726                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1727                         virtio_rmb(hw->weak_barriers);
1728                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1729                                                            rcv_cnt);
1730                         uint16_t extra_idx = 0;
1731
1732                         rcv_cnt = num;
1733                         while (extra_idx < rcv_cnt) {
1734                                 rxm = rcv_pkts[extra_idx];
1735                                 rxm->data_off =
1736                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1737                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1738                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1739                                 prev->next = rxm;
1740                                 prev = rxm;
1741                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1742                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1743                                 extra_idx += 1;
1744                         }
1745                         seg_res -= rcv_cnt;
1746
1747                         if (!seg_res) {
1748                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1749                                 nb_rx++;
1750                         }
1751                 } else {
1752                         PMD_RX_LOG(ERR,
1753                                         "Not enough segments for packet.");
1754                         virtio_discard_rxbuf(vq, prev);
1755                         rxvq->stats.errors++;
1756                         break;
1757                 }
1758         }
1759
1760         rxvq->stats.packets += nb_rx;
1761
1762         /* Allocate new mbufs to refill the used descriptors */
1763         if (likely(!virtqueue_full(vq))) {
1764                 /* free_cnt may include mrg descs */
1765                 uint16_t free_cnt = vq->vq_free_cnt;
1766                 struct rte_mbuf *new_pkts[free_cnt];
1767
1768                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1769                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1770                                         free_cnt);
1771                         if (unlikely(error)) {
1772                                 for (i = 0; i < free_cnt; i++)
1773                                         rte_pktmbuf_free(new_pkts[i]);
1774                         }
1775                         nb_enqueued += free_cnt;
1776                 } else {
1777                         struct rte_eth_dev *dev =
1778                                 &rte_eth_devices[rxvq->port_id];
1779                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1780                 }
1781         }
1782
1783         if (likely(nb_enqueued)) {
1784                 vq_update_avail_idx(vq);
1785
1786                 if (unlikely(virtqueue_kick_prepare(vq))) {
1787                         virtqueue_notify(vq);
1788                         PMD_RX_LOG(DEBUG, "Notified");
1789                 }
1790         }
1791
1792         return nb_rx;
1793 }
1794
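/*
 * Mergeable receive path for packed rings: same logic as the split
 * variant above, but buffers are dequeued and the ring is refilled
 * through the packed-ring helpers.
 */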
1795 uint16_t
1796 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1797                         struct rte_mbuf **rx_pkts,
1798                         uint16_t nb_pkts)
1799 {
1800         struct virtnet_rx *rxvq = rx_queue;
1801         struct virtqueue *vq = rxvq->vq;
1802         struct virtio_hw *hw = vq->hw;
1803         struct rte_mbuf *rxm;
1804         struct rte_mbuf *prev = NULL;
1805         uint16_t num, nb_rx = 0;
1806         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1807         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1808         uint32_t nb_enqueued = 0;
1809         uint32_t seg_num = 0;
1810         uint32_t seg_res = 0;
1811         uint32_t hdr_size = hw->vtnet_hdr_size;
1812         int32_t i;
1813         int error;
1814
1815         if (unlikely(hw->started == 0))
1816                 return nb_rx;
1817
1818
1819         num = nb_pkts;
1820         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1821                 num = VIRTIO_MBUF_BURST_SZ;
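        /* Trim the burst to a DESC_PER_CACHELINE boundary, as in the split path */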
1822         if (likely(num > DESC_PER_CACHELINE))
1823                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1824
1825         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1826
1827         for (i = 0; i < num; i++) {
1828                 struct virtio_net_hdr_mrg_rxbuf *header;
1829
1830                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1831                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1832
1833                 rxm = rcv_pkts[i];
1834
1835                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1836                         PMD_RX_LOG(ERR, "Packet drop");
1837                         nb_enqueued++;
1838                         virtio_discard_rxbuf(vq, rxm);
1839                         rxvq->stats.errors++;
1840                         continue;
1841                 }
1842
1843                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1844                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1845                 seg_num = header->num_buffers;
1846
1847                 if (seg_num == 0)
1848                         seg_num = 1;
1849
1850                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1851                 rxm->nb_segs = seg_num;
1852                 rxm->ol_flags = 0;
1853                 rxm->vlan_tci = 0;
1854                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1855                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1856
1857                 rxm->port = rxvq->port_id;
1858                 rx_pkts[nb_rx] = rxm;
1859                 prev = rxm;
1860
1861                 if (hw->has_rx_offload &&
1862                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1863                         virtio_discard_rxbuf(vq, rxm);
1864                         rxvq->stats.errors++;
1865                         continue;
1866                 }
1867
1868                 if (hw->vlan_strip)
1869                         rte_vlan_strip(rx_pkts[nb_rx]);
1870
1871                 seg_res = seg_num - 1;
1872
1873                 /* Merge remaining segments */
1874                 while (seg_res != 0 && i < (num - 1)) {
1875                         i++;
1876
1877                         rxm = rcv_pkts[i];
1878                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1879                         rxm->pkt_len = (uint32_t)(len[i]);
1880                         rxm->data_len = (uint16_t)(len[i]);
1881
1882                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1883                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1884
1885                         if (prev)
1886                                 prev->next = rxm;
1887
1888                         prev = rxm;
1889                         seg_res -= 1;
1890                 }
1891
1892                 if (!seg_res) {
1893                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1894                         nb_rx++;
1895                 }
1896         }
1897
1898         /* The last packet may still need its remaining segments merged */
1899         while (seg_res != 0) {
1900                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1901                                         VIRTIO_MBUF_BURST_SZ);
1902                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1903                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1904                                         len, rcv_cnt);
1905                         uint16_t extra_idx = 0;
1906
1907                         rcv_cnt = num;
1908
1909                         while (extra_idx < rcv_cnt) {
1910                                 rxm = rcv_pkts[extra_idx];
1911
1912                                 rxm->data_off =
1913                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1914                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1915                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1916
1917                                 prev->next = rxm;
1918                                 prev = rxm;
1919                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1920                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1921                                 extra_idx += 1;
1922                         }
1923                         seg_res -= rcv_cnt;
1924                         if (!seg_res) {
1925                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1926                                 nb_rx++;
1927                         }
1928                 } else {
1929                         PMD_RX_LOG(ERR,
1930                                         "Not enough segments for packet.");
1931                         if (prev)
1932                                 virtio_discard_rxbuf(vq, prev);
1933                         rxvq->stats.errors++;
1934                         break;
1935                 }
1936         }
1937
1938         rxvq->stats.packets += nb_rx;
1939
1940         /* Allocate new mbufs to refill the used descriptors */
1941         if (likely(!virtqueue_full(vq))) {
1942                 /* free_cnt may include mrg descs */
1943                 uint16_t free_cnt = vq->vq_free_cnt;
1944                 struct rte_mbuf *new_pkts[free_cnt];
1945
1946                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1947                         error = virtqueue_enqueue_recv_refill_packed(vq,
1948                                         new_pkts, free_cnt);
1949                         if (unlikely(error)) {
1950                                 for (i = 0; i < free_cnt; i++)
1951                                         rte_pktmbuf_free(new_pkts[i]);
1952                         }
1953                         nb_enqueued += free_cnt;
1954                 } else {
1955                         struct rte_eth_dev *dev =
1956                                 &rte_eth_devices[rxvq->port_id];
1957                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1958                 }
1959         }
1960
1961         if (likely(nb_enqueued)) {
1962                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1963                         virtqueue_notify(vq);
1964                         PMD_RX_LOG(DEBUG, "Notified");
1965                 }
1966         }
1967
1968         return nb_rx;
1969 }
1970
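/*
 * Transmit burst for packed rings: reclaim used descriptors when the
 * ring runs short, push the virtio header into the mbuf headroom when
 * the buffer layout allows it, and notify the device once the burst is
 * enqueued.
 */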
1971 uint16_t
1972 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1973                         uint16_t nb_pkts)
1974 {
1975         struct virtnet_tx *txvq = tx_queue;
1976         struct virtqueue *vq = txvq->vq;
1977         struct virtio_hw *hw = vq->hw;
1978         uint16_t hdr_size = hw->vtnet_hdr_size;
1979         uint16_t nb_tx = 0;
1980         bool in_order = hw->use_inorder_tx;
1981         int error;
1982
1983         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1984                 return nb_tx;
1985
1986         if (unlikely(nb_pkts < 1))
1987                 return nb_pkts;
1988
1989         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1990
1991         if (nb_pkts > vq->vq_free_cnt)
1992                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1993                                            in_order);
1994
1995         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1996                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1997                 int can_push = 0, slots, need;
1998
1999                 /* Do VLAN tag insertion */
2000                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2001                         error = rte_vlan_insert(&txm);
2002                         if (unlikely(error)) {
2003                                 rte_pktmbuf_free(txm);
2004                                 continue;
2005                         }
2006                         /* vlan_insert may add a header mbuf */
2007                         tx_pkts[nb_tx] = txm;
2008                 }
2009
2010                 /* optimize ring usage */
2011                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2012                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2013                     rte_mbuf_refcnt_read(txm) == 1 &&
2014                     RTE_MBUF_DIRECT(txm) &&
2015                     txm->nb_segs == 1 &&
2016                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2017                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2018                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2019                         can_push = 1;
2020
2021                 /* How many main ring entries are needed for this Tx?
2022                  * any_layout => number of segments
2023                  * default    => number of segments + 1
2024                  */
2025                 slots = txm->nb_segs + !can_push;
2026                 need = slots - vq->vq_free_cnt;
2027
2028                 /* A positive value means free vring descriptors are needed */
2029                 if (unlikely(need > 0)) {
2030                         virtio_xmit_cleanup_packed(vq, need, in_order);
2031                         need = slots - vq->vq_free_cnt;
2032                         if (unlikely(need > 0)) {
2033                                 PMD_TX_LOG(ERR,
2034                                            "No free tx descriptors to transmit");
2035                                 break;
2036                         }
2037                 }
2038
2039                 /* Enqueue Packet buffers */
2040                 if (can_push)
2041                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2042                 else
2043                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2044                                                       in_order);
2045
2046                 virtio_update_packet_stats(&txvq->stats, txm);
2047         }
2048
2049         txvq->stats.packets += nb_tx;
2050
2051         if (likely(nb_tx)) {
2052                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2053                         virtqueue_notify(vq);
2054                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2055                 }
2056         }
2057
2058         return nb_tx;
2059 }
2060
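/*
 * Transmit burst for split rings: reclaim used descriptors when the
 * free count runs low, then send each packet either with the header
 * pushed into its headroom, through an indirect descriptor, or as a
 * plain descriptor chain.
 */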
2061 uint16_t
2062 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2063 {
2064         struct virtnet_tx *txvq = tx_queue;
2065         struct virtqueue *vq = txvq->vq;
2066         struct virtio_hw *hw = vq->hw;
2067         uint16_t hdr_size = hw->vtnet_hdr_size;
2068         uint16_t nb_used, nb_tx = 0;
2069         int error;
2070
2071         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2072                 return nb_tx;
2073
2074         if (unlikely(nb_pkts < 1))
2075                 return nb_pkts;
2076
2077         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2078         nb_used = VIRTQUEUE_NUSED(vq);
2079
2080         virtio_rmb(hw->weak_barriers);
2081         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2082                 virtio_xmit_cleanup(vq, nb_used);
2083
2084         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2085                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2086                 int can_push = 0, use_indirect = 0, slots, need;
2087
2088                 /* Do VLAN tag insertion */
2089                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2090                         error = rte_vlan_insert(&txm);
2091                         if (unlikely(error)) {
2092                                 rte_pktmbuf_free(txm);
2093                                 continue;
2094                         }
2095                         /* vlan_insert may add a header mbuf */
2096                         tx_pkts[nb_tx] = txm;
2097                 }
2098
2099                 /* optimize ring usage */
2100                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2101                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2102                     rte_mbuf_refcnt_read(txm) == 1 &&
2103                     RTE_MBUF_DIRECT(txm) &&
2104                     txm->nb_segs == 1 &&
2105                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2106                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2107                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2108                         can_push = 1;
2109                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2110                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2111                         use_indirect = 1;
2112
2113                 /* How many main ring entries are needed for this Tx?
2114                  * any_layout => number of segments
2115                  * indirect   => 1
2116                  * default    => number of segments + 1
2117                  */
2118                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2119                 need = slots - vq->vq_free_cnt;
2120
2121                 /* A positive value means free vring descriptors are needed */
2122                 if (unlikely(need > 0)) {
2123                         nb_used = VIRTQUEUE_NUSED(vq);
2124                         virtio_rmb(hw->weak_barriers);
2125                         need = RTE_MIN(need, (int)nb_used);
2126
2127                         virtio_xmit_cleanup(vq, need);
2128                         need = slots - vq->vq_free_cnt;
2129                         if (unlikely(need > 0)) {
2130                                 PMD_TX_LOG(ERR,
2131                                            "No free tx descriptors to transmit");
2132                                 break;
2133                         }
2134                 }
2135
2136                 /* Enqueue Packet buffers */
2137                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2138                         can_push, 0);
2139
2140                 virtio_update_packet_stats(&txvq->stats, txm);
2141         }
2142
2143         txvq->stats.packets += nb_tx;
2144
2145         if (likely(nb_tx)) {
2146                 vq_update_avail_idx(vq);
2147
2148                 if (unlikely(virtqueue_kick_prepare(vq))) {
2149                         virtqueue_notify(vq);
2150                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2151                 }
2152         }
2153
2154         return nb_tx;
2155 }
2156
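/*
 * Transmit burst for split rings used in order: packets whose header
 * fits in the mbuf headroom are collected and enqueued as a batch,
 * while the others flush the batch first so that descriptor order
 * matches packet order.
 */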
2157 uint16_t
2158 virtio_xmit_pkts_inorder(void *tx_queue,
2159                         struct rte_mbuf **tx_pkts,
2160                         uint16_t nb_pkts)
2161 {
2162         struct virtnet_tx *txvq = tx_queue;
2163         struct virtqueue *vq = txvq->vq;
2164         struct virtio_hw *hw = vq->hw;
2165         uint16_t hdr_size = hw->vtnet_hdr_size;
2166         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2167         struct rte_mbuf *inorder_pkts[nb_pkts];
2168         int error;
2169
2170         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2171                 return nb_tx;
2172
2173         if (unlikely(nb_pkts < 1))
2174                 return nb_pkts;
2175
2176         VIRTQUEUE_DUMP(vq);
2177         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2178         nb_used = VIRTQUEUE_NUSED(vq);
2179
2180         virtio_rmb(hw->weak_barriers);
2181         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2182                 virtio_xmit_cleanup_inorder(vq, nb_used);
2183
2184         if (unlikely(!vq->vq_free_cnt))
2185                 virtio_xmit_cleanup_inorder(vq, nb_used);
2186
2187         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2188
2189         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2190                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2191                 int slots, need;
2192
2193                 /* Do VLAN tag insertion */
2194                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2195                         error = rte_vlan_insert(&txm);
2196                         if (unlikely(error)) {
2197                                 rte_pktmbuf_free(txm);
2198                                 continue;
2199                         }
2200                         /* vlan_insert may add a header mbuf */
2201                         tx_pkts[nb_tx] = txm;
2202                 }
2203
2204                 /* optimize ring usage */
2205                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2206                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2207                      rte_mbuf_refcnt_read(txm) == 1 &&
2208                      RTE_MBUF_DIRECT(txm) &&
2209                      txm->nb_segs == 1 &&
2210                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2211                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2212                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2213                         inorder_pkts[nb_inorder_pkts] = txm;
2214                         nb_inorder_pkts++;
2215
2216                         virtio_update_packet_stats(&txvq->stats, txm);
2217                         continue;
2218                 }
2219
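                /*
                 * A packet that cannot take the in-place header flushes the
                 * batch of pushable packets collected so far, keeping
                 * descriptors in packet order.
                 */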
2220                 if (nb_inorder_pkts) {
2221                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2222                                                         nb_inorder_pkts);
2223                         nb_inorder_pkts = 0;
2224                 }
2225
2226                 slots = txm->nb_segs + 1;
2227                 need = slots - vq->vq_free_cnt;
2228                 if (unlikely(need > 0)) {
2229                         nb_used = VIRTQUEUE_NUSED(vq);
2230                         virtio_rmb(hw->weak_barriers);
2231                         need = RTE_MIN(need, (int)nb_used);
2232
2233                         virtio_xmit_cleanup_inorder(vq, need);
2234
2235                         need = slots - vq->vq_free_cnt;
2236
2237                         if (unlikely(need > 0)) {
2238                                 PMD_TX_LOG(ERR,
2239                                         "No free tx descriptors to transmit");
2240                                 break;
2241                         }
2242                 }
2243                 /* Enqueue Packet buffers */
2244                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2245
2246                 virtio_update_packet_stats(&txvq->stats, txm);
2247         }
2248
2249         /* Transmit all inorder packets */
2250         if (nb_inorder_pkts)
2251                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2252                                                 nb_inorder_pkts);
2253
2254         txvq->stats.packets += nb_tx;
2255
2256         if (likely(nb_tx)) {
2257                 vq_update_avail_idx(vq);
2258
2259                 if (unlikely(virtqueue_kick_prepare(vq))) {
2260                         virtqueue_notify(vq);
2261                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2262                 }
2263         }
2264
2265         VIRTQUEUE_DUMP(vq);
2266
2267         return nb_tx;
2268 }