net/virtio: refactor virtqueue structure
[dpdk.git] drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
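/* Report whether at least 'offset' entries of the Rx ring have been used by the device. */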
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
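/*
 * Free 'num' descriptors ending at 'desc_idx' on an in-order queue.
 * In-order queues recycle descriptors sequentially, so only the free
 * counter and the tail index need to be updated.
 */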
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * Append the newly freed chain to the end of the existing free
78          * chain, if any. If the virtqueue was completely used, the free
79          * list is empty and its head is VQ_RING_DESC_CHAIN_END.
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
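/* Return the descriptors tracked by slot 'id' to the free list of a packed ring. */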
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
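/*
 * Dequeue up to 'num' received mbufs from a packed ring. A descriptor is
 * considered used once its AVAIL and USED flags both equal the ring's used
 * wrap counter, which is toggled each time the consumer index wraps.
 */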
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->vq_packed.ring.desc_packed;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->vq_packed.used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
150
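/*
 * Dequeue up to 'num' received mbufs from a split ring by walking the
 * used ring and releasing each descriptor chain back to the free list.
 */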
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_split.ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
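/*
 * In-order variant of the split-ring dequeue: the descriptor index always
 * equals the used-ring index, so descriptors are released in one batch
 * with vq_ring_free_inorder() after the loop.
 */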
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_split.ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
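/*
 * Free completed Tx mbufs on a packed ring with IN_ORDER negotiated.
 * The device may mark only the last descriptor of a batch as used, so
 * walk forward from the used index until that descriptor's id is reached.
 */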
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->vq_packed.ring.desc_packed;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->vq_packed.used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
258
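/*
 * Free completed Tx mbufs on a packed ring without IN_ORDER: each used
 * descriptor carries the id of the slot to recycle via the free list.
 */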
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->vq_packed.ring.desc_packed;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->vq_packed.used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
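/* Cleanup from completed transmits on a split ring. */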
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_split.ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
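/*
 * Refill the Rx ring of an in-order split queue: each mbuf takes exactly
 * one descriptor and descriptors are consumed at consecutive indexes.
 */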
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_split.ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
385
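/*
 * Refill the Rx ring of a split queue, following the free-descriptor
 * chain starting at vq_desc_head_idx.
 */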
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_split.ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
428
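/*
 * Refill the Rx ring of a packed queue. Each descriptor is handed to the
 * device by writing its flags last, after a write barrier; cached_flags
 * flips the AVAIL/USED bits whenever the avail index wraps.
 */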
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc_packed;
434         uint16_t flags = vq->vq_packed.cached_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->vq_packed.cached_flags ^=
464                                 VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
465                         flags = vq->vq_packed.cached_flags;
466                 }
467         }
468         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
469         return 0;
470 }
471
472 /* When doing TSO, the IP length is not included in the pseudo header
473  * checksum of the packet given to the PMD, but for virtio it is
474  * expected.
475  */
476 static void
477 virtio_tso_fix_cksum(struct rte_mbuf *m)
478 {
479         /* common case: header is not fragmented */
480         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
481                         m->l4_len)) {
482                 struct ipv4_hdr *iph;
483                 struct ipv6_hdr *ip6h;
484                 struct tcp_hdr *th;
485                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
486                 uint32_t tmp;
487
488                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
489                 th = RTE_PTR_ADD(iph, m->l3_len);
490                 if ((iph->version_ihl >> 4) == 4) {
491                         iph->hdr_checksum = 0;
492                         iph->hdr_checksum = rte_ipv4_cksum(iph);
493                         ip_len = iph->total_length;
494                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
495                                 m->l3_len);
496                 } else {
497                         ip6h = (struct ipv6_hdr *)iph;
498                         ip_paylen = ip6h->payload_len;
499                 }
500
501                 /* calculate the new phdr checksum not including ip_paylen */
502                 prev_cksum = th->cksum;
503                 tmp = prev_cksum;
504                 tmp += ip_paylen;
505                 tmp = (tmp & 0xffff) + (tmp >> 16);
506                 new_cksum = tmp;
507
508                 /* replace it in the packet */
509                 th->cksum = new_cksum;
510         }
511 }
512
513
514 /* avoid the write when the value is unchanged, to lessen cache issues */
515 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
516         if ((var) != (val))                     \
517                 (var) = (val);                  \
518 } while (0)
519
520 #define virtqueue_clear_net_hdr(_hdr) do {              \
521         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
522         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
523         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
524         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
525         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
526         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
527 } while (0)
528
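/* Translate mbuf offload flags into the virtio_net_hdr checksum/GSO fields. */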
529 static inline void
530 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
531                         struct rte_mbuf *cookie,
532                         bool offload)
533 {
534         if (offload) {
535                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
536                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
537
538                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
539                 case PKT_TX_UDP_CKSUM:
540                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
541                         hdr->csum_offset = offsetof(struct udp_hdr,
542                                 dgram_cksum);
543                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
544                         break;
545
546                 case PKT_TX_TCP_CKSUM:
547                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
548                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
549                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
550                         break;
551
552                 default:
553                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
554                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
555                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
556                         break;
557                 }
558
559                 /* TCP Segmentation Offload */
560                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
561                         virtio_tso_fix_cksum(cookie);
562                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
563                                 VIRTIO_NET_HDR_GSO_TCPV6 :
564                                 VIRTIO_NET_HDR_GSO_TCPV4;
565                         hdr->gso_size = cookie->tso_segsz;
566                         hdr->hdr_len =
567                                 cookie->l2_len +
568                                 cookie->l3_len +
569                                 cookie->l4_len;
570                 } else {
571                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
572                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
573                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
574                 }
575         }
576 }
577
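/*
 * Enqueue single-segment packets on an in-order split queue. The virtio
 * header is prepended into the mbuf headroom, so each packet occupies a
 * single descriptor.
 */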
578 static inline void
579 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
580                         struct rte_mbuf **cookies,
581                         uint16_t num)
582 {
583         struct vq_desc_extra *dxp;
584         struct virtqueue *vq = txvq->vq;
585         struct vring_desc *start_dp;
586         struct virtio_net_hdr *hdr;
587         uint16_t idx;
588         uint16_t head_size = vq->hw->vtnet_hdr_size;
589         uint16_t i = 0;
590
591         idx = vq->vq_desc_head_idx;
592         start_dp = vq->vq_split.ring.desc;
593
594         while (i < num) {
595                 idx = idx & (vq->vq_nentries - 1);
596                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
597                 dxp->cookie = (void *)cookies[i];
598                 dxp->ndescs = 1;
599
600                 hdr = (struct virtio_net_hdr *)
601                         rte_pktmbuf_prepend(cookies[i], head_size);
602                 cookies[i]->pkt_len -= head_size;
603
604                 /* if offload disabled, hdr is not zeroed yet, do it now */
605                 if (!vq->hw->has_tx_offload)
606                         virtqueue_clear_net_hdr(hdr);
607                 else
608                         virtqueue_xmit_offload(hdr, cookies[i], true);
609
610                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
611                 start_dp[idx].len   = cookies[i]->data_len;
612                 start_dp[idx].flags = 0;
613
614                 vq_update_avail_ring(vq, idx);
615
616                 idx++;
617                 i++;
618         };
619
620         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
621         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
622 }
623
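/*
 * Fast Tx path for packed rings: a single-segment packet whose virtio
 * header is prepended into the mbuf headroom, consuming one descriptor.
 */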
624 static inline void
625 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
626                                    struct rte_mbuf *cookie,
627                                    int in_order)
628 {
629         struct virtqueue *vq = txvq->vq;
630         struct vring_packed_desc *dp;
631         struct vq_desc_extra *dxp;
632         uint16_t idx, id, flags;
633         uint16_t head_size = vq->hw->vtnet_hdr_size;
634         struct virtio_net_hdr *hdr;
635
636         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
637         idx = vq->vq_avail_idx;
638         dp = &vq->vq_packed.ring.desc_packed[idx];
639
640         dxp = &vq->vq_descx[id];
641         dxp->ndescs = 1;
642         dxp->cookie = cookie;
643
644         flags = vq->vq_packed.cached_flags;
645
646         /* prepend cannot fail, checked by caller */
647         hdr = (struct virtio_net_hdr *)
648                 rte_pktmbuf_prepend(cookie, head_size);
649         cookie->pkt_len -= head_size;
650
651         /* if offload disabled, hdr is not zeroed yet, do it now */
652         if (!vq->hw->has_tx_offload)
653                 virtqueue_clear_net_hdr(hdr);
654         else
655                 virtqueue_xmit_offload(hdr, cookie, true);
656
657         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
658         dp->len  = cookie->data_len;
659         dp->id   = id;
660
661         if (++vq->vq_avail_idx >= vq->vq_nentries) {
662                 vq->vq_avail_idx -= vq->vq_nentries;
663                 vq->vq_packed.cached_flags ^=
664                         VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
665         }
666
667         vq->vq_free_cnt--;
668
669         if (!in_order) {
670                 vq->vq_desc_head_idx = dxp->next;
671                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
672                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
673         }
674
675         virtio_wmb(vq->hw->weak_barriers);
676         dp->flags = flags;
677 }
678
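/*
 * Generic Tx enqueue for packed rings. The head descriptor's flags are
 * written last, after a write barrier, so the device never sees a
 * partially filled chain.
 */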
679 static inline void
680 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
681                               uint16_t needed, int can_push, int in_order)
682 {
683         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
684         struct vq_desc_extra *dxp;
685         struct virtqueue *vq = txvq->vq;
686         struct vring_packed_desc *start_dp, *head_dp;
687         uint16_t idx, id, head_idx, head_flags;
688         uint16_t head_size = vq->hw->vtnet_hdr_size;
689         struct virtio_net_hdr *hdr;
690         uint16_t prev;
691
692         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
693
694         dxp = &vq->vq_descx[id];
695         dxp->ndescs = needed;
696         dxp->cookie = cookie;
697
698         head_idx = vq->vq_avail_idx;
699         idx = head_idx;
700         prev = head_idx;
701         start_dp = vq->vq_packed.ring.desc_packed;
702
703         head_dp = &vq->vq_packed.ring.desc_packed[idx];
704         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
705         head_flags |= vq->vq_packed.cached_flags;
706
707         if (can_push) {
708                 /* prepend cannot fail, checked by caller */
709                 hdr = (struct virtio_net_hdr *)
710                         rte_pktmbuf_prepend(cookie, head_size);
711                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length;
712                  * the subtraction below restores the correct pkt size.
713                  */
714                 cookie->pkt_len -= head_size;
715
716                 /* if offload disabled, it is not zeroed below, do it now */
717                 if (!vq->hw->has_tx_offload)
718                         virtqueue_clear_net_hdr(hdr);
719         } else {
720                 /* setup first tx ring slot to point to header
721                  * stored in reserved region.
722                  */
723                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
724                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
725                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
726                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
727                 idx++;
728                 if (idx >= vq->vq_nentries) {
729                         idx -= vq->vq_nentries;
730                         vq->vq_packed.cached_flags ^=
731                                 VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
732                 }
733         }
734
735         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
736
737         do {
738                 uint16_t flags;
739
740                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
741                 start_dp[idx].len  = cookie->data_len;
742                 if (likely(idx != head_idx)) {
743                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
744                         flags |= vq->vq_packed.cached_flags;
745                         start_dp[idx].flags = flags;
746                 }
747                 prev = idx;
748                 idx++;
749                 if (idx >= vq->vq_nentries) {
750                         idx -= vq->vq_nentries;
751                         vq->vq_packed.cached_flags ^=
752                                 VRING_DESC_F_AVAIL(1) | VRING_DESC_F_USED(1);
753                 }
754         } while ((cookie = cookie->next) != NULL);
755
756         start_dp[prev].id = id;
757
758         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
759         vq->vq_avail_idx = idx;
760
761         if (!in_order) {
762                 vq->vq_desc_head_idx = dxp->next;
763                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
764                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
765         }
766
767         virtio_wmb(vq->hw->weak_barriers);
768         head_dp->flags = head_flags;
769 }
770
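/*
 * Generic Tx enqueue for split rings. Depending on the negotiated features,
 * the virtio header is either pushed into the mbuf headroom (can_push),
 * described through an indirect descriptor table (use_indirect), or placed
 * in its own descriptor from the reserved header region.
 */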
771 static inline void
772 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
773                         uint16_t needed, int use_indirect, int can_push,
774                         int in_order)
775 {
776         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
777         struct vq_desc_extra *dxp;
778         struct virtqueue *vq = txvq->vq;
779         struct vring_desc *start_dp;
780         uint16_t seg_num = cookie->nb_segs;
781         uint16_t head_idx, idx;
782         uint16_t head_size = vq->hw->vtnet_hdr_size;
783         struct virtio_net_hdr *hdr;
784
785         head_idx = vq->vq_desc_head_idx;
786         idx = head_idx;
787         if (in_order)
788                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
789         else
790                 dxp = &vq->vq_descx[idx];
791         dxp->cookie = (void *)cookie;
792         dxp->ndescs = needed;
793
794         start_dp = vq->vq_split.ring.desc;
795
796         if (can_push) {
797                 /* prepend cannot fail, checked by caller */
798                 hdr = (struct virtio_net_hdr *)
799                         rte_pktmbuf_prepend(cookie, head_size);
800                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length;
801                  * the subtraction below restores the correct pkt size.
802                  */
803                 cookie->pkt_len -= head_size;
804
805                 /* if offload disabled, it is not zeroed below, do it now */
806                 if (!vq->hw->has_tx_offload)
807                         virtqueue_clear_net_hdr(hdr);
808         } else if (use_indirect) {
809                 /* setup tx ring slot to point to indirect
810                  * descriptor list stored in reserved region.
811                  *
812                  * the first slot in indirect ring is already preset
813                  * to point to the header in reserved region
814                  */
815                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
816                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
817                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
818                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
819                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
820
821                 /* loop below will fill in rest of the indirect elements */
822                 start_dp = txr[idx].tx_indir;
823                 idx = 1;
824         } else {
825                 /* setup first tx ring slot to point to header
826                  * stored in reserved region.
827                  */
828                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
829                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
830                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
831                 start_dp[idx].flags = VRING_DESC_F_NEXT;
832                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
833
834                 idx = start_dp[idx].next;
835         }
836
837         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
838
839         do {
840                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
841                 start_dp[idx].len   = cookie->data_len;
842                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
843                 idx = start_dp[idx].next;
844         } while ((cookie = cookie->next) != NULL);
845
846         if (use_indirect)
847                 idx = vq->vq_split.ring.desc[head_idx].next;
848
849         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
850
851         vq->vq_desc_head_idx = idx;
852         vq_update_avail_ring(vq, head_idx);
853
854         if (!in_order) {
855                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
856                         vq->vq_desc_tail_idx = idx;
857         }
858 }
859
860 void
861 virtio_dev_cq_start(struct rte_eth_dev *dev)
862 {
863         struct virtio_hw *hw = dev->data->dev_private;
864
865         if (hw->cvq && hw->cvq->vq) {
866                 rte_spinlock_init(&hw->cvq->lock);
867                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
868         }
869 }
870
871 int
872 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
873                         uint16_t queue_idx,
874                         uint16_t nb_desc,
875                         unsigned int socket_id __rte_unused,
876                         const struct rte_eth_rxconf *rx_conf __rte_unused,
877                         struct rte_mempool *mp)
878 {
879         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
880         struct virtio_hw *hw = dev->data->dev_private;
881         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
882         struct virtnet_rx *rxvq;
883
884         PMD_INIT_FUNC_TRACE();
885
886         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
887                 nb_desc = vq->vq_nentries;
888         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
889
890         rxvq = &vq->rxq;
891         rxvq->queue_id = queue_idx;
892         rxvq->mpool = mp;
893         if (rxvq->mpool == NULL) {
894                 rte_exit(EXIT_FAILURE,
895                         "Cannot allocate mbufs for rx virtqueue");
896         }
897
898         dev->data->rx_queues[queue_idx] = rxvq;
899
900         return 0;
901 }
902
903 int
904 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
905 {
906         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
907         struct virtio_hw *hw = dev->data->dev_private;
908         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
909         struct virtnet_rx *rxvq = &vq->rxq;
910         struct rte_mbuf *m;
911         uint16_t desc_idx;
912         int error, nbufs, i;
913
914         PMD_INIT_FUNC_TRACE();
915
916         /* Allocate blank mbufs for each rx descriptor */
917         nbufs = 0;
918
919         if (hw->use_simple_rx) {
920                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
921                      desc_idx++) {
922                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
923                         vq->vq_split.ring.desc[desc_idx].flags =
924                                 VRING_DESC_F_WRITE;
925                 }
926
927                 virtio_rxq_vec_setup(rxvq);
928         }
929
930         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
931         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
932              desc_idx++) {
933                 vq->sw_ring[vq->vq_nentries + desc_idx] =
934                         &rxvq->fake_mbuf;
935         }
936
937         if (hw->use_simple_rx) {
938                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
939                         virtio_rxq_rearm_vec(rxvq);
940                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
941                 }
942         } else if (hw->use_inorder_rx) {
943                 if ((!virtqueue_full(vq))) {
944                         uint16_t free_cnt = vq->vq_free_cnt;
945                         struct rte_mbuf *pkts[free_cnt];
946
947                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
948                                 free_cnt)) {
949                                 error = virtqueue_enqueue_refill_inorder(vq,
950                                                 pkts,
951                                                 free_cnt);
952                                 if (unlikely(error)) {
953                                         for (i = 0; i < free_cnt; i++)
954                                                 rte_pktmbuf_free(pkts[i]);
955                                 }
956                         }
957
958                         nbufs += free_cnt;
959                         vq_update_avail_idx(vq);
960                 }
961         } else {
962                 while (!virtqueue_full(vq)) {
963                         m = rte_mbuf_raw_alloc(rxvq->mpool);
964                         if (m == NULL)
965                                 break;
966
967                         /* Enqueue allocated buffers */
968                         if (vtpci_packed_queue(vq->hw))
969                                 error = virtqueue_enqueue_recv_refill_packed(vq,
970                                                 &m, 1);
971                         else
972                                 error = virtqueue_enqueue_recv_refill(vq,
973                                                 &m, 1);
974                         if (error) {
975                                 rte_pktmbuf_free(m);
976                                 break;
977                         }
978                         nbufs++;
979                 }
980
981                 if (!vtpci_packed_queue(vq->hw))
982                         vq_update_avail_idx(vq);
983         }
984
985         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
986
987         VIRTQUEUE_DUMP(vq);
988
989         return 0;
990 }
991
992 /*
993  * struct rte_eth_dev *dev: device whose Tx queue is being configured
994  * uint16_t nb_desc: defaults to the value read from the config space
995  * unsigned int socket_id: used to allocate the memzone
996  * const struct rte_eth_txconf *tx_conf: used to set up the tx engine
997  * uint16_t queue_idx: index into the device's txq list
998  */
999 int
1000 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1001                         uint16_t queue_idx,
1002                         uint16_t nb_desc,
1003                         unsigned int socket_id __rte_unused,
1004                         const struct rte_eth_txconf *tx_conf)
1005 {
1006         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1007         struct virtio_hw *hw = dev->data->dev_private;
1008         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1009         struct virtnet_tx *txvq;
1010         uint16_t tx_free_thresh;
1011
1012         PMD_INIT_FUNC_TRACE();
1013
1014         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1015                 nb_desc = vq->vq_nentries;
1016         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1017
1018         txvq = &vq->txq;
1019         txvq->queue_id = queue_idx;
1020
1021         tx_free_thresh = tx_conf->tx_free_thresh;
1022         if (tx_free_thresh == 0)
1023                 tx_free_thresh =
1024                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1025
1026         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1027                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1028                         "number of TX entries minus 3 (%u)."
1029                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1030                         vq->vq_nentries - 3,
1031                         tx_free_thresh, dev->data->port_id, queue_idx);
1032                 return -EINVAL;
1033         }
1034
1035         vq->vq_free_thresh = tx_free_thresh;
1036
1037         dev->data->tx_queues[queue_idx] = txvq;
1038         return 0;
1039 }
1040
1041 int
1042 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1043                                 uint16_t queue_idx)
1044 {
1045         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1046         struct virtio_hw *hw = dev->data->dev_private;
1047         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1048
1049         PMD_INIT_FUNC_TRACE();
1050
1051         if (!vtpci_packed_queue(hw)) {
1052                 if (hw->use_inorder_tx)
1053                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1054         }
1055
1056         VIRTQUEUE_DUMP(vq);
1057
1058         return 0;
1059 }
1060
1061 static inline void
1062 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1063 {
1064         int error;
1065         /*
1066          * Requeue the discarded mbuf. This should always be
1067          * successful since it was just dequeued.
1068          */
1069         if (vtpci_packed_queue(vq->hw))
1070                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1071         else
1072                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1073
1074         if (unlikely(error)) {
1075                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1076                 rte_pktmbuf_free(m);
1077         }
1078 }
1079
1080 static inline void
1081 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1082 {
1083         int error;
1084
1085         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1086         if (unlikely(error)) {
1087                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1088                 rte_pktmbuf_free(m);
1089         }
1090 }
1091
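/*
 * Update byte, size-bin and multicast/broadcast counters. Size bins are:
 * [0] < 64, [1] == 64, [2..5] power-of-two ranges from 65-127 up to
 * 512-1023, [6] 1024-1518 and [7] >= 1519 bytes.
 */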
1092 static inline void
1093 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1094 {
1095         uint32_t s = mbuf->pkt_len;
1096         struct ether_addr *ea;
1097
1098         stats->bytes += s;
1099
1100         if (s == 64) {
1101                 stats->size_bins[1]++;
1102         } else if (s > 64 && s < 1024) {
1103                 uint32_t bin;
1104
1105                 /* count leading zeros to offset into the correct bin */
1106                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1107                 stats->size_bins[bin]++;
1108         } else {
1109                 if (s < 64)
1110                         stats->size_bins[0]++;
1111                 else if (s < 1519)
1112                         stats->size_bins[6]++;
1113                 else if (s >= 1519)
1114                         stats->size_bins[7]++;
1115         }
1116
1117         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
1118         if (is_multicast_ether_addr(ea)) {
1119                 if (is_broadcast_ether_addr(ea))
1120                         stats->broadcast++;
1121                 else
1122                         stats->multicast++;
1123         }
1124 }
1125
1126 static inline void
1127 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1128 {
1129         VIRTIO_DUMP_PACKET(m, m->data_len);
1130
1131         virtio_update_packet_stats(&rxvq->stats, m);
1132 }
1133
1134 /* Optionally fill offload information in structure */
1135 static inline int
1136 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1137 {
1138         struct rte_net_hdr_lens hdr_lens;
1139         uint32_t hdrlen, ptype;
1140         int l4_supported = 0;
1141
1142         /* nothing to do */
1143         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1144                 return 0;
1145
1146         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1147
1148         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1149         m->packet_type = ptype;
1150         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1151             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1152             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1153                 l4_supported = 1;
1154
1155         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1156                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1157                 if (hdr->csum_start <= hdrlen && l4_supported) {
1158                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1159                 } else {
1160                         /* Unknown proto or tunnel, do sw cksum. We can assume
1161                          * the cksum field is in the first segment since the
1162                          * buffers we provided to the host are large enough.
1163                          * In case of SCTP, this will be wrong since it's a CRC
1164                          * but there's nothing we can do.
1165                          */
1166                         uint16_t csum = 0, off;
1167
1168                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1169                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1170                                 &csum);
1171                         if (likely(csum != 0xffff))
1172                                 csum = ~csum;
1173                         off = hdr->csum_offset + hdr->csum_start;
1174                         if (rte_pktmbuf_data_len(m) >= off + 1)
1175                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1176                                         off) = csum;
1177                 }
1178         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1179                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1180         }
1181
1182         /* GSO request, save required information in mbuf */
1183         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1184                 /* Check unsupported modes */
1185                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1186                     (hdr->gso_size == 0)) {
1187                         return -EINVAL;
1188                 }
1189
1190                 /* Update MSS length in mbuf */
1191                 m->tso_segsz = hdr->gso_size;
1192                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1193                         case VIRTIO_NET_HDR_GSO_TCPV4:
1194                         case VIRTIO_NET_HDR_GSO_TCPV6:
1195                                 m->ol_flags |= PKT_RX_LRO | \
1196                                         PKT_RX_L4_CKSUM_NONE;
1197                                 break;
1198                         default:
1199                                 return -EINVAL;
1200                 }
1201         }
1202
1203         return 0;
1204 }
1205
1206 #define VIRTIO_MBUF_BURST_SZ 64
1207 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
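/* Rx burst function for split rings without mergeable buffers. */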
1208 uint16_t
1209 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1210 {
1211         struct virtnet_rx *rxvq = rx_queue;
1212         struct virtqueue *vq = rxvq->vq;
1213         struct virtio_hw *hw = vq->hw;
1214         struct rte_mbuf *rxm, *new_mbuf;
1215         uint16_t nb_used, num, nb_rx;
1216         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1217         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1218         int error;
1219         uint32_t i, nb_enqueued;
1220         uint32_t hdr_size;
1221         struct virtio_net_hdr *hdr;
1222
1223         nb_rx = 0;
1224         if (unlikely(hw->started == 0))
1225                 return nb_rx;
1226
1227         nb_used = VIRTQUEUE_NUSED(vq);
1228
1229         virtio_rmb(hw->weak_barriers);
1230
1231         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1232         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1233                 num = VIRTIO_MBUF_BURST_SZ;
1234         if (likely(num > DESC_PER_CACHELINE))
1235                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1236
1237         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1238         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1239
1240         nb_enqueued = 0;
1241         hdr_size = hw->vtnet_hdr_size;
1242
1243         for (i = 0; i < num ; i++) {
1244                 rxm = rcv_pkts[i];
1245
1246                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1247
1248                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1249                         PMD_RX_LOG(ERR, "Packet drop");
1250                         nb_enqueued++;
1251                         virtio_discard_rxbuf(vq, rxm);
1252                         rxvq->stats.errors++;
1253                         continue;
1254                 }
1255
1256                 rxm->port = rxvq->port_id;
1257                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1258                 rxm->ol_flags = 0;
1259                 rxm->vlan_tci = 0;
1260
1261                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1262                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1263
1264                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1265                         RTE_PKTMBUF_HEADROOM - hdr_size);
1266
1267                 if (hw->vlan_strip)
1268                         rte_vlan_strip(rxm);
1269
1270                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1271                         virtio_discard_rxbuf(vq, rxm);
1272                         rxvq->stats.errors++;
1273                         continue;
1274                 }
1275
1276                 virtio_rx_stats_updated(rxvq, rxm);
1277
1278                 rx_pkts[nb_rx++] = rxm;
1279         }
1280
1281         rxvq->stats.packets += nb_rx;
1282
1283         /* Allocate new mbuf for the used descriptor */
1284         while (likely(!virtqueue_full(vq))) {
1285                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1286                 if (unlikely(new_mbuf == NULL)) {
1287                         struct rte_eth_dev *dev
1288                                 = &rte_eth_devices[rxvq->port_id];
1289                         dev->data->rx_mbuf_alloc_failed++;
1290                         break;
1291                 }
1292                 error = virtqueue_enqueue_recv_refill(vq, &new_mbuf, 1);
1293                 if (unlikely(error)) {
1294                         rte_pktmbuf_free(new_mbuf);
1295                         break;
1296                 }
1297                 nb_enqueued++;
1298         }
1299
1300         if (likely(nb_enqueued)) {
1301                 vq_update_avail_idx(vq);
1302
1303                 if (unlikely(virtqueue_kick_prepare(vq))) {
1304                         virtqueue_notify(vq);
1305                         PMD_RX_LOG(DEBUG, "Notified");
1306                 }
1307         }
1308
1309         return nb_rx;
1310 }
1311
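/* Rx burst function for packed rings without mergeable buffers. */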
1312 uint16_t
1313 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1314                         uint16_t nb_pkts)
1315 {
1316         struct virtnet_rx *rxvq = rx_queue;
1317         struct virtqueue *vq = rxvq->vq;
1318         struct virtio_hw *hw = vq->hw;
1319         struct rte_mbuf *rxm, *new_mbuf;
1320         uint16_t num, nb_rx;
1321         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1322         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1323         int error;
1324         uint32_t i, nb_enqueued;
1325         uint32_t hdr_size;
1326         struct virtio_net_hdr *hdr;
1327
1328         nb_rx = 0;
1329         if (unlikely(hw->started == 0))
1330                 return nb_rx;
1331
1332         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1333         if (likely(num > DESC_PER_CACHELINE))
1334                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1335
1336         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1337         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1338
1339         nb_enqueued = 0;
1340         hdr_size = hw->vtnet_hdr_size;
1341
1342         for (i = 0; i < num; i++) {
1343                 rxm = rcv_pkts[i];
1344
1345                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1346
1347                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1348                         PMD_RX_LOG(ERR, "Packet drop");
1349                         nb_enqueued++;
1350                         virtio_discard_rxbuf(vq, rxm);
1351                         rxvq->stats.errors++;
1352                         continue;
1353                 }
1354
1355                 rxm->port = rxvq->port_id;
1356                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1357                 rxm->ol_flags = 0;
1358                 rxm->vlan_tci = 0;
1359
1360                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1361                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1362
1363                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1364                         RTE_PKTMBUF_HEADROOM - hdr_size);
1365
1366                 if (hw->vlan_strip)
1367                         rte_vlan_strip(rxm);
1368
1369                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1370                         virtio_discard_rxbuf(vq, rxm);
1371                         rxvq->stats.errors++;
1372                         continue;
1373                 }
1374
1375                 virtio_rx_stats_updated(rxvq, rxm);
1376
1377                 rx_pkts[nb_rx++] = rxm;
1378         }
1379
1380         rxvq->stats.packets += nb_rx;
1381
1382         /* Allocate new mbuf for the used descriptor */
1383         while (likely(!virtqueue_full(vq))) {
1384                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1385                 if (unlikely(new_mbuf == NULL)) {
1386                         struct rte_eth_dev *dev =
1387                                 &rte_eth_devices[rxvq->port_id];
1388                         dev->data->rx_mbuf_alloc_failed++;
1389                         break;
1390                 }
1391                 error = virtqueue_enqueue_recv_refill_packed(vq, &new_mbuf, 1);
1392                 if (unlikely(error)) {
1393                         rte_pktmbuf_free(new_mbuf);
1394                         break;
1395                 }
1396                 nb_enqueued++;
1397         }
1398
1399         if (likely(nb_enqueued)) {
1400                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1401                         virtqueue_notify(vq);
1402                         PMD_RX_LOG(DEBUG, "Notified");
1403                 }
1404         }
1405
1406         return nb_rx;
1407 }
1408
1409
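/*
 * Rx burst function for in-order split rings, with support for packets
 * spanning several descriptors (mergeable buffers).
 */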
1410 uint16_t
1411 virtio_recv_pkts_inorder(void *rx_queue,
1412                         struct rte_mbuf **rx_pkts,
1413                         uint16_t nb_pkts)
1414 {
1415         struct virtnet_rx *rxvq = rx_queue;
1416         struct virtqueue *vq = rxvq->vq;
1417         struct virtio_hw *hw = vq->hw;
1418         struct rte_mbuf *rxm;
1419         struct rte_mbuf *prev;
1420         uint16_t nb_used, num, nb_rx;
1421         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1422         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1423         int error;
1424         uint32_t nb_enqueued;
1425         uint32_t seg_num;
1426         uint32_t seg_res;
1427         uint32_t hdr_size;
1428         int32_t i;
1429
1430         nb_rx = 0;
1431         if (unlikely(hw->started == 0))
1432                 return nb_rx;
1433
1434         nb_used = VIRTQUEUE_NUSED(vq);
1435         nb_used = RTE_MIN(nb_used, nb_pkts);
1436         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1437
1438         virtio_rmb(hw->weak_barriers);
1439
1440         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1441
1442         nb_enqueued = 0;
1443         seg_num = 1;
1444         seg_res = 0;
1445         hdr_size = hw->vtnet_hdr_size;
1446
1447         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1448
1449         for (i = 0; i < num; i++) {
1450                 struct virtio_net_hdr_mrg_rxbuf *header;
1451
1452                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1453                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1454
1455                 rxm = rcv_pkts[i];
1456
1457                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1458                         PMD_RX_LOG(ERR, "Packet drop");
1459                         nb_enqueued++;
1460                         virtio_discard_rxbuf_inorder(vq, rxm);
1461                         rxvq->stats.errors++;
1462                         continue;
1463                 }
1464
1465                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1466                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1467                          - hdr_size);
1468
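                /*
                 * With VIRTIO_NET_F_MRG_RXBUF negotiated, num_buffers in the
                 * header gives the number of receive buffers this packet
                 * spans; a value of 0 is defensively treated as 1.
                 */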
1469                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1470                         seg_num = header->num_buffers;
1471                         if (seg_num == 0)
1472                                 seg_num = 1;
1473                 } else {
1474                         seg_num = 1;
1475                 }
1476
1477                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1478                 rxm->nb_segs = seg_num;
1479                 rxm->ol_flags = 0;
1480                 rxm->vlan_tci = 0;
1481                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1482                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1483
1484                 rxm->port = rxvq->port_id;
1485
1486                 rx_pkts[nb_rx] = rxm;
1487                 prev = rxm;
1488
1489                 if (vq->hw->has_rx_offload &&
1490                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1491                         virtio_discard_rxbuf_inorder(vq, rxm);
1492                         rxvq->stats.errors++;
1493                         continue;
1494                 }
1495
1496                 if (hw->vlan_strip)
1497                         rte_vlan_strip(rx_pkts[nb_rx]);
1498
1499                 seg_res = seg_num - 1;
1500
1501                 /* Merge remaining segments */
1502                 while (seg_res != 0 && i < (num - 1)) {
1503                         i++;
1504
1505                         rxm = rcv_pkts[i];
1506                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1507                         rxm->pkt_len = (uint32_t)(len[i]);
1508                         rxm->data_len = (uint16_t)(len[i]);
1509
1510                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1511                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1512
1513                         if (prev)
1514                                 prev->next = rxm;
1515
1516                         prev = rxm;
1517                         seg_res -= 1;
1518                 }
1519
1520                 if (!seg_res) {
1521                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1522                         nb_rx++;
1523                 }
1524         }
1525
1526         /* The last packet may still need its remaining segments merged */
1527         while (seg_res != 0) {
1528                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1529                                         VIRTIO_MBUF_BURST_SZ);
1530
1531                 prev = rcv_pkts[nb_rx];
1532                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1533                         virtio_rmb(hw->weak_barriers);
1534                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1535                                                            rcv_cnt);
1536                         uint16_t extra_idx = 0;
1537
1538                         rcv_cnt = num;
1539                         while (extra_idx < rcv_cnt) {
1540                                 rxm = rcv_pkts[extra_idx];
1541                                 rxm->data_off =
1542                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1543                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1544                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1545                                 prev->next = rxm;
1546                                 prev = rxm;
1547                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1548                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1549                                 extra_idx += 1;
1550                         }
1551                         seg_res -= rcv_cnt;
1552
1553                         if (!seg_res) {
1554                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1555                                 nb_rx++;
1556                         }
1557                 } else {
1558                         PMD_RX_LOG(ERR,
1559                                         "Not enough segments for packet.");
1560                         virtio_discard_rxbuf_inorder(vq, prev);
1561                         rxvq->stats.errors++;
1562                         break;
1563                 }
1564         }
1565
1566         rxvq->stats.packets += nb_rx;
1567
1568         /* Allocate new mbufs to refill the used descriptors */
1569
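        /*
         * The refill is done in bulk: one mbuf per free descriptor, posted
         * with a single enqueue call; if the enqueue fails, the freshly
         * allocated mbufs are released again.
         */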
1570         if (likely(!virtqueue_full(vq))) {
1571                 /* free_cnt may include mrg descs */
1572                 uint16_t free_cnt = vq->vq_free_cnt;
1573                 struct rte_mbuf *new_pkts[free_cnt];
1574
1575                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1576                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1577                                         free_cnt);
1578                         if (unlikely(error)) {
1579                                 for (i = 0; i < free_cnt; i++)
1580                                         rte_pktmbuf_free(new_pkts[i]);
1581                         }
1582                         nb_enqueued += free_cnt;
1583                 } else {
1584                         struct rte_eth_dev *dev =
1585                                 &rte_eth_devices[rxvq->port_id];
1586                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1587                 }
1588         }
1589
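        /*
         * Publish the refilled buffers via the avail index and notify the
         * device only when virtqueue_kick_prepare() reports that
         * notifications are not suppressed.
         */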
1590         if (likely(nb_enqueued)) {
1591                 vq_update_avail_idx(vq);
1592
1593                 if (unlikely(virtqueue_kick_prepare(vq))) {
1594                         virtqueue_notify(vq);
1595                         PMD_RX_LOG(DEBUG, "Notified");
1596                 }
1597         }
1598
1599         return nb_rx;
1600 }
1601
1602 uint16_t
1603 virtio_recv_mergeable_pkts(void *rx_queue,
1604                         struct rte_mbuf **rx_pkts,
1605                         uint16_t nb_pkts)
1606 {
1607         struct virtnet_rx *rxvq = rx_queue;
1608         struct virtqueue *vq = rxvq->vq;
1609         struct virtio_hw *hw = vq->hw;
1610         struct rte_mbuf *rxm;
1611         struct rte_mbuf *prev;
1612         uint16_t nb_used, num, nb_rx = 0;
1613         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1614         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1615         int error;
1616         uint32_t nb_enqueued = 0;
1617         uint32_t seg_num = 0;
1618         uint32_t seg_res = 0;
1619         uint32_t hdr_size = hw->vtnet_hdr_size;
1620         int32_t i;
1621
1622         if (unlikely(hw->started == 0))
1623                 return nb_rx;
1624
1625         nb_used = VIRTQUEUE_NUSED(vq);
1626
1627         virtio_rmb(hw->weak_barriers);
1628
1629         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1630
1631         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1632         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1633                 num = VIRTIO_MBUF_BURST_SZ;
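        /*
         * Trim the batch so that vq_used_cons_idx + num lands on a
         * DESC_PER_CACHELINE boundary, keeping used-ring accesses
         * cache-line aligned for the following bursts.
         */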
1634         if (likely(num > DESC_PER_CACHELINE))
1635                 num = num - ((vq->vq_used_cons_idx + num) %
1636                                 DESC_PER_CACHELINE);
1637
1639         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1640
1641         for (i = 0; i < num; i++) {
1642                 struct virtio_net_hdr_mrg_rxbuf *header;
1643
1644                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1645                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1646
1647                 rxm = rcv_pkts[i];
1648
1649                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1650                         PMD_RX_LOG(ERR, "Packet drop");
1651                         nb_enqueued++;
1652                         virtio_discard_rxbuf(vq, rxm);
1653                         rxvq->stats.errors++;
1654                         continue;
1655                 }
1656
1657                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1658                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1659                          - hdr_size);
1660                 seg_num = header->num_buffers;
1661                 if (seg_num == 0)
1662                         seg_num = 1;
1663
1664                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1665                 rxm->nb_segs = seg_num;
1666                 rxm->ol_flags = 0;
1667                 rxm->vlan_tci = 0;
1668                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1669                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1670
1671                 rxm->port = rxvq->port_id;
1672
1673                 rx_pkts[nb_rx] = rxm;
1674                 prev = rxm;
1675
1676                 if (hw->has_rx_offload &&
1677                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1678                         virtio_discard_rxbuf(vq, rxm);
1679                         rxvq->stats.errors++;
1680                         continue;
1681                 }
1682
1683                 if (hw->vlan_strip)
1684                         rte_vlan_strip(rx_pkts[nb_rx]);
1685
1686                 seg_res = seg_num - 1;
1687
1688                 /* Merge remaining segments */
1689                 while (seg_res != 0 && i < (num - 1)) {
1690                         i++;
1691
1692                         rxm = rcv_pkts[i];
1693                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1694                         rxm->pkt_len = (uint32_t)(len[i]);
1695                         rxm->data_len = (uint16_t)(len[i]);
1696
1697                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1698                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1699
1700                         if (prev)
1701                                 prev->next = rxm;
1702
1703                         prev = rxm;
1704                         seg_res -= 1;
1705                 }
1706
1707                 if (!seg_res) {
1708                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1709                         nb_rx++;
1710                 }
1711         }
1712
1713         /* The last packet may still need its remaining segments merged */
1714         while (seg_res != 0) {
1715                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1716                                         VIRTIO_MBUF_BURST_SZ);
1717
1718                 prev = rcv_pkts[nb_rx];
1719                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1720                         virtio_rmb(hw->weak_barriers);
1721                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1722                                                            rcv_cnt);
1723                         uint16_t extra_idx = 0;
1724
1725                         rcv_cnt = num;
1726                         while (extra_idx < rcv_cnt) {
1727                                 rxm = rcv_pkts[extra_idx];
1728                                 rxm->data_off =
1729                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1730                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1731                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1732                                 prev->next = rxm;
1733                                 prev = rxm;
1734                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1735                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1736                                 extra_idx += 1;
1737                         }
1738                         seg_res -= rcv_cnt;
1739
1740                         if (!seg_res) {
1741                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1742                                 nb_rx++;
1743                         }
1744                 } else {
1745                         PMD_RX_LOG(ERR,
1746                                         "Not enough segments for packet.");
1747                         virtio_discard_rxbuf(vq, prev);
1748                         rxvq->stats.errors++;
1749                         break;
1750                 }
1751         }
1752
1753         rxvq->stats.packets += nb_rx;
1754
1755         /* Allocate new mbufs to refill the used descriptors */
1756         if (likely(!virtqueue_full(vq))) {
1757                 /* free_cnt may include mrg descs */
1758                 uint16_t free_cnt = vq->vq_free_cnt;
1759                 struct rte_mbuf *new_pkts[free_cnt];
1760
1761                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1762                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1763                                         free_cnt);
1764                         if (unlikely(error)) {
1765                                 for (i = 0; i < free_cnt; i++)
1766                                         rte_pktmbuf_free(new_pkts[i]);
1767                         }
1768                         nb_enqueued += free_cnt;
1769                 } else {
1770                         struct rte_eth_dev *dev =
1771                                 &rte_eth_devices[rxvq->port_id];
1772                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1773                 }
1774         }
1775
1776         if (likely(nb_enqueued)) {
1777                 vq_update_avail_idx(vq);
1778
1779                 if (unlikely(virtqueue_kick_prepare(vq))) {
1780                         virtqueue_notify(vq);
1781                         PMD_RX_LOG(DEBUG, "Notified");
1782                 }
1783         }
1784
1785         return nb_rx;
1786 }
1787
1788 uint16_t
1789 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1790                         struct rte_mbuf **rx_pkts,
1791                         uint16_t nb_pkts)
1792 {
1793         struct virtnet_rx *rxvq = rx_queue;
1794         struct virtqueue *vq = rxvq->vq;
1795         struct virtio_hw *hw = vq->hw;
1796         struct rte_mbuf *rxm;
1797         struct rte_mbuf *prev = NULL;
1798         uint16_t num, nb_rx = 0;
1799         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1800         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1801         uint32_t nb_enqueued = 0;
1802         uint32_t seg_num = 0;
1803         uint32_t seg_res = 0;
1804         uint32_t hdr_size = hw->vtnet_hdr_size;
1805         int32_t i;
1806         int error;
1807
1808         if (unlikely(hw->started == 0))
1809                 return nb_rx;
1810
1812         num = nb_pkts;
1813         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1814                 num = VIRTIO_MBUF_BURST_SZ;
1815         if (likely(num > DESC_PER_CACHELINE))
1816                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1817
1818         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1819
1820         for (i = 0; i < num; i++) {
1821                 struct virtio_net_hdr_mrg_rxbuf *header;
1822
1823                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1824                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1825
1826                 rxm = rcv_pkts[i];
1827
1828                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1829                         PMD_RX_LOG(ERR, "Packet drop");
1830                         nb_enqueued++;
1831                         virtio_discard_rxbuf(vq, rxm);
1832                         rxvq->stats.errors++;
1833                         continue;
1834                 }
1835
1836                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1837                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1838                 seg_num = header->num_buffers;
1839
1840                 if (seg_num == 0)
1841                         seg_num = 1;
1842
1843                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1844                 rxm->nb_segs = seg_num;
1845                 rxm->ol_flags = 0;
1846                 rxm->vlan_tci = 0;
1847                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1848                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1849
1850                 rxm->port = rxvq->port_id;
1851                 rx_pkts[nb_rx] = rxm;
1852                 prev = rxm;
1853
1854                 if (hw->has_rx_offload &&
1855                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1856                         virtio_discard_rxbuf(vq, rxm);
1857                         rxvq->stats.errors++;
1858                         continue;
1859                 }
1860
1861                 if (hw->vlan_strip)
1862                         rte_vlan_strip(rx_pkts[nb_rx]);
1863
1864                 seg_res = seg_num - 1;
1865
1866                 /* Merge remaining segments */
1867                 while (seg_res != 0 && i < (num - 1)) {
1868                         i++;
1869
1870                         rxm = rcv_pkts[i];
1871                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1872                         rxm->pkt_len = (uint32_t)(len[i]);
1873                         rxm->data_len = (uint16_t)(len[i]);
1874
1875                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1876                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1877
1878                         if (prev)
1879                                 prev->next = rxm;
1880
1881                         prev = rxm;
1882                         seg_res -= 1;
1883                 }
1884
1885                 if (!seg_res) {
1886                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1887                         nb_rx++;
1888                 }
1889         }
1890
1891         /* The last packet may still need its remaining segments merged */
1892         while (seg_res != 0) {
1893                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1894                                         VIRTIO_MBUF_BURST_SZ);
1895                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1896                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1897                                         len, rcv_cnt);
1898                         uint16_t extra_idx = 0;
1899
1900                         rcv_cnt = num;
1901
1902                         while (extra_idx < rcv_cnt) {
1903                                 rxm = rcv_pkts[extra_idx];
1904
1905                                 rxm->data_off =
1906                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1907                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1908                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1909
1910                                 prev->next = rxm;
1911                                 prev = rxm;
1912                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1913                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1914                                 extra_idx += 1;
1915                         }
1916                         seg_res -= rcv_cnt;
1917                         if (!seg_res) {
1918                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1919                                 nb_rx++;
1920                         }
1921                 } else {
1922                         PMD_RX_LOG(ERR,
1923                                         "Not enough segments for packet.");
1924                         if (prev)
1925                                 virtio_discard_rxbuf(vq, prev);
1926                         rxvq->stats.errors++;
1927                         break;
1928                 }
1929         }
1930
1931         rxvq->stats.packets += nb_rx;
1932
1933         /* Allocate new mbufs to refill the used descriptors */
1934         if (likely(!virtqueue_full(vq))) {
1935                 /* free_cnt may include mrg descs */
1936                 uint16_t free_cnt = vq->vq_free_cnt;
1937                 struct rte_mbuf *new_pkts[free_cnt];
1938
1939                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1940                         error = virtqueue_enqueue_recv_refill_packed(vq,
1941                                         new_pkts, free_cnt);
1942                         if (unlikely(error)) {
1943                                 for (i = 0; i < free_cnt; i++)
1944                                         rte_pktmbuf_free(new_pkts[i]);
1945                         }
1946                         nb_enqueued += free_cnt;
1947                 } else {
1948                         struct rte_eth_dev *dev =
1949                                 &rte_eth_devices[rxvq->port_id];
1950                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1951                 }
1952         }
1953
1954         if (likely(nb_enqueued)) {
1955                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1956                         virtqueue_notify(vq);
1957                         PMD_RX_LOG(DEBUG, "Notified");
1958                 }
1959         }
1960
1961         return nb_rx;
1962 }
1963
1964 uint16_t
1965 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1966                         uint16_t nb_pkts)
1967 {
1968         struct virtnet_tx *txvq = tx_queue;
1969         struct virtqueue *vq = txvq->vq;
1970         struct virtio_hw *hw = vq->hw;
1971         uint16_t hdr_size = hw->vtnet_hdr_size;
1972         uint16_t nb_tx = 0;
1973         bool in_order = hw->use_inorder_tx;
1974         int error;
1975
1976         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1977                 return nb_tx;
1978
1979         if (unlikely(nb_pkts < 1))
1980                 return nb_pkts;
1981
1982         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1983
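        /*
         * If the free count cannot cover the whole burst, reclaim completed
         * descriptors up front; any remaining shortfall is handled per
         * packet inside the loop below.
         */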
1984         if (nb_pkts > vq->vq_free_cnt)
1985                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1986                                            in_order);
1987
1988         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1989                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1990                 int can_push = 0, slots, need;
1991
1992                 /* Do VLAN tag insertion */
1993                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1994                         error = rte_vlan_insert(&txm);
1995                         if (unlikely(error)) {
1996                                 rte_pktmbuf_free(txm);
1997                                 continue;
1998                         }
1999                 }
2000
2001                 /* optimize ring usage */
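                /*
                 * The virtio-net header can be pushed into the mbuf headroom
                 * only for a single-segment, direct mbuf that we own
                 * exclusively (refcnt == 1), with enough headroom and a
                 * suitably aligned header address; ANY_LAYOUT or VERSION_1
                 * lets header and data share one descriptor.
                 */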
2002                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2003                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2004                     rte_mbuf_refcnt_read(txm) == 1 &&
2005                     RTE_MBUF_DIRECT(txm) &&
2006                     txm->nb_segs == 1 &&
2007                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2008                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2009                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2010                         can_push = 1;
2011
2012                 /* How many main ring entries are needed for this Tx?
2013                  * any_layout => number of segments
2014                  * default    => number of segments + 1
2015                  */
2016                 slots = txm->nb_segs + !can_push;
2017                 need = slots - vq->vq_free_cnt;
2018
2019                 /* A positive value indicates we need to free vring descriptors */
2020                 if (unlikely(need > 0)) {
2021                         virtio_xmit_cleanup_packed(vq, need, in_order);
2022                         need = slots - vq->vq_free_cnt;
2023                         if (unlikely(need > 0)) {
2024                                 PMD_TX_LOG(ERR,
2025                                            "No free tx descriptors to transmit");
2026                                 break;
2027                         }
2028                 }
2029
2030                 /* Enqueue Packet buffers */
2031                 if (can_push)
2032                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2033                 else
2034                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2035                                                       in_order);
2036
2037                 virtio_update_packet_stats(&txvq->stats, txm);
2038         }
2039
2040         txvq->stats.packets += nb_tx;
2041
2042         if (likely(nb_tx)) {
2043                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2044                         virtqueue_notify(vq);
2045                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2046                 }
2047         }
2048
2049         return nb_tx;
2050 }
2051
2052 uint16_t
2053 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2054 {
2055         struct virtnet_tx *txvq = tx_queue;
2056         struct virtqueue *vq = txvq->vq;
2057         struct virtio_hw *hw = vq->hw;
2058         uint16_t hdr_size = hw->vtnet_hdr_size;
2059         uint16_t nb_used, nb_tx = 0;
2060         int error;
2061
2062         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2063                 return nb_tx;
2064
2065         if (unlikely(nb_pkts < 1))
2066                 return nb_pkts;
2067
2068         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2069         nb_used = VIRTQUEUE_NUSED(vq);
2070
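        /*
         * Reclaim completed descriptors when the used count exceeds the
         * configured threshold, before building descriptors for this burst.
         */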
2071         virtio_rmb(hw->weak_barriers);
2072         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2073                 virtio_xmit_cleanup(vq, nb_used);
2074
2075         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2076                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2077                 int can_push = 0, use_indirect = 0, slots, need;
2078
2079                 /* Do VLAN tag insertion */
2080                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2081                         error = rte_vlan_insert(&txm);
2082                         if (unlikely(error)) {
2083                                 rte_pktmbuf_free(txm);
2084                                 continue;
2085                         }
2086                 }
2087
2088                 /* optimize ring usage */
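                /*
                 * Prefer pushing the header into the mbuf headroom; failing
                 * that, a chain of fewer than VIRTIO_MAX_TX_INDIRECT segments
                 * can still occupy a single ring slot through an indirect
                 * descriptor table when VIRTIO_RING_F_INDIRECT_DESC is
                 * negotiated.
                 */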
2089                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2090                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2091                     rte_mbuf_refcnt_read(txm) == 1 &&
2092                     RTE_MBUF_DIRECT(txm) &&
2093                     txm->nb_segs == 1 &&
2094                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2095                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2096                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2097                         can_push = 1;
2098                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2099                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2100                         use_indirect = 1;
2101
2102                 /* How many main ring entries are needed for this Tx?
2103                  * any_layout => number of segments
2104                  * indirect   => 1
2105                  * default    => number of segments + 1
2106                  */
2107                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2108                 need = slots - vq->vq_free_cnt;
2109
2110                 /* A positive value indicates we need to free vring descriptors */
2111                 if (unlikely(need > 0)) {
2112                         nb_used = VIRTQUEUE_NUSED(vq);
2113                         virtio_rmb(hw->weak_barriers);
2114                         need = RTE_MIN(need, (int)nb_used);
2115
2116                         virtio_xmit_cleanup(vq, need);
2117                         need = slots - vq->vq_free_cnt;
2118                         if (unlikely(need > 0)) {
2119                                 PMD_TX_LOG(ERR,
2120                                            "No free tx descriptors to transmit");
2121                                 break;
2122                         }
2123                 }
2124
2125                 /* Enqueue Packet buffers */
2126                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2127                         can_push, 0);
2128
2129                 virtio_update_packet_stats(&txvq->stats, txm);
2130         }
2131
2132         txvq->stats.packets += nb_tx;
2133
2134         if (likely(nb_tx)) {
2135                 vq_update_avail_idx(vq);
2136
2137                 if (unlikely(virtqueue_kick_prepare(vq))) {
2138                         virtqueue_notify(vq);
2139                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2140                 }
2141         }
2142
2143         return nb_tx;
2144 }
2145
2146 uint16_t
2147 virtio_xmit_pkts_inorder(void *tx_queue,
2148                         struct rte_mbuf **tx_pkts,
2149                         uint16_t nb_pkts)
2150 {
2151         struct virtnet_tx *txvq = tx_queue;
2152         struct virtqueue *vq = txvq->vq;
2153         struct virtio_hw *hw = vq->hw;
2154         uint16_t hdr_size = hw->vtnet_hdr_size;
2155         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2156         struct rte_mbuf *inorder_pkts[nb_pkts];
2157         int error;
2158
2159         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2160                 return nb_tx;
2161
2162         if (unlikely(nb_pkts < 1))
2163                 return nb_pkts;
2164
2165         VIRTQUEUE_DUMP(vq);
2166         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2167         nb_used = VIRTQUEUE_NUSED(vq);
2168
2169         virtio_rmb(hw->weak_barriers);
2170         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2171                 virtio_xmit_cleanup_inorder(vq, nb_used);
2172
2173         if (unlikely(!vq->vq_free_cnt))
2174                 virtio_xmit_cleanup_inorder(vq, nb_used);
2175
2176         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2177
2178         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2179                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2180                 int slots, need;
2181
2182                 /* Do VLAN tag insertion */
2183                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2184                         error = rte_vlan_insert(&txm);
2185                         if (unlikely(error)) {
2186                                 rte_pktmbuf_free(txm);
2187                                 continue;
2188                         }
2189                 }
2190
2191                 /* optimize ring usage */
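                /*
                 * Packets that can take the pushed header are batched into
                 * inorder_pkts[] and enqueued together below, each needing
                 * only a single descriptor.
                 */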
2192                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2193                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2194                      rte_mbuf_refcnt_read(txm) == 1 &&
2195                      RTE_MBUF_DIRECT(txm) &&
2196                      txm->nb_segs == 1 &&
2197                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2198                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2199                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2200                         inorder_pkts[nb_inorder_pkts] = txm;
2201                         nb_inorder_pkts++;
2202
2203                         virtio_update_packet_stats(&txvq->stats, txm);
2204                         continue;
2205                 }
2206
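                /*
                 * This packet cannot take a pushed header, so flush the
                 * batched packets first to keep the ring in the caller's
                 * transmit order.
                 */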
2207                 if (nb_inorder_pkts) {
2208                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2209                                                         nb_inorder_pkts);
2210                         nb_inorder_pkts = 0;
2211                 }
2212
2213                 slots = txm->nb_segs + 1;
2214                 need = slots - vq->vq_free_cnt;
2215                 if (unlikely(need > 0)) {
2216                         nb_used = VIRTQUEUE_NUSED(vq);
2217                         virtio_rmb(hw->weak_barriers);
2218                         need = RTE_MIN(need, (int)nb_used);
2219
2220                         virtio_xmit_cleanup_inorder(vq, need);
2221
2222                         need = slots - vq->vq_free_cnt;
2223
2224                         if (unlikely(need > 0)) {
2225                                 PMD_TX_LOG(ERR,
2226                                         "No free tx descriptors to transmit");
2227                                 break;
2228                         }
2229                 }
2230                 /* Enqueue Packet buffers */
2231                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2232
2233                 virtio_update_packet_stats(&txvq->stats, txm);
2234         }
2235
2236         /* Transmit all inorder packets */
2237         if (nb_inorder_pkts)
2238                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2239                                                 nb_inorder_pkts);
2240
2241         txvq->stats.packets += nb_tx;
2242
2243         if (likely(nb_tx)) {
2244                 vq_update_avail_idx(vq);
2245
2246                 if (unlikely(virtqueue_kick_prepare(vq))) {
2247                         virtqueue_notify(vq);
2248                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2249                 }
2250         }
2251
2252         VIRTQUEUE_DUMP(vq);
2253
2254         return nb_tx;
2255 }