net/virtio: add Tx preparation
[dpdk.git] drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * the newly freed chain. If the virtqueue was completely used, then
79          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->vq_packed.ring.desc;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->vq_packed.used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
150
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_split.ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_split.ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->vq_packed.used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
258
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->vq_packed.used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_split.ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_split.ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
385
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_split.ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
428
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
434         uint16_t flags = vq->vq_packed.cached_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->vq_packed.cached_flags ^=
464                                 VRING_PACKED_DESC_F_AVAIL_USED;
465                         flags = vq->vq_packed.cached_flags;
466                 }
467         }
468         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
469         return 0;
470 }
471
472 /* When doing TSO, the IP length is not included in the pseudo header
473  * checksum of the packet given to the PMD, but virtio expects it to be
474  * included.
475  */
476 static void
477 virtio_tso_fix_cksum(struct rte_mbuf *m)
478 {
479         /* common case: header is not fragmented */
480         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
481                         m->l4_len)) {
482                 struct rte_ipv4_hdr *iph;
483                 struct rte_ipv6_hdr *ip6h;
484                 struct rte_tcp_hdr *th;
485                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
486                 uint32_t tmp;
487
488                 iph = rte_pktmbuf_mtod_offset(m,
489                                         struct rte_ipv4_hdr *, m->l2_len);
490                 th = RTE_PTR_ADD(iph, m->l3_len);
491                 if ((iph->version_ihl >> 4) == 4) {
492                         iph->hdr_checksum = 0;
493                         iph->hdr_checksum = rte_ipv4_cksum(iph);
494                         ip_len = iph->total_length;
495                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
496                                 m->l3_len);
497                 } else {
498                         ip6h = (struct rte_ipv6_hdr *)iph;
499                         ip_paylen = ip6h->payload_len;
500                 }
501
502                 /* fold ip_paylen into the existing phdr checksum */
503                 prev_cksum = th->cksum;
504                 tmp = prev_cksum;
505                 tmp += ip_paylen;
506                 tmp = (tmp & 0xffff) + (tmp >> 16);
507                 new_cksum = tmp;
508
509                 /* replace it in the packet */
510                 th->cksum = new_cksum;
511         }
512 }
513
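/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the carry fold done above, written as a standalone helper. The name
 * cksum_add16() is hypothetical.
 */
#if 0
static uint16_t
cksum_add16(uint16_t cksum, uint16_t val)
{
        uint32_t tmp = (uint32_t)cksum + val;

        /* fold the carry back into the low 16 bits (one's-complement add) */
        tmp = (tmp & 0xffff) + (tmp >> 16);
        return (uint16_t)tmp;
}

/* virtio_tso_fix_cksum() effectively performs:
 *      th->cksum = cksum_add16(th->cksum, ip_paylen);
 */
#endif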
514
515 /* avoid a write when the value is unchanged, to lessen cache issues */
516 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
517         if ((var) != (val))                     \
518                 (var) = (val);                  \
519 } while (0)
520
521 #define virtqueue_clear_net_hdr(_hdr) do {              \
522         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
523         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
524         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
525         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
526         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
527         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
528 } while (0)
529
530 static inline void
531 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
532                         struct rte_mbuf *cookie,
533                         bool offload)
534 {
535         if (offload) {
536                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
537                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
538
539                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
540                 case PKT_TX_UDP_CKSUM:
541                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
542                         hdr->csum_offset = offsetof(struct rte_udp_hdr,
543                                 dgram_cksum);
544                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
545                         break;
546
547                 case PKT_TX_TCP_CKSUM:
548                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
549                         hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
550                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
551                         break;
552
553                 default:
554                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
555                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
556                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
557                         break;
558                 }
559
560                 /* TCP Segmentation Offload */
561                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
562                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
563                                 VIRTIO_NET_HDR_GSO_TCPV6 :
564                                 VIRTIO_NET_HDR_GSO_TCPV4;
565                         hdr->gso_size = cookie->tso_segsz;
566                         hdr->hdr_len =
567                                 cookie->l2_len +
568                                 cookie->l3_len +
569                                 cookie->l4_len;
570                 } else {
571                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
572                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
573                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
574                 }
575         }
576 }
577
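/*
 * Illustrative sketch (editor's addition): how a caller is expected to fill
 * the mbuf so that virtqueue_xmit_offload() above emits a TSO request. The
 * helper name and the MSS value are examples only.
 */
#if 0
static void
example_prepare_tso_mbuf(struct rte_mbuf *m)
{
        m->l2_len = sizeof(struct rte_ether_hdr);
        m->l3_len = sizeof(struct rte_ipv4_hdr);
        m->l4_len = sizeof(struct rte_tcp_hdr);
        m->tso_segsz = 1448;
        m->ol_flags |= PKT_TX_IPV4 | PKT_TX_TCP_SEG;
        /* virtqueue_xmit_offload() then sets csum_start = l2_len + l3_len,
         * csum_offset = offsetof(struct rte_tcp_hdr, cksum),
         * gso_type = VIRTIO_NET_HDR_GSO_TCPV4, gso_size = 1448 and
         * hdr_len = l2_len + l3_len + l4_len.
         */
}
#endif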
578 static inline void
579 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
580                         struct rte_mbuf **cookies,
581                         uint16_t num)
582 {
583         struct vq_desc_extra *dxp;
584         struct virtqueue *vq = txvq->vq;
585         struct vring_desc *start_dp;
586         struct virtio_net_hdr *hdr;
587         uint16_t idx;
588         uint16_t head_size = vq->hw->vtnet_hdr_size;
589         uint16_t i = 0;
590
591         idx = vq->vq_desc_head_idx;
592         start_dp = vq->vq_split.ring.desc;
593
594         while (i < num) {
595                 idx = idx & (vq->vq_nentries - 1);
596                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
597                 dxp->cookie = (void *)cookies[i];
598                 dxp->ndescs = 1;
599
600                 hdr = (struct virtio_net_hdr *)
601                         rte_pktmbuf_prepend(cookies[i], head_size);
602                 cookies[i]->pkt_len -= head_size;
603
604                 /* if offload disabled, hdr is not zeroed yet, do it now */
605                 if (!vq->hw->has_tx_offload)
606                         virtqueue_clear_net_hdr(hdr);
607                 else
608                         virtqueue_xmit_offload(hdr, cookies[i], true);
609
610                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
611                 start_dp[idx].len   = cookies[i]->data_len;
612                 start_dp[idx].flags = 0;
613
614                 vq_update_avail_ring(vq, idx);
615
616                 idx++;
617                 i++;
618         };
619
620         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
621         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
622 }
623
624 static inline void
625 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
626                                    struct rte_mbuf *cookie,
627                                    int in_order)
628 {
629         struct virtqueue *vq = txvq->vq;
630         struct vring_packed_desc *dp;
631         struct vq_desc_extra *dxp;
632         uint16_t idx, id, flags;
633         uint16_t head_size = vq->hw->vtnet_hdr_size;
634         struct virtio_net_hdr *hdr;
635
636         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
637         idx = vq->vq_avail_idx;
638         dp = &vq->vq_packed.ring.desc[idx];
639
640         dxp = &vq->vq_descx[id];
641         dxp->ndescs = 1;
642         dxp->cookie = cookie;
643
644         flags = vq->vq_packed.cached_flags;
645
646         /* prepend cannot fail, checked by caller */
647         hdr = (struct virtio_net_hdr *)
648                 rte_pktmbuf_prepend(cookie, head_size);
649         cookie->pkt_len -= head_size;
650
651         /* if offload disabled, hdr is not zeroed yet, do it now */
652         if (!vq->hw->has_tx_offload)
653                 virtqueue_clear_net_hdr(hdr);
654         else
655                 virtqueue_xmit_offload(hdr, cookie, true);
656
657         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
658         dp->len  = cookie->data_len;
659         dp->id   = id;
660
661         if (++vq->vq_avail_idx >= vq->vq_nentries) {
662                 vq->vq_avail_idx -= vq->vq_nentries;
663                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
664         }
665
666         vq->vq_free_cnt--;
667
668         if (!in_order) {
669                 vq->vq_desc_head_idx = dxp->next;
670                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
671                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
672         }
673
674         virtio_wmb(vq->hw->weak_barriers);
675         dp->flags = flags;
676 }
677
678 static inline void
679 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
680                               uint16_t needed, int can_push, int in_order)
681 {
682         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
683         struct vq_desc_extra *dxp;
684         struct virtqueue *vq = txvq->vq;
685         struct vring_packed_desc *start_dp, *head_dp;
686         uint16_t idx, id, head_idx, head_flags;
687         uint16_t head_size = vq->hw->vtnet_hdr_size;
688         struct virtio_net_hdr *hdr;
689         uint16_t prev;
690
691         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
692
693         dxp = &vq->vq_descx[id];
694         dxp->ndescs = needed;
695         dxp->cookie = cookie;
696
697         head_idx = vq->vq_avail_idx;
698         idx = head_idx;
699         prev = head_idx;
700         start_dp = vq->vq_packed.ring.desc;
701
702         head_dp = &vq->vq_packed.ring.desc[idx];
703         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
704         head_flags |= vq->vq_packed.cached_flags;
705
706         if (can_push) {
707                 /* prepend cannot fail, checked by caller */
708                 hdr = (struct virtio_net_hdr *)
709                         rte_pktmbuf_prepend(cookie, head_size);
710                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length;
711                  * the subtraction below restores the correct pkt size.
712                  */
713                 cookie->pkt_len -= head_size;
714
715                 /* if offload disabled, it is not zeroed below, do it now */
716                 if (!vq->hw->has_tx_offload)
717                         virtqueue_clear_net_hdr(hdr);
718         } else {
719                 /* setup first tx ring slot to point to header
720                  * stored in reserved region.
721                  */
722                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
723                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
724                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
725                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
726                 idx++;
727                 if (idx >= vq->vq_nentries) {
728                         idx -= vq->vq_nentries;
729                         vq->vq_packed.cached_flags ^=
730                                 VRING_PACKED_DESC_F_AVAIL_USED;
731                 }
732         }
733
734         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
735
736         do {
737                 uint16_t flags;
738
739                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
740                 start_dp[idx].len  = cookie->data_len;
741                 if (likely(idx != head_idx)) {
742                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
743                         flags |= vq->vq_packed.cached_flags;
744                         start_dp[idx].flags = flags;
745                 }
746                 prev = idx;
747                 idx++;
748                 if (idx >= vq->vq_nentries) {
749                         idx -= vq->vq_nentries;
750                         vq->vq_packed.cached_flags ^=
751                                 VRING_PACKED_DESC_F_AVAIL_USED;
752                 }
753         } while ((cookie = cookie->next) != NULL);
754
755         start_dp[prev].id = id;
756
757         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
758         vq->vq_avail_idx = idx;
759
760         if (!in_order) {
761                 vq->vq_desc_head_idx = dxp->next;
762                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
763                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
764         }
765
766         virtio_wmb(vq->hw->weak_barriers);
767         head_dp->flags = head_flags;
768 }
769
770 static inline void
771 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
772                         uint16_t needed, int use_indirect, int can_push,
773                         int in_order)
774 {
775         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
776         struct vq_desc_extra *dxp;
777         struct virtqueue *vq = txvq->vq;
778         struct vring_desc *start_dp;
779         uint16_t seg_num = cookie->nb_segs;
780         uint16_t head_idx, idx;
781         uint16_t head_size = vq->hw->vtnet_hdr_size;
782         struct virtio_net_hdr *hdr;
783
784         head_idx = vq->vq_desc_head_idx;
785         idx = head_idx;
786         if (in_order)
787                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
788         else
789                 dxp = &vq->vq_descx[idx];
790         dxp->cookie = (void *)cookie;
791         dxp->ndescs = needed;
792
793         start_dp = vq->vq_split.ring.desc;
794
795         if (can_push) {
796                 /* prepend cannot fail, checked by caller */
797                 hdr = (struct virtio_net_hdr *)
798                         rte_pktmbuf_prepend(cookie, head_size);
799                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length;
800                  * the subtraction below restores the correct pkt size.
801                  */
802                 cookie->pkt_len -= head_size;
803
804                 /* if offload disabled, it is not zeroed below, do it now */
805                 if (!vq->hw->has_tx_offload)
806                         virtqueue_clear_net_hdr(hdr);
807         } else if (use_indirect) {
808                 /* setup tx ring slot to point to indirect
809                  * descriptor list stored in reserved region.
810                  *
811                  * the first slot in indirect ring is already preset
812                  * to point to the header in reserved region
813                  */
814                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
815                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
816                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
817                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
818                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
819
820                 /* loop below will fill in rest of the indirect elements */
821                 start_dp = txr[idx].tx_indir;
822                 idx = 1;
823         } else {
824                 /* setup first tx ring slot to point to header
825                  * stored in reserved region.
826                  */
827                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
828                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
829                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
830                 start_dp[idx].flags = VRING_DESC_F_NEXT;
831                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
832
833                 idx = start_dp[idx].next;
834         }
835
836         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
837
838         do {
839                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
840                 start_dp[idx].len   = cookie->data_len;
841                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
842                 idx = start_dp[idx].next;
843         } while ((cookie = cookie->next) != NULL);
844
845         if (use_indirect)
846                 idx = vq->vq_split.ring.desc[head_idx].next;
847
848         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
849
850         vq->vq_desc_head_idx = idx;
851         vq_update_avail_ring(vq, head_idx);
852
853         if (!in_order) {
854                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
855                         vq->vq_desc_tail_idx = idx;
856         }
857 }
858
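/*
 * Illustrative sketch (editor's addition, simplified): how the Tx burst path
 * typically chooses between the three descriptor layouts handled by
 * virtqueue_enqueue_xmit() above. 'm' is a packet mbuf and 'vq' its
 * virtqueue; additional feature and alignment checks are omitted.
 */
#if 0
int can_push = rte_mbuf_refcnt_read(m) == 1 &&
        RTE_MBUF_DIRECT(m) &&
        m->nb_segs == 1 &&
        rte_pktmbuf_headroom(m) >= vq->hw->vtnet_hdr_size;
int use_indirect = vtpci_with_feature(vq->hw, VIRTIO_RING_F_INDIRECT_DESC) &&
        m->nb_segs > 1;
/* ring slots needed: header pushed into headroom -> nb_segs,
 * indirect descriptors -> 1, separate header slot -> 1 + nb_segs.
 */
int slots = use_indirect ? 1 : !can_push + m->nb_segs;
#endif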
859 void
860 virtio_dev_cq_start(struct rte_eth_dev *dev)
861 {
862         struct virtio_hw *hw = dev->data->dev_private;
863
864         if (hw->cvq && hw->cvq->vq) {
865                 rte_spinlock_init(&hw->cvq->lock);
866                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
867         }
868 }
869
870 int
871 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
872                         uint16_t queue_idx,
873                         uint16_t nb_desc,
874                         unsigned int socket_id __rte_unused,
875                         const struct rte_eth_rxconf *rx_conf __rte_unused,
876                         struct rte_mempool *mp)
877 {
878         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
879         struct virtio_hw *hw = dev->data->dev_private;
880         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
881         struct virtnet_rx *rxvq;
882
883         PMD_INIT_FUNC_TRACE();
884
885         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
886                 nb_desc = vq->vq_nentries;
887         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
888
889         rxvq = &vq->rxq;
890         rxvq->queue_id = queue_idx;
891         rxvq->mpool = mp;
892         dev->data->rx_queues[queue_idx] = rxvq;
893
894         return 0;
895 }
896
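/*
 * Illustrative sketch (editor's addition): the ethdev-level calls that reach
 * virtio_dev_rx_queue_setup() above. Pool sizing, port_id and the descriptor
 * count are example values only.
 */
#if 0
struct rte_mempool *mp = rte_pktmbuf_pool_create("virtio_rx_pool", 8192, 256,
                0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());

/* nb_desc of 0, or anything above the ring size, falls back to the ring
 * size read from the device (vq->vq_nentries).
 */
int ret = rte_eth_rx_queue_setup(port_id, 0, 256, rte_socket_id(), NULL, mp);
#endif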
897 int
898 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
899 {
900         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
901         struct virtio_hw *hw = dev->data->dev_private;
902         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
903         struct virtnet_rx *rxvq = &vq->rxq;
904         struct rte_mbuf *m;
905         uint16_t desc_idx;
906         int error, nbufs, i;
907
908         PMD_INIT_FUNC_TRACE();
909
910         /* Allocate blank mbufs for each rx descriptor */
911         nbufs = 0;
912
913         if (hw->use_simple_rx) {
914                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
915                      desc_idx++) {
916                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
917                         vq->vq_split.ring.desc[desc_idx].flags =
918                                 VRING_DESC_F_WRITE;
919                 }
920
921                 virtio_rxq_vec_setup(rxvq);
922         }
923
924         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
925         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
926              desc_idx++) {
927                 vq->sw_ring[vq->vq_nentries + desc_idx] =
928                         &rxvq->fake_mbuf;
929         }
930
931         if (hw->use_simple_rx) {
932                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
933                         virtio_rxq_rearm_vec(rxvq);
934                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
935                 }
936         } else if (hw->use_inorder_rx) {
937                 if ((!virtqueue_full(vq))) {
938                         uint16_t free_cnt = vq->vq_free_cnt;
939                         struct rte_mbuf *pkts[free_cnt];
940
941                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
942                                 free_cnt)) {
943                                 error = virtqueue_enqueue_refill_inorder(vq,
944                                                 pkts,
945                                                 free_cnt);
946                                 if (unlikely(error)) {
947                                         for (i = 0; i < free_cnt; i++)
948                                                 rte_pktmbuf_free(pkts[i]);
949                                 }
950                         }
951
952                         nbufs += free_cnt;
953                         vq_update_avail_idx(vq);
954                 }
955         } else {
956                 while (!virtqueue_full(vq)) {
957                         m = rte_mbuf_raw_alloc(rxvq->mpool);
958                         if (m == NULL)
959                                 break;
960
961                         /* Enqueue allocated buffers */
962                         if (vtpci_packed_queue(vq->hw))
963                                 error = virtqueue_enqueue_recv_refill_packed(vq,
964                                                 &m, 1);
965                         else
966                                 error = virtqueue_enqueue_recv_refill(vq,
967                                                 &m, 1);
968                         if (error) {
969                                 rte_pktmbuf_free(m);
970                                 break;
971                         }
972                         nbufs++;
973                 }
974
975                 if (!vtpci_packed_queue(vq->hw))
976                         vq_update_avail_idx(vq);
977         }
978
979         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
980
981         VIRTQUEUE_DUMP(vq);
982
983         return 0;
984 }
985
986 /*
987  * struct rte_eth_dev *dev: used to store the queue in dev->data->tx_queues
988  * uint16_t nb_desc: 0 or oversized values fall back to the ring size
989  * unsigned int socket_id: unused, the virtqueue is already allocated
990  * const struct rte_eth_txconf *tx_conf: provides tx_free_thresh
991  * uint16_t queue_idx: index into the device Tx queue list
992  */
993 int
994 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
995                         uint16_t queue_idx,
996                         uint16_t nb_desc,
997                         unsigned int socket_id __rte_unused,
998                         const struct rte_eth_txconf *tx_conf)
999 {
1000         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1001         struct virtio_hw *hw = dev->data->dev_private;
1002         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1003         struct virtnet_tx *txvq;
1004         uint16_t tx_free_thresh;
1005
1006         PMD_INIT_FUNC_TRACE();
1007
1008         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1009                 nb_desc = vq->vq_nentries;
1010         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1011
1012         txvq = &vq->txq;
1013         txvq->queue_id = queue_idx;
1014
1015         tx_free_thresh = tx_conf->tx_free_thresh;
1016         if (tx_free_thresh == 0)
1017                 tx_free_thresh =
1018                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1019
1020         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1021                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1022                         "number of TX entries minus 3 (%u)."
1023                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1024                         vq->vq_nentries - 3,
1025                         tx_free_thresh, dev->data->port_id, queue_idx);
1026                 return -EINVAL;
1027         }
1028
1029         vq->vq_free_thresh = tx_free_thresh;
1030
1031         dev->data->tx_queues[queue_idx] = txvq;
1032         return 0;
1033 }
1034
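/*
 * Illustrative sketch (editor's addition): with a 256-entry Tx ring, leaving
 * tx_free_thresh at 0 makes the setup above pick RTE_MIN(256 / 4, 32) = 32;
 * an explicit value must stay below 256 - 3 or setup fails with -EINVAL.
 * port_id and the descriptor count are example values only.
 */
#if 0
struct rte_eth_txconf txconf = {
        .tx_free_thresh = 32,
};

int ret = rte_eth_tx_queue_setup(port_id, 0, 256, rte_socket_id(), &txconf);
#endif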
1035 int
1036 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1037                                 uint16_t queue_idx)
1038 {
1039         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1040         struct virtio_hw *hw = dev->data->dev_private;
1041         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1042
1043         PMD_INIT_FUNC_TRACE();
1044
1045         if (!vtpci_packed_queue(hw)) {
1046                 if (hw->use_inorder_tx)
1047                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1048         }
1049
1050         VIRTQUEUE_DUMP(vq);
1051
1052         return 0;
1053 }
1054
1055 static inline void
1056 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1057 {
1058         int error;
1059         /*
1060          * Requeue the discarded mbuf. This should always be
1061          * successful since it was just dequeued.
1062          */
1063         if (vtpci_packed_queue(vq->hw))
1064                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1065         else
1066                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1067
1068         if (unlikely(error)) {
1069                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1070                 rte_pktmbuf_free(m);
1071         }
1072 }
1073
1074 static inline void
1075 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1076 {
1077         int error;
1078
1079         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1080         if (unlikely(error)) {
1081                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
1082                 rte_pktmbuf_free(m);
1083         }
1084 }
1085
1086 static inline void
1087 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1088 {
1089         uint32_t s = mbuf->pkt_len;
1090         struct rte_ether_addr *ea;
1091
1092         stats->bytes += s;
1093
1094         if (s == 64) {
1095                 stats->size_bins[1]++;
1096         } else if (s > 64 && s < 1024) {
1097                 uint32_t bin;
1098
1099                 /* count leading zeros and offset into the correct bin */
1100                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1101                 stats->size_bins[bin]++;
1102         } else {
1103                 if (s < 64)
1104                         stats->size_bins[0]++;
1105                 else if (s < 1519)
1106                         stats->size_bins[6]++;
1107                 else
1108                         stats->size_bins[7]++;
1109         }
1110
1111         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
1112         if (rte_is_multicast_ether_addr(ea)) {
1113                 if (rte_is_broadcast_ether_addr(ea))
1114                         stats->broadcast++;
1115                 else
1116                         stats->multicast++;
1117         }
1118 }
1119
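/*
 * Worked example (editor's addition) for the size-bin computation above,
 * with the 32-bit 's':
 *   s = 128  -> __builtin_clz(128)  = 24 -> bin = 32 - 24 - 5 = 3
 *   s = 1000 -> __builtin_clz(1000) = 22 -> bin = 32 - 22 - 5 = 5
 * so size_bins[2..5] count 65-127, 128-255, 256-511 and 512-1023 byte
 * packets, while bins 0, 1, 6 and 7 are filled by the explicit branches.
 */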
1120 static inline void
1121 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1122 {
1123         VIRTIO_DUMP_PACKET(m, m->data_len);
1124
1125         virtio_update_packet_stats(&rxvq->stats, m);
1126 }
1127
1128 /* Optionally fill offload information in structure */
1129 static inline int
1130 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1131 {
1132         struct rte_net_hdr_lens hdr_lens;
1133         uint32_t hdrlen, ptype;
1134         int l4_supported = 0;
1135
1136         /* nothing to do */
1137         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1138                 return 0;
1139
1140         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1141
1142         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1143         m->packet_type = ptype;
1144         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1145             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1146             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1147                 l4_supported = 1;
1148
1149         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1150                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1151                 if (hdr->csum_start <= hdrlen && l4_supported) {
1152                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1153                 } else {
1154                         /* Unknown proto or tunnel, do sw cksum. We can assume
1155                          * the cksum field is in the first segment since the
1156                          * buffers we provided to the host are large enough.
1157                          * In case of SCTP, this will be wrong since it's a CRC
1158                          * but there's nothing we can do.
1159                          */
1160                         uint16_t csum = 0, off;
1161
1162                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1163                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1164                                 &csum);
1165                         if (likely(csum != 0xffff))
1166                                 csum = ~csum;
1167                         off = hdr->csum_offset + hdr->csum_start;
1168                         if (rte_pktmbuf_data_len(m) >= off + 1)
1169                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1170                                         off) = csum;
1171                 }
1172         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1173                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1174         }
1175
1176         /* GSO request, save required information in mbuf */
1177         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1178                 /* Check unsupported modes */
1179                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1180                     (hdr->gso_size == 0)) {
1181                         return -EINVAL;
1182                 }
1183
1184                 /* Update mss lengths in mbuf */
1185                 m->tso_segsz = hdr->gso_size;
1186                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1187                         case VIRTIO_NET_HDR_GSO_TCPV4:
1188                         case VIRTIO_NET_HDR_GSO_TCPV6:
1189                                 m->ol_flags |= PKT_RX_LRO | \
1190                                         PKT_RX_L4_CKSUM_NONE;
1191                                 break;
1192                         default:
1193                                 return -EINVAL;
1194                 }
1195         }
1196
1197         return 0;
1198 }
1199
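/*
 * Illustrative sketch (editor's addition): how an application can read back
 * the flags that virtio_rx_offload() sets on received mbufs.
 */
#if 0
uint64_t csum = m->ol_flags & PKT_RX_L4_CKSUM_MASK;

if (csum == PKT_RX_L4_CKSUM_GOOD) {
        /* host reported VIRTIO_NET_HDR_F_DATA_VALID for a known L4 proto */
} else if (csum == PKT_RX_L4_CKSUM_NONE) {
        /* checksum not verified; finish it in software if required */
}
if (m->ol_flags & PKT_RX_LRO) {
        /* coalesced receive; per-packet MSS available in m->tso_segsz */
}
#endif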
1200 #define VIRTIO_MBUF_BURST_SZ 64
1201 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
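/*
 * Worked example (editor's addition): with a 64-byte cache line and 16-byte
 * split-ring descriptors, DESC_PER_CACHELINE is 4. The burst trimming in
 * virtio_recv_pkts() below, e.g. with vq_used_cons_idx = 3 and num = 32,
 * gives num = 32 - ((3 + 32) % 4) = 29, so the next burst starts on a
 * descriptor index that is a multiple of 4 (a cache-line boundary).
 */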
1202 uint16_t
1203 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1204 {
1205         struct virtnet_rx *rxvq = rx_queue;
1206         struct virtqueue *vq = rxvq->vq;
1207         struct virtio_hw *hw = vq->hw;
1208         struct rte_mbuf *rxm;
1209         uint16_t nb_used, num, nb_rx;
1210         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1211         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1212         int error;
1213         uint32_t i, nb_enqueued;
1214         uint32_t hdr_size;
1215         struct virtio_net_hdr *hdr;
1216
1217         nb_rx = 0;
1218         if (unlikely(hw->started == 0))
1219                 return nb_rx;
1220
1221         nb_used = VIRTQUEUE_NUSED(vq);
1222
1223         virtio_rmb(hw->weak_barriers);
1224
1225         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1226         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1227                 num = VIRTIO_MBUF_BURST_SZ;
1228         if (likely(num > DESC_PER_CACHELINE))
1229                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1230
1231         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1232         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1233
1234         nb_enqueued = 0;
1235         hdr_size = hw->vtnet_hdr_size;
1236
1237         for (i = 0; i < num ; i++) {
1238                 rxm = rcv_pkts[i];
1239
1240                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1241
1242                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1243                         PMD_RX_LOG(ERR, "Packet drop");
1244                         nb_enqueued++;
1245                         virtio_discard_rxbuf(vq, rxm);
1246                         rxvq->stats.errors++;
1247                         continue;
1248                 }
1249
1250                 rxm->port = rxvq->port_id;
1251                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1252                 rxm->ol_flags = 0;
1253                 rxm->vlan_tci = 0;
1254
1255                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1256                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1257
1258                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1259                         RTE_PKTMBUF_HEADROOM - hdr_size);
1260
1261                 if (hw->vlan_strip)
1262                         rte_vlan_strip(rxm);
1263
1264                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1265                         virtio_discard_rxbuf(vq, rxm);
1266                         rxvq->stats.errors++;
1267                         continue;
1268                 }
1269
1270                 virtio_rx_stats_updated(rxvq, rxm);
1271
1272                 rx_pkts[nb_rx++] = rxm;
1273         }
1274
1275         rxvq->stats.packets += nb_rx;
1276
1277         /* Allocate new mbuf for the used descriptor */
1278         if (likely(!virtqueue_full(vq))) {
1279                 uint16_t free_cnt = vq->vq_free_cnt;
1280                 struct rte_mbuf *new_pkts[free_cnt];
1281
1282                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1283                                                 free_cnt) == 0)) {
1284                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1285                                         free_cnt);
1286                         if (unlikely(error)) {
1287                                 for (i = 0; i < free_cnt; i++)
1288                                         rte_pktmbuf_free(new_pkts[i]);
1289                         }
1290                         nb_enqueued += free_cnt;
1291                 } else {
1292                         struct rte_eth_dev *dev =
1293                                 &rte_eth_devices[rxvq->port_id];
1294                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1295                 }
1296         }
1297
1298         if (likely(nb_enqueued)) {
1299                 vq_update_avail_idx(vq);
1300
1301                 if (unlikely(virtqueue_kick_prepare(vq))) {
1302                         virtqueue_notify(vq);
1303                         PMD_RX_LOG(DEBUG, "Notified");
1304                 }
1305         }
1306
1307         return nb_rx;
1308 }
1309
1310 uint16_t
1311 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1312                         uint16_t nb_pkts)
1313 {
1314         struct virtnet_rx *rxvq = rx_queue;
1315         struct virtqueue *vq = rxvq->vq;
1316         struct virtio_hw *hw = vq->hw;
1317         struct rte_mbuf *rxm;
1318         uint16_t num, nb_rx;
1319         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1320         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1321         int error;
1322         uint32_t i, nb_enqueued;
1323         uint32_t hdr_size;
1324         struct virtio_net_hdr *hdr;
1325
1326         nb_rx = 0;
1327         if (unlikely(hw->started == 0))
1328                 return nb_rx;
1329
1330         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1331         if (likely(num > DESC_PER_CACHELINE))
1332                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1333
1334         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1335         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1336
1337         nb_enqueued = 0;
1338         hdr_size = hw->vtnet_hdr_size;
1339
1340         for (i = 0; i < num; i++) {
1341                 rxm = rcv_pkts[i];
1342
1343                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1344
1345                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1346                         PMD_RX_LOG(ERR, "Packet drop");
1347                         nb_enqueued++;
1348                         virtio_discard_rxbuf(vq, rxm);
1349                         rxvq->stats.errors++;
1350                         continue;
1351                 }
1352
1353                 rxm->port = rxvq->port_id;
1354                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1355                 rxm->ol_flags = 0;
1356                 rxm->vlan_tci = 0;
1357
1358                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1359                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1360
1361                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1362                         RTE_PKTMBUF_HEADROOM - hdr_size);
1363
1364                 if (hw->vlan_strip)
1365                         rte_vlan_strip(rxm);
1366
1367                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1368                         virtio_discard_rxbuf(vq, rxm);
1369                         rxvq->stats.errors++;
1370                         continue;
1371                 }
1372
1373                 virtio_rx_stats_updated(rxvq, rxm);
1374
1375                 rx_pkts[nb_rx++] = rxm;
1376         }
1377
1378         rxvq->stats.packets += nb_rx;
1379
1380         /* Allocate new mbuf for the used descriptor */
1381         if (likely(!virtqueue_full(vq))) {
1382                 uint16_t free_cnt = vq->vq_free_cnt;
1383                 struct rte_mbuf *new_pkts[free_cnt];
1384
1385                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1386                                                 free_cnt) == 0)) {
1387                         error = virtqueue_enqueue_recv_refill_packed(vq,
1388                                         new_pkts, free_cnt);
1389                         if (unlikely(error)) {
1390                                 for (i = 0; i < free_cnt; i++)
1391                                         rte_pktmbuf_free(new_pkts[i]);
1392                         }
1393                         nb_enqueued += free_cnt;
1394                 } else {
1395                         struct rte_eth_dev *dev =
1396                                 &rte_eth_devices[rxvq->port_id];
1397                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1398                 }
1399         }
1400
1401         if (likely(nb_enqueued)) {
1402                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1403                         virtqueue_notify(vq);
1404                         PMD_RX_LOG(DEBUG, "Notified");
1405                 }
1406         }
1407
1408         return nb_rx;
1409 }
1410
1411
1412 uint16_t
1413 virtio_recv_pkts_inorder(void *rx_queue,
1414                         struct rte_mbuf **rx_pkts,
1415                         uint16_t nb_pkts)
1416 {
1417         struct virtnet_rx *rxvq = rx_queue;
1418         struct virtqueue *vq = rxvq->vq;
1419         struct virtio_hw *hw = vq->hw;
1420         struct rte_mbuf *rxm;
1421         struct rte_mbuf *prev = NULL;
1422         uint16_t nb_used, num, nb_rx;
1423         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1424         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1425         int error;
1426         uint32_t nb_enqueued;
1427         uint32_t seg_num;
1428         uint32_t seg_res;
1429         uint32_t hdr_size;
1430         int32_t i;
1431
1432         nb_rx = 0;
1433         if (unlikely(hw->started == 0))
1434                 return nb_rx;
1435
1436         nb_used = VIRTQUEUE_NUSED(vq);
1437         nb_used = RTE_MIN(nb_used, nb_pkts);
1438         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1439
1440         virtio_rmb(hw->weak_barriers);
1441
1442         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1443
1444         nb_enqueued = 0;
1445         seg_num = 1;
1446         seg_res = 0;
1447         hdr_size = hw->vtnet_hdr_size;
1448
1449         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1450
1451         for (i = 0; i < num; i++) {
1452                 struct virtio_net_hdr_mrg_rxbuf *header;
1453
1454                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1455                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1456
1457                 rxm = rcv_pkts[i];
1458
1459                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1460                         PMD_RX_LOG(ERR, "Packet drop");
1461                         nb_enqueued++;
1462                         virtio_discard_rxbuf_inorder(vq, rxm);
1463                         rxvq->stats.errors++;
1464                         continue;
1465                 }
1466
1467                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1468                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1469                          - hdr_size);
1470
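                /*
                 * Without VIRTIO_NET_F_MRG_RXBUF the num_buffers field of the
                 * header is not meaningful, so assume a single buffer.
                 */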
1471                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1472                         seg_num = header->num_buffers;
1473                         if (seg_num == 0)
1474                                 seg_num = 1;
1475                 } else {
1476                         seg_num = 1;
1477                 }
1478
1479                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1480                 rxm->nb_segs = seg_num;
1481                 rxm->ol_flags = 0;
1482                 rxm->vlan_tci = 0;
1483                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1484                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1485
1486                 rxm->port = rxvq->port_id;
1487
1488                 rx_pkts[nb_rx] = rxm;
1489                 prev = rxm;
1490
1491                 if (vq->hw->has_rx_offload &&
1492                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1493                         virtio_discard_rxbuf_inorder(vq, rxm);
1494                         rxvq->stats.errors++;
1495                         continue;
1496                 }
1497
1498                 if (hw->vlan_strip)
1499                         rte_vlan_strip(rx_pkts[nb_rx]);
1500
1501                 seg_res = seg_num - 1;
1502
1503                 /* Merge remaining segments */
1504                 while (seg_res != 0 && i < (num - 1)) {
1505                         i++;
1506
1507                         rxm = rcv_pkts[i];
1508                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1509                         rxm->pkt_len = (uint32_t)(len[i]);
1510                         rxm->data_len = (uint16_t)(len[i]);
1511
1512                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1513
1514                         prev->next = rxm;
1515                         prev = rxm;
1516                         seg_res -= 1;
1517                 }
1518
1519                 if (!seg_res) {
1520                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1521                         nb_rx++;
1522                 }
1523         }
1524
1525         /* The last packet may still need its remaining segments merged in */
1526         while (seg_res != 0) {
1527                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1528                                         VIRTIO_MBUF_BURST_SZ);
1529
1530                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1531                         virtio_rmb(hw->weak_barriers);
1532                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1533                                                            rcv_cnt);
1534                         uint16_t extra_idx = 0;
1535
1536                         rcv_cnt = num;
1537                         while (extra_idx < rcv_cnt) {
1538                                 rxm = rcv_pkts[extra_idx];
1539                                 rxm->data_off =
1540                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1541                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1542                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1543                                 prev->next = rxm;
1544                                 prev = rxm;
1545                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1546                                 extra_idx += 1;
1547                         }
1548                         seg_res -= rcv_cnt;
1549
1550                         if (!seg_res) {
1551                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1552                                 nb_rx++;
1553                         }
1554                 } else {
1555                         PMD_RX_LOG(ERR,
1556                                         "Not enough segments for packet.");
1557                         virtio_discard_rxbuf_inorder(vq, prev);
1558                         rxvq->stats.errors++;
1559                         break;
1560                 }
1561         }
1562
1563         rxvq->stats.packets += nb_rx;
1564
1565         /* Allocate new mbufs to refill the used descriptors */
1566
1567         if (likely(!virtqueue_full(vq))) {
1568                 /* free_cnt may include mrg descs */
1569                 uint16_t free_cnt = vq->vq_free_cnt;
1570                 struct rte_mbuf *new_pkts[free_cnt];
1571
1572                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1573                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1574                                         free_cnt);
1575                         if (unlikely(error)) {
1576                                 for (i = 0; i < free_cnt; i++)
1577                                         rte_pktmbuf_free(new_pkts[i]);
1578                         }
1579                         nb_enqueued += free_cnt;
1580                 } else {
1581                         struct rte_eth_dev *dev =
1582                                 &rte_eth_devices[rxvq->port_id];
1583                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1584                 }
1585         }
1586
1587         if (likely(nb_enqueued)) {
1588                 vq_update_avail_idx(vq);
1589
1590                 if (unlikely(virtqueue_kick_prepare(vq))) {
1591                         virtqueue_notify(vq);
1592                         PMD_RX_LOG(DEBUG, "Notified");
1593                 }
1594         }
1595
1596         return nb_rx;
1597 }
1598
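/*
 * Rx burst function for split virtqueues with VIRTIO_NET_F_MRG_RXBUF: a
 * packet may span several descriptors, so its segments are chained into a
 * single multi-segment mbuf before the ring is refilled and the device is
 * kicked if needed.
 */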
1599 uint16_t
1600 virtio_recv_mergeable_pkts(void *rx_queue,
1601                         struct rte_mbuf **rx_pkts,
1602                         uint16_t nb_pkts)
1603 {
1604         struct virtnet_rx *rxvq = rx_queue;
1605         struct virtqueue *vq = rxvq->vq;
1606         struct virtio_hw *hw = vq->hw;
1607         struct rte_mbuf *rxm;
1608         struct rte_mbuf *prev = NULL;
1609         uint16_t nb_used, num, nb_rx = 0;
1610         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1611         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1612         int error;
1613         uint32_t nb_enqueued = 0;
1614         uint32_t seg_num = 0;
1615         uint32_t seg_res = 0;
1616         uint32_t hdr_size = hw->vtnet_hdr_size;
1617         int32_t i;
1618
1619         if (unlikely(hw->started == 0))
1620                 return nb_rx;
1621
1622         nb_used = VIRTQUEUE_NUSED(vq);
1623
1624         virtio_rmb(hw->weak_barriers);
1625
1626         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1627
1628         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1629         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1630                 num = VIRTIO_MBUF_BURST_SZ;
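        /* Trim the burst so it ends on a used-ring cache-line boundary */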
1631         if (likely(num > DESC_PER_CACHELINE))
1632                 num = num - ((vq->vq_used_cons_idx + num) %
1633                                 DESC_PER_CACHELINE);
1634
1635
1636         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1637
1638         for (i = 0; i < num; i++) {
1639                 struct virtio_net_hdr_mrg_rxbuf *header;
1640
1641                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1642                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1643
1644                 rxm = rcv_pkts[i];
1645
1646                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1647                         PMD_RX_LOG(ERR, "Packet drop");
1648                         nb_enqueued++;
1649                         virtio_discard_rxbuf(vq, rxm);
1650                         rxvq->stats.errors++;
1651                         continue;
1652                 }
1653
1654                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1655                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1656                          - hdr_size);
1657                 seg_num = header->num_buffers;
1658                 if (seg_num == 0)
1659                         seg_num = 1;
1660
1661                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1662                 rxm->nb_segs = seg_num;
1663                 rxm->ol_flags = 0;
1664                 rxm->vlan_tci = 0;
1665                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1666                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1667
1668                 rxm->port = rxvq->port_id;
1669
1670                 rx_pkts[nb_rx] = rxm;
1671                 prev = rxm;
1672
1673                 if (hw->has_rx_offload &&
1674                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1675                         virtio_discard_rxbuf(vq, rxm);
1676                         rxvq->stats.errors++;
1677                         continue;
1678                 }
1679
1680                 if (hw->vlan_strip)
1681                         rte_vlan_strip(rx_pkts[nb_rx]);
1682
1683                 seg_res = seg_num - 1;
1684
1685                 /* Merge remaining segments */
1686                 while (seg_res != 0 && i < (num - 1)) {
1687                         i++;
1688
1689                         rxm = rcv_pkts[i];
1690                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1691                         rxm->pkt_len = (uint32_t)(len[i]);
1692                         rxm->data_len = (uint16_t)(len[i]);
1693
1694                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1695
1696                         prev->next = rxm;
1697                         prev = rxm;
1698                         seg_res -= 1;
1699                 }
1700
1701                 if (!seg_res) {
1702                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1703                         nb_rx++;
1704                 }
1705         }
1706
1707         /* The last packet may still need its remaining segments merged in */
1708         while (seg_res != 0) {
1709                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1710                                         VIRTIO_MBUF_BURST_SZ);
1711
1712                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1713                         virtio_rmb(hw->weak_barriers);
1714                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1715                                                            rcv_cnt);
1716                         uint16_t extra_idx = 0;
1717
1718                         rcv_cnt = num;
1719                         while (extra_idx < rcv_cnt) {
1720                                 rxm = rcv_pkts[extra_idx];
1721                                 rxm->data_off =
1722                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1723                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1724                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1725                                 prev->next = rxm;
1726                                 prev = rxm;
1727                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1728                                 extra_idx += 1;
1729                         }
1730                         seg_res -= rcv_cnt;
1731
1732                         if (!seg_res) {
1733                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1734                                 nb_rx++;
1735                         }
1736                 } else {
1737                         PMD_RX_LOG(ERR,
1738                                         "Not enough segments for packet.");
1739                         virtio_discard_rxbuf(vq, prev);
1740                         rxvq->stats.errors++;
1741                         break;
1742                 }
1743         }
1744
1745         rxvq->stats.packets += nb_rx;
1746
1747         /* Allocate new mbufs to refill the used descriptors */
1748         if (likely(!virtqueue_full(vq))) {
1749                 /* free_cnt may include mrg descs */
1750                 uint16_t free_cnt = vq->vq_free_cnt;
1751                 struct rte_mbuf *new_pkts[free_cnt];
1752
1753                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1754                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1755                                         free_cnt);
1756                         if (unlikely(error)) {
1757                                 for (i = 0; i < free_cnt; i++)
1758                                         rte_pktmbuf_free(new_pkts[i]);
1759                         }
1760                         nb_enqueued += free_cnt;
1761                 } else {
1762                         struct rte_eth_dev *dev =
1763                                 &rte_eth_devices[rxvq->port_id];
1764                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1765                 }
1766         }
1767
1768         if (likely(nb_enqueued)) {
1769                 vq_update_avail_idx(vq);
1770
1771                 if (unlikely(virtqueue_kick_prepare(vq))) {
1772                         virtqueue_notify(vq);
1773                         PMD_RX_LOG(DEBUG, "Notified");
1774                 }
1775         }
1776
1777         return nb_rx;
1778 }
1779
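/*
 * Rx burst function for packed virtqueues with mergeable Rx buffers; the
 * logic mirrors the split-ring variant above but uses the packed-ring
 * dequeue, refill and kick primitives.
 */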
1780 uint16_t
1781 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1782                         struct rte_mbuf **rx_pkts,
1783                         uint16_t nb_pkts)
1784 {
1785         struct virtnet_rx *rxvq = rx_queue;
1786         struct virtqueue *vq = rxvq->vq;
1787         struct virtio_hw *hw = vq->hw;
1788         struct rte_mbuf *rxm;
1789         struct rte_mbuf *prev = NULL;
1790         uint16_t num, nb_rx = 0;
1791         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1792         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1793         uint32_t nb_enqueued = 0;
1794         uint32_t seg_num = 0;
1795         uint32_t seg_res = 0;
1796         uint32_t hdr_size = hw->vtnet_hdr_size;
1797         int32_t i;
1798         int error;
1799
1800         if (unlikely(hw->started == 0))
1801                 return nb_rx;
1802
1803
1804         num = nb_pkts;
1805         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1806                 num = VIRTIO_MBUF_BURST_SZ;
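        /* Trim the burst so it ends on a descriptor cache-line boundary */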
1807         if (likely(num > DESC_PER_CACHELINE))
1808                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1809
1810         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1811
1812         for (i = 0; i < num; i++) {
1813                 struct virtio_net_hdr_mrg_rxbuf *header;
1814
1815                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1816                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1817
1818                 rxm = rcv_pkts[i];
1819
1820                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1821                         PMD_RX_LOG(ERR, "Packet drop");
1822                         nb_enqueued++;
1823                         virtio_discard_rxbuf(vq, rxm);
1824                         rxvq->stats.errors++;
1825                         continue;
1826                 }
1827
1828                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1829                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1830                 seg_num = header->num_buffers;
1831
1832                 if (seg_num == 0)
1833                         seg_num = 1;
1834
1835                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1836                 rxm->nb_segs = seg_num;
1837                 rxm->ol_flags = 0;
1838                 rxm->vlan_tci = 0;
1839                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1840                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1841
1842                 rxm->port = rxvq->port_id;
1843                 rx_pkts[nb_rx] = rxm;
1844                 prev = rxm;
1845
1846                 if (hw->has_rx_offload &&
1847                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1848                         virtio_discard_rxbuf(vq, rxm);
1849                         rxvq->stats.errors++;
1850                         continue;
1851                 }
1852
1853                 if (hw->vlan_strip)
1854                         rte_vlan_strip(rx_pkts[nb_rx]);
1855
1856                 seg_res = seg_num - 1;
1857
1858                 /* Merge remaining segments */
1859                 while (seg_res != 0 && i < (num - 1)) {
1860                         i++;
1861
1862                         rxm = rcv_pkts[i];
1863                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1864                         rxm->pkt_len = (uint32_t)(len[i]);
1865                         rxm->data_len = (uint16_t)(len[i]);
1866
1867                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1868
1869                         prev->next = rxm;
1870                         prev = rxm;
1871                         seg_res -= 1;
1872                 }
1873
1874                 if (!seg_res) {
1875                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1876                         nb_rx++;
1877                 }
1878         }
1879
1880         /* The last packet may still need its remaining segments merged in */
1881         while (seg_res != 0) {
1882                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1883                                         VIRTIO_MBUF_BURST_SZ);
1884                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1885                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1886                                         len, rcv_cnt);
1887                         uint16_t extra_idx = 0;
1888
1889                         rcv_cnt = num;
1890
1891                         while (extra_idx < rcv_cnt) {
1892                                 rxm = rcv_pkts[extra_idx];
1893
1894                                 rxm->data_off =
1895                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1896                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1897                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1898
1899                                 prev->next = rxm;
1900                                 prev = rxm;
1901                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1902                                 extra_idx += 1;
1903                         }
1904                         seg_res -= rcv_cnt;
1905                         if (!seg_res) {
1906                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1907                                 nb_rx++;
1908                         }
1909                 } else {
1910                         PMD_RX_LOG(ERR,
1911                                         "Not enough segments for packet.");
1912                         virtio_discard_rxbuf(vq, prev);
1913                         rxvq->stats.errors++;
1914                         break;
1915                 }
1916         }
1917
1918         rxvq->stats.packets += nb_rx;
1919
1920         /* Allocate new mbufs to refill the used descriptors */
1921         if (likely(!virtqueue_full(vq))) {
1922                 /* free_cnt may include mrg descs */
1923                 uint16_t free_cnt = vq->vq_free_cnt;
1924                 struct rte_mbuf *new_pkts[free_cnt];
1925
1926                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1927                         error = virtqueue_enqueue_recv_refill_packed(vq,
1928                                         new_pkts, free_cnt);
1929                         if (unlikely(error)) {
1930                                 for (i = 0; i < free_cnt; i++)
1931                                         rte_pktmbuf_free(new_pkts[i]);
1932                         }
1933                         nb_enqueued += free_cnt;
1934                 } else {
1935                         struct rte_eth_dev *dev =
1936                                 &rte_eth_devices[rxvq->port_id];
1937                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1938                 }
1939         }
1940
1941         if (likely(nb_enqueued)) {
1942                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1943                         virtqueue_notify(vq);
1944                         PMD_RX_LOG(DEBUG, "Notified");
1945                 }
1946         }
1947
1948         return nb_rx;
1949 }
1950
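/*
 * Tx prepare callback shared by all Tx paths: optionally validate the
 * offload flags (debug builds only), let rte_net_intel_cksum_prepare()
 * set up the pseudo-header checksum, and adjust the TCP checksum of TSO
 * packets to the form expected by the host. On error, rte_errno is set
 * and the number of successfully prepared packets is returned.
 */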
1951 uint16_t
1952 virtio_xmit_pkts_prepare(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts,
1953                         uint16_t nb_pkts)
1954 {
1955         uint16_t nb_tx;
1956         int error;
1957
1958         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1959                 struct rte_mbuf *m = tx_pkts[nb_tx];
1960
1961 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1962                 error = rte_validate_tx_offload(m);
1963                 if (unlikely(error)) {
1964                         rte_errno = -error;
1965                         break;
1966                 }
1967 #endif
1968
1969                 error = rte_net_intel_cksum_prepare(m);
1970                 if (unlikely(error)) {
1971                         rte_errno = -error;
1972                         break;
1973                 }
1974
1975                 if (m->ol_flags & PKT_TX_TCP_SEG)
1976                         virtio_tso_fix_cksum(m);
1977         }
1978
1979         return nb_tx;
1980 }
1981
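/*
 * Tx burst function for packed virtqueues: used descriptors are reclaimed
 * on demand, VLAN tags are inserted in software when requested, and each
 * mbuf is enqueued either with the virtio-net header pushed into its
 * headroom or through the multi-descriptor path, after which the device
 * is kicked if notifications are enabled.
 */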
1982 uint16_t
1983 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1984                         uint16_t nb_pkts)
1985 {
1986         struct virtnet_tx *txvq = tx_queue;
1987         struct virtqueue *vq = txvq->vq;
1988         struct virtio_hw *hw = vq->hw;
1989         uint16_t hdr_size = hw->vtnet_hdr_size;
1990         uint16_t nb_tx = 0;
1991         bool in_order = hw->use_inorder_tx;
1992         int error;
1993
1994         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1995                 return nb_tx;
1996
1997         if (unlikely(nb_pkts < 1))
1998                 return nb_pkts;
1999
2000         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2001
2002         if (nb_pkts > vq->vq_free_cnt)
2003                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
2004                                            in_order);
2005
2006         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2007                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2008                 int can_push = 0, slots, need;
2009
2010                 /* Do VLAN tag insertion */
2011                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2012                         error = rte_vlan_insert(&txm);
2013                         if (unlikely(error)) {
2014                                 rte_pktmbuf_free(txm);
2015                                 continue;
2016                         }
2017                         /* vlan_insert may add a header mbuf */
2018                         tx_pkts[nb_tx] = txm;
2019                 }
2020
2021                 /* Optimize ring usage: push the net header into the mbuf headroom when possible */
2022                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2023                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2024                     rte_mbuf_refcnt_read(txm) == 1 &&
2025                     RTE_MBUF_DIRECT(txm) &&
2026                     txm->nb_segs == 1 &&
2027                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2028                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2029                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2030                         can_push = 1;
2031
2032                 /* How many main ring entries are needed for this Tx?
2033                  * any_layout => number of segments
2034                  * default    => number of segments + 1
2035                  */
2036                 slots = txm->nb_segs + !can_push;
2037                 need = slots - vq->vq_free_cnt;
2038
2039                 /* A positive value means vring descriptors must be freed first */
2040                 if (unlikely(need > 0)) {
2041                         virtio_xmit_cleanup_packed(vq, need, in_order);
2042                         need = slots - vq->vq_free_cnt;
2043                         if (unlikely(need > 0)) {
2044                                 PMD_TX_LOG(ERR,
2045                                            "No free tx descriptors to transmit");
2046                                 break;
2047                         }
2048                 }
2049
2050                 /* Enqueue Packet buffers */
2051                 if (can_push)
2052                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2053                 else
2054                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2055                                                       in_order);
2056
2057                 virtio_update_packet_stats(&txvq->stats, txm);
2058         }
2059
2060         txvq->stats.packets += nb_tx;
2061
2062         if (likely(nb_tx)) {
2063                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2064                         virtqueue_notify(vq);
2065                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2066                 }
2067         }
2068
2069         return nb_tx;
2070 }
2071
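/*
 * Tx burst function for split virtqueues: used descriptors are reclaimed
 * when the ring runs low, VLAN tags are inserted in software when
 * requested, and each packet is sent with the header inlined in its
 * headroom, through an indirect descriptor table, or as a plain
 * descriptor chain before the avail index is updated and the device
 * kicked.
 */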
2072 uint16_t
2073 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2074 {
2075         struct virtnet_tx *txvq = tx_queue;
2076         struct virtqueue *vq = txvq->vq;
2077         struct virtio_hw *hw = vq->hw;
2078         uint16_t hdr_size = hw->vtnet_hdr_size;
2079         uint16_t nb_used, nb_tx = 0;
2080         int error;
2081
2082         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2083                 return nb_tx;
2084
2085         if (unlikely(nb_pkts < 1))
2086                 return nb_pkts;
2087
2088         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2089         nb_used = VIRTQUEUE_NUSED(vq);
2090
2091         virtio_rmb(hw->weak_barriers);
2092         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2093                 virtio_xmit_cleanup(vq, nb_used);
2094
2095         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2096                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2097                 int can_push = 0, use_indirect = 0, slots, need;
2098
2099                 /* Do VLAN tag insertion */
2100                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2101                         error = rte_vlan_insert(&txm);
2102                         if (unlikely(error)) {
2103                                 rte_pktmbuf_free(txm);
2104                                 continue;
2105                         }
2106                         /* vlan_insert may add a header mbuf */
2107                         tx_pkts[nb_tx] = txm;
2108                 }
2109
2110                 /* Optimize ring usage: inline the net header or fall back to an indirect descriptor */
2111                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2112                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2113                     rte_mbuf_refcnt_read(txm) == 1 &&
2114                     RTE_MBUF_DIRECT(txm) &&
2115                     txm->nb_segs == 1 &&
2116                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2117                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2118                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2119                         can_push = 1;
2120                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2121                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2122                         use_indirect = 1;
2123
2124                 /* How many main ring entries are needed for this Tx?
2125                  * any_layout => number of segments
2126                  * indirect   => 1
2127                  * default    => number of segments + 1
2128                  */
2129                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2130                 need = slots - vq->vq_free_cnt;
2131
2132                 /* A positive value means vring descriptors must be freed first */
2133                 if (unlikely(need > 0)) {
2134                         nb_used = VIRTQUEUE_NUSED(vq);
2135                         virtio_rmb(hw->weak_barriers);
2136                         need = RTE_MIN(need, (int)nb_used);
2137
2138                         virtio_xmit_cleanup(vq, need);
2139                         need = slots - vq->vq_free_cnt;
2140                         if (unlikely(need > 0)) {
2141                                 PMD_TX_LOG(ERR,
2142                                            "No free tx descriptors to transmit");
2143                                 break;
2144                         }
2145                 }
2146
2147                 /* Enqueue Packet buffers */
2148                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2149                         can_push, 0);
2150
2151                 virtio_update_packet_stats(&txvq->stats, txm);
2152         }
2153
2154         txvq->stats.packets += nb_tx;
2155
2156         if (likely(nb_tx)) {
2157                 vq_update_avail_idx(vq);
2158
2159                 if (unlikely(virtqueue_kick_prepare(vq))) {
2160                         virtqueue_notify(vq);
2161                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2162                 }
2163         }
2164
2165         return nb_tx;
2166 }
2167
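/*
 * Tx burst function for split virtqueues with VIRTIO_F_IN_ORDER: packets
 * whose virtio-net header fits in the mbuf headroom are batched and
 * submitted via virtqueue_enqueue_xmit_inorder(), while the remaining
 * packets fall back to the regular descriptor-chain path.
 */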
2168 uint16_t
2169 virtio_xmit_pkts_inorder(void *tx_queue,
2170                         struct rte_mbuf **tx_pkts,
2171                         uint16_t nb_pkts)
2172 {
2173         struct virtnet_tx *txvq = tx_queue;
2174         struct virtqueue *vq = txvq->vq;
2175         struct virtio_hw *hw = vq->hw;
2176         uint16_t hdr_size = hw->vtnet_hdr_size;
2177         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2178         struct rte_mbuf *inorder_pkts[nb_pkts];
2179         int error;
2180
2181         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2182                 return nb_tx;
2183
2184         if (unlikely(nb_pkts < 1))
2185                 return nb_pkts;
2186
2187         VIRTQUEUE_DUMP(vq);
2188         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2189         nb_used = VIRTQUEUE_NUSED(vq);
2190
2191         virtio_rmb(hw->weak_barriers);
2192         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2193                 virtio_xmit_cleanup_inorder(vq, nb_used);
2194
2195         if (unlikely(!vq->vq_free_cnt))
2196                 virtio_xmit_cleanup_inorder(vq, nb_used);
2197
2198         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2199
2200         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2201                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2202                 int slots, need;
2203
2204                 /* Do VLAN tag insertion */
2205                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2206                         error = rte_vlan_insert(&txm);
2207                         if (unlikely(error)) {
2208                                 rte_pktmbuf_free(txm);
2209                                 continue;
2210                         }
2211                         /* vlan_insert may add a header mbuf */
2212                         tx_pkts[nb_tx] = txm;
2213                 }
2214
2215                 /* Optimize ring usage: batch packets whose net header fits in the mbuf headroom */
2216                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2217                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2218                      rte_mbuf_refcnt_read(txm) == 1 &&
2219                      RTE_MBUF_DIRECT(txm) &&
2220                      txm->nb_segs == 1 &&
2221                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2222                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2223                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2224                         inorder_pkts[nb_inorder_pkts] = txm;
2225                         nb_inorder_pkts++;
2226
2227                         virtio_update_packet_stats(&txvq->stats, txm);
2228                         continue;
2229                 }
2230
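                /*
                 * This packet cannot take the in-order fast path; flush the
                 * packets batched so far to keep submissions in order.
                 */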
2231                 if (nb_inorder_pkts) {
2232                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2233                                                         nb_inorder_pkts);
2234                         nb_inorder_pkts = 0;
2235                 }
2236
2237                 slots = txm->nb_segs + 1;
2238                 need = slots - vq->vq_free_cnt;
2239                 if (unlikely(need > 0)) {
2240                         nb_used = VIRTQUEUE_NUSED(vq);
2241                         virtio_rmb(hw->weak_barriers);
2242                         need = RTE_MIN(need, (int)nb_used);
2243
2244                         virtio_xmit_cleanup_inorder(vq, need);
2245
2246                         need = slots - vq->vq_free_cnt;
2247
2248                         if (unlikely(need > 0)) {
2249                                 PMD_TX_LOG(ERR,
2250                                         "No free tx descriptors to transmit");
2251                                 break;
2252                         }
2253                 }
2254                 /* Enqueue Packet buffers */
2255                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2256
2257                 virtio_update_packet_stats(&txvq->stats, txm);
2258         }
2259
2260         /* Enqueue any in-order packets still pending in the batch */
2261         if (nb_inorder_pkts)
2262                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2263                                                 nb_inorder_pkts);
2264
2265         txvq->stats.packets += nb_tx;
2266
2267         if (likely(nb_tx)) {
2268                 vq_update_avail_idx(vq);
2269
2270                 if (unlikely(virtqueue_kick_prepare(vq))) {
2271                         virtqueue_notify(vq);
2272                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2273                 }
2274         }
2275
2276         VIRTQUEUE_DUMP(vq);
2277
2278         return nb_tx;
2279 }