net: add rte prefix to IP structure
[dpdk.git] / drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34 #include "virtio_ring.h"
35
36 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
37 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
38 #else
39 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
40 #endif
41
42 int
43 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
44 {
45         struct virtnet_rx *rxvq = rxq;
46         struct virtqueue *vq = rxvq->vq;
47
48         return VIRTQUEUE_NUSED(vq) >= offset;
49 }
50
51 void
52 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
53 {
54         vq->vq_free_cnt += num;
55         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
56 }
57
58 void
59 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
60 {
61         struct vring_desc *dp, *dp_tail;
62         struct vq_desc_extra *dxp;
63         uint16_t desc_idx_last = desc_idx;
64
65         dp  = &vq->vq_split.ring.desc[desc_idx];
66         dxp = &vq->vq_descx[desc_idx];
67         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
68         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
69                 while (dp->flags & VRING_DESC_F_NEXT) {
70                         desc_idx_last = dp->next;
71                         dp = &vq->vq_split.ring.desc[dp->next];
72                 }
73         }
74         dxp->ndescs = 0;
75
76         /*
77          * We must append the existing free chain, if any, to the end of
78          * the newly freed chain. If the virtqueue was completely used, then
79          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
80          */
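        /*
         * Illustration (8-entry ring, example values only): if the current
         * free list is 5 -> 6 and the chain 3 -> 4 is being freed, then
         * desc[6].next becomes 3, desc[4].next becomes VQ_RING_DESC_CHAIN_END
         * and the tail moves to 4, giving the free list 5 -> 6 -> 3 -> 4.
         */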
81         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
82                 vq->vq_desc_head_idx = desc_idx;
83         } else {
84                 dp_tail = &vq->vq_split.ring.desc[vq->vq_desc_tail_idx];
85                 dp_tail->next = desc_idx;
86         }
87
88         vq->vq_desc_tail_idx = desc_idx_last;
89         dp->next = VQ_RING_DESC_CHAIN_END;
90 }
91
92 static void
93 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
94 {
95         struct vq_desc_extra *dxp;
96
97         dxp = &vq->vq_descx[id];
98         vq->vq_free_cnt += dxp->ndescs;
99
100         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
101                 vq->vq_desc_head_idx = id;
102         else
103                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
104
105         vq->vq_desc_tail_idx = id;
106         dxp->next = VQ_RING_DESC_CHAIN_END;
107 }
108
109 static uint16_t
110 virtqueue_dequeue_burst_rx_packed(struct virtqueue *vq,
111                                   struct rte_mbuf **rx_pkts,
112                                   uint32_t *len,
113                                   uint16_t num)
114 {
115         struct rte_mbuf *cookie;
116         uint16_t used_idx;
117         uint16_t id;
118         struct vring_packed_desc *desc;
119         uint16_t i;
120
121         desc = vq->vq_packed.ring.desc;
122
123         for (i = 0; i < num; i++) {
124                 used_idx = vq->vq_used_cons_idx;
125                 if (!desc_is_used(&desc[used_idx], vq))
126                         return i;
127                 virtio_rmb(vq->hw->weak_barriers);
128                 len[i] = desc[used_idx].len;
129                 id = desc[used_idx].id;
130                 cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
131                 if (unlikely(cookie == NULL)) {
132                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
133                                 vq->vq_used_cons_idx);
134                         break;
135                 }
136                 rte_prefetch0(cookie);
137                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
138                 rx_pkts[i] = cookie;
139
140                 vq->vq_free_cnt++;
141                 vq->vq_used_cons_idx++;
142                 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
143                         vq->vq_used_cons_idx -= vq->vq_nentries;
144                         vq->vq_packed.used_wrap_counter ^= 1;
145                 }
146         }
147
148         return i;
149 }
150
151 static uint16_t
152 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
153                            uint32_t *len, uint16_t num)
154 {
155         struct vring_used_elem *uep;
156         struct rte_mbuf *cookie;
157         uint16_t used_idx, desc_idx;
158         uint16_t i;
159
160         /*  Caller does the check */
161         for (i = 0; i < num ; i++) {
162                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
163                 uep = &vq->vq_split.ring.used->ring[used_idx];
164                 desc_idx = (uint16_t) uep->id;
165                 len[i] = uep->len;
166                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
167
168                 if (unlikely(cookie == NULL)) {
169                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
170                                 vq->vq_used_cons_idx);
171                         break;
172                 }
173
174                 rte_prefetch0(cookie);
175                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
176                 rx_pkts[i]  = cookie;
177                 vq->vq_used_cons_idx++;
178                 vq_ring_free_chain(vq, desc_idx);
179                 vq->vq_descx[desc_idx].cookie = NULL;
180         }
181
182         return i;
183 }
184
185 static uint16_t
186 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
187                         struct rte_mbuf **rx_pkts,
188                         uint32_t *len,
189                         uint16_t num)
190 {
191         struct vring_used_elem *uep;
192         struct rte_mbuf *cookie;
193         uint16_t used_idx = 0;
194         uint16_t i;
195
196         if (unlikely(num == 0))
197                 return 0;
198
199         for (i = 0; i < num; i++) {
200                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
201                 /* Desc idx same as used idx */
202                 uep = &vq->vq_split.ring.used->ring[used_idx];
203                 len[i] = uep->len;
204                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
205
206                 if (unlikely(cookie == NULL)) {
207                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
208                                 vq->vq_used_cons_idx);
209                         break;
210                 }
211
212                 rte_prefetch0(cookie);
213                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
214                 rx_pkts[i]  = cookie;
215                 vq->vq_used_cons_idx++;
216                 vq->vq_descx[used_idx].cookie = NULL;
217         }
218
219         vq_ring_free_inorder(vq, used_idx, i);
220         return i;
221 }
222
223 #ifndef DEFAULT_TX_FREE_THRESH
224 #define DEFAULT_TX_FREE_THRESH 32
225 #endif
226
227 static void
228 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
229 {
230         uint16_t used_idx, id, curr_id, free_cnt = 0;
231         uint16_t size = vq->vq_nentries;
232         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
233         struct vq_desc_extra *dxp;
234
235         used_idx = vq->vq_used_cons_idx;
236         while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
237                 virtio_rmb(vq->hw->weak_barriers);
238                 id = desc[used_idx].id;
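                /*
                 * With VIRTIO_F_IN_ORDER the device may report only the last
                 * buffer id of a batch, so walk the shadow ring from used_idx
                 * up to and including the entry matching that id, freeing
                 * every buffer on the way.
                 */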
239                 do {
240                         curr_id = used_idx;
241                         dxp = &vq->vq_descx[used_idx];
242                         used_idx += dxp->ndescs;
243                         free_cnt += dxp->ndescs;
244                         num -= dxp->ndescs;
245                         if (used_idx >= size) {
246                                 used_idx -= size;
247                                 vq->vq_packed.used_wrap_counter ^= 1;
248                         }
249                         if (dxp->cookie != NULL) {
250                                 rte_pktmbuf_free(dxp->cookie);
251                                 dxp->cookie = NULL;
252                         }
253                 } while (curr_id != id);
254         }
255         vq->vq_used_cons_idx = used_idx;
256         vq->vq_free_cnt += free_cnt;
257 }
258
259 static void
260 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
261 {
262         uint16_t used_idx, id;
263         uint16_t size = vq->vq_nentries;
264         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
265         struct vq_desc_extra *dxp;
266
267         used_idx = vq->vq_used_cons_idx;
268         while (num-- && desc_is_used(&desc[used_idx], vq)) {
269                 virtio_rmb(vq->hw->weak_barriers);
270                 id = desc[used_idx].id;
271                 dxp = &vq->vq_descx[id];
272                 vq->vq_used_cons_idx += dxp->ndescs;
273                 if (vq->vq_used_cons_idx >= size) {
274                         vq->vq_used_cons_idx -= size;
275                         vq->vq_packed.used_wrap_counter ^= 1;
276                 }
277                 vq_ring_free_id_packed(vq, id);
278                 if (dxp->cookie != NULL) {
279                         rte_pktmbuf_free(dxp->cookie);
280                         dxp->cookie = NULL;
281                 }
282                 used_idx = vq->vq_used_cons_idx;
283         }
284 }
285
286 /* Cleanup from completed transmits. */
287 static inline void
288 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
289 {
290         if (in_order)
291                 virtio_xmit_cleanup_inorder_packed(vq, num);
292         else
293                 virtio_xmit_cleanup_normal_packed(vq, num);
294 }
295
296 static void
297 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
298 {
299         uint16_t i, used_idx, desc_idx;
300         for (i = 0; i < num; i++) {
301                 struct vring_used_elem *uep;
302                 struct vq_desc_extra *dxp;
303
304                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
305                 uep = &vq->vq_split.ring.used->ring[used_idx];
306
307                 desc_idx = (uint16_t) uep->id;
308                 dxp = &vq->vq_descx[desc_idx];
309                 vq->vq_used_cons_idx++;
310                 vq_ring_free_chain(vq, desc_idx);
311
312                 if (dxp->cookie != NULL) {
313                         rte_pktmbuf_free(dxp->cookie);
314                         dxp->cookie = NULL;
315                 }
316         }
317 }
318
319 /* Cleanup from completed inorder transmits. */
320 static void
321 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
322 {
323         uint16_t i, idx = vq->vq_used_cons_idx;
324         int16_t free_cnt = 0;
325         struct vq_desc_extra *dxp = NULL;
326
327         if (unlikely(num == 0))
328                 return;
329
330         for (i = 0; i < num; i++) {
331                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
332                 free_cnt += dxp->ndescs;
333                 if (dxp->cookie != NULL) {
334                         rte_pktmbuf_free(dxp->cookie);
335                         dxp->cookie = NULL;
336                 }
337         }
338
339         vq->vq_free_cnt += free_cnt;
340         vq->vq_used_cons_idx = idx;
341 }
342
343 static inline int
344 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
345                         struct rte_mbuf **cookies,
346                         uint16_t num)
347 {
348         struct vq_desc_extra *dxp;
349         struct virtio_hw *hw = vq->hw;
350         struct vring_desc *start_dp;
351         uint16_t head_idx, idx, i = 0;
352
353         if (unlikely(vq->vq_free_cnt == 0))
354                 return -ENOSPC;
355         if (unlikely(vq->vq_free_cnt < num))
356                 return -EMSGSIZE;
357
358         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
359         start_dp = vq->vq_split.ring.desc;
360
361         while (i < num) {
362                 idx = head_idx & (vq->vq_nentries - 1);
363                 dxp = &vq->vq_descx[idx];
364                 dxp->cookie = (void *)cookies[i];
365                 dxp->ndescs = 1;
366
367                 start_dp[idx].addr =
368                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
369                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
370                 start_dp[idx].len =
371                                 cookies[i]->buf_len -
372                                 RTE_PKTMBUF_HEADROOM +
373                                 hw->vtnet_hdr_size;
374                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
375
376                 vq_update_avail_ring(vq, idx);
377                 head_idx++;
378                 i++;
379         }
380
381         vq->vq_desc_head_idx += num;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
383         return 0;
384 }
385
386 static inline int
387 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie,
388                                 uint16_t num)
389 {
390         struct vq_desc_extra *dxp;
391         struct virtio_hw *hw = vq->hw;
392         struct vring_desc *start_dp = vq->vq_split.ring.desc;
393         uint16_t idx, i;
394
395         if (unlikely(vq->vq_free_cnt == 0))
396                 return -ENOSPC;
397         if (unlikely(vq->vq_free_cnt < num))
398                 return -EMSGSIZE;
399
400         if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries))
401                 return -EFAULT;
402
403         for (i = 0; i < num; i++) {
404                 idx = vq->vq_desc_head_idx;
405                 dxp = &vq->vq_descx[idx];
406                 dxp->cookie = (void *)cookie[i];
407                 dxp->ndescs = 1;
408
409                 start_dp[idx].addr =
410                         VIRTIO_MBUF_ADDR(cookie[i], vq) +
411                         RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
412                 start_dp[idx].len =
413                         cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +
414                         hw->vtnet_hdr_size;
415                 start_dp[idx].flags = VRING_DESC_F_WRITE;
416                 vq->vq_desc_head_idx = start_dp[idx].next;
417                 vq_update_avail_ring(vq, idx);
418                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) {
419                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
420                         break;
421                 }
422         }
423
424         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
425
426         return 0;
427 }
428
429 static inline int
430 virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
431                                      struct rte_mbuf **cookie, uint16_t num)
432 {
433         struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
434         uint16_t flags = vq->vq_packed.cached_flags;
435         struct virtio_hw *hw = vq->hw;
436         struct vq_desc_extra *dxp;
437         uint16_t idx;
438         int i;
439
440         if (unlikely(vq->vq_free_cnt == 0))
441                 return -ENOSPC;
442         if (unlikely(vq->vq_free_cnt < num))
443                 return -EMSGSIZE;
444
445         for (i = 0; i < num; i++) {
446                 idx = vq->vq_avail_idx;
447                 dxp = &vq->vq_descx[idx];
448                 dxp->cookie = (void *)cookie[i];
449                 dxp->ndescs = 1;
450
451                 start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) +
452                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
453                 start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM
454                                         + hw->vtnet_hdr_size;
455
456                 vq->vq_desc_head_idx = dxp->next;
457                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
458                         vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
459                 virtio_wmb(hw->weak_barriers);
460                 start_dp[idx].flags = flags;
461                 if (++vq->vq_avail_idx >= vq->vq_nentries) {
462                         vq->vq_avail_idx -= vq->vq_nentries;
463                         vq->vq_packed.cached_flags ^=
464                                 VRING_PACKED_DESC_F_AVAIL_USED;
465                         flags = vq->vq_packed.cached_flags;
466                 }
467         }
468         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
469         return 0;
470 }
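
/*
 * Note on the packed ring flag handling above: a descriptor is made available
 * by setting its AVAIL bit to the driver's current wrap counter and its USED
 * bit to the inverse. vq_packed.cached_flags caches that pattern, and XOR-ing
 * it with VRING_PACKED_DESC_F_AVAIL_USED whenever vq_avail_idx wraps flips
 * both bits at once for the next lap around the ring.
 */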
471
472 /* When doing TSO, the IP length is not included in the pseudo header
473  * checksum of the packet given to the PMD, but for virtio it is
474  * expected.
475  */
476 static void
477 virtio_tso_fix_cksum(struct rte_mbuf *m)
478 {
479         /* common case: header is not fragmented */
480         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
481                         m->l4_len)) {
482                 struct rte_ipv4_hdr *iph;
483                 struct rte_ipv6_hdr *ip6h;
484                 struct tcp_hdr *th;
485                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
486                 uint32_t tmp;
487
488                 iph = rte_pktmbuf_mtod_offset(m,
489                                         struct rte_ipv4_hdr *, m->l2_len);
490                 th = RTE_PTR_ADD(iph, m->l3_len);
491                 if ((iph->version_ihl >> 4) == 4) {
492                         iph->hdr_checksum = 0;
493                         iph->hdr_checksum = rte_ipv4_cksum(iph);
494                         ip_len = iph->total_length;
495                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
496                                 m->l3_len);
497                 } else {
498                         ip6h = (struct rte_ipv6_hdr *)iph;
499                         ip_paylen = ip6h->payload_len;
500                 }
501
502                 /* calculate the new phdr checksum, this time including ip_paylen */
503                 prev_cksum = th->cksum;
504                 tmp = prev_cksum;
505                 tmp += ip_paylen;
506                 tmp = (tmp & 0xffff) + (tmp >> 16);
507                 new_cksum = tmp;
508
509                 /* replace it in the packet */
510                 th->cksum = new_cksum;
511         }
512 }
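
/*
 * Example of the one's-complement adjustment above (illustrative numbers):
 * if the application left a pseudo-header checksum of 0xff00 in the TCP
 * header and ip_paylen is 0x0200, then tmp = 0xff00 + 0x0200 = 0x10100 and
 * folding the carry gives 0x0100 + 0x1 = 0x0101, i.e. the pseudo-header
 * checksum with the payload length included, as virtio expects.
 */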
513
514
515 /* avoid the write when it is not necessary, to lessen cache issues */
516 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
517         if ((var) != (val))                     \
518                 (var) = (val);                  \
519 } while (0)
520
521 #define virtqueue_clear_net_hdr(_hdr) do {              \
522         ASSIGN_UNLESS_EQUAL((_hdr)->csum_start, 0);     \
523         ASSIGN_UNLESS_EQUAL((_hdr)->csum_offset, 0);    \
524         ASSIGN_UNLESS_EQUAL((_hdr)->flags, 0);          \
525         ASSIGN_UNLESS_EQUAL((_hdr)->gso_type, 0);       \
526         ASSIGN_UNLESS_EQUAL((_hdr)->gso_size, 0);       \
527         ASSIGN_UNLESS_EQUAL((_hdr)->hdr_len, 0);        \
528 } while (0)
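
/*
 * Skipping stores of values that are already in place matters because the
 * virtio-net header typically sits in the mbuf headroom; when the same mbufs
 * are recycled the header bytes are often unchanged between transmits, so the
 * conditional assignment avoids dirtying those cache lines needlessly.
 */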
529
530 static inline void
531 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
532                         struct rte_mbuf *cookie,
533                         bool offload)
534 {
535         if (offload) {
536                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
537                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
538
539                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
540                 case PKT_TX_UDP_CKSUM:
541                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
542                         hdr->csum_offset = offsetof(struct udp_hdr,
543                                 dgram_cksum);
544                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
545                         break;
546
547                 case PKT_TX_TCP_CKSUM:
548                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
549                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
550                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
551                         break;
552
553                 default:
554                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
555                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
556                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
557                         break;
558                 }
559
560                 /* TCP Segmentation Offload */
561                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
562                         virtio_tso_fix_cksum(cookie);
563                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
564                                 VIRTIO_NET_HDR_GSO_TCPV6 :
565                                 VIRTIO_NET_HDR_GSO_TCPV4;
566                         hdr->gso_size = cookie->tso_segsz;
567                         hdr->hdr_len =
568                                 cookie->l2_len +
569                                 cookie->l3_len +
570                                 cookie->l4_len;
571                 } else {
572                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
573                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
574                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
575                 }
576         }
577 }
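
/*
 * Example of the resulting header for a TSO'd IPv4/TCP mbuf (illustrative
 * values, assuming l2_len = 14, l3_len = 20, l4_len = 20, tso_segsz = 1448):
 * csum_start = 34, csum_offset = 16 (offsetof(struct tcp_hdr, cksum)),
 * flags = VIRTIO_NET_HDR_F_NEEDS_CSUM, gso_type = VIRTIO_NET_HDR_GSO_TCPV4,
 * gso_size = 1448 and hdr_len = 54.
 */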
578
579 static inline void
580 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
581                         struct rte_mbuf **cookies,
582                         uint16_t num)
583 {
584         struct vq_desc_extra *dxp;
585         struct virtqueue *vq = txvq->vq;
586         struct vring_desc *start_dp;
587         struct virtio_net_hdr *hdr;
588         uint16_t idx;
589         uint16_t head_size = vq->hw->vtnet_hdr_size;
590         uint16_t i = 0;
591
592         idx = vq->vq_desc_head_idx;
593         start_dp = vq->vq_split.ring.desc;
594
595         while (i < num) {
596                 idx = idx & (vq->vq_nentries - 1);
597                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
598                 dxp->cookie = (void *)cookies[i];
599                 dxp->ndescs = 1;
600
601                 hdr = (struct virtio_net_hdr *)
602                         rte_pktmbuf_prepend(cookies[i], head_size);
603                 cookies[i]->pkt_len -= head_size;
604
605                 /* if offload disabled, hdr is not zeroed yet, do it now */
606                 if (!vq->hw->has_tx_offload)
607                         virtqueue_clear_net_hdr(hdr);
608                 else
609                         virtqueue_xmit_offload(hdr, cookies[i], true);
610
611                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
612                 start_dp[idx].len   = cookies[i]->data_len;
613                 start_dp[idx].flags = 0;
614
615                 vq_update_avail_ring(vq, idx);
616
617                 idx++;
618                 i++;
619         }
620
621         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
622         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
623 }
624
625 static inline void
626 virtqueue_enqueue_xmit_packed_fast(struct virtnet_tx *txvq,
627                                    struct rte_mbuf *cookie,
628                                    int in_order)
629 {
630         struct virtqueue *vq = txvq->vq;
631         struct vring_packed_desc *dp;
632         struct vq_desc_extra *dxp;
633         uint16_t idx, id, flags;
634         uint16_t head_size = vq->hw->vtnet_hdr_size;
635         struct virtio_net_hdr *hdr;
636
637         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
638         idx = vq->vq_avail_idx;
639         dp = &vq->vq_packed.ring.desc[idx];
640
641         dxp = &vq->vq_descx[id];
642         dxp->ndescs = 1;
643         dxp->cookie = cookie;
644
645         flags = vq->vq_packed.cached_flags;
646
647         /* prepend cannot fail, checked by caller */
648         hdr = (struct virtio_net_hdr *)
649                 rte_pktmbuf_prepend(cookie, head_size);
650         cookie->pkt_len -= head_size;
651
652         /* if offload disabled, hdr is not zeroed yet, do it now */
653         if (!vq->hw->has_tx_offload)
654                 virtqueue_clear_net_hdr(hdr);
655         else
656                 virtqueue_xmit_offload(hdr, cookie, true);
657
658         dp->addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
659         dp->len  = cookie->data_len;
660         dp->id   = id;
661
662         if (++vq->vq_avail_idx >= vq->vq_nentries) {
663                 vq->vq_avail_idx -= vq->vq_nentries;
664                 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
665         }
666
667         vq->vq_free_cnt--;
668
669         if (!in_order) {
670                 vq->vq_desc_head_idx = dxp->next;
671                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
672                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
673         }
674
675         virtio_wmb(vq->hw->weak_barriers);
676         dp->flags = flags;
677 }
678
679 static inline void
680 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
681                               uint16_t needed, int can_push, int in_order)
682 {
683         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
684         struct vq_desc_extra *dxp;
685         struct virtqueue *vq = txvq->vq;
686         struct vring_packed_desc *start_dp, *head_dp;
687         uint16_t idx, id, head_idx, head_flags;
688         uint16_t head_size = vq->hw->vtnet_hdr_size;
689         struct virtio_net_hdr *hdr;
690         uint16_t prev;
691
692         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
693
694         dxp = &vq->vq_descx[id];
695         dxp->ndescs = needed;
696         dxp->cookie = cookie;
697
698         head_idx = vq->vq_avail_idx;
699         idx = head_idx;
700         prev = head_idx;
701         start_dp = vq->vq_packed.ring.desc;
702
703         head_dp = &vq->vq_packed.ring.desc[idx];
704         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
705         head_flags |= vq->vq_packed.cached_flags;
706
707         if (can_push) {
708                 /* prepend cannot fail, checked by caller */
709                 hdr = (struct virtio_net_hdr *)
710                         rte_pktmbuf_prepend(cookie, head_size);
711                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
712                  * which is not wanted here. The subtraction below restores it.
713                  */
714                 cookie->pkt_len -= head_size;
715
716                 /* if offload disabled, it is not zeroed below, do it now */
717                 if (!vq->hw->has_tx_offload)
718                         virtqueue_clear_net_hdr(hdr);
719         } else {
720                 /* setup first tx ring slot to point to header
721                  * stored in reserved region.
722                  */
723                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
724                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
725                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
726                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
727                 idx++;
728                 if (idx >= vq->vq_nentries) {
729                         idx -= vq->vq_nentries;
730                         vq->vq_packed.cached_flags ^=
731                                 VRING_PACKED_DESC_F_AVAIL_USED;
732                 }
733         }
734
735         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
736
737         do {
738                 uint16_t flags;
739
740                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
741                 start_dp[idx].len  = cookie->data_len;
742                 if (likely(idx != head_idx)) {
743                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
744                         flags |= vq->vq_packed.cached_flags;
745                         start_dp[idx].flags = flags;
746                 }
747                 prev = idx;
748                 idx++;
749                 if (idx >= vq->vq_nentries) {
750                         idx -= vq->vq_nentries;
751                         vq->vq_packed.cached_flags ^=
752                                 VRING_PACKED_DESC_F_AVAIL_USED;
753                 }
754         } while ((cookie = cookie->next) != NULL);
755
756         start_dp[prev].id = id;
757
758         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
759         vq->vq_avail_idx = idx;
760
761         if (!in_order) {
762                 vq->vq_desc_head_idx = dxp->next;
763                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
764                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
765         }
766
767         virtio_wmb(vq->hw->weak_barriers);
768         head_dp->flags = head_flags;
769 }
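
/*
 * Note: in the function above all descriptors of the chain are filled first
 * and only then, after virtio_wmb(), is head_dp->flags written. Since the
 * device looks at the head descriptor's AVAIL/USED bits, deferring that single
 * store publishes the whole chain to the device in one step.
 */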
770
771 static inline void
772 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
773                         uint16_t needed, int use_indirect, int can_push,
774                         int in_order)
775 {
776         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
777         struct vq_desc_extra *dxp;
778         struct virtqueue *vq = txvq->vq;
779         struct vring_desc *start_dp;
780         uint16_t seg_num = cookie->nb_segs;
781         uint16_t head_idx, idx;
782         uint16_t head_size = vq->hw->vtnet_hdr_size;
783         struct virtio_net_hdr *hdr;
784
785         head_idx = vq->vq_desc_head_idx;
786         idx = head_idx;
787         if (in_order)
788                 dxp = &vq->vq_descx[vq->vq_avail_idx & (vq->vq_nentries - 1)];
789         else
790                 dxp = &vq->vq_descx[idx];
791         dxp->cookie = (void *)cookie;
792         dxp->ndescs = needed;
793
794         start_dp = vq->vq_split.ring.desc;
795
796         if (can_push) {
797                 /* prepend cannot fail, checked by caller */
798                 hdr = (struct virtio_net_hdr *)
799                         rte_pktmbuf_prepend(cookie, head_size);
800                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
801                  * which is not wanted here. The subtraction below restores it.
802                  */
803                 cookie->pkt_len -= head_size;
804
805                 /* if offload disabled, it is not zeroed below, do it now */
806                 if (!vq->hw->has_tx_offload)
807                         virtqueue_clear_net_hdr(hdr);
808         } else if (use_indirect) {
809                 /* setup tx ring slot to point to indirect
810                  * descriptor list stored in reserved region.
811                  *
812                  * the first slot in indirect ring is already preset
813                  * to point to the header in reserved region
814                  */
815                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
816                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
817                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
818                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
819                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
820
821                 /* loop below will fill in rest of the indirect elements */
822                 start_dp = txr[idx].tx_indir;
823                 idx = 1;
824         } else {
825                 /* setup first tx ring slot to point to header
826                  * stored in reserved region.
827                  */
828                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
829                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
830                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
831                 start_dp[idx].flags = VRING_DESC_F_NEXT;
832                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
833
834                 idx = start_dp[idx].next;
835         }
836
837         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
838
839         do {
840                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
841                 start_dp[idx].len   = cookie->data_len;
842                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
843                 idx = start_dp[idx].next;
844         } while ((cookie = cookie->next) != NULL);
845
846         if (use_indirect)
847                 idx = vq->vq_split.ring.desc[head_idx].next;
848
849         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
850
851         vq->vq_desc_head_idx = idx;
852         vq_update_avail_ring(vq, head_idx);
853
854         if (!in_order) {
855                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
856                         vq->vq_desc_tail_idx = idx;
857         }
858 }
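
/*
 * With use_indirect a packet consumes exactly one slot in the ring proper:
 * e.g. a 3-segment mbuf uses one ring descriptor pointing at the per-slot
 * indirect table in the reserved region, whose entry 0 is the preset header
 * and whose entries 1..3 are filled with the data segments by the loop above.
 */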
859
860 void
861 virtio_dev_cq_start(struct rte_eth_dev *dev)
862 {
863         struct virtio_hw *hw = dev->data->dev_private;
864
865         if (hw->cvq && hw->cvq->vq) {
866                 rte_spinlock_init(&hw->cvq->lock);
867                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
868         }
869 }
870
871 int
872 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
873                         uint16_t queue_idx,
874                         uint16_t nb_desc,
875                         unsigned int socket_id __rte_unused,
876                         const struct rte_eth_rxconf *rx_conf __rte_unused,
877                         struct rte_mempool *mp)
878 {
879         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
880         struct virtio_hw *hw = dev->data->dev_private;
881         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
882         struct virtnet_rx *rxvq;
883
884         PMD_INIT_FUNC_TRACE();
885
886         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
887                 nb_desc = vq->vq_nentries;
888         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
889
890         rxvq = &vq->rxq;
891         rxvq->queue_id = queue_idx;
892         rxvq->mpool = mp;
893         if (rxvq->mpool == NULL) {
894                 rte_exit(EXIT_FAILURE,
895                         "Cannot allocate mbufs for rx virtqueue");
896         }
897
898         dev->data->rx_queues[queue_idx] = rxvq;
899
900         return 0;
901 }
902
903 int
904 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
905 {
906         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
907         struct virtio_hw *hw = dev->data->dev_private;
908         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
909         struct virtnet_rx *rxvq = &vq->rxq;
910         struct rte_mbuf *m;
911         uint16_t desc_idx;
912         int error, nbufs, i;
913
914         PMD_INIT_FUNC_TRACE();
915
916         /* Allocate blank mbufs for each rx descriptor */
917         nbufs = 0;
918
919         if (hw->use_simple_rx) {
920                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
921                      desc_idx++) {
922                         vq->vq_split.ring.avail->ring[desc_idx] = desc_idx;
923                         vq->vq_split.ring.desc[desc_idx].flags =
924                                 VRING_DESC_F_WRITE;
925                 }
926
927                 virtio_rxq_vec_setup(rxvq);
928         }
929
930         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
931         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
932              desc_idx++) {
933                 vq->sw_ring[vq->vq_nentries + desc_idx] =
934                         &rxvq->fake_mbuf;
935         }
936
937         if (hw->use_simple_rx) {
938                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
939                         virtio_rxq_rearm_vec(rxvq);
940                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
941                 }
942         } else if (hw->use_inorder_rx) {
943                 if ((!virtqueue_full(vq))) {
944                         uint16_t free_cnt = vq->vq_free_cnt;
945                         struct rte_mbuf *pkts[free_cnt];
946
947                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
948                                 free_cnt)) {
949                                 error = virtqueue_enqueue_refill_inorder(vq,
950                                                 pkts,
951                                                 free_cnt);
952                                 if (unlikely(error)) {
953                                         for (i = 0; i < free_cnt; i++)
954                                                 rte_pktmbuf_free(pkts[i]);
955                                 }
956                         }
957
958                         nbufs += free_cnt;
959                         vq_update_avail_idx(vq);
960                 }
961         } else {
962                 while (!virtqueue_full(vq)) {
963                         m = rte_mbuf_raw_alloc(rxvq->mpool);
964                         if (m == NULL)
965                                 break;
966
967                         /* Enqueue allocated buffers */
968                         if (vtpci_packed_queue(vq->hw))
969                                 error = virtqueue_enqueue_recv_refill_packed(vq,
970                                                 &m, 1);
971                         else
972                                 error = virtqueue_enqueue_recv_refill(vq,
973                                                 &m, 1);
974                         if (error) {
975                                 rte_pktmbuf_free(m);
976                                 break;
977                         }
978                         nbufs++;
979                 }
980
981                 if (!vtpci_packed_queue(vq->hw))
982                         vq_update_avail_idx(vq);
983         }
984
985         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
986
987         VIRTQUEUE_DUMP(vq);
988
989         return 0;
990 }
991
992 /*
993  * struct rte_eth_dev *dev: Used to update dev
994  * uint16_t nb_desc: Defaults to values read from config space
995  * unsigned int socket_id: Used to allocate memzone
996  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
997  * uint16_t queue_idx: Just used as an index in dev txq list
998  */
999 int
1000 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
1001                         uint16_t queue_idx,
1002                         uint16_t nb_desc,
1003                         unsigned int socket_id __rte_unused,
1004                         const struct rte_eth_txconf *tx_conf)
1005 {
1006         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1007         struct virtio_hw *hw = dev->data->dev_private;
1008         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1009         struct virtnet_tx *txvq;
1010         uint16_t tx_free_thresh;
1011
1012         PMD_INIT_FUNC_TRACE();
1013
1014         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
1015                 nb_desc = vq->vq_nentries;
1016         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
1017
1018         txvq = &vq->txq;
1019         txvq->queue_id = queue_idx;
1020
1021         tx_free_thresh = tx_conf->tx_free_thresh;
1022         if (tx_free_thresh == 0)
1023                 tx_free_thresh =
1024                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
1025
1026         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
1027                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
1028                         "number of TX entries minus 3 (%u)."
1029                         " (tx_free_thresh=%u port=%u queue=%u)\n",
1030                         vq->vq_nentries - 3,
1031                         tx_free_thresh, dev->data->port_id, queue_idx);
1032                 return -EINVAL;
1033         }
1034
1035         vq->vq_free_thresh = tx_free_thresh;
1036
1037         dev->data->tx_queues[queue_idx] = txvq;
1038         return 0;
1039 }
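
/*
 * For reference, this callback is reached through the generic ethdev API.
 * A minimal application-side sketch (values purely illustrative, dev_info
 * assumed to come from rte_eth_dev_info_get()):
 *
 *	struct rte_eth_txconf txconf = dev_info.default_txconf;
 *	txconf.tx_free_thresh = 32;
 *	ret = rte_eth_tx_queue_setup(port_id, 0, 256, rte_socket_id(), &txconf);
 *
 * Passing tx_free_thresh as 0 lets the driver pick the default computed
 * above (a quarter of the ring, capped at DEFAULT_TX_FREE_THRESH).
 */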
1040
1041 int
1042 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
1043                                 uint16_t queue_idx)
1044 {
1045         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
1046         struct virtio_hw *hw = dev->data->dev_private;
1047         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
1048
1049         PMD_INIT_FUNC_TRACE();
1050
1051         if (!vtpci_packed_queue(hw)) {
1052                 if (hw->use_inorder_tx)
1053                         vq->vq_split.ring.desc[vq->vq_nentries - 1].next = 0;
1054         }
1055
1056         VIRTQUEUE_DUMP(vq);
1057
1058         return 0;
1059 }
1060
1061 static inline void
1062 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
1063 {
1064         int error;
1065         /*
1066          * Requeue the discarded mbuf. This should always be
1067          * successful since it was just dequeued.
1068          */
1069         if (vtpci_packed_queue(vq->hw))
1070                 error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1);
1071         else
1072                 error = virtqueue_enqueue_recv_refill(vq, &m, 1);
1073
1074         if (unlikely(error)) {
1075                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1076                 rte_pktmbuf_free(m);
1077         }
1078 }
1079
1080 static inline void
1081 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
1082 {
1083         int error;
1084
1085         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
1086         if (unlikely(error)) {
1087                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
1088                 rte_pktmbuf_free(m);
1089         }
1090 }
1091
1092 static inline void
1093 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
1094 {
1095         uint32_t s = mbuf->pkt_len;
1096         struct rte_ether_addr *ea;
1097
1098         stats->bytes += s;
1099
1100         if (s == 64) {
1101                 stats->size_bins[1]++;
1102         } else if (s > 64 && s < 1024) {
1103                 uint32_t bin;
1104
1105                 /* count leading zeros to offset into the correct bin */
1106                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
1107                 stats->size_bins[bin]++;
1108         } else {
1109                 if (s < 64)
1110                         stats->size_bins[0]++;
1111                 else if (s < 1519)
1112                         stats->size_bins[6]++;
1113                 else
1114                         stats->size_bins[7]++;
1115         }
1116
1117         ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
1118         if (rte_is_multicast_ether_addr(ea)) {
1119                 if (rte_is_broadcast_ether_addr(ea))
1120                         stats->broadcast++;
1121                 else
1122                         stats->multicast++;
1123         }
1124 }
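
/*
 * The size_bins indexing above is a log2 bucketing; e.g. for s = 300,
 * __builtin_clz(300) = 23, so bin = 32 - 23 - 5 = 4, i.e. the 256-511 byte
 * bucket. Bins 2 through 5 therefore cover 65-127, 128-255, 256-511 and
 * 512-1023 bytes, while 0, 1, 6 and 7 are handled explicitly.
 */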
1125
1126 static inline void
1127 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
1128 {
1129         VIRTIO_DUMP_PACKET(m, m->data_len);
1130
1131         virtio_update_packet_stats(&rxvq->stats, m);
1132 }
1133
1134 /* Optionally fill offload information in structure */
1135 static inline int
1136 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
1137 {
1138         struct rte_net_hdr_lens hdr_lens;
1139         uint32_t hdrlen, ptype;
1140         int l4_supported = 0;
1141
1142         /* nothing to do */
1143         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
1144                 return 0;
1145
1146         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
1147
1148         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1149         m->packet_type = ptype;
1150         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
1151             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
1152             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
1153                 l4_supported = 1;
1154
1155         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1156                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
1157                 if (hdr->csum_start <= hdrlen && l4_supported) {
1158                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
1159                 } else {
1160                         /* Unknown proto or tunnel, do sw cksum. We can assume
1161                          * the cksum field is in the first segment since the
1162                          * buffers we provided to the host are large enough.
1163                          * In case of SCTP, this will be wrong since it's a CRC
1164                          * but there's nothing we can do.
1165                          */
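                        /*
                         * Note that the raw sum below also covers the cksum
                         * field itself, which still holds the pseudo-header
                         * sum provided by the host, so complementing the
                         * result directly gives the final L4 checksum.
                         */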
1166                         uint16_t csum = 0, off;
1167
1168                         rte_raw_cksum_mbuf(m, hdr->csum_start,
1169                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
1170                                 &csum);
1171                         if (likely(csum != 0xffff))
1172                                 csum = ~csum;
1173                         off = hdr->csum_offset + hdr->csum_start;
1174                         if (rte_pktmbuf_data_len(m) >= off + 1)
1175                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1176                                         off) = csum;
1177                 }
1178         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1179                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1180         }
1181
1182         /* GSO request, save required information in mbuf */
1183         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1184                 /* Check unsupported modes */
1185                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1186                     (hdr->gso_size == 0)) {
1187                         return -EINVAL;
1188                 }
1189
1190                 /* Update mss lengths in mbuf */
1191                 m->tso_segsz = hdr->gso_size;
1192                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1193                         case VIRTIO_NET_HDR_GSO_TCPV4:
1194                         case VIRTIO_NET_HDR_GSO_TCPV6:
1195                                 m->ol_flags |= PKT_RX_LRO | \
1196                                         PKT_RX_L4_CKSUM_NONE;
1197                                 break;
1198                         default:
1199                                 return -EINVAL;
1200                 }
1201         }
1202
1203         return 0;
1204 }
1205
1206 #define VIRTIO_MBUF_BURST_SZ 64
1207 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
1208 uint16_t
1209 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1210 {
1211         struct virtnet_rx *rxvq = rx_queue;
1212         struct virtqueue *vq = rxvq->vq;
1213         struct virtio_hw *hw = vq->hw;
1214         struct rte_mbuf *rxm;
1215         uint16_t nb_used, num, nb_rx;
1216         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1217         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1218         int error;
1219         uint32_t i, nb_enqueued;
1220         uint32_t hdr_size;
1221         struct virtio_net_hdr *hdr;
1222
1223         nb_rx = 0;
1224         if (unlikely(hw->started == 0))
1225                 return nb_rx;
1226
1227         nb_used = VIRTQUEUE_NUSED(vq);
1228
1229         virtio_rmb(hw->weak_barriers);
1230
1231         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1232         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1233                 num = VIRTIO_MBUF_BURST_SZ;
1234         if (likely(num > DESC_PER_CACHELINE))
1235                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
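        /*
         * e.g. with 16-byte descriptors and 64-byte cache lines,
         * DESC_PER_CACHELINE is 4; for vq_used_cons_idx = 6 and num = 32 the
         * burst is trimmed to 30 so that the next dequeue resumes at index
         * 36, a multiple of DESC_PER_CACHELINE.
         */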
1236
1237         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1238         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1239
1240         nb_enqueued = 0;
1241         hdr_size = hw->vtnet_hdr_size;
1242
1243         for (i = 0; i < num ; i++) {
1244                 rxm = rcv_pkts[i];
1245
1246                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1247
1248                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1249                         PMD_RX_LOG(ERR, "Packet drop");
1250                         nb_enqueued++;
1251                         virtio_discard_rxbuf(vq, rxm);
1252                         rxvq->stats.errors++;
1253                         continue;
1254                 }
1255
1256                 rxm->port = rxvq->port_id;
1257                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1258                 rxm->ol_flags = 0;
1259                 rxm->vlan_tci = 0;
1260
1261                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1262                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1263
1264                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1265                         RTE_PKTMBUF_HEADROOM - hdr_size);
1266
1267                 if (hw->vlan_strip)
1268                         rte_vlan_strip(rxm);
1269
1270                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1271                         virtio_discard_rxbuf(vq, rxm);
1272                         rxvq->stats.errors++;
1273                         continue;
1274                 }
1275
1276                 virtio_rx_stats_updated(rxvq, rxm);
1277
1278                 rx_pkts[nb_rx++] = rxm;
1279         }
1280
1281         rxvq->stats.packets += nb_rx;
1282
1283         /* Allocate new mbufs for the used descriptors */
1284         if (likely(!virtqueue_full(vq))) {
1285                 uint16_t free_cnt = vq->vq_free_cnt;
1286                 struct rte_mbuf *new_pkts[free_cnt];
1287
1288                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1289                                                 free_cnt) == 0)) {
1290                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1291                                         free_cnt);
1292                         if (unlikely(error)) {
1293                                 for (i = 0; i < free_cnt; i++)
1294                                         rte_pktmbuf_free(new_pkts[i]);
1295                         }
1296                         nb_enqueued += free_cnt;
1297                 } else {
1298                         struct rte_eth_dev *dev =
1299                                 &rte_eth_devices[rxvq->port_id];
1300                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1301                 }
1302         }
1303
1304         if (likely(nb_enqueued)) {
1305                 vq_update_avail_idx(vq);
1306
1307                 if (unlikely(virtqueue_kick_prepare(vq))) {
1308                         virtqueue_notify(vq);
1309                         PMD_RX_LOG(DEBUG, "Notified");
1310                 }
1311         }
1312
1313         return nb_rx;
1314 }
1315
1316 uint16_t
1317 virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
1318                         uint16_t nb_pkts)
1319 {
1320         struct virtnet_rx *rxvq = rx_queue;
1321         struct virtqueue *vq = rxvq->vq;
1322         struct virtio_hw *hw = vq->hw;
1323         struct rte_mbuf *rxm;
1324         uint16_t num, nb_rx;
1325         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1326         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1327         int error;
1328         uint32_t i, nb_enqueued;
1329         uint32_t hdr_size;
1330         struct virtio_net_hdr *hdr;
1331
1332         nb_rx = 0;
1333         if (unlikely(hw->started == 0))
1334                 return nb_rx;
1335
1336         num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
1337         if (likely(num > DESC_PER_CACHELINE))
1338                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1339
1340         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1341         PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1342
1343         nb_enqueued = 0;
1344         hdr_size = hw->vtnet_hdr_size;
1345
1346         for (i = 0; i < num; i++) {
1347                 rxm = rcv_pkts[i];
1348
1349                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1350
1351                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1352                         PMD_RX_LOG(ERR, "Packet drop");
1353                         nb_enqueued++;
1354                         virtio_discard_rxbuf(vq, rxm);
1355                         rxvq->stats.errors++;
1356                         continue;
1357                 }
1358
1359                 rxm->port = rxvq->port_id;
1360                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1361                 rxm->ol_flags = 0;
1362                 rxm->vlan_tci = 0;
1363
1364                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1365                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1366
1367                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1368                         RTE_PKTMBUF_HEADROOM - hdr_size);
1369
1370                 if (hw->vlan_strip)
1371                         rte_vlan_strip(rxm);
1372
1373                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1374                         virtio_discard_rxbuf(vq, rxm);
1375                         rxvq->stats.errors++;
1376                         continue;
1377                 }
1378
1379                 virtio_rx_stats_updated(rxvq, rxm);
1380
1381                 rx_pkts[nb_rx++] = rxm;
1382         }
1383
1384         rxvq->stats.packets += nb_rx;
1385
1386         /* Allocate new mbufs for the used descriptors */
1387         if (likely(!virtqueue_full(vq))) {
1388                 uint16_t free_cnt = vq->vq_free_cnt;
1389                 struct rte_mbuf *new_pkts[free_cnt];
1390
1391                 if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
1392                                                 free_cnt) == 0)) {
1393                         error = virtqueue_enqueue_recv_refill_packed(vq,
1394                                         new_pkts, free_cnt);
1395                         if (unlikely(error)) {
1396                                 for (i = 0; i < free_cnt; i++)
1397                                         rte_pktmbuf_free(new_pkts[i]);
1398                         }
1399                         nb_enqueued += free_cnt;
1400                 } else {
1401                         struct rte_eth_dev *dev =
1402                                 &rte_eth_devices[rxvq->port_id];
1403                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1404                 }
1405         }
1406
1407         if (likely(nb_enqueued)) {
1408                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1409                         virtqueue_notify(vq);
1410                         PMD_RX_LOG(DEBUG, "Notified");
1411                 }
1412         }
1413
1414         return nb_rx;
1415 }
1416
1417
1418 uint16_t
1419 virtio_recv_pkts_inorder(void *rx_queue,
1420                         struct rte_mbuf **rx_pkts,
1421                         uint16_t nb_pkts)
1422 {
1423         struct virtnet_rx *rxvq = rx_queue;
1424         struct virtqueue *vq = rxvq->vq;
1425         struct virtio_hw *hw = vq->hw;
1426         struct rte_mbuf *rxm;
1427         struct rte_mbuf *prev;
1428         uint16_t nb_used, num, nb_rx;
1429         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1430         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1431         int error;
1432         uint32_t nb_enqueued;
1433         uint32_t seg_num;
1434         uint32_t seg_res;
1435         uint32_t hdr_size;
1436         int32_t i;
1437
1438         nb_rx = 0;
1439         if (unlikely(hw->started == 0))
1440                 return nb_rx;
1441
1442         nb_used = VIRTQUEUE_NUSED(vq);
1443         nb_used = RTE_MIN(nb_used, nb_pkts);
1444         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1445
1446         virtio_rmb(hw->weak_barriers);
1447
1448         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1449
1450         nb_enqueued = 0;
1451         seg_num = 1;
1452         seg_res = 0;
1453         hdr_size = hw->vtnet_hdr_size;
1454
1455         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1456
1457         for (i = 0; i < num; i++) {
1458                 struct virtio_net_hdr_mrg_rxbuf *header;
1459
1460                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1461                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1462
1463                 rxm = rcv_pkts[i];
1464
1465                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1466                         PMD_RX_LOG(ERR, "Packet drop");
1467                         nb_enqueued++;
1468                         virtio_discard_rxbuf_inorder(vq, rxm);
1469                         rxvq->stats.errors++;
1470                         continue;
1471                 }
1472
1473                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1474                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1475                          - hdr_size);
1476
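                     /* num_buffers is only meaningful when mergeable Rx buffers are negotiated */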
1477                 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1478                         seg_num = header->num_buffers;
1479                         if (seg_num == 0)
1480                                 seg_num = 1;
1481                 } else {
1482                         seg_num = 1;
1483                 }
1484
1485                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1486                 rxm->nb_segs = seg_num;
1487                 rxm->ol_flags = 0;
1488                 rxm->vlan_tci = 0;
1489                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1490                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1491
1492                 rxm->port = rxvq->port_id;
1493
1494                 rx_pkts[nb_rx] = rxm;
1495                 prev = rxm;
1496
1497                 if (vq->hw->has_rx_offload &&
1498                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1499                         virtio_discard_rxbuf_inorder(vq, rxm);
1500                         rxvq->stats.errors++;
1501                         continue;
1502                 }
1503
1504                 if (hw->vlan_strip)
1505                         rte_vlan_strip(rx_pkts[nb_rx]);
1506
1507                 seg_res = seg_num - 1;
1508
1509                 /* Merge remaining segments */
1510                 while (seg_res != 0 && i < (num - 1)) {
1511                         i++;
1512
1513                         rxm = rcv_pkts[i];
1514                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1515                         rxm->pkt_len = (uint32_t)(len[i]);
1516                         rxm->data_len = (uint16_t)(len[i]);
1517
1518                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1519                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1520
1521                         if (prev)
1522                                 prev->next = rxm;
1523
1524                         prev = rxm;
1525                         seg_res -= 1;
1526                 }
1527
1528                 if (!seg_res) {
1529                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1530                         nb_rx++;
1531                 }
1532         }
1533
1534         /* The last packet may still be missing segments; dequeue and chain them */
1535         while (seg_res != 0) {
1536                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1537                                         VIRTIO_MBUF_BURST_SZ);
1538
1539                 prev = rcv_pkts[nb_rx];
1540                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1541                         virtio_rmb(hw->weak_barriers);
1542                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1543                                                            rcv_cnt);
1544                         uint16_t extra_idx = 0;
1545
1546                         rcv_cnt = num;
1547                         while (extra_idx < rcv_cnt) {
1548                                 rxm = rcv_pkts[extra_idx];
1549                                 rxm->data_off =
1550                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1551                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1552                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1553                                 prev->next = rxm;
1554                                 prev = rxm;
1555                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1556                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1557                                 extra_idx += 1;
1558                         }
1559                         seg_res -= rcv_cnt;
1560
1561                         if (!seg_res) {
1562                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1563                                 nb_rx++;
1564                         }
1565                 } else {
1566                         PMD_RX_LOG(ERR,
1567                                         "Not enough segments for packet.");
1568                         virtio_discard_rxbuf_inorder(vq, prev);
1569                         rxvq->stats.errors++;
1570                         break;
1571                 }
1572         }
1573
1574         rxvq->stats.packets += nb_rx;
1575
1576         /* Allocate new mbufs to refill the used descriptors */
1577
1578         if (likely(!virtqueue_full(vq))) {
1579                 /* free_cnt may include mrg descs */
1580                 uint16_t free_cnt = vq->vq_free_cnt;
1581                 struct rte_mbuf *new_pkts[free_cnt];
1582
1583                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1584                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1585                                         free_cnt);
1586                         if (unlikely(error)) {
1587                                 for (i = 0; i < free_cnt; i++)
1588                                         rte_pktmbuf_free(new_pkts[i]);
1589                         }
1590                         nb_enqueued += free_cnt;
1591                 } else {
1592                         struct rte_eth_dev *dev =
1593                                 &rte_eth_devices[rxvq->port_id];
1594                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1595                 }
1596         }
1597
1598         if (likely(nb_enqueued)) {
1599                 vq_update_avail_idx(vq);
1600
1601                 if (unlikely(virtqueue_kick_prepare(vq))) {
1602                         virtqueue_notify(vq);
1603                         PMD_RX_LOG(DEBUG, "Notified");
1604                 }
1605         }
1606
1607         return nb_rx;
1608 }
1609
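     /*
      * Receive burst for the split ring with mergeable Rx buffers
      * (VIRTIO_NET_F_MRG_RXBUF): a packet may span several descriptors,
      * so the extra buffers are chained onto the head mbuf.
      */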
1610 uint16_t
1611 virtio_recv_mergeable_pkts(void *rx_queue,
1612                         struct rte_mbuf **rx_pkts,
1613                         uint16_t nb_pkts)
1614 {
1615         struct virtnet_rx *rxvq = rx_queue;
1616         struct virtqueue *vq = rxvq->vq;
1617         struct virtio_hw *hw = vq->hw;
1618         struct rte_mbuf *rxm;
1619         struct rte_mbuf *prev;
1620         uint16_t nb_used, num, nb_rx = 0;
1621         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1622         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1623         int error;
1624         uint32_t nb_enqueued = 0;
1625         uint32_t seg_num = 0;
1626         uint32_t seg_res = 0;
1627         uint32_t hdr_size = hw->vtnet_hdr_size;
1628         int32_t i;
1629
1630         if (unlikely(hw->started == 0))
1631                 return nb_rx;
1632
1633         nb_used = VIRTQUEUE_NUSED(vq);
1634
1635         virtio_rmb(hw->weak_barriers);
1636
1637         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1638
1639         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1640         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1641                 num = VIRTIO_MBUF_BURST_SZ;
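             /* Trim the burst so it ends on a descriptor cache-line boundary,
              * which helps keep driver and device off the same cache line.
              */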
1642         if (likely(num > DESC_PER_CACHELINE))
1643                 num = num - ((vq->vq_used_cons_idx + num) %
1644                                 DESC_PER_CACHELINE);
1645
1646
1647         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1648
1649         for (i = 0; i < num; i++) {
1650                 struct virtio_net_hdr_mrg_rxbuf *header;
1651
1652                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1653                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1654
1655                 rxm = rcv_pkts[i];
1656
1657                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1658                         PMD_RX_LOG(ERR, "Packet drop");
1659                         nb_enqueued++;
1660                         virtio_discard_rxbuf(vq, rxm);
1661                         rxvq->stats.errors++;
1662                         continue;
1663                 }
1664
1665                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1666                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1667                          - hdr_size);
1668                 seg_num = header->num_buffers;
1669                 if (seg_num == 0)
1670                         seg_num = 1;
1671
1672                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1673                 rxm->nb_segs = seg_num;
1674                 rxm->ol_flags = 0;
1675                 rxm->vlan_tci = 0;
1676                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1677                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1678
1679                 rxm->port = rxvq->port_id;
1680
1681                 rx_pkts[nb_rx] = rxm;
1682                 prev = rxm;
1683
1684                 if (hw->has_rx_offload &&
1685                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1686                         virtio_discard_rxbuf(vq, rxm);
1687                         rxvq->stats.errors++;
1688                         continue;
1689                 }
1690
1691                 if (hw->vlan_strip)
1692                         rte_vlan_strip(rx_pkts[nb_rx]);
1693
1694                 seg_res = seg_num - 1;
1695
1696                 /* Merge remaining segments */
1697                 while (seg_res != 0 && i < (num - 1)) {
1698                         i++;
1699
1700                         rxm = rcv_pkts[i];
1701                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1702                         rxm->pkt_len = (uint32_t)(len[i]);
1703                         rxm->data_len = (uint16_t)(len[i]);
1704
1705                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1706                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1707
1708                         if (prev)
1709                                 prev->next = rxm;
1710
1711                         prev = rxm;
1712                         seg_res -= 1;
1713                 }
1714
1715                 if (!seg_res) {
1716                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1717                         nb_rx++;
1718                 }
1719         }
1720
1721         /* The last packet may still be missing segments; dequeue and chain them */
1722         while (seg_res != 0) {
1723                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1724                                         VIRTIO_MBUF_BURST_SZ);
1725
1726                 prev = rcv_pkts[nb_rx];
1727                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1728                         virtio_rmb(hw->weak_barriers);
1729                         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
1730                                                            rcv_cnt);
1731                         uint16_t extra_idx = 0;
1732
1733                         rcv_cnt = num;
1734                         while (extra_idx < rcv_cnt) {
1735                                 rxm = rcv_pkts[extra_idx];
1736                                 rxm->data_off =
1737                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1738                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1739                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1740                                 prev->next = rxm;
1741                                 prev = rxm;
1742                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1743                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1744                                 extra_idx += 1;
1745                         }
1746                         seg_res -= rcv_cnt;
1747
1748                         if (!seg_res) {
1749                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1750                                 nb_rx++;
1751                         }
1752                 } else {
1753                         PMD_RX_LOG(ERR,
1754                                         "Not enough segments for packet.");
1755                         virtio_discard_rxbuf(vq, prev);
1756                         rxvq->stats.errors++;
1757                         break;
1758                 }
1759         }
1760
1761         rxvq->stats.packets += nb_rx;
1762
1763         /* Allocate new mbufs to refill the used descriptors */
1764         if (likely(!virtqueue_full(vq))) {
1765                 /* free_cnt may include mrg descs */
1766                 uint16_t free_cnt = vq->vq_free_cnt;
1767                 struct rte_mbuf *new_pkts[free_cnt];
1768
1769                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1770                         error = virtqueue_enqueue_recv_refill(vq, new_pkts,
1771                                         free_cnt);
1772                         if (unlikely(error)) {
1773                                 for (i = 0; i < free_cnt; i++)
1774                                         rte_pktmbuf_free(new_pkts[i]);
1775                         }
1776                         nb_enqueued += free_cnt;
1777                 } else {
1778                         struct rte_eth_dev *dev =
1779                                 &rte_eth_devices[rxvq->port_id];
1780                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1781                 }
1782         }
1783
1784         if (likely(nb_enqueued)) {
1785                 vq_update_avail_idx(vq);
1786
1787                 if (unlikely(virtqueue_kick_prepare(vq))) {
1788                         virtqueue_notify(vq);
1789                         PMD_RX_LOG(DEBUG, "Notified");
1790                 }
1791         }
1792
1793         return nb_rx;
1794 }
1795
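     /*
      * Receive burst for the packed ring with mergeable Rx buffers: same
      * reassembly logic as the split-ring version, using the packed
      * descriptor format and its notification scheme.
      */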
1796 uint16_t
1797 virtio_recv_mergeable_pkts_packed(void *rx_queue,
1798                         struct rte_mbuf **rx_pkts,
1799                         uint16_t nb_pkts)
1800 {
1801         struct virtnet_rx *rxvq = rx_queue;
1802         struct virtqueue *vq = rxvq->vq;
1803         struct virtio_hw *hw = vq->hw;
1804         struct rte_mbuf *rxm;
1805         struct rte_mbuf *prev = NULL;
1806         uint16_t num, nb_rx = 0;
1807         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1808         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1809         uint32_t nb_enqueued = 0;
1810         uint32_t seg_num = 0;
1811         uint32_t seg_res = 0;
1812         uint32_t hdr_size = hw->vtnet_hdr_size;
1813         int32_t i;
1814         int error;
1815
1816         if (unlikely(hw->started == 0))
1817                 return nb_rx;
1818
1819
1820         num = nb_pkts;
1821         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1822                 num = VIRTIO_MBUF_BURST_SZ;
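             /* Trim the burst so it ends on a descriptor cache-line boundary */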
1823         if (likely(num > DESC_PER_CACHELINE))
1824                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1825
1826         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts, len, num);
1827
1828         for (i = 0; i < num; i++) {
1829                 struct virtio_net_hdr_mrg_rxbuf *header;
1830
1831                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1832                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1833
1834                 rxm = rcv_pkts[i];
1835
1836                 if (unlikely(len[i] < hdr_size + RTE_ETHER_HDR_LEN)) {
1837                         PMD_RX_LOG(ERR, "Packet drop");
1838                         nb_enqueued++;
1839                         virtio_discard_rxbuf(vq, rxm);
1840                         rxvq->stats.errors++;
1841                         continue;
1842                 }
1843
1844                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)
1845                           rxm->buf_addr + RTE_PKTMBUF_HEADROOM - hdr_size);
1846                 seg_num = header->num_buffers;
1847
1848                 if (seg_num == 0)
1849                         seg_num = 1;
1850
1851                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1852                 rxm->nb_segs = seg_num;
1853                 rxm->ol_flags = 0;
1854                 rxm->vlan_tci = 0;
1855                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1856                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1857
1858                 rxm->port = rxvq->port_id;
1859                 rx_pkts[nb_rx] = rxm;
1860                 prev = rxm;
1861
1862                 if (hw->has_rx_offload &&
1863                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1864                         virtio_discard_rxbuf(vq, rxm);
1865                         rxvq->stats.errors++;
1866                         continue;
1867                 }
1868
1869                 if (hw->vlan_strip)
1870                         rte_vlan_strip(rx_pkts[nb_rx]);
1871
1872                 seg_res = seg_num - 1;
1873
1874                 /* Merge remaining segments */
1875                 while (seg_res != 0 && i < (num - 1)) {
1876                         i++;
1877
1878                         rxm = rcv_pkts[i];
1879                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1880                         rxm->pkt_len = (uint32_t)(len[i]);
1881                         rxm->data_len = (uint16_t)(len[i]);
1882
1883                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1884                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1885
1886                         if (prev)
1887                                 prev->next = rxm;
1888
1889                         prev = rxm;
1890                         seg_res -= 1;
1891                 }
1892
1893                 if (!seg_res) {
1894                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1895                         nb_rx++;
1896                 }
1897         }
1898
1899         /* The last packet may still be missing segments; dequeue and chain them */
1900         while (seg_res != 0) {
1901                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1902                                         VIRTIO_MBUF_BURST_SZ);
1903                 if (likely(vq->vq_free_cnt >= rcv_cnt)) {
1904                         num = virtqueue_dequeue_burst_rx_packed(vq, rcv_pkts,
1905                                         len, rcv_cnt);
1906                         uint16_t extra_idx = 0;
1907
1908                         rcv_cnt = num;
1909
1910                         while (extra_idx < rcv_cnt) {
1911                                 rxm = rcv_pkts[extra_idx];
1912
1913                                 rxm->data_off =
1914                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1915                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1916                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1917
1918                                 prev->next = rxm;
1919                                 prev = rxm;
1920                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1921                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1922                                 extra_idx += 1;
1923                         }
1924                         seg_res -= rcv_cnt;
1925                         if (!seg_res) {
1926                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1927                                 nb_rx++;
1928                         }
1929                 } else {
1930                         PMD_RX_LOG(ERR,
1931                                         "Not enough segments for packet.");
1932                         if (prev)
1933                                 virtio_discard_rxbuf(vq, prev);
1934                         rxvq->stats.errors++;
1935                         break;
1936                 }
1937         }
1938
1939         rxvq->stats.packets += nb_rx;
1940
1941         /* Allocate new mbufs to refill the used descriptors */
1942         if (likely(!virtqueue_full(vq))) {
1943                 /* free_cnt may include mrg descs */
1944                 uint16_t free_cnt = vq->vq_free_cnt;
1945                 struct rte_mbuf *new_pkts[free_cnt];
1946
1947                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1948                         error = virtqueue_enqueue_recv_refill_packed(vq,
1949                                         new_pkts, free_cnt);
1950                         if (unlikely(error)) {
1951                                 for (i = 0; i < free_cnt; i++)
1952                                         rte_pktmbuf_free(new_pkts[i]);
1953                         }
1954                         nb_enqueued += free_cnt;
1955                 } else {
1956                         struct rte_eth_dev *dev =
1957                                 &rte_eth_devices[rxvq->port_id];
1958                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1959                 }
1960         }
1961
1962         if (likely(nb_enqueued)) {
1963                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1964                         virtqueue_notify(vq);
1965                         PMD_RX_LOG(DEBUG, "Notified");
1966                 }
1967         }
1968
1969         return nb_rx;
1970 }
1971
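     /*
      * Transmit burst for the packed ring: used descriptors are reclaimed on
      * demand, the virtio-net header is pushed into the mbuf headroom when the
      * layout allows it, and the device is notified only if it requests it.
      */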
1972 uint16_t
1973 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1974                         uint16_t nb_pkts)
1975 {
1976         struct virtnet_tx *txvq = tx_queue;
1977         struct virtqueue *vq = txvq->vq;
1978         struct virtio_hw *hw = vq->hw;
1979         uint16_t hdr_size = hw->vtnet_hdr_size;
1980         uint16_t nb_tx = 0;
1981         bool in_order = hw->use_inorder_tx;
1982         int error;
1983
1984         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1985                 return nb_tx;
1986
1987         if (unlikely(nb_pkts < 1))
1988                 return nb_pkts;
1989
1990         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1991
1992         if (nb_pkts > vq->vq_free_cnt)
1993                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt,
1994                                            in_order);
1995
1996         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1997                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1998                 int can_push = 0, slots, need;
1999
2000                 /* Do VLAN tag insertion */
2001                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2002                         error = rte_vlan_insert(&txm);
2003                         if (unlikely(error)) {
2004                                 rte_pktmbuf_free(txm);
2005                                 continue;
2006                         }
2007                         /* vlan_insert may add a header mbuf */
2008                         tx_pkts[nb_tx] = txm;
2009                 }
2010
2011                 /* Optimize ring usage: push the header into the mbuf headroom when possible */
2012                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2013                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2014                     rte_mbuf_refcnt_read(txm) == 1 &&
2015                     RTE_MBUF_DIRECT(txm) &&
2016                     txm->nb_segs == 1 &&
2017                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2018                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2019                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2020                         can_push = 1;
2021
2022                 /* How many main ring entries are needed for this Tx?
2023                  * any_layout => number of segments
2024                  * default    => number of segments + 1
2025                  */
2026                 slots = txm->nb_segs + !can_push;
2027                 need = slots - vq->vq_free_cnt;
2028
2029                 /* A positive value means we lack free descriptors and must reclaim used ones */
2030                 if (unlikely(need > 0)) {
2031                         virtio_xmit_cleanup_packed(vq, need, in_order);
2032                         need = slots - vq->vq_free_cnt;
2033                         if (unlikely(need > 0)) {
2034                                 PMD_TX_LOG(ERR,
2035                                            "No free tx descriptors to transmit");
2036                                 break;
2037                         }
2038                 }
2039
2040                 /* Enqueue Packet buffers */
2041                 if (can_push)
2042                         virtqueue_enqueue_xmit_packed_fast(txvq, txm, in_order);
2043                 else
2044                         virtqueue_enqueue_xmit_packed(txvq, txm, slots, 0,
2045                                                       in_order);
2046
2047                 virtio_update_packet_stats(&txvq->stats, txm);
2048         }
2049
2050         txvq->stats.packets += nb_tx;
2051
2052         if (likely(nb_tx)) {
2053                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
2054                         virtqueue_notify(vq);
2055                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2056                 }
2057         }
2058
2059         return nb_tx;
2060 }
2061
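     /*
      * Transmit burst for the split ring: a packet either carries the
      * virtio-net header in its headroom (can_push), is described by a single
      * indirect descriptor, or uses one extra descriptor for the header.
      */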
2062 uint16_t
2063 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
2064 {
2065         struct virtnet_tx *txvq = tx_queue;
2066         struct virtqueue *vq = txvq->vq;
2067         struct virtio_hw *hw = vq->hw;
2068         uint16_t hdr_size = hw->vtnet_hdr_size;
2069         uint16_t nb_used, nb_tx = 0;
2070         int error;
2071
2072         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2073                 return nb_tx;
2074
2075         if (unlikely(nb_pkts < 1))
2076                 return nb_pkts;
2077
2078         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2079         nb_used = VIRTQUEUE_NUSED(vq);
2080
2081         virtio_rmb(hw->weak_barriers);
2082         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2083                 virtio_xmit_cleanup(vq, nb_used);
2084
2085         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
2086                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2087                 int can_push = 0, use_indirect = 0, slots, need;
2088
2089                 /* Do VLAN tag insertion */
2090                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2091                         error = rte_vlan_insert(&txm);
2092                         if (unlikely(error)) {
2093                                 rte_pktmbuf_free(txm);
2094                                 continue;
2095                         }
2096                         /* vlan_insert may add a header mbuf */
2097                         tx_pkts[nb_tx] = txm;
2098                 }
2099
2100                 /* Optimize ring usage: push the header into the mbuf headroom when possible */
2101                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2102                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2103                     rte_mbuf_refcnt_read(txm) == 1 &&
2104                     RTE_MBUF_DIRECT(txm) &&
2105                     txm->nb_segs == 1 &&
2106                     rte_pktmbuf_headroom(txm) >= hdr_size &&
2107                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2108                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
2109                         can_push = 1;
2110                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
2111                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
2112                         use_indirect = 1;
2113
2114                 /* How many main ring entries are needed for this Tx?
2115                  * any_layout => number of segments
2116                  * indirect   => 1
2117                  * default    => number of segments + 1
2118                  */
2119                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
2120                 need = slots - vq->vq_free_cnt;
2121
2122                 /* A positive value means we lack free descriptors and must reclaim used ones */
2123                 if (unlikely(need > 0)) {
2124                         nb_used = VIRTQUEUE_NUSED(vq);
2125                         virtio_rmb(hw->weak_barriers);
2126                         need = RTE_MIN(need, (int)nb_used);
2127
2128                         virtio_xmit_cleanup(vq, need);
2129                         need = slots - vq->vq_free_cnt;
2130                         if (unlikely(need > 0)) {
2131                                 PMD_TX_LOG(ERR,
2132                                            "No free tx descriptors to transmit");
2133                                 break;
2134                         }
2135                 }
2136
2137                 /* Enqueue Packet buffers */
2138                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
2139                         can_push, 0);
2140
2141                 virtio_update_packet_stats(&txvq->stats, txm);
2142         }
2143
2144         txvq->stats.packets += nb_tx;
2145
2146         if (likely(nb_tx)) {
2147                 vq_update_avail_idx(vq);
2148
2149                 if (unlikely(virtqueue_kick_prepare(vq))) {
2150                         virtqueue_notify(vq);
2151                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2152                 }
2153         }
2154
2155         return nb_tx;
2156 }
2157
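     /*
      * Transmit burst for the split ring with VIRTIO_F_IN_ORDER: single-segment
      * packets that can carry the header in their headroom are batched and
      * enqueued together; anything else takes the regular enqueue path.
      */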
2158 uint16_t
2159 virtio_xmit_pkts_inorder(void *tx_queue,
2160                         struct rte_mbuf **tx_pkts,
2161                         uint16_t nb_pkts)
2162 {
2163         struct virtnet_tx *txvq = tx_queue;
2164         struct virtqueue *vq = txvq->vq;
2165         struct virtio_hw *hw = vq->hw;
2166         uint16_t hdr_size = hw->vtnet_hdr_size;
2167         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
2168         struct rte_mbuf *inorder_pkts[nb_pkts];
2169         int error;
2170
2171         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
2172                 return nb_tx;
2173
2174         if (unlikely(nb_pkts < 1))
2175                 return nb_pkts;
2176
2177         VIRTQUEUE_DUMP(vq);
2178         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
2179         nb_used = VIRTQUEUE_NUSED(vq);
2180
2181         virtio_rmb(hw->weak_barriers);
2182         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
2183                 virtio_xmit_cleanup_inorder(vq, nb_used);
2184
2185         if (unlikely(!vq->vq_free_cnt))
2186                 virtio_xmit_cleanup_inorder(vq, nb_used);
2187
2188         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
2189
2190         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
2191                 struct rte_mbuf *txm = tx_pkts[nb_tx];
2192                 int slots, need;
2193
2194                 /* Do VLAN tag insertion */
2195                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
2196                         error = rte_vlan_insert(&txm);
2197                         if (unlikely(error)) {
2198                                 rte_pktmbuf_free(txm);
2199                                 continue;
2200                         }
2201                         /* vlan_insert may add a header mbuf */
2202                         tx_pkts[nb_tx] = txm;
2203                 }
2204
2205                 /* Optimize ring usage: push the header into the mbuf headroom when possible */
2206                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
2207                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
2208                      rte_mbuf_refcnt_read(txm) == 1 &&
2209                      RTE_MBUF_DIRECT(txm) &&
2210                      txm->nb_segs == 1 &&
2211                      rte_pktmbuf_headroom(txm) >= hdr_size &&
2212                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
2213                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
2214                         inorder_pkts[nb_inorder_pkts] = txm;
2215                         nb_inorder_pkts++;
2216
2217                         virtio_update_packet_stats(&txvq->stats, txm);
2218                         continue;
2219                 }
2220
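                     /* This packet cannot take the fast path, so flush the
                      * pending batch first to keep descriptors in packet order.
                      */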
2221                 if (nb_inorder_pkts) {
2222                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2223                                                         nb_inorder_pkts);
2224                         nb_inorder_pkts = 0;
2225                 }
2226
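                     /* One extra slot for the virtio-net header, which cannot be pushed here */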
2227                 slots = txm->nb_segs + 1;
2228                 need = slots - vq->vq_free_cnt;
2229                 if (unlikely(need > 0)) {
2230                         nb_used = VIRTQUEUE_NUSED(vq);
2231                         virtio_rmb(hw->weak_barriers);
2232                         need = RTE_MIN(need, (int)nb_used);
2233
2234                         virtio_xmit_cleanup_inorder(vq, need);
2235
2236                         need = slots - vq->vq_free_cnt;
2237
2238                         if (unlikely(need > 0)) {
2239                                 PMD_TX_LOG(ERR,
2240                                         "No free tx descriptors to transmit");
2241                                 break;
2242                         }
2243                 }
2244                 /* Enqueue Packet buffers */
2245                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
2246
2247                 virtio_update_packet_stats(&txvq->stats, txm);
2248         }
2249
2250         /* Transmit all inorder packets */
2251         if (nb_inorder_pkts)
2252                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
2253                                                 nb_inorder_pkts);
2254
2255         txvq->stats.packets += nb_tx;
2256
2257         if (likely(nb_tx)) {
2258                 vq_update_avail_idx(vq);
2259
2260                 if (unlikely(virtqueue_kick_prepare(vq))) {
2261                         virtqueue_notify(vq);
2262                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
2263                 }
2264         }
2265
2266         VIRTQUEUE_DUMP(vq);
2267
2268         return nb_tx;
2269 }