net/virtio: extract common part for in-order functions
drivers/net/virtio/virtio_rxtx.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34
35 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
36 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
37 #else
38 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
39 #endif
40
41 int
42 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
43 {
44         struct virtnet_rx *rxvq = rxq;
45         struct virtqueue *vq = rxvq->vq;
46
47         return VIRTQUEUE_NUSED(vq) >= offset;
48 }
49
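/*
 * Return "num" descriptors to the free pool without walking any chain.
 * With the in-order feature the device consumes descriptors in ring
 * order, so bumping the free count and moving the tail index is enough.
 */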
50 void
51 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
52 {
53         vq->vq_free_cnt += num;
54         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
55 }
56
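/*
 * Return the descriptor chain starting at desc_idx to the free list:
 * walk to the end of the chain (an indirect descriptor occupies a single
 * slot) and link it after the current free-list tail.
 */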
57 void
58 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
59 {
60         struct vring_desc *dp, *dp_tail;
61         struct vq_desc_extra *dxp;
62         uint16_t desc_idx_last = desc_idx;
63
64         dp  = &vq->vq_ring.desc[desc_idx];
65         dxp = &vq->vq_descx[desc_idx];
66         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
67         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
68                 while (dp->flags & VRING_DESC_F_NEXT) {
69                         desc_idx_last = dp->next;
70                         dp = &vq->vq_ring.desc[dp->next];
71                 }
72         }
73         dxp->ndescs = 0;
74
75         /*
76          * We must append the existing free chain, if any, to the end of
77          * newly freed chain. If the virtqueue was completely used, then
78          * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
79          */
80         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
81                 vq->vq_desc_head_idx = desc_idx;
82         } else {
83                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
84                 dp_tail->next = desc_idx;
85         }
86
87         vq->vq_desc_tail_idx = desc_idx_last;
88         dp->next = VQ_RING_DESC_CHAIN_END;
89 }
90
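/*
 * Dequeue up to "num" entries from the used ring: return the mbuf
 * attached to each descriptor through rx_pkts[], record the written
 * length in len[] and put the descriptors back on the free list.
 */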
91 static uint16_t
92 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
93                            uint32_t *len, uint16_t num)
94 {
95         struct vring_used_elem *uep;
96         struct rte_mbuf *cookie;
97         uint16_t used_idx, desc_idx;
98         uint16_t i;
99
100         /*  Caller does the check */
101         for (i = 0; i < num ; i++) {
102                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
103                 uep = &vq->vq_ring.used->ring[used_idx];
104                 desc_idx = (uint16_t) uep->id;
105                 len[i] = uep->len;
106                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
107
108                 if (unlikely(cookie == NULL)) {
109                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
110                                 vq->vq_used_cons_idx);
111                         break;
112                 }
113
114                 rte_prefetch0(cookie);
115                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
116                 rx_pkts[i]  = cookie;
117                 vq->vq_used_cons_idx++;
118                 vq_ring_free_chain(vq, desc_idx);
119                 vq->vq_descx[desc_idx].cookie = NULL;
120         }
121
122         return i;
123 }
124
125 #ifndef DEFAULT_TX_FREE_THRESH
126 #define DEFAULT_TX_FREE_THRESH 32
127 #endif
128
129 /* Cleanup from completed transmits. */
130 static void
131 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
132 {
133         uint16_t i, used_idx, desc_idx;
134         for (i = 0; i < num; i++) {
135                 struct vring_used_elem *uep;
136                 struct vq_desc_extra *dxp;
137
138                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
139                 uep = &vq->vq_ring.used->ring[used_idx];
140
141                 desc_idx = (uint16_t) uep->id;
142                 dxp = &vq->vq_descx[desc_idx];
143                 vq->vq_used_cons_idx++;
144                 vq_ring_free_chain(vq, desc_idx);
145
146                 if (dxp->cookie != NULL) {
147                         rte_pktmbuf_free(dxp->cookie);
148                         dxp->cookie = NULL;
149                 }
150         }
151 }
152
153
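/*
 * Post a single receive buffer to the ring. The descriptor is marked
 * write-only and covers the whole mbuf data room, with the virtio-net
 * header carved out of the headroom just before the packet data.
 */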
154 static inline int
155 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
156 {
157         struct vq_desc_extra *dxp;
158         struct virtio_hw *hw = vq->hw;
159         struct vring_desc *start_dp;
160         uint16_t needed = 1;
161         uint16_t head_idx, idx;
162
163         if (unlikely(vq->vq_free_cnt == 0))
164                 return -ENOSPC;
165         if (unlikely(vq->vq_free_cnt < needed))
166                 return -EMSGSIZE;
167
168         head_idx = vq->vq_desc_head_idx;
169         if (unlikely(head_idx >= vq->vq_nentries))
170                 return -EFAULT;
171
172         idx = head_idx;
173         dxp = &vq->vq_descx[idx];
174         dxp->cookie = (void *)cookie;
175         dxp->ndescs = needed;
176
177         start_dp = vq->vq_ring.desc;
178         start_dp[idx].addr =
179                 VIRTIO_MBUF_ADDR(cookie, vq) +
180                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
181         start_dp[idx].len =
182                 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
183         start_dp[idx].flags =  VRING_DESC_F_WRITE;
184         idx = start_dp[idx].next;
185         vq->vq_desc_head_idx = idx;
186         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
187                 vq->vq_desc_tail_idx = idx;
188         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
189         vq_update_avail_ring(vq, head_idx);
190
191         return 0;
192 }
193
194 /* When doing TSO, the pseudo-header checksum of the packet handed to the
195  * PMD does not include the IP payload length, but virtio expects it to,
196  * so it must be fixed up here.
197  */
198 static void
199 virtio_tso_fix_cksum(struct rte_mbuf *m)
200 {
201         /* common case: header is not fragmented */
202         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
203                         m->l4_len)) {
204                 struct ipv4_hdr *iph;
205                 struct ipv6_hdr *ip6h;
206                 struct tcp_hdr *th;
207                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
208                 uint32_t tmp;
209
210                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
211                 th = RTE_PTR_ADD(iph, m->l3_len);
212                 if ((iph->version_ihl >> 4) == 4) {
213                         iph->hdr_checksum = 0;
214                         iph->hdr_checksum = rte_ipv4_cksum(iph);
215                         ip_len = iph->total_length;
216                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
217                                 m->l3_len);
218                 } else {
219                         ip6h = (struct ipv6_hdr *)iph;
220                         ip_paylen = ip6h->payload_len;
221                 }
222
223                 /* calculate the new phdr checksum not including ip_paylen */
224                 prev_cksum = th->cksum;
225                 tmp = prev_cksum;
226                 tmp += ip_paylen;
227                 tmp = (tmp & 0xffff) + (tmp >> 16);
228                 new_cksum = tmp;
229
230                 /* replace it in the packet */
231                 th->cksum = new_cksum;
232         }
233 }
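/*
 * The fixup above is plain 16-bit one's-complement arithmetic: the IP
 * payload length is added to the existing pseudo-header checksum and the
 * carry is folded back in, so nothing else in the headers is rewritten
 * (apart from refreshing the IPv4 header checksum).
 */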
234
235 static inline int
236 tx_offload_enabled(struct virtio_hw *hw)
237 {
238         return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
239                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
240                 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
241 }
242
243 /* avoid the write when the value is already set, to lessen cache issues */
244 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
245         if ((var) != (val))                     \
246                 (var) = (val);                  \
247 } while (0)
248
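/*
 * Fill the virtio-net header from the mbuf TX offload requests (L4
 * checksum and TSO). A minimal sketch, not taken from this file, of what
 * a caller of rte_eth_tx_burst() might set so that this function asks the
 * host to compute the TCP checksum (exact flags depend on the application;
 * ip_hdr/tcp_hdr are assumed to point into the packet):
 *
 *	m->l2_len = sizeof(struct ether_hdr);
 *	m->l3_len = sizeof(struct ipv4_hdr);
 *	m->ol_flags |= PKT_TX_IPV4 | PKT_TX_TCP_CKSUM;
 *	tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr, m->ol_flags);
 *
 * csum_start/csum_offset and VIRTIO_NET_HDR_F_NEEDS_CSUM are then filled
 * in below and the checksum computation itself is left to the host.
 */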
249 static inline void
250 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
251                         struct rte_mbuf *cookie,
252                         int offload)
253 {
254         if (offload) {
255                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
256                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
257
258                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
259                 case PKT_TX_UDP_CKSUM:
260                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
261                         hdr->csum_offset = offsetof(struct udp_hdr,
262                                 dgram_cksum);
263                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
264                         break;
265
266                 case PKT_TX_TCP_CKSUM:
267                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
268                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
269                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
270                         break;
271
272                 default:
273                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
274                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
275                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
276                         break;
277                 }
278
279                 /* TCP Segmentation Offload */
280                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
281                         virtio_tso_fix_cksum(cookie);
282                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
283                                 VIRTIO_NET_HDR_GSO_TCPV6 :
284                                 VIRTIO_NET_HDR_GSO_TCPV4;
285                         hdr->gso_size = cookie->tso_segsz;
286                         hdr->hdr_len =
287                                 cookie->l2_len +
288                                 cookie->l3_len +
289                                 cookie->l4_len;
290                 } else {
291                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
292                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
293                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
294                 }
295         }
296 }
297
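/*
 * Fill the TX descriptors for one packet. Three layouts are possible:
 * - can_push: the virtio-net header is prepended into the mbuf headroom,
 *   so only one descriptor per segment is needed;
 * - use_indirect: a single ring slot points at the per-slot indirect
 *   descriptor table reserved in the header memzone;
 * - otherwise: the first descriptor points at the header in the reserved
 *   region and the data segments are chained behind it.
 */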
298 static inline void
299 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
300                        uint16_t needed, int use_indirect, int can_push)
301 {
302         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
303         struct vq_desc_extra *dxp;
304         struct virtqueue *vq = txvq->vq;
305         struct vring_desc *start_dp;
306         uint16_t seg_num = cookie->nb_segs;
307         uint16_t head_idx, idx;
308         uint16_t head_size = vq->hw->vtnet_hdr_size;
309         struct virtio_net_hdr *hdr;
310         int offload;
311
312         offload = tx_offload_enabled(vq->hw);
313         head_idx = vq->vq_desc_head_idx;
314         idx = head_idx;
315         dxp = &vq->vq_descx[idx];
316         dxp->cookie = (void *)cookie;
317         dxp->ndescs = needed;
318
319         start_dp = vq->vq_ring.desc;
320
321         if (can_push) {
322                 /* prepend cannot fail, checked by caller */
323                 hdr = (struct virtio_net_hdr *)
324                         rte_pktmbuf_prepend(cookie, head_size);
325                 /* rte_pktmbuf_prepend() adds the hdr size to the pkt length,
326                  * which is not wanted here; the subtraction below restores it.
327                  */
328                 cookie->pkt_len -= head_size;
329                 /* if offload is disabled, the header is not zeroed below, do it now */
330                 if (offload == 0) {
331                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
332                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
333                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
334                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
335                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
336                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
337                 }
338         } else if (use_indirect) {
339                 /* setup tx ring slot to point to indirect
340                  * descriptor list stored in reserved region.
341                  *
342                  * the first slot in indirect ring is already preset
343                  * to point to the header in reserved region
344                  */
345                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
346                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
347                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
348                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
349                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
350
351                 /* loop below will fill in rest of the indirect elements */
352                 start_dp = txr[idx].tx_indir;
353                 idx = 1;
354         } else {
355                 /* setup first tx ring slot to point to header
356                  * stored in reserved region.
357                  */
358                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
359                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
360                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
361                 start_dp[idx].flags = VRING_DESC_F_NEXT;
362                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
363
364                 idx = start_dp[idx].next;
365         }
366
367         virtqueue_xmit_offload(hdr, cookie, offload);
368
369         do {
370                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
371                 start_dp[idx].len   = cookie->data_len;
372                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
373                 idx = start_dp[idx].next;
374         } while ((cookie = cookie->next) != NULL);
375
376         if (use_indirect)
377                 idx = vq->vq_ring.desc[head_idx].next;
378
379         vq->vq_desc_head_idx = idx;
380         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
381                 vq->vq_desc_tail_idx = idx;
382         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
383         vq_update_avail_ring(vq, head_idx);
384 }
385
386 void
387 virtio_dev_cq_start(struct rte_eth_dev *dev)
388 {
389         struct virtio_hw *hw = dev->data->dev_private;
390
391         if (hw->cvq && hw->cvq->vq) {
392                 rte_spinlock_init(&hw->cvq->lock);
393                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
394         }
395 }
396
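/*
 * Ethdev queue N is backed by virtqueue 2N for RX and 2N + 1 for TX
 * (VTNET_SQ_RQ_QUEUE_IDX / VTNET_SQ_TQ_QUEUE_IDX), matching the
 * receiveq/transmitq pairing of the virtio-net device.
 */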
397 int
398 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
399                         uint16_t queue_idx,
400                         uint16_t nb_desc,
401                         unsigned int socket_id __rte_unused,
402                         const struct rte_eth_rxconf *rx_conf __rte_unused,
403                         struct rte_mempool *mp)
404 {
405         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
406         struct virtio_hw *hw = dev->data->dev_private;
407         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
408         struct virtnet_rx *rxvq;
409
410         PMD_INIT_FUNC_TRACE();
411
412         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
413                 nb_desc = vq->vq_nentries;
414         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
415
416         rxvq = &vq->rxq;
417         rxvq->queue_id = queue_idx;
418         rxvq->mpool = mp;
419         if (rxvq->mpool == NULL) {
420                 rte_exit(EXIT_FAILURE,
421                         "Cannot allocate mbufs for rx virtqueue");
422         }
423
424         dev->data->rx_queues[queue_idx] = rxvq;
425
426         return 0;
427 }
428
429 int
430 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
431 {
432         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
433         struct virtio_hw *hw = dev->data->dev_private;
434         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
435         struct virtnet_rx *rxvq = &vq->rxq;
436         struct rte_mbuf *m;
437         uint16_t desc_idx;
438         int error, nbufs;
439
440         PMD_INIT_FUNC_TRACE();
441
442         /* Allocate blank mbufs for each rx descriptor */
443         nbufs = 0;
444
445         if (hw->use_simple_rx) {
446                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
447                      desc_idx++) {
448                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
449                         vq->vq_ring.desc[desc_idx].flags =
450                                 VRING_DESC_F_WRITE;
451                 }
452
453                 virtio_rxq_vec_setup(rxvq);
454         }
455
456         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
457         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
458              desc_idx++) {
459                 vq->sw_ring[vq->vq_nentries + desc_idx] =
460                         &rxvq->fake_mbuf;
461         }
462
463         if (hw->use_simple_rx) {
464                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
465                         virtio_rxq_rearm_vec(rxvq);
466                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
467                 }
468         } else {
469                 while (!virtqueue_full(vq)) {
470                         m = rte_mbuf_raw_alloc(rxvq->mpool);
471                         if (m == NULL)
472                                 break;
473
474                         /* Enqueue allocated buffers */
475                         error = virtqueue_enqueue_recv_refill(vq, m);
476                         if (error) {
477                                 rte_pktmbuf_free(m);
478                                 break;
479                         }
480                         nbufs++;
481                 }
482
483                 vq_update_avail_idx(vq);
484         }
485
486         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
487
488         VIRTQUEUE_DUMP(vq);
489
490         return 0;
491 }
492
493 /*
494  * struct rte_eth_dev *dev: Used to update dev
495  * uint16_t nb_desc: Defaults to values read from config space
496  * unsigned int socket_id: Used to allocate memzone
497  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
498  * uint16_t queue_idx: Just used as an index in dev txq list
499  */
500 int
501 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
502                         uint16_t queue_idx,
503                         uint16_t nb_desc,
504                         unsigned int socket_id __rte_unused,
505                         const struct rte_eth_txconf *tx_conf)
506 {
507         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
508         struct virtio_hw *hw = dev->data->dev_private;
509         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
510         struct virtnet_tx *txvq;
511         uint16_t tx_free_thresh;
512
513         PMD_INIT_FUNC_TRACE();
514
515         /* cannot use simple rxtx funcs with multisegs or offloads */
516         if (dev->data->dev_conf.txmode.offloads)
517                 hw->use_simple_tx = 0;
518
519         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
520                 nb_desc = vq->vq_nentries;
521         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
522
523         txvq = &vq->txq;
524         txvq->queue_id = queue_idx;
525
526         tx_free_thresh = tx_conf->tx_free_thresh;
527         if (tx_free_thresh == 0)
528                 tx_free_thresh =
529                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
530
531         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
532                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
533                         "number of TX entries minus 3 (%u)."
534                         " (tx_free_thresh=%u port=%u queue=%u)\n",
535                         vq->vq_nentries - 3,
536                         tx_free_thresh, dev->data->port_id, queue_idx);
537                 return -EINVAL;
538         }
539
540         vq->vq_free_thresh = tx_free_thresh;
541
542         dev->data->tx_queues[queue_idx] = txvq;
543         return 0;
544 }
545
546 int
547 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
548                                 uint16_t queue_idx)
549 {
550         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
551         struct virtio_hw *hw = dev->data->dev_private;
552         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
553         uint16_t mid_idx = vq->vq_nentries >> 1;
554         struct virtnet_tx *txvq = &vq->txq;
555         uint16_t desc_idx;
556
557         PMD_INIT_FUNC_TRACE();
558
559         if (hw->use_simple_tx) {
560                 for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
561                         vq->vq_ring.avail->ring[desc_idx] =
562                                 desc_idx + mid_idx;
563                         vq->vq_ring.desc[desc_idx + mid_idx].next =
564                                 desc_idx;
565                         vq->vq_ring.desc[desc_idx + mid_idx].addr =
566                                 txvq->virtio_net_hdr_mem +
567                                 offsetof(struct virtio_tx_region, tx_hdr);
568                         vq->vq_ring.desc[desc_idx + mid_idx].len =
569                                 vq->hw->vtnet_hdr_size;
570                         vq->vq_ring.desc[desc_idx + mid_idx].flags =
571                                 VRING_DESC_F_NEXT;
572                         vq->vq_ring.desc[desc_idx].flags = 0;
573                 }
574                 for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
575                      desc_idx++)
576                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
577         }
578
579         VIRTQUEUE_DUMP(vq);
580
581         return 0;
582 }
583
584 static void
585 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
586 {
587         int error;
588         /*
589          * Requeue the discarded mbuf. This should always be
590          * successful since it was just dequeued.
591          */
592         error = virtqueue_enqueue_recv_refill(vq, m);
593         if (unlikely(error)) {
594                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
595                 rte_pktmbuf_free(m);
596         }
597 }
598
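/*
 * Packet size histogram: bin[0] holds packets shorter than 64 bytes,
 * bin[1] exactly 64 bytes, bins [2..5] cover 65-1023 bytes split by
 * power of two (the __builtin_clz() trick below), bin[6] 1024-1518
 * bytes and bin[7] everything larger.
 */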
599 static void
600 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
601 {
602         uint32_t s = mbuf->pkt_len;
603         struct ether_addr *ea;
604
605         if (s == 64) {
606                 stats->size_bins[1]++;
607         } else if (s > 64 && s < 1024) {
608                 uint32_t bin;
609
610                 /* count leading zeros to offset into the correct bin */
611                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
612                 stats->size_bins[bin]++;
613         } else {
614                 if (s < 64)
615                         stats->size_bins[0]++;
616                 else if (s < 1519)
617                         stats->size_bins[6]++;
618                 else if (s >= 1519)
619                         stats->size_bins[7]++;
620         }
621
622         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
623         if (is_multicast_ether_addr(ea)) {
624                 if (is_broadcast_ether_addr(ea))
625                         stats->broadcast++;
626                 else
627                         stats->multicast++;
628         }
629 }
630
631 static inline void
632 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
633 {
634         VIRTIO_DUMP_PACKET(m, m->data_len);
635
636         rxvq->stats.bytes += m->pkt_len;
637         virtio_update_packet_stats(&rxvq->stats, m);
638 }
639
640 /* Optionally fill offload information in structure */
641 static int
642 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
643 {
644         struct rte_net_hdr_lens hdr_lens;
645         uint32_t hdrlen, ptype;
646         int l4_supported = 0;
647
648         /* nothing to do */
649         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
650                 return 0;
651
652         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
653
654         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
655         m->packet_type = ptype;
656         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
657             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
658             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
659                 l4_supported = 1;
660
661         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
662                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
663                 if (hdr->csum_start <= hdrlen && l4_supported) {
664                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
665                 } else {
666                         /* Unknown proto or tunnel, do sw cksum. We can assume
667                          * the cksum field is in the first segment since the
668                          * buffers we provided to the host are large enough.
669                          * In case of SCTP, this will be wrong since it's a CRC
670                          * but there's nothing we can do.
671                          */
672                         uint16_t csum = 0, off;
673
674                         rte_raw_cksum_mbuf(m, hdr->csum_start,
675                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
676                                 &csum);
677                         if (likely(csum != 0xffff))
678                                 csum = ~csum;
679                         off = hdr->csum_offset + hdr->csum_start;
680                         if (rte_pktmbuf_data_len(m) >= off + 1)
681                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
682                                         off) = csum;
683                 }
684         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
685                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
686         }
687
688         /* GSO request, save required information in mbuf */
689         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
690                 /* Check unsupported modes */
691                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
692                     (hdr->gso_size == 0)) {
693                         return -EINVAL;
694                 }
695
696                 /* Update mss lengths in mbuf */
697                 m->tso_segsz = hdr->gso_size;
698                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
699                         case VIRTIO_NET_HDR_GSO_TCPV4:
700                         case VIRTIO_NET_HDR_GSO_TCPV6:
701                                 m->ol_flags |= PKT_RX_LRO | \
702                                         PKT_RX_L4_CKSUM_NONE;
703                                 break;
704                         default:
705                                 return -EINVAL;
706                 }
707         }
708
709         return 0;
710 }
711
712 static inline int
713 rx_offload_enabled(struct virtio_hw *hw)
714 {
715         return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
716                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
717                 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
718 }
719
720 #define VIRTIO_MBUF_BURST_SZ 64
721 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
722 uint16_t
723 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
724 {
725         struct virtnet_rx *rxvq = rx_queue;
726         struct virtqueue *vq = rxvq->vq;
727         struct virtio_hw *hw = vq->hw;
728         struct rte_mbuf *rxm, *new_mbuf;
729         uint16_t nb_used, num, nb_rx;
730         uint32_t len[VIRTIO_MBUF_BURST_SZ];
731         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
732         int error;
733         uint32_t i, nb_enqueued;
734         uint32_t hdr_size;
735         int offload;
736         struct virtio_net_hdr *hdr;
737
738         nb_rx = 0;
739         if (unlikely(hw->started == 0))
740                 return nb_rx;
741
742         nb_used = VIRTQUEUE_NUSED(vq);
743
744         virtio_rmb();
745
746         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
747         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
748                 num = VIRTIO_MBUF_BURST_SZ;
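        /* Trim the burst so the used-ring consumer index ends up
         * cache-line aligned for the next call.
         */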
749         if (likely(num > DESC_PER_CACHELINE))
750                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
751
752         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
753         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
754
755         nb_enqueued = 0;
756         hdr_size = hw->vtnet_hdr_size;
757         offload = rx_offload_enabled(hw);
758
759         for (i = 0; i < num ; i++) {
760                 rxm = rcv_pkts[i];
761
762                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
763
764                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
765                         PMD_RX_LOG(ERR, "Packet drop");
766                         nb_enqueued++;
767                         virtio_discard_rxbuf(vq, rxm);
768                         rxvq->stats.errors++;
769                         continue;
770                 }
771
772                 rxm->port = rxvq->port_id;
773                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
774                 rxm->ol_flags = 0;
775                 rxm->vlan_tci = 0;
776
777                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
778                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
779
780                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
781                         RTE_PKTMBUF_HEADROOM - hdr_size);
782
783                 if (hw->vlan_strip)
784                         rte_vlan_strip(rxm);
785
786                 if (offload && virtio_rx_offload(rxm, hdr) < 0) {
787                         virtio_discard_rxbuf(vq, rxm);
788                         rxvq->stats.errors++;
789                         continue;
790                 }
791
792                 virtio_rx_stats_updated(rxvq, rxm);
793
794                 rx_pkts[nb_rx++] = rxm;
795         }
796
797         rxvq->stats.packets += nb_rx;
798
799         /* Allocate new mbuf for the used descriptor */
800         error = ENOSPC;
801         while (likely(!virtqueue_full(vq))) {
802                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
803                 if (unlikely(new_mbuf == NULL)) {
804                         struct rte_eth_dev *dev
805                                 = &rte_eth_devices[rxvq->port_id];
806                         dev->data->rx_mbuf_alloc_failed++;
807                         break;
808                 }
809                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
810                 if (unlikely(error)) {
811                         rte_pktmbuf_free(new_mbuf);
812                         break;
813                 }
814                 nb_enqueued++;
815         }
816
817         if (likely(nb_enqueued)) {
818                 vq_update_avail_idx(vq);
819
820                 if (unlikely(virtqueue_kick_prepare(vq))) {
821                         virtqueue_notify(vq);
822                         PMD_RX_LOG(DEBUG, "Notified");
823                 }
824         }
825
826         return nb_rx;
827 }
828
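/*
 * Receive path when mergeable RX buffers are negotiated: the host may
 * spread one packet over several descriptors, with num_buffers in the
 * first header giving the count, so the extra buffers are dequeued and
 * chained onto the head mbuf before the packet is returned.
 */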
829 uint16_t
830 virtio_recv_mergeable_pkts(void *rx_queue,
831                         struct rte_mbuf **rx_pkts,
832                         uint16_t nb_pkts)
833 {
834         struct virtnet_rx *rxvq = rx_queue;
835         struct virtqueue *vq = rxvq->vq;
836         struct virtio_hw *hw = vq->hw;
837         struct rte_mbuf *rxm, *new_mbuf;
838         uint16_t nb_used, num, nb_rx;
839         uint32_t len[VIRTIO_MBUF_BURST_SZ];
840         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
841         struct rte_mbuf *prev;
842         int error;
843         uint32_t i, nb_enqueued;
844         uint32_t seg_num;
845         uint16_t extra_idx;
846         uint32_t seg_res;
847         uint32_t hdr_size;
848         int offload;
849
850         nb_rx = 0;
851         if (unlikely(hw->started == 0))
852                 return nb_rx;
853
854         nb_used = VIRTQUEUE_NUSED(vq);
855
856         virtio_rmb();
857
858         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
859
860         i = 0;
861         nb_enqueued = 0;
862         seg_num = 0;
863         extra_idx = 0;
864         seg_res = 0;
865         hdr_size = hw->vtnet_hdr_size;
866         offload = rx_offload_enabled(hw);
867
868         while (i < nb_used) {
869                 struct virtio_net_hdr_mrg_rxbuf *header;
870
871                 if (nb_rx == nb_pkts)
872                         break;
873
874                 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
875                 if (num != 1)
876                         continue;
877
878                 i++;
879
880                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
881                 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
882
883                 rxm = rcv_pkts[0];
884
885                 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
886                         PMD_RX_LOG(ERR, "Packet drop");
887                         nb_enqueued++;
888                         virtio_discard_rxbuf(vq, rxm);
889                         rxvq->stats.errors++;
890                         continue;
891                 }
892
893                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
894                         RTE_PKTMBUF_HEADROOM - hdr_size);
895                 seg_num = header->num_buffers;
896
897                 if (seg_num == 0)
898                         seg_num = 1;
899
900                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
901                 rxm->nb_segs = seg_num;
902                 rxm->ol_flags = 0;
903                 rxm->vlan_tci = 0;
904                 rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
905                 rxm->data_len = (uint16_t)(len[0] - hdr_size);
906
907                 rxm->port = rxvq->port_id;
908                 rx_pkts[nb_rx] = rxm;
909                 prev = rxm;
910
911                 if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
912                         virtio_discard_rxbuf(vq, rxm);
913                         rxvq->stats.errors++;
914                         continue;
915                 }
916
917                 seg_res = seg_num - 1;
918
919                 while (seg_res != 0) {
920                         /*
921                          * Get extra segments for current uncompleted packet.
922                          */
923                         uint16_t  rcv_cnt =
924                                 RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
925                         if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
926                                 uint32_t rx_num =
927                                         virtqueue_dequeue_burst_rx(vq,
928                                         rcv_pkts, len, rcv_cnt);
929                                 i += rx_num;
930                                 rcv_cnt = rx_num;
931                         } else {
932                                 PMD_RX_LOG(ERR,
933                                            "Not enough segments for packet.");
934                                 nb_enqueued++;
935                                 virtio_discard_rxbuf(vq, rxm);
936                                 rxvq->stats.errors++;
937                                 break;
938                         }
939
940                         extra_idx = 0;
941
942                         while (extra_idx < rcv_cnt) {
943                                 rxm = rcv_pkts[extra_idx];
944
945                                 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
946                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
947                                 rxm->data_len = (uint16_t)(len[extra_idx]);
948
949                                 if (prev)
950                                         prev->next = rxm;
951
952                                 prev = rxm;
953                                 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
954                                 extra_idx++;
955                         };
956                         seg_res -= rcv_cnt;
957                 }
958
959                 if (hw->vlan_strip)
960                         rte_vlan_strip(rx_pkts[nb_rx]);
961
962                 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
963                         rx_pkts[nb_rx]->data_len);
964
965                 rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
966                 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
967                 nb_rx++;
968         }
969
970         rxvq->stats.packets += nb_rx;
971
972         /* Allocate new mbuf for the used descriptor */
973         error = ENOSPC;
974         while (likely(!virtqueue_full(vq))) {
975                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
976                 if (unlikely(new_mbuf == NULL)) {
977                         struct rte_eth_dev *dev
978                                 = &rte_eth_devices[rxvq->port_id];
979                         dev->data->rx_mbuf_alloc_failed++;
980                         break;
981                 }
982                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
983                 if (unlikely(error)) {
984                         rte_pktmbuf_free(new_mbuf);
985                         break;
986                 }
987                 nb_enqueued++;
988         }
989
990         if (likely(nb_enqueued)) {
991                 vq_update_avail_idx(vq);
992
993                 if (unlikely(virtqueue_kick_prepare(vq))) {
994                         virtqueue_notify(vq);
995                         PMD_RX_LOG(DEBUG, "Notified");
996                 }
997         }
998
999         return nb_rx;
1000 }
1001
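/*
 * Transmit burst: reclaim completed descriptors once the backlog of used
 * entries grows large enough, pick the cheapest descriptor layout for
 * each packet (header push, indirect table or a descriptor chain),
 * enqueue it and finally kick the host once for the whole burst.
 */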
1002 uint16_t
1003 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1004 {
1005         struct virtnet_tx *txvq = tx_queue;
1006         struct virtqueue *vq = txvq->vq;
1007         struct virtio_hw *hw = vq->hw;
1008         uint16_t hdr_size = hw->vtnet_hdr_size;
1009         uint16_t nb_used, nb_tx = 0;
1010         int error;
1011
1012         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1013                 return nb_tx;
1014
1015         if (unlikely(nb_pkts < 1))
1016                 return nb_pkts;
1017
1018         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1019         nb_used = VIRTQUEUE_NUSED(vq);
1020
1021         virtio_rmb();
1022         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1023                 virtio_xmit_cleanup(vq, nb_used);
1024
1025         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1026                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1027                 int can_push = 0, use_indirect = 0, slots, need;
1028
1029                 /* Do VLAN tag insertion */
1030                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1031                         error = rte_vlan_insert(&txm);
1032                         if (unlikely(error)) {
1033                                 rte_pktmbuf_free(txm);
1034                                 continue;
1035                         }
1036                 }
1037
1038                 /* optimize ring usage */
1039                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1040                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1041                     rte_mbuf_refcnt_read(txm) == 1 &&
1042                     RTE_MBUF_DIRECT(txm) &&
1043                     txm->nb_segs == 1 &&
1044                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1045                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1046                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1047                         can_push = 1;
1048                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1049                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1050                         use_indirect = 1;
1051
1052                 /* How many main ring entries are needed for this Tx?
1053                  * any_layout => number of segments
1054                  * indirect   => 1
1055                  * default    => number of segments + 1
1056                  */
1057                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1058                 need = slots - vq->vq_free_cnt;
1059
1060                 /* A positive value indicates it needs free vring descriptors */
1061                 if (unlikely(need > 0)) {
1062                         nb_used = VIRTQUEUE_NUSED(vq);
1063                         virtio_rmb();
1064                         need = RTE_MIN(need, (int)nb_used);
1065
1066                         virtio_xmit_cleanup(vq, need);
1067                         need = slots - vq->vq_free_cnt;
1068                         if (unlikely(need > 0)) {
1069                                 PMD_TX_LOG(ERR,
1070                                            "No free tx descriptors to transmit");
1071                                 break;
1072                         }
1073                 }
1074
1075                 /* Enqueue Packet buffers */
1076                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
1077
1078                 txvq->stats.bytes += txm->pkt_len;
1079                 virtio_update_packet_stats(&txvq->stats, txm);
1080         }
1081
1082         txvq->stats.packets += nb_tx;
1083
1084         if (likely(nb_tx)) {
1085                 vq_update_avail_idx(vq);
1086
1087                 if (unlikely(virtqueue_kick_prepare(vq))) {
1088                         virtqueue_notify(vq);
1089                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1090                 }
1091         }
1092
1093         return nb_tx;
1094 }