fc0d84bd38f580c982187009f78ad942ada9240d
[dpdk.git] drivers/net/virtio/virtio_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <errno.h>
39
40 #include <rte_cycles.h>
41 #include <rte_memory.h>
42 #include <rte_memzone.h>
43 #include <rte_branch_prediction.h>
44 #include <rte_mempool.h>
45 #include <rte_malloc.h>
46 #include <rte_mbuf.h>
47 #include <rte_ether.h>
48 #include <rte_ethdev.h>
49 #include <rte_prefetch.h>
50 #include <rte_string_fns.h>
51 #include <rte_errno.h>
52 #include <rte_byteorder.h>
53 #include <rte_cpuflags.h>
54 #include <rte_net.h>
55 #include <rte_ip.h>
56
57 #include "virtio_logs.h"
58 #include "virtio_ethdev.h"
59 #include "virtio_pci.h"
60 #include "virtqueue.h"
61 #include "virtio_rxtx.h"
62
63 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
64 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
65 #else
66 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
67 #endif
68
69
70 #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
71         ETH_TXQ_FLAGS_NOOFFLOADS)
72
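/*
 * Return the descriptor chain that starts at desc_idx to the free list:
 * credit vq_free_cnt, walk the chain to find its last descriptor and link
 * the chain after the current free-list tail (or make it the new head if
 * the free list was empty).
 */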
73 static void
74 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
75 {
76         struct vring_desc *dp, *dp_tail;
77         struct vq_desc_extra *dxp;
78         uint16_t desc_idx_last = desc_idx;
79
80         dp  = &vq->vq_ring.desc[desc_idx];
81         dxp = &vq->vq_descx[desc_idx];
82         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
83         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
84                 while (dp->flags & VRING_DESC_F_NEXT) {
85                         desc_idx_last = dp->next;
86                         dp = &vq->vq_ring.desc[dp->next];
87                 }
88         }
89         dxp->ndescs = 0;
90
91         /*
92          * We must append the existing free chain, if any, to the end of
93          * the newly freed chain. If the virtqueue was completely used,
94          * then vq_desc_tail_idx is VQ_RING_DESC_CHAIN_END.
95          */
96         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
97                 vq->vq_desc_head_idx = desc_idx;
98         } else {
99                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
100                 dp_tail->next = desc_idx;
101         }
102
103         vq->vq_desc_tail_idx = desc_idx_last;
104         dp->next = VQ_RING_DESC_CHAIN_END;
105 }
106
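/*
 * Dequeue up to num completed rx buffers from the used ring, returning the
 * mbufs in rx_pkts[] and their lengths in len[], and put the freed
 * descriptors back on the free list.  Returns the number actually dequeued.
 */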
107 static uint16_t
108 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
109                            uint32_t *len, uint16_t num)
110 {
111         struct vring_used_elem *uep;
112         struct rte_mbuf *cookie;
113         uint16_t used_idx, desc_idx;
114         uint16_t i;
115
116         /*  Caller does the check */
117         for (i = 0; i < num ; i++) {
118                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
119                 uep = &vq->vq_ring.used->ring[used_idx];
120                 desc_idx = (uint16_t) uep->id;
121                 len[i] = uep->len;
122                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
123
124                 if (unlikely(cookie == NULL)) {
125                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
126                                 vq->vq_used_cons_idx);
127                         break;
128                 }
129
130                 rte_prefetch0(cookie);
131                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
132                 rx_pkts[i]  = cookie;
133                 vq->vq_used_cons_idx++;
134                 vq_ring_free_chain(vq, desc_idx);
135                 vq->vq_descx[desc_idx].cookie = NULL;
136         }
137
138         return i;
139 }
140
141 #ifndef DEFAULT_TX_FREE_THRESH
142 #define DEFAULT_TX_FREE_THRESH 32
143 #endif
144
145 /* Cleanup from completed transmits. */
146 static void
147 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
148 {
149         uint16_t i, used_idx, desc_idx;
150         for (i = 0; i < num; i++) {
151                 struct vring_used_elem *uep;
152                 struct vq_desc_extra *dxp;
153
154                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
155                 uep = &vq->vq_ring.used->ring[used_idx];
156
157                 desc_idx = (uint16_t) uep->id;
158                 dxp = &vq->vq_descx[desc_idx];
159                 vq->vq_used_cons_idx++;
160                 vq_ring_free_chain(vq, desc_idx);
161
162                 if (dxp->cookie != NULL) {
163                         rte_pktmbuf_free(dxp->cookie);
164                         dxp->cookie = NULL;
165                 }
166         }
167 }
168
169
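/*
 * Post a single mbuf to the rx ring.  The descriptor covers the mbuf data
 * room plus the virtio net header, which the host writes immediately in
 * front of the packet data, inside the mbuf headroom.
 */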
170 static inline int
171 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
172 {
173         struct vq_desc_extra *dxp;
174         struct virtio_hw *hw = vq->hw;
175         struct vring_desc *start_dp;
176         uint16_t needed = 1;
177         uint16_t head_idx, idx;
178
179         if (unlikely(vq->vq_free_cnt == 0))
180                 return -ENOSPC;
181         if (unlikely(vq->vq_free_cnt < needed))
182                 return -EMSGSIZE;
183
184         head_idx = vq->vq_desc_head_idx;
185         if (unlikely(head_idx >= vq->vq_nentries))
186                 return -EFAULT;
187
188         idx = head_idx;
189         dxp = &vq->vq_descx[idx];
190         dxp->cookie = (void *)cookie;
191         dxp->ndescs = needed;
192
193         start_dp = vq->vq_ring.desc;
194         start_dp[idx].addr =
195                 VIRTIO_MBUF_ADDR(cookie, vq) +
196                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
197         start_dp[idx].len =
198                 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
199         start_dp[idx].flags =  VRING_DESC_F_WRITE;
200         idx = start_dp[idx].next;
201         vq->vq_desc_head_idx = idx;
202         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
203                 vq->vq_desc_tail_idx = idx;
204         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
205         vq_update_avail_ring(vq, head_idx);
206
207         return 0;
208 }
209
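/*
 * Enqueue one packet for transmission using one of three layouts:
 *  - can_push:     prepend the net header into the mbuf headroom and use one
 *                  descriptor per segment (requires VIRTIO_F_ANY_LAYOUT)
 *  - use_indirect: a single ring slot pointing to a per-slot indirect table
 *                  in the reserved header memzone
 *  - default:      one slot for the header in the reserved region plus one
 *                  slot per segment
 */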
210 static inline void
211 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
212                        uint16_t needed, int use_indirect, int can_push)
213 {
214         struct vq_desc_extra *dxp;
215         struct virtqueue *vq = txvq->vq;
216         struct vring_desc *start_dp;
217         uint16_t seg_num = cookie->nb_segs;
218         uint16_t head_idx, idx;
219         uint16_t head_size = vq->hw->vtnet_hdr_size;
220         unsigned long offs;
221
222         head_idx = vq->vq_desc_head_idx;
223         idx = head_idx;
224         dxp = &vq->vq_descx[idx];
225         dxp->cookie = (void *)cookie;
226         dxp->ndescs = needed;
227
228         start_dp = vq->vq_ring.desc;
229
230         if (can_push) {
231                 /* prepend a zeroed transmit header (no offloads) */
232                 void *hdr = rte_pktmbuf_prepend(cookie, head_size);
233
234                 memset(hdr, 0, head_size);
235         } else if (use_indirect) {
236                 /* Set up the tx ring slot to point to the indirect
237                  * descriptor list stored in the reserved region.
238                  *
239                  * The first slot in the indirect ring is already preset
240                  * to point to the header in the reserved region.
241                  */
242                 struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
243
244                 offs = idx * sizeof(struct virtio_tx_region)
245                         + offsetof(struct virtio_tx_region, tx_indir);
246
247                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
248                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
249                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
250
251                 /* loop below will fill in rest of the indirect elements */
252                 start_dp = txr[idx].tx_indir;
253                 idx = 1;
254         } else {
255                 /* setup first tx ring slot to point to header
256                  * stored in reserved region.
257                  */
258                 offs = idx * sizeof(struct virtio_tx_region)
259                         + offsetof(struct virtio_tx_region, tx_hdr);
260
261                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
262                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
263                 start_dp[idx].flags = VRING_DESC_F_NEXT;
264                 idx = start_dp[idx].next;
265         }
266
267         do {
268                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
269                 start_dp[idx].len   = cookie->data_len;
270                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
271                 idx = start_dp[idx].next;
272         } while ((cookie = cookie->next) != NULL);
273
274         if (use_indirect)
275                 idx = vq->vq_ring.desc[head_idx].next;
276
277         vq->vq_desc_head_idx = idx;
278         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
279                 vq->vq_desc_tail_idx = idx;
280         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
281         vq_update_avail_ring(vq, head_idx);
282 }
283
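/* (Re)initialize the vring memory and the driver bookkeeping for a queue. */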
284 static void
285 virtio_dev_vring_start(struct virtqueue *vq)
286 {
287         int size = vq->vq_nentries;
288         struct vring *vr = &vq->vq_ring;
289         uint8_t *ring_mem = vq->vq_ring_virt_mem;
290
291         PMD_INIT_FUNC_TRACE();
292
293         /*
294          * Reinitialise since virtio port might have been stopped and restarted
295          */
296         memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
297         vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
298         vq->vq_used_cons_idx = 0;
299         vq->vq_desc_head_idx = 0;
300         vq->vq_avail_idx = 0;
301         vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
302         vq->vq_free_cnt = vq->vq_nentries;
303         memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
304
305         vring_desc_init(vr->desc, size);
306
307         /*
308          * Disable interrupts from the device (host) to the guest.
309          */
310         virtqueue_disable_intr(vq);
311 }
312
313 void
314 virtio_dev_cq_start(struct rte_eth_dev *dev)
315 {
316         struct virtio_hw *hw = dev->data->dev_private;
317
318         if (hw->cvq && hw->cvq->vq) {
319                 virtio_dev_vring_start(hw->cvq->vq);
320                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
321         }
322 }
323
324 void
325 virtio_dev_rxtx_start(struct rte_eth_dev *dev)
326 {
327         /*
328          * Start receive and transmit vrings:
329          * -    Set up the vring structure for all queues
330          * -    Initialize descriptors for the rx vring
331          * -    Allocate blank mbufs for each rx descriptor
332          *
333          */
334         uint16_t i;
335         uint16_t desc_idx;
336         struct virtio_hw *hw = dev->data->dev_private;
337
338         PMD_INIT_FUNC_TRACE();
339
340         /* Start rx vring. */
341         for (i = 0; i < dev->data->nb_rx_queues; i++) {
342                 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
343                 struct virtqueue *vq = rxvq->vq;
344                 int error, nbufs;
345                 struct rte_mbuf *m;
346
347                 virtio_dev_vring_start(vq);
348                 if (rxvq->mpool == NULL) {
349                         rte_exit(EXIT_FAILURE,
350                                 "Cannot allocate mbufs for rx virtqueue");
351                 }
352
353                 /* Allocate blank mbufs for each rx descriptor */
354                 nbufs = 0;
355                 error = ENOSPC;
356
357                 if (hw->use_simple_rxtx) {
358                         for (desc_idx = 0; desc_idx < vq->vq_nentries;
359                              desc_idx++) {
360                                 vq->vq_ring.avail->ring[desc_idx] = desc_idx;
361                                 vq->vq_ring.desc[desc_idx].flags =
362                                         VRING_DESC_F_WRITE;
363                         }
364                 }
365
366                 memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
367                 for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
368                      desc_idx++) {
369                         vq->sw_ring[vq->vq_nentries + desc_idx] =
370                                 &rxvq->fake_mbuf;
371                 }
372
373                 while (!virtqueue_full(vq)) {
374                         m = rte_mbuf_raw_alloc(rxvq->mpool);
375                         if (m == NULL)
376                                 break;
377
378                         /******************************************
379                         *         Enqueue allocated buffers        *
380                         *******************************************/
381                         if (hw->use_simple_rxtx)
382                                 error = virtqueue_enqueue_recv_refill_simple(vq, m);
383                         else
384                                 error = virtqueue_enqueue_recv_refill(vq, m);
385
386                         if (error) {
387                                 rte_pktmbuf_free(m);
388                                 break;
389                         }
390                         nbufs++;
391                 }
392
393                 vq_update_avail_idx(vq);
394
395                 PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
396
397                 VIRTQUEUE_DUMP(vq);
398         }
399
400         /* Start tx vring. */
401         for (i = 0; i < dev->data->nb_tx_queues; i++) {
402                 struct virtnet_tx *txvq = dev->data->tx_queues[i];
403                 struct virtqueue *vq = txvq->vq;
404
405                 virtio_dev_vring_start(vq);
406                 if (hw->use_simple_rxtx) {
407                         uint16_t mid_idx  = vq->vq_nentries >> 1;
408
409                         for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
410                                 vq->vq_ring.avail->ring[desc_idx] =
411                                         desc_idx + mid_idx;
412                                 vq->vq_ring.desc[desc_idx + mid_idx].next =
413                                         desc_idx;
414                                 vq->vq_ring.desc[desc_idx + mid_idx].addr =
415                                         txvq->virtio_net_hdr_mem +
416                                         offsetof(struct virtio_tx_region, tx_hdr);
417                                 vq->vq_ring.desc[desc_idx + mid_idx].len =
418                                         vq->hw->vtnet_hdr_size;
419                                 vq->vq_ring.desc[desc_idx + mid_idx].flags =
420                                         VRING_DESC_F_NEXT;
421                                 vq->vq_ring.desc[desc_idx].flags = 0;
422                         }
423                         for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
424                              desc_idx++)
425                                 vq->vq_ring.avail->ring[desc_idx] = desc_idx;
426                 }
427
428                 VIRTQUEUE_DUMP(vq);
429         }
430 }
431
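/* Ethdev rx queue setup: create the rx virtqueue and attach the mempool. */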
432 int
433 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
434                         uint16_t queue_idx,
435                         uint16_t nb_desc,
436                         unsigned int socket_id,
437                         __rte_unused const struct rte_eth_rxconf *rx_conf,
438                         struct rte_mempool *mp)
439 {
440         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
441         struct virtnet_rx *rxvq;
442         int ret;
443
444         PMD_INIT_FUNC_TRACE();
445         ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
446                         nb_desc, socket_id, (void **)&rxvq);
447         if (ret < 0) {
448                 PMD_INIT_LOG(ERR, "rvq initialization failed");
449                 return ret;
450         }
451
452         /* Use the mempool provided by the application for rx mbuf allocation */
453         rxvq->mpool = mp;
454
455         dev->data->rx_queues[queue_idx] = rxvq;
456
457         virtio_rxq_vec_setup(rxvq);
458
459         return 0;
460 }
461
462 void
463 virtio_dev_rx_queue_release(void *rxq)
464 {
465         struct virtnet_rx *rxvq = rxq;
466         struct virtqueue *vq;
467         const struct rte_memzone *mz;
468
469         if (rxvq == NULL)
470                 return;
471
472         /*
473          * rxvq is freed when vq is freed; the memzone must be freed only
474          * after the queue is released, so save the mz pointer first.
475          */
476         vq = rxvq->vq;
477         mz = rxvq->mz;
478
479         virtio_dev_queue_release(vq);
480         rte_memzone_free(mz);
481 }
482
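/*
 * Switch to the simple (vector) rx/tx handlers when the CPU, the tx queue
 * flags and the negotiated features allow it.
 */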
483 static void
484 virtio_update_rxtx_handler(struct rte_eth_dev *dev,
485                            const struct rte_eth_txconf *tx_conf)
486 {
487         uint8_t use_simple_rxtx = 0;
488         struct virtio_hw *hw = dev->data->dev_private;
489
490 #if defined RTE_ARCH_X86
491         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
492                 use_simple_rxtx = 1;
493 #elif defined RTE_ARCH_ARM64 || defined RTE_ARCH_ARM
494         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
495                 use_simple_rxtx = 1;
496 #endif
497         /* Use simple rx/tx func if single segment and no offloads */
498         if (use_simple_rxtx &&
499             (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
500             !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
501                 PMD_INIT_LOG(INFO, "Using simple rx/tx path");
502                 dev->tx_pkt_burst = virtio_xmit_pkts_simple;
503                 dev->rx_pkt_burst = virtio_recv_pkts_vec;
504                 hw->use_simple_rxtx = use_simple_rxtx;
505         }
506 }
507
508 /*
509  * struct rte_eth_dev *dev: device whose tx queue is being set up
510  * uint16_t nb_desc: defaults to the ring size read from config space
511  * unsigned int socket_id: NUMA socket used to allocate the memzone
512  * const struct rte_eth_txconf *tx_conf: used to set up the tx engine
513  * uint16_t queue_idx: index into the device tx queue list
514  */
515 int
516 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
517                         uint16_t queue_idx,
518                         uint16_t nb_desc,
519                         unsigned int socket_id,
520                         const struct rte_eth_txconf *tx_conf)
521 {
522         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
523         struct virtnet_tx *txvq;
524         struct virtqueue *vq;
525         uint16_t tx_free_thresh;
526         int ret;
527
528         PMD_INIT_FUNC_TRACE();
529
530         if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
531             != ETH_TXQ_FLAGS_NOXSUMS) {
532                 PMD_INIT_LOG(ERR, "TX checksum offload not supported");
533                 return -EINVAL;
534         }
535
536         virtio_update_rxtx_handler(dev, tx_conf);
537
538         ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
539                         nb_desc, socket_id, (void **)&txvq);
540         if (ret < 0) {
541                 PMD_INIT_LOG(ERR, "tvq initialization failed");
542                 return ret;
543         }
544         vq = txvq->vq;
545
546         tx_free_thresh = tx_conf->tx_free_thresh;
547         if (tx_free_thresh == 0)
548                 tx_free_thresh =
549                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
550
551         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
552                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
553                         "number of TX entries minus 3 (%u)."
554                         " (tx_free_thresh=%u port=%u queue=%u)\n",
555                         vq->vq_nentries - 3,
556                         tx_free_thresh, dev->data->port_id, queue_idx);
557                 return -EINVAL;
558         }
559
560         vq->vq_free_thresh = tx_free_thresh;
561
562         dev->data->tx_queues[queue_idx] = txvq;
563         return 0;
564 }
565
566 void
567 virtio_dev_tx_queue_release(void *txq)
568 {
569         struct virtnet_tx *txvq = txq;
570         struct virtqueue *vq;
571         const struct rte_memzone *mz;
572         const struct rte_memzone *hdr_mz;
573
574         if (txvq == NULL)
575                 return;
576
577         /*
578          * txvq is freed when vq is freed; the memzones must be freed only
579          * after the queue is released, so save the mz pointers first.
580          */
581         vq = txvq->vq;
582         mz = txvq->mz;
583         hdr_mz = txvq->virtio_net_hdr_mz;
584
585         virtio_dev_queue_release(vq);
586         rte_memzone_free(mz);
587         rte_memzone_free(hdr_mz);
588 }
589
590 static void
591 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
592 {
593         int error;
594         /*
595          * Requeue the discarded mbuf. This should always be
596          * successful since it was just dequeued.
597          */
598         error = virtqueue_enqueue_recv_refill(vq, m);
599         if (unlikely(error)) {
600                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
601                 rte_pktmbuf_free(m);
602         }
603 }
604
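/*
 * Update the per-queue packet-size histogram (size_bins[]) and the
 * multicast/broadcast counters for one packet.
 */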
605 static void
606 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
607 {
608         uint32_t s = mbuf->pkt_len;
609         struct ether_addr *ea;
610
611         if (s == 64) {
612                 stats->size_bins[1]++;
613         } else if (s > 64 && s < 1024) {
614                 uint32_t bin;
615
616                 /* index of the highest set bit selects the bin: 65-127 -> [2] ... 512-1023 -> [5] */
617                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
618                 stats->size_bins[bin]++;
619         } else {
620                 if (s < 64)
621                         stats->size_bins[0]++;
622                 else if (s < 1519)
623                         stats->size_bins[6]++;
624                 else if (s >= 1519)
625                         stats->size_bins[7]++;
626         }
627
628         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
629         if (is_multicast_ether_addr(ea)) {
630                 if (is_broadcast_ether_addr(ea))
631                         stats->broadcast++;
632                 else
633                         stats->multicast++;
634         }
635 }
636
637 /* Parse the virtio net header and optionally fill rx offload flags in the mbuf */
638 static int
639 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
640 {
641         struct rte_net_hdr_lens hdr_lens;
642         uint32_t hdrlen, ptype;
643         int l4_supported = 0;
644
645         /* nothing to do */
646         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
647                 return 0;
648
649         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
650
651         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
652         m->packet_type = ptype;
653         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
654             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
655             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
656                 l4_supported = 1;
657
658         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
659                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
660                 if (hdr->csum_start <= hdrlen && l4_supported) {
661                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
662                 } else {
663                         /* Unknown proto or tunnel, do sw cksum. We can assume
664                          * the cksum field is in the first segment since the
665                          * buffers we provided to the host are large enough.
666                          * In case of SCTP, this will be wrong since it's a CRC
667                          * but there's nothing we can do.
668                          */
669                         uint16_t csum, off;
670
671                         rte_raw_cksum_mbuf(m, hdr->csum_start,
672                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
673                                 &csum);
674                         if (likely(csum != 0xffff))
675                                 csum = ~csum;
676                         off = hdr->csum_offset + hdr->csum_start;
677                         if (rte_pktmbuf_data_len(m) >= off + 1)
678                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
679                                         off) = csum;
680                 }
681         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
682                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
683         }
684
685         return 0;
686 }
687
688 static inline int
689 rx_offload_enabled(struct virtio_hw *hw)
690 {
691         return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM);
692 }
693
694 #define VIRTIO_MBUF_BURST_SZ 64
695 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
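/*
 * Non-mergeable receive path: each packet occupies exactly one descriptor.
 * Dequeue up to nb_pkts packets, account for the virtio net header, apply
 * optional VLAN stripping and rx offload parsing, then refill the ring with
 * fresh mbufs and notify the host if required.
 */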
696 uint16_t
697 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
698 {
699         struct virtnet_rx *rxvq = rx_queue;
700         struct virtqueue *vq = rxvq->vq;
701         struct virtio_hw *hw;
702         struct rte_mbuf *rxm, *new_mbuf;
703         uint16_t nb_used, num, nb_rx;
704         uint32_t len[VIRTIO_MBUF_BURST_SZ];
705         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
706         int error;
707         uint32_t i, nb_enqueued;
708         uint32_t hdr_size;
709         int offload;
710         struct virtio_net_hdr *hdr;
711
712         nb_used = VIRTQUEUE_NUSED(vq);
713
714         virtio_rmb();
715
716         num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
717         num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
718         if (likely(num > DESC_PER_CACHELINE))
719                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
720
721         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
722         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
723
724         hw = vq->hw;
725         nb_rx = 0;
726         nb_enqueued = 0;
727         hdr_size = hw->vtnet_hdr_size;
728         offload = rx_offload_enabled(hw);
729
730         for (i = 0; i < num ; i++) {
731                 rxm = rcv_pkts[i];
732
733                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
734
735                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
736                         PMD_RX_LOG(ERR, "Packet drop");
737                         nb_enqueued++;
738                         virtio_discard_rxbuf(vq, rxm);
739                         rxvq->stats.errors++;
740                         continue;
741                 }
742
743                 rxm->port = rxvq->port_id;
744                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
745                 rxm->ol_flags = 0;
746                 rxm->vlan_tci = 0;
747
748                 rxm->nb_segs = 1;
749                 rxm->next = NULL;
750                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
751                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
752
753                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
754                         RTE_PKTMBUF_HEADROOM - hdr_size);
755
756                 if (hw->vlan_strip)
757                         rte_vlan_strip(rxm);
758
759                 if (offload && virtio_rx_offload(rxm, hdr) < 0) {
760                         virtio_discard_rxbuf(vq, rxm);
761                         rxvq->stats.errors++;
762                         continue;
763                 }
764
765                 VIRTIO_DUMP_PACKET(rxm, rxm->data_len);
766
767                 rx_pkts[nb_rx++] = rxm;
768
769                 rxvq->stats.bytes += rx_pkts[nb_rx - 1]->pkt_len;
770                 virtio_update_packet_stats(&rxvq->stats, rxm);
771         }
772
773         rxvq->stats.packets += nb_rx;
774
775         /* Allocate new mbuf for the used descriptor */
776         error = ENOSPC;
777         while (likely(!virtqueue_full(vq))) {
778                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
779                 if (unlikely(new_mbuf == NULL)) {
780                         struct rte_eth_dev *dev
781                                 = &rte_eth_devices[rxvq->port_id];
782                         dev->data->rx_mbuf_alloc_failed++;
783                         break;
784                 }
785                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
786                 if (unlikely(error)) {
787                         rte_pktmbuf_free(new_mbuf);
788                         break;
789                 }
790                 nb_enqueued++;
791         }
792
793         if (likely(nb_enqueued)) {
794                 vq_update_avail_idx(vq);
795
796                 if (unlikely(virtqueue_kick_prepare(vq))) {
797                         virtqueue_notify(vq);
798                         PMD_RX_LOG(DEBUG, "Notified");
799                 }
800         }
801
802         return nb_rx;
803 }
804
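/*
 * Mergeable receive path (VIRTIO_NET_F_MRG_RXBUF): a packet may span several
 * descriptors; num_buffers in the first header tells how many.  The extra
 * segments are dequeued and chained onto the first mbuf before the ring is
 * refilled.
 */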
805 uint16_t
806 virtio_recv_mergeable_pkts(void *rx_queue,
807                         struct rte_mbuf **rx_pkts,
808                         uint16_t nb_pkts)
809 {
810         struct virtnet_rx *rxvq = rx_queue;
811         struct virtqueue *vq = rxvq->vq;
812         struct virtio_hw *hw;
813         struct rte_mbuf *rxm, *new_mbuf;
814         uint16_t nb_used, num, nb_rx;
815         uint32_t len[VIRTIO_MBUF_BURST_SZ];
816         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
817         struct rte_mbuf *prev;
818         int error;
819         uint32_t i, nb_enqueued;
820         uint32_t seg_num;
821         uint16_t extra_idx;
822         uint32_t seg_res;
823         uint32_t hdr_size;
824         int offload;
825
826         nb_used = VIRTQUEUE_NUSED(vq);
827
828         virtio_rmb();
829
830         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
831
832         hw = vq->hw;
833         nb_rx = 0;
834         i = 0;
835         nb_enqueued = 0;
836         seg_num = 0;
837         extra_idx = 0;
838         seg_res = 0;
839         hdr_size = hw->vtnet_hdr_size;
840         offload = rx_offload_enabled(hw);
841
842         while (i < nb_used) {
843                 struct virtio_net_hdr_mrg_rxbuf *header;
844
845                 if (nb_rx == nb_pkts)
846                         break;
847
848                 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
849                 if (num != 1)
850                         continue;
851
852                 i++;
853
854                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
855                 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
856
857                 rxm = rcv_pkts[0];
858
859                 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
860                         PMD_RX_LOG(ERR, "Packet drop");
861                         nb_enqueued++;
862                         virtio_discard_rxbuf(vq, rxm);
863                         rxvq->stats.errors++;
864                         continue;
865                 }
866
867                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
868                         RTE_PKTMBUF_HEADROOM - hdr_size);
869                 seg_num = header->num_buffers;
870
871                 if (seg_num == 0)
872                         seg_num = 1;
873
874                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
875                 rxm->nb_segs = seg_num;
876                 rxm->next = NULL;
877                 rxm->ol_flags = 0;
878                 rxm->vlan_tci = 0;
879                 rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
880                 rxm->data_len = (uint16_t)(len[0] - hdr_size);
881
882                 rxm->port = rxvq->port_id;
883                 rx_pkts[nb_rx] = rxm;
884                 prev = rxm;
885
886                 if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
887                         virtio_discard_rxbuf(vq, rxm);
888                         rxvq->stats.errors++;
889                         continue;
890                 }
891
892                 seg_res = seg_num - 1;
893
894                 while (seg_res != 0) {
895                         /*
896                          * Get extra segments for current uncompleted packet.
897                          */
898                         uint16_t  rcv_cnt =
899                                 RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
900                         if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
901                                 uint32_t rx_num =
902                                         virtqueue_dequeue_burst_rx(vq,
903                                         rcv_pkts, len, rcv_cnt);
904                                 i += rx_num;
905                                 rcv_cnt = rx_num;
906                         } else {
907                                 PMD_RX_LOG(ERR,
908                                            "Not enough segments for packet.");
909                                 nb_enqueued++;
910                                 virtio_discard_rxbuf(vq, rxm);
911                                 rxvq->stats.errors++;
912                                 break;
913                         }
914
915                         extra_idx = 0;
916
917                         while (extra_idx < rcv_cnt) {
918                                 rxm = rcv_pkts[extra_idx];
919
920                                 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
921                                 rxm->next = NULL;
922                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
923                                 rxm->data_len = (uint16_t)(len[extra_idx]);
924
925                                 if (prev)
926                                         prev->next = rxm;
927
928                                 prev = rxm;
929                                 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
930                                 extra_idx++;
931                         }
932                         seg_res -= rcv_cnt;
933                 }
934
935                 if (hw->vlan_strip)
936                         rte_vlan_strip(rx_pkts[nb_rx]);
937
938                 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
939                         rx_pkts[nb_rx]->data_len);
940
941                 rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
942                 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
943                 nb_rx++;
944         }
945
946         rxvq->stats.packets += nb_rx;
947
948         /* Allocate new mbuf for the used descriptor */
949         error = ENOSPC;
950         while (likely(!virtqueue_full(vq))) {
951                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
952                 if (unlikely(new_mbuf == NULL)) {
953                         struct rte_eth_dev *dev
954                                 = &rte_eth_devices[rxvq->port_id];
955                         dev->data->rx_mbuf_alloc_failed++;
956                         break;
957                 }
958                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
959                 if (unlikely(error)) {
960                         rte_pktmbuf_free(new_mbuf);
961                         break;
962                 }
963                 nb_enqueued++;
964         }
965
966         if (likely(nb_enqueued)) {
967                 vq_update_avail_idx(vq);
968
969                 if (unlikely(virtqueue_kick_prepare(vq))) {
970                         virtqueue_notify(vq);
971                         PMD_RX_LOG(DEBUG, "Notified");
972                 }
973         }
974
975         return nb_rx;
976 }
977
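/*
 * Transmit burst: reclaim completed descriptors as needed, enqueue each
 * packet using the cheapest available layout (any_layout push, indirect
 * descriptors, or the default header + segment chain), and notify the host
 * once at the end of the burst.
 */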
978 uint16_t
979 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
980 {
981         struct virtnet_tx *txvq = tx_queue;
982         struct virtqueue *vq = txvq->vq;
983         struct virtio_hw *hw = vq->hw;
984         uint16_t hdr_size = hw->vtnet_hdr_size;
985         uint16_t nb_used, nb_tx;
986         int error;
987
988         if (unlikely(nb_pkts < 1))
989                 return nb_pkts;
990
991         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
992         nb_used = VIRTQUEUE_NUSED(vq);
993
994         virtio_rmb();
995         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
996                 virtio_xmit_cleanup(vq, nb_used);
997
998         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
999                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1000                 int can_push = 0, use_indirect = 0, slots, need;
1001
1002                 /* Do VLAN tag insertion */
1003                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1004                         error = rte_vlan_insert(&txm);
1005                         if (unlikely(error)) {
1006                                 rte_pktmbuf_free(txm);
1007                                 continue;
1008                         }
1009                 }
1010
1011                 /* optimize ring usage */
1012                 if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) &&
1013                     rte_mbuf_refcnt_read(txm) == 1 &&
1014                     RTE_MBUF_DIRECT(txm) &&
1015                     txm->nb_segs == 1 &&
1016                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1017                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1018                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1019                         can_push = 1;
1020                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1021                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1022                         use_indirect = 1;
1023
1024                 /* How many main ring entries are needed for this Tx?
1025                  * any_layout => number of segments
1026                  * indirect   => 1
1027                  * default    => number of segments + 1
1028                  */
1029                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1030                 need = slots - vq->vq_free_cnt;
1031
1032                 /* A positive value means we must reclaim used descriptors first */
1033                 if (unlikely(need > 0)) {
1034                         nb_used = VIRTQUEUE_NUSED(vq);
1035                         virtio_rmb();
1036                         need = RTE_MIN(need, (int)nb_used);
1037
1038                         virtio_xmit_cleanup(vq, need);
1039                         need = slots - vq->vq_free_cnt;
1040                         if (unlikely(need > 0)) {
1041                                 PMD_TX_LOG(ERR,
1042                                            "No free tx descriptors to transmit");
1043                                 break;
1044                         }
1045                 }
1046
1047                 /* Enqueue Packet buffers */
1048                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
1049
1050                 txvq->stats.bytes += txm->pkt_len;
1051                 virtio_update_packet_stats(&txvq->stats, txm);
1052         }
1053
1054         txvq->stats.packets += nb_tx;
1055
1056         if (likely(nb_tx)) {
1057                 vq_update_avail_idx(vq);
1058
1059                 if (unlikely(virtqueue_kick_prepare(vq))) {
1060                         virtqueue_notify(vq);
1061                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1062                 }
1063         }
1064
1065         return nb_tx;
1066 }