virtio: use indirect ring elements
[dpdk.git] / drivers / net / virtio / virtio_rxtx.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif


#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
        ETH_TXQ_FLAGS_NOOFFLOADS)

#ifdef RTE_MACHINE_CPUFLAG_SSSE3
static int use_simple_rxtx;
#endif

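/*
 * Return the descriptor chain starting at desc_idx to the free list.
 * For a regular chain the VRING_DESC_F_NEXT links are walked to find the
 * tail; an indirect descriptor occupies only one slot in the main ring,
 * so only that slot is reclaimed (the indirect table itself lives in the
 * reserved memzone and is reused on the next transmit).
 */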
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
        struct vring_desc *dp, *dp_tail;
        struct vq_desc_extra *dxp;
        uint16_t desc_idx_last = desc_idx;

        dp  = &vq->vq_ring.desc[desc_idx];
        dxp = &vq->vq_descx[desc_idx];
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
        if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
                while (dp->flags & VRING_DESC_F_NEXT) {
                        desc_idx_last = dp->next;
                        dp = &vq->vq_ring.desc[dp->next];
                }
        }
        dxp->ndescs = 0;

        /*
         * We must append the existing free chain, if any, to the end of
         * the newly freed chain. If the virtqueue was completely used,
         * then the free list is empty and the tail index is
         * VQ_RING_DESC_CHAIN_END.
         */
        if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
                vq->vq_desc_head_idx = desc_idx;
        } else {
                dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
                dp_tail->next = desc_idx;
        }

        vq->vq_desc_tail_idx = desc_idx_last;
        dp->next = VQ_RING_DESC_CHAIN_END;
}

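/*
 * Dequeue up to 'num' completed buffers from the used ring.  The caller is
 * expected to have checked VIRTQUEUE_NUSED() beforehand; each used element
 * gives back the head descriptor index, which is used to look up the mbuf
 * cookie and to return the descriptor chain to the free list.
 */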
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
                           uint32_t *len, uint16_t num)
{
        struct vring_used_elem *uep;
        struct rte_mbuf *cookie;
        uint16_t used_idx, desc_idx;
        uint16_t i;

        /*  Caller does the check */
        for (i = 0; i < num; i++) {
                used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];
                desc_idx = (uint16_t) uep->id;
                len[i] = uep->len;
                cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

                if (unlikely(cookie == NULL)) {
                        PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
                                vq->vq_used_cons_idx);
                        break;
                }

                rte_prefetch0(cookie);
                rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
                rx_pkts[i]  = cookie;
                vq->vq_used_cons_idx++;
                vq_ring_free_chain(vq, desc_idx);
                vq->vq_descx[desc_idx].cookie = NULL;
        }

        return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
        uint16_t i, used_idx, desc_idx;
        for (i = 0; i < num; i++) {
                struct vring_used_elem *uep;
                struct vq_desc_extra *dxp;

                used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];

                desc_idx = (uint16_t) uep->id;
                dxp = &vq->vq_descx[desc_idx];
                vq->vq_used_cons_idx++;
                vq_ring_free_chain(vq, desc_idx);

                if (dxp->cookie != NULL) {
                        rte_pktmbuf_free(dxp->cookie);
                        dxp->cookie = NULL;
                }
        }
}


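/*
 * Post a single mbuf as a receive buffer.  The descriptor address is set
 * hw->vtnet_hdr_size bytes before the packet data so that the device can
 * prepend the virtio-net header inside the mbuf headroom.
 */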
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
        struct vq_desc_extra *dxp;
        struct virtio_hw *hw = vq->hw;
        struct vring_desc *start_dp;
        uint16_t needed = 1;
        uint16_t head_idx, idx;

        if (unlikely(vq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(vq->vq_free_cnt < needed))
                return -EMSGSIZE;

        head_idx = vq->vq_desc_head_idx;
        if (unlikely(head_idx >= vq->vq_nentries))
                return -EFAULT;

        idx = head_idx;
        dxp = &vq->vq_descx[idx];
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

        start_dp = vq->vq_ring.desc;
        start_dp[idx].addr =
                (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
                - hw->vtnet_hdr_size);
        start_dp[idx].len =
                cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
        start_dp[idx].flags = VRING_DESC_F_WRITE;
        idx = start_dp[idx].next;
        vq->vq_desc_head_idx = idx;
        if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                vq->vq_desc_tail_idx = idx;
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
        vq_update_avail_ring(vq, head_idx);

        return 0;
}

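/*
 * Enqueue one packet for transmission.  Two descriptor layouts are used:
 *
 *  - direct: one slot for the virtio-net header (kept in the reserved
 *    virtio_tx_region for this ring index) followed by one slot per
 *    mbuf segment, chained with VRING_DESC_F_NEXT;
 *
 *  - indirect (when VIRTIO_RING_F_INDIRECT_DESC is negotiated): a single
 *    main-ring slot flagged VRING_DESC_F_INDIRECT that points at the
 *    per-slot tx_indir table inside struct virtio_tx_region, whose first
 *    entry is preset to the header and whose remaining entries are filled
 *    in below with the mbuf segments.
 */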
static int
virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie,
                       int use_indirect)
{
        struct vq_desc_extra *dxp;
        struct vring_desc *start_dp;
        uint16_t seg_num = cookie->nb_segs;
        uint16_t needed = use_indirect ? 1 : 1 + seg_num;
        uint16_t head_idx, idx;
        unsigned long offs;

        if (unlikely(txvq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(txvq->vq_free_cnt < needed))
                return -EMSGSIZE;
        head_idx = txvq->vq_desc_head_idx;
        if (unlikely(head_idx >= txvq->vq_nentries))
                return -EFAULT;

        idx = head_idx;
        dxp = &txvq->vq_descx[idx];
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

        start_dp = txvq->vq_ring.desc;

        if (use_indirect) {
                /* Set up the tx ring slot to point to the indirect
                 * descriptor list stored in the reserved region.
                 *
                 * The first slot of the indirect ring is already preset
                 * to point to the header in the reserved region.
                 */
                struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;

                offs = idx * sizeof(struct virtio_tx_region)
                        + offsetof(struct virtio_tx_region, tx_indir);

                start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
                start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
                start_dp[idx].flags = VRING_DESC_F_INDIRECT;

                /* The loop below fills in the rest of the indirect elements. */
                start_dp = txr[idx].tx_indir;
                idx = 0;
        } else {
                /* Set up the first tx ring slot to point to the header
                 * stored in the reserved region.
                 */
                offs = idx * sizeof(struct virtio_tx_region)
                        + offsetof(struct virtio_tx_region, tx_hdr);

                start_dp[idx].addr  = txvq->virtio_net_hdr_mem + offs;
                start_dp[idx].len   = txvq->hw->vtnet_hdr_size;
                start_dp[idx].flags = VRING_DESC_F_NEXT;
        }

        for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
                idx = start_dp[idx].next;
                start_dp[idx].addr  = rte_mbuf_data_dma_addr(cookie);
                start_dp[idx].len   = cookie->data_len;
                start_dp[idx].flags = VRING_DESC_F_NEXT;
                cookie = cookie->next;
        }

        start_dp[idx].flags &= ~VRING_DESC_F_NEXT;

        if (use_indirect)
                idx = txvq->vq_ring.desc[head_idx].next;
        else
                idx = start_dp[idx].next;

        txvq->vq_desc_head_idx = idx;
        if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                txvq->vq_desc_tail_idx = idx;
        txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
        vq_update_avail_ring(txvq, head_idx);

        return 0;
}

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *m;

        m = __rte_mbuf_raw_alloc(mp);
        __rte_mbuf_sanity_check_raw(m, 0);

        return m;
}

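/*
 * (Re)initialize a virtqueue: the ring memory is cleared and the descriptor
 * free list rebuilt, since the port may have been stopped and restarted.
 * Receive queues are additionally filled with blank mbufs; transmit queues
 * only need the simple-path header descriptors prepared when that path is
 * enabled.
 */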
static void
virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
{
        struct rte_mbuf *m;
        int i, nbufs, error, size = vq->vq_nentries;
        struct vring *vr = &vq->vq_ring;
        uint8_t *ring_mem = vq->vq_ring_virt_mem;

        PMD_INIT_FUNC_TRACE();

        /*
         * Reinitialise since virtio port might have been stopped and restarted
         */
        memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
        vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
        vq->vq_used_cons_idx = 0;
        vq->vq_desc_head_idx = 0;
        vq->vq_avail_idx = 0;
        vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
        vq->vq_free_cnt = vq->vq_nentries;
        memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);

        vring_desc_init(vr->desc, size);

        /*
         * Disable the device (host) from interrupting the guest
         */
        virtqueue_disable_intr(vq);

        /* Only rx virtqueue needs mbufs to be allocated at initialization */
        if (queue_type == VTNET_RQ) {
                if (vq->mpool == NULL)
                        rte_exit(EXIT_FAILURE,
                        "Cannot allocate initial mbufs for rx virtqueue");

                /* Allocate blank mbufs for each rx descriptor */
                nbufs = 0;
                error = ENOSPC;

#ifdef RTE_MACHINE_CPUFLAG_SSSE3
                if (use_simple_rxtx)
                        for (i = 0; i < vq->vq_nentries; i++) {
                                vq->vq_ring.avail->ring[i] = i;
                                vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
                        }
#endif
                memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
                for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
                        vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;

                while (!virtqueue_full(vq)) {
                        m = rte_rxmbuf_alloc(vq->mpool);
                        if (m == NULL)
                                break;

                        /* Enqueue allocated buffers */
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
                        if (use_simple_rxtx)
                                error = virtqueue_enqueue_recv_refill_simple(vq, m);
                        else
#endif
                                error = virtqueue_enqueue_recv_refill(vq, m);
                        if (error) {
                                rte_pktmbuf_free(m);
                                break;
                        }
                        nbufs++;
                }

                vq_update_avail_idx(vq);

                PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
        } else if (queue_type == VTNET_TQ) {
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
                if (use_simple_rxtx) {
                        int mid_idx = vq->vq_nentries >> 1;
                        for (i = 0; i < mid_idx; i++) {
                                vq->vq_ring.avail->ring[i] = i + mid_idx;
                                vq->vq_ring.desc[i + mid_idx].next = i;
                                vq->vq_ring.desc[i + mid_idx].addr =
                                        vq->virtio_net_hdr_mem +
                                                i * vq->hw->vtnet_hdr_size;
                                vq->vq_ring.desc[i + mid_idx].len =
                                        vq->hw->vtnet_hdr_size;
                                vq->vq_ring.desc[i + mid_idx].flags =
                                        VRING_DESC_F_NEXT;
                                vq->vq_ring.desc[i].flags = 0;
                        }
                        for (i = mid_idx; i < vq->vq_nentries; i++)
                                vq->vq_ring.avail->ring[i] = i;
                }
#endif
        }
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (hw->cvq) {
                virtio_dev_vring_start(hw->cvq, VTNET_CQ);
                VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
        }
}

void
virtio_dev_rxtx_start(struct rte_eth_dev *dev)
{
        /*
         * Start receive and transmit vrings
         * -    Setup vring structure for all queues
         * -    Initialize descriptors for the rx vring
         * -    Allocate blank mbufs for each rx descriptor
         */
        int i;

        PMD_INIT_FUNC_TRACE();

        /* Start rx vring. */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
        }

        /* Start tx vring. */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
        }
}

int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id,
                        __rte_unused const struct rte_eth_rxconf *rx_conf,
                        struct rte_mempool *mp)
{
        uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
        struct virtqueue *vq;
        int ret;

        PMD_INIT_FUNC_TRACE();
        ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
                        nb_desc, socket_id, &vq);
        if (ret < 0) {
                PMD_INIT_LOG(ERR, "rvq initialization failed");
                return ret;
        }

        /* Use the provided mempool for rx mbuf allocation */
        vq->mpool = mp;

        dev->data->rx_queues[queue_idx] = vq;

#ifdef RTE_MACHINE_CPUFLAG_SSSE3
        virtio_rxq_vec_setup(vq);
#endif

        return 0;
}

void
virtio_dev_rx_queue_release(void *rxq)
{
        virtio_dev_queue_release(rxq);
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id,
                        const struct rte_eth_txconf *tx_conf)
{
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;

#ifdef RTE_MACHINE_CPUFLAG_SSSE3
        struct virtio_hw *hw = dev->data->dev_private;
#endif
        struct virtqueue *vq;
        uint16_t tx_free_thresh;
        int ret;

        PMD_INIT_FUNC_TRACE();

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
            != ETH_TXQ_FLAGS_NOXSUMS) {
                PMD_INIT_LOG(ERR, "TX checksum offload not supported\n");
                return -EINVAL;
        }

#ifdef RTE_MACHINE_CPUFLAG_SSSE3
        /* Use the simple rx/tx functions if single segment and no offloads */
        if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
             !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
                PMD_INIT_LOG(INFO, "Using simple rx/tx path");
                dev->tx_pkt_burst = virtio_xmit_pkts_simple;
                dev->rx_pkt_burst = virtio_recv_pkts_vec;
                use_simple_rxtx = 1;
        }
#endif

        ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
                        nb_desc, socket_id, &vq);
        if (ret < 0) {
                PMD_INIT_LOG(ERR, "tvq initialization failed");
                return ret;
        }

        tx_free_thresh = tx_conf->tx_free_thresh;
        if (tx_free_thresh == 0)
                tx_free_thresh =
                        RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

        if (tx_free_thresh >= (vq->vq_nentries - 3)) {
                RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
                        "number of TX entries minus 3 (%u)."
                        " (tx_free_thresh=%u port=%u queue=%u)\n",
                        vq->vq_nentries - 3,
                        tx_free_thresh, dev->data->port_id, queue_idx);
                return -EINVAL;
        }

        vq->vq_free_thresh = tx_free_thresh;

        dev->data->tx_queues[queue_idx] = vq;
        return 0;
}

void
virtio_dev_tx_queue_release(void *txq)
{
        virtio_dev_queue_release(txq);
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
        int error;
        /*
         * Requeue the discarded mbuf. This should always be
         * successful since it was just dequeued.
         */
        error = virtqueue_enqueue_recv_refill(vq, m);
        if (unlikely(error)) {
                RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
                rte_pktmbuf_free(m);
        }
}

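/*
 * Update per-queue size-bin and multicast/broadcast counters for one packet.
 * Bins cover <64, 64, 65-127, 128-255, 256-511, 512-1023, 1024-1518 and
 * 1519+ bytes; the middle bins are derived from the position of the most
 * significant bit of the packet length.
 */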
static void
virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf)
{
        uint32_t s = mbuf->pkt_len;
        struct ether_addr *ea;

        if (s == 64) {
                vq->size_bins[1]++;
        } else if (s > 64 && s < 1024) {
                uint32_t bin;

                /* count leading zeros to find the bin for this power of two */
                bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
                vq->size_bins[bin]++;
        } else {
                if (s < 64)
                        vq->size_bins[0]++;
                else if (s < 1519)
                        vq->size_bins[6]++;
                else if (s >= 1519)
                        vq->size_bins[7]++;
        }

        ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
        if (is_multicast_ether_addr(ea)) {
                if (is_broadcast_ether_addr(ea))
                        vq->broadcast++;
                else
                        vq->multicast++;
        }
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
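/*
 * Standard (non-mergeable) receive path: one descriptor per packet.  The
 * burst size is clipped so that, where possible, the used-ring consumer
 * index ends on a descriptor cache-line boundary, and the ring is refilled
 * with fresh mbufs before notifying the host.
 */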
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct virtqueue *rxvq = rx_queue;
        struct virtio_hw *hw;
        struct rte_mbuf *rxm, *new_mbuf;
        uint16_t nb_used, num, nb_rx;
        uint32_t len[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
        int error;
        uint32_t i, nb_enqueued;
        uint32_t hdr_size;

        nb_used = VIRTQUEUE_NUSED(rxvq);

        virtio_rmb();

        num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
        num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
        if (likely(num > DESC_PER_CACHELINE))
                num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

        if (num == 0)
                return 0;

        num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
        PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

        hw = rxvq->hw;
        nb_rx = 0;
        nb_enqueued = 0;
        hdr_size = hw->vtnet_hdr_size;

        for (i = 0; i < num; i++) {
                rxm = rcv_pkts[i];

                PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

                if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
                        PMD_RX_LOG(ERR, "Packet drop");
                        nb_enqueued++;
                        virtio_discard_rxbuf(rxvq, rxm);
                        rxvq->errors++;
                        continue;
                }

                rxm->port = rxvq->port_id;
                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->ol_flags = 0;
                rxm->vlan_tci = 0;

                rxm->nb_segs = 1;
                rxm->next = NULL;
                rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
                rxm->data_len = (uint16_t)(len[i] - hdr_size);

                if (hw->vlan_strip)
                        rte_vlan_strip(rxm);

                VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

                rx_pkts[nb_rx++] = rxm;

                rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
                virtio_update_packet_stats(rxvq, rxm);
        }

        rxvq->packets += nb_rx;

        /* Allocate new mbufs to refill the used descriptors */
        error = ENOSPC;
        while (likely(!virtqueue_full(rxvq))) {
                new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
                if (unlikely(new_mbuf == NULL)) {
                        struct rte_eth_dev *dev
                                = &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed++;
                        break;
                }
                error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
                if (unlikely(error)) {
                        rte_pktmbuf_free(new_mbuf);
                        break;
                }
                nb_enqueued++;
        }

        if (likely(nb_enqueued)) {
                vq_update_avail_idx(rxvq);

                if (unlikely(virtqueue_kick_prepare(rxvq))) {
                        virtqueue_notify(rxvq);
                        PMD_RX_LOG(DEBUG, "Notified");
                }
        }

        return nb_rx;
}

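/*
 * Mergeable receive path (VIRTIO_NET_F_MRG_RXBUF): a packet may span
 * several receive buffers.  The virtio_net_hdr_mrg_rxbuf header in the
 * first buffer carries num_buffers; the extra buffers are dequeued and
 * chained onto the first mbuf before the packet is handed to the caller.
 */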
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
                        struct rte_mbuf **rx_pkts,
                        uint16_t nb_pkts)
{
        struct virtqueue *rxvq = rx_queue;
        struct virtio_hw *hw;
        struct rte_mbuf *rxm, *new_mbuf;
        uint16_t nb_used, num, nb_rx;
        uint32_t len[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *prev;
        int error;
        uint32_t i, nb_enqueued;
        uint32_t seg_num;
        uint16_t extra_idx;
        uint32_t seg_res;
        uint32_t hdr_size;

        nb_used = VIRTQUEUE_NUSED(rxvq);

        virtio_rmb();

        if (nb_used == 0)
                return 0;

        PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);

        hw = rxvq->hw;
        nb_rx = 0;
        i = 0;
        nb_enqueued = 0;
        seg_num = 0;
        extra_idx = 0;
        seg_res = 0;
        hdr_size = hw->vtnet_hdr_size;

        while (i < nb_used) {
                struct virtio_net_hdr_mrg_rxbuf *header;

                if (nb_rx == nb_pkts)
                        break;

                num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
                if (num != 1)
                        continue;

                i++;

                PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
                PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);

                rxm = rcv_pkts[0];

                if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
                        PMD_RX_LOG(ERR, "Packet drop\n");
                        nb_enqueued++;
                        virtio_discard_rxbuf(rxvq, rxm);
                        rxvq->errors++;
                        continue;
                }

                header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
                        RTE_PKTMBUF_HEADROOM - hdr_size);
                seg_num = header->num_buffers;

                if (seg_num == 0)
                        seg_num = 1;

                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->nb_segs = seg_num;
                rxm->next = NULL;
                rxm->ol_flags = 0;
                rxm->vlan_tci = 0;
                rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
                rxm->data_len = (uint16_t)(len[0] - hdr_size);

                rxm->port = rxvq->port_id;
                rx_pkts[nb_rx] = rxm;
                prev = rxm;

                seg_res = seg_num - 1;

                while (seg_res != 0) {
                        /*
                         * Get extra segments for the current incomplete packet.
                         */
                        uint16_t rcv_cnt =
                                RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
                        if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
                                uint32_t rx_num =
                                        virtqueue_dequeue_burst_rx(rxvq,
                                        rcv_pkts, len, rcv_cnt);
                                i += rx_num;
                                rcv_cnt = rx_num;
                        } else {
                                PMD_RX_LOG(ERR,
                                        "Not enough segments for packet.\n");
                                nb_enqueued++;
                                virtio_discard_rxbuf(rxvq, rxm);
                                rxvq->errors++;
                                break;
                        }

                        extra_idx = 0;

                        while (extra_idx < rcv_cnt) {
                                rxm = rcv_pkts[extra_idx];

                                rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
                                rxm->next = NULL;
                                rxm->pkt_len = (uint32_t)(len[extra_idx]);
                                rxm->data_len = (uint16_t)(len[extra_idx]);

                                if (prev)
                                        prev->next = rxm;

                                prev = rxm;
                                rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
                                extra_idx++;
                        }
                        seg_res -= rcv_cnt;
                }

                if (hw->vlan_strip)
                        rte_vlan_strip(rx_pkts[nb_rx]);

                VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
                        rx_pkts[nb_rx]->data_len);

                rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
                virtio_update_packet_stats(rxvq, rx_pkts[nb_rx]);
                nb_rx++;
        }

        rxvq->packets += nb_rx;

        /* Allocate new mbufs to refill the used descriptors */
        error = ENOSPC;
        while (likely(!virtqueue_full(rxvq))) {
                new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
                if (unlikely(new_mbuf == NULL)) {
                        struct rte_eth_dev *dev
                                = &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed++;
                        break;
                }
                error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
                if (unlikely(error)) {
                        rte_pktmbuf_free(new_mbuf);
                        break;
                }
                nb_enqueued++;
        }

        if (likely(nb_enqueued)) {
                vq_update_avail_idx(rxvq);

                if (unlikely(virtqueue_kick_prepare(rxvq))) {
                        virtqueue_notify(rxvq);
                        PMD_RX_LOG(DEBUG, "Notified");
                }
        }

        return nb_rx;
}

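/*
 * Transmit burst.  Used descriptors are reclaimed up front when enough have
 * accumulated (based on the queue's tx_free_thresh) and again on demand when
 * a packet does not fit in the free list.  Each packet is posted either as a
 * single indirect descriptor (when VIRTIO_RING_F_INDIRECT_DESC is negotiated
 * and the segment count is below VIRTIO_MAX_TX_INDIRECT) or as a direct
 * header + segment chain, and the host is notified once per burst.
 */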
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
        struct virtqueue *txvq = tx_queue;
        uint16_t nb_used, nb_tx;
        int error;

        if (unlikely(nb_pkts < 1))
                return nb_pkts;

        PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
        nb_used = VIRTQUEUE_NUSED(txvq);

        virtio_rmb();
        if (likely(nb_used > txvq->vq_nentries - txvq->vq_free_thresh))
                virtio_xmit_cleanup(txvq, nb_used);

        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                struct rte_mbuf *txm = tx_pkts[nb_tx];
                int use_indirect, slots, need;

                use_indirect = vtpci_with_feature(txvq->hw,
                                                  VIRTIO_RING_F_INDIRECT_DESC)
                        && (txm->nb_segs < VIRTIO_MAX_TX_INDIRECT);

                /* How many main ring entries are needed for this Tx? */
                slots = use_indirect ? 1 : 1 + txm->nb_segs;
                need = slots - txvq->vq_free_cnt;

                /* A positive value indicates we need to free vring descriptors */
                if (unlikely(need > 0)) {
                        nb_used = VIRTQUEUE_NUSED(txvq);
                        virtio_rmb();
                        need = RTE_MIN(need, (int)nb_used);

                        virtio_xmit_cleanup(txvq, need);
                        need = slots - txvq->vq_free_cnt;
                        if (unlikely(need > 0)) {
                                PMD_TX_LOG(ERR,
                                           "No free tx descriptors to transmit");
                                break;
                        }
                }

                /* Do VLAN tag insertion */
                if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
                        error = rte_vlan_insert(&txm);
                        if (unlikely(error)) {
                                rte_pktmbuf_free(txm);
                                continue;
                        }
                }

                /* Enqueue packet buffers */
                error = virtqueue_enqueue_xmit(txvq, txm, use_indirect);
                if (unlikely(error)) {
                        if (error == -ENOSPC)
                                PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
                        else if (error == -EMSGSIZE)
                                PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1");
                        else
                                PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d", error);
                        break;
                }

                txvq->bytes += txm->pkt_len;
                virtio_update_packet_stats(txvq, txm);
        }

        txvq->packets += nb_tx;

        if (likely(nb_tx)) {
                vq_update_avail_idx(txvq);

                if (unlikely(virtqueue_kick_prepare(txvq))) {
                        virtqueue_notify(txvq);
                        PMD_TX_LOG(DEBUG, "Notified backend after xmit");
                }
        }

        return nb_tx;
}