virtio: optimize ring layout
drivers/net/virtio/virtio_rxtx.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtqueue.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

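/*
 * This flag appears to select the simplified ("simple") rx/tx ring layout
 * prepared in virtio_dev_vring_start() below; it is presumably set
 * elsewhere in the driver when that fast path can be used.
 */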
static int use_simple_rxtx;

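/*
 * Return the descriptor chain starting at desc_idx to the free list.
 * vq_free_cnt is credited with the chain length recorded in the matching
 * vq_desc_extra entry, and the freed chain is linked after the current
 * tail (or becomes the new head if the ring was completely used).
 */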
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
        struct vring_desc *dp, *dp_tail;
        struct vq_desc_extra *dxp;
        uint16_t desc_idx_last = desc_idx;

        dp  = &vq->vq_ring.desc[desc_idx];
        dxp = &vq->vq_descx[desc_idx];
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
        if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
                while (dp->flags & VRING_DESC_F_NEXT) {
                        desc_idx_last = dp->next;
                        dp = &vq->vq_ring.desc[dp->next];
                }
        }
        dxp->ndescs = 0;

        /*
         * We must append the existing free chain, if any, to the end of
         * newly freed chain. If the virtqueue was completely used, then
         * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
         */
        if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
                vq->vq_desc_head_idx = desc_idx;
        } else {
                dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
                dp_tail->next = desc_idx;
        }

        vq->vq_desc_tail_idx = desc_idx_last;
        dp->next = VQ_RING_DESC_CHAIN_END;
}

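/*
 * Pop up to 'num' completed buffers from the used ring. For each used
 * element, look up the mbuf "cookie" saved at enqueue time, record the
 * length written by the device, free the descriptor chain and advance
 * vq_used_cons_idx. Returns the number of mbufs actually delivered.
 */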
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
                           uint32_t *len, uint16_t num)
{
        struct vring_used_elem *uep;
        struct rte_mbuf *cookie;
        uint16_t used_idx, desc_idx;
        uint16_t i;

        /* Caller does the check */
        for (i = 0; i < num; i++) {
                used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];
                desc_idx = (uint16_t) uep->id;
                len[i] = uep->len;
                cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

                if (unlikely(cookie == NULL)) {
                        PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
                                vq->vq_used_cons_idx);
                        break;
                }

                rte_prefetch0(cookie);
                rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
                rx_pkts[i]  = cookie;
                vq->vq_used_cons_idx++;
                vq_ring_free_chain(vq, desc_idx);
                vq->vq_descx[desc_idx].cookie = NULL;
        }

        return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
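/*
 * Walk 'num' entries of the used ring, return each descriptor chain to
 * the free list and free the transmitted mbuf still referenced by the
 * per-descriptor cookie.
 */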
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
        uint16_t i, used_idx, desc_idx;
        for (i = 0; i < num; i++) {
                struct vring_used_elem *uep;
                struct vq_desc_extra *dxp;

                used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];

                desc_idx = (uint16_t) uep->id;
                dxp = &vq->vq_descx[desc_idx];
                vq->vq_used_cons_idx++;
                vq_ring_free_chain(vq, desc_idx);

                if (dxp->cookie != NULL) {
                        rte_pktmbuf_free(dxp->cookie);
                        dxp->cookie = NULL;
                }
        }
}

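/*
 * Post a single receive buffer. The descriptor points at the mbuf buffer
 * minus vtnet_hdr_size, so the device writes the virtio-net header into
 * the mbuf headroom immediately in front of the packet data. Returns 0 on
 * success or a negative errno when the ring is full.
 */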
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
        struct vq_desc_extra *dxp;
        struct virtio_hw *hw = vq->hw;
        struct vring_desc *start_dp;
        uint16_t needed = 1;
        uint16_t head_idx, idx;

        if (unlikely(vq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(vq->vq_free_cnt < needed))
                return -EMSGSIZE;

        head_idx = vq->vq_desc_head_idx;
        if (unlikely(head_idx >= vq->vq_nentries))
                return -EFAULT;

        idx = head_idx;
        dxp = &vq->vq_descx[idx];
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

        start_dp = vq->vq_ring.desc;
        start_dp[idx].addr =
                (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
                - hw->vtnet_hdr_size);
        start_dp[idx].len =
                cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
        start_dp[idx].flags = VRING_DESC_F_WRITE;
        idx = start_dp[idx].next;
        vq->vq_desc_head_idx = idx;
        if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                vq->vq_desc_tail_idx = idx;
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
        vq_update_avail_ring(vq, head_idx);

        return 0;
}

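/*
 * Enqueue one packet for transmission. The chain uses 1 + nb_segs
 * descriptors: the first points at the per-descriptor virtio-net header
 * kept in virtio_net_hdr_mem, the rest at the mbuf data segments.
 */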
static int
virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
{
        struct vq_desc_extra *dxp;
        struct vring_desc *start_dp;
        uint16_t seg_num = cookie->nb_segs;
        uint16_t needed = 1 + seg_num;
        uint16_t head_idx, idx;
        size_t head_size = txvq->hw->vtnet_hdr_size;

        if (unlikely(txvq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(txvq->vq_free_cnt < needed))
                return -EMSGSIZE;
        head_idx = txvq->vq_desc_head_idx;
        if (unlikely(head_idx >= txvq->vq_nentries))
                return -EFAULT;

        idx = head_idx;
        dxp = &txvq->vq_descx[idx];
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

        start_dp = txvq->vq_ring.desc;
        start_dp[idx].addr =
                txvq->virtio_net_hdr_mem + idx * head_size;
        start_dp[idx].len = head_size;
        start_dp[idx].flags = VRING_DESC_F_NEXT;

        for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
                idx = start_dp[idx].next;
                start_dp[idx].addr  = RTE_MBUF_DATA_DMA_ADDR(cookie);
                start_dp[idx].len   = cookie->data_len;
                start_dp[idx].flags = VRING_DESC_F_NEXT;
                cookie = cookie->next;
        }

        start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
        idx = start_dp[idx].next;
        txvq->vq_desc_head_idx = idx;
        if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                txvq->vq_desc_tail_idx = idx;
        txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
        vq_update_avail_ring(txvq, head_idx);

        return 0;
}

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *m;

        m = __rte_mbuf_raw_alloc(mp);
        __rte_mbuf_sanity_check_raw(m, 0);

        return m;
}

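/*
 * (Re)initialise a virtqueue: rebuild the vring in the queue's memzone,
 * chain all descriptors into the free list, pre-fill the rx ring with
 * mbufs and program the queue PFN into the device.
 */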
static void
virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
{
        struct rte_mbuf *m;
        int i, nbufs, error, size = vq->vq_nentries;
        struct vring *vr = &vq->vq_ring;
        uint8_t *ring_mem = vq->vq_ring_virt_mem;

        PMD_INIT_FUNC_TRACE();

        /*
         * Reinitialise since the virtio port might have been stopped and restarted.
         */
        memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
        vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
        vq->vq_used_cons_idx = 0;
        vq->vq_desc_head_idx = 0;
        vq->vq_avail_idx = 0;
        vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
        vq->vq_free_cnt = vq->vq_nentries;
        memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);

        /* Chain all the descriptors in the ring with an END */
        for (i = 0; i < size - 1; i++)
                vr->desc[i].next = (uint16_t)(i + 1);
        vr->desc[i].next = VQ_RING_DESC_CHAIN_END;

        /*
         * Disable the device (host) from interrupting the guest.
         */
        virtqueue_disable_intr(vq);

        /* Only rx virtqueue needs mbufs to be allocated at initialization */
        if (queue_type == VTNET_RQ) {
                if (vq->mpool == NULL)
                        rte_exit(EXIT_FAILURE,
                        "Cannot allocate initial mbufs for rx virtqueue");

                /* Allocate blank mbufs for each rx descriptor */
                nbufs = 0;
                error = ENOSPC;

                if (use_simple_rxtx)
                        for (i = 0; i < vq->vq_nentries; i++) {
                                vq->vq_ring.avail->ring[i] = i;
                                vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
                        }

                while (!virtqueue_full(vq)) {
                        m = rte_rxmbuf_alloc(vq->mpool);
                        if (m == NULL)
                                break;

                        /******************************************
                        *         Enqueue allocated buffers        *
                        *******************************************/
                        error = virtqueue_enqueue_recv_refill(vq, m);

                        if (error) {
                                rte_pktmbuf_free(m);
                                break;
                        }
                        nbufs++;
                }

                vq_update_avail_idx(vq);

                PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

                VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
                        vq->vq_queue_index);
                VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
                        vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
        } else if (queue_type == VTNET_TQ) {
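                /*
                 * Simple tx path: pre-build a fixed ring layout so that the
                 * hot path only has to fill in the data descriptors. The
                 * ring is split in two halves: descriptor i (i < mid_idx)
                 * carries packet data and descriptor i + mid_idx, chained in
                 * front of it, points at a virtio-net header held in
                 * virtio_net_hdr_mem (presumably an all-zero header, as the
                 * simple path does not use per-packet offloads):
                 *
                 *   avail->ring[i] --> desc[i + mid_idx]  (header, F_NEXT)
                 *                          '--> desc[i]   (packet data)
                 */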
                if (use_simple_rxtx) {
                        int mid_idx  = vq->vq_nentries >> 1;
                        for (i = 0; i < mid_idx; i++) {
                                vq->vq_ring.avail->ring[i] = i + mid_idx;
                                vq->vq_ring.desc[i + mid_idx].next = i;
                                vq->vq_ring.desc[i + mid_idx].addr =
                                        vq->virtio_net_hdr_mem +
                                                mid_idx * vq->hw->vtnet_hdr_size;
                                vq->vq_ring.desc[i + mid_idx].len =
                                        vq->hw->vtnet_hdr_size;
                                vq->vq_ring.desc[i + mid_idx].flags =
                                        VRING_DESC_F_NEXT;
                                vq->vq_ring.desc[i].flags = 0;
                        }
                        for (i = mid_idx; i < vq->vq_nentries; i++)
                                vq->vq_ring.avail->ring[i] = i;
                }

                VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
                        vq->vq_queue_index);
                VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
                        vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
        } else {
                VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
                        vq->vq_queue_index);
                VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
                        vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
        }
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (hw->cvq) {
                virtio_dev_vring_start(hw->cvq, VTNET_CQ);
                VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
        }
}

void
virtio_dev_rxtx_start(struct rte_eth_dev *dev)
{
        /*
         * Start receive and transmit vrings
         * -    Setup vring structure for all queues
         * -    Initialize descriptor for the rx vring
         * -    Allocate blank mbufs for each rx descriptor
         *
         */
        int i;

        PMD_INIT_FUNC_TRACE();

        /* Start rx vring. */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
        }

        /* Start tx vring. */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
        }
}

int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id,
                        __rte_unused const struct rte_eth_rxconf *rx_conf,
                        struct rte_mempool *mp)
{
        uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
        struct virtqueue *vq;
        int ret;

        PMD_INIT_FUNC_TRACE();
        ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
                        nb_desc, socket_id, &vq);
        if (ret < 0) {
                PMD_INIT_LOG(ERR, "rvq initialization failed");
                return ret;
        }

        /* Use the mempool provided by the application for rx mbuf allocation */
        vq->mpool = mp;

        dev->data->rx_queues[queue_idx] = vq;
        return 0;
}

void
virtio_dev_rx_queue_release(void *rxq)
{
        virtio_dev_queue_release(rxq);
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
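/*
 * tx_free_thresh controls how lazily completed descriptors are reclaimed:
 * virtio_xmit_pkts() only runs its cleanup once more than
 * (vq_nentries - tx_free_thresh) used entries have accumulated.
 */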
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id,
                        const struct rte_eth_txconf *tx_conf)
{
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
        struct virtqueue *vq;
        uint16_t tx_free_thresh;
        int ret;

        PMD_INIT_FUNC_TRACE();

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
            != ETH_TXQ_FLAGS_NOXSUMS) {
                PMD_INIT_LOG(ERR, "TX checksum offload not supported\n");
                return -EINVAL;
        }

        ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
                        nb_desc, socket_id, &vq);
        if (ret < 0) {
                PMD_INIT_LOG(ERR, "tvq initialization failed");
                return ret;
        }

        tx_free_thresh = tx_conf->tx_free_thresh;
        if (tx_free_thresh == 0)
                tx_free_thresh =
                        RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

        if (tx_free_thresh >= (vq->vq_nentries - 3)) {
                RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
                        "number of TX entries minus 3 (%u)."
                        " (tx_free_thresh=%u port=%u queue=%u)\n",
                        vq->vq_nentries - 3,
                        tx_free_thresh, dev->data->port_id, queue_idx);
                return -EINVAL;
        }

        vq->vq_free_thresh = tx_free_thresh;

        dev->data->tx_queues[queue_idx] = vq;
        return 0;
}

void
virtio_dev_tx_queue_release(void *txq)
{
        virtio_dev_queue_release(txq);
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
        int error;

        /*
         * Requeue the discarded mbuf. This should always be
         * successful since it was just dequeued.
         */
        error = virtqueue_enqueue_recv_refill(vq, m);
        if (unlikely(error)) {
                RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
                rte_pktmbuf_free(m);
        }
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
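/*
 * Burst receive for the non-mergeable case: every packet fits in a single
 * descriptor. The dequeue count is trimmed so that vq_used_cons_idx ends
 * the burst cacheline aligned (DESC_PER_CACHELINE entries per cacheline),
 * presumably to reduce cacheline sharing on the used ring between host
 * and guest.
 */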
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct virtqueue *rxvq = rx_queue;
        struct virtio_hw *hw;
        struct rte_mbuf *rxm, *new_mbuf;
        uint16_t nb_used, num, nb_rx;
        uint32_t len[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
        int error;
        uint32_t i, nb_enqueued;
        const uint32_t hdr_size = sizeof(struct virtio_net_hdr);

        nb_used = VIRTQUEUE_NUSED(rxvq);

        virtio_rmb();

        num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
        num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
        if (likely(num > DESC_PER_CACHELINE))
                num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

        if (num == 0)
                return 0;

        num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
        PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

        hw = rxvq->hw;
        nb_rx = 0;
        nb_enqueued = 0;

        for (i = 0; i < num; i++) {
                rxm = rcv_pkts[i];

                PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

                if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
                        PMD_RX_LOG(ERR, "Packet drop");
                        nb_enqueued++;
                        virtio_discard_rxbuf(rxvq, rxm);
                        rxvq->errors++;
                        continue;
                }

                rxm->port = rxvq->port_id;
                rxm->data_off = RTE_PKTMBUF_HEADROOM;

                rxm->nb_segs = 1;
                rxm->next = NULL;
                rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
                rxm->data_len = (uint16_t)(len[i] - hdr_size);

                if (hw->vlan_strip)
                        rte_vlan_strip(rxm);

                VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

                rx_pkts[nb_rx++] = rxm;
                rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
        }

        rxvq->packets += nb_rx;

        /* Allocate new mbuf for the used descriptor */
        error = ENOSPC;
        while (likely(!virtqueue_full(rxvq))) {
                new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
                if (unlikely(new_mbuf == NULL)) {
                        struct rte_eth_dev *dev
                                = &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed++;
                        break;
                }
                error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
                if (unlikely(error)) {
                        rte_pktmbuf_free(new_mbuf);
                        break;
                }
                nb_enqueued++;
        }

        if (likely(nb_enqueued)) {
                vq_update_avail_idx(rxvq);

                if (unlikely(virtqueue_kick_prepare(rxvq))) {
                        virtqueue_notify(rxvq);
                        PMD_RX_LOG(DEBUG, "Notified\n");
                }
        }

        return nb_rx;
}

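/*
 * Burst receive with mergeable rx buffers: a packet may span several
 * receive buffers. The num_buffers field of the virtio-net header tells
 * how many buffers belong to the packet; the extra buffers are dequeued
 * and chained onto the first mbuf as additional segments.
 */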
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
                        struct rte_mbuf **rx_pkts,
                        uint16_t nb_pkts)
{
        struct virtqueue *rxvq = rx_queue;
        struct virtio_hw *hw;
        struct rte_mbuf *rxm, *new_mbuf;
        uint16_t nb_used, num, nb_rx;
        uint32_t len[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *prev;
        int error;
        uint32_t i, nb_enqueued;
        uint32_t seg_num;
        uint16_t extra_idx;
        uint32_t seg_res;
        const uint32_t hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);

        nb_used = VIRTQUEUE_NUSED(rxvq);

        virtio_rmb();

        if (nb_used == 0)
                return 0;

        PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);

        hw = rxvq->hw;
        nb_rx = 0;
        i = 0;
        nb_enqueued = 0;
        seg_num = 0;
        extra_idx = 0;
        seg_res = 0;

        while (i < nb_used) {
                struct virtio_net_hdr_mrg_rxbuf *header;

                if (nb_rx == nb_pkts)
                        break;

                num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
                if (num != 1)
                        continue;

                i++;

                PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
                PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);

                rxm = rcv_pkts[0];

                if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
                        PMD_RX_LOG(ERR, "Packet drop\n");
                        nb_enqueued++;
                        virtio_discard_rxbuf(rxvq, rxm);
                        rxvq->errors++;
                        continue;
                }

                header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
                        RTE_PKTMBUF_HEADROOM - hdr_size);
                seg_num = header->num_buffers;

                if (seg_num == 0)
                        seg_num = 1;

                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->nb_segs = seg_num;
                rxm->next = NULL;
                rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
                rxm->data_len = (uint16_t)(len[0] - hdr_size);

                rxm->port = rxvq->port_id;
                rx_pkts[nb_rx] = rxm;
                prev = rxm;

                seg_res = seg_num - 1;

                while (seg_res != 0) {
                        /*
                         * Get extra segments for current uncompleted packet.
                         */
                        uint16_t rcv_cnt =
                                RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
                        if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
                                uint32_t rx_num =
                                        virtqueue_dequeue_burst_rx(rxvq,
                                        rcv_pkts, len, rcv_cnt);
                                i += rx_num;
                                rcv_cnt = rx_num;
                        } else {
                                PMD_RX_LOG(ERR,
                                        "Not enough segments for packet.\n");
                                nb_enqueued++;
                                virtio_discard_rxbuf(rxvq, rxm);
                                rxvq->errors++;
                                break;
                        }

                        extra_idx = 0;

                        while (extra_idx < rcv_cnt) {
                                rxm = rcv_pkts[extra_idx];

                                rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
                                rxm->next = NULL;
                                rxm->pkt_len = (uint32_t)(len[extra_idx]);
                                rxm->data_len = (uint16_t)(len[extra_idx]);

                                if (prev)
                                        prev->next = rxm;

                                prev = rxm;
                                rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
                                extra_idx++;
                        }
                        seg_res -= rcv_cnt;
                }

                if (hw->vlan_strip)
                        rte_vlan_strip(rx_pkts[nb_rx]);

                VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
                        rx_pkts[nb_rx]->data_len);

                rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
                nb_rx++;
        }

        rxvq->packets += nb_rx;

        /* Allocate new mbuf for the used descriptor */
        error = ENOSPC;
        while (likely(!virtqueue_full(rxvq))) {
                new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
                if (unlikely(new_mbuf == NULL)) {
                        struct rte_eth_dev *dev
                                = &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed++;
                        break;
                }
                error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
                if (unlikely(error)) {
                        rte_pktmbuf_free(new_mbuf);
                        break;
                }
                nb_enqueued++;
        }

        if (likely(nb_enqueued)) {
                vq_update_avail_idx(rxvq);

                if (unlikely(virtqueue_kick_prepare(rxvq))) {
                        virtqueue_notify(rxvq);
                        PMD_RX_LOG(DEBUG, "Notified");
                }
        }

        return nb_rx;
}

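/*
 * Burst transmit. Used descriptors are reclaimed lazily (see the
 * tx_free_thresh note above); when the ring runs short of free
 * descriptors, the loop reclaims just enough completed entries to post
 * the next packet.
 */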
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
        struct virtqueue *txvq = tx_queue;
        struct rte_mbuf *txm;
        uint16_t nb_used, nb_tx;
        int error;

        if (unlikely(nb_pkts < 1))
                return nb_pkts;

        PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
        nb_used = VIRTQUEUE_NUSED(txvq);

        virtio_rmb();
        if (likely(nb_used > txvq->vq_nentries - txvq->vq_free_thresh))
                virtio_xmit_cleanup(txvq, nb_used);

        nb_tx = 0;

        while (nb_tx < nb_pkts) {
                /* Need one more descriptor for the virtio header. */
                int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;

                /* A positive value indicates that free vring descriptors are needed. */
                if (unlikely(need > 0)) {
                        nb_used = VIRTQUEUE_NUSED(txvq);
                        virtio_rmb();
                        need = RTE_MIN(need, (int)nb_used);

                        virtio_xmit_cleanup(txvq, need);
                        need = (int)tx_pkts[nb_tx]->nb_segs -
                                txvq->vq_free_cnt + 1;
                }

                /*
                 * A zero or negative value indicates that there are enough
                 * free descriptors to transmit this packet.
                 */
                if (likely(need <= 0)) {
                        txm = tx_pkts[nb_tx];

                        /* Do VLAN tag insertion */
                        if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
                                error = rte_vlan_insert(&txm);
                                if (unlikely(error)) {
                                        rte_pktmbuf_free(txm);
                                        ++nb_tx;
                                        continue;
                                }
                        }

                        /* Enqueue packet buffers */
                        error = virtqueue_enqueue_xmit(txvq, txm);
                        if (unlikely(error)) {
                                if (error == -ENOSPC)
                                        PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
                                else if (error == -EMSGSIZE)
                                        PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1");
                                else
                                        PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d", error);
                                break;
                        }
                        nb_tx++;
                        txvq->bytes += txm->pkt_len;
                } else {
                        PMD_TX_LOG(ERR, "No free tx descriptors to transmit");
                        break;
                }
        }

        txvq->packets += nb_tx;

        if (likely(nb_tx)) {
                vq_update_avail_idx(txvq);

                if (unlikely(virtqueue_kick_prepare(txvq))) {
                        virtqueue_notify(txvq);
                        PMD_TX_LOG(DEBUG, "Notified backend after xmit");
                }
        }

        return nb_tx;
}