[dpdk.git] / lib / librte_pmd_virtio / virtio_rxtx.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtqueue.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif

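/*
 * Return a completed descriptor chain to the free list: credit the freed
 * slots back to vq_free_cnt, walk the chain from desc_idx to its last
 * descriptor, and splice the chain onto the tail of the virtqueue's free
 * descriptor list.
 */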
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
        struct vring_desc *dp, *dp_tail;
        struct vq_desc_extra *dxp;
        uint16_t desc_idx_last = desc_idx;

        dp  = &vq->vq_ring.desc[desc_idx];
        dxp = &vq->vq_descx[desc_idx];
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
        if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
                while (dp->flags & VRING_DESC_F_NEXT) {
                        desc_idx_last = dp->next;
                        dp = &vq->vq_ring.desc[dp->next];
                }
        }
        dxp->ndescs = 0;

        /*
         * We must append the existing free chain, if any, to the end of
         * the newly freed chain. If the virtqueue was completely used,
         * then the head is VQ_RING_DESC_CHAIN_END.
         */
        if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
                vq->vq_desc_head_idx = desc_idx;
        } else {
                dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
                dp_tail->next = desc_idx;
        }

        vq->vq_desc_tail_idx = desc_idx_last;
        dp->next = VQ_RING_DESC_CHAIN_END;
}

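/*
 * Dequeue up to "num" completed buffers from the used ring: hand the
 * attached mbufs and their lengths back to the caller and recycle the
 * descriptors onto the free list. The caller is responsible for checking
 * how many used entries are actually available.
 */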
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
                           uint32_t *len, uint16_t num)
{
        struct vring_used_elem *uep;
        struct rte_mbuf *cookie;
        uint16_t used_idx, desc_idx;
        uint16_t i;

        /* Caller does the check */
        for (i = 0; i < num; i++) {
                used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];
                desc_idx = (uint16_t) uep->id;
                len[i] = uep->len;
                cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

                if (unlikely(cookie == NULL)) {
                        PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
                                vq->vq_used_cons_idx);
                        break;
                }

                rte_prefetch0(cookie);
                rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
                rx_pkts[i] = cookie;
                vq->vq_used_cons_idx++;
                vq_ring_free_chain(vq, desc_idx);
                vq->vq_descx[desc_idx].cookie = NULL;
        }

        return i;
}

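/* Reclaim a single completed tx descriptor chain from the used ring. */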
static void
virtqueue_dequeue_pkt_tx(struct virtqueue *vq)
{
        struct vring_used_elem *uep;
        uint16_t used_idx, desc_idx;

        used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
        uep = &vq->vq_ring.used->ring[used_idx];
        desc_idx = (uint16_t) uep->id;
        vq->vq_used_cons_idx++;
        vq_ring_free_chain(vq, desc_idx);
}

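/*
 * Post a single mbuf as a receive buffer: take one free descriptor, point
 * it at the mbuf's data buffer (reserving room for the virtio net header
 * at the end of the headroom), mark it device-writable and place it on
 * the avail ring.
 */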
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
        struct vq_desc_extra *dxp;
        struct virtio_hw *hw = vq->hw;
        struct vring_desc *start_dp;
        uint16_t needed = 1;
        uint16_t head_idx, idx;

        if (unlikely(vq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(vq->vq_free_cnt < needed))
                return -EMSGSIZE;

        head_idx = vq->vq_desc_head_idx;
        if (unlikely(head_idx >= vq->vq_nentries))
                return -EFAULT;

        idx = head_idx;
        dxp = &vq->vq_descx[idx];
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

        start_dp = vq->vq_ring.desc;
        start_dp[idx].addr =
                (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
                - hw->vtnet_hdr_size);
        start_dp[idx].len =
                cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
        start_dp[idx].flags = VRING_DESC_F_WRITE;
        idx = start_dp[idx].next;
        vq->vq_desc_head_idx = idx;
        if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                vq->vq_desc_tail_idx = idx;
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
        vq_update_avail_ring(vq, head_idx);

        return 0;
}

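/*
 * Enqueue a packet for transmission: the first descriptor carries the
 * virtio net header (taken from the queue's virtio_net_hdr memzone),
 * followed by one descriptor per mbuf segment, all chained with
 * VRING_DESC_F_NEXT; the flag is cleared on the last descriptor.
 */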
static int
virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
{
        struct vq_desc_extra *dxp;
        struct vring_desc *start_dp;
        uint16_t seg_num = cookie->nb_segs;
        uint16_t needed = 1 + seg_num;
        uint16_t head_idx, idx;
        uint16_t head_size = txvq->hw->vtnet_hdr_size;

        if (unlikely(txvq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(txvq->vq_free_cnt < needed))
                return -EMSGSIZE;
        head_idx = txvq->vq_desc_head_idx;
        if (unlikely(head_idx >= txvq->vq_nentries))
                return -EFAULT;

        idx = head_idx;
        dxp = &txvq->vq_descx[idx];
        if (dxp->cookie != NULL)
                rte_pktmbuf_free(dxp->cookie);
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

        start_dp = txvq->vq_ring.desc;
        start_dp[idx].addr =
                txvq->virtio_net_hdr_mem + idx * head_size;
        start_dp[idx].len = (uint32_t)head_size;
        start_dp[idx].flags = VRING_DESC_F_NEXT;

        for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
                idx = start_dp[idx].next;
                start_dp[idx].addr  = RTE_MBUF_DATA_DMA_ADDR(cookie);
                start_dp[idx].len   = cookie->data_len;
                start_dp[idx].flags = VRING_DESC_F_NEXT;
                cookie = cookie->next;
        }

        start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
        idx = start_dp[idx].next;
        txvq->vq_desc_head_idx = idx;
        if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                txvq->vq_desc_tail_idx = idx;
        txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
        vq_update_avail_ring(txvq, head_idx);

        return 0;
}

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
        struct rte_mbuf *m;

        m = __rte_mbuf_raw_alloc(mp);
        __rte_mbuf_sanity_check_raw(m, 0);

        return m;
}

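/*
 * (Re)initialize a virtqueue for use: reset the vring and the shadow
 * descriptor state, chain all descriptors onto the free list, pre-fill
 * rx queues with mbufs, and program the queue's PFN into the device.
 */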
static void
virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
{
        struct rte_mbuf *m;
        int i, nbufs, error, size = vq->vq_nentries;
        struct vring *vr = &vq->vq_ring;
        uint8_t *ring_mem = vq->vq_ring_virt_mem;

        PMD_INIT_FUNC_TRACE();

        /*
         * Reinitialise since virtio port might have been stopped and restarted
         */
        memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
        vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN);
        vq->vq_used_cons_idx = 0;
        vq->vq_desc_head_idx = 0;
        vq->vq_avail_idx = 0;
        vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
        vq->vq_free_cnt = vq->vq_nentries;
        memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);

        /* Chain all the descriptors in the ring with an END */
        for (i = 0; i < size - 1; i++)
                vr->desc[i].next = (uint16_t)(i + 1);
        vr->desc[i].next = VQ_RING_DESC_CHAIN_END;

        /*
         * Disable device(host) interrupting guest
         */
        virtqueue_disable_intr(vq);

        /* Only rx virtqueue needs mbufs to be allocated at initialization */
        if (queue_type == VTNET_RQ) {
                if (vq->mpool == NULL)
                        rte_exit(EXIT_FAILURE,
                        "Cannot allocate initial mbufs for rx virtqueue");

                /* Allocate blank mbufs for each rx descriptor */
                nbufs = 0;
                error = ENOSPC;
                while (!virtqueue_full(vq)) {
                        m = rte_rxmbuf_alloc(vq->mpool);
                        if (m == NULL)
                                break;

                        /* Enqueue allocated buffers */
                        error = virtqueue_enqueue_recv_refill(vq, m);

                        if (error) {
                                rte_pktmbuf_free(m);
                                break;
                        }
                        nbufs++;
                }

                vq_update_avail_idx(vq);

                PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

                VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
                        vq->vq_queue_index);
                VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
                        vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
        } else if (queue_type == VTNET_TQ) {
                VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
                        vq->vq_queue_index);
                VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
                        vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
        } else {
                VIRTIO_WRITE_REG_2(vq->hw, VIRTIO_PCI_QUEUE_SEL,
                        vq->vq_queue_index);
                VIRTIO_WRITE_REG_4(vq->hw, VIRTIO_PCI_QUEUE_PFN,
                        vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
        }
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (hw->cvq) {
                virtio_dev_vring_start(hw->cvq, VTNET_CQ);
                VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
        }
}

void
virtio_dev_rxtx_start(struct rte_eth_dev *dev)
{
        /*
         * Start receive and transmit vrings
         * -    Setup vring structure for all queues
         * -    Initialize descriptors for the rx vring
         * -    Allocate blank mbufs for each rx descriptor
         */
        int i;

        PMD_INIT_FUNC_TRACE();

        /* Start rx vring. */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
        }

        /* Start tx vring. */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
                VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
        }
}

int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id,
                        __rte_unused const struct rte_eth_rxconf *rx_conf,
                        struct rte_mempool *mp)
{
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
        struct virtqueue *vq;
        int ret;

        PMD_INIT_FUNC_TRACE();
        ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
                        nb_desc, socket_id, &vq);
        if (ret < 0) {
                PMD_INIT_LOG(ERR, "rvq initialization failed");
                return ret;
        }

        /* Use the supplied mempool for rx mbuf allocation */
        vq->mpool = mp;

        dev->data->rx_queues[queue_idx] = vq;
        return 0;
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id,
                        const struct rte_eth_txconf *tx_conf)
{
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
        struct virtqueue *vq;
        int ret;

        PMD_INIT_FUNC_TRACE();

        if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS)
            != ETH_TXQ_FLAGS_NOXSUMS) {
                PMD_INIT_LOG(ERR, "TX checksum offload not supported");
                return -EINVAL;
        }

        ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
                        nb_desc, socket_id, &vq);
        if (ret < 0) {
                PMD_INIT_LOG(ERR, "tvq initialization failed");
                return ret;
        }

        dev->data->tx_queues[queue_idx] = vq;
        return 0;
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
        int error;
        /*
         * Requeue the discarded mbuf. This should always be
         * successful since it was just dequeued.
         */
        error = virtqueue_enqueue_recv_refill(vq, m);
        if (unlikely(error)) {
                RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
                rte_pktmbuf_free(m);
        }
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
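
/*
 * Receive burst for the non-mergeable case: each packet occupies exactly
 * one descriptor. Dequeue up to nb_pkts used buffers, strip the virtio
 * net header, refill the ring with fresh mbufs and kick the host if it
 * is waiting for a notification.
 */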
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct virtqueue *rxvq = rx_queue;
        struct virtio_hw *hw = rxvq->hw;
        struct rte_mbuf *rxm, *new_mbuf;
        uint16_t nb_used, num, nb_rx = 0;
        uint32_t len[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
        int error;
        uint32_t i, nb_enqueued = 0;
        const uint32_t hdr_size = sizeof(struct virtio_net_hdr);

        nb_used = VIRTQUEUE_NUSED(rxvq);

        virtio_rmb();

        num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
        num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
        if (likely(num > DESC_PER_CACHELINE))
                num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

        if (num == 0)
                return 0;

        num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
        PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
        for (i = 0; i < num; i++) {
                rxm = rcv_pkts[i];

                PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

                if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
                        PMD_RX_LOG(ERR, "Packet drop");
                        nb_enqueued++;
                        virtio_discard_rxbuf(rxvq, rxm);
                        rxvq->errors++;
                        continue;
                }

                rxm->port = rxvq->port_id;
                rxm->data_off = RTE_PKTMBUF_HEADROOM;

                rxm->nb_segs = 1;
                rxm->next = NULL;
                rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
                rxm->data_len = (uint16_t)(len[i] - hdr_size);

                if (hw->vlan_strip)
                        rte_vlan_strip(rxm);

                VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

                rx_pkts[nb_rx++] = rxm;
                rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
        }

        rxvq->packets += nb_rx;

        /* Allocate new mbufs for the used descriptors */
        error = ENOSPC;
        while (likely(!virtqueue_full(rxvq))) {
                new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
                if (unlikely(new_mbuf == NULL)) {
                        struct rte_eth_dev *dev
                                = &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed++;
                        break;
                }
                error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
                if (unlikely(error)) {
                        rte_pktmbuf_free(new_mbuf);
                        break;
                }
                nb_enqueued++;
        }

        if (likely(nb_enqueued)) {
                vq_update_avail_idx(rxvq);

                if (unlikely(virtqueue_kick_prepare(rxvq))) {
                        virtqueue_notify(rxvq);
                        PMD_RX_LOG(DEBUG, "Notified");
                }
        }

        return nb_rx;
}

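/*
 * Receive burst for the mergeable rx buffer case: the virtio net header's
 * num_buffers field tells how many descriptors the host used for one
 * packet, so extra segments are dequeued and chained onto the first mbuf
 * before the ring is refilled and the host is kicked.
 */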
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
                        struct rte_mbuf **rx_pkts,
                        uint16_t nb_pkts)
{
        struct virtqueue *rxvq = rx_queue;
        struct rte_mbuf *rxm, *new_mbuf;
        uint16_t nb_used, num, nb_rx = 0;
        uint32_t len[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *prev;
        int error;
        uint32_t i = 0, nb_enqueued = 0;
        uint32_t seg_num = 0;
        uint16_t extra_idx = 0;
        uint32_t seg_res = 0;
        const uint32_t hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);

        nb_used = VIRTQUEUE_NUSED(rxvq);

        virtio_rmb();

        if (nb_used == 0)
                return 0;

        PMD_RX_LOG(DEBUG, "used:%d", nb_used);

        while (i < nb_used) {
                struct virtio_net_hdr_mrg_rxbuf *header;

                if (nb_rx == nb_pkts)
                        break;

                num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
                if (num != 1)
                        continue;

                i++;

                PMD_RX_LOG(DEBUG, "dequeue:%d", num);
                PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

                rxm = rcv_pkts[0];

                if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
                        PMD_RX_LOG(ERR, "Packet drop");
                        nb_enqueued++;
                        virtio_discard_rxbuf(rxvq, rxm);
                        rxvq->errors++;
                        continue;
                }

                header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
                        RTE_PKTMBUF_HEADROOM - hdr_size);
                seg_num = header->num_buffers;

                if (seg_num == 0)
                        seg_num = 1;

                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->nb_segs = seg_num;
                rxm->next = NULL;
                rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
                rxm->data_len = (uint16_t)(len[0] - hdr_size);

                rxm->port = rxvq->port_id;
                rx_pkts[nb_rx] = rxm;
                prev = rxm;

                seg_res = seg_num - 1;

                while (seg_res != 0) {
                        /*
                         * Get extra segments for the current uncompleted
                         * packet.
                         */
                        uint32_t rcv_cnt =
                                RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
                        if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
                                uint32_t rx_num =
                                        virtqueue_dequeue_burst_rx(rxvq,
                                        rcv_pkts, len, rcv_cnt);
                                i += rx_num;
                                rcv_cnt = rx_num;
                        } else {
                                PMD_RX_LOG(ERR,
                                        "Not enough segments for packet.");
                                nb_enqueued++;
                                virtio_discard_rxbuf(rxvq, rxm);
                                rxvq->errors++;
                                break;
                        }

                        extra_idx = 0;

                        while (extra_idx < rcv_cnt) {
                                rxm = rcv_pkts[extra_idx];

                                rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
                                rxm->next = NULL;
                                rxm->pkt_len = (uint32_t)(len[extra_idx]);
                                rxm->data_len = (uint16_t)(len[extra_idx]);

                                if (prev)
                                        prev->next = rxm;

                                prev = rxm;
                                rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
                                extra_idx++;
                        }
                        seg_res -= rcv_cnt;
                }

                VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
                        rx_pkts[nb_rx]->data_len);

                rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
                nb_rx++;
        }

        rxvq->packets += nb_rx;

        /* Allocate new mbufs for the used descriptors */
        error = ENOSPC;
        while (likely(!virtqueue_full(rxvq))) {
                new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
                if (unlikely(new_mbuf == NULL)) {
                        struct rte_eth_dev *dev
                                = &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed++;
                        break;
                }
                error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
                if (unlikely(error)) {
                        rte_pktmbuf_free(new_mbuf);
                        break;
                }
                nb_enqueued++;
        }

        if (likely(nb_enqueued)) {
                vq_update_avail_idx(rxvq);

                if (unlikely(virtqueue_kick_prepare(rxvq))) {
                        virtqueue_notify(rxvq);
                        PMD_RX_LOG(DEBUG, "Notified");
                }
        }

        return nb_rx;
}

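/*
 * Transmit burst: reclaim completed descriptors from the used ring when
 * more free slots are needed, insert a VLAN tag in software if requested,
 * enqueue each packet as a header descriptor plus one descriptor per
 * segment, then publish the avail index and kick the host if required.
 */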
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
        struct virtqueue *txvq = tx_queue;
        struct rte_mbuf *txm;
        uint16_t nb_used, nb_tx, num;
        int error;

        nb_tx = 0;

        if (unlikely(nb_pkts < 1))
                return nb_pkts;

        PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
        nb_used = VIRTQUEUE_NUSED(txvq);

        virtio_rmb();

        num = (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);

        while (nb_tx < nb_pkts) {
                /* Need one more descriptor for the virtio header. */
                int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
                int deq_cnt = RTE_MIN(need, (int)num);

                num -= (deq_cnt > 0) ? deq_cnt : 0;
                while (deq_cnt > 0) {
                        virtqueue_dequeue_pkt_tx(txvq);
                        deq_cnt--;
                }

                need = (int)tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
                /*
                 * A zero or negative value indicates there are enough free
                 * descriptors to transmit this packet.
                 */
                if (likely(need <= 0)) {
                        txm = tx_pkts[nb_tx];

                        /* Do VLAN tag insertion */
                        if (txm->ol_flags & PKT_TX_VLAN_PKT) {
                                error = rte_vlan_insert(&txm);
                                if (unlikely(error)) {
                                        rte_pktmbuf_free(txm);
                                        ++nb_tx;
                                        continue;
                                }
                        }

                        /* Enqueue packet buffers */
                        error = virtqueue_enqueue_xmit(txvq, txm);
                        if (unlikely(error)) {
                                if (error == -ENOSPC)
                                        PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0");
                                else if (error == -EMSGSIZE)
                                        PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1");
                                else
                                        PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d", error);
                                break;
                        }
                        nb_tx++;
                        txvq->bytes += txm->pkt_len;
                } else {
                        PMD_TX_LOG(ERR, "No free tx descriptors to transmit");
                        break;
                }
        }

        txvq->packets += nb_tx;

        if (likely(nb_tx)) {
                vq_update_avail_idx(txvq);

                if (unlikely(virtqueue_kick_prepare(txvq))) {
                        virtqueue_notify(txvq);
                        PMD_TX_LOG(DEBUG, "Notified backend after xmit");
                }
        }

        return nb_tx;
}