84ec0e88e894ef565ace66ac68f99bb5d0639ff2
dpdk.git: lib/librte_vhost/vhost_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdint.h>
35 #include <linux/virtio_net.h>
36
37 #include <rte_mbuf.h>
38 #include <rte_memcpy.h>
39 #include <rte_virtio_net.h>
40
41 #include "vhost-net-cdev.h"
42
43 #define MAX_PKT_BURST 32
44
45 /**
46  * This function adds buffers to the virtio device's RX virtqueue. Buffers can
47  * be received from the physical port or from another virtio device. A packet
48  * count is returned to indicate the number of packets that were successfully
49  * added to the RX queue. This function works when mergeable is disabled.
50  */
51 static inline uint32_t __attribute__((always_inline))
52 virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
53         struct rte_mbuf **pkts, uint32_t count)
54 {
55         struct vhost_virtqueue *vq;
56         struct vring_desc *desc;
57         struct rte_mbuf *buff;
58         /* The virtio_hdr is initialised to 0. */
59         struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
60         uint64_t buff_addr = 0;
61         uint64_t buff_hdr_addr = 0;
62         uint32_t head[MAX_PKT_BURST], packet_len = 0;
63         uint32_t head_idx, packet_success = 0;
64         uint16_t avail_idx, res_cur_idx;
65         uint16_t res_base_idx, res_end_idx;
66         uint16_t free_entries;
67         uint8_t success = 0;
68
69         LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
70         if (unlikely(queue_id != VIRTIO_RXQ)) {
71                 LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
72                 return 0;
73         }
74
75         vq = dev->virtqueue[VIRTIO_RXQ];
76         count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
77
78         /*
79          * Multiple data cores may want to access the available buffers
80          * concurrently, so a range of entries must be reserved first.
81          */
82         do {
83                 res_base_idx = vq->last_used_idx_res;
84                 avail_idx = *((volatile uint16_t *)&vq->avail->idx);
85
86                 free_entries = (avail_idx - res_base_idx);
87                 /* Check that we have enough buffers. */
88                 if (unlikely(count > free_entries))
89                         count = free_entries;
90
91                 if (count == 0)
92                         return 0;
93
94                 res_end_idx = res_base_idx + count;
95                 /* vq->last_used_idx_res is atomically updated. */
96                 /* TODO: allow disabling cmpset when the application guarantees no concurrency. */
97                 success = rte_atomic16_cmpset(&vq->last_used_idx_res,
98                                 res_base_idx, res_end_idx);
99         } while (unlikely(success == 0));
100         res_cur_idx = res_base_idx;
101         LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
102                         dev->device_fh, res_cur_idx, res_end_idx);
103
104         /* Prefetch available ring to retrieve indexes. */
105         rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
106
107         /* Retrieve all of the head indexes first to avoid caching issues. */
108         for (head_idx = 0; head_idx < count; head_idx++)
109                 head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) &
110                                         (vq->size - 1)];
111
112         /* Prefetch descriptor index. */
113         rte_prefetch0(&vq->desc[head[packet_success]]);
114
115         while (res_cur_idx != res_end_idx) {
116                 /* Get descriptor from available ring */
117                 desc = &vq->desc[head[packet_success]];
118
119                 buff = pkts[packet_success];
120
121                 /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
122                 buff_addr = gpa_to_vva(dev, desc->addr);
123                 /* Prefetch buffer address. */
124                 rte_prefetch0((void *)(uintptr_t)buff_addr);
125
126                 /* Copy virtio_hdr to packet and increment buffer address */
127                 buff_hdr_addr = buff_addr;
128                 packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
129
130                 /*
131                  * If the descriptors are chained the header and data are
132                  * placed in separate buffers.
133                  */
134                 if (desc->flags & VRING_DESC_F_NEXT) {
135                         desc->len = vq->vhost_hlen;
136                         desc = &vq->desc[desc->next];
137                         /* Buffer address translation. */
138                         buff_addr = gpa_to_vva(dev, desc->addr);
139                         desc->len = rte_pktmbuf_data_len(buff);
140                 } else {
141                         buff_addr += vq->vhost_hlen;
142                         desc->len = packet_len;
143                 }
144
145                 /* Update used ring with desc information */
146                 vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
147                 vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
148
149                 /* Copy mbuf data to buffer */
150                 /* FIXME: handle scatter-gather mbufs and descriptors too small to hold the mbuf data. */
151                 rte_memcpy((void *)(uintptr_t)buff_addr,
152                         rte_pktmbuf_mtod(buff, const void *),
153                         rte_pktmbuf_data_len(buff));
154                 PRINT_PACKET(dev, (uintptr_t)buff_addr,
155                         rte_pktmbuf_data_len(buff), 0);
156
157                 res_cur_idx++;
158                 packet_success++;
159
160                 rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
161                         (const void *)&virtio_hdr, vq->vhost_hlen);
162
163                 PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
164
165                 if (res_cur_idx < res_end_idx) {
166                         /* Prefetch descriptor index. */
167                         rte_prefetch0(&vq->desc[head[packet_success]]);
168                 }
169         }
170
171         rte_compiler_barrier();
172
173         /* Wait until it's our turn to add our buffer to the used ring. */
174         while (unlikely(vq->last_used_idx != res_base_idx))
175                 rte_pause();
176
177         *(volatile uint16_t *)&vq->used->idx += count;
178         vq->last_used_idx = res_end_idx;
179
180         /* Kick the guest if necessary. */
181         if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
182                 eventfd_write((int)vq->kickfd, 1);
183         return count;
184 }
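/*
 * A standalone sketch of the reservation step used above (illustration
 * only; reserve_used_range() and its parameters are hypothetical, not part
 * of the vhost API).  Competing data cores claim a contiguous range of ring
 * entries by advancing a shared reservation index with rte_atomic16_cmpset();
 * the core whose compare-and-set succeeds owns [base, base + *count).
 */
static inline uint16_t
reserve_used_range(volatile uint16_t *last_idx_res, uint16_t avail_idx,
        uint16_t *count)
{
        uint16_t base, end, free_entries;

        do {
                base = *last_idx_res;
                free_entries = (uint16_t)(avail_idx - base);

                /* Never claim more entries than the guest made available. */
                if (*count > free_entries)
                        *count = free_entries;
                if (*count == 0)
                        return base;

                end = (uint16_t)(base + *count);
                /* Retry if another core advanced the index first. */
        } while (rte_atomic16_cmpset(last_idx_res, base, end) == 0);

        return base;
}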
185
186 static inline uint32_t __attribute__((always_inline))
187 copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t res_base_idx,
188         uint16_t res_end_idx, struct rte_mbuf *pkt)
189 {
190         uint32_t vec_idx = 0;
191         uint32_t entry_success = 0;
192         struct vhost_virtqueue *vq;
193         /* The virtio_hdr is initialised to 0. */
194         struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
195                 {0, 0, 0, 0, 0, 0}, 0};
196         uint16_t cur_idx = res_base_idx;
197         uint64_t vb_addr = 0;
198         uint64_t vb_hdr_addr = 0;
199         uint32_t seg_offset = 0;
200         uint32_t vb_offset = 0;
201         uint32_t seg_avail;
202         uint32_t vb_avail;
203         uint32_t cpy_len, entry_len;
204
205         if (pkt == NULL)
206                 return 0;
207
208         LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
209                 "End Index %d\n",
210                 dev->device_fh, cur_idx, res_end_idx);
211
212         /*
213          * Convert from gpa to vva
214          * (guest physical addr -> vhost virtual addr)
215          */
216         vq = dev->virtqueue[VIRTIO_RXQ];
217         vb_addr =
218                 gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
219         vb_hdr_addr = vb_addr;
220
221         /* Prefetch buffer address. */
222         rte_prefetch0((void *)(uintptr_t)vb_addr);
223
224         virtio_hdr.num_buffers = res_end_idx - res_base_idx;
225
226         LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
227                 dev->device_fh, virtio_hdr.num_buffers);
228
229         rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
230                 (const void *)&virtio_hdr, vq->vhost_hlen);
231
232         PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);
233
234         seg_avail = rte_pktmbuf_data_len(pkt);
235         vb_offset = vq->vhost_hlen;
236         vb_avail =
237                 vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
238
239         entry_len = vq->vhost_hlen;
240
241         if (vb_avail == 0) {
242                 uint32_t desc_idx =
243                         vq->buf_vec[vec_idx].desc_idx;
244                 vq->desc[desc_idx].len = vq->vhost_hlen;
245
246                 if ((vq->desc[desc_idx].flags
247                         & VRING_DESC_F_NEXT) == 0) {
248                         /* Update used ring with desc information */
249                         vq->used->ring[cur_idx & (vq->size - 1)].id
250                                 = vq->buf_vec[vec_idx].desc_idx;
251                         vq->used->ring[cur_idx & (vq->size - 1)].len
252                                 = entry_len;
253
254                         entry_len = 0;
255                         cur_idx++;
256                         entry_success++;
257                 }
258
259                 vec_idx++;
260                 vb_addr =
261                         gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
262
263                 /* Prefetch buffer address. */
264                 rte_prefetch0((void *)(uintptr_t)vb_addr);
265                 vb_offset = 0;
266                 vb_avail = vq->buf_vec[vec_idx].buf_len;
267         }
268
269         cpy_len = RTE_MIN(vb_avail, seg_avail);
270
271         while (cpy_len > 0) {
272                 /* Copy mbuf data to vring buffer */
273                 rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
274                         (const void *)(rte_pktmbuf_mtod(pkt, char*) + seg_offset),
275                         cpy_len);
276
277                 PRINT_PACKET(dev,
278                         (uintptr_t)(vb_addr + vb_offset),
279                         cpy_len, 0);
280
281                 seg_offset += cpy_len;
282                 vb_offset += cpy_len;
283                 seg_avail -= cpy_len;
284                 vb_avail -= cpy_len;
285                 entry_len += cpy_len;
286
287                 if (seg_avail != 0) {
288                         /*
289                          * The virtio buffer in this vring entry has
290                          * reached its end, but the mbuf segment has not
291                          * been fully copied yet.
292                          */
293                         if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
294                                 VRING_DESC_F_NEXT) == 0) {
295                                 /* Update used ring with desc information */
296                                 vq->used->ring[cur_idx & (vq->size - 1)].id
297                                         = vq->buf_vec[vec_idx].desc_idx;
298                                 vq->used->ring[cur_idx & (vq->size - 1)].len
299                                         = entry_len;
300                                 entry_len = 0;
301                                 cur_idx++;
302                                 entry_success++;
303                         }
304
305                         vec_idx++;
306                         vb_addr = gpa_to_vva(dev,
307                                 vq->buf_vec[vec_idx].buf_addr);
308                         vb_offset = 0;
309                         vb_avail = vq->buf_vec[vec_idx].buf_len;
310                         cpy_len = RTE_MIN(vb_avail, seg_avail);
311                 } else {
312                         /*
313                          * The current segment is complete; check whether
314                          * the whole packet is complete as well.
315                          */
316                         pkt = pkt->next;
317                         if (pkt != NULL) {
318                                 /*
319                                  * There are more segments.
320                                  */
321                                 if (vb_avail == 0) {
322                                         /*
323                                          * The current vring buffer is
324                                          * used up; fetch the next
325                                          * buffer from buf_vec.
326                                          */
327                                         uint32_t desc_idx =
328                                                 vq->buf_vec[vec_idx].desc_idx;
329                                         vq->desc[desc_idx].len = vb_offset;
330
331                                         if ((vq->desc[desc_idx].flags &
332                                                 VRING_DESC_F_NEXT) == 0) {
333                                                 uint16_t wrapped_idx =
334                                                         cur_idx & (vq->size - 1);
335                                                 /*
336                                                  * Update used ring with the
337                                                  * descriptor information
338                                                  */
339                                                 vq->used->ring[wrapped_idx].id
340                                                         = desc_idx;
341                                                 vq->used->ring[wrapped_idx].len
342                                                         = entry_len;
343                                                 entry_success++;
344                                                 entry_len = 0;
345                                                 cur_idx++;
346                                         }
347
348                                         /* Get next buffer from buf_vec. */
349                                         vec_idx++;
350                                         vb_addr = gpa_to_vva(dev,
351                                                 vq->buf_vec[vec_idx].buf_addr);
352                                         vb_avail =
353                                                 vq->buf_vec[vec_idx].buf_len;
354                                         vb_offset = 0;
355                                 }
356
357                                 seg_offset = 0;
358                                 seg_avail = rte_pktmbuf_data_len(pkt);
359                                 cpy_len = RTE_MIN(vb_avail, seg_avail);
360                         } else {
361                                 /*
362                                  * The whole packet is complete.
363                                  */
364                                 uint32_t desc_idx =
365                                         vq->buf_vec[vec_idx].desc_idx;
366                                 vq->desc[desc_idx].len = vb_offset;
367
368                                 while (vq->desc[desc_idx].flags &
369                                         VRING_DESC_F_NEXT) {
370                                         desc_idx = vq->desc[desc_idx].next;
371                                         vq->desc[desc_idx].len = 0;
372                                 }
373
374                                 /* Update used ring with desc information */
375                                 vq->used->ring[cur_idx & (vq->size - 1)].id
376                                         = vq->buf_vec[vec_idx].desc_idx;
377                                 vq->used->ring[cur_idx & (vq->size - 1)].len
378                                         = entry_len;
379                                 entry_len = 0;
380                                 cur_idx++;
381                                 entry_success++;
382                                 seg_avail = 0;
383                                 cpy_len = RTE_MIN(vb_avail, seg_avail);
384                         }
385                 }
386         }
387
388         return entry_success;
389 }
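/*
 * A minimal sketch of the two-cursor copy used above (illustration only;
 * struct ex_guest_buf and copy_segs_to_bufs() are hypothetical helpers, not
 * part of the vhost API).  One cursor walks the mbuf segments, the other
 * walks the guest buffers; each iteration copies
 * RTE_MIN(bytes left in the segment, bytes left in the buffer) and advances
 * whichever side ran out.
 */
struct ex_guest_buf {
        char *addr;
        uint32_t len;
};

static inline uint32_t
copy_segs_to_bufs(struct rte_mbuf *pkt, struct ex_guest_buf *bufs,
        uint32_t nr_bufs)
{
        uint32_t buf_idx = 0, buf_off = 0, seg_off = 0, copied = 0;

        while (pkt != NULL && buf_idx < nr_bufs) {
                uint32_t seg_avail = rte_pktmbuf_data_len(pkt) - seg_off;
                uint32_t buf_avail = bufs[buf_idx].len - buf_off;
                uint32_t cpy_len = RTE_MIN(seg_avail, buf_avail);

                rte_memcpy(bufs[buf_idx].addr + buf_off,
                        rte_pktmbuf_mtod(pkt, const char *) + seg_off,
                        cpy_len);
                seg_off += cpy_len;
                buf_off += cpy_len;
                copied += cpy_len;

                if (seg_off == rte_pktmbuf_data_len(pkt)) {
                        /* Segment exhausted; move to the next mbuf segment. */
                        pkt = pkt->next;
                        seg_off = 0;
                }
                if (buf_off == bufs[buf_idx].len) {
                        /* Guest buffer exhausted; move to the next buffer. */
                        buf_idx++;
                        buf_off = 0;
                }
        }
        return copied;
}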
390
391 /*
392  * This function adds buffers to the virtio device's RX virtqueue. Buffers can
393  * be received from the physical port or from another virtio device. A packet
394  * count is returned to indicate the number of packets that were successfully
395  * added to the RX queue. This function works for mergeable RX.
396  */
397 static inline uint32_t __attribute__((always_inline))
398 virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
399         struct rte_mbuf **pkts, uint32_t count)
400 {
401         struct vhost_virtqueue *vq;
402         uint32_t pkt_idx = 0, entry_success = 0;
403         uint16_t avail_idx, res_cur_idx;
404         uint16_t res_base_idx, res_end_idx;
405         uint8_t success = 0;
406
407         LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
408                 dev->device_fh);
409         if (unlikely(queue_id != VIRTIO_RXQ)) {
410                 LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
411                 return 0;
412         }
413         vq = dev->virtqueue[VIRTIO_RXQ];
414         count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
415
416         if (count == 0)
417                 return 0;
418
419         for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
420                 uint32_t secure_len = 0;
421                 uint16_t need_cnt;
422                 uint32_t vec_idx = 0;
423                 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
424                 uint16_t i, id;
425
426                 do {
427                         /*
428                          * Multiple data cores may want to access the available
429                          * buffers concurrently, so they must be reserved first.
430                          */
431                         res_base_idx = vq->last_used_idx_res;
432                         res_cur_idx = res_base_idx;
433
434                         do {
435                                 avail_idx = *((volatile uint16_t *)&vq->avail->idx);
436                                 if (unlikely(res_cur_idx == avail_idx)) {
437                                         LOG_DEBUG(VHOST_DATA,
438                                                 "(%"PRIu64") Failed "
439                                                 "to get enough desc from "
440                                                 "vring\n",
441                                                 dev->device_fh);
442                                         return pkt_idx;
443                                 } else {
444                                         uint16_t wrapped_idx =
445                                                 (res_cur_idx) & (vq->size - 1);
446                                         uint32_t idx =
447                                                 vq->avail->ring[wrapped_idx];
448                                         uint8_t next_desc;
449
450                                         do {
451                                                 next_desc = 0;
452                                                 secure_len += vq->desc[idx].len;
453                                                 if (vq->desc[idx].flags &
454                                                         VRING_DESC_F_NEXT) {
455                                                         idx = vq->desc[idx].next;
456                                                         next_desc = 1;
457                                                 }
458                                         } while (next_desc);
459
460                                         res_cur_idx++;
461                                 }
462                         } while (pkt_len > secure_len);
463
464                         /* vq->last_used_idx_res is atomically updated. */
465                         success = rte_atomic16_cmpset(&vq->last_used_idx_res,
466                                                         res_base_idx,
467                                                         res_cur_idx);
468                 } while (success == 0);
469
470                 id = res_base_idx;
471                 need_cnt = res_cur_idx - res_base_idx;
472
473                 for (i = 0; i < need_cnt; i++, id++) {
474                         uint16_t wrapped_idx = id & (vq->size - 1);
475                         uint32_t idx = vq->avail->ring[wrapped_idx];
476                         uint8_t next_desc;
477                         do {
478                                 next_desc = 0;
479                                 vq->buf_vec[vec_idx].buf_addr =
480                                         vq->desc[idx].addr;
481                                 vq->buf_vec[vec_idx].buf_len =
482                                         vq->desc[idx].len;
483                                 vq->buf_vec[vec_idx].desc_idx = idx;
484                                 vec_idx++;
485
486                                 if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
487                                         idx = vq->desc[idx].next;
488                                         next_desc = 1;
489                                 }
490                         } while (next_desc);
491                 }
492
493                 res_end_idx = res_cur_idx;
494
495                 entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
496                         res_end_idx, pkts[pkt_idx]);
497
498                 rte_compiler_barrier();
499
500                 /*
501                  * Wait until it's our turn to add our buffer
502                  * to the used ring.
503                  */
504                 while (unlikely(vq->last_used_idx != res_base_idx))
505                         rte_pause();
506
507                 *(volatile uint16_t *)&vq->used->idx += entry_success;
508                 vq->last_used_idx = res_end_idx;
509
510                 /* Kick the guest if necessary. */
511                 if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
512                         eventfd_write((int)vq->kickfd, 1);
513         }
514
515         return count;
516 }
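/*
 * Sketch of the descriptor chain walk performed above when sizing a receive
 * buffer (illustration only; chained_desc_len() is a hypothetical helper,
 * not part of the vhost API).  Starting from a head descriptor, the lengths
 * are summed until a descriptor without VRING_DESC_F_NEXT is reached.
 */
static inline uint32_t
chained_desc_len(struct vhost_virtqueue *vq, uint32_t idx)
{
        uint32_t len = 0;

        for (;;) {
                len += vq->desc[idx].len;
                if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0)
                        break;
                /* Follow the chain to the next descriptor. */
                idx = vq->desc[idx].next;
        }
        return len;
}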
517
518 uint16_t
519 rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
520         struct rte_mbuf **pkts, uint16_t count)
521 {
522         if (unlikely(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)))
523                 return virtio_dev_merge_rx(dev, queue_id, pkts, count);
524         else
525                 return virtio_dev_rx(dev, queue_id, pkts, count);
526 }
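/*
 * Minimal usage sketch for rte_vhost_enqueue_burst() (illustration only;
 * example_enqueue() and its burst handling are assumptions, not part of this
 * library).  The enqueue path copies packet data into the guest buffers, so
 * the caller still owns every mbuf afterwards and is responsible for freeing
 * them; packets beyond the returned count were simply not enqueued.
 */
static inline uint16_t
example_enqueue(struct virtio_net *dev, struct rte_mbuf **pkts,
        uint16_t nb_pkts)
{
        uint16_t enqueued, i;

        /* Hand the burst to the guest RX ring (only VIRTIO_RXQ is supported). */
        enqueued = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts, nb_pkts);

        /* Data has been copied into the vring, so every mbuf can be freed. */
        for (i = 0; i < nb_pkts; i++)
                rte_pktmbuf_free(pkts[i]);

        return enqueued;
}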
527
528 uint16_t
529 rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
530         struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
531 {
532         struct rte_mbuf *m, *prev;
533         struct vhost_virtqueue *vq;
534         struct vring_desc *desc;
535         uint64_t vb_addr = 0;
536         uint32_t head[MAX_PKT_BURST];
537         uint32_t used_idx;
538         uint32_t i;
539         uint16_t free_entries, entry_success = 0;
540         uint16_t avail_idx;
541
542         if (unlikely(queue_id != VIRTIO_TXQ)) {
543                 LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
544                 return 0;
545         }
546
547         vq = dev->virtqueue[VIRTIO_TXQ];
548         avail_idx = *((volatile uint16_t *)&vq->avail->idx);
549
550         /* If there are no available buffers then return. */
551         if (vq->last_used_idx == avail_idx)
552                 return 0;
553
554         LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
555                 dev->device_fh);
556
557         /* Prefetch available ring to retrieve head indexes. */
558         rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
559
560         /* Get the number of free entries in the ring. */
561         free_entries = (avail_idx - vq->last_used_idx);
562
563         free_entries = RTE_MIN(free_entries, count);
564         /* Limit to MAX_PKT_BURST. */
565         free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);
566
567         LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
568                         dev->device_fh, free_entries);
569         /* Retrieve all of the head indexes first to avoid caching issues. */
570         for (i = 0; i < free_entries; i++)
571                 head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
572
573         /* Prefetch descriptor index. */
574         rte_prefetch0(&vq->desc[head[entry_success]]);
575         rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
576
577         while (entry_success < free_entries) {
578                 uint32_t vb_avail, vb_offset;
579                 uint32_t seg_avail, seg_offset;
580                 uint32_t cpy_len;
581                 uint32_t seg_num = 0;
582                 struct rte_mbuf *cur;
583                 uint8_t alloc_err = 0;
584
585                 desc = &vq->desc[head[entry_success]];
586
587                 /* Discard first buffer as it is the virtio header */
588                 desc = &vq->desc[desc->next];
589
590                 /* Buffer address translation. */
591                 vb_addr = gpa_to_vva(dev, desc->addr);
592                 /* Prefetch buffer address. */
593                 rte_prefetch0((void *)(uintptr_t)vb_addr);
594
595                 used_idx = vq->last_used_idx & (vq->size - 1);
596
597                 if (entry_success < (free_entries - 1)) {
598                         /* Prefetch descriptor index. */
599                         rte_prefetch0(&vq->desc[head[entry_success+1]]);
600                         rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
601                 }
602
603                 /* Update used index buffer information. */
604                 vq->used->ring[used_idx].id = head[entry_success];
605                 vq->used->ring[used_idx].len = 0;
606
607                 vb_offset = 0;
608                 vb_avail = desc->len;
609                 /* Allocate an mbuf and populate the structure. */
610                 m = rte_pktmbuf_alloc(mbuf_pool);
611                 if (unlikely(m == NULL)) {
612                         RTE_LOG(ERR, VHOST_DATA,
613                                 "Failed to allocate memory for mbuf.\n");
614                         return entry_success;
615                 }
616                 seg_offset = 0;
617                 seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
618                 cpy_len = RTE_MIN(vb_avail, seg_avail);
619
620                 PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);
621
622                 seg_num++;
623                 cur = m;
624                 prev = m;
625                 while (cpy_len != 0) {
626                         rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
627                                 (void *)((uintptr_t)(vb_addr + vb_offset)),
628                                 cpy_len);
629
630                         seg_offset += cpy_len;
631                         vb_offset += cpy_len;
632                         vb_avail -= cpy_len;
633                         seg_avail -= cpy_len;
634
635                         if (vb_avail != 0) {
636                                 /*
637                                  * The mbuf segment is full, while the
638                                  * virtio buffer in the TX vring still has
639                                  * more data to be copied.
640                                  */
641                                 cur->data_len = seg_offset;
642                                 m->pkt_len += seg_offset;
643                                 /* Allocate mbuf and populate the structure. */
644                                 cur = rte_pktmbuf_alloc(mbuf_pool);
645                                 if (unlikely(cur == NULL)) {
646                                         RTE_LOG(ERR, VHOST_DATA, "Failed to "
647                                                 "allocate memory for mbuf.\n");
648                                         rte_pktmbuf_free(m);
649                                         alloc_err = 1;
650                                         break;
651                                 }
652
653                                 seg_num++;
654                                 prev->next = cur;
655                                 prev = cur;
656                                 seg_offset = 0;
657                                 seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
658                         } else {
659                                 if (desc->flags & VRING_DESC_F_NEXT) {
660                                         /*
661                                          * There are more virtio buffers in the
662                                          * same vring entry that need to be copied.
663                                          */
664                                         if (seg_avail == 0) {
665                                                 /*
666                                                  * The current segment has no
667                                                  * room to accommodate more
668                                                  * data.
669                                                  */
670                                                 cur->data_len = seg_offset;
671                                                 m->pkt_len += seg_offset;
672                                                 /*
673                                                  * Allocate an mbuf and
674                                                  * populate the structure.
675                                                  */
676                                                 cur = rte_pktmbuf_alloc(mbuf_pool);
677                                                 if (unlikely(cur == NULL)) {
678                                                         RTE_LOG(ERR,
679                                                                 VHOST_DATA,
680                                                                 "Failed to "
681                                                                 "allocate memory "
682                                                                 "for mbuf\n");
683                                                         rte_pktmbuf_free(m);
684                                                         alloc_err = 1;
685                                                         break;
686                                                 }
687                                                 seg_num++;
688                                                 prev->next = cur;
689                                                 prev = cur;
690                                                 seg_offset = 0;
691                                                 seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
692                                         }
693
694                                         desc = &vq->desc[desc->next];
695
696                                         /* Buffer address translation. */
697                                         vb_addr = gpa_to_vva(dev, desc->addr);
698                                         /* Prefetch buffer address. */
699                                         rte_prefetch0((void *)(uintptr_t)vb_addr);
700                                         vb_offset = 0;
701                                         vb_avail = desc->len;
702
703                                         PRINT_PACKET(dev, (uintptr_t)vb_addr,
704                                                 desc->len, 0);
705                                 } else {
706                                         /* The whole packet is complete. */
707                                         cur->data_len = seg_offset;
708                                         m->pkt_len += seg_offset;
709                                         vb_avail = 0;
710                                 }
711                         }
712
713                         cpy_len = RTE_MIN(vb_avail, seg_avail);
714                 }
715
716                 if (unlikely(alloc_err == 1))
717                         break;
718
719                 m->nb_segs = seg_num;
720
721                 pkts[entry_success] = m;
722                 vq->last_used_idx++;
723                 entry_success++;
724         }
725
726         rte_compiler_barrier();
727         vq->used->idx += entry_success;
728         /* Kick guest if required. */
729         if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
730                 eventfd_write((int)vq->kickfd, 1);
731         return entry_success;
732
733 }
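/*
 * Minimal usage sketch for rte_vhost_dequeue_burst() (illustration only;
 * example_dequeue() and its drop-on-receive behaviour are assumptions, not
 * part of this library).  Dequeued mbufs are allocated from mbuf_pool and
 * owned by the caller, who must either transmit or free them.
 */
static inline uint16_t
example_dequeue(struct virtio_net *dev, struct rte_mempool *mbuf_pool,
        struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t dequeued, i;

        /* Drain up to nb_pkts packets from the guest TX ring. */
        dequeued = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool,
                        pkts, nb_pkts);

        /* A real application would forward these; here they are dropped. */
        for (i = 0; i < dequeued; i++)
                rte_pktmbuf_free(pkts[i]);

        return dequeued;
}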