vhost: add queue id parameter
[dpdk.git] / lib / librte_vhost / vhost_rxtx.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <arpa/inet.h>
#include <getopt.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/virtio_ring.h>
#include <signal.h>
#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/param.h>
#include <unistd.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include <rte_malloc.h>

#include "main.h"
#include "virtio-net.h"
#include "vhost-net-cdev.h"

#define MAX_PKT_BURST 32                /* Max burst size for RX/TX */

/*
 * This function adds buffers to the virtio device's RX virtqueue. Buffers can
 * be received from the physical port or from another virtio device. A packet
 * count is returned to indicate the number of packets that were successfully
 * added to the RX queue. This function works when mergeable is disabled.
 */
static inline uint32_t __attribute__((always_inline))
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count)
{
        struct vhost_virtqueue *vq;
        struct vring_desc *desc;
        struct rte_mbuf *buff;
        /* The virtio_hdr is initialised to 0. */
        struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
        uint64_t buff_addr = 0;
        uint64_t buff_hdr_addr = 0;
        uint32_t head[MAX_PKT_BURST], packet_len = 0;
        uint32_t head_idx, packet_success = 0;
        uint16_t avail_idx, res_cur_idx;
        uint16_t res_base_idx, res_end_idx;
        uint16_t free_entries;
        uint8_t success = 0;

        LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
        if (unlikely(queue_id != VIRTIO_RXQ)) {
                LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
                return 0;
        }

        vq = dev->virtqueue[VIRTIO_RXQ];
        count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

        /* As many data cores may want access to available buffers, they need to be reserved. */
        do {
                res_base_idx = vq->last_used_idx_res;
                avail_idx = *((volatile uint16_t *)&vq->avail->idx);

                free_entries = (avail_idx - res_base_idx);
                /* Check that we have enough buffers. */
                if (unlikely(count > free_entries))
                        count = free_entries;

                if (count == 0)
                        return 0;

                res_end_idx = res_base_idx + count;
                /* vq->last_used_idx_res is atomically updated. */
                success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx,
                                                                        res_end_idx);
        } while (unlikely(success == 0));
        res_cur_idx = res_base_idx;
        LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx);

        /* Prefetch available ring to retrieve indexes. */
        rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

        /* Retrieve all of the head indexes first to avoid caching issues. */
        for (head_idx = 0; head_idx < count; head_idx++)
                head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];

        /* Prefetch descriptor index. */
        rte_prefetch0(&vq->desc[head[packet_success]]);

        while (res_cur_idx != res_end_idx) {
                /* Get descriptor from available ring */
                desc = &vq->desc[head[packet_success]];

                buff = pkts[packet_success];

                /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
                buff_addr = gpa_to_vva(dev, desc->addr);
                /* Prefetch buffer address. */
                rte_prefetch0((void *)(uintptr_t)buff_addr);

                /* Copy virtio_hdr to packet and increment buffer address */
                buff_hdr_addr = buff_addr;
                packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;

                /*
                 * If the descriptors are chained the header and data are
                 * placed in separate buffers.
                 */
                if (desc->flags & VRING_DESC_F_NEXT) {
                        desc->len = vq->vhost_hlen;
                        desc = &vq->desc[desc->next];
                        /* Buffer address translation. */
                        buff_addr = gpa_to_vva(dev, desc->addr);
                        desc->len = rte_pktmbuf_data_len(buff);
                } else {
                        buff_addr += vq->vhost_hlen;
                        desc->len = packet_len;
                }

                /* Update used ring with desc information */
                vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
                vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;

                /* Copy mbuf data to buffer */
                rte_memcpy((void *)(uintptr_t)buff_addr,
                        rte_pktmbuf_mtod(buff, const void *),
                        rte_pktmbuf_data_len(buff));
                PRINT_PACKET(dev, (uintptr_t)buff_addr,
                        rte_pktmbuf_data_len(buff), 0);

                res_cur_idx++;
                packet_success++;

                rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
                        (const void *)&virtio_hdr, vq->vhost_hlen);

                PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);

                if (res_cur_idx < res_end_idx) {
                        /* Prefetch descriptor index. */
                        rte_prefetch0(&vq->desc[head[packet_success]]);
                }
        }

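        /*
         * Make sure the buffer and header copies above are ordered before
         * the used ring index is published to the guest below.
         */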
        rte_compiler_barrier();

        /* Wait until it's our turn to add our buffer to the used ring. */
        while (unlikely(vq->last_used_idx != res_base_idx))
                rte_pause();

        *(volatile uint16_t *)&vq->used->idx += count;
        vq->last_used_idx = res_end_idx;

        /* Kick the guest if necessary. */
        if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
                eventfd_write((int)vq->kickfd, 1);
        return count;
}

static inline uint32_t __attribute__((always_inline))
copy_from_mbuf_to_vring(struct virtio_net *dev,
        uint16_t res_base_idx, uint16_t res_end_idx,
        struct rte_mbuf *pkt)
{
        uint32_t vec_idx = 0;
        uint32_t entry_success = 0;
        struct vhost_virtqueue *vq;
        /* The virtio_hdr is initialised to 0. */
        struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {
                {0, 0, 0, 0, 0, 0}, 0};
        uint16_t cur_idx = res_base_idx;
        uint64_t vb_addr = 0;
        uint64_t vb_hdr_addr = 0;
        uint32_t seg_offset = 0;
        uint32_t vb_offset = 0;
        uint32_t seg_avail;
        uint32_t vb_avail;
        uint32_t cpy_len, entry_len;

        if (pkt == NULL)
                return 0;

        LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| "
                "End Index %d\n",
                dev->device_fh, cur_idx, res_end_idx);

        /*
         * Convert from gpa to vva
         * (guest physical addr -> vhost virtual addr)
         */
        vq = dev->virtqueue[VIRTIO_RXQ];
        vb_addr =
                gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
        vb_hdr_addr = vb_addr;

        /* Prefetch buffer address. */
        rte_prefetch0((void *)(uintptr_t)vb_addr);

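        /*
         * For mergeable RX, num_buffers tells the guest how many used ring
         * entries (vring buffers) this single packet will consume.
         */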
        virtio_hdr.num_buffers = res_end_idx - res_base_idx;

        LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
                dev->device_fh, virtio_hdr.num_buffers);

        rte_memcpy((void *)(uintptr_t)vb_hdr_addr,
                (const void *)&virtio_hdr, vq->vhost_hlen);

        PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1);

        seg_avail = rte_pktmbuf_data_len(pkt);
        vb_offset = vq->vhost_hlen;
        vb_avail =
                vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;

        entry_len = vq->vhost_hlen;

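        /*
         * If the first vring buffer is exactly vhost_hlen bytes, it only
         * holds the virtio header, so the packet data must start in the
         * next buffer from buf_vec.
         */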
        if (vb_avail == 0) {
                uint32_t desc_idx =
                        vq->buf_vec[vec_idx].desc_idx;
                vq->desc[desc_idx].len = vq->vhost_hlen;

                if ((vq->desc[desc_idx].flags
                        & VRING_DESC_F_NEXT) == 0) {
                        /* Update used ring with desc information */
                        vq->used->ring[cur_idx & (vq->size - 1)].id
                                = vq->buf_vec[vec_idx].desc_idx;
                        vq->used->ring[cur_idx & (vq->size - 1)].len
                                = entry_len;

                        entry_len = 0;
                        cur_idx++;
                        entry_success++;
                }

                vec_idx++;
                vb_addr =
                        gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);

                /* Prefetch buffer address. */
                rte_prefetch0((void *)(uintptr_t)vb_addr);
                vb_offset = 0;
                vb_avail = vq->buf_vec[vec_idx].buf_len;
        }

        cpy_len = RTE_MIN(vb_avail, seg_avail);

        while (cpy_len > 0) {
                /* Copy mbuf data to vring buffer */
                rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
                        (const void *)(rte_pktmbuf_mtod(pkt, char *) + seg_offset),
                        cpy_len);

                PRINT_PACKET(dev,
                        (uintptr_t)(vb_addr + vb_offset),
                        cpy_len, 0);

                seg_offset += cpy_len;
                vb_offset += cpy_len;
                seg_avail -= cpy_len;
                vb_avail -= cpy_len;
                entry_len += cpy_len;

                if (seg_avail != 0) {
                        /*
                         * The virtio buffer in this vring entry has reached
                         * its end, but the current mbuf segment is not
                         * finished yet.
                         */
                        if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
                                VRING_DESC_F_NEXT) == 0) {
                                /* Update used ring with desc information */
                                vq->used->ring[cur_idx & (vq->size - 1)].id
                                        = vq->buf_vec[vec_idx].desc_idx;
                                vq->used->ring[cur_idx & (vq->size - 1)].len
                                        = entry_len;
                                entry_len = 0;
                                cur_idx++;
                                entry_success++;
                        }

                        vec_idx++;
                        vb_addr = gpa_to_vva(dev,
                                vq->buf_vec[vec_idx].buf_addr);
                        vb_offset = 0;
                        vb_avail = vq->buf_vec[vec_idx].buf_len;
                        cpy_len = RTE_MIN(vb_avail, seg_avail);
                } else {
                        /*
                         * The current segment is complete; check whether
                         * the whole packet is complete as well.
                         */
                        pkt = pkt->next;
                        if (pkt != NULL) {
                                /*
                                 * There are more segments.
                                 */
                                if (vb_avail == 0) {
                                        /*
                                         * The current vring buffer is used
                                         * up; fetch the next buffer from
                                         * buf_vec.
                                         */
                                        uint32_t desc_idx =
                                                vq->buf_vec[vec_idx].desc_idx;
                                        vq->desc[desc_idx].len = vb_offset;

                                        if ((vq->desc[desc_idx].flags &
                                                VRING_DESC_F_NEXT) == 0) {
                                                uint16_t wrapped_idx =
                                                        cur_idx & (vq->size - 1);
                                                /*
                                                 * Update used ring with the
                                                 * descriptor information
                                                 */
                                                vq->used->ring[wrapped_idx].id
                                                        = desc_idx;
                                                vq->used->ring[wrapped_idx].len
                                                        = entry_len;
                                                entry_success++;
                                                entry_len = 0;
                                                cur_idx++;
                                        }

                                        /* Get next buffer from buf_vec. */
                                        vec_idx++;
                                        vb_addr = gpa_to_vva(dev,
                                                vq->buf_vec[vec_idx].buf_addr);
                                        vb_avail =
                                                vq->buf_vec[vec_idx].buf_len;
                                        vb_offset = 0;
                                }

                                seg_offset = 0;
                                seg_avail = rte_pktmbuf_data_len(pkt);
                                cpy_len = RTE_MIN(vb_avail, seg_avail);
                        } else {
                                /*
                                 * The whole packet is complete.
                                 */
                                uint32_t desc_idx =
                                        vq->buf_vec[vec_idx].desc_idx;
                                vq->desc[desc_idx].len = vb_offset;

                                while (vq->desc[desc_idx].flags &
                                        VRING_DESC_F_NEXT) {
                                        desc_idx = vq->desc[desc_idx].next;
                                        vq->desc[desc_idx].len = 0;
                                }

                                /* Update used ring with desc information */
                                vq->used->ring[cur_idx & (vq->size - 1)].id
                                        = vq->buf_vec[vec_idx].desc_idx;
                                vq->used->ring[cur_idx & (vq->size - 1)].len
                                        = entry_len;
                                entry_len = 0;
                                cur_idx++;
                                entry_success++;
                                seg_avail = 0;
                                cpy_len = RTE_MIN(vb_avail, seg_avail);
                        }
                }
        }

        return entry_success;
}

/*
 * This function adds buffers to the virtio device's RX virtqueue. Buffers can
 * be received from the physical port or from another virtio device. A packet
 * count is returned to indicate the number of packets that were successfully
 * added to the RX queue. This function works for mergeable RX.
 */
static inline uint32_t __attribute__((always_inline))
virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts,
        uint32_t count)
{
        struct vhost_virtqueue *vq;
        uint32_t pkt_idx = 0, entry_success = 0;
        uint16_t avail_idx, res_cur_idx;
        uint16_t res_base_idx, res_end_idx;
        uint8_t success = 0;

        LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
                dev->device_fh);
        if (unlikely(queue_id != VIRTIO_RXQ)) {
                LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
                return 0;
        }

        vq = dev->virtqueue[VIRTIO_RXQ];
        count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

        if (count == 0)
                return 0;

        for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
                uint32_t secure_len = 0;
                uint16_t need_cnt;
                uint32_t vec_idx = 0;
                uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
                uint16_t i, id;

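                /*
                 * Reserve as many descriptor chains from the available ring
                 * as are needed to hold pkt_len bytes; secure_len tracks the
                 * buffer space found so far.
                 */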
                do {
                        /*
                         * As many data cores may want access to available
                         * buffers, they need to be reserved.
                         */
                        res_base_idx = vq->last_used_idx_res;
                        res_cur_idx = res_base_idx;

                        do {
                                avail_idx = *((volatile uint16_t *)&vq->avail->idx);
                                if (unlikely(res_cur_idx == avail_idx)) {
                                        LOG_DEBUG(VHOST_DATA,
                                                "(%"PRIu64") Failed "
                                                "to get enough desc from "
                                                "vring\n",
                                                dev->device_fh);
                                        return pkt_idx;
                                } else {
                                        uint16_t wrapped_idx =
                                                (res_cur_idx) & (vq->size - 1);
                                        uint32_t idx =
                                                vq->avail->ring[wrapped_idx];
                                        uint8_t next_desc;

                                        do {
                                                next_desc = 0;
                                                secure_len += vq->desc[idx].len;
                                                if (vq->desc[idx].flags &
                                                        VRING_DESC_F_NEXT) {
                                                        idx = vq->desc[idx].next;
                                                        next_desc = 1;
                                                }
                                        } while (next_desc);

                                        res_cur_idx++;
                                }
                        } while (pkt_len > secure_len);

                        /* vq->last_used_idx_res is atomically updated. */
                        success = rte_atomic16_cmpset(&vq->last_used_idx_res,
                                                        res_base_idx,
                                                        res_cur_idx);
                } while (success == 0);

                id = res_base_idx;
                need_cnt = res_cur_idx - res_base_idx;

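                /*
                 * Record the guest buffer address, length and descriptor
                 * index of every buffer in the reserved chains so that
                 * copy_from_mbuf_to_vring() can walk them directly.
                 */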
                for (i = 0; i < need_cnt; i++, id++) {
                        uint16_t wrapped_idx = id & (vq->size - 1);
                        uint32_t idx = vq->avail->ring[wrapped_idx];
                        uint8_t next_desc;
                        do {
                                next_desc = 0;
                                vq->buf_vec[vec_idx].buf_addr =
                                        vq->desc[idx].addr;
                                vq->buf_vec[vec_idx].buf_len =
                                        vq->desc[idx].len;
                                vq->buf_vec[vec_idx].desc_idx = idx;
                                vec_idx++;

                                if (vq->desc[idx].flags & VRING_DESC_F_NEXT) {
                                        idx = vq->desc[idx].next;
                                        next_desc = 1;
                                }
                        } while (next_desc);
                }

                res_end_idx = res_cur_idx;

                entry_success = copy_from_mbuf_to_vring(dev, res_base_idx,
                        res_end_idx, pkts[pkt_idx]);

                rte_compiler_barrier();

                /*
                 * Wait until it's our turn to add our buffer
                 * to the used ring.
                 */
                while (unlikely(vq->last_used_idx != res_base_idx))
                        rte_pause();

                *(volatile uint16_t *)&vq->used->idx += entry_success;
                vq->last_used_idx = res_end_idx;

                /* Kick the guest if necessary. */
                if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
                        eventfd_write((int)vq->kickfd, 1);
        }

        return count;
}

/* This function works for TX packets with mergeable feature enabled. */
static inline uint16_t __attribute__((always_inline))
virtio_dev_merge_tx(struct virtio_net *dev, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
        struct rte_mbuf *m, *prev;
        struct vhost_virtqueue *vq;
        struct vring_desc *desc;
        uint64_t vb_addr = 0;
        uint32_t head[MAX_PKT_BURST];
        uint32_t used_idx;
        uint32_t i;
        uint16_t free_entries, entry_success = 0;
        uint16_t avail_idx;

        if (unlikely(queue_id != VIRTIO_TXQ)) {
                LOG_DEBUG(VHOST_DATA, "mq isn't supported in this version.\n");
                return 0;
        }

        vq = dev->virtqueue[VIRTIO_TXQ];
        avail_idx = *((volatile uint16_t *)&vq->avail->idx);

        /* If there are no available buffers then return. */
        if (vq->last_used_idx == avail_idx)
                return 0;

        LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_tx()\n",
                dev->device_fh);

        /* Prefetch available ring to retrieve head indexes. */
        rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);

        /* Get the number of free entries in the ring. */
        free_entries = (avail_idx - vq->last_used_idx);

        free_entries = RTE_MIN(free_entries, count);
        /* Limit to MAX_PKT_BURST. */
        free_entries = RTE_MIN(free_entries, MAX_PKT_BURST);

        LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
                dev->device_fh, free_entries);
        /* Retrieve all of the head indexes first to avoid caching issues. */
        for (i = 0; i < free_entries; i++)
                head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];

        /* Prefetch descriptor index. */
        rte_prefetch0(&vq->desc[head[entry_success]]);
        rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);

        while (entry_success < free_entries) {
                uint32_t vb_avail, vb_offset;
                uint32_t seg_avail, seg_offset;
                uint32_t cpy_len;
                uint32_t seg_num = 0;
                struct rte_mbuf *cur;
                uint8_t alloc_err = 0;

                desc = &vq->desc[head[entry_success]];

                /* Discard first buffer as it is the virtio header */
                desc = &vq->desc[desc->next];

                /* Buffer address translation. */
                vb_addr = gpa_to_vva(dev, desc->addr);
                /* Prefetch buffer address. */
                rte_prefetch0((void *)(uintptr_t)vb_addr);

                used_idx = vq->last_used_idx & (vq->size - 1);

                if (entry_success < (free_entries - 1)) {
                        /* Prefetch descriptor index. */
                        rte_prefetch0(&vq->desc[head[entry_success+1]]);
                        rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]);
                }

                /* Update used index buffer information. */
                vq->used->ring[used_idx].id = head[entry_success];
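                /*
                 * Nothing is written back into the guest buffers on the TX
                 * path, so the used entry length is left at 0.
                 */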
                vq->used->ring[used_idx].len = 0;

                vb_offset = 0;
                vb_avail = desc->len;
                /* Allocate an mbuf and populate the structure. */
                m = rte_pktmbuf_alloc(mbuf_pool);
                if (unlikely(m == NULL)) {
                        RTE_LOG(ERR, VHOST_DATA,
                                "Failed to allocate memory for mbuf.\n");
                        return entry_success;
                }
                seg_offset = 0;
                seg_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
                cpy_len = RTE_MIN(vb_avail, seg_avail);

                PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0);

                seg_num++;
                cur = m;
                prev = m;
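                /*
                 * Copy the descriptor chain into a chain of mbufs, starting
                 * a new mbuf segment whenever the current one is full.
                 */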
                while (cpy_len != 0) {
                        rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
                                (void *)((uintptr_t)(vb_addr + vb_offset)),
                                cpy_len);

                        seg_offset += cpy_len;
                        vb_offset += cpy_len;
                        vb_avail -= cpy_len;
                        seg_avail -= cpy_len;

                        if (vb_avail != 0) {
                                /*
                                 * The mbuf segment has reached its end, but
                                 * the virtio buffer in the TX vring still
                                 * has more data to be copied.
                                 */
                                cur->data_len = seg_offset;
                                m->pkt_len += seg_offset;
                                /* Allocate mbuf and populate the structure. */
                                cur = rte_pktmbuf_alloc(mbuf_pool);
                                if (unlikely(cur == NULL)) {
                                        RTE_LOG(ERR, VHOST_DATA, "Failed to "
                                                "allocate memory for mbuf.\n");
                                        rte_pktmbuf_free(m);
                                        alloc_err = 1;
                                        break;
                                }

                                seg_num++;
                                prev->next = cur;
                                prev = cur;
                                seg_offset = 0;
                                seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
                        } else {
                                if (desc->flags & VRING_DESC_F_NEXT) {
                                        /*
                                         * There are more virtio buffers in
                                         * the same vring entry that need to
                                         * be copied.
                                         */
                                        if (seg_avail == 0) {
                                                /*
                                                 * The current segment has no
                                                 * room to accommodate more
                                                 * data.
                                                 */
                                                cur->data_len = seg_offset;
                                                m->pkt_len += seg_offset;
                                                /*
                                                 * Allocate an mbuf and
                                                 * populate the structure.
                                                 */
                                                cur = rte_pktmbuf_alloc(mbuf_pool);
                                                if (unlikely(cur == NULL)) {
                                                        RTE_LOG(ERR,
                                                                VHOST_DATA,
                                                                "Failed to "
                                                                "allocate memory "
                                                                "for mbuf\n");
                                                        rte_pktmbuf_free(m);
                                                        alloc_err = 1;
                                                        break;
                                                }
                                                seg_num++;
                                                prev->next = cur;
                                                prev = cur;
                                                seg_offset = 0;
                                                seg_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
                                        }

                                        desc = &vq->desc[desc->next];

                                        /* Buffer address translation. */
                                        vb_addr = gpa_to_vva(dev, desc->addr);
                                        /* Prefetch buffer address. */
                                        rte_prefetch0((void *)(uintptr_t)vb_addr);
                                        vb_offset = 0;
                                        vb_avail = desc->len;

                                        PRINT_PACKET(dev, (uintptr_t)vb_addr,
                                                desc->len, 0);
                                } else {
                                        /* The whole packet completes. */
                                        cur->data_len = seg_offset;
                                        m->pkt_len += seg_offset;
                                        vb_avail = 0;
                                }
                        }

                        cpy_len = RTE_MIN(vb_avail, seg_avail);
                }

                if (unlikely(alloc_err == 1))
                        break;

                m->nb_segs = seg_num;

                pkts[entry_success] = m;
                vq->last_used_idx++;
                entry_success++;
        }

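        /*
         * Ensure the used ring entries above are written before the used
         * index itself is updated and the guest is notified.
         */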
        rte_compiler_barrier();
        vq->used->idx += entry_success;
        /* Kick guest if required. */
        if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
                eventfd_write((int)vq->kickfd, 1);
        return entry_success;
}