dpdk.git: drivers/net/virtio/virtio_rxtx_packed_avx.c @ 9bc62719eeebfc8840c706bee65d32527ead3410
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2020 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_net.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"

#define BYTE_SIZE 8
/* flag bits offset in packed ring desc higher 64bits */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
        offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)

#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \
        FLAGS_BITS_OFFSET)
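/*
 * When the second 8 bytes of a descriptor (len, id, flags) are read as one
 * 64-bit word, PACKED_FLAGS_MASK isolates the AVAIL and USED bits of the
 * flags field within that word.
 */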

/* reference count offset in mbuf rearm data */
#define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \
        offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)
/* segment number offset in mbuf rearm data */
#define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \
        offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)

/* default rearm data */
#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \
        1ULL << REFCNT_BITS_OFFSET)

/* id bits offset in packed ring desc higher 64bits */
#define ID_BITS_OFFSET ((offsetof(struct vring_packed_desc, id) - \
        offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)

/* net hdr short size mask */
#define NET_HDR_MASK 0x3F

#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
        sizeof(struct vring_packed_desc))
#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
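/*
 * A packed descriptor is 16 bytes, so with the usual 64-byte cache line
 * PACKED_BATCH_SIZE evaluates to 4: the batched paths below handle four
 * descriptors/mbufs at a time.
 */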

#ifdef VIRTIO_GCC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
        for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_CLANG_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
        for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_ICC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \
        for (iter = val; iter < size; iter++)
#endif

#ifndef virtio_for_each_try_unroll
#define virtio_for_each_try_unroll(iter, val, num) \
        for (iter = val; iter < num; iter++)
#endif

static inline void
virtio_update_batch_stats(struct virtnet_stats *stats,
                          uint16_t pkt_len1,
                          uint16_t pkt_len2,
                          uint16_t pkt_len3,
                          uint16_t pkt_len4)
{
        stats->bytes += pkt_len1;
        stats->bytes += pkt_len2;
        stats->bytes += pkt_len3;
        stats->bytes += pkt_len4;
}

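/*
 * Transmit a batch of PACKED_BATCH_SIZE mbufs with a single 64-byte
 * descriptor store.  Returns -1 (so the caller falls back to the
 * single-descriptor path) when the avail index is not batch aligned, the
 * batch would wrap the ring, or any mbuf is not a simple single-segment,
 * refcnt==1 buffer with enough headroom for the virtio-net header.
 */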
static inline int
virtqueue_enqueue_batch_packed_vec(struct virtnet_tx *txvq,
                                   struct rte_mbuf **tx_pkts)
{
        struct virtqueue *vq = txvq->vq;
        uint16_t head_size = vq->hw->vtnet_hdr_size;
        uint16_t idx = vq->vq_avail_idx;
        struct virtio_net_hdr *hdr;
        struct vq_desc_extra *dxp;
        uint16_t i, cmp;

        if (vq->vq_avail_idx & PACKED_BATCH_MASK)
                return -1;

        if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
                return -1;

        /* Load the rearm data of the four mbufs */
        RTE_BUILD_BUG_ON(REFCNT_BITS_OFFSET >= 64);
        RTE_BUILD_BUG_ON(SEG_NUM_BITS_OFFSET >= 64);
        __m256i mbufs = _mm256_set_epi64x(*tx_pkts[3]->rearm_data,
                                          *tx_pkts[2]->rearm_data,
                                          *tx_pkts[1]->rearm_data,
                                          *tx_pkts[0]->rearm_data);

        /* refcnt=1 and nb_segs=1 */
        __m256i mbuf_ref = _mm256_set1_epi64x(DEFAULT_REARM_DATA);
        __m256i head_rooms = _mm256_set1_epi16(head_size);

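        /*
         * Each mbuf contributes four 16-bit lanes from its rearm_data
         * word: data_off, refcnt, nb_segs and port.  Mask 0x6 per mbuf
         * selects the refcnt and nb_segs lanes, mask 0x1 selects data_off.
         */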
        /* Check refcnt and nb_segs */
        const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
        cmp = _mm256_mask_cmpneq_epu16_mask(mask, mbufs, mbuf_ref);
        if (unlikely(cmp))
                return -1;

        /* Check that headroom is enough */
        const __mmask16 data_mask = 0x1 | 0x1 << 4 | 0x1 << 8 | 0x1 << 12;
        RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
                offsetof(struct rte_mbuf, rearm_data));
        cmp = _mm256_mask_cmplt_epu16_mask(data_mask, mbufs, head_rooms);
        if (unlikely(cmp))
                return -1;

        virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
                dxp = &vq->vq_descx[idx + i];
                dxp->ndescs = 1;
                dxp->cookie = tx_pkts[i];
        }

        virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
                tx_pkts[i]->data_off -= head_size;
                tx_pkts[i]->data_len += head_size;
        }

        __m512i descs_base = _mm512_set_epi64(tx_pkts[3]->data_len,
                        VIRTIO_MBUF_ADDR(tx_pkts[3], vq),
                        tx_pkts[2]->data_len,
                        VIRTIO_MBUF_ADDR(tx_pkts[2], vq),
                        tx_pkts[1]->data_len,
                        VIRTIO_MBUF_ADDR(tx_pkts[1], vq),
                        tx_pkts[0]->data_len,
                        VIRTIO_MBUF_ADDR(tx_pkts[0], vq));

        /* id offset and data offset */
        __m512i data_offsets = _mm512_set_epi64((uint64_t)3 << ID_BITS_OFFSET,
                                                tx_pkts[3]->data_off,
                                                (uint64_t)2 << ID_BITS_OFFSET,
                                                tx_pkts[2]->data_off,
                                                (uint64_t)1 << ID_BITS_OFFSET,
                                                tx_pkts[1]->data_off,
                                                0, tx_pkts[0]->data_off);

        __m512i new_descs = _mm512_add_epi64(descs_base, data_offsets);

        uint64_t flags_temp = (uint64_t)idx << ID_BITS_OFFSET |
                (uint64_t)vq->vq_packed.cached_flags << FLAGS_BITS_OFFSET;

        /* flags offset and guest virtual address offset */
        __m128i flag_offset = _mm_set_epi64x(flags_temp, 0);
        __m512i v_offset = _mm512_broadcast_i32x4(flag_offset);
        __m512i v_desc = _mm512_add_epi64(new_descs, v_offset);
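        /*
         * v_desc now holds the four descriptors back to back: each 128-bit
         * lane carries one descriptor, with addr (buf addr + data_off) in
         * the low 64 bits and len | (id << 32) | (flags << 48) in the high
         * 64 bits, ready for one 64-byte store.
         */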

        if (!vq->hw->has_tx_offload) {
                __m128i all_mask = _mm_set1_epi16(0xFFFF);
                virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
                        hdr = rte_pktmbuf_mtod_offset(tx_pkts[i],
                                        struct virtio_net_hdr *, -head_size);
                        __m128i v_hdr = _mm_loadu_si128((void *)hdr);
                        if (unlikely(_mm_mask_test_epi16_mask(NET_HDR_MASK,
                                                        v_hdr, all_mask))) {
                                __m128i all_zero = _mm_setzero_si128();
                                _mm_mask_storeu_epi16((void *)hdr,
                                                NET_HDR_MASK, all_zero);
                        }
                }
        } else {
                virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
                        hdr = rte_pktmbuf_mtod_offset(tx_pkts[i],
                                        struct virtio_net_hdr *, -head_size);
                        virtqueue_xmit_offload(hdr, tx_pkts[i], true);
                }
        }

        /* Enqueue packet buffers */
        _mm512_storeu_si512((void *)&vq->vq_packed.ring.desc[idx], v_desc);

        virtio_update_batch_stats(&txvq->stats, tx_pkts[0]->pkt_len,
                        tx_pkts[1]->pkt_len, tx_pkts[2]->pkt_len,
                        tx_pkts[3]->pkt_len);

        vq->vq_avail_idx += PACKED_BATCH_SIZE;
        vq->vq_free_cnt -= PACKED_BATCH_SIZE;

        if (vq->vq_avail_idx >= vq->vq_nentries) {
                vq->vq_avail_idx -= vq->vq_nentries;
                vq->vq_packed.cached_flags ^=
                        VRING_PACKED_DESC_F_AVAIL_USED;
        }

        return 0;
}

static inline int
virtqueue_enqueue_single_packed_vec(struct virtnet_tx *txvq,
                                    struct rte_mbuf *txm)
{
        struct virtqueue *vq = txvq->vq;
        struct virtio_hw *hw = vq->hw;
        uint16_t hdr_size = hw->vtnet_hdr_size;
        uint16_t slots, can_push = 0, use_indirect = 0;
        int16_t need;

        /* optimize ring usage */
        if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
              vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
            rte_mbuf_refcnt_read(txm) == 1 &&
            RTE_MBUF_DIRECT(txm) &&
            txm->nb_segs == 1 &&
            rte_pktmbuf_headroom(txm) >= hdr_size)
                can_push = 1;
        else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
                 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
                use_indirect = 1;
        /* How many main ring entries are needed for this Tx?
         * indirect   => 1
         * any_layout => number of segments
         * default    => number of segments + 1
         */
        slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
        need = slots - vq->vq_free_cnt;

        /* A positive value indicates we need to free vring descriptors */
        if (unlikely(need > 0)) {
                virtio_xmit_cleanup_inorder_packed(vq, need);
                need = slots - vq->vq_free_cnt;
                if (unlikely(need > 0)) {
                        PMD_TX_LOG(ERR,
                                   "No free tx descriptors to transmit");
                        return -1;
                }
        }

        /* Enqueue packet buffers */
        virtqueue_enqueue_xmit_packed(txvq, txm, slots, use_indirect,
                                can_push, 1);

        txvq->stats.bytes += txm->pkt_len;
        return 0;
}

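/*
 * Vectorized Tx burst entry point: try to enqueue packets in batches of
 * PACKED_BATCH_SIZE, fall back to the single-descriptor path otherwise,
 * and notify the backend once at the end if the device requested a kick.
 */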
uint16_t
virtio_xmit_pkts_packed_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
                        uint16_t nb_pkts)
{
        struct virtnet_tx *txvq = tx_queue;
        struct virtqueue *vq = txvq->vq;
        struct virtio_hw *hw = vq->hw;
        uint16_t nb_tx = 0;
        uint16_t remained;

        if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
                return nb_tx;

        if (unlikely(nb_pkts < 1))
                return nb_pkts;

        PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);

        if (vq->vq_free_cnt <= vq->vq_nentries - vq->vq_free_thresh)
                virtio_xmit_cleanup_inorder_packed(vq, vq->vq_free_thresh);

        remained = RTE_MIN(nb_pkts, vq->vq_free_cnt);

        while (remained) {
                if (remained >= PACKED_BATCH_SIZE) {
                        if (!virtqueue_enqueue_batch_packed_vec(txvq,
                                                &tx_pkts[nb_tx])) {
                                nb_tx += PACKED_BATCH_SIZE;
                                remained -= PACKED_BATCH_SIZE;
                                continue;
                        }
                }
                if (!virtqueue_enqueue_single_packed_vec(txvq,
                                        tx_pkts[nb_tx])) {
                        nb_tx++;
                        remained--;
                        continue;
                }
                break;
        }

        txvq->stats.packets += nb_tx;

        if (likely(nb_tx)) {
                if (unlikely(virtqueue_kick_prepare_packed(vq))) {
                        virtqueue_notify(vq);
                        PMD_TX_LOG(DEBUG, "Notified backend after xmit");
                }
        }

        return nb_tx;
}

/* Optionally fill offload information in structure */
static inline int
virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
        struct rte_net_hdr_lens hdr_lens;
        uint32_t hdrlen, ptype;
        int l4_supported = 0;

        /* nothing to do */
        if (hdr->flags == 0)
                return 0;

        /* GSO is not supported in the vec path, skip the check */
        m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

        ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
        m->packet_type = ptype;
        if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
            (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
            (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
                l4_supported = 1;

        if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
                hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
                if (hdr->csum_start <= hdrlen && l4_supported) {
                        m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
                } else {
                        /* Unknown proto or tunnel, do sw cksum. We can assume
                         * the cksum field is in the first segment since the
                         * buffers we provided to the host are large enough.
                         * In case of SCTP, this will be wrong since it's a CRC
                         * but there's nothing we can do.
                         */
                        uint16_t csum = 0, off;

                        if (rte_raw_cksum_mbuf(m, hdr->csum_start,
                                rte_pktmbuf_pkt_len(m) - hdr->csum_start,
                                &csum) < 0)
                                return -1;
                        if (likely(csum != 0xffff))
                                csum = ~csum;
                        off = hdr->csum_offset + hdr->csum_start;
                        if (rte_pktmbuf_data_len(m) >= off + 1)
                                *rte_pktmbuf_mtod_offset(m, uint16_t *,
                                        off) = csum;
                }
        } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
                m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
        }

        return 0;
}

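/*
 * Receive a batch of PACKED_BATCH_SIZE packets in one shot: a single
 * 512-bit load reads four descriptors, a masked compare verifies that all
 * of them have been marked used by the device, and a scatter writes the
 * adjusted lengths into the four mbufs.  Returns non-zero (so the caller
 * falls back to the single-descriptor path) when the used index is not
 * batch aligned, the batch would wrap the ring, or not all descriptors of
 * the batch are used yet.
 */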
static inline uint16_t
virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq,
                                   struct rte_mbuf **rx_pkts)
{
        struct virtqueue *vq = rxvq->vq;
        struct virtio_hw *hw = vq->hw;
        uint16_t hdr_size = hw->vtnet_hdr_size;
        uint64_t addrs[PACKED_BATCH_SIZE];
        uint16_t id = vq->vq_used_cons_idx;
        uint8_t desc_stats;
        uint16_t i;
        void *desc_addr;

        if (id & PACKED_BATCH_MASK)
                return -1;

        if (unlikely((id + PACKED_BATCH_SIZE) > vq->vq_nentries))
                return -1;

        /* only care about the avail/used bits */
#if defined(RTE_ARCH_I686)
        __m512i v_mask = _mm512_set4_epi64(PACKED_FLAGS_MASK, 0x0,
                                           PACKED_FLAGS_MASK, 0x0);
#else
        __m512i v_mask = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
#endif
        desc_addr = &vq->vq_packed.ring.desc[id];

        __m512i v_desc = _mm512_loadu_si512(desc_addr);
        __m512i v_flag = _mm512_and_epi64(v_desc, v_mask);

        __m512i v_used_flag = _mm512_setzero_si512();
        if (vq->vq_packed.used_wrap_counter)
#if defined(RTE_ARCH_I686)
                v_used_flag = _mm512_set4_epi64(PACKED_FLAGS_MASK, 0x0,
                                                PACKED_FLAGS_MASK, 0x0);
#else
                v_used_flag = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
#endif

        /* Check that all descs are used */
        desc_stats = _mm512_cmpneq_epu64_mask(v_flag, v_used_flag);
        if (desc_stats)
                return -1;

        virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
                rx_pkts[i] = (struct rte_mbuf *)vq->vq_descx[id + i].cookie;
                rte_packet_prefetch(rte_pktmbuf_mtod(rx_pkts[i], void *));

                addrs[i] = (uintptr_t)rx_pkts[i]->rx_descriptor_fields1;
        }

        /*
         * load len from desc, store into mbuf pkt_len and data_len
         * len is limited by the 16-bit buf_len, so pkt_len[16:31] can be
         * ignored
         */
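        /*
         * Shuffle control 0xAA replicates dword 2 of each descriptor (the
         * len field) across its 128-bit lane; the mask keeps only the two
         * dwords per lane that correspond to pkt_len and data_len in
         * rx_descriptor_fields1, which the scatter below overwrites.
         */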
        const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
        __m512i values = _mm512_maskz_shuffle_epi32(mask, v_desc, 0xAA);

        /* subtract hdr_size from pkt_len and data_len */
        __m512i mbuf_len_offset = _mm512_maskz_set1_epi32(mask,
                        (uint32_t)-hdr_size);

        __m512i v_value = _mm512_add_epi32(values, mbuf_len_offset);

        /* assert offset of data_len */
        RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
                offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);

        __m512i v_index = _mm512_set_epi64(addrs[3] + 8, addrs[3],
                                           addrs[2] + 8, addrs[2],
                                           addrs[1] + 8, addrs[1],
                                           addrs[0] + 8, addrs[0]);
        /* batch store into mbufs */
        _mm512_i64scatter_epi64(0, v_index, v_value, 1);

        if (hw->has_rx_offload) {
                virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
                        char *addr = (char *)rx_pkts[i]->buf_addr +
                                RTE_PKTMBUF_HEADROOM - hdr_size;
                        virtio_vec_rx_offload(rx_pkts[i],
                                        (struct virtio_net_hdr *)addr);
                }
        }

        virtio_update_batch_stats(&rxvq->stats, rx_pkts[0]->pkt_len,
                        rx_pkts[1]->pkt_len, rx_pkts[2]->pkt_len,
                        rx_pkts[3]->pkt_len);

        vq->vq_free_cnt += PACKED_BATCH_SIZE;

        vq->vq_used_cons_idx += PACKED_BATCH_SIZE;
        if (vq->vq_used_cons_idx >= vq->vq_nentries) {
                vq->vq_used_cons_idx -= vq->vq_nentries;
                vq->vq_packed.used_wrap_counter ^= 1;
        }

        return 0;
}

static uint16_t
virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq,
                                    struct rte_mbuf **rx_pkts)
{
        uint16_t used_idx, id;
        uint32_t len;
        struct virtqueue *vq = rxvq->vq;
        struct virtio_hw *hw = vq->hw;
        uint32_t hdr_size = hw->vtnet_hdr_size;
        struct virtio_net_hdr *hdr;
        struct vring_packed_desc *desc;
        struct rte_mbuf *cookie;

        desc = vq->vq_packed.ring.desc;
        used_idx = vq->vq_used_cons_idx;
        if (!desc_is_used(&desc[used_idx], vq))
                return -1;

        len = desc[used_idx].len;
        id = desc[used_idx].id;
        cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
        if (unlikely(cookie == NULL)) {
                PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
                                vq->vq_used_cons_idx);
                return -1;
        }
        rte_prefetch0(cookie);
        rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));

        cookie->data_off = RTE_PKTMBUF_HEADROOM;
        cookie->ol_flags = 0;
        cookie->pkt_len = (uint32_t)(len - hdr_size);
        cookie->data_len = (uint32_t)(len - hdr_size);

        hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
                                        RTE_PKTMBUF_HEADROOM - hdr_size);
        if (hw->has_rx_offload)
                virtio_vec_rx_offload(cookie, hdr);

        *rx_pkts = cookie;

        rxvq->stats.bytes += cookie->pkt_len;

        vq->vq_free_cnt++;
        vq->vq_used_cons_idx++;
        if (vq->vq_used_cons_idx >= vq->vq_nentries) {
                vq->vq_used_cons_idx -= vq->vq_nentries;
                vq->vq_packed.used_wrap_counter ^= 1;
        }

        return 0;
}

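/*
 * Refill the Rx ring with 'num' new mbufs.  Descriptors are written in
 * batches; the flags of the head descriptor are deliberately stored last
 * (with the proper barrier) so the device never sees a partially
 * initialized chain of buffers.
 */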
static inline void
virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq,
                              struct rte_mbuf **cookie,
                              uint16_t num)
{
        struct virtqueue *vq = rxvq->vq;
        struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
        uint16_t flags = vq->vq_packed.cached_flags;
        struct virtio_hw *hw = vq->hw;
        struct vq_desc_extra *dxp;
        uint16_t idx, i;
        uint16_t batch_num, total_num = 0;
        uint16_t head_idx = vq->vq_avail_idx;
        uint16_t head_flag = vq->vq_packed.cached_flags;
        uint64_t addr;

        do {
                idx = vq->vq_avail_idx;

                batch_num = PACKED_BATCH_SIZE;
                if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
                        batch_num = vq->vq_nentries - idx;
                if (unlikely((total_num + batch_num) > num))
                        batch_num = num - total_num;

                virtio_for_each_try_unroll(i, 0, batch_num) {
                        dxp = &vq->vq_descx[idx + i];
                        dxp->cookie = (void *)cookie[total_num + i];

                        addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) +
                                RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
                        start_dp[idx + i].addr = addr;
                        start_dp[idx + i].len = cookie[total_num + i]->buf_len
                                - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
                        if (total_num || i) {
                                virtqueue_store_flags_packed(&start_dp[idx + i],
                                                flags, hw->weak_barriers);
                        }
                }

                vq->vq_avail_idx += batch_num;
                if (vq->vq_avail_idx >= vq->vq_nentries) {
                        vq->vq_avail_idx -= vq->vq_nentries;
                        vq->vq_packed.cached_flags ^=
                                VRING_PACKED_DESC_F_AVAIL_USED;
                        flags = vq->vq_packed.cached_flags;
                }
                total_num += batch_num;
        } while (total_num < num);

        virtqueue_store_flags_packed(&start_dp[head_idx], head_flag,
                                hw->weak_barriers);
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
}

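/*
 * Vectorized Rx burst entry point: trim the burst so the batched dequeues
 * stay aligned to the used index, dequeue in batches of PACKED_BATCH_SIZE
 * with a single-descriptor fallback, then refill the ring and notify the
 * device once the free count crosses the threshold.
 */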
uint16_t
virtio_recv_pkts_packed_vec(void *rx_queue,
                            struct rte_mbuf **rx_pkts,
                            uint16_t nb_pkts)
{
        struct virtnet_rx *rxvq = rx_queue;
        struct virtqueue *vq = rxvq->vq;
        struct virtio_hw *hw = vq->hw;
        uint16_t num, nb_rx = 0;
        uint32_t nb_enqueued = 0;
        uint16_t free_cnt = vq->vq_free_thresh;

        if (unlikely(hw->started == 0))
                return nb_rx;

        num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
        if (likely(num > PACKED_BATCH_SIZE))
                num = num - ((vq->vq_used_cons_idx + num) % PACKED_BATCH_SIZE);

        while (num) {
                if (!virtqueue_dequeue_batch_packed_vec(rxvq,
                                        &rx_pkts[nb_rx])) {
                        nb_rx += PACKED_BATCH_SIZE;
                        num -= PACKED_BATCH_SIZE;
                        continue;
                }
                if (!virtqueue_dequeue_single_packed_vec(rxvq,
                                        &rx_pkts[nb_rx])) {
                        nb_rx++;
                        num--;
                        continue;
                }
                break;
        }

        PMD_RX_LOG(DEBUG, "dequeue:%d", num);

        rxvq->stats.packets += nb_rx;

        if (likely(vq->vq_free_cnt >= free_cnt)) {
                struct rte_mbuf *new_pkts[free_cnt];
                if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
                                                free_cnt) == 0)) {
                        virtio_recv_refill_packed_vec(rxvq, new_pkts,
                                        free_cnt);
                        nb_enqueued += free_cnt;
                } else {
                        struct rte_eth_dev *dev =
                                &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed += free_cnt;
                }
        }

        if (likely(nb_enqueued)) {
                if (unlikely(virtqueue_kick_prepare_packed(vq))) {
                        virtqueue_notify(vq);
                        PMD_RX_LOG(DEBUG, "Notified");
                }
        }

        return nb_rx;
}