/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2020 Intel Corporation
 */

#include <stdint.h>

#include <rte_net.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"

#define BYTE_SIZE 8

/* flag bits offset in packed ring desc higher 64bits */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)

#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \
	FLAGS_BITS_OFFSET)

/* reference count offset in mbuf rearm data */
#define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \
	offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)
/* segment number offset in mbuf rearm data */
#define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \
	offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)

/* default rearm data */
#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \
	1ULL << REFCNT_BITS_OFFSET)

/* id bits offset in packed ring desc higher 64bits */
#define ID_BITS_OFFSET ((offsetof(struct vring_packed_desc, id) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)

/* net hdr short size mask */
#define NET_HDR_MASK 0x3F

#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
	sizeof(struct vring_packed_desc))
#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
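
/*
 * With 64-byte cache lines and 16-byte packed descriptors,
 * PACKED_BATCH_SIZE evaluates to 4, so the vector paths below always
 * handle four descriptors (one cache line) per iteration.  The
 * virtio_for_each_try_unroll() helpers ask the compiler to unroll those
 * fixed-size loops; the VIRTIO_*_UNROLL_PRAGMA macros are expected to
 * be defined by the build system when the compiler supports the pragma.
 */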
#ifdef VIRTIO_GCC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_CLANG_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_ICC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \
	for (iter = val; iter < size; iter++)
#endif

#ifndef virtio_for_each_try_unroll
#define virtio_for_each_try_unroll(iter, val, num) \
	for (iter = val; iter < num; iter++)
#endif

static inline void
virtio_update_batch_stats(struct virtnet_stats *stats,
			  uint16_t pkt_len1,
			  uint16_t pkt_len2,
			  uint16_t pkt_len3,
			  uint16_t pkt_len4)
{
	stats->bytes += pkt_len1;
	stats->bytes += pkt_len2;
	stats->bytes += pkt_len3;
	stats->bytes += pkt_len4;
}
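
/*
 * Try to enqueue PACKED_BATCH_SIZE (4) mbufs as a single cache-line-wide
 * AVX512 store of packed descriptors.  Returns 0 on success; a non-zero
 * return tells the caller to fall back to the single-packet path.
 */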
static inline int
virtqueue_enqueue_batch_packed_vec(struct virtnet_tx *txvq,
				   struct rte_mbuf **tx_pkts)
{
	struct virtqueue *vq = txvq->vq;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	uint16_t idx = vq->vq_avail_idx;
	struct virtio_net_hdr *hdr;
	struct vq_desc_extra *dxp;
	uint16_t i, cmp;

	if (vq->vq_avail_idx & PACKED_BATCH_MASK)
		return -1;

	if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
		return -1;

	/* Load four mbufs rearm data */
	RTE_BUILD_BUG_ON(REFCNT_BITS_OFFSET >= 64);
	RTE_BUILD_BUG_ON(SEG_NUM_BITS_OFFSET >= 64);
	__m256i mbufs = _mm256_set_epi64x(*tx_pkts[3]->rearm_data,
					  *tx_pkts[2]->rearm_data,
					  *tx_pkts[1]->rearm_data,
					  *tx_pkts[0]->rearm_data);

	/* refcnt=1 and nb_segs=1 */
	__m256i mbuf_ref = _mm256_set1_epi64x(DEFAULT_REARM_DATA);
	__m256i head_rooms = _mm256_set1_epi16(head_size);
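
	/*
	 * Each 64-bit rearm_data word holds four 16-bit fields
	 * (data_off, refcnt, nb_segs, port), so within every 64-bit lane
	 * the mask 0x6 selects refcnt/nb_segs and 0x1 selects data_off
	 * for the headroom check further down.
	 */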
	/* Check refcnt and nb_segs */
	const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
	cmp = _mm256_mask_cmpneq_epu16_mask(mask, mbufs, mbuf_ref);
	if (unlikely(cmp))
		return -1;

	/* Check headroom is enough */
	const __mmask16 data_mask = 0x1 | 0x1 << 4 | 0x1 << 8 | 0x1 << 12;
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
		offsetof(struct rte_mbuf, rearm_data));
	cmp = _mm256_mask_cmplt_epu16_mask(data_mask, mbufs, head_rooms);
	if (unlikely(cmp))
		return -1;

	virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		dxp = &vq->vq_descx[idx + i];
		dxp->ndescs = 1;
		dxp->cookie = tx_pkts[i];
	}

	virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		tx_pkts[i]->data_off -= head_size;
		tx_pkts[i]->data_len += head_size;
	}
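
	/*
	 * data_off was pulled back by head_size above, so each packet now
	 * carries its virtio-net header in the mbuf headroom.  Build the
	 * four packed descriptors as one 512-bit value: the low 64 bits
	 * of each 128-bit lane hold the buffer address, the high 64 bits
	 * hold len, id and flags.
	 */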
	__m512i descs_base = _mm512_set_epi64(tx_pkts[3]->data_len,
			VIRTIO_MBUF_ADDR(tx_pkts[3], vq),
			tx_pkts[2]->data_len,
			VIRTIO_MBUF_ADDR(tx_pkts[2], vq),
			tx_pkts[1]->data_len,
			VIRTIO_MBUF_ADDR(tx_pkts[1], vq),
			tx_pkts[0]->data_len,
			VIRTIO_MBUF_ADDR(tx_pkts[0], vq));

	/* id offset and data offset */
	__m512i data_offsets = _mm512_set_epi64((uint64_t)3 << ID_BITS_OFFSET,
			tx_pkts[3]->data_off,
			(uint64_t)2 << ID_BITS_OFFSET,
			tx_pkts[2]->data_off,
			(uint64_t)1 << ID_BITS_OFFSET,
			tx_pkts[1]->data_off,
			0, tx_pkts[0]->data_off);

	__m512i new_descs = _mm512_add_epi64(descs_base, data_offsets);

	uint64_t flags_temp = (uint64_t)idx << ID_BITS_OFFSET |
		(uint64_t)vq->vq_packed.cached_flags << FLAGS_BITS_OFFSET;

	/* flags offset and guest virtual address offset */
	__m128i flag_offset = _mm_set_epi64x(flags_temp, 0);
	__m512i v_offset = _mm512_broadcast_i32x4(flag_offset);
	__m512i v_desc = _mm512_add_epi64(new_descs, v_offset);
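
	/*
	 * The virtio-net header now sitting in each headroom may contain
	 * stale data.  Without Tx offloads it only has to be cleared,
	 * which is done with a masked store over the header words
	 * selected by NET_HDR_MASK; with offloads it is filled in by
	 * virtqueue_xmit_offload().
	 */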
	if (!vq->hw->has_tx_offload) {
		__m128i all_mask = _mm_set1_epi16(0xFFFF);
		virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
			hdr = rte_pktmbuf_mtod_offset(tx_pkts[i],
					struct virtio_net_hdr *, -head_size);
			__m128i v_hdr = _mm_loadu_si128((void *)hdr);
			if (unlikely(_mm_mask_test_epi16_mask(NET_HDR_MASK,
							v_hdr, all_mask))) {
				__m128i all_zero = _mm_setzero_si128();
				_mm_mask_storeu_epi16((void *)hdr,
						NET_HDR_MASK, all_zero);
			}
		}
	} else {
		virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
			hdr = rte_pktmbuf_mtod_offset(tx_pkts[i],
					struct virtio_net_hdr *, -head_size);
			virtqueue_xmit_offload(hdr, tx_pkts[i], true);
		}
	}

	/* Enqueue Packet buffers */
	_mm512_storeu_si512((void *)&vq->vq_packed.ring.desc[idx], v_desc);

	virtio_update_batch_stats(&txvq->stats, tx_pkts[0]->pkt_len,
			tx_pkts[1]->pkt_len, tx_pkts[2]->pkt_len,
			tx_pkts[3]->pkt_len);

	vq->vq_avail_idx += PACKED_BATCH_SIZE;
	vq->vq_free_cnt -= PACKED_BATCH_SIZE;

	if (vq->vq_avail_idx >= vq->vq_nentries) {
		vq->vq_avail_idx -= vq->vq_nentries;
		vq->vq_packed.cached_flags ^=
			VRING_PACKED_DESC_F_AVAIL_USED;
	}

	return 0;
}
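
/*
 * Scalar fallback: enqueue one mbuf (possibly multi-segment or without
 * enough headroom for the virtio-net header) through the generic
 * packed-ring transmit helper.
 */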
static inline int
virtqueue_enqueue_single_packed_vec(struct virtnet_tx *txvq,
				    struct rte_mbuf *txm)
{
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t slots, can_push;
	int16_t need;

	/* How many main ring entries are needed for this Tx?
	 * any_layout => number of segments
	 * default    => number of segments + 1
	 */
	can_push = rte_mbuf_refcnt_read(txm) == 1 &&
		   RTE_MBUF_DIRECT(txm) &&
		   txm->nb_segs == 1 &&
		   rte_pktmbuf_headroom(txm) >= hdr_size;

	slots = txm->nb_segs + !can_push;
	need = slots - vq->vq_free_cnt;

	/* A positive value indicates more free vring descriptors are needed */
	if (unlikely(need > 0)) {
		virtio_xmit_cleanup_inorder_packed(vq, need);
		need = slots - vq->vq_free_cnt;
		if (unlikely(need > 0)) {
			PMD_TX_LOG(ERR,
				   "No free tx descriptors to transmit");
			return -1;
		}
	}

	/* Enqueue Packet buffers */
	virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push, 1);

	txvq->stats.bytes += txm->pkt_len;

	return 0;
}
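
/*
 * Burst Tx entry point for the packed-ring vectorized path: reclaim used
 * descriptors when the ring is getting full, then try the four-packet
 * batch routine and fall back to single-packet enqueue whenever a full
 * batch cannot be used.
 */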
uint16_t
virtio_xmit_pkts_packed_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
			    uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t nb_tx = 0;
	uint16_t remained;

	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
		return nb_tx;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);

	if (vq->vq_free_cnt <= vq->vq_nentries - vq->vq_free_thresh)
		virtio_xmit_cleanup_inorder_packed(vq, vq->vq_free_thresh);

	remained = RTE_MIN(nb_pkts, vq->vq_free_cnt);

	while (remained) {
		if (remained >= PACKED_BATCH_SIZE) {
			if (!virtqueue_enqueue_batch_packed_vec(txvq,
						&tx_pkts[nb_tx])) {
				nb_tx += PACKED_BATCH_SIZE;
				remained -= PACKED_BATCH_SIZE;
				continue;
			}
		}
		if (!virtqueue_enqueue_single_packed_vec(txvq,
					tx_pkts[nb_tx])) {
			nb_tx++;
			remained--;
			continue;
		}
		break;
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}

/* Optionally fill offload information in structure */
static inline int
virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* GSO not supported in the vec path, skip the check */
	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}

	return 0;
}
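
/*
 * Receive a full batch of PACKED_BATCH_SIZE (4) packets: load the four
 * descriptors as one 512-bit value, verify that the device has marked
 * all of them as used, then scatter (len - hdr_size) straight into the
 * pkt_len/data_len fields of the four mbufs.
 */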
static inline uint16_t
virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq,
				   struct rte_mbuf **rx_pkts)
{
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint64_t addrs[PACKED_BATCH_SIZE];
	uint16_t id = vq->vq_used_cons_idx;
	uint8_t desc_stats;
	uint16_t i;
	void *desc_addr;

	if (id & PACKED_BATCH_MASK)
		return -1;

	if (unlikely((id + PACKED_BATCH_SIZE) > vq->vq_nentries))
		return -1;

	/* only care avail/used bits */
#if defined(RTE_ARCH_I686)
	__m512i v_mask = _mm512_set4_epi64(PACKED_FLAGS_MASK, 0x0,
					   PACKED_FLAGS_MASK, 0x0);
#else
	__m512i v_mask = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
#endif
	desc_addr = &vq->vq_packed.ring.desc[id];

	__m512i v_desc = _mm512_loadu_si512(desc_addr);
	__m512i v_flag = _mm512_and_epi64(v_desc, v_mask);

	__m512i v_used_flag = _mm512_setzero_si512();
	if (vq->vq_packed.used_wrap_counter)
#if defined(RTE_ARCH_I686)
		v_used_flag = _mm512_set4_epi64(PACKED_FLAGS_MASK, 0x0,
						PACKED_FLAGS_MASK, 0x0);
#else
		v_used_flag = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
#endif

	/* Check all descs are used */
	desc_stats = _mm512_cmpneq_epu64_mask(v_flag, v_used_flag);
	if (desc_stats)
		return -1;

	virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		rx_pkts[i] = (struct rte_mbuf *)vq->vq_descx[id + i].cookie;
		rte_packet_prefetch(rte_pktmbuf_mtod(rx_pkts[i], void *));

		addrs[i] = (uintptr_t)rx_pkts[i]->rx_descriptor_fields1;
	}

	/*
	 * load len from desc, store into mbuf pkt_len and data_len
	 * len limited by 16-bit buf_len, pkt_len[16:31] can be ignored
	 */
	const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
	__m512i values = _mm512_maskz_shuffle_epi32(mask, v_desc, 0xAA);

	/* reduce hdr_len from pkt_len and data_len */
	__m512i mbuf_len_offset = _mm512_maskz_set1_epi32(mask,
			(uint32_t)-hdr_size);

	__m512i v_value = _mm512_add_epi32(values, mbuf_len_offset);
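
	/*
	 * addrs[i] points at rx_descriptor_fields1 of each mbuf and
	 * data_len sits 8 bytes after it (asserted below), so the 64-bit
	 * scatter writes the adjusted length into pkt_len and data_len of
	 * all four mbufs at once (the neighbouring fields covered by the
	 * same 8-byte stores are zeroed).
	 */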
	/* assert offset of data_len */
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
		offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);

	__m512i v_index = _mm512_set_epi64(addrs[3] + 8, addrs[3],
					   addrs[2] + 8, addrs[2],
					   addrs[1] + 8, addrs[1],
					   addrs[0] + 8, addrs[0]);
	/* batch store into mbufs */
	_mm512_i64scatter_epi64(0, v_index, v_value, 1);

	if (hw->has_rx_offload) {
		virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
			char *addr = (char *)rx_pkts[i]->buf_addr +
					RTE_PKTMBUF_HEADROOM - hdr_size;
			virtio_vec_rx_offload(rx_pkts[i],
					(struct virtio_net_hdr *)addr);
		}
	}

	virtio_update_batch_stats(&rxvq->stats, rx_pkts[0]->pkt_len,
			rx_pkts[1]->pkt_len, rx_pkts[2]->pkt_len,
			rx_pkts[3]->pkt_len);

	vq->vq_free_cnt += PACKED_BATCH_SIZE;

	vq->vq_used_cons_idx += PACKED_BATCH_SIZE;
	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
		vq->vq_used_cons_idx -= vq->vq_nentries;
		vq->vq_packed.used_wrap_counter ^= 1;
	}

	return 0;
}
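
/*
 * Scalar fallback: dequeue a single used descriptor, recover the mbuf
 * from the per-descriptor cookie and strip the virtio-net header from
 * the returned length.
 */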
static inline uint16_t
virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq,
				    struct rte_mbuf **rx_pkts)
{
	uint16_t used_idx, id;
	uint32_t len;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint32_t hdr_size = hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	struct vring_packed_desc *desc;
	struct rte_mbuf *cookie;

	desc = vq->vq_packed.ring.desc;
	used_idx = vq->vq_used_cons_idx;
	if (!desc_is_used(&desc[used_idx], vq))
		return -1;

	len = desc[used_idx].len;
	id = desc[used_idx].id;
	cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
	if (unlikely(cookie == NULL)) {
		PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
		return -1;
	}
	rte_prefetch0(cookie);
	rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));

	cookie->data_off = RTE_PKTMBUF_HEADROOM;
	cookie->ol_flags = 0;
	cookie->pkt_len = (uint32_t)(len - hdr_size);
	cookie->data_len = (uint32_t)(len - hdr_size);

	hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
					RTE_PKTMBUF_HEADROOM - hdr_size);
	if (hw->has_rx_offload)
		virtio_vec_rx_offload(cookie, hdr);

	*rx_pkts = cookie;

	rxvq->stats.bytes += cookie->pkt_len;

	vq->vq_free_cnt++;
	vq->vq_used_cons_idx++;
	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
		vq->vq_used_cons_idx -= vq->vq_nentries;
		vq->vq_packed.used_wrap_counter ^= 1;
	}

	return 0;
}
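
/*
 * Refill the ring with 'num' fresh mbufs.  Address and length are
 * written for every descriptor, but the flags of the first (head)
 * descriptor are stored last so the device never sees a partially
 * initialised run of buffers.
 */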
static inline void
virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq,
			      struct rte_mbuf **cookie,
			      uint16_t num)
{
	struct virtqueue *vq = rxvq->vq;
	struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
	uint16_t flags = vq->vq_packed.cached_flags;
	struct virtio_hw *hw = vq->hw;
	struct vq_desc_extra *dxp;
	uint16_t idx, i;
	uint16_t batch_num, total_num = 0;
	uint16_t head_idx = vq->vq_avail_idx;
	uint16_t head_flag = vq->vq_packed.cached_flags;
	uint64_t addr;

	do {
		idx = vq->vq_avail_idx;

		batch_num = PACKED_BATCH_SIZE;
		if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
			batch_num = vq->vq_nentries - idx;
		if (unlikely((total_num + batch_num) > num))
			batch_num = num - total_num;

		virtio_for_each_try_unroll(i, 0, batch_num) {
			dxp = &vq->vq_descx[idx + i];
			dxp->cookie = (void *)cookie[total_num + i];

			addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) +
				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
			start_dp[idx + i].addr = addr;
			start_dp[idx + i].len = cookie[total_num + i]->buf_len
				- RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
			if (total_num || i) {
				virtqueue_store_flags_packed(&start_dp[idx + i],
						flags, hw->weak_barriers);
			}
		}

		vq->vq_avail_idx += batch_num;
		if (vq->vq_avail_idx >= vq->vq_nentries) {
			vq->vq_avail_idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
			flags = vq->vq_packed.cached_flags;
		}

		total_num += batch_num;
	} while (total_num < num);

	virtqueue_store_flags_packed(&start_dp[head_idx], head_flag,
			hw->weak_barriers);
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
}
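
/*
 * Burst Rx entry point for the packed-ring vectorized path: dequeue in
 * batches of four where possible, fall back to single dequeue, then
 * refill the ring once enough descriptors have been consumed.
 */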
uint16_t
virtio_recv_pkts_packed_vec(void *rx_queue,
			    struct rte_mbuf **rx_pkts,
			    uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t num, nb_rx = 0;
	uint32_t nb_enqueued = 0;
	uint16_t free_cnt = vq->vq_free_thresh;

	if (unlikely(hw->started == 0))
		return nb_rx;

	num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
	if (likely(num > PACKED_BATCH_SIZE))
		num = num - ((vq->vq_used_cons_idx + num) % PACKED_BATCH_SIZE);

	while (num) {
		if (!virtqueue_dequeue_batch_packed_vec(rxvq,
					&rx_pkts[nb_rx])) {
			nb_rx += PACKED_BATCH_SIZE;
			num -= PACKED_BATCH_SIZE;
			continue;
		}
		if (!virtqueue_dequeue_single_packed_vec(rxvq,
					&rx_pkts[nb_rx])) {
			nb_rx++;
			num--;
			continue;
		}
		break;
	}

	PMD_RX_LOG(DEBUG, "dequeue:%d", num);

	rxvq->stats.packets += nb_rx;

	if (likely(vq->vq_free_cnt >= free_cnt)) {
		struct rte_mbuf *new_pkts[free_cnt];

		if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
						  free_cnt) == 0)) {
			virtio_recv_refill_packed_vec(rxvq, new_pkts,
					free_cnt);
			nb_enqueued += free_cnt;
		} else {
			struct rte_eth_dev *dev =
				&rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed += free_cnt;
		}
	}

	if (likely(nb_enqueued)) {
		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}