/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved.
 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
 */

#include <rte_mbuf.h>
#include <ethdev_driver.h>
#include <rte_net.h>
#include <rte_prefetch.h>

#include "enic_compat.h"
#include "rq_enet_desc.h"
#include "enic.h"
#include "enic_rxtx_common.h"
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>

#define RTE_PMD_USE_PREFETCH

#ifdef RTE_PMD_USE_PREFETCH
/* Prefetch a cache line into all cache levels. */
#define rte_enic_prefetch(p) rte_prefetch0(p)
#else
#define rte_enic_prefetch(p) do {} while (0)
#endif

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p) rte_prefetch1(p)
#else
#define rte_packet_prefetch(p) do {} while (0)
#endif

/* dummy receive function to replace actual function in
 * order to do safe reconfiguration operations.
 */
uint16_t
enic_dummy_recv_pkts(__rte_unused void *rx_queue,
                     __rte_unused struct rte_mbuf **rx_pkts,
                     __rte_unused uint16_t nb_pkts)
{
        return 0;
}
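
/*
 * While the device's rx_pkt_burst pointer is temporarily switched to the
 * dummy above (for example while queues are being reconfigured), concurrent
 * rte_eth_rx_burst() calls simply return 0 packets instead of touching
 * rings that are being torn down or rebuilt.
 */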

static inline uint16_t
enic_recv_pkts_common(void *rx_queue, struct rte_mbuf **rx_pkts,
                      uint16_t nb_pkts, const bool use_64b_desc)
{
        struct vnic_rq *sop_rq = rx_queue;
        struct vnic_rq *data_rq;
        struct vnic_rq *rq;
        struct enic *enic = vnic_dev_priv(sop_rq->vdev);
        uint16_t cq_idx;
        uint16_t rq_idx, max_rx;
        uint16_t rq_num;
        struct rte_mbuf *nmb, *rxmb;
        uint16_t nb_rx = 0;
        struct vnic_cq *cq;
        volatile struct cq_desc *cqd_ptr;
        uint8_t color;
        uint8_t tnl;
        uint16_t seg_length;
        struct rte_mbuf *first_seg = sop_rq->pkt_first_seg;
        struct rte_mbuf *last_seg = sop_rq->pkt_last_seg;
        const int desc_size = use_64b_desc ?
                sizeof(struct cq_enet_rq_desc_64) :
                sizeof(struct cq_enet_rq_desc);
        RTE_BUILD_BUG_ON(sizeof(struct cq_enet_rq_desc_64) != 64);

        cq = &enic->cq[enic_cq_rq(enic, sop_rq->index)];
        cq_idx = cq->to_clean;          /* index of cqd, rqd, mbuf_table */
        cqd_ptr = (struct cq_desc *)((uintptr_t)(cq->ring.descs) +
                                     (uintptr_t)cq_idx * desc_size);
        color = cq->last_color;
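
        /*
         * CQ entry ownership is tracked with a color bit: the adapter writes
         * each completion with the current color, and an entry belongs to
         * the driver only while its color differs from cq->last_color. The
         * color is flipped whenever the ring wraps, so stale entries from
         * the previous pass are never mistaken for new completions.
         */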

        data_rq = &enic->rq[sop_rq->data_queue_idx];

        /* Receive until the end of the ring, at most. */
        max_rx = RTE_MIN(nb_pkts, cq->ring.desc_count - cq_idx);
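
        /*
         * Capping the burst at the ring end means the loop below never has
         * to handle index wrap-around; the wrap and color flip are handled
         * once, after the loop.
         */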

        while (max_rx) {
                volatile struct rq_enet_desc *rqd_ptr;
                struct cq_desc cqd;
                uint8_t packet_error;
                uint16_t ciflags;
                uint8_t tc;
                uint16_t rq_idx_msbs = 0;

                max_rx--;

                tc = *(volatile uint8_t *)((uintptr_t)cqd_ptr + desc_size - 1);
                /* Check for pkts available */
                if ((tc & CQ_DESC_COLOR_MASK_NOSHIFT) == color)
                        break;

                /* Get the cq descriptor and extract rq info from it */
                cqd = *cqd_ptr;

                /*
                 * The first 16B of a 64B descriptor is identical to a 16B
                 * descriptor except for the type_color and fetch index.
                 * Extract the fetch index and copy the type_color from the
                 * 64B to where it would be in a 16B descriptor so subsequent
                 * code can run without further conditionals.
                 */
                if (use_64b_desc) {
                        rq_idx_msbs = (((volatile struct cq_enet_rq_desc_64 *)
                                cqd_ptr)->fetch_idx_flags
                                & CQ_ENET_RQ_DESC_FETCH_IDX_MASK)
                                << CQ_DESC_COMP_NDX_BITS;
                        cqd.type_color = tc;
                }
                rq_num = cqd.q_number & CQ_DESC_Q_NUM_MASK;
                rq_idx = rq_idx_msbs +
                         (cqd.completed_index & CQ_DESC_COMP_NDX_MASK);

                rq = &enic->rq[rq_num];
                rqd_ptr = ((struct rq_enet_desc *)rq->ring.descs) + rq_idx;

                /* allocate a new mbuf */
                nmb = rte_mbuf_raw_alloc(rq->mp);
                if (nmb == NULL) {
                        rte_atomic64_inc(&enic->soft_stats.rx_nombuf);
                        break;
                }

                /* A packet error means descriptor and data are untrusted */
                packet_error = enic_cq_rx_check_err(&cqd);

                /* Get the mbuf to return and replace with one just allocated */
                rxmb = rq->mbuf_ring[rq_idx];
                rq->mbuf_ring[rq_idx] = nmb;
                cq_idx++;

                /* Prefetch next mbuf & desc while processing current one */
                cqd_ptr = (struct cq_desc *)((uintptr_t)(cq->ring.descs) +
                                             (uintptr_t)cq_idx * desc_size);
                rte_enic_prefetch(cqd_ptr);

                ciflags = enic_cq_rx_desc_ciflags(
                        (struct cq_enet_rq_desc *)&cqd);

                /* Push descriptor for newly allocated mbuf */
                nmb->data_off = RTE_PKTMBUF_HEADROOM;
                /*
                 * Only the address needs to be refilled. length_type of the
                 * descriptor is set during initialization
                 * (enic_alloc_rx_queue_mbufs) and does not change.
                 */
                rqd_ptr->address = rte_cpu_to_le_64(nmb->buf_iova +
                                                    RTE_PKTMBUF_HEADROOM);

                /* Fill in the rest of the mbuf */
                seg_length = enic_cq_rx_desc_n_bytes(&cqd);

                if (rq->is_sop) {
                        first_seg = rxmb;
                        first_seg->pkt_len = seg_length;
                } else {
                        first_seg->pkt_len = (uint16_t)(first_seg->pkt_len
                                                        + seg_length);
                        first_seg->nb_segs++;
                        last_seg->next = rxmb;
                }

                rxmb->port = enic->port_id;
                rxmb->data_len = seg_length;

                rq->rx_nb_hold++;

                if (!(enic_cq_rx_desc_eop(ciflags))) {
                        last_seg = rxmb;
                        continue;
                }
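
                /*
                 * At this point the EOP flag is set and first_seg points at
                 * the head of a complete mbuf chain: in scatter mode the
                 * first segment came from the SOP RQ and any further
                 * segments from the data RQ. Offload flags and packet type
                 * are only valid on this final, EOP-marked completion.
                 */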

                /*
                 * When overlay offload is enabled, CQ.fcoe indicates the
                 * packet is tunnelled.
                 */
                tnl = enic->overlay_offload &&
                        (ciflags & CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0;
                /* cq rx flags are only valid if eop bit is set */
                first_seg->packet_type =
                        enic_cq_rx_flags_to_pkt_type(&cqd, tnl);
                enic_cq_rx_to_pkt_flags(&cqd, first_seg);

                /* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */
                if (tnl) {
                        first_seg->packet_type &= ~(RTE_PTYPE_L3_MASK |
                                                    RTE_PTYPE_L4_MASK);
                }
                if (unlikely(packet_error)) {
                        rte_pktmbuf_free(first_seg);
                        rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
                        continue;
                }

                /* prefetch mbuf data for caller */
                rte_packet_prefetch(RTE_PTR_ADD(first_seg->buf_addr,
                                    RTE_PKTMBUF_HEADROOM));

                /* store the mbuf address into the next entry of the array */
                rx_pkts[nb_rx++] = first_seg;
        }
        if (unlikely(cq_idx == cq->ring.desc_count)) {
                cq_idx = 0;
                cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT;
        }

        sop_rq->pkt_first_seg = first_seg;
        sop_rq->pkt_last_seg = last_seg;

        cq->to_clean = cq_idx;

        if ((sop_rq->rx_nb_hold + data_rq->rx_nb_hold) >
            sop_rq->rx_free_thresh) {
                if (data_rq->in_use) {
                        data_rq->posted_index =
                                enic_ring_add(data_rq->ring.desc_count,
                                              data_rq->posted_index,
                                              data_rq->rx_nb_hold);
                        data_rq->rx_nb_hold = 0;
                }
                sop_rq->posted_index = enic_ring_add(sop_rq->ring.desc_count,
                                                     sop_rq->posted_index,
                                                     sop_rq->rx_nb_hold);
                sop_rq->rx_nb_hold = 0;

                /*
                 * rte_mb() makes the refilled descriptor writes visible to
                 * the device before the posted indexes are updated. The data
                 * RQ doorbell is written before the SOP RQ doorbell, with a
                 * compiler barrier between the two writes.
                 */
                rte_mb();
                if (data_rq->in_use)
                        iowrite32_relaxed(data_rq->posted_index,
                                          &data_rq->ctrl->posted_index);
                rte_compiler_barrier();
                iowrite32_relaxed(sop_rq->posted_index,
                                  &sop_rq->ctrl->posted_index);
        }

        return nb_rx;
}
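
/*
 * The two thin wrappers below select the CQ descriptor layout:
 * enic_recv_pkts() handles the 16-byte completion descriptor and
 * enic_recv_pkts_64() the 64-byte one. Since use_64b_desc is passed as a
 * constant to the inline common routine, the compiler can specialize each
 * wrapper without per-packet branching on the descriptor size.
 */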

uint16_t
enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        return enic_recv_pkts_common(rx_queue, rx_pkts, nb_pkts, false);
}

uint16_t
enic_recv_pkts_64(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        return enic_recv_pkts_common(rx_queue, rx_pkts, nb_pkts, true);
}
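
/*
 * Receive burst for the no-scatter case: every packet fits in a single
 * descriptor and mbuf, which allows a leaner loop (no SOP/data RQ chaining)
 * and bulk restocking of the ring from a per-queue mbuf reserve.
 */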
uint16_t
enic_noscatter_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                         uint16_t nb_pkts)
{
        struct rte_mbuf *mb, **rx, **rxmb;
        uint16_t cq_idx, nb_rx, max_rx;
        struct cq_enet_rq_desc *cqd;
        struct rq_enet_desc *rqd;
        unsigned int port_id;
        struct vnic_cq *cq;
        struct vnic_rq *rq;
        struct enic *enic;
        uint8_t color;
        bool overlay;
        bool tnl;

        rq = rx_queue;
        enic = vnic_dev_priv(rq->vdev);
        cq = &enic->cq[enic_cq_rq(enic, rq->index)];
        cq_idx = cq->to_clean;

        /*
         * Fill up the reserve of free mbufs. Below, we restock the receive
         * ring with these mbufs to avoid allocation failures.
         */
        if (rq->num_free_mbufs == 0) {
                if (rte_mempool_get_bulk(rq->mp, (void **)rq->free_mbufs,
                                         ENIC_RX_BURST_MAX))
                        return 0;
                rq->num_free_mbufs = ENIC_RX_BURST_MAX;
        }

        /* Receive until the end of the ring, at most. */
        max_rx = RTE_MIN(nb_pkts, rq->num_free_mbufs);
        max_rx = RTE_MIN(max_rx, cq->ring.desc_count - cq_idx);

        cqd = (struct cq_enet_rq_desc *)(cq->ring.descs) + cq_idx;
        color = cq->last_color;
        rxmb = rq->mbuf_ring + cq_idx;
        port_id = enic->port_id;
        overlay = enic->overlay_offload;

        rx = rx_pkts;
        while (max_rx) {
                max_rx--;
                if ((cqd->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color)
                        break;
                if (unlikely(cqd->bytes_written_flags &
                             CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {
                        rte_pktmbuf_free(*rxmb++);
                        rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
                        cqd++;
                        continue;
                }

                mb = *rxmb++;
                /* prefetch mbuf data for caller */
                rte_packet_prefetch(RTE_PTR_ADD(mb->buf_addr,
                                    RTE_PKTMBUF_HEADROOM));
                mb->data_len = cqd->bytes_written_flags &
                        CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
                mb->pkt_len = mb->data_len;
                mb->port = port_id;
                tnl = overlay && (cqd->completed_index_flags &
                                  CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0;
                mb->packet_type =
                        enic_cq_rx_flags_to_pkt_type((struct cq_desc *)cqd,
                                                     tnl);
                enic_cq_rx_to_pkt_flags((struct cq_desc *)cqd, mb);
                /* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */
                if (tnl) {
                        mb->packet_type &= ~(RTE_PTYPE_L3_MASK |
                                             RTE_PTYPE_L4_MASK);
                }
                cqd++;
                *rx++ = mb;
        }
        /* Number of descriptors visited */
        nb_rx = cqd - (struct cq_enet_rq_desc *)(cq->ring.descs) - cq_idx;
        if (nb_rx == 0)
                return 0;
        rqd = ((struct rq_enet_desc *)rq->ring.descs) + cq_idx;
        rxmb = rq->mbuf_ring + cq_idx;
        cq_idx += nb_rx;
        rq->rx_nb_hold += nb_rx;
        if (unlikely(cq_idx == cq->ring.desc_count)) {
                cq_idx = 0;
                cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT;
        }
        cq->to_clean = cq_idx;

        memcpy(rxmb, rq->free_mbufs + ENIC_RX_BURST_MAX - rq->num_free_mbufs,
               sizeof(struct rte_mbuf *) * nb_rx);
        rq->num_free_mbufs -= nb_rx;
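
        /*
         * The mbufs just handed to the caller are replaced in one shot from
         * the reserve filled at the top of the function; the reserve is
         * consumed front to back, so ENIC_RX_BURST_MAX - num_free_mbufs is
         * the offset of the first unused entry.
         */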
        while (nb_rx) {
                nb_rx--;
                mb = *rxmb++;
                mb->data_off = RTE_PKTMBUF_HEADROOM;
                rqd->address = mb->buf_iova + RTE_PKTMBUF_HEADROOM;
                rqd++;
        }
        if (rq->rx_nb_hold > rq->rx_free_thresh) {
                rq->posted_index = enic_ring_add(rq->ring.desc_count,
                                                 rq->posted_index,
                                                 rq->rx_nb_hold);
                rq->rx_nb_hold = 0;
                rte_mb();
                iowrite32_relaxed(rq->posted_index,
                                  &rq->ctrl->posted_index);
        }

        return rx - rx_pkts;
}
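
/*
 * Free transmitted mbufs, from the current WQ tail up to the completed
 * index reported by the adapter. Segments are batched per mempool and
 * returned with rte_mempool_put_bulk(); when a segment from a different
 * pool is encountered, the pending batch is flushed first.
 */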
static inline void enic_free_wq_bufs(struct vnic_wq *wq,
                                     uint16_t completed_index)
{
        struct rte_mbuf *buf;
        struct rte_mbuf *m, *free[ENIC_LEGACY_MAX_WQ_DESCS];
        unsigned int nb_to_free, nb_free = 0, i;
        struct rte_mempool *pool;
        unsigned int tail_idx;
        unsigned int desc_count = wq->ring.desc_count;

        /*
         * On 1500 Series VIC and beyond, greater than ENIC_LEGACY_MAX_WQ_DESCS
         * may be attempted to be freed. Cap it at ENIC_LEGACY_MAX_WQ_DESCS.
         */
        nb_to_free = RTE_MIN(enic_ring_sub(desc_count, wq->tail_idx,
                             completed_index) + 1,
                             (uint32_t)ENIC_LEGACY_MAX_WQ_DESCS);
        tail_idx = wq->tail_idx;
        pool = wq->bufs[tail_idx]->pool;
        for (i = 0; i < nb_to_free; i++) {
                buf = wq->bufs[tail_idx];
                m = rte_pktmbuf_prefree_seg(buf);
                if (unlikely(m == NULL)) {
                        tail_idx = enic_ring_incr(desc_count, tail_idx);
                        continue;
                }

                if (likely(m->pool == pool)) {
                        RTE_ASSERT(nb_free < ENIC_LEGACY_MAX_WQ_DESCS);
                        free[nb_free++] = m;
                } else {
                        rte_mempool_put_bulk(pool, (void *)free, nb_free);
                        free[0] = m;
                        nb_free = 1;
                        pool = m->pool;
                }
                tail_idx = enic_ring_incr(desc_count, tail_idx);
        }

        if (nb_free > 0)
                rte_mempool_put_bulk(pool, (void **)free, nb_free);

        wq->tail_idx = tail_idx;
        wq->ring.desc_avail += nb_to_free;
}
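
/*
 * Reclaim completed TX descriptors. The WQ's completion queue is set up
 * (elsewhere in the driver) so that the adapter writes the latest completed
 * index to a host memory location, wq->cqmsg_rz; comparing that value
 * against last_completed_index tells us whether there is anything new to
 * free.
 */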
unsigned int enic_cleanup_wq(__rte_unused struct enic *enic, struct vnic_wq *wq)
{
        uint16_t completed_index;

        completed_index = *((uint32_t *)wq->cqmsg_rz->addr) & 0xffff;

        if (wq->last_completed_index != completed_index) {
                enic_free_wq_bufs(wq, completed_index);
                wq->last_completed_index = completed_index;
        }

        return 0;
}
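
/*
 * tx_pkt_prepare callback. Validates per-packet offload requests and
 * lengths against what the WQ supports and runs
 * rte_net_intel_cksum_prepare() to set up the pseudo-header checksums used
 * by the checksum/TSO offloads. On failure, rte_errno is set and the index
 * of the offending packet is returned.
 */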
uint16_t enic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                        uint16_t nb_pkts)
{
        struct vnic_wq *wq = (struct vnic_wq *)tx_queue;
        int32_t ret;
        uint16_t i;
        uint64_t ol_flags;
        struct rte_mbuf *m;

        for (i = 0; i != nb_pkts; i++) {
                m = tx_pkts[i];
                ol_flags = m->ol_flags;
                if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                        if (unlikely(m->pkt_len > ENIC_TX_MAX_PKT_SIZE)) {
                                rte_errno = EINVAL;
                                return i;
                        }
                } else {
                        uint16_t header_len;
                        header_len = m->l2_len + m->l3_len + m->l4_len;
                        if (m->tso_segsz + header_len > ENIC_TX_MAX_PKT_SIZE) {
                                rte_errno = EINVAL;
                                return i;
                        }
                }

                if (ol_flags & wq->tx_offload_notsup_mask) {
                        rte_errno = ENOTSUP;
                        return i;
                }
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
                ret = rte_validate_tx_offload(m);
                if (ret != 0) {
                        rte_errno = -ret;
                        return i;
                }
#endif
                ret = rte_net_intel_cksum_prepare(m);
                if (ret != 0) {
                        rte_errno = -ret;
                        return i;
                }
        }

        return i;
}
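
/*
 * Main TX burst routine. Completed descriptors are reclaimed first, then
 * each mbuf segment is encoded into one WQ descriptor (wq_enet_desc_enc())
 * with the per-packet VLAN, checksum and TSO settings; a completion entry
 * is requested only every ENIC_WQ_CQ_THRESH descriptors to limit completion
 * processing, and the posted index is written once, after the whole burst.
 */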
uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint16_t nb_pkts)
{
        uint16_t index;
        unsigned int pkt_len, data_len;
        unsigned int nb_segs;
        struct rte_mbuf *tx_pkt;
        struct vnic_wq *wq = (struct vnic_wq *)tx_queue;
        struct enic *enic = vnic_dev_priv(wq->vdev);
        unsigned short vlan_id;
        uint64_t ol_flags;
        uint64_t ol_flags_mask;
        unsigned int wq_desc_avail;
        unsigned int head_idx;
        unsigned int desc_count;
        struct wq_enet_desc *descs, *desc_p, desc_tmp;
        uint16_t mss;
        uint8_t vlan_tag_insert;
        uint8_t eop, cq;
        uint64_t bus_addr;
        uint8_t offload_mode;
        uint16_t header_len;
        uint64_t tso;
        rte_atomic64_t *tx_oversized;

        enic_cleanup_wq(enic, wq);
        wq_desc_avail = vnic_wq_desc_avail(wq);
        head_idx = wq->head_idx;
        desc_count = wq->ring.desc_count;
        ol_flags_mask = RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK;
        tx_oversized = &enic->soft_stats.tx_oversized;

        nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);

        for (index = 0; index < nb_pkts; index++) {
                tx_pkt = *tx_pkts++;
                pkt_len = tx_pkt->pkt_len;
                data_len = tx_pkt->data_len;
                ol_flags = tx_pkt->ol_flags;
                nb_segs = tx_pkt->nb_segs;
                tso = ol_flags & RTE_MBUF_F_TX_TCP_SEG;

                /* drop packet if it's too big to send */
                if (unlikely(!tso && pkt_len > ENIC_TX_MAX_PKT_SIZE)) {
                        rte_pktmbuf_free(tx_pkt);
                        rte_atomic64_inc(tx_oversized);
                        continue;
                }

                /* Not enough descriptors for the whole chain: stop here */
                if (nb_segs > wq_desc_avail) {
                        if (index > 0)
                                goto post;
                        goto done;
                }

                mss = 0;
                vlan_id = tx_pkt->vlan_tci;
                vlan_tag_insert = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
                bus_addr = (dma_addr_t)
                           (tx_pkt->buf_iova + tx_pkt->data_off);

                descs = (struct wq_enet_desc *)wq->ring.descs;
                desc_p = descs + head_idx;

                eop = (data_len == pkt_len);
                offload_mode = WQ_ENET_OFFLOAD_MODE_CSUM;
                header_len = 0;

                if (tso) {
                        header_len = tx_pkt->l2_len + tx_pkt->l3_len +
                                     tx_pkt->l4_len;

                        /* Drop if non-TCP packet or TSO seg size is too big */
                        if (unlikely(header_len == 0 || ((tx_pkt->tso_segsz +
                            header_len) > ENIC_TX_MAX_PKT_SIZE))) {
                                rte_pktmbuf_free(tx_pkt);
                                rte_atomic64_inc(tx_oversized);
                                continue;
                        }

                        offload_mode = WQ_ENET_OFFLOAD_MODE_TSO;
                        mss = tx_pkt->tso_segsz;
                        /* For tunnel, need the size of outer+inner headers */
                        if (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
                                header_len += tx_pkt->outer_l2_len +
                                              tx_pkt->outer_l3_len;
                        }
                }

                if ((ol_flags & ol_flags_mask) && (header_len == 0)) {
                        if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
                                mss |= ENIC_CALC_IP_CKSUM;

                        /* Nic uses just 1 bit for UDP and TCP */
                        switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
                        case RTE_MBUF_F_TX_TCP_CKSUM:
                        case RTE_MBUF_F_TX_UDP_CKSUM:
                                mss |= ENIC_CALC_TCP_UDP_CKSUM;
                                break;
                        }
                }

                /* Request a completion entry at most once every
                 * ENIC_WQ_CQ_THRESH descriptors, at packet boundaries.
                 */
                wq->cq_pend++;
                cq = 0;
                if (eop && wq->cq_pend >= ENIC_WQ_CQ_THRESH) {
                        cq = 1;
                        wq->cq_pend = 0;
                }
                wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, header_len,
                                 offload_mode, eop, cq, 0, vlan_tag_insert,
                                 vlan_id, 0);

                *desc_p = desc_tmp;
                wq->bufs[head_idx] = tx_pkt;
                head_idx = enic_ring_incr(desc_count, head_idx);
                wq_desc_avail--;

                if (!eop) {
                        for (tx_pkt = tx_pkt->next; tx_pkt; tx_pkt =
                            tx_pkt->next) {
                                data_len = tx_pkt->data_len;

                                wq->cq_pend++;
                                cq = 0;
                                if (tx_pkt->next == NULL) {
                                        eop = 1;
                                        if (wq->cq_pend >= ENIC_WQ_CQ_THRESH) {
                                                cq = 1;
                                                wq->cq_pend = 0;
                                        }
                                }
                                desc_p = descs + head_idx;
                                bus_addr = (dma_addr_t)(tx_pkt->buf_iova
                                           + tx_pkt->data_off);
                                wq_enet_desc_enc((struct wq_enet_desc *)
                                                 &desc_tmp, bus_addr, data_len,
                                                 mss, 0, offload_mode, eop, cq,
                                                 0, vlan_tag_insert, vlan_id,
                                                 0);

                                *desc_p = desc_tmp;
                                wq->bufs[head_idx] = tx_pkt;
                                head_idx = enic_ring_incr(desc_count, head_idx);
                                wq_desc_avail--;
                        }
                }
        }
 post:
        rte_wmb();
        iowrite32_relaxed(head_idx, &wq->ctrl->posted_index);
 done:
        wq->ring.desc_avail = wq_desc_avail;
        wq->head_idx = head_idx;

        return index;
}
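
/*
 * Simplified TX path. enqueue_simple_pkts() assumes packets that fit in a
 * single descriptor and relies on the EOP and CQ-entry bits having been
 * pre-set in the WQ descriptors at queue setup (only those two bits are
 * preserved by the mask below), so per packet it only fills in the address,
 * length, VLAN tag and checksum bits. enic_simple_xmit_pkts() drives it,
 * splitting a burst in two at the ring wrap point.
 */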
static void enqueue_simple_pkts(struct rte_mbuf **pkts,
                                struct wq_enet_desc *desc,
                                uint16_t n,
                                struct enic *enic)
{
        struct rte_mbuf *p;
        uint16_t mss;

        while (n) {
                n--;
                p = *pkts++;
                desc->address = p->buf_iova + p->data_off;
                desc->length = p->pkt_len;
                desc->vlan_tag = p->vlan_tci;
                desc->header_length_flags &=
                        ((1 << WQ_ENET_FLAGS_EOP_SHIFT) |
                         (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT));
                if (p->ol_flags & RTE_MBUF_F_TX_VLAN) {
                        desc->header_length_flags |=
                                1 << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT;
                }
                /*
                 * Checksum offload. We use WQ_ENET_OFFLOAD_MODE_CSUM, which
                 * is 0, so no need to set offload_mode.
                 */
                mss = 0;
                if (p->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
                        mss |= ENIC_CALC_IP_CKSUM << WQ_ENET_MSS_SHIFT;
                if (p->ol_flags & RTE_MBUF_F_TX_L4_MASK)
                        mss |= ENIC_CALC_TCP_UDP_CKSUM << WQ_ENET_MSS_SHIFT;
                desc->mss_loopback = mss;

                /*
                 * The app should not send oversized
                 * packets. tx_pkt_prepare includes a check as
                 * well. But some apps ignore the device max size and
                 * tx_pkt_prepare. Oversized packets cause WQ errors
                 * and the NIC ends up disabling the whole WQ. So
                 * truncate such packets here.
                 */
                if (unlikely(p->pkt_len > ENIC_TX_MAX_PKT_SIZE)) {
                        desc->length = ENIC_TX_MAX_PKT_SIZE;
                        rte_atomic64_inc(&enic->soft_stats.tx_oversized);
                }
                desc++;
        }
}

uint16_t enic_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                               uint16_t nb_pkts)
{
        unsigned int head_idx, desc_count;
        struct wq_enet_desc *desc;
        struct vnic_wq *wq;
        struct enic *enic;
        uint16_t rem, n;

        wq = (struct vnic_wq *)tx_queue;
        enic = vnic_dev_priv(wq->vdev);
        enic_cleanup_wq(enic, wq);
        /* Will enqueue this many packets in this call */
        nb_pkts = RTE_MIN(nb_pkts, wq->ring.desc_avail);
        if (nb_pkts == 0)
                return 0;

        head_idx = wq->head_idx;
        desc_count = wq->ring.desc_count;

        /* Descriptors until the end of the ring */
        n = desc_count - head_idx;
        n = RTE_MIN(nb_pkts, n);

        /* Save mbuf pointers to free later */
        memcpy(wq->bufs + head_idx, tx_pkts, sizeof(struct rte_mbuf *) * n);

        /* Enqueue until the ring end */
        rem = nb_pkts - n;
        desc = ((struct wq_enet_desc *)wq->ring.descs) + head_idx;
        enqueue_simple_pkts(tx_pkts, desc, n, enic);

        /* Wrap to the start of the ring */
        if (rem > 0) {
                tx_pkts += n;
                memcpy(wq->bufs, tx_pkts, sizeof(struct rte_mbuf *) * rem);
                desc = (struct wq_enet_desc *)wq->ring.descs;
                enqueue_simple_pkts(tx_pkts, desc, rem, enic);
        }

        /* Make the descriptor writes visible before ringing the doorbell */
        rte_wmb();

        /* Update head_idx and desc_avail */
        wq->ring.desc_avail -= nb_pkts;
        head_idx += nb_pkts;
        if (head_idx >= desc_count)
                head_idx -= desc_count;
        wq->head_idx = head_idx;
        iowrite32_relaxed(head_idx, &wq->ctrl->posted_index);

        return nb_pkts;
}