+/**
+ * Translate RX completion flags to packet type.
+ *
+ * @param flags
+ * RX completion flags returned by poll_length_flags().
+ *
+ * @return
+ * Packet type for struct rte_mbuf.
+ */
+static inline uint32_t
+rxq_cq_to_pkt_type(uint32_t flags)
+{
+ uint32_t pkt_type;
+
+ if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET)
+ pkt_type =
+ TRANSPOSE(flags,
+ IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
+ RTE_PTYPE_L3_IPV4) |
+ TRANSPOSE(flags,
+ IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
+ RTE_PTYPE_L3_IPV6) |
+ TRANSPOSE(flags,
+ IBV_EXP_CQ_RX_IPV4_PACKET,
+ RTE_PTYPE_INNER_L3_IPV4) |
+ TRANSPOSE(flags,
+ IBV_EXP_CQ_RX_IPV6_PACKET,
+ RTE_PTYPE_INNER_L3_IPV6);
+ else
+ pkt_type =
+ TRANSPOSE(flags,
+ IBV_EXP_CQ_RX_IPV4_PACKET,
+ RTE_PTYPE_L3_IPV4) |
+ TRANSPOSE(flags,
+ IBV_EXP_CQ_RX_IPV6_PACKET,
+ RTE_PTYPE_L3_IPV6);
+ return pkt_type;
+}
+
+/**
+ * Translate RX completion flags to offload flags.
+ *
+ * @param[in] rxq
+ * Pointer to RX queue structure.
+ * @param flags
+ * RX completion flags returned by poll_length_flags().
+ *
+ * @return
+ * Offload flags (ol_flags) for struct rte_mbuf.
+ */
+static inline uint32_t
+rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
+{
+ uint32_t ol_flags = 0;
+
+ if (rxq->csum)
+ ol_flags |=
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_IP_CSUM_OK,
+ PKT_RX_IP_CKSUM_BAD) |
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK,
+ PKT_RX_L4_CKSUM_BAD);
+ /*
+ * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
+ * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
+ * (its value is 0).
+ */
+ if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
+ ol_flags |=
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
+ PKT_RX_IP_CKSUM_BAD) |
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
+ PKT_RX_L4_CKSUM_BAD);
+ return ol_flags;
+}
+
+/**
+ * DPDK callback for RX with scattered packets support.
+ *
+ * @param dpdk_rxq
+ * Generic pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct rxq *rxq = (struct rxq *)dpdk_rxq;
+ struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;
+ const unsigned int elts_n = rxq->elts_n;
+ unsigned int elts_head = rxq->elts_head;
+ unsigned int i;
+ unsigned int pkts_ret = 0;
+ int ret;
+
+ if (unlikely(!rxq->sp))
+ return mlx5_rx_burst(dpdk_rxq, pkts, pkts_n);
+ if (unlikely(elts == NULL)) /* See RTE_DEV_CMD_SET_MTU. */
+ return 0;
+ for (i = 0; (i != pkts_n); ++i) {
+ struct rxq_elt_sp *elt = &(*elts)[elts_head];
+ unsigned int len;
+ unsigned int pkt_buf_len;
+ struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */
+ struct rte_mbuf **pkt_buf_next = &pkt_buf;
+ unsigned int seg_headroom = RTE_PKTMBUF_HEADROOM;
+ unsigned int j = 0;
+ uint32_t flags;
+
+ /* Sanity checks. */
+ assert(elts_head < rxq->elts_n);
+ assert(rxq->elts_head < rxq->elts_n);
+ ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
+ &flags);
+ if (unlikely(ret < 0)) {
+ struct ibv_wc wc;
+ int wcs_n;
+
+ DEBUG("rxq=%p, poll_length() failed (ret=%d)",
+ (void *)rxq, ret);
+ /* ibv_poll_cq() must be used in case of failure. */
+ wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
+ if (unlikely(wcs_n == 0))
+ break;
+ if (unlikely(wcs_n < 0)) {
+ DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
+ (void *)rxq, wcs_n);
+ break;
+ }
+ assert(wcs_n == 1);
+ if (unlikely(wc.status != IBV_WC_SUCCESS)) {
+ /* Whatever, just repost the offending WR. */
+ DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
+ " completion status (%d): %s",
+ (void *)rxq, wc.wr_id, wc.status,
+ ibv_wc_status_str(wc.status));
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment dropped packets counter. */
+ ++rxq->stats.idropped;
+#endif
+ goto repost;
+ }
+ ret = wc.byte_len;
+ }
+ if (ret == 0)
+ break;
+ len = ret;
+ pkt_buf_len = len;
+ /*
+ * Replace spent segments with new ones, concatenate and
+ * return them as pkt_buf.
+ */
+ while (1) {
+ struct ibv_sge *sge = &elt->sges[j];
+ struct rte_mbuf *seg = elt->bufs[j];
+ struct rte_mbuf *rep;
+ unsigned int seg_tailroom;
+
+ assert(seg != NULL);
+ /*
+ * Fetch initial bytes of packet descriptor into a
+ * cacheline while allocating rep.
+ */
+ rte_prefetch0(seg);
+ rep = __rte_mbuf_raw_alloc(rxq->mp);
+ if (unlikely(rep == NULL)) {
+ /*
+ * Unable to allocate a replacement mbuf,
+ * repost WR.
+ */
+ DEBUG("rxq=%p: can't allocate a new mbuf",
+ (void *)rxq);
+ if (pkt_buf != NULL) {
+ *pkt_buf_next = NULL;
+ rte_pktmbuf_free(pkt_buf);
+ }
+ /* Increment out of memory counters. */
+ ++rxq->stats.rx_nombuf;
+ ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
+ goto repost;
+ }
+#ifndef NDEBUG
+ /* Poison user-modifiable fields in rep. */
+ NEXT(rep) = (void *)((uintptr_t)-1);
+ SET_DATA_OFF(rep, 0xdead);
+ DATA_LEN(rep) = 0xd00d;
+ PKT_LEN(rep) = 0xdeadd00d;
+ NB_SEGS(rep) = 0x2a;
+ PORT(rep) = 0x2a;
+ rep->ol_flags = -1;
+#endif
+ assert(rep->buf_len == seg->buf_len);
+ assert(rep->buf_len == rxq->mb_len);
+ /* Reconfigure sge to use rep instead of seg. */
+ assert(sge->lkey == rxq->mr->lkey);
+ sge->addr = ((uintptr_t)rep->buf_addr + seg_headroom);
+ elt->bufs[j] = rep;
+ ++j;
+ /* Update pkt_buf if it's the first segment, or link
+ * seg to the previous one and update pkt_buf_next. */
+ *pkt_buf_next = seg;
+ pkt_buf_next = &NEXT(seg);
+ /* Update seg information. */
+ seg_tailroom = (seg->buf_len - seg_headroom);
+ assert(sge->length == seg_tailroom);
+ SET_DATA_OFF(seg, seg_headroom);
+ if (likely(len <= seg_tailroom)) {
+ /* Last segment. */
+ DATA_LEN(seg) = len;
+ PKT_LEN(seg) = len;
+ /* Sanity check. */
+ assert(rte_pktmbuf_headroom(seg) ==
+ seg_headroom);
+ assert(rte_pktmbuf_tailroom(seg) ==
+ (seg_tailroom - len));
+ break;
+ }
+ DATA_LEN(seg) = seg_tailroom;
+ PKT_LEN(seg) = seg_tailroom;
+ /* Sanity check. */
+ assert(rte_pktmbuf_headroom(seg) == seg_headroom);
+ assert(rte_pktmbuf_tailroom(seg) == 0);
+ /* Fix len and clear headroom for next segments. */
+ len -= seg_tailroom;
+ seg_headroom = 0;
+ }
+ /* Update head and tail segments. */
+ *pkt_buf_next = NULL;
+ assert(pkt_buf != NULL);
+ assert(j != 0);
+ NB_SEGS(pkt_buf) = j;
+ PORT(pkt_buf) = rxq->port_id;
+ PKT_LEN(pkt_buf) = pkt_buf_len;
+ pkt_buf->packet_type = rxq_cq_to_pkt_type(flags);
+ pkt_buf->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
+
+ /* Return packet. */
+ *(pkts++) = pkt_buf;
+ ++pkts_ret;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment bytes counter. */
+ rxq->stats.ibytes += pkt_buf_len;
+#endif
+repost:
+ ret = rxq->if_wq->recv_sg_list(rxq->wq,
+ elt->sges,
+ RTE_DIM(elt->sges));
+ if (unlikely(ret)) {
+ /* Inability to repost WRs is fatal. */
+ DEBUG("%p: recv_sg_list(): failed (ret=%d)",
+ (void *)rxq->priv,
+ ret);
+ abort();
+ }
+ if (++elts_head >= elts_n)
+ elts_head = 0;
+ continue;
+ }
+ if (unlikely(i == 0))
+ return 0;
+ rxq->elts_head = elts_head;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment packets counter. */
+ rxq->stats.ipackets += pkts_ret;
+#endif
+ return pkts_ret;
+}
+