*/
#include <assert.h>
-#include <inttypes.h>
#include <stdint.h>
#include <string.h>
* Make sure we read the CQE after we read the ownership bit.
*/
rte_rmb();
+#ifndef NDEBUG
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR)) {
		struct mlx4_err_cqe *cqe_err =
			(struct mlx4_err_cqe *)cqe;
		ERROR("%p CQE error - vendor syndrome: 0x%x"
		      " syndrome: 0x%x\n",
		      (void *)txq, cqe_err->vendor_err,
		      cqe_err->syndrome);
}
+#endif /* NDEBUG */
/* Get WQE index reported in the CQE. */
new_index =
rte_be_to_cpu_16(cqe->wqe_index) & sq->txbb_cnt_mask;
* the ring consumer.
*/
cq->cons_index = cons_index;
- *cq->set_ci_db = rte_cpu_to_be_32(cq->cons_index & 0xffffff);
+ *cq->set_ci_db = rte_cpu_to_be_32(cq->cons_index & MLX4_CQ_DB_CI_MASK);
rte_wmb();
sq->tail = sq->tail + nr_txbbs;
/* Update the list of packets posted for transmission. */
return buf->pool;
}
-/**
- * Get memory region (MR) <-> memory pool (MP) association from txq->mp2mr[].
- * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
- * remove an entry first.
- *
- * @param txq
- * Pointer to Tx queue structure.
- * @param[in] mp
- * Memory pool for which a memory region lkey must be returned.
- *
- * @return
- * mr->lkey on success, (uint32_t)-1 on failure.
- */
-uint32_t
-mlx4_txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
-{
- unsigned int i;
- struct ibv_mr *mr;
-
- for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
- if (unlikely(txq->mp2mr[i].mp == NULL)) {
- /* Unknown MP, add a new MR for it. */
- break;
- }
- if (txq->mp2mr[i].mp == mp) {
- assert(txq->mp2mr[i].lkey != (uint32_t)-1);
- assert(txq->mp2mr[i].mr->lkey == txq->mp2mr[i].lkey);
- return txq->mp2mr[i].lkey;
- }
- }
- /* Add a new entry, register MR first. */
- DEBUG("%p: discovered new memory pool \"%s\" (%p)",
- (void *)txq, mp->name, (void *)mp);
- mr = mlx4_mp2mr(txq->priv->pd, mp);
- if (unlikely(mr == NULL)) {
- DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
- (void *)txq);
- return (uint32_t)-1;
- }
- if (unlikely(i == RTE_DIM(txq->mp2mr))) {
- /* Table is full, remove oldest entry. */
- DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
- (void *)txq);
- --i;
- claim_zero(ibv_dereg_mr(txq->mp2mr[0].mr));
- memmove(&txq->mp2mr[0], &txq->mp2mr[1],
- (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
- }
- /* Store the new entry. */
- txq->mp2mr[i].mp = mp;
- txq->mp2mr[i].mr = mr;
- txq->mp2mr[i].lkey = mr->lkey;
- DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
- (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey);
- return txq->mp2mr[i].lkey;
-}
-
/**
* Posts a single work request to a send queue.
*
* Packet to transmit.
*
* @return
- * 0 on success, negative errno value otherwise and rte_errno is set.
+ * 0 on success, negative errno value otherwise.
*/
static inline int
mlx4_post_send(struct txq *txq, struct rte_mbuf *pkt)
{
	struct mlx4_wqe_ctrl_seg *ctrl;
struct mlx4_wqe_data_seg *dseg;
struct mlx4_sq *sq = &txq->msq;
struct rte_mbuf *buf;
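+	/*
+	 * Union overlaying the 32-bit srcrb_flags control word so its
+	 * 16-bit halves can be written independently; the loopback path
+	 * below stores the first two bytes of the destination MAC
+	 * address in flags16[0].
+	 */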
+ union {
+ uint32_t flags;
+ uint16_t flags16[2];
+ } srcrb;
uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
uint32_t lkey;
uintptr_t addr;
- uint32_t srcrb_flags;
uint32_t owner_opcode = MLX4_OPCODE_SEND;
uint32_t byte_count;
int wqe_real_size;
int nr_txbbs;
- int rc;
struct pv *pv = (struct pv *)txq->bounce_buf;
int pv_counter = 0;
if (((sq->head - sq->tail) + nr_txbbs +
sq->headroom_txbbs) >= sq->txbb_cnt ||
nr_txbbs > MLX4_MAX_WQE_TXBBS) {
- rc = ENOSPC;
- goto err;
+ return -ENOSPC;
}
/* Get the control and data entries of the WQE. */
ctrl = (struct mlx4_wqe_ctrl_seg *)mlx4_get_send_wqe(sq, head_idx);
addr = rte_pktmbuf_mtod(buf, uintptr_t);
rte_prefetch0((volatile void *)addr);
/* Handle WQE wraparound. */
- if (unlikely(dseg >= (struct mlx4_wqe_data_seg *)sq->eob))
+ if (dseg >= (struct mlx4_wqe_data_seg *)sq->eob)
dseg = (struct mlx4_wqe_data_seg *)sq->buf;
dseg->addr = rte_cpu_to_be_64(addr);
/* Memory region key for this memory pool. */
lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(buf));
+#ifndef NDEBUG
if (unlikely(lkey == (uint32_t)-1)) {
/* MR does not exist. */
DEBUG("%p: unable to get MP <-> MR association",
ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
mlx4_txq_stamp_freed_wqe(sq, head_idx,
(sq->head & sq->txbb_cnt) ? 0 : 1);
- rc = EFAULT;
- goto err;
+ return -EFAULT;
}
+#endif /* NDEBUG */
dseg->lkey = rte_cpu_to_be_32(lkey);
if (likely(buf->data_len)) {
byte_count = rte_cpu_to_be_32(buf->data_len);
}
/* Fill the control parameters for this packet. */
ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
- /*
- * The caller should prepare "imm" in advance in order to support
- * VF to VF communication (when the device is a virtual-function
- * device (VF)).
- */
- ctrl->imm = 0;
/*
* For raw Ethernet, the SOLICIT flag is used to indicate that no ICRC
* should be calculated.
txq->elts_comp_cd -= nr_txbbs;
if (unlikely(txq->elts_comp_cd <= 0)) {
txq->elts_comp_cd = txq->elts_comp_cd_init;
- srcrb_flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT |
+ srcrb.flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT |
MLX4_WQE_CTRL_CQ_UPDATE);
} else {
- srcrb_flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT);
+ srcrb.flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT);
}
- ctrl->srcrb_flags = srcrb_flags;
+ /* Enable HW checksum offload if requested */
+ if (txq->csum &&
+ (pkt->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))) {
+ const uint64_t is_tunneled = (pkt->ol_flags &
+ (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN));
+
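+		/*
+		 * Request inner IP and L4 checksums through the opcode
+		 * field; the outer IP checksum is requested through
+		 * srcrb_flags.
+		 */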
+ if (is_tunneled && txq->csum_l2tun) {
+ owner_opcode |= MLX4_WQE_CTRL_IIP_HDR_CSUM |
+ MLX4_WQE_CTRL_IL4_HDR_CSUM;
+ if (pkt->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ srcrb.flags |=
+ RTE_BE32(MLX4_WQE_CTRL_IP_HDR_CSUM);
+ } else {
+ srcrb.flags |= RTE_BE32(MLX4_WQE_CTRL_IP_HDR_CSUM |
+ MLX4_WQE_CTRL_TCP_UDP_CSUM);
+ }
+ }
+ if (txq->lb) {
+ /*
+		 * Copy the destination MAC address to the WQE; this allows
+		 * loopback in eSwitch, so that VFs and PF can communicate
+		 * with each other.
+ */
+ srcrb.flags16[0] = *(rte_pktmbuf_mtod(pkt, uint16_t *));
+ ctrl->imm = *(rte_pktmbuf_mtod_offset(pkt, uint32_t *,
+ sizeof(uint16_t)));
+ } else {
+ ctrl->imm = 0;
+ }
+ ctrl->srcrb_flags = srcrb.flags;
	/*
	 * Make sure descriptor is fully written before
	 * setting ownership bit (because HW can start
	 * executing as soon as we do).
	 */
	rte_wmb();
	ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode |
					      ((sq->head & sq->txbb_cnt) ?
					       MLX4_BIT_WQE_OWN : 0));
sq->head += nr_txbbs;
return 0;
-err:
- rte_errno = rc;
- return -rc;
}
/**
return i;
}
+/**
+ * Translate Rx completion flags to packet type.
+ *
+ * @param flags
+ * Rx completion flags returned by mlx4_cqe_flags().
+ *
+ * @return
+ * Packet type in mbuf format.
+ */
+static inline uint32_t
+rxq_cq_to_pkt_type(uint32_t flags)
+{
+ uint32_t pkt_type;
+
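+	/*
+	 * When MLX4_CQE_L2_TUNNEL is set, MLX4_CQE_L2_TUNNEL_IPV4
+	 * describes the outer header while MLX4_CQE_STATUS_IPV4_PKT
+	 * describes the inner one.
+	 */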
+ if (flags & MLX4_CQE_L2_TUNNEL)
+ pkt_type =
+ mlx4_transpose(flags,
+ MLX4_CQE_L2_TUNNEL_IPV4,
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) |
+ mlx4_transpose(flags,
+ MLX4_CQE_STATUS_IPV4_PKT,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN);
+ else
+ pkt_type = mlx4_transpose(flags,
+ MLX4_CQE_STATUS_IPV4_PKT,
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN);
+ return pkt_type;
+}
+
+/**
+ * Translate Rx completion flags to offload flags.
+ *
+ * @param flags
+ * Rx completion flags returned by mlx4_cqe_flags().
+ * @param csum
+ * Whether Rx checksums are enabled.
+ * @param csum_l2tun
+ * Whether Rx L2 tunnel checksums are enabled.
+ *
+ * @return
+ * Offload flags (ol_flags) in mbuf format.
+ */
+static inline uint32_t
+rxq_cq_to_ol_flags(uint32_t flags, int csum, int csum_l2tun)
+{
+ uint32_t ol_flags = 0;
+
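+	/*
+	 * mlx4_transpose() moves a flag from its CQE bit position to
+	 * its mbuf bit position when set in flags.
+	 */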
+ if (csum)
+ ol_flags |=
+ mlx4_transpose(flags,
+ MLX4_CQE_STATUS_IP_HDR_CSUM_OK,
+ PKT_RX_IP_CKSUM_GOOD) |
+ mlx4_transpose(flags,
+ MLX4_CQE_STATUS_TCP_UDP_CSUM_OK,
+ PKT_RX_L4_CKSUM_GOOD);
+ if ((flags & MLX4_CQE_L2_TUNNEL) && csum_l2tun)
+ ol_flags |=
+ mlx4_transpose(flags,
+ MLX4_CQE_L2_TUNNEL_IPOK,
+ PKT_RX_IP_CKSUM_GOOD) |
+ mlx4_transpose(flags,
+ MLX4_CQE_L2_TUNNEL_L4_CSUM,
+ PKT_RX_L4_CKSUM_GOOD);
+ return ol_flags;
+}
+
+/**
+ * Extract checksum information from CQE flags.
+ *
+ * @param cqe
+ * Pointer to CQE structure.
+ * @param csum
+ * Whether Rx checksums are enabled.
+ * @param csum_l2tun
+ * Whether Rx L2 tunnel checksums are enabled.
+ *
+ * @return
+ * CQE checksum information.
+ */
+static inline uint32_t
+mlx4_cqe_flags(struct mlx4_cqe *cqe, int csum, int csum_l2tun)
+{
+ uint32_t flags = 0;
+
+ /*
+	 * The relevant bits are in different locations within their
+	 * respective CQE fields, therefore they can be joined in a
+	 * single 32-bit variable.
+ */
+ if (csum)
+ flags = (rte_be_to_cpu_32(cqe->status) &
+ MLX4_CQE_STATUS_IPV4_CSUM_OK);
+ if (csum_l2tun)
+ flags |= (rte_be_to_cpu_32(cqe->vlan_my_qpn) &
+ (MLX4_CQE_L2_TUNNEL |
+ MLX4_CQE_L2_TUNNEL_IPOK |
+ MLX4_CQE_L2_TUNNEL_L4_CSUM |
+ MLX4_CQE_L2_TUNNEL_IPV4));
+ return flags;
+}
+
/**
* Poll one CQE from CQ.
*
goto skip;
}
pkt = seg;
- pkt->packet_type = 0;
- pkt->ol_flags = 0;
+ if (rxq->csum | rxq->csum_l2tun) {
+ uint32_t flags =
+ mlx4_cqe_flags(cqe,
+ rxq->csum,
+ rxq->csum_l2tun);
+
+ pkt->ol_flags =
+ rxq_cq_to_ol_flags(flags,
+ rxq->csum,
+ rxq->csum_l2tun);
+ pkt->packet_type = rxq_cq_to_pkt_type(flags);
+ } else {
+ pkt->packet_type = 0;
+ pkt->ol_flags = 0;
+ }
pkt->pkt_len = len;
}
rep->nb_segs = 1;
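	/* Update the consumer index. */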
rxq->rq_ci = rq_ci >> sges_n;
rte_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
- *rxq->mcq.set_ci_db = rte_cpu_to_be_32(rxq->mcq.cons_index & 0xffffff);
+ *rxq->mcq.set_ci_db =
+ rte_cpu_to_be_32(rxq->mcq.cons_index & MLX4_CQ_DB_CI_MASK);
/* Increment packets counter. */
rxq->stats.ipackets += i;
return i;