(void)wr_id_t_check;
}
+/*
+ * Transpose flags. Useful to convert IBV to DPDK flags.
+ *
+ * Keeps only the bits of (val) selected by mask (from), then scales the
+ * result by the ratio between (from) and (to): divided when (from) is
+ * the larger mask, multiplied otherwise.
+ *
+ * A zero (from) or (to) mask ends up as a divisor, so both must be
+ * nonzero. Masks are presumably single-bit constants -- TODO confirm.
+ * Every argument is expanded more than once; avoid side effects.
+ */
+#define TRANSPOSE(val, from, to) \
+ (((from) >= (to)) ? \
+ (((val) & (from)) / ((from) / (to))) : \
+ (((val) & (from)) * ((to) / (from))))
+
struct mlx4_rxq_stats {
unsigned int idx; /**< Mapping index. */
#ifdef MLX4_PMD_SOFT_COUNTERS
struct rxq_elt (*no_sp)[]; /* RX elements. */
} elts;
unsigned int sp:1; /* Use scattered RX elements. */
+ unsigned int csum:1; /* Enable checksum offloading. */
+ unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
uint32_t mb_len; /* Length of a mp-issued mbuf. */
struct mlx4_rxq_stats stats; /* RX queue counters. */
unsigned int socket; /* CPU socket ID for allocations. */
+ struct ibv_exp_res_domain *rd; /* Resource Domain. */
};
/* TX element. */
linear_t (*elts_linear)[]; /* Linearized buffers. */
struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
unsigned int socket; /* CPU socket ID for allocations. */
+ struct ibv_exp_res_domain *rd; /* Resource Domain. */
};
struct priv {
unsigned int hw_qpg:1; /* QP groups are supported. */
unsigned int hw_tss:1; /* TSS is supported. */
unsigned int hw_rss:1; /* RSS is supported. */
+ unsigned int hw_csum:1; /* Checksum offload is supported. */
+ unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int rss:1; /* RSS is enabled. */
unsigned int vf:1; /* This is a VF device. */
#ifdef INLINE_RECV
claim_zero(ibv_destroy_qp(txq->qp));
if (txq->cq != NULL)
claim_zero(ibv_destroy_cq(txq->cq));
+ if (txq->rd != NULL) {
+ struct ibv_exp_destroy_res_domain_attr attr = {
+ .comp_mask = 0,
+ };
+
+ assert(txq->priv != NULL);
+ assert(txq->priv->ctx != NULL);
+ claim_zero(ibv_exp_destroy_res_domain(txq->priv->ctx,
+ txq->rd,
+ &attr));
+ }
for (i = 0; (i != elemof(txq->mp2mr)); ++i) {
if (txq->mp2mr[i].mp == NULL)
break;
max = pkts_n;
for (i = 0; (i != max); ++i) {
struct rte_mbuf *buf = pkts[i];
+ unsigned int elts_head_next =
+ (((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
+ struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
struct txq_elt *elt = &(*txq->elts)[elts_head];
unsigned int segs = NB_SEGS(buf);
#ifdef MLX4_PMD_SOFT_COUNTERS
++elts_comp;
send_flags |= IBV_EXP_QP_BURST_SIGNALED;
}
+ /* Enable HW checksum offload if requested by the mbuf. */
+ if (buf->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
+ send_flags |= IBV_EXP_QP_BURST_IP_CSUM;
+ /* HW does not support checksum offloads at arbitrary
+ * offsets but automatically recognizes the packet
+ * type. For inner L3/L4 checksums, only VXLAN (UDP)
+ * tunnels are currently supported.
+ *
+ * FIXME: since PKT_TX_UDP_TUNNEL_PKT has been removed,
+ * the outer packet type is unknown. All we know is
+ * that the L2 header is of unusual length (not
+ * ETHER_HDR_LEN with or without 802.1Q header). */
+ if ((buf->l2_len != ETHER_HDR_LEN) &&
+ (buf->l2_len != (ETHER_HDR_LEN + 4)))
+ send_flags |= IBV_EXP_QP_BURST_TUNNEL;
+ }
if (likely(segs == 1)) {
uintptr_t addr;
uint32_t length;
if (txq->priv->vf)
rte_prefetch0((volatile void *)
(uintptr_t)addr);
+ RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
/* Put packet into send queue. */
#if MLX4_PMD_MAX_INLINE > 0
if (length <= txq->max_inline)
&sges);
if (ret.length == (unsigned int)-1)
goto stop;
+ RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
/* Put SG list into send queue. */
err = txq->if_qp->send_pending_sg_list
(txq->qp,
goto stop;
#endif /* MLX4_PMD_SGE_WR_N > 1 */
}
- if (++elts_head >= elts_n)
- elts_head = 0;
+ elts_head = elts_head_next;
#ifdef MLX4_PMD_SOFT_COUNTERS
/* Increment sent bytes counter. */
txq->stats.obytes += sent_size;
};
union {
struct ibv_exp_query_intf_params params;
- struct ibv_qp_init_attr init;
+ struct ibv_exp_qp_init_attr init;
+ struct ibv_exp_res_domain_init_attr rd;
+ struct ibv_exp_cq_init_attr cq;
struct ibv_exp_qp_attr mod;
} attr;
enum ibv_exp_query_intf_status status;
}
desc /= MLX4_PMD_SGE_WR_N;
/* MRs will be registered in mp2mr[] later. */
- tmpl.cq = ibv_create_cq(priv->ctx, desc, NULL, NULL, 0);
+ attr.rd = (struct ibv_exp_res_domain_init_attr){
+ .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
+ IBV_EXP_RES_DOMAIN_MSG_MODEL),
+ .thread_model = IBV_EXP_THREAD_SINGLE,
+ .msg_model = IBV_EXP_MSG_HIGH_BW,
+ };
+ tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
+ if (tmpl.rd == NULL) {
+ ret = ENOMEM;
+ ERROR("%p: RD creation failure: %s",
+ (void *)dev, strerror(ret));
+ goto error;
+ }
+ attr.cq = (struct ibv_exp_cq_init_attr){
+ .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
+ .res_domain = tmpl.rd,
+ };
+ tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
if (tmpl.cq == NULL) {
ret = ENOMEM;
ERROR("%p: CQ creation failure: %s",
priv->device_attr.max_qp_wr);
DEBUG("priv->device_attr.max_sge is %d",
priv->device_attr.max_sge);
- attr.init = (struct ibv_qp_init_attr){
+ attr.init = (struct ibv_exp_qp_init_attr){
/* CQ to be associated with the send queue. */
.send_cq = tmpl.cq,
/* CQ to be associated with the receive queue. */
.qp_type = IBV_QPT_RAW_PACKET,
/* Do *NOT* enable this, completions events are managed per
* TX burst. */
- .sq_sig_all = 0
+ .sq_sig_all = 0,
+ .pd = priv->pd,
+ .res_domain = tmpl.rd,
+ .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
+ IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
};
- tmpl.qp = ibv_create_qp(priv->pd, &attr.init);
+ tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
if (tmpl.qp == NULL) {
ret = (errno ? errno : EINVAL);
ERROR("%p: QP creation failure: %s",
.intf_scope = IBV_EXP_INTF_GLOBAL,
.intf = IBV_EXP_INTF_QP_BURST,
.obj = tmpl.qp,
+#ifdef HAVE_EXP_QP_BURST_CREATE_DISABLE_ETH_LOOPBACK
+ /* MC loopback must be disabled when not using a VF. */
+ .family_flags =
+ (!priv->vf ?
+ IBV_EXP_QP_BURST_CREATE_DISABLE_ETH_LOOPBACK :
+ 0),
+#endif
};
tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
if (tmpl.if_qp == NULL) {
}
if (rxq->cq != NULL)
claim_zero(ibv_destroy_cq(rxq->cq));
+ if (rxq->rd != NULL) {
+ struct ibv_exp_destroy_res_domain_attr attr = {
+ .comp_mask = 0,
+ };
+
+ assert(rxq->priv != NULL);
+ assert(rxq->priv->ctx != NULL);
+ claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx,
+ rxq->rd,
+ &attr));
+ }
if (rxq->mr != NULL)
claim_zero(ibv_dereg_mr(rxq->mr));
memset(rxq, 0, sizeof(*rxq));
}
+/**
+ * Translate RX completion flags to offload flags.
+ *
+ * IPv4/IPv6 packet type bits are always translated. Checksum error bits
+ * are derived by inverting the corresponding "checksum OK" completion
+ * bits, and only when checksum offloading is enabled on the queue
+ * (rxq->csum, plus rxq->csum_l2tun for packets flagged as tunneled).
+ *
+ * @param[in] rxq
+ * Pointer to RX queue structure.
+ * @param flags
+ * RX completion flags returned by poll_length_flags().
+ *
+ * @return
+ * Offload flags (ol_flags) for struct rte_mbuf.
+ */
+static inline uint32_t
+rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
+{
+ uint32_t ol_flags;
+
+ ol_flags =
+ TRANSPOSE(flags, IBV_EXP_CQ_RX_IPV4_PACKET, PKT_RX_IPV4_HDR) |
+ TRANSPOSE(flags, IBV_EXP_CQ_RX_IPV6_PACKET, PKT_RX_IPV6_HDR);
+ if (rxq->csum)
+ ol_flags |=
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_IP_CSUM_OK,
+ PKT_RX_IP_CKSUM_BAD) |
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK,
+ PKT_RX_L4_CKSUM_BAD);
+ /*
+ * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
+ * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
+ * (its value is 0).
+ *
+ * For tunnel packets, the outer header type is reported as well and
+ * outer checksum failures are folded into the same two BAD bits, so
+ * these bits alone do not tell inner from outer checksum errors.
+ */
+ if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
+ ol_flags |=
+ TRANSPOSE(flags,
+ IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
+ PKT_RX_TUNNEL_IPV4_HDR) |
+ TRANSPOSE(flags,
+ IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
+ PKT_RX_TUNNEL_IPV6_HDR) |
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
+ PKT_RX_IP_CKSUM_BAD) |
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
+ PKT_RX_L4_CKSUM_BAD);
+ return ol_flags;
+}
+
static uint16_t
mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
struct rte_mbuf **pkt_buf_next = &pkt_buf;
unsigned int seg_headroom = RTE_PKTMBUF_HEADROOM;
unsigned int j = 0;
+ uint32_t flags;
/* Sanity checks. */
#ifdef NDEBUG
assert(wr->num_sge == elemof(elt->sges));
assert(elts_head < rxq->elts_n);
assert(rxq->elts_head < rxq->elts_n);
- ret = rxq->if_cq->poll_length(rxq->cq, NULL, NULL);
+ ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
+ &flags);
if (unlikely(ret < 0)) {
struct ibv_wc wc;
int wcs_n;
NB_SEGS(pkt_buf) = j;
PORT(pkt_buf) = rxq->port_id;
PKT_LEN(pkt_buf) = pkt_buf_len;
- pkt_buf->ol_flags = 0;
+ pkt_buf->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
/* Return packet. */
*(pkts++) = pkt_buf;
struct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr -
WR_ID(wr_id).offset);
struct rte_mbuf *rep;
+ uint32_t flags;
/* Sanity checks. */
assert(WR_ID(wr_id).id < rxq->elts_n);
assert(wr->num_sge == 1);
assert(elts_head < rxq->elts_n);
assert(rxq->elts_head < rxq->elts_n);
- ret = rxq->if_cq->poll_length(rxq->cq, NULL, NULL);
+ ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
+ &flags);
if (unlikely(ret < 0)) {
struct ibv_wc wc;
int wcs_n;
NEXT(seg) = NULL;
PKT_LEN(seg) = len;
DATA_LEN(seg) = len;
- seg->ol_flags = 0;
+ seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
/* Return packet. */
*(pkts++) = seg;
* QP pointer or NULL in case of error.
*/
static struct ibv_qp *
-rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc)
+rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
+ struct ibv_exp_res_domain *rd)
{
struct ibv_exp_qp_init_attr attr = {
/* CQ to be associated with the send queue. */
MLX4_PMD_SGE_WR_N),
},
.qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
+ .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
+ IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
.pd = priv->pd,
+ .res_domain = rd,
};
#ifdef INLINE_RECV
*/
static struct ibv_qp *
rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
- int parent)
+ int parent, struct ibv_exp_res_domain *rd)
{
struct ibv_exp_qp_init_attr attr = {
/* CQ to be associated with the send queue. */
},
.qp_type = IBV_QPT_RAW_PACKET,
.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
+ IBV_EXP_QP_INIT_ATTR_RES_DOMAIN |
IBV_EXP_QP_INIT_ATTR_QPG),
- .pd = priv->pd
+ .pd = priv->pd,
+ .res_domain = rd,
};
#ifdef INLINE_RECV
/* Number of descriptors and mbufs currently allocated. */
desc_n = (tmpl.elts_n * (tmpl.sp ? MLX4_PMD_SGE_WR_N : 1));
mbuf_n = desc_n;
+ /* Toggle RX checksum offload if hardware supports it. */
+ if (priv->hw_csum) {
+ tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+ rxq->csum = tmpl.csum;
+ }
+ if (priv->hw_csum_l2tun) {
+ tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+ rxq->csum_l2tun = tmpl.csum_l2tun;
+ }
/* Enable scattered packets support for this queue if necessary. */
if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
(dev->data->dev_conf.rxmode.max_rx_pkt_len >
struct ibv_exp_qp_attr mod;
union {
struct ibv_exp_query_intf_params params;
+ struct ibv_exp_cq_init_attr cq;
+ struct ibv_exp_res_domain_init_attr rd;
} attr;
enum ibv_exp_query_intf_status status;
struct ibv_recv_wr *bad_wr;
rte_pktmbuf_tailroom(buf)) == tmpl.mb_len);
assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM);
rte_pktmbuf_free(buf);
+ /* Toggle RX checksum offload if hardware supports it. */
+ if (priv->hw_csum)
+ tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+ if (priv->hw_csum_l2tun)
+ tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
/* Enable scattered packets support for this queue if necessary. */
if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
(dev->data->dev_conf.rxmode.max_rx_pkt_len >
goto error;
}
skip_mr:
- tmpl.cq = ibv_create_cq(priv->ctx, desc, NULL, NULL, 0);
+ attr.rd = (struct ibv_exp_res_domain_init_attr){
+ .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
+ IBV_EXP_RES_DOMAIN_MSG_MODEL),
+ .thread_model = IBV_EXP_THREAD_SINGLE,
+ .msg_model = IBV_EXP_MSG_HIGH_BW,
+ };
+ tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
+ if (tmpl.rd == NULL) {
+ ret = ENOMEM;
+ ERROR("%p: RD creation failure: %s",
+ (void *)dev, strerror(ret));
+ goto error;
+ }
+ attr.cq = (struct ibv_exp_cq_init_attr){
+ .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
+ .res_domain = tmpl.rd,
+ };
+ tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
if (tmpl.cq == NULL) {
ret = ENOMEM;
ERROR("%p: CQ creation failure: %s",
priv->device_attr.max_sge);
#ifdef RSS_SUPPORT
if (priv->rss)
- tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent);
+ tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
+ tmpl.rd);
else
#endif /* RSS_SUPPORT */
- tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc);
+ tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
if (tmpl.qp == NULL) {
ret = (errno ? errno : EINVAL);
ERROR("%p: QP creation failure: %s",
info->max_rx_queues = max;
info->max_tx_queues = max;
info->max_mac_addrs = elemof(priv->mac);
+ info->rx_offload_capa =
+ (priv->hw_csum ?
+ (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM) :
+ 0);
+ info->tx_offload_capa =
+ (priv->hw_csum ?
+ (DEV_TX_OFFLOAD_IPV4_CKSUM |
+ DEV_TX_OFFLOAD_UDP_CKSUM |
+ DEV_TX_OFFLOAD_TCP_CKSUM) :
+ 0);
priv_unlock(priv);
}
.mac_addr_remove = mlx4_mac_addr_remove,
.mac_addr_add = mlx4_mac_addr_add,
.mtu_set = mlx4_dev_set_mtu,
+ .udp_tunnel_add = NULL,
+ .udp_tunnel_del = NULL,
.fdir_add_signature_filter = NULL,
.fdir_update_signature_filter = NULL,
.fdir_remove_signature_filter = NULL,
exp_device_attr.max_rss_tbl_sz);
#endif /* RSS_SUPPORT */
+ priv->hw_csum =
+ ((exp_device_attr.exp_device_cap_flags &
+ IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
+ (exp_device_attr.exp_device_cap_flags &
+ IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
+ DEBUG("checksum offloading is %ssupported",
+ (priv->hw_csum ? "" : "not "));
+
+ priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
+ IBV_EXP_DEVICE_VXLAN_SUPPORT);
+ DEBUG("L2 tunnel checksum offloads are %ssupported",
+ (priv->hw_csum_l2tun ? "" : "not "));
+
#ifdef INLINE_RECV
priv->inl_recv_size = mlx4_getenv_int("MLX4_INLINE_RECV_SIZE");