} elts;
unsigned int sp:1; /* Use scattered RX elements. */
unsigned int csum:1; /* Enable checksum offloading. */
+ unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
uint32_t mb_len; /* Length of a mp-issued mbuf. */
struct mlx4_rxq_stats stats; /* RX queue counters. */
unsigned int socket; /* CPU socket ID for allocations. */
+ struct ibv_exp_res_domain *rd; /* Resource Domain. */
};
/* TX element. */
linear_t (*elts_linear)[]; /* Linearized buffers. */
struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
unsigned int socket; /* CPU socket ID for allocations. */
+ struct ibv_exp_res_domain *rd; /* Resource Domain. */
};
struct priv {
unsigned int hw_tss:1; /* TSS is supported. */
unsigned int hw_rss:1; /* RSS is supported. */
unsigned int hw_csum:1; /* Checksum offload is supported. */
+ unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int rss:1; /* RSS is enabled. */
unsigned int vf:1; /* This is a VF device. */
#ifdef INLINE_RECV
claim_zero(ibv_destroy_qp(txq->qp));
if (txq->cq != NULL)
claim_zero(ibv_destroy_cq(txq->cq));
+ if (txq->rd != NULL) {
+ struct ibv_exp_destroy_res_domain_attr attr = {
+ .comp_mask = 0,
+ };
+
+ assert(txq->priv != NULL);
+ assert(txq->priv->ctx != NULL);
+ claim_zero(ibv_exp_destroy_res_domain(txq->priv->ctx,
+ txq->rd,
+ &attr));
+ }
for (i = 0; (i != elemof(txq->mp2mr)); ++i) {
if (txq->mp2mr[i].mp == NULL)
break;
}
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
send_flags |= IBV_EXP_QP_BURST_IP_CSUM;
+ /* HW does not support checksum offloads at arbitrary
+ * offsets but automatically recognizes the packet
+ * type. For inner L3/L4 checksums, only VXLAN (UDP)
+ * tunnels are currently supported. */
+ if (RTE_ETH_IS_TUNNEL_PKT(buf->packet_type))
+ send_flags |= IBV_EXP_QP_BURST_TUNNEL;
+ }
if (likely(segs == 1)) {
uintptr_t addr;
uint32_t length;
};
union {
struct ibv_exp_query_intf_params params;
- struct ibv_qp_init_attr init;
+ struct ibv_exp_qp_init_attr init;
+ struct ibv_exp_res_domain_init_attr rd;
+ struct ibv_exp_cq_init_attr cq;
struct ibv_exp_qp_attr mod;
} attr;
enum ibv_exp_query_intf_status status;
}
desc /= MLX4_PMD_SGE_WR_N;
/* MRs will be registered in mp2mr[] later. */
- tmpl.cq = ibv_create_cq(priv->ctx, desc, NULL, NULL, 0);
+ attr.rd = (struct ibv_exp_res_domain_init_attr){
+ .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
+ IBV_EXP_RES_DOMAIN_MSG_MODEL),
+ .thread_model = IBV_EXP_THREAD_SINGLE,
+ .msg_model = IBV_EXP_MSG_HIGH_BW,
+ };
+ tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
+ if (tmpl.rd == NULL) {
+ ret = ENOMEM;
+ ERROR("%p: RD creation failure: %s",
+ (void *)dev, strerror(ret));
+ goto error;
+ }
+ attr.cq = (struct ibv_exp_cq_init_attr){
+ .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
+ .res_domain = tmpl.rd,
+ };
+ tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
if (tmpl.cq == NULL) {
ret = ENOMEM;
ERROR("%p: CQ creation failure: %s",
priv->device_attr.max_qp_wr);
DEBUG("priv->device_attr.max_sge is %d",
priv->device_attr.max_sge);
- attr.init = (struct ibv_qp_init_attr){
+ attr.init = (struct ibv_exp_qp_init_attr){
/* CQ to be associated with the send queue. */
.send_cq = tmpl.cq,
/* CQ to be associated with the receive queue. */
.qp_type = IBV_QPT_RAW_PACKET,
/* Do *NOT* enable this, completions events are managed per
* TX burst. */
- .sq_sig_all = 0
+ .sq_sig_all = 0,
+ .pd = priv->pd,
+ .res_domain = tmpl.rd,
+ .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
+ IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
};
- tmpl.qp = ibv_create_qp(priv->pd, &attr.init);
+ tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
if (tmpl.qp == NULL) {
ret = (errno ? errno : EINVAL);
ERROR("%p: QP creation failure: %s",
.intf_scope = IBV_EXP_INTF_GLOBAL,
.intf = IBV_EXP_INTF_QP_BURST,
.obj = tmpl.qp,
+#ifdef HAVE_EXP_QP_BURST_CREATE_DISABLE_ETH_LOOPBACK
+ /* MC loopback must be disabled when not using a VF. */
+ .family_flags =
+ (!priv->vf ?
+ IBV_EXP_QP_BURST_CREATE_DISABLE_ETH_LOOPBACK :
+ 0),
+#endif
};
tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
if (tmpl.if_qp == NULL) {
}
if (rxq->cq != NULL)
claim_zero(ibv_destroy_cq(rxq->cq));
+ if (rxq->rd != NULL) {
+ struct ibv_exp_destroy_res_domain_attr attr = {
+ .comp_mask = 0,
+ };
+
+ assert(rxq->priv != NULL);
+ assert(rxq->priv->ctx != NULL);
+ claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx,
+ rxq->rd,
+ &attr));
+ }
if (rxq->mr != NULL)
claim_zero(ibv_dereg_mr(rxq->mr));
memset(rxq, 0, sizeof(*rxq));
}
+/**
+ * Derive the mbuf packet type from RX completion flags.
+ *
+ * When IBV_EXP_CQ_RX_TUNNEL_PACKET is set, the OUTER_IPV4/OUTER_IPV6
+ * completion flags select the outer L3 type and the plain IPV4/IPV6
+ * completion flags select the inner L3 type. Otherwise the plain
+ * IPV4/IPV6 completion flags select the L3 type directly.
+ *
+ * @param flags
+ *   RX completion flags returned by poll_length_flags().
+ *
+ * @return
+ *   Packet type for struct rte_mbuf.
+ */
+static inline uint32_t
+rxq_cq_to_pkt_type(uint32_t flags)
+{
+	uint32_t outer_l3;
+	uint32_t inner_l3;
+
+	/* Non-tunneled packet: a single L3 header to report. */
+	if (!(flags & IBV_EXP_CQ_RX_TUNNEL_PACKET))
+		return (TRANSPOSE(flags,
+				  IBV_EXP_CQ_RX_IPV4_PACKET,
+				  RTE_PTYPE_L3_IPV4) |
+			TRANSPOSE(flags,
+				  IBV_EXP_CQ_RX_IPV6_PACKET,
+				  RTE_PTYPE_L3_IPV6));
+	/* Tunneled packet: report outer and inner L3 headers separately. */
+	outer_l3 =
+		TRANSPOSE(flags,
+			  IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
+			  RTE_PTYPE_L3_IPV4) |
+		TRANSPOSE(flags,
+			  IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
+			  RTE_PTYPE_L3_IPV6);
+	inner_l3 =
+		TRANSPOSE(flags,
+			  IBV_EXP_CQ_RX_IPV4_PACKET,
+			  RTE_PTYPE_INNER_L3_IPV4) |
+		TRANSPOSE(flags,
+			  IBV_EXP_CQ_RX_IPV6_PACKET,
+			  RTE_PTYPE_INNER_L3_IPV6);
+	return (outer_l3 | inner_l3);
+}
+
/**
* Translate RX completion flags to offload flags.
*
static inline uint32_t
rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
{
- uint32_t ol_flags;
+ uint32_t ol_flags = 0;
- ol_flags =
- TRANSPOSE(flags, IBV_EXP_CQ_RX_IPV4_PACKET, PKT_RX_IPV4_HDR) |
- TRANSPOSE(flags, IBV_EXP_CQ_RX_IPV6_PACKET, PKT_RX_IPV6_HDR);
if (rxq->csum)
ol_flags |=
TRANSPOSE(~flags,
TRANSPOSE(~flags,
IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK,
PKT_RX_L4_CKSUM_BAD);
+ /*
+ * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
+ * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
+ * (its value is 0).
+ */
+ if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
+ ol_flags |=
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
+ PKT_RX_IP_CKSUM_BAD) |
+ TRANSPOSE(~flags,
+ IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
+ PKT_RX_L4_CKSUM_BAD);
return ol_flags;
}
NB_SEGS(pkt_buf) = j;
PORT(pkt_buf) = rxq->port_id;
PKT_LEN(pkt_buf) = pkt_buf_len;
+ pkt_buf->packet_type = rxq_cq_to_pkt_type(flags);
pkt_buf->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
/* Return packet. */
NEXT(seg) = NULL;
PKT_LEN(seg) = len;
DATA_LEN(seg) = len;
+ seg->packet_type = rxq_cq_to_pkt_type(flags);
seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
/* Return packet. */
* QP pointer or NULL in case of error.
*/
static struct ibv_qp *
-rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc)
+rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
+ struct ibv_exp_res_domain *rd)
{
struct ibv_exp_qp_init_attr attr = {
/* CQ to be associated with the send queue. */
MLX4_PMD_SGE_WR_N),
},
.qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
+ .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
+ IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
.pd = priv->pd,
+ .res_domain = rd,
};
#ifdef INLINE_RECV
*/
static struct ibv_qp *
rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
- int parent)
+ int parent, struct ibv_exp_res_domain *rd)
{
struct ibv_exp_qp_init_attr attr = {
/* CQ to be associated with the send queue. */
},
.qp_type = IBV_QPT_RAW_PACKET,
.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
+ IBV_EXP_QP_INIT_ATTR_RES_DOMAIN |
IBV_EXP_QP_INIT_ATTR_QPG),
- .pd = priv->pd
+ .pd = priv->pd,
+ .res_domain = rd,
};
#ifdef INLINE_RECV
tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
rxq->csum = tmpl.csum;
}
+ if (priv->hw_csum_l2tun) {
+ tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+ rxq->csum_l2tun = tmpl.csum_l2tun;
+ }
/* Enable scattered packets support for this queue if necessary. */
if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
(dev->data->dev_conf.rxmode.max_rx_pkt_len >
struct ibv_exp_qp_attr mod;
union {
struct ibv_exp_query_intf_params params;
+ struct ibv_exp_cq_init_attr cq;
+ struct ibv_exp_res_domain_init_attr rd;
} attr;
enum ibv_exp_query_intf_status status;
struct ibv_recv_wr *bad_wr;
/* Toggle RX checksum offload if hardware supports it. */
if (priv->hw_csum)
tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+ if (priv->hw_csum_l2tun)
+ tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
/* Enable scattered packets support for this queue if necessary. */
if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
(dev->data->dev_conf.rxmode.max_rx_pkt_len >
goto error;
}
skip_mr:
- tmpl.cq = ibv_create_cq(priv->ctx, desc, NULL, NULL, 0);
+ attr.rd = (struct ibv_exp_res_domain_init_attr){
+ .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
+ IBV_EXP_RES_DOMAIN_MSG_MODEL),
+ .thread_model = IBV_EXP_THREAD_SINGLE,
+ .msg_model = IBV_EXP_MSG_HIGH_BW,
+ };
+ tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
+ if (tmpl.rd == NULL) {
+ ret = ENOMEM;
+ ERROR("%p: RD creation failure: %s",
+ (void *)dev, strerror(ret));
+ goto error;
+ }
+ attr.cq = (struct ibv_exp_cq_init_attr){
+ .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
+ .res_domain = tmpl.rd,
+ };
+ tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
if (tmpl.cq == NULL) {
ret = ENOMEM;
ERROR("%p: CQ creation failure: %s",
priv->device_attr.max_sge);
#ifdef RSS_SUPPORT
if (priv->rss)
- tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent);
+ tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
+ tmpl.rd);
else
#endif /* RSS_SUPPORT */
- tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc);
+ tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
if (tmpl.qp == NULL) {
ret = (errno ? errno : EINVAL);
ERROR("%p: QP creation failure: %s",
{
struct priv *priv = dev->data->dev_private;
unsigned int max;
+ char ifname[IF_NAMESIZE];
priv_lock(priv);
/* FIXME: we should ask the device for these values. */
max = 65535;
info->max_rx_queues = max;
info->max_tx_queues = max;
- info->max_mac_addrs = elemof(priv->mac);
+ /* Last array entry is reserved for broadcast. */
+ info->max_mac_addrs = (elemof(priv->mac) - 1);
info->rx_offload_capa =
(priv->hw_csum ?
(DEV_RX_OFFLOAD_IPV4_CKSUM |
DEV_TX_OFFLOAD_UDP_CKSUM |
DEV_TX_OFFLOAD_TCP_CKSUM) :
0);
+ if (priv_get_ifname(priv, &ifname) == 0)
+ info->if_index = if_nametoindex(ifname);
priv_unlock(priv);
}
priv_lock(priv);
DEBUG("%p: removing MAC address from index %" PRIu32,
(void *)dev, index);
- if (index >= MLX4_MAX_MAC_ADDRESSES)
- goto end;
- /* Refuse to remove the broadcast address, this one is special. */
- if (!memcmp(priv->mac[index].addr_bytes, "\xff\xff\xff\xff\xff\xff",
- ETHER_ADDR_LEN))
+ /* Last array entry is reserved for broadcast. */
+ if (index >= (elemof(priv->mac) - 1))
goto end;
priv_mac_addr_del(priv, index);
end:
priv_lock(priv);
DEBUG("%p: adding MAC address at index %" PRIu32,
(void *)dev, index);
- if (index >= MLX4_MAX_MAC_ADDRESSES)
- goto end;
- /* Refuse to add the broadcast address, this one is special. */
- if (!memcmp(mac_addr->addr_bytes, "\xff\xff\xff\xff\xff\xff",
- ETHER_ADDR_LEN))
+ /* Last array entry is reserved for broadcast. */
+ if (index >= (elemof(priv->mac) - 1))
goto end;
priv_mac_addr_add(priv, index,
(const uint8_t (*)[ETHER_ADDR_LEN])
.mac_addr_remove = mlx4_mac_addr_remove,
.mac_addr_add = mlx4_mac_addr_add,
.mtu_set = mlx4_dev_set_mtu,
- .fdir_add_signature_filter = NULL,
- .fdir_update_signature_filter = NULL,
- .fdir_remove_signature_filter = NULL,
- .fdir_add_perfect_filter = NULL,
- .fdir_update_perfect_filter = NULL,
- .fdir_remove_perfect_filter = NULL,
- .fdir_set_masks = NULL
+ .udp_tunnel_add = NULL,
+ .udp_tunnel_del = NULL,
};
/**
DEBUG("checksum offloading is %ssupported",
(priv->hw_csum ? "" : "not "));
+ priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
+ IBV_EXP_DEVICE_VXLAN_SUPPORT);
+ DEBUG("L2 tunnel checksum offloads are %ssupported",
+ (priv->hw_csum_l2tun ? "" : "not "));
+
#ifdef INLINE_RECV
priv->inl_recv_size = mlx4_getenv_int("MLX4_INLINE_RECV_SIZE");
claim_zero(priv_mac_addr_add(priv, 0,
(const uint8_t (*)[ETHER_ADDR_LEN])
mac.addr_bytes));
- claim_zero(priv_mac_addr_add(priv, 1,
+ claim_zero(priv_mac_addr_add(priv, (elemof(priv->mac) - 1),
&(const uint8_t [ETHER_ADDR_LEN])
{ "\xff\xff\xff\xff\xff\xff" }));
#ifndef NDEBUG