From: Adrien Mazarguil
Date: Fri, 30 Oct 2015 18:52:35 +0000 (+0100)
Subject: mlx5: support MTU configuration
X-Git-Tag: spdx-start~8266
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=cf37ca9563d59a2fb28ae1c3ab728a7fb2606f68;p=dpdk.git

mlx5: support MTU configuration

Depending on the MTU and whether jumbo frames are enabled, RX queues may
switch between SG and non-SG modes for better performance.

Signed-off-by: Adrien Mazarguil
Signed-off-by: Nelio Laranjeiro
---

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index aafa70b29f..ddd74d0fb0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -140,6 +140,7 @@ static const struct eth_dev_ops mlx5_dev_ops = {
 	.tx_queue_release = mlx5_tx_queue_release,
 	.mac_addr_remove = mlx5_mac_addr_remove,
 	.mac_addr_add = mlx5_mac_addr_add,
+	.mtu_set = mlx5_dev_set_mtu,
 };
 
 static struct {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3f47a159cf..0e2457acec 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -162,6 +162,7 @@ int priv_get_mtu(struct priv *, uint16_t *);
 int priv_set_flags(struct priv *, unsigned int, unsigned int);
 int mlx5_dev_configure(struct rte_eth_dev *);
 void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *);
+int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t);
 int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
 				struct rte_pci_addr *);
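For context, applications reach the new mtu_set callback through the generic
ethdev API. The sketch below is not part of this patch; app_set_jumbo_mtu()
is a hypothetical helper, and it assumes a valid, already-configured port and
the uint8_t port id type of this DPDK era:

    #include <rte_ethdev.h>

    /* Request a 9000-byte MTU. mlx5_dev_set_mtu() returns a negative
     * errno value on failure, which rte_eth_dev_set_mtu() passes back
     * to the caller. */
    static int
    app_set_jumbo_mtu(uint8_t port_id)
    {
            int ret = rte_eth_dev_set_mtu(port_id, 9000);

            if (ret < 0)
                    return ret; /* e.g. -ENOTSUP if the PMD lacks mtu_set */
            return 0;
    }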
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 6b13cecdbd..0afc1bbc24 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -346,6 +346,23 @@ priv_get_mtu(struct priv *priv, uint16_t *mtu)
 	return 0;
 }
 
+/**
+ * Set device MTU.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param mtu
+ *   MTU value to set.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set.
+ */
+static int
+priv_set_mtu(struct priv *priv, uint16_t mtu)
+{
+	return priv_set_sysfs_ulong(priv, "mtu", mtu);
+}
+
 /**
  * Set device flags.
  *
@@ -517,6 +534,91 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 	priv_unlock(priv);
 }
 
+/**
+ * DPDK callback to change the MTU.
+ *
+ * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be
+ * received). Use this as a hint to enable/disable scattered packets support
+ * and improve performance when it is not needed.
+ * Since a failure part-way through can leave queues unusable, reconfiguring
+ * them on the fly is not recommended.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param mtu
+ *   New MTU.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
+{
+	struct priv *priv = dev->data->dev_private;
+	int ret = 0;
+	unsigned int i;
+	uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
+		mlx5_rx_burst;
+
+	priv_lock(priv);
+	/* Set kernel interface MTU first. */
+	if (priv_set_mtu(priv, mtu)) {
+		ret = errno;
+		WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
+		     strerror(ret));
+		goto out;
+	} else
+		DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
+	priv->mtu = mtu;
+	/* Temporarily replace RX handler with a fake one, assuming it has not
+	 * been copied elsewhere. */
+	dev->rx_pkt_burst = removed_rx_burst;
+	/* Make sure everyone has left mlx5_rx_burst() and uses
+	 * removed_rx_burst() instead. */
+	rte_wmb();
+	usleep(1000);
+	/* Reconfigure each RX queue. */
+	for (i = 0; (i != priv->rxqs_n); ++i) {
+		struct rxq *rxq = (*priv->rxqs)[i];
+		unsigned int max_frame_len;
+		int sp;
+
+		if (rxq == NULL)
+			continue;
+		/* Calculate new maximum frame length according to MTU and
+		 * toggle scattered support (sp) if necessary. */
+		max_frame_len = (priv->mtu + ETHER_HDR_LEN +
+				 (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
+		sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM));
+		/* Provide new values to rxq_rehash(). */
+		dev->data->dev_conf.rxmode.jumbo_frame = sp;
+		dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
+		ret = rxq_rehash(dev, rxq);
+		if (ret) {
+			/* Force SP RX if that queue requires it and abort. */
+			if (rxq->sp)
+				rx_func = mlx5_rx_burst_sp;
+			break;
+		}
+		/* Reenable non-RSS queue attributes. No need to check
+		 * for errors at this stage. */
+		if (!priv->rss) {
+			if (priv->started)
+				rxq_mac_addrs_add(rxq);
+		}
+		/* Scattered burst function takes priority. */
+		if (rxq->sp)
+			rx_func = mlx5_rx_burst_sp;
+	}
+	/* Burst functions can now be called again. */
+	rte_wmb();
+	dev->rx_pkt_burst = rx_func;
+out:
+	priv_unlock(priv);
+	assert(ret >= 0);
+	return -ret;
+}
+
 /**
  * Get PCI information from struct ibv_device.
  *
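The scatter decision in mlx5_dev_set_mtu() above boils down to a frame
length check. Here is the same arithmetic in isolation; a minimal sketch
assuming the common 2048-byte mbuf data room and a 128-byte
RTE_PKTMBUF_HEADROOM (needs_scatter() and the constants below are
illustrative, not driver code):

    #include <stdint.h>

    #define HDR_LEN   14  /* ETHER_HDR_LEN */
    #define VLAN_LEN   4  /* ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN */
    #define HEADROOM 128  /* assumed RTE_PKTMBUF_HEADROOM */

    /* Nonzero when a frame for this MTU cannot fit in a single mbuf
     * segment, i.e. when the queue must use scattered (SG) RX. */
    static inline int
    needs_scatter(uint16_t mtu, unsigned int mb_len)
    {
            unsigned int max_frame_len = mtu + HDR_LEN + VLAN_LEN;

            return max_frame_len > (mb_len - HEADROOM);
    }

    /* needs_scatter(1500, 2048) == 0: 1518 bytes fit in 1920 usable bytes.
     * needs_scatter(9000, 2048) == 1: 9018 bytes need several segments. */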
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 1eddfc7769..71d44708f3 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -525,6 +525,184 @@ rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
 
 #endif /* RSS_SUPPORT */
 
+/**
+ * Reconfigure a RX queue with new parameters.
+ *
+ * rxq_rehash() does not allocate mbufs, as allocating them from the wrong
+ * thread (such as a control thread) may corrupt the pool.
+ * In case of failure, the queue is left untouched.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param rxq
+ *   RX queue pointer.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
+{
+	struct priv *priv = rxq->priv;
+	struct rxq tmpl = *rxq;
+	unsigned int mbuf_n;
+	unsigned int desc_n;
+	struct rte_mbuf **pool;
+	unsigned int i, k;
+	struct ibv_exp_qp_attr mod;
+	struct ibv_recv_wr *bad_wr;
+	int err;
+	int parent = (rxq == &priv->rxq_parent);
+
+	if (parent) {
+		ERROR("%p: cannot rehash parent queue %p",
+		      (void *)dev, (void *)rxq);
+		return EINVAL;
+	}
+	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
+	/* Number of descriptors and mbufs currently allocated. */
+	desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
+	mbuf_n = desc_n;
+	/* Enable scattered packets support for this queue if necessary. */
+	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
+	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
+	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
+		tmpl.sp = 1;
+		desc_n /= MLX5_PMD_SGE_WR_N;
+	} else
+		tmpl.sp = 0;
+	DEBUG("%p: %s scattered packets support (%u WRs)",
+	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
+	/* If scatter mode is the same as before, nothing to do. */
+	if (tmpl.sp == rxq->sp) {
+		DEBUG("%p: nothing to do", (void *)dev);
+		return 0;
+	}
+	/* Remove attached flows if RSS is disabled (no parent queue). */
+	if (!priv->rss) {
+		rxq_mac_addrs_del(&tmpl);
+		/* Update original queue in case of failure. */
+		memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
+	}
+	/* From now on, any failure will render the queue unusable.
+	 * Reinitialize QP. */
+	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
+	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
+	if (err) {
+		ERROR("%p: cannot reset QP: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
+	err = ibv_resize_cq(tmpl.cq, desc_n);
+	if (err) {
+		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		/* Move the QP to this state. */
+		.qp_state = IBV_QPS_INIT,
+		/* Primary port number. */
+		.port_num = priv->port
+	};
+	err = ibv_exp_modify_qp(tmpl.qp, &mod,
+				(IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
+#endif /* RSS_SUPPORT */
+				 IBV_EXP_QP_PORT));
+	if (err) {
+		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
+		      (void *)dev, strerror(err));
+		assert(err > 0);
+		return err;
+	}
+	/* Reconfigure flows. Do not care for errors. */
+	if (!priv->rss) {
+		if (priv->started)
+			rxq_mac_addrs_add(&tmpl);
+		/* Update original queue in case of failure. */
+		memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
+	}
+	/* Allocate pool. */
+	pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
+	if (pool == NULL) {
+		ERROR("%p: cannot allocate memory", (void *)dev);
+		return ENOBUFS;
+	}
+	/* Snatch mbufs from original queue. */
+	k = 0;
+	if (rxq->sp) {
+		struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;
+
+		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+			struct rxq_elt_sp *elt = &(*elts)[i];
+			unsigned int j;
+
+			for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
+				assert(elt->bufs[j] != NULL);
+				pool[k++] = elt->bufs[j];
+			}
+		}
+	} else {
+		struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;
+
+		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+			struct rxq_elt *elt = &(*elts)[i];
+			struct rte_mbuf *buf = (void *)
+				((uintptr_t)elt->sge.addr -
+				 WR_ID(elt->wr.wr_id).offset);
+
+			assert(WR_ID(elt->wr.wr_id).id == i);
+			pool[k++] = buf;
+		}
+	}
+	assert(k == mbuf_n);
+	tmpl.elts_n = 0;
+	tmpl.elts.sp = NULL;
+	assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp);
+	err = ((tmpl.sp) ?
+	       rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
+	       rxq_alloc_elts(&tmpl, desc_n, pool));
+	if (err) {
+		ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
+		rte_free(pool);
+		assert(err > 0);
+		return err;
+	}
+	assert(tmpl.elts_n == desc_n);
+	assert(tmpl.elts.sp != NULL);
+	rte_free(pool);
+	/* Clean up original data. */
+	rxq->elts_n = 0;
+	rte_free(rxq->elts.sp);
+	rxq->elts.sp = NULL;
+	/* Post WRs. */
+	err = ibv_post_recv(tmpl.qp,
+			    (tmpl.sp ?
+			     &(*tmpl.elts.sp)[0].wr :
+			     &(*tmpl.elts.no_sp)[0].wr),
+			    &bad_wr);
+	if (err) {
+		ERROR("%p: ibv_post_recv() failed for WR %p: %s",
+		      (void *)dev,
+		      (void *)bad_wr,
+		      strerror(err));
+		goto skip_rtr;
+	}
+	mod = (struct ibv_exp_qp_attr){
+		.qp_state = IBV_QPS_RTR
+	};
+	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
+	if (err)
+		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
+		      (void *)dev, strerror(err));
+skip_rtr:
+	*rxq = tmpl;
+	assert(err >= 0);
+	return err;
+}
+
 /**
  * Configure a RX queue.
  *
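Stripped of error handling and the RSS_SUPPORT conditionals, rxq_rehash()
drives the queue pair through the usual verbs state sequence. In this
condensed sketch, qp, cq, desc_n, port, first_wr and bad_wr stand in for the
tmpl/priv fields used above:

    struct ibv_exp_qp_attr mod;

    mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
    ibv_exp_modify_qp(qp, &mod, IBV_EXP_QP_STATE);  /* discard QP state */
    ibv_resize_cq(cq, desc_n);                      /* match new WR count */
    mod = (struct ibv_exp_qp_attr){
            .qp_state = IBV_QPS_INIT,
            .port_num = port,                       /* primary port */
    };
    ibv_exp_modify_qp(qp, &mod, IBV_EXP_QP_STATE | IBV_EXP_QP_PORT);
    /* ...reallocate queue elements, then... */
    ibv_post_recv(qp, first_wr, &bad_wr);           /* repost receive WRs */
    mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RTR };
    ibv_exp_modify_qp(qp, &mod, IBV_EXP_QP_STATE);  /* ready to receive */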
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index c7f634e323..b6f21281e2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -144,6 +144,7 @@ struct txq {
 /* mlx5_rxq.c */
 
 void rxq_cleanup(struct rxq *);
+int rxq_rehash(struct rte_eth_dev *, struct rxq *);
 int rxq_setup(struct rte_eth_dev *, struct rxq *, uint16_t, unsigned int,
 	      const struct rte_eth_rxconf *, struct rte_mempool *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
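A note on the datapath hand-off in mlx5_dev_set_mtu(): queues cannot be
reconfigured while lcores may still be executing mlx5_rx_burst(), so the
burst callback is parked first. The pattern in isolation (the 1 ms sleep is
a heuristic drain window, not a hard synchronization guarantee):

    dev->rx_pkt_burst = removed_rx_burst; /* stub returning no packets */
    rte_wmb();    /* publish the new function pointer to other lcores */
    usleep(1000); /* let lcores already inside mlx5_rx_burst() leave */
    /* ...reconfigure RX queues safely... */
    rte_wmb();
    dev->rx_pkt_burst = rx_func; /* restore the appropriate handler */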