X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx4%2Fmlx4_rxq.c;h=50f33eb0c5e3b83449347d19b8b1f86dda307551;hb=099c2c5376131b3d352d70f8904e586c0e84651e;hp=483fe9b0dbd01df04b36c008fbe93bff83090ff1;hpb=078b8b452e6baa735139baa2b89354a991a38948;p=dpdk.git diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c index 483fe9b0db..50f33eb0c5 100644 --- a/drivers/net/mlx4/mlx4_rxq.c +++ b/drivers/net/mlx4/mlx4_rxq.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox Technologies, Ltd */ /** @@ -46,20 +18,23 @@ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic" #endif +#include #include #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif +#include #include #include -#include +#include #include #include #include #include #include "mlx4.h" +#include "mlx4_glue.h" #include "mlx4_flow.h" #include "mlx4_rxtx.h" #include "mlx4_utils.h" @@ -112,8 +87,8 @@ mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE] = { * Pointer to RSS context on success, NULL otherwise and rte_errno is set. */ struct mlx4_rss * -mlx4_rss_get(struct priv *priv, uint64_t fields, - uint8_t key[MLX4_RSS_HASH_KEY_SIZE], +mlx4_rss_get(struct mlx4_priv *priv, uint64_t fields, + const uint8_t key[MLX4_RSS_HASH_KEY_SIZE], uint16_t queues, const uint16_t queue_id[]) { struct mlx4_rss *rss; @@ -162,7 +137,8 @@ error: * @param rss * RSS context to release. */ -void mlx4_rss_put(struct mlx4_rss *rss) +void +mlx4_rss_put(struct mlx4_rss *rss) { assert(rss->refcnt); if (--rss->refcnt) @@ -184,8 +160,12 @@ void mlx4_rss_put(struct mlx4_rss *rss) * * @param rss * RSS context to attach to. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ -int mlx4_rss_attach(struct mlx4_rss *rss) +int +mlx4_rss_attach(struct mlx4_rss *rss) { assert(rss->refcnt); if (rss->usecnt++) { @@ -195,12 +175,14 @@ int mlx4_rss_attach(struct mlx4_rss *rss) } struct ibv_wq *ind_tbl[rss->queues]; - struct priv *priv = rss->priv; + struct mlx4_priv *priv = rss->priv; + struct rte_eth_dev *dev = ETH_DEV(priv); const char *msg; - unsigned int i; + unsigned int i = 0; int ret; if (!rte_is_power_of_2(RTE_DIM(ind_tbl))) { + ret = EINVAL; msg = "number of RSS queues must be a power of two"; goto error; } @@ -208,15 +190,22 @@ int mlx4_rss_attach(struct mlx4_rss *rss) uint16_t id = rss->queue_id[i]; struct rxq *rxq = NULL; - if (id < priv->dev->data->nb_rx_queues) - rxq = priv->dev->data->rx_queues[id]; + if (id < dev->data->nb_rx_queues) + rxq = dev->data->rx_queues[id]; if (!rxq) { + ret = EINVAL; msg = "RSS target queue is not configured"; goto error; } + ret = mlx4_rxq_attach(rxq); + if (ret) { + ret = -ret; + msg = "unable to attach RSS target queue"; + goto error; + } ind_tbl[i] = rxq->wq; } - rss->ind = ibv_create_rwq_ind_table + rss->ind = mlx4_glue->create_rwq_ind_table (priv->ctx, &(struct ibv_rwq_ind_table_init_attr){ .log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)), @@ -224,10 +213,11 @@ int mlx4_rss_attach(struct mlx4_rss *rss) .comp_mask = 0, }); if (!rss->ind) { + ret = errno ? errno : EINVAL; msg = "RSS indirection table creation failure"; goto error; } - rss->qp = ibv_create_qp_ex + rss->qp = mlx4_glue->create_qp_ex (priv->ctx, &(struct ibv_qp_init_attr_ex){ .comp_mask = (IBV_QP_INIT_ATTR_PD | @@ -244,10 +234,11 @@ int mlx4_rss_attach(struct mlx4_rss *rss) }, }); if (!rss->qp) { + ret = errno ? 
errno : EINVAL; msg = "RSS hash QP creation failure"; goto error; } - ret = ibv_modify_qp + ret = mlx4_glue->modify_qp (rss->qp, &(struct ibv_qp_attr){ .qp_state = IBV_QPS_INIT, @@ -258,7 +249,7 @@ int mlx4_rss_attach(struct mlx4_rss *rss) msg = "failed to switch RSS hash QP to INIT state"; goto error; } - ret = ibv_modify_qp + ret = mlx4_glue->modify_qp (rss->qp, &(struct ibv_qp_attr){ .qp_state = IBV_QPS_RTR, @@ -270,10 +261,20 @@ int mlx4_rss_attach(struct mlx4_rss *rss) } return 0; error: + if (rss->qp) { + claim_zero(mlx4_glue->destroy_qp(rss->qp)); + rss->qp = NULL; + } + if (rss->ind) { + claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind)); + rss->ind = NULL; + } + while (i--) + mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]); ERROR("mlx4: %s", msg); --rss->usecnt; - rte_errno = EINVAL; - return -rte_errno; + rte_errno = ret; + return -ret; } /** @@ -287,21 +288,195 @@ error: * @param rss * RSS context to detach from. */ -void mlx4_rss_detach(struct mlx4_rss *rss) +void +mlx4_rss_detach(struct mlx4_rss *rss) { + struct mlx4_priv *priv = rss->priv; + struct rte_eth_dev *dev = ETH_DEV(priv); + unsigned int i; + assert(rss->refcnt); assert(rss->qp); assert(rss->ind); if (--rss->usecnt) return; - claim_zero(ibv_destroy_qp(rss->qp)); + claim_zero(mlx4_glue->destroy_qp(rss->qp)); rss->qp = NULL; - claim_zero(ibv_destroy_rwq_ind_table(rss->ind)); + claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind)); rss->ind = NULL; + for (i = 0; i != rss->queues; ++i) + mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]); +} + +/** + * Initialize common RSS context resources. + * + * Because ConnectX-3 hardware limitations require a fixed order in the + * indirection table, WQs must be allocated sequentially to be part of a + * common RSS context. + * + * Since a newly created WQ cannot be moved to a different context, this + * function allocates them all at once, one for each configured Rx queue, + * as well as all related resources (CQs and mbufs). 
+ * + * This must therefore be done before creating any Rx flow rules relying on + * indirection tables. + * + * @param priv + * Pointer to private structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx4_rss_init(struct mlx4_priv *priv) +{ + struct rte_eth_dev *dev = ETH_DEV(priv); + uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues); + uint32_t wq_num_prev = 0; + const char *msg; + unsigned int i; + int ret; + + if (priv->rss_init) + return 0; + if (ETH_DEV(priv)->data->nb_rx_queues > priv->hw_rss_max_qps) { + ERROR("RSS does not support more than %d queues", + priv->hw_rss_max_qps); + rte_errno = EINVAL; + return -rte_errno; + } + /* Prepare range for RSS contexts before creating the first WQ. */ + ret = mlx4_glue->dv_set_context_attr + (priv->ctx, + MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ, + &log2_range); + if (ret) { + ERROR("cannot set up range size for RSS context to %u" + " (for %u Rx queues), error: %s", + 1 << log2_range, dev->data->nb_rx_queues, strerror(ret)); + rte_errno = ret; + return -ret; + } + for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) { + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i]; + struct ibv_cq *cq; + struct ibv_wq *wq; + uint32_t wq_num; + + /* Attach the configured Rx queues. */ + if (rxq) { + assert(!rxq->usecnt); + ret = mlx4_rxq_attach(rxq); + if (!ret) { + wq_num = rxq->wq->wq_num; + goto wq_num_check; + } + ret = -ret; + msg = "unable to create Rx queue resources"; + goto error; + } + /* + * WQs are temporarily allocated for unconfigured Rx queues + * to maintain proper index alignment in indirection table + * by skipping unused WQ numbers. + * + * The reason this works at all even though these WQs are + * immediately destroyed is that WQNs are allocated + * sequentially and are guaranteed to never be reused in the + * same context by the underlying implementation. 
+ */ + cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0); + if (!cq) { + ret = ENOMEM; + msg = "placeholder CQ creation failure"; + goto error; + } + wq = mlx4_glue->create_wq + (priv->ctx, + &(struct ibv_wq_init_attr){ + .wq_type = IBV_WQT_RQ, + .max_wr = 1, + .max_sge = 1, + .pd = priv->pd, + .cq = cq, + }); + if (wq) { + wq_num = wq->wq_num; + claim_zero(mlx4_glue->destroy_wq(wq)); + } else { + wq_num = 0; /* Shut up GCC 4.8 warnings. */ + } + claim_zero(mlx4_glue->destroy_cq(cq)); + if (!wq) { + ret = ENOMEM; + msg = "placeholder WQ creation failure"; + goto error; + } +wq_num_check: + /* + * While guaranteed by the implementation, make sure WQ + * numbers are really sequential (as the saying goes, + * trust, but verify). + */ + if (i && wq_num - wq_num_prev != 1) { + if (rxq) + mlx4_rxq_detach(rxq); + ret = ERANGE; + msg = "WQ numbers are not sequential"; + goto error; + } + wq_num_prev = wq_num; + } + priv->rss_init = 1; + return 0; +error: + ERROR("cannot initialize common RSS resources (queue %u): %s: %s", + i, msg, strerror(ret)); + while (i--) { + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i]; + + if (rxq) + mlx4_rxq_detach(rxq); + } + rte_errno = ret; + return -ret; } /** - * Allocate Rx queue elements. + * Release common RSS context resources. + * + * As the reverse of mlx4_rss_init(), this must be done after removing all + * flow rules relying on indirection tables. + * + * @param priv + * Pointer to private structure. + */ +void +mlx4_rss_deinit(struct mlx4_priv *priv) +{ + unsigned int i; + + if (!priv->rss_init) + return; + for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) { + struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i]; + + if (rxq) { + assert(rxq->usecnt == 1); + mlx4_rxq_detach(rxq); + } + } + priv->rss_init = 0; +} + +/** + * Attach a user to a Rx queue. + * + * Used when the resources of an Rx queue must be instantiated for it to + * become in a usable state. 
+ * + * This function increments the usage count of the Rx queue. * * @param rxq * Pointer to Rx queue structure. @@ -309,72 +484,228 @@ void mlx4_rss_detach(struct mlx4_rss *rss) * @return * 0 on success, negative errno value otherwise and rte_errno is set. */ -static int -mlx4_rxq_alloc_elts(struct rxq *rxq) +int +mlx4_rxq_attach(struct rxq *rxq) { - struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts; + if (rxq->usecnt++) { + assert(rxq->cq); + assert(rxq->wq); + assert(rxq->wqes); + assert(rxq->rq_db); + return 0; + } + + struct mlx4_priv *priv = rxq->priv; + struct rte_eth_dev *dev = ETH_DEV(priv); + const uint32_t elts_n = 1 << rxq->elts_n; + const uint32_t sges_n = 1 << rxq->sges_n; + struct rte_mbuf *(*elts)[elts_n] = rxq->elts; + struct mlx4dv_obj mlxdv; + struct mlx4dv_rwq dv_rwq; + struct mlx4dv_cq dv_cq = { .comp_mask = MLX4DV_CQ_MASK_UAR, }; + const char *msg; + struct ibv_cq *cq = NULL; + struct ibv_wq *wq = NULL; + uint32_t create_flags = 0; + uint32_t comp_mask = 0; + volatile struct mlx4_wqe_data_seg (*wqes)[]; unsigned int i; + int ret; - /* For each WR (packet). */ + assert(rte_is_power_of_2(elts_n)); + cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL, + rxq->channel, 0); + if (!cq) { + ret = ENOMEM; + msg = "CQ creation failure"; + goto error; + } + /* By default, FCS (CRC) is stripped by hardware. */ + if (rxq->crc_present) { + create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; + comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; + } + wq = mlx4_glue->create_wq + (priv->ctx, + &(struct ibv_wq_init_attr){ + .wq_type = IBV_WQT_RQ, + .max_wr = elts_n / sges_n, + .max_sge = sges_n, + .pd = priv->pd, + .cq = cq, + .comp_mask = comp_mask, + .create_flags = create_flags, + }); + if (!wq) { + ret = errno ? 
errno : EINVAL; + msg = "WQ creation failure"; + goto error; + } + ret = mlx4_glue->modify_wq + (wq, + &(struct ibv_wq_attr){ + .attr_mask = IBV_WQ_ATTR_STATE, + .wq_state = IBV_WQS_RDY, + }); + if (ret) { + msg = "WQ state change to IBV_WQS_RDY failed"; + goto error; + } + /* Retrieve device queue information. */ + mlxdv.cq.in = cq; + mlxdv.cq.out = &dv_cq; + mlxdv.rwq.in = wq; + mlxdv.rwq.out = &dv_rwq; + ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ); + if (ret) { + msg = "failed to obtain device information from WQ/CQ objects"; + goto error; + } + /* Pre-register Rx mempool. */ + DEBUG("port %u Rx queue %u registering mp %s having %u chunks", + ETH_DEV(priv)->data->port_id, rxq->stats.idx, + rxq->mp->name, rxq->mp->nb_mem_chunks); + mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp); + wqes = (volatile struct mlx4_wqe_data_seg (*)[]) + ((uintptr_t)dv_rwq.buf.buf + dv_rwq.rq.offset); for (i = 0; i != RTE_DIM(*elts); ++i) { - struct rxq_elt *elt = &(*elts)[i]; - struct ibv_recv_wr *wr = &elt->wr; - struct ibv_sge *sge = &(*elts)[i].sge; + volatile struct mlx4_wqe_data_seg *scat = &(*wqes)[i]; struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp); if (buf == NULL) { while (i--) { - rte_pktmbuf_free_seg((*elts)[i].buf); - (*elts)[i].buf = NULL; + rte_pktmbuf_free_seg((*elts)[i]); + (*elts)[i] = NULL; } - rte_errno = ENOMEM; - return -rte_errno; + ret = ENOMEM; + msg = "cannot allocate mbuf"; + goto error; } - elt->buf = buf; - wr->next = &(*elts)[(i + 1)].wr; - wr->sg_list = sge; - wr->num_sge = 1; /* Headroom is reserved by rte_pktmbuf_alloc(). */ assert(buf->data_off == RTE_PKTMBUF_HEADROOM); /* Buffer is supposed to be empty. */ assert(rte_pktmbuf_data_len(buf) == 0); assert(rte_pktmbuf_pkt_len(buf) == 0); - /* sge->addr must be able to store a pointer. */ - assert(sizeof(sge->addr) >= sizeof(uintptr_t)); - /* SGE keeps its headroom. 
*/ - sge->addr = (uintptr_t) - ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM); - sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM); - sge->lkey = rxq->mr->lkey; - /* Redundant check for tailroom. */ - assert(sge->length == rte_pktmbuf_tailroom(buf)); - } - /* The last WR pointer must be NULL. */ - (*elts)[(i - 1)].wr.next = NULL; + /* Only the first segment keeps headroom. */ + if (i % sges_n) + buf->data_off = 0; + buf->port = rxq->port_id; + buf->data_len = rte_pktmbuf_tailroom(buf); + buf->pkt_len = rte_pktmbuf_tailroom(buf); + buf->nb_segs = 1; + *scat = (struct mlx4_wqe_data_seg){ + .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, + uintptr_t)), + .byte_count = rte_cpu_to_be_32(buf->data_len), + .lkey = mlx4_rx_mb2mr(rxq, buf), + }; + (*elts)[i] = buf; + } + DEBUG("%p: allocated and configured %u segments (max %u packets)", + (void *)rxq, elts_n, elts_n / sges_n); + rxq->cq = cq; + rxq->wq = wq; + rxq->wqes = wqes; + rxq->rq_db = dv_rwq.rdb; + rxq->mcq.buf = dv_cq.buf.buf; + rxq->mcq.cqe_cnt = dv_cq.cqe_cnt; + rxq->mcq.set_ci_db = dv_cq.set_ci_db; + rxq->mcq.cqe_64 = (dv_cq.cqe_size & 64) ? 1 : 0; + rxq->mcq.arm_db = dv_cq.arm_db; + rxq->mcq.arm_sn = dv_cq.arm_sn; + rxq->mcq.cqn = dv_cq.cqn; + rxq->mcq.cq_uar = dv_cq.cq_uar; + rxq->mcq.cq_db_reg = (uint8_t *)dv_cq.cq_uar + MLX4_CQ_DOORBELL; + /* Update doorbell counter. */ + rxq->rq_ci = elts_n / sges_n; + rte_wmb(); + *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); return 0; +error: + if (wq) + claim_zero(mlx4_glue->destroy_wq(wq)); + if (cq) + claim_zero(mlx4_glue->destroy_cq(cq)); + --rxq->usecnt; + rte_errno = ret; + ERROR("error while attaching Rx queue %p: %s: %s", + (void *)rxq, msg, strerror(ret)); + return -ret; } /** - * Free Rx queue elements. + * Detach a user from a Rx queue. + * + * This function decrements the usage count of the Rx queue and destroys + * usage resources after reaching 0. * * @param rxq * Pointer to Rx queue structure. 
*/ -static void -mlx4_rxq_free_elts(struct rxq *rxq) +void +mlx4_rxq_detach(struct rxq *rxq) { unsigned int i; - struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts; + struct rte_mbuf *(*elts)[1 << rxq->elts_n] = rxq->elts; - DEBUG("%p: freeing WRs", (void *)rxq); + if (--rxq->usecnt) + return; + rxq->rq_ci = 0; + memset(&rxq->mcq, 0, sizeof(rxq->mcq)); + rxq->rq_db = NULL; + rxq->wqes = NULL; + claim_zero(mlx4_glue->destroy_wq(rxq->wq)); + rxq->wq = NULL; + claim_zero(mlx4_glue->destroy_cq(rxq->cq)); + rxq->cq = NULL; + DEBUG("%p: freeing Rx queue elements", (void *)rxq); for (i = 0; (i != RTE_DIM(*elts)); ++i) { - if (!(*elts)[i].buf) + if (!(*elts)[i]) continue; - rte_pktmbuf_free_seg((*elts)[i].buf); - (*elts)[i].buf = NULL; + rte_pktmbuf_free_seg((*elts)[i]); + (*elts)[i] = NULL; } } +/** + * Returns the per-queue supported offloads. + * + * @param priv + * Pointer to private structure. + * + * @return + * Supported Rx offloads. + */ +uint64_t +mlx4_get_rx_queue_offloads(struct mlx4_priv *priv) +{ + uint64_t offloads = DEV_RX_OFFLOAD_SCATTER | + DEV_RX_OFFLOAD_KEEP_CRC | + DEV_RX_OFFLOAD_JUMBO_FRAME; + + if (priv->hw_csum) + offloads |= DEV_RX_OFFLOAD_CHECKSUM; + return offloads; +} + +/** + * Returns the per-port supported offloads. + * + * @param priv + * Pointer to private structure. + * + * @return + * Supported Rx offloads. + */ +uint64_t +mlx4_get_rx_port_offloads(struct mlx4_priv *priv) +{ + uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER; + + (void)priv; + return offloads; +} + /** * DPDK callback to configure a Rx queue. 
* @@ -399,10 +730,9 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, unsigned int socket, const struct rte_eth_rxconf *conf, struct rte_mempool *mp) { - struct priv *priv = dev->data->dev_private; + struct mlx4_priv *priv = dev->data->dev_private; uint32_t mb_len = rte_pktmbuf_data_room_size(mp); - struct rxq_elt (*elts)[desc]; - struct rte_flow_error error; + struct rte_mbuf *(*elts)[rte_align32pow2(desc)]; struct rxq *rxq; struct mlx4_malloc_vec vec[] = { { @@ -417,10 +747,14 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, }, }; int ret; + uint32_t crc_present; + uint64_t offloads; + + offloads = conf->offloads | dev->data->dev_conf.rxmode.offloads; - (void)conf; /* Thresholds configuration (ignored). */ DEBUG("%p: configuring queue %u for %u descriptors", (void *)dev, idx, desc); + if (idx >= dev->data->nb_rx_queues) { rte_errno = EOVERFLOW; ERROR("%p: queue index out of range (%u >= %u)", @@ -439,6 +773,29 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, ERROR("%p: invalid number of Rx descriptors", (void *)dev); return -rte_errno; } + if (desc != RTE_DIM(*elts)) { + desc = RTE_DIM(*elts); + WARN("%p: increased number of descriptors in Rx queue %u" + " to the next power of two (%u)", + (void *)dev, idx, desc); + } + /* By default, FCS (CRC) is stripped by hardware. */ + crc_present = 0; + if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) { + if (priv->hw_fcs_strip) { + crc_present = 1; + } else { + WARN("%p: CRC stripping has been disabled but will still" + " be performed by hardware, make sure MLNX_OFED and" + " firmware are up to date", + (void *)dev); + } + } + DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from" + " incoming frames to hide it", + (void *)dev, + crc_present ? "disabled" : "enabled", + crc_present << 2); /* Allocate and initialize Rx queue. 
*/ mlx4_zmallocv_socket("RXQ", vec, RTE_DIM(vec), socket); if (!rxq) { @@ -450,10 +807,19 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, .priv = priv, .mp = mp, .port_id = dev->data->port_id, - .elts_n = desc, - .elts_head = 0, + .sges_n = 0, + .elts_n = rte_log2_u32(desc), .elts = elts, - .stats.idx = idx, + /* Toggle Rx checksum offload if hardware supports it. */ + .csum = priv->hw_csum && + (offloads & DEV_RX_OFFLOAD_CHECKSUM), + .csum_l2tun = priv->hw_csum_l2tun && + (offloads & DEV_RX_OFFLOAD_CHECKSUM), + .crc_present = crc_present, + .l2tun_offload = priv->hw_csum_l2tun, + .stats = { + .idx = idx, + }, .socket = socket, }; /* Enable scattered packets support for this queue if necessary. */ @@ -461,10 +827,30 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= (mb_len - RTE_PKTMBUF_HEADROOM)) { ; - } else if (dev->data->dev_conf.rxmode.enable_scatter) { - WARN("%p: scattered mode has been requested but is" - " not supported, this may lead to packet loss", - (void *)dev); + } else if (offloads & DEV_RX_OFFLOAD_SCATTER) { + uint32_t size = + RTE_PKTMBUF_HEADROOM + + dev->data->dev_conf.rxmode.max_rx_pkt_len; + uint32_t sges_n; + + /* + * Determine the number of SGEs needed for a full packet + * and round it to the next power of two. + */ + sges_n = rte_log2_u32((size / mb_len) + !!(size % mb_len)); + rxq->sges_n = sges_n; + /* Make sure sges_n did not overflow. 
*/ + size = mb_len * (1 << rxq->sges_n); + size -= RTE_PKTMBUF_HEADROOM; + if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) { + rte_errno = EOVERFLOW; + ERROR("%p: too many SGEs (%u) needed to handle" + " requested maximum packet size %u", + (void *)dev, + 1 << sges_n, + dev->data->dev_conf.rxmode.max_rx_pkt_len); + goto error; + } } else { WARN("%p: the requested maximum Rx packet size (%u) is" " larger than a single mbuf (%u) and scattered" @@ -473,16 +859,24 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, dev->data->dev_conf.rxmode.max_rx_pkt_len, mb_len - RTE_PKTMBUF_HEADROOM); } - /* Use the entire Rx mempool as the memory region. */ - rxq->mr = mlx4_mp2mr(priv->pd, mp); - if (!rxq->mr) { + DEBUG("%p: maximum number of segments per packet: %u", + (void *)dev, 1 << rxq->sges_n); + if (desc % (1 << rxq->sges_n)) { rte_errno = EINVAL; - ERROR("%p: MR creation failure: %s", - (void *)dev, strerror(rte_errno)); + ERROR("%p: number of Rx queue descriptors (%u) is not a" + " multiple of maximum segments per packet (%u)", + (void *)dev, + desc, + 1 << rxq->sges_n); + goto error; + } + if (mlx4_mr_btree_init(&rxq->mr_ctrl.cache_bh, + MLX4_MR_BTREE_CACHE_N, socket)) { + /* rte_errno is already set. 
*/ goto error; } if (dev->data->dev_conf.intr_conf.rxq) { - rxq->channel = ibv_create_comp_channel(priv->ctx); + rxq->channel = mlx4_glue->create_comp_channel(priv->ctx); if (rxq->channel == NULL) { rte_errno = ENOMEM; ERROR("%p: Rx interrupt completion channel creation" @@ -497,65 +891,9 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, goto error; } } - rxq->cq = ibv_create_cq(priv->ctx, desc, NULL, rxq->channel, 0); - if (!rxq->cq) { - rte_errno = ENOMEM; - ERROR("%p: CQ creation failure: %s", - (void *)dev, strerror(rte_errno)); - goto error; - } - rxq->wq = ibv_create_wq - (priv->ctx, - &(struct ibv_wq_init_attr){ - .wq_type = IBV_WQT_RQ, - .max_wr = RTE_MIN(priv->device_attr.max_qp_wr, desc), - .max_sge = 1, - .pd = priv->pd, - .cq = rxq->cq, - }); - if (!rxq->wq) { - rte_errno = errno ? errno : EINVAL; - ERROR("%p: WQ creation failure: %s", - (void *)dev, strerror(rte_errno)); - goto error; - } - ret = ibv_modify_wq - (rxq->wq, - &(struct ibv_wq_attr){ - .attr_mask = IBV_WQ_ATTR_STATE, - .wq_state = IBV_WQS_RDY, - }); - if (ret) { - rte_errno = ret; - ERROR("%p: WQ state to IBV_WPS_RDY failed: %s", - (void *)dev, strerror(rte_errno)); - goto error; - } - ret = mlx4_rxq_alloc_elts(rxq); - if (ret) { - ERROR("%p: RXQ allocation failed: %s", - (void *)dev, strerror(rte_errno)); - goto error; - } - ret = ibv_post_wq_recv(rxq->wq, &(*rxq->elts)[0].wr, - &(struct ibv_recv_wr *){ NULL }); - if (ret) { - rte_errno = ret; - ERROR("%p: ibv_post_recv() failed: %s", - (void *)dev, - strerror(rte_errno)); - goto error; - } DEBUG("%p: adding Rx queue %p to list", (void *)dev, (void *)rxq); dev->data->rx_queues[idx] = rxq; - /* Enable associated flows. */ - ret = mlx4_flow_sync(priv, &error); - if (!ret) - return 0; - ERROR("cannot re-attach flow rules to queue %u" - " (code %d, \"%s\"), flow error type %d, cause %p, message: %s", - idx, -ret, strerror(-ret), error.type, error.cause, - error.message ? 
error.message : "(unspecified)"); + return 0; error: dev->data->rx_queues[idx] = NULL; ret = rte_errno; @@ -575,28 +913,25 @@ void mlx4_rx_queue_release(void *dpdk_rxq) { struct rxq *rxq = (struct rxq *)dpdk_rxq; - struct priv *priv; + struct mlx4_priv *priv; unsigned int i; if (rxq == NULL) return; priv = rxq->priv; - for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) - if (priv->dev->data->rx_queues[i] == rxq) { + for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) + if (ETH_DEV(priv)->data->rx_queues[i] == rxq) { DEBUG("%p: removing Rx queue %p from list", - (void *)priv->dev, (void *)rxq); - priv->dev->data->rx_queues[i] = NULL; + (void *)ETH_DEV(priv), (void *)rxq); + ETH_DEV(priv)->data->rx_queues[i] = NULL; break; } - mlx4_flow_sync(priv, NULL); - mlx4_rxq_free_elts(rxq); - if (rxq->wq) - claim_zero(ibv_destroy_wq(rxq->wq)); - if (rxq->cq) - claim_zero(ibv_destroy_cq(rxq->cq)); + assert(!rxq->cq); + assert(!rxq->wq); + assert(!rxq->wqes); + assert(!rxq->rq_db); if (rxq->channel) - claim_zero(ibv_destroy_comp_channel(rxq->channel)); - if (rxq->mr) - claim_zero(ibv_dereg_mr(rxq->mr)); + claim_zero(mlx4_glue->destroy_comp_channel(rxq->channel)); + mlx4_mr_btree_free(&rxq->mr_ctrl.cache_bh); rte_free(rxq); }