- if (elts == NULL) {
- ERROR("%p: can't allocate packets array", (void *)txq);
- ret = ENOMEM;
- goto error;
- }
- for (i = 0; (i != elts_n); ++i) {
- struct txq_elt *elt = &(*elts)[i];
-
- elt->buf = NULL;
- }
- DEBUG("%p: allocated and configured %u WRs", (void *)txq, elts_n);
- txq->elts_n = elts_n;
- txq->elts = elts;
- txq->elts_head = 0;
- txq->elts_tail = 0;
- txq->elts_comp = 0;
- /*
- * Request send completion every MLX4_PMD_TX_PER_COMP_REQ packets or
- * at least 4 times per ring.
- */
- txq->elts_comp_cd_init =
- ((MLX4_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ?
- MLX4_PMD_TX_PER_COMP_REQ : (elts_n / 4));
- txq->elts_comp_cd = txq->elts_comp_cd_init;
- assert(ret == 0);
- return 0;
-error:
- rte_free(elts);
- DEBUG("%p: failed, freed everything", (void *)txq);
- assert(ret > 0);
- rte_errno = ret;
- return -rte_errno;
-}
-
-/**
- * Free TX queue elements.
- *
- * @param txq
- * Pointer to TX queue structure.
- */
-static void
-txq_free_elts(struct txq *txq)
-{
- unsigned int elts_n = txq->elts_n;
- unsigned int elts_head = txq->elts_head;
- unsigned int elts_tail = txq->elts_tail;
- struct txq_elt (*elts)[elts_n] = txq->elts;
-
- DEBUG("%p: freeing WRs", (void *)txq);
- txq->elts_n = 0;
- txq->elts_head = 0;
- txq->elts_tail = 0;
- txq->elts_comp = 0;
- txq->elts_comp_cd = 0;
- txq->elts_comp_cd_init = 0;
- txq->elts = NULL;
- if (elts == NULL)
- return;
- while (elts_tail != elts_head) {
- struct txq_elt *elt = &(*elts)[elts_tail];
-
- assert(elt->buf != NULL);
- rte_pktmbuf_free(elt->buf);
-#ifndef NDEBUG
- /* Poisoning. */
- memset(elt, 0x77, sizeof(*elt));
-#endif
- if (++elts_tail == elts_n)
- elts_tail = 0;
- }
- rte_free(elts);
-}
-
-/**
- * Clean up a TX queue.
- *
- * Destroy objects, free allocated memory and reset the structure for reuse.
- *
- * @param txq
- * Pointer to TX queue structure.
- */
-static void
-txq_cleanup(struct txq *txq)
-{
- size_t i;
-
- DEBUG("cleaning up %p", (void *)txq);
- txq_free_elts(txq);
- if (txq->qp != NULL)
- claim_zero(ibv_destroy_qp(txq->qp));
- if (txq->cq != NULL)
- claim_zero(ibv_destroy_cq(txq->cq));
- for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
- if (txq->mp2mr[i].mp == NULL)
- break;
- assert(txq->mp2mr[i].mr != NULL);
- claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));
- }
- memset(txq, 0, sizeof(*txq));
-}
-
-struct mlx4_check_mempool_data {
- int ret;
- char *start;
- char *end;
-};
-
-/* Called by mlx4_check_mempool() when iterating the memory chunks. */
-static void mlx4_check_mempool_cb(struct rte_mempool *mp,
- void *opaque, struct rte_mempool_memhdr *memhdr,
- unsigned mem_idx)
-{
- struct mlx4_check_mempool_data *data = opaque;
-
- (void)mp;
- (void)mem_idx;
- /* It already failed, skip the next chunks. */
- if (data->ret != 0)
- return;
- /* It is the first chunk. */
- if (data->start == NULL && data->end == NULL) {
- data->start = memhdr->addr;
- data->end = data->start + memhdr->len;
- return;
- }
- if (data->end == memhdr->addr) {
- data->end += memhdr->len;
- return;
- }
- if (data->start == (char *)memhdr->addr + memhdr->len) {
- data->start -= memhdr->len;
- return;
- }
- /* Error, mempool is not virtually contigous. */
- data->ret = -1;
-}
-
-/**
- * Check if a mempool can be used: it must be virtually contiguous.
- *
- * @param[in] mp
- * Pointer to memory pool.
- * @param[out] start
- * Pointer to the start address of the mempool virtual memory area
- * @param[out] end
- * Pointer to the end address of the mempool virtual memory area
- *
- * @return
- * 0 on success (mempool is virtually contiguous), -1 on error.
- */
-static int mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start,
- uintptr_t *end)
-{
- struct mlx4_check_mempool_data data;
-
- memset(&data, 0, sizeof(data));
- rte_mempool_mem_iter(mp, mlx4_check_mempool_cb, &data);
- *start = (uintptr_t)data.start;
- *end = (uintptr_t)data.end;
- return data.ret;
-}
-
-/**
- * Register mempool as a memory region.
- *
- * @param pd
- * Pointer to protection domain.
- * @param mp
- * Pointer to memory pool.
- *
- * @return
- * Memory region pointer, NULL in case of error and rte_errno is set.
- */
-struct ibv_mr *
-mlx4_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
-{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- uintptr_t start;
- uintptr_t end;
- unsigned int i;
- struct ibv_mr *mr;
-
- if (mlx4_check_mempool(mp, &start, &end) != 0) {
- rte_errno = EINVAL;
- ERROR("mempool %p: not virtually contiguous",
- (void *)mp);
- return NULL;
- }
- DEBUG("mempool %p area start=%p end=%p size=%zu",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- /* Round start and end to page boundary if found in memory segments. */
- for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
- uintptr_t addr = (uintptr_t)ms[i].addr;
- size_t len = ms[i].len;
- unsigned int align = ms[i].hugepage_sz;
-
- if ((start > addr) && (start < addr + len))
- start = RTE_ALIGN_FLOOR(start, align);
- if ((end > addr) && (end < addr + len))
- end = RTE_ALIGN_CEIL(end, align);
- }
- DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- mr = ibv_reg_mr(pd,
- (void *)start,
- end - start,
- IBV_ACCESS_LOCAL_WRITE);
- if (!mr)
- rte_errno = errno ? errno : EINVAL;
- return mr;
-}
-
-struct txq_mp2mr_mbuf_check_data {
- int ret;
-};
-
-/**
- * Callback function for rte_mempool_obj_iter() to check whether a given
- * mempool object looks like a mbuf.
- *
- * @param[in] mp
- * The mempool pointer
- * @param[in] arg
- * Context data (struct txq_mp2mr_mbuf_check_data). Contains the
- * return value.
- * @param[in] obj
- * Object address.
- * @param index
- * Object index, unused.
- */
-static void
-txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
- uint32_t index __rte_unused)
-{
- struct txq_mp2mr_mbuf_check_data *data = arg;
- struct rte_mbuf *buf = obj;
-
- /*
- * Check whether mbuf structure fits element size and whether mempool
- * pointer is valid.
- */
- if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
- data->ret = -1;
-}
-
-/**
- * Iterator function for rte_mempool_walk() to register existing mempools and
- * fill the MP to MR cache of a TX queue.
- *
- * @param[in] mp
- * Memory Pool to register.
- * @param *arg
- * Pointer to TX queue structure.
- */
-static void
-txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
-{
- struct txq *txq = arg;
- struct txq_mp2mr_mbuf_check_data data = {
- .ret = 0,
- };
-
- /* Register mempool only if the first element looks like a mbuf. */
- if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
- data.ret == -1)
- return;
- mlx4_txq_mp2mr(txq, mp);
-}
-
-/**
- * Configure a TX queue.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param txq
- * Pointer to TX queue structure.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
- *
- * @return
- * 0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
- unsigned int socket, const struct rte_eth_txconf *conf)
-{
- struct priv *priv = dev->data->dev_private;
- struct txq tmpl = {
- .priv = priv,
- .socket = socket
- };
- union {
- struct ibv_qp_init_attr init;
- struct ibv_qp_attr mod;
- } attr;
- int ret;
-
- (void)conf; /* Thresholds configuration (ignored). */
- if (priv == NULL) {
- rte_errno = EINVAL;
- goto error;
- }
- if (desc == 0) {
- rte_errno = EINVAL;
- ERROR("%p: invalid number of Tx descriptors", (void *)dev);
- goto error;
- }
- /* MRs will be registered in mp2mr[] later. */
- tmpl.cq = ibv_create_cq(priv->ctx, desc, NULL, NULL, 0);
- if (tmpl.cq == NULL) {
- rte_errno = ENOMEM;
- ERROR("%p: CQ creation failure: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- DEBUG("priv->device_attr.max_qp_wr is %d",
- priv->device_attr.max_qp_wr);
- DEBUG("priv->device_attr.max_sge is %d",
- priv->device_attr.max_sge);
- attr.init = (struct ibv_qp_init_attr){
- /* CQ to be associated with the send queue. */
- .send_cq = tmpl.cq,
- /* CQ to be associated with the receive queue. */
- .recv_cq = tmpl.cq,
- .cap = {
- /* Max number of outstanding WRs. */
- .max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
- priv->device_attr.max_qp_wr :
- desc),
- /* Max number of scatter/gather elements in a WR. */
- .max_send_sge = 1,
- .max_inline_data = MLX4_PMD_MAX_INLINE,
- },
- .qp_type = IBV_QPT_RAW_PACKET,
- /*
- * Do *NOT* enable this, completions events are managed per
- * Tx burst.
- */
- .sq_sig_all = 0,
- };
- tmpl.qp = ibv_create_qp(priv->pd, &attr.init);
- if (tmpl.qp == NULL) {
- rte_errno = errno ? errno : EINVAL;
- ERROR("%p: QP creation failure: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- /* ibv_create_qp() updates this value. */
- tmpl.max_inline = attr.init.cap.max_inline_data;
- attr.mod = (struct ibv_qp_attr){
- /* Move the QP to this state. */
- .qp_state = IBV_QPS_INIT,
- /* Primary port number. */
- .port_num = priv->port
- };
- ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE | IBV_QP_PORT);
- if (ret) {
- rte_errno = ret;
- ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- ret = txq_alloc_elts(&tmpl, desc);
- if (ret) {
- rte_errno = ret;
- ERROR("%p: TXQ allocation failed: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- attr.mod = (struct ibv_qp_attr){
- .qp_state = IBV_QPS_RTR
- };
- ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
- if (ret) {
- rte_errno = ret;
- ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- attr.mod.qp_state = IBV_QPS_RTS;
- ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
- if (ret) {
- rte_errno = ret;
- ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- /* Clean up txq in case we're reinitializing it. */
- DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
- txq_cleanup(txq);
- *txq = tmpl;
- DEBUG("%p: txq updated with %p", (void *)txq, (void *)&tmpl);
- /* Pre-register known mempools. */
- rte_mempool_walk(txq_mp2mr_iter, txq);
- return 0;
-error:
- ret = rte_errno;
- txq_cleanup(&tmpl);
- rte_errno = ret;
- assert(rte_errno > 0);
- return -rte_errno;
-}
-
-/**
- * DPDK callback to configure a TX queue.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param idx
- * TX queue index.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
- *
- * @return
- * 0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
- unsigned int socket, const struct rte_eth_txconf *conf)
-{
- struct priv *priv = dev->data->dev_private;
- struct txq *txq = (*priv->txqs)[idx];
- int ret;
-
- DEBUG("%p: configuring queue %u for %u descriptors",
- (void *)dev, idx, desc);
- if (idx >= priv->txqs_n) {
- rte_errno = EOVERFLOW;
- ERROR("%p: queue index out of range (%u >= %u)",
- (void *)dev, idx, priv->txqs_n);
- return -rte_errno;
- }
- if (txq != NULL) {
- DEBUG("%p: reusing already allocated queue index %u (%p)",
- (void *)dev, idx, (void *)txq);
- if (priv->started) {
- rte_errno = EEXIST;
- return -rte_errno;
- }
- (*priv->txqs)[idx] = NULL;
- txq_cleanup(txq);
- } else {
- txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0, socket);
- if (txq == NULL) {
- rte_errno = ENOMEM;
- ERROR("%p: unable to allocate queue index %u",
- (void *)dev, idx);
- return -rte_errno;
- }
- }
- ret = txq_setup(dev, txq, desc, socket, conf);
- if (ret)
- rte_free(txq);
- else {
- txq->stats.idx = idx;
- DEBUG("%p: adding TX queue %p to list",
- (void *)dev, (void *)txq);
- (*priv->txqs)[idx] = txq;
- /* Update send callback. */
- dev->tx_pkt_burst = mlx4_tx_burst;
- }
- return ret;
-}
-
-/**
- * DPDK callback to release a TX queue.
- *
- * @param dpdk_txq
- * Generic TX queue pointer.
- */
-static void
-mlx4_tx_queue_release(void *dpdk_txq)
-{
- struct txq *txq = (struct txq *)dpdk_txq;
- struct priv *priv;
- unsigned int i;
-
- if (txq == NULL)
- return;
- priv = txq->priv;
- for (i = 0; (i != priv->txqs_n); ++i)
- if ((*priv->txqs)[i] == txq) {
- DEBUG("%p: removing TX queue %p from list",
- (void *)priv->dev, (void *)txq);
- (*priv->txqs)[i] = NULL;
- break;
- }
- txq_cleanup(txq);
- rte_free(txq);
-}
-
-/* RX queues handling. */
-
-/**
- * Allocate RX queue elements.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param elts_n
- * Number of elements to allocate.
- *
- * @return
- * 0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n)
-{
- unsigned int i;
- struct rxq_elt (*elts)[elts_n] =
- rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq->socket);
-
- if (elts == NULL) {
- rte_errno = ENOMEM;
- ERROR("%p: can't allocate packets array", (void *)rxq);
- goto error;
- }
- /* For each WR (packet). */
- for (i = 0; (i != elts_n); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct ibv_recv_wr *wr = &elt->wr;
- struct ibv_sge *sge = &(*elts)[i].sge;
- struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
-
- if (buf == NULL) {
- rte_errno = ENOMEM;
- ERROR("%p: empty mbuf pool", (void *)rxq);
- goto error;