CONFIG_RTE_LIBRTE_MLX4_PMD=n
CONFIG_RTE_LIBRTE_MLX4_DEBUG=n
CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS=n
-CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8
#
# Compile burst-oriented Mellanox ConnectX-4 & ConnectX-5 (MLX5) PMD
adds additional run-time checks and debugging messages at the cost of
lower performance.
-- ``CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE`` (default **8**)
-
- Maximum number of cached memory pools (MPs) per TX queue. Each MP from
- which buffers are to be transmitted must be associated to memory regions
- (MRs). This is a slow operation that must be cached.
-
- This value is always 1 for RX queues since they use a single MP.
-
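For reference, here is a minimal sketch of the kind of per-Tx-queue lookup table this option sized; the names below are illustrative assumptions, not the driver's actual symbols. Each entry caches the lkey obtained when a mempool's memory is registered as a memory region, so the slow registration is paid only on the first miss.

#include <stdint.h>
#include <rte_mempool.h>

/* Illustrative sketch only; not part of this patch or the mlx4 driver. */
#define TX_MP_CACHE 8 /* Default of CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE. */

struct mp2mr_entry {
	const struct rte_mempool *mp; /* Cache key: source mempool. */
	uint32_t lkey;                /* lkey of the MR registered for it. */
};

/* Linear lookup; a real driver registers the MR and caches it on a miss. */
static inline uint32_t
mp2mr_lookup(const struct mp2mr_entry cache[TX_MP_CACHE],
	     const struct rte_mempool *mp)
{
	unsigned int i;

	for (i = 0; i != TX_MP_CACHE && cache[i].mp != NULL; ++i)
		if (cache[i].mp == mp)
			return cache[i].lkey;
	return UINT32_MAX; /* Miss: caller must register and cache an MR. */
}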
Environment variables
~~~~~~~~~~~~~~~~~~~~~
CFLAGS += -DNDEBUG -UPEDANTIC
endif
-ifdef CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE
-CFLAGS += -DMLX4_PMD_TX_MP_CACHE=$(CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE)
-endif
-
include $(RTE_SDK)/mk/rte.lib.mk
# Generate and clean-up mlx4_autoconf.h.
#include <rte_ether.h>
#include <rte_interrupts.h>
#include <rte_mempool.h>
-#include <rte_spinlock.h>
#ifndef IBV_RX_HASH_INNER
/** This is not necessarily defined by supported RDMA core versions. */
/** Fixed RSS hash key size in bytes. Cannot be modified. */
#define MLX4_RSS_HASH_KEY_SIZE 40
-/**
- * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP
- * from which buffers are to be transmitted will have to be mapped by this
- * driver to their own Memory Region (MR). This is a slow operation.
- *
- * This value is always 1 for RX queues.
- */
-#ifndef MLX4_PMD_TX_MP_CACHE
-#define MLX4_PMD_TX_MP_CACHE 8
-#endif
-
/** Interrupt alarm timeout value in microseconds. */
#define MLX4_INTR_ALARM_TIMEOUT 100000
struct txq;
struct rte_flow;
-/** Memory region descriptor. */
-struct mlx4_mr {
- LIST_ENTRY(mlx4_mr) next; /**< Next entry in list. */
- uintptr_t start; /**< Base address for memory region. */
- uintptr_t end; /**< End address for memory region. */
- uint32_t lkey; /**< L_Key extracted from @p mr. */
- uint32_t refcnt; /**< Reference count for this object. */
- struct priv *priv; /**< Back pointer to private data. */
- struct ibv_mr *mr; /**< Memory region associated with @p mp. */
- struct rte_mempool *mp; /**< Target memory pool (mempool). */
-};
-
/** Private data structure. */
struct priv {
struct rte_eth_dev *dev; /**< Ethernet device. */
struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
LIST_HEAD(, mlx4_rss) rss; /**< Shared targets for Rx flow rules. */
LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */
- LIST_HEAD(, mlx4_mr) mr; /**< Registered memory regions. */
- rte_spinlock_t mr_lock; /**< Lock for @p mr access. */
struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
/**< Configured MAC addresses. Unused entries are zeroed. */
};
int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
-/* mlx4_mr.c */
-
-struct mlx4_mr *mlx4_mr_get(struct priv *priv, struct rte_mempool *mp);
-void mlx4_mr_put(struct mlx4_mr *mr);
-uint32_t mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp,
- uint32_t i);
-
#endif /* RTE_PMD_MLX4_H_ */
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_mempool.h>
-#include <rte_spinlock.h>
#include "mlx4_glue.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
-struct mlx4_check_mempool_data {
- int ret;
- char *start;
- char *end;
-};
-
-/**
- * Called by mlx4_check_mempool() when iterating the memory chunks.
- *
- * @param[in] mp
- * Pointer to memory pool (unused).
- * @param[in, out] data
- * Pointer to shared buffer with mlx4_check_mempool().
- * @param[in] memhdr
- * Pointer to mempool chunk header.
- * @param mem_idx
- * Mempool element index (unused).
- */
-static void
-mlx4_check_mempool_cb(struct rte_mempool *mp, void *opaque,
- struct rte_mempool_memhdr *memhdr,
- unsigned int mem_idx)
-{
- struct mlx4_check_mempool_data *data = opaque;
-
- (void)mp;
- (void)mem_idx;
- /* It already failed, skip the next chunks. */
- if (data->ret != 0)
- return;
- /* It is the first chunk. */
- if (data->start == NULL && data->end == NULL) {
- data->start = memhdr->addr;
- data->end = data->start + memhdr->len;
- return;
- }
- if (data->end == memhdr->addr) {
- data->end += memhdr->len;
- return;
- }
- if (data->start == (char *)memhdr->addr + memhdr->len) {
- data->start -= memhdr->len;
- return;
- }
- /* Error, mempool is not virtually contiguous. */
- data->ret = -1;
-}
-
-/**
- * Check if a mempool can be used: it must be virtually contiguous.
- *
- * @param[in] mp
- * Pointer to memory pool.
- * @param[out] start
- * Pointer to the start address of the mempool virtual memory area.
- * @param[out] end
- * Pointer to the end address of the mempool virtual memory area.
- *
- * @return
- * 0 on success (mempool is virtually contiguous), -1 on error.
- */
-static int
-mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start, uintptr_t *end)
-{
- struct mlx4_check_mempool_data data;
-
- memset(&data, 0, sizeof(data));
- rte_mempool_mem_iter(mp, mlx4_check_mempool_cb, &data);
- *start = (uintptr_t)data.start;
- *end = (uintptr_t)data.end;
- return data.ret;
-}
-
-/**
- * Obtain a memory region from a memory pool.
- *
- * If a matching memory region already exists, it is returned with its
- * reference count incremented, otherwise a new one is registered.
- *
- * @param priv
- * Pointer to private structure.
- * @param mp
- * Pointer to memory pool.
- *
- * @return
- * Memory region pointer, NULL in case of error and rte_errno is set.
- */
-struct mlx4_mr *
-mlx4_mr_get(struct priv *priv, struct rte_mempool *mp)
-{
- const struct rte_memseg *ms;
- uintptr_t start;
- uintptr_t end;
- struct mlx4_mr *mr;
-
- if (mlx4_check_mempool(mp, &start, &end) != 0) {
- rte_errno = EINVAL;
- ERROR("mempool %p: not virtually contiguous",
- (void *)mp);
- return NULL;
- }
- DEBUG("mempool %p area start=%p end=%p size=%zu",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- /* Round start and end to page boundary if found in memory segments. */
- ms = rte_mem_virt2memseg((void *)start, NULL);
- if (ms != NULL)
- start = RTE_ALIGN_FLOOR(start, ms->hugepage_sz);
- ms = rte_mem_virt2memseg((void *)end, NULL);
- if (ms != NULL)
- end = RTE_ALIGN_CEIL(end, ms->hugepage_sz);
- DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- rte_spinlock_lock(&priv->mr_lock);
- LIST_FOREACH(mr, &priv->mr, next)
- if (mp == mr->mp && start >= mr->start && end <= mr->end)
- break;
- if (mr) {
- ++mr->refcnt;
- goto release;
- }
- mr = rte_malloc(__func__, sizeof(*mr), 0);
- if (!mr) {
- rte_errno = ENOMEM;
- goto release;
- }
- *mr = (struct mlx4_mr){
- .start = start,
- .end = end,
- .refcnt = 1,
- .priv = priv,
- .mr = mlx4_glue->reg_mr(priv->pd, (void *)start, end - start,
- IBV_ACCESS_LOCAL_WRITE),
- .mp = mp,
- };
- if (mr->mr) {
- mr->lkey = mr->mr->lkey;
- LIST_INSERT_HEAD(&priv->mr, mr, next);
- } else {
- rte_free(mr);
- mr = NULL;
- rte_errno = errno ? errno : EINVAL;
- }
-release:
- rte_spinlock_unlock(&priv->mr_lock);
- return mr;
-}
-
-/**
- * Release a memory region.
- *
- * This function decrements its reference count and destroys it after
- * reaching 0.
- *
- * Note to avoid race conditions given this function may be used from the
- * data plane, it's extremely important that each user holds its own
- * reference.
- *
- * @param mr
- * Memory region to release.
- */
-void
-mlx4_mr_put(struct mlx4_mr *mr)
-{
- struct priv *priv = mr->priv;
-
- rte_spinlock_lock(&priv->mr_lock);
- assert(mr->refcnt);
- if (--mr->refcnt)
- goto release;
- LIST_REMOVE(mr, next);
- claim_zero(mlx4_glue->dereg_mr(mr->mr));
- rte_free(mr);
-release:
- rte_spinlock_unlock(&priv->mr_lock);
-}
-
-/**
- * Add memory region (MR) <-> memory pool (MP) association to txq->mp2mr[].
- * If mp2mr[] is full, remove an entry first.
- *
- * @param txq
- * Pointer to Tx queue structure.
- * @param[in] mp
- * Memory pool for which a memory region lkey must be added.
- * @param[in] i
- * Index in memory pool (MP) where to add memory region (MR).
- *
- * @return
- * Added mr->lkey on success, (uint32_t)-1 on failure.
- */
-uint32_t
-mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp, uint32_t i)
-{
- struct mlx4_mr *mr;
-
- /* Add a new entry, register MR first. */
- DEBUG("%p: discovered new memory pool \"%s\" (%p)",
- (void *)txq, mp->name, (void *)mp);
- mr = mlx4_mr_get(txq->priv, mp);
- if (unlikely(mr == NULL)) {
- DEBUG("%p: unable to configure MR, mlx4_mr_get() failed",
- (void *)txq);
- return (uint32_t)-1;
- }
- if (unlikely(i == RTE_DIM(txq->mp2mr))) {
- /* Table is full, remove oldest entry. */
- DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
- (void *)txq);
- --i;
- mlx4_mr_put(txq->mp2mr[0].mr);
- memmove(&txq->mp2mr[0], &txq->mp2mr[1],
- (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
- }
- /* Store the new entry. */
- txq->mp2mr[i].mp = mp;
- txq->mp2mr[i].mr = mr;
- txq->mp2mr[i].lkey = mr->lkey;
- DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
- (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey);
- return txq->mp2mr[i].lkey;
-}
.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
uintptr_t)),
.byte_count = rte_cpu_to_be_32(buf->data_len),
- .lkey = rte_cpu_to_be_32(rxq->mr->lkey),
+ .lkey = UINT32_MAX,
};
(*elts)[i] = buf;
}
1 << rxq->sges_n);
goto error;
}
- /* Use the entire Rx mempool as the memory region. */
- rxq->mr = mlx4_mr_get(priv, mp);
- if (!rxq->mr) {
- ERROR("%p: MR creation failure: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
if (dev->data->dev_conf.intr_conf.rxq) {
rxq->channel = mlx4_glue->create_comp_channel(priv->ctx);
if (rxq->channel == NULL) {
assert(!rxq->rq_db);
if (rxq->channel)
claim_zero(mlx4_glue->destroy_comp_channel(rxq->channel));
- if (rxq->mr)
- mlx4_mr_put(rxq->mr);
rte_free(rxq);
}
struct rxq {
struct priv *priv; /**< Back pointer to private data. */
struct rte_mempool *mp; /**< Memory pool for allocations. */
- struct mlx4_mr *mr; /**< Memory region. */
struct ibv_cq *cq; /**< Completion queue. */
struct ibv_wq *wq; /**< Work queue. */
struct ibv_comp_channel *channel; /**< Rx completion channel. */
uint32_t lb:1; /**< Whether packets should be looped back by eSwitch. */
uint8_t *bounce_buf;
/**< Memory used for storing the first DWORD of data TXBBs. */
- struct {
- const struct rte_mempool *mp; /**< Cached memory pool. */
- struct mlx4_mr *mr; /**< Memory region (for mp). */
- uint32_t lkey; /**< mr->lkey copy. */
- } mp2mr[MLX4_PMD_TX_MP_CACHE]; /**< MP to MR translation table. */
struct priv *priv; /**< Back pointer to private data. */
unsigned int socket; /**< CPU socket ID for allocations. */
struct ibv_cq *cq; /**< Completion queue. */
const struct rte_eth_txconf *conf);
void mlx4_tx_queue_release(void *dpdk_txq);
-/**
- * Get memory region (MR) <-> memory pool (MP) association from txq->mp2mr[].
- * Call mlx4_txq_add_mr() if MP is not registered yet.
- *
- * @param txq
- * Pointer to Tx queue structure.
- * @param[in] mp
- * Memory pool for which a memory region lkey must be returned.
- *
- * @return
- * mr->lkey on success, (uint32_t)-1 on failure.
- */
static inline uint32_t
mlx4_txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
{
- unsigned int i;
-
- for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
- if (unlikely(txq->mp2mr[i].mp == NULL)) {
- /* Unknown MP, add a new MR for it. */
- break;
- }
- if (txq->mp2mr[i].mp == mp) {
- /* MP found MP. */
- return txq->mp2mr[i].lkey;
- }
- }
- return mlx4_txq_add_mr(txq, mp, i);
+ (void)txq;
+ (void)mp;
+ return UINT32_MAX;
}
#endif /* MLX4_RXTX_H_ */
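With the cache gone, mlx4_txq_mp2mr() above is reduced to a stub returning UINT32_MAX, matching the UINT32_MAX placeholder written into the Rx scatter entries earlier in this patch. The sketch below shows how a send path would consume that value when building a data segment; fill_tx_dseg() and the tx_dseg field layout are illustrative assumptions, not code from this patch.

#include <stdint.h>
#include <rte_byteorder.h>
#include <rte_mbuf.h>

/* Illustrative data segment layout (assumption, not the mlx4 PRM struct). */
struct tx_dseg {
	uint64_t addr;       /* Buffer address, big-endian. */
	uint32_t byte_count; /* Length in bytes, big-endian. */
	uint32_t lkey;       /* MR lkey covering the buffer, big-endian. */
};

/* Hypothetical helper: fill one data segment for an outgoing mbuf. */
static inline void
fill_tx_dseg(struct tx_dseg *dseg, struct rte_mbuf *buf, uint32_t lkey)
{
	dseg->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
	dseg->byte_count = rte_cpu_to_be_32(buf->data_len);
	/* With this patch applied, lkey is the UINT32_MAX placeholder. */
	dseg->lkey = rte_cpu_to_be_32(lkey);
}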
txq->elts_tail = txq->elts_head;
}
-struct txq_mp2mr_mbuf_check_data {
- int ret;
-};
-
-/**
- * Callback function for rte_mempool_obj_iter() to check whether a given
- * mempool object looks like a mbuf.
- *
- * @param[in] mp
- * The mempool pointer
- * @param[in] arg
- * Context data (struct mlx4_txq_mp2mr_mbuf_check_data). Contains the
- * return value.
- * @param[in] obj
- * Object address.
- * @param index
- * Object index, unused.
- */
-static void
-mlx4_txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
- uint32_t index)
-{
- struct txq_mp2mr_mbuf_check_data *data = arg;
- struct rte_mbuf *buf = obj;
-
- (void)index;
- /*
- * Check whether mbuf structure fits element size and whether mempool
- * pointer is valid.
- */
- if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
- data->ret = -1;
-}
-
-/**
- * Iterator function for rte_mempool_walk() to register existing mempools and
- * fill the MP to MR cache of a Tx queue.
- *
- * @param[in] mp
- * Memory Pool to register.
- * @param *arg
- * Pointer to Tx queue structure.
- */
-static void
-mlx4_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
-{
- struct txq *txq = arg;
- struct txq_mp2mr_mbuf_check_data data = {
- .ret = 0,
- };
-
- /* Register mempool only if the first element looks like a mbuf. */
- if (rte_mempool_obj_iter(mp, mlx4_txq_mp2mr_mbuf_check, &data) == 0 ||
- data.ret == -1)
- return;
- mlx4_txq_mp2mr(txq, mp);
-}
-
/**
* Retrieves information needed in order to directly access the Tx queue.
*
/* Save first wqe pointer in the first element. */
(&(*txq->elts)[0])->wqe =
(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
- /* Pre-register known mempools. */
- rte_mempool_walk(mlx4_txq_mp2mr_iter, txq);
DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
dev->data->tx_queues[idx] = txq;
return 0;
claim_zero(mlx4_glue->destroy_qp(txq->qp));
if (txq->cq)
claim_zero(mlx4_glue->destroy_cq(txq->cq));
- for (i = 0; i != RTE_DIM(txq->mp2mr); ++i) {
- if (!txq->mp2mr[i].mp)
- break;
- assert(txq->mp2mr[i].mr);
- mlx4_mr_put(txq->mp2mr[i].mr);
- }
rte_free(txq);
}