#include <rte_version.h>
#include <rte_log.h>
#include <rte_alarm.h>
+#include <rte_memory.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
static int
rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
- unsigned int socket, const struct rte_eth_rxconf *conf,
+ unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
struct rte_mempool *mp);
static void
}
if (rxqs_n == priv->rxqs_n)
return 0;
- if ((rxqs_n & (rxqs_n - 1)) != 0) {
- ERROR("%p: invalid number of RX queues (%u),"
- " must be a power of 2",
- (void *)dev, rxqs_n);
- return EINVAL;
+ if (!rte_is_power_of_2(rxqs_n)) {
+ unsigned n_active;
+
+ n_active = rte_align32pow2(rxqs_n + 1) >> 1;
+ WARN("%p: number of RX queues must be a power"
+ " of 2: %u queues among %u will be active",
+ (void *)dev, n_active, rxqs_n);
}
+
INFO("%p: RX queues number update: %u -> %u",
(void *)dev, priv->rxqs_n, rxqs_n);
/* If RSS is enabled, disable it first. */
priv->rss = 1;
tmp = priv->rxqs_n;
priv->rxqs_n = rxqs_n;
- ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, NULL, NULL);
+ ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
if (!ret)
return 0;
/* Failure, rollback. */
static void
txq_free_elts(struct txq *txq)
{
- unsigned int i;
unsigned int elts_n = txq->elts_n;
+ unsigned int elts_head = txq->elts_head;
+ unsigned int elts_tail = txq->elts_tail;
struct txq_elt (*elts)[elts_n] = txq->elts;
linear_t (*elts_linear)[elts_n] = txq->elts_linear;
struct ibv_mr *mr_linear = txq->mr_linear;
DEBUG("%p: freeing WRs", (void *)txq);
txq->elts_n = 0;
+ txq->elts_head = 0;
+ txq->elts_tail = 0;
+ txq->elts_comp = 0;
+ txq->elts_comp_cd = 0;
+ txq->elts_comp_cd_init = 0;
txq->elts = NULL;
txq->elts_linear = NULL;
txq->mr_linear = NULL;
rte_free(elts_linear);
if (elts == NULL)
return;
- for (i = 0; (i != elemof(*elts)); ++i) {
- struct txq_elt *elt = &(*elts)[i];
+ while (elts_tail != elts_head) {
+ struct txq_elt *elt = &(*elts)[elts_tail];
- if (elt->buf == NULL)
- continue;
+ assert(elt->buf != NULL);
rte_pktmbuf_free(elt->buf);
+#ifndef NDEBUG
+ /* Poisoning. */
+ memset(elt, 0x77, sizeof(*elt));
+#endif
+ if (++elts_tail == elts_n)
+ elts_tail = 0;
}
rte_free(elts);
}
return 0;
}
+/* For best performance, this function should not be inlined. */
+static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, const struct rte_mempool *)
+ __attribute__((noinline));
+
+/**
+ * Register mempool as a memory region.
+ *
+ * @param pd
+ * Pointer to protection domain.
+ * @param mp
+ * Pointer to memory pool.
+ *
+ * @return
+ * Memory region pointer, NULL in case of error.
+ */
+static struct ibv_mr *
+mlx4_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ uintptr_t start = (uintptr_t)STAILQ_FIRST(&mp->mem_list)->addr;
+ uintptr_t end = start + STAILQ_FIRST(&mp->mem_list)->len;
+ unsigned int i;
+
+ DEBUG("mempool %p area start=%p end=%p size=%zu",
+ (const void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ /* Round start and end to page boundary if found in memory segments. */
+ for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+ uintptr_t addr = (uintptr_t)ms[i].addr;
+ size_t len = ms[i].len;
+ unsigned int align = ms[i].hugepage_sz;
+
+ if ((start > addr) && (start < addr + len))
+ start = RTE_ALIGN_FLOOR(start, align);
+ if ((end > addr) && (end < addr + len))
+ end = RTE_ALIGN_CEIL(end, align);
+ }
+ DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+ (const void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ return ibv_reg_mr(pd,
+ (void *)start,
+ end - start,
+ IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
+}
+
/**
* Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
* the cloned mbuf is allocated is returned instead.
/* Add a new entry, register MR first. */
DEBUG("%p: discovered new memory pool \"%s\" (%p)",
(void *)txq, mp->name, (const void *)mp);
- mr = ibv_reg_mr(txq->priv->pd,
- (void *)mp->elt_va_start,
- (mp->elt_va_end - mp->elt_va_start),
- (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
+ mr = mlx4_mp2mr(txq->priv->pd, mp);
if (unlikely(mr == NULL)) {
DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
(void *)txq);
}
struct txq_mp2mr_mbuf_check_data {
- const struct rte_mempool *mp;
int ret;
};
* Callback function for rte_mempool_obj_iter() to check whether a given
* mempool object looks like a mbuf.
*
- * @param[in, out] arg
- * Context data (struct txq_mp2mr_mbuf_check_data). Contains mempool pointer
- * and return value.
- * @param[in] start
- * Object start address.
- * @param[in] end
- * Object end address.
+ * @param[in] mp
+ * The mempool pointer
+ * @param[in] arg
+ * Context data (struct txq_mp2mr_mbuf_check_data). Contains the
+ * return value.
+ * @param[in] obj
+ * Object address.
* @param index
- * Unused.
- *
- * @return
- * Nonzero value when object is not a mbuf.
+ * Object index, unused.
*/
static void
-txq_mp2mr_mbuf_check(void *arg, void *start, void *end,
- uint32_t index __rte_unused)
+txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
+ uint32_t index __rte_unused)
{
struct txq_mp2mr_mbuf_check_data *data = arg;
- struct rte_mbuf *buf =
- (void *)((uintptr_t)start + data->mp->header_size);
+ struct rte_mbuf *buf = obj;
- (void)index;
/* Check whether mbuf structure fits element size and whether mempool
* pointer is valid. */
- if (((uintptr_t)end >= (uintptr_t)(buf + 1)) &&
- (buf->pool == data->mp))
- data->ret = 0;
- else
+ if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
data->ret = -1;
}
* Pointer to TX queue structure.
*/
static void
-txq_mp2mr_iter(const struct rte_mempool *mp, void *arg)
+txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
{
struct txq *txq = arg;
struct txq_mp2mr_mbuf_check_data data = {
- .mp = mp,
- .ret = -1,
+ .ret = 0,
};
- /* Discard empty mempools. */
- if (mp->size == 0)
- return;
/* Register mempool only if the first element looks like a mbuf. */
- rte_mempool_obj_iter((void *)mp->elt_va_start,
- 1,
- mp->header_size + mp->elt_size + mp->trailer_size,
- 1,
- mp->elt_pa,
- mp->pg_num,
- mp->pg_shift,
- txq_mp2mr_mbuf_check,
- &data);
- if (data.ret)
+ if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
+ data.ret == -1)
return;
txq_mp2mr(txq, mp);
}
if (likely(elt->buf != NULL)) {
struct rte_mbuf *tmp = elt->buf;
+#ifndef NDEBUG
+ /* Poisoning. */
+ memset(elt, 0x66, sizeof(*elt));
+#endif
/* Faster than rte_pktmbuf_free(). */
do {
struct rte_mbuf *next = NEXT(tmp);
* @param flags
* RX completion flags returned by poll_length_flags().
*
+ * @note: fix mlx4_dev_supported_ptypes_get() if any change here.
+ *
* @return
* Packet type for struct rte_mbuf.
*/
* cacheline while allocating rep.
*/
rte_prefetch0(seg);
- rep = __rte_mbuf_raw_alloc(rxq->mp);
+ rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
/*
* Unable to allocate a replacement mbuf,
if (ret == 0)
break;
len = ret;
- rep = __rte_mbuf_raw_alloc(rxq->mp);
+ rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
/*
* Unable to allocate a replacement mbuf,
attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
/* TSS isn't necessary. */
attr.qpg.parent_attrib.tss_child_count = 0;
- attr.qpg.parent_attrib.rss_child_count = priv->rxqs_n;
+ attr.qpg.parent_attrib.rss_child_count =
+ rte_align32pow2(priv->rxqs_n + 1) >> 1;
DEBUG("initializing parent RSS queue");
} else {
attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
* Number of descriptors to configure in queue.
* @param socket
* NUMA socket on which memory must be allocated.
+ * @param inactive
+ * If true, the queue is disabled because its index is higher or
+ * equal to the real number of queues, which must be a power of 2.
* @param[in] conf
* Thresholds parameters.
* @param mp
*/
static int
rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
- unsigned int socket, const struct rte_eth_rxconf *conf,
+ unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
struct rte_mempool *mp)
{
struct priv *priv = dev->data->dev_private;
DEBUG("%p: %s scattered packets support (%u WRs)",
(void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
/* Use the entire RX mempool as the memory region. */
- tmpl.mr = ibv_reg_mr(priv->pd,
- (void *)mp->elt_va_start,
- (mp->elt_va_end - mp->elt_va_start),
- (IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_WRITE));
+ tmpl.mr = mlx4_mp2mr(priv->pd, mp);
if (tmpl.mr == NULL) {
ret = EINVAL;
ERROR("%p: MR creation failure: %s",
DEBUG("priv->device_attr.max_sge is %d",
priv->device_attr.max_sge);
#ifdef RSS_SUPPORT
- if (priv->rss)
+ if (priv->rss && !inactive)
tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
tmpl.rd);
else
{
struct priv *priv = dev->data->dev_private;
struct rxq *rxq = (*priv->rxqs)[idx];
+ int inactive = 0;
int ret;
if (mlx4_is_secondary())
return -ENOMEM;
}
}
- ret = rxq_setup(dev, rxq, desc, socket, conf, mp);
+ if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+ inactive = 1;
+ ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
if (ret)
rte_free(rxq);
else {
0);
if (priv_get_ifname(priv, &ifname) == 0)
info->if_index = if_nametoindex(ifname);
+ info->speed_capa =
+ ETH_LINK_SPEED_1G |
+ ETH_LINK_SPEED_10G |
+ ETH_LINK_SPEED_20G |
+ ETH_LINK_SPEED_40G |
+ ETH_LINK_SPEED_56G;
priv_unlock(priv);
}
+static const uint32_t *
+mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ static const uint32_t ptypes[] = {
+ /* refers to rxq_cq_to_pkt_type() */
+ RTE_PTYPE_L3_IPV4,
+ RTE_PTYPE_L3_IPV6,
+ RTE_PTYPE_INNER_L3_IPV4,
+ RTE_PTYPE_INNER_L3_IPV6,
+ RTE_PTYPE_UNKNOWN
+ };
+
+ if (dev->rx_pkt_burst == mlx4_rx_burst ||
+ dev->rx_pkt_burst == mlx4_rx_burst_sp)
+ return ptypes;
+ return NULL;
+}
+
/**
* DPDK callback to get device statistics.
*
dev_link.link_speed = link_speed;
dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
+ dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+ ETH_LINK_SPEED_FIXED);
if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
/* Link status changed. */
dev->data->dev_link = dev_link;
.stats_reset = mlx4_stats_reset,
.queue_stats_mapping_set = NULL,
.dev_infos_get = mlx4_dev_infos_get,
+ .dev_supported_ptypes_get = mlx4_dev_supported_ptypes_get,
.vlan_filter_set = mlx4_vlan_filter_set,
.vlan_tpid_set = NULL,
.vlan_strip_queue_set = NULL,
ERROR("port query failed: %s", strerror(err));
goto port_error;
}
+
+ if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
+ ERROR("port %d is not configured in Ethernet mode",
+ port);
+ goto port_error;
+ }
+
if (port_attr.state != IBV_PORT_ACTIVE)
DEBUG("port %d is not active: \"%s\" (%d)",
port, ibv_port_state_str(port_attr.state),