#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
-#include <linux/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
+#include <fcntl.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
-#include <rte_config.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_dev.h>
#include <rte_atomic.h>
#include <rte_version.h>
#include <rte_log.h>
+#include <rte_alarm.h>
+#include <rte_memory.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
#define WR_ID(o) (((wr_id_t *)&(o))->data)
-/* Compile-time check. */
-static inline void wr_id_t_check(void)
-{
- wr_id_t check[1 + (2 * -!(sizeof(wr_id_t) == sizeof(uint64_t)))];
-
- (void)check;
- (void)wr_id_t_check;
-}
-
/* Transpose flags. Useful to convert IBV to DPDK flags. */
#define TRANSPOSE(val, from, to) \
(((from) >= (to)) ? \
struct txq {
struct priv *priv; /* Back pointer to private data. */
struct {
- struct rte_mempool *mp; /* Cached Memory Pool. */
+ const struct rte_mempool *mp; /* Cached Memory Pool. */
struct ibv_mr *mr; /* Memory Region (for mp). */
uint32_t lkey; /* mr->lkey */
} mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int rss:1; /* RSS is enabled. */
unsigned int vf:1; /* This is a VF device. */
+ unsigned int pending_alarm:1; /* An alarm is pending. */
#ifdef INLINE_RECV
unsigned int inl_recv_size; /* Inline recv size */
#endif
unsigned int txqs_n; /* TX queues array size. */
struct rxq *(*rxqs)[]; /* RX queues. */
struct txq *(*txqs)[]; /* TX queues. */
+ struct rte_intr_handle intr_handle; /* Interrupt handler. */
rte_spinlock_t lock; /* Lock for control functions. */
};
+/* Local storage for secondary process data. */
+struct mlx4_secondary_data {
+	struct rte_eth_dev_data data; /* Local device data. */
+	struct priv *primary_priv; /* Private structure from primary. */
+	struct rte_eth_dev_data *shared_dev_data; /* Shared device data. */
+	rte_spinlock_t lock; /* Port configuration lock. */
+} mlx4_secondary_data[RTE_MAX_ETHPORTS]; /* One entry per port, indexed by port_id. */
+
+/**
+ * Check if running as a secondary process.
+ *
+ * @return
+ *   Nonzero when the current process is not the EAL primary process.
+ */
+static inline int
+mlx4_is_secondary(void)
+{
+	return (rte_eal_process_type() == RTE_PROC_PRIMARY) ? 0 : 1;
+}
+
+/**
+ * Return private structure associated with an Ethernet device.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   Pointer to private structure.
+ */
+static struct priv *
+mlx4_get_priv(struct rte_eth_dev *dev)
+{
+	if (!mlx4_is_secondary())
+		return dev->data->dev_private;
+	/* Secondary processes use their local copy of the device data. */
+	return mlx4_secondary_data[dev->data->port_id].data.dev_private;
+}
+
/**
* Lock private structure to protect it from concurrent access in the
* control path.
/* Device configuration. */
+static int
+txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
+ unsigned int socket, const struct rte_eth_txconf *conf);
+
+static void
+txq_cleanup(struct txq *txq);
+
static int
rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
- unsigned int socket, const struct rte_eth_rxconf *conf,
+ unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
struct rte_mempool *mp);
static void
}
if (rxqs_n == priv->rxqs_n)
return 0;
+ if (!rte_is_power_of_2(rxqs_n)) {
+ unsigned n_active;
+
+ n_active = rte_align32pow2(rxqs_n + 1) >> 1;
+ WARN("%p: number of RX queues must be a power"
+ " of 2: %u queues among %u will be active",
+ (void *)dev, n_active, rxqs_n);
+ }
+
INFO("%p: RX queues number update: %u -> %u",
(void *)dev, priv->rxqs_n, rxqs_n);
/* If RSS is enabled, disable it first. */
priv->rss = 1;
tmp = priv->rxqs_n;
priv->rxqs_n = rxqs_n;
- ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, NULL, NULL);
+ ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, 0, NULL, NULL);
if (!ret)
return 0;
/* Failure, rollback. */
struct priv *priv = dev->data->dev_private;
int ret;
+ if (mlx4_is_secondary())
+ return -E_RTE_SECONDARY;
priv_lock(priv);
ret = dev_configure(dev);
assert(ret >= 0);
return -ret;
}
+static uint16_t mlx4_tx_burst(void *, struct rte_mbuf **, uint16_t);
+static uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
+
+/**
+ * Configure secondary process queues from a private data pointer (primary
+ * or secondary) and update burst callbacks. Can take place only once.
+ *
+ * All queues must have been previously created by the primary process to
+ * avoid undefined behavior.
+ *
+ * @param priv
+ * Private data pointer from either primary or secondary process.
+ *
+ * @return
+ * Private data pointer from secondary process, NULL in case of error.
+ */
+static struct priv *
+mlx4_secondary_data_setup(struct priv *priv)
+{
+	unsigned int port_id = 0;
+	struct mlx4_secondary_data *sd;
+	void **tx_queues;
+	void **rx_queues;
+	unsigned int nb_tx_queues;
+	unsigned int nb_rx_queues;
+	unsigned int i;
+
+	/* priv must be valid at this point. */
+	assert(priv != NULL);
+	/* priv->dev must also be valid but may point to local memory from
+	 * another process, possibly with the same address and must not
+	 * be dereferenced yet. */
+	assert(priv->dev != NULL);
+	/* Determine port ID by finding out where priv comes from. */
+	while (1) {
+		sd = &mlx4_secondary_data[port_id];
+		rte_spinlock_lock(&sd->lock);
+		/* Primary process? */
+		if (sd->primary_priv == priv)
+			break;
+		/* Secondary process? */
+		if (sd->data.dev_private == priv)
+			break;
+		rte_spinlock_unlock(&sd->lock);
+		/* No match: move on to the next port, wrapping around
+		 * indefinitely until the owning port shows up. */
+		if (++port_id == RTE_DIM(mlx4_secondary_data))
+			port_id = 0;
+	}
+	/* Switch to secondary private structure. If private data has already
+	 * been updated by another thread, there is nothing else to do. */
+	priv = sd->data.dev_private;
+	if (priv->dev->data == &sd->data)
+		goto end;
+	/* Sanity checks. Secondary private structure is supposed to point
+	 * to local eth_dev, itself still pointing to the shared device data
+	 * structure allocated by the primary process. */
+	assert(sd->shared_dev_data != &sd->data);
+	assert(sd->data.nb_tx_queues == 0);
+	assert(sd->data.tx_queues == NULL);
+	assert(sd->data.nb_rx_queues == 0);
+	assert(sd->data.rx_queues == NULL);
+	assert(priv != sd->primary_priv);
+	assert(priv->dev->data == sd->shared_dev_data);
+	assert(priv->txqs_n == 0);
+	assert(priv->txqs == NULL);
+	assert(priv->rxqs_n == 0);
+	assert(priv->rxqs == NULL);
+	nb_tx_queues = sd->shared_dev_data->nb_tx_queues;
+	nb_rx_queues = sd->shared_dev_data->nb_rx_queues;
+	/* Allocate local storage for queues.
+	 * NOTE(review): rte_zmalloc() returns NULL for a zero size, so a
+	 * device with no configured queues would be treated as an error
+	 * here -- confirm this cannot happen. */
+	tx_queues = rte_zmalloc("secondary ethdev->tx_queues",
+				sizeof(sd->data.tx_queues[0]) * nb_tx_queues,
+				RTE_CACHE_LINE_SIZE);
+	rx_queues = rte_zmalloc("secondary ethdev->rx_queues",
+				sizeof(sd->data.rx_queues[0]) * nb_rx_queues,
+				RTE_CACHE_LINE_SIZE);
+	/* priv is not locked yet: do not release its lock on this path. */
+	if (tx_queues == NULL || rx_queues == NULL)
+		goto error_unlocked;
+	/* Lock to prevent control operations during setup. */
+	priv_lock(priv);
+	/* TX queues. */
+	for (i = 0; i != nb_tx_queues; ++i) {
+		struct txq *primary_txq = (*sd->primary_priv->txqs)[i];
+		struct txq *txq;
+
+		if (primary_txq == NULL)
+			continue;
+		txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0,
+					primary_txq->socket);
+		if (txq != NULL) {
+			if (txq_setup(priv->dev,
+				      txq,
+				      primary_txq->elts_n * MLX4_PMD_SGE_WR_N,
+				      primary_txq->socket,
+				      NULL) == 0) {
+				txq->stats.idx = primary_txq->stats.idx;
+				tx_queues[i] = txq;
+				continue;
+			}
+			rte_free(txq);
+		}
+		/* Failure: release queues configured so far. */
+		while (i) {
+			txq = tx_queues[--i];
+			txq_cleanup(txq);
+			rte_free(txq);
+		}
+		goto error;
+	}
+	/* RX queues. */
+	for (i = 0; i != nb_rx_queues; ++i) {
+		struct rxq *primary_rxq = (*sd->primary_priv->rxqs)[i];
+
+		if (primary_rxq == NULL)
+			continue;
+		/* Not supported yet. */
+		rx_queues[i] = NULL;
+	}
+	/* Update everything. */
+	priv->txqs = (void *)tx_queues;
+	priv->txqs_n = nb_tx_queues;
+	priv->rxqs = (void *)rx_queues;
+	priv->rxqs_n = nb_rx_queues;
+	sd->data.rx_queues = rx_queues;
+	sd->data.tx_queues = tx_queues;
+	sd->data.nb_rx_queues = nb_rx_queues;
+	sd->data.nb_tx_queues = nb_tx_queues;
+	sd->data.dev_link = sd->shared_dev_data->dev_link;
+	sd->data.mtu = sd->shared_dev_data->mtu;
+	memcpy(sd->data.rx_queue_state, sd->shared_dev_data->rx_queue_state,
+	       sizeof(sd->data.rx_queue_state));
+	memcpy(sd->data.tx_queue_state, sd->shared_dev_data->tx_queue_state,
+	       sizeof(sd->data.tx_queue_state));
+	sd->data.dev_flags = sd->shared_dev_data->dev_flags;
+	/* Use local data from now on. */
+	rte_mb();
+	priv->dev->data = &sd->data;
+	/* Publish the data pointer before switching burst callbacks. */
+	rte_mb();
+	priv->dev->tx_pkt_burst = mlx4_tx_burst;
+	priv->dev->rx_pkt_burst = removed_rx_burst;
+	priv_unlock(priv);
+end:
+	/* More sanity checks. */
+	assert(priv->dev->tx_pkt_burst == mlx4_tx_burst);
+	assert(priv->dev->rx_pkt_burst == removed_rx_burst);
+	assert(priv->dev->data == &sd->data);
+	rte_spinlock_unlock(&sd->lock);
+	return priv;
+error:
+	priv_unlock(priv);
+error_unlocked:
+	/* rte_free(NULL) is a no-op: partial allocations are safe here. */
+	rte_free(tx_queues);
+	rte_free(rx_queues);
+	rte_spinlock_unlock(&sd->lock);
+	return NULL;
+}
+
/* TX queues handling. */
/**
}
mr_linear =
ibv_reg_mr(txq->priv->pd, elts_linear, sizeof(*elts_linear),
- (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
+ IBV_ACCESS_LOCAL_WRITE);
if (mr_linear == NULL) {
ERROR("%p: unable to configure MR, ibv_reg_mr() failed",
(void *)txq);
static void
txq_free_elts(struct txq *txq)
{
-	unsigned int i;
	unsigned int elts_n = txq->elts_n;
+	unsigned int elts_head = txq->elts_head;
+	unsigned int elts_tail = txq->elts_tail;
	struct txq_elt (*elts)[elts_n] = txq->elts;
	linear_t (*elts_linear)[elts_n] = txq->elts_linear;
	struct ibv_mr *mr_linear = txq->mr_linear;
	DEBUG("%p: freeing WRs", (void *)txq);
	txq->elts_n = 0;
+	txq->elts_head = 0;
+	txq->elts_tail = 0;
+	txq->elts_comp = 0;
+	txq->elts_comp_cd = 0;
+	txq->elts_comp_cd_init = 0;
	txq->elts = NULL;
	txq->elts_linear = NULL;
	txq->mr_linear = NULL;
	rte_free(elts_linear);
	if (elts == NULL)
		return;
-	for (i = 0; (i != elemof(*elts)); ++i) {
-		struct txq_elt *elt = &(*elts)[i];
+	while (elts_tail != elts_head) {
+		struct txq_elt *elt = &(*elts)[elts_tail];

-		if (elt->buf == NULL)
-			continue;
+		assert(elt->buf != NULL);
		rte_pktmbuf_free(elt->buf);
+#ifndef NDEBUG
+		/* Poisoning: overwrite the freed entry (0x77 pattern) so stale reuse is obvious. */
+		memset(elt, 0x77, sizeof(*elt));
+#endif
+		if (++elts_tail == elts_n)
+			elts_tail = 0;
	}
	rte_free(elts);
}
return 0;
}
+/* Accumulator used while checking that a mempool is virtually contiguous. */
+struct mlx4_check_mempool_data {
+	int ret; /* 0 while contiguous, -1 once a gap has been found. */
+	char *start; /* Lowest chunk address seen so far. */
+	char *end; /* One past the highest chunk address seen so far. */
+};
+
+/* Called by mlx4_check_mempool() when iterating the memory chunks.
+ * Grows the [start, end) area chunk by chunk and reports an error through
+ * ret as soon as a chunk is not adjacent to the area built so far. */
+static void mlx4_check_mempool_cb(struct rte_mempool *mp,
+	void *opaque, struct rte_mempool_memhdr *memhdr,
+	unsigned mem_idx)
+{
+	struct mlx4_check_mempool_data *data = opaque;
+
+	(void)mp;
+	(void)mem_idx;
+
+	/* It already failed, skip the next chunks. */
+	if (data->ret != 0)
+		return;
+	/* It is the first chunk. */
+	if (data->start == NULL && data->end == NULL) {
+		data->start = memhdr->addr;
+		data->end = data->start + memhdr->len;
+		return;
+	}
+	/* Chunk immediately follows the current area: extend it upward. */
+	if (data->end == memhdr->addr) {
+		data->end += memhdr->len;
+		return;
+	}
+	/* Chunk immediately precedes the current area: extend it downward. */
+	if (data->start == (char *)memhdr->addr + memhdr->len) {
+		data->start -= memhdr->len;
+		return;
+	}
+	/* Error, mempool is not virtually contiguous. */
+	data->ret = -1;
+}
+
+/**
+ * Check if a mempool can be used: it must be virtually contiguous.
+ *
+ * @param[in] mp
+ *   Pointer to memory pool.
+ * @param[out] start
+ *   Pointer to the start address of the mempool virtual memory area.
+ * @param[out] end
+ *   Pointer to the end address of the mempool virtual memory area.
+ *
+ * @return
+ *   0 on success (mempool is virtually contiguous), -1 on error.
+ */
+static int mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start,
+	uintptr_t *end)
+{
+	struct mlx4_check_mempool_data data = {
+		.ret = 0,
+		.start = NULL,
+		.end = NULL,
+	};
+
+	rte_mempool_mem_iter(mp, mlx4_check_mempool_cb, &data);
+	*start = (uintptr_t)data.start;
+	*end = (uintptr_t)data.end;
+	return data.ret;
+}
+
+/* For best performance, this function should not be inlined. */
+static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, struct rte_mempool *)
+	__attribute__((noinline));
+
+/**
+ * Register mempool as a memory region.
+ *
+ * The registered area is the mempool's virtual range, widened to hugepage
+ * boundaries of the memory segments that contain its edges.
+ *
+ * @param pd
+ *   Pointer to protection domain.
+ * @param mp
+ *   Pointer to memory pool.
+ *
+ * @return
+ *   Memory region pointer, NULL in case of error.
+ */
+static struct ibv_mr *
+mlx4_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	uintptr_t start;
+	uintptr_t end;
+	unsigned int i;
+
+	if (mlx4_check_mempool(mp, &start, &end) != 0) {
+		ERROR("mempool %p: not virtually contiguous",
+			(void *)mp);
+		return NULL;
+	}
+
+	DEBUG("mempool %p area start=%p end=%p size=%zu",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	/* Round start and end to page boundary if found in memory segments. */
+	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+		uintptr_t addr = (uintptr_t)ms[i].addr;
+		size_t len = ms[i].len;
+		/* NOTE(review): confirm ms[i].hugepage_sz always fits in
+		 * unsigned int; a page size of 4 GB or more would be
+		 * truncated here. */
+		unsigned int align = ms[i].hugepage_sz;
+
+		if ((start > addr) && (start < addr + len))
+			start = RTE_ALIGN_FLOOR(start, align);
+		if ((end > addr) && (end < addr + len))
+			end = RTE_ALIGN_CEIL(end, align);
+	}
+	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	return ibv_reg_mr(pd,
+			  (void *)start,
+			  end - start,
+			  IBV_ACCESS_LOCAL_WRITE);
+}
+
+/**
+ * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
+ * the cloned mbuf is allocated is returned instead.
+ *
+ * @param buf
+ *   Pointer to mbuf.
+ *
+ * @return
+ *   Memory pool where data is located for given mbuf.
+ */
+static struct rte_mempool *
+txq_mb2mp(struct rte_mbuf *buf)
+{
+	if (likely(!RTE_MBUF_INDIRECT(buf)))
+		return buf->pool;
+	/* Indirect mbuf: the data belongs to the direct mbuf's pool. */
+	return rte_mbuf_from_indirect(buf)->pool;
+}
+
/**
* Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].
* Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
}
}
/* Add a new entry, register MR first. */
- DEBUG("%p: discovered new memory pool %p", (void *)txq, (void *)mp);
- mr = ibv_reg_mr(txq->priv->pd,
- (void *)mp->elt_va_start,
- (mp->elt_va_end - mp->elt_va_start),
- (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
+ DEBUG("%p: discovered new memory pool \"%s\" (%p)",
+ (void *)txq, mp->name, (void *)mp);
+ mr = mlx4_mp2mr(txq->priv->pd, mp);
if (unlikely(mr == NULL)) {
DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
(void *)txq);
DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
(void *)txq);
--i;
- claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));
+ claim_zero(ibv_dereg_mr(txq->mp2mr[0].mr));
memmove(&txq->mp2mr[0], &txq->mp2mr[1],
(sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
}
txq->mp2mr[i].mp = mp;
txq->mp2mr[i].mr = mr;
txq->mp2mr[i].lkey = mr->lkey;
- DEBUG("%p: new MR lkey for MP %p: 0x%08" PRIu32,
- (void *)txq, (void *)mp, txq->mp2mr[i].lkey);
+ DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
+ (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey);
return txq->mp2mr[i].lkey;
}
+/* Result holder for txq_mp2mr_mbuf_check(). */
+struct txq_mp2mr_mbuf_check_data {
+	int ret; /* Set to -1 when an object does not look like a mbuf. */
+};
+
+/**
+ * Callback function for rte_mempool_obj_iter() to check whether a given
+ * mempool object looks like a mbuf.
+ *
+ * @param[in] mp
+ *   The mempool pointer.
+ * @param[in] arg
+ *   Context data (struct txq_mp2mr_mbuf_check_data). Contains the
+ *   return value.
+ * @param[in] obj
+ *   Object address.
+ * @param index
+ *   Object index, unused.
+ */
+static void
+txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
+	uint32_t index __rte_unused)
+{
+	struct txq_mp2mr_mbuf_check_data *data = arg;
+	const struct rte_mbuf *buf = obj;
+
+	/* An object qualifies when a full mbuf structure fits in it and
+	 * its pool pointer refers back to this mempool. */
+	if (sizeof(*buf) <= mp->elt_size && buf->pool == mp)
+		return;
+	data->ret = -1;
+}
+
+/**
+ * Iterator function for rte_mempool_walk() to register existing mempools and
+ * fill the MP to MR cache of a TX queue.
+ *
+ * @param[in] mp
+ *   Memory Pool to register.
+ * @param *arg
+ *   Pointer to TX queue structure.
+ */
+static void
+txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+{
+	struct txq *txq = arg;
+	struct txq_mp2mr_mbuf_check_data data = {
+		.ret = 0,
+	};
+
+	/* Register mempool only if the first element looks like a mbuf.
+	 * A zero return from rte_mempool_obj_iter() means no object was
+	 * visited (empty pool): skip it as well. */
+	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
+		data.ret == -1)
+		return;
+	txq_mp2mr(txq, mp);
+}
+
#if MLX4_PMD_SGE_WR_N > 1
/**
uint32_t lkey;
/* Retrieve Memory Region key for this memory pool. */
- lkey = txq_mp2mr(txq, buf->pool);
+ lkey = txq_mp2mr(txq, txq_mb2mp(buf));
if (unlikely(lkey == (uint32_t)-1)) {
/* MR does not exist. */
DEBUG("%p: unable to get MP <-> MR association",
sge->length = size;
sge->lkey = txq->mr_linear->lkey;
sent_size += size;
+ /* Include last segment. */
+ segs++;
}
return (struct tx_burst_sg_ret){
.length = sent_size,
{
struct txq *txq = (struct txq *)dpdk_txq;
unsigned int elts_head = txq->elts_head;
- const unsigned int elts_tail = txq->elts_tail;
const unsigned int elts_n = txq->elts_n;
unsigned int elts_comp_cd = txq->elts_comp_cd;
unsigned int elts_comp = 0;
assert(elts_comp_cd != 0);
txq_complete(txq);
- max = (elts_n - (elts_head - elts_tail));
+ max = (elts_n - (elts_head - txq->elts_tail));
if (max > elts_n)
max -= elts_n;
assert(max >= 1);
if (likely(elt->buf != NULL)) {
struct rte_mbuf *tmp = elt->buf;
+#ifndef NDEBUG
+ /* Poisoning. */
+ memset(elt, 0x66, sizeof(*elt));
+#endif
/* Faster than rte_pktmbuf_free(). */
do {
struct rte_mbuf *next = NEXT(tmp);
addr = rte_pktmbuf_mtod(buf, uintptr_t);
length = DATA_LEN(buf);
/* Retrieve Memory Region key for this memory pool. */
- lkey = txq_mp2mr(txq, buf->pool);
+ lkey = txq_mp2mr(txq, txq_mb2mp(buf));
if (unlikely(lkey == (uint32_t)-1)) {
/* MR does not exist. */
DEBUG("%p: unable to get MP <-> MR"
return i;
}
+/**
+ * DPDK callback for TX in secondary processes.
+ *
+ * This function configures all queues from primary process information
+ * if necessary before reverting to the normal TX burst callback.
+ *
+ * @param dpdk_txq
+ *   Generic pointer to TX queue structure.
+ * @param[in] pkts
+ *   Packets to transmit.
+ * @param pkts_n
+ *   Number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully transmitted (<= pkts_n).
+ */
+static uint16_t
+mlx4_tx_burst_secondary_setup(void *dpdk_txq, struct rte_mbuf **pkts,
+			      uint16_t pkts_n)
+{
+	struct txq *txq = dpdk_txq;
+	struct priv *priv = mlx4_secondary_data_setup(txq->priv);
+	struct priv *primary_priv;
+	unsigned int index;
+
+	if (priv == NULL)
+		return 0;
+	primary_priv =
+		mlx4_secondary_data[priv->dev->data->port_id].primary_priv;
+	/* Look for queue index in both private structures: dpdk_txq may
+	 * still be the queue pointer set up by the primary process. */
+	for (index = 0; index != priv->txqs_n; ++index)
+		if (((*primary_priv->txqs)[index] == txq) ||
+		    ((*priv->txqs)[index] == txq))
+			break;
+	/* Transmit nothing when the queue cannot be resolved. */
+	if (index == priv->txqs_n)
+		return 0;
+	txq = (*priv->txqs)[index];
+	return priv->dev->tx_pkt_burst(txq, pkts, pkts_n);
+}
+
/**
* Configure a TX queue.
*
txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
unsigned int socket, const struct rte_eth_txconf *conf)
{
- struct priv *priv = dev->data->dev_private;
+ struct priv *priv = mlx4_get_priv(dev);
struct txq tmpl = {
.priv = priv,
.socket = socket
int ret = 0;
(void)conf; /* Thresholds configuration (ignored). */
+ if (priv == NULL)
+ return EINVAL;
if ((desc == 0) || (desc % MLX4_PMD_SGE_WR_N)) {
ERROR("%p: invalid number of TX descriptors (must be a"
" multiple of %d)", (void *)dev, MLX4_PMD_SGE_WR_N);
txq_cleanup(txq);
*txq = tmpl;
DEBUG("%p: txq updated with %p", (void *)txq, (void *)&tmpl);
+ /* Pre-register known mempools. */
+ rte_mempool_walk(txq_mp2mr_iter, txq);
assert(ret == 0);
return 0;
error:
struct txq *txq = (*priv->txqs)[idx];
int ret;
+ if (mlx4_is_secondary())
+ return -E_RTE_SECONDARY;
priv_lock(priv);
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
struct priv *priv;
unsigned int i;
+ if (mlx4_is_secondary())
+ return;
if (txq == NULL)
return;
priv = txq->priv;
* @param flags
* RX completion flags returned by poll_length_flags().
*
+ * @note: fix mlx4_dev_supported_ptypes_get() if any change here.
+ *
* @return
* Packet type for struct rte_mbuf.
*/
* cacheline while allocating rep.
*/
rte_prefetch0(seg);
- rep = __rte_mbuf_raw_alloc(rxq->mp);
+ rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
/*
* Unable to allocate a replacement mbuf,
assert(wr->num_sge == 1);
assert(elts_head < rxq->elts_n);
assert(rxq->elts_head < rxq->elts_n);
+ /*
+ * Fetch initial bytes of packet descriptor into a
+ * cacheline while allocating rep.
+ */
+ rte_mbuf_prefetch_part1(seg);
+ rte_mbuf_prefetch_part2(seg);
ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
&flags);
if (unlikely(ret < 0)) {
if (ret == 0)
break;
len = ret;
- /*
- * Fetch initial bytes of packet descriptor into a
- * cacheline while allocating rep.
- */
- rte_prefetch0(seg);
- rep = __rte_mbuf_raw_alloc(rxq->mp);
+ rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
/*
* Unable to allocate a replacement mbuf,
return pkts_ret;
}
+/**
+ * DPDK callback for RX in secondary processes.
+ *
+ * This function configures all queues from primary process information
+ * if necessary before reverting to the normal RX burst callback.
+ *
+ * @param dpdk_rxq
+ *   Generic pointer to RX queue structure.
+ * @param[out] pkts
+ *   Array to store received packets.
+ * @param pkts_n
+ *   Maximum number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully received (<= pkts_n).
+ */
+static uint16_t
+mlx4_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts,
+			      uint16_t pkts_n)
+{
+	struct rxq *rxq = dpdk_rxq;
+	struct priv *priv = mlx4_secondary_data_setup(rxq->priv);
+	struct priv *primary_priv;
+	unsigned int index;
+
+	if (priv == NULL)
+		return 0;
+	primary_priv =
+		mlx4_secondary_data[priv->dev->data->port_id].primary_priv;
+	/* Look for queue index in both private structures: dpdk_rxq may
+	 * still be the queue pointer set up by the primary process. */
+	for (index = 0; index != priv->rxqs_n; ++index)
+		if (((*primary_priv->rxqs)[index] == rxq) ||
+		    ((*priv->rxqs)[index] == rxq))
+			break;
+	/* Receive nothing when the queue cannot be resolved. */
+	if (index == priv->rxqs_n)
+		return 0;
+	rxq = (*priv->rxqs)[index];
+	return priv->dev->rx_pkt_burst(rxq, pkts, pkts_n);
+}
+
/**
* Allocate a Queue Pair.
* Optionally setup inline receive if supported.
attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
/* TSS isn't necessary. */
attr.qpg.parent_attrib.tss_child_count = 0;
- attr.qpg.parent_attrib.rss_child_count = priv->rxqs_n;
+ attr.qpg.parent_attrib.rss_child_count =
+ rte_align32pow2(priv->rxqs_n + 1) >> 1;
DEBUG("initializing parent RSS queue");
} else {
attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
* Number of descriptors to configure in queue.
* @param socket
* NUMA socket on which memory must be allocated.
+ * @param inactive
+ * If true, the queue is disabled because its index is higher or
+ * equal to the real number of queues, which must be a power of 2.
* @param[in] conf
* Thresholds parameters.
* @param mp
*/
static int
rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
- unsigned int socket, const struct rte_eth_rxconf *conf,
+ unsigned int socket, int inactive, const struct rte_eth_rxconf *conf,
struct rte_mempool *mp)
{
struct priv *priv = dev->data->dev_private;
DEBUG("%p: %s scattered packets support (%u WRs)",
(void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
/* Use the entire RX mempool as the memory region. */
- tmpl.mr = ibv_reg_mr(priv->pd,
- (void *)mp->elt_va_start,
- (mp->elt_va_end - mp->elt_va_start),
- (IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_WRITE));
+ tmpl.mr = mlx4_mp2mr(priv->pd, mp);
if (tmpl.mr == NULL) {
ret = EINVAL;
ERROR("%p: MR creation failure: %s",
DEBUG("priv->device_attr.max_sge is %d",
priv->device_attr.max_sge);
#ifdef RSS_SUPPORT
- if (priv->rss)
+ if (priv->rss && !inactive)
tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
tmpl.rd);
else
{
struct priv *priv = dev->data->dev_private;
struct rxq *rxq = (*priv->rxqs)[idx];
+ int inactive = 0;
int ret;
+ if (mlx4_is_secondary())
+ return -E_RTE_SECONDARY;
priv_lock(priv);
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
return -ENOMEM;
}
}
- ret = rxq_setup(dev, rxq, desc, socket, conf, mp);
+ if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
+ inactive = 1;
+ ret = rxq_setup(dev, rxq, desc, socket, inactive, conf, mp);
if (ret)
rte_free(rxq);
else {
struct priv *priv;
unsigned int i;
+ if (mlx4_is_secondary())
+ return;
if (rxq == NULL)
return;
priv = rxq->priv;
priv_unlock(priv);
}
+static void
+priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
+
/**
* DPDK callback to start the device.
*
unsigned int r;
struct rxq *rxq;
+ if (mlx4_is_secondary())
+ return -E_RTE_SECONDARY;
priv_lock(priv);
if (priv->started) {
priv_unlock(priv);
}
}
priv->started = 0;
+ priv_unlock(priv);
return -ret;
} while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
+ priv_dev_interrupt_handler_install(priv, dev);
priv_unlock(priv);
return 0;
}
unsigned int r;
struct rxq *rxq;
+ if (mlx4_is_secondary())
+ return;
priv_lock(priv);
if (!priv->started) {
priv_unlock(priv);
return 0;
}
+static void
+priv_dev_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *);
+
/**
* DPDK callback to close the device.
*
static void
mlx4_dev_close(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
+ struct priv *priv = mlx4_get_priv(dev);
void *tmp;
unsigned int i;
+ if (priv == NULL)
+ return;
priv_lock(priv);
DEBUG("%p: closing device \"%s\"",
(void *)dev,
claim_zero(ibv_close_device(priv->ctx));
} else
assert(priv->ctx == NULL);
+ priv_dev_interrupt_handler_uninstall(priv, dev);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
static void
mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
- struct priv *priv = dev->data->dev_private;
+ struct priv *priv = mlx4_get_priv(dev);
unsigned int max;
+ char ifname[IF_NAMESIZE];
+ if (priv == NULL)
+ return;
priv_lock(priv);
/* FIXME: we should ask the device for these values. */
info->min_rx_bufsize = 32;
max = 65535;
info->max_rx_queues = max;
info->max_tx_queues = max;
- info->max_mac_addrs = elemof(priv->mac);
+ /* Last array entry is reserved for broadcast. */
+ info->max_mac_addrs = (elemof(priv->mac) - 1);
info->rx_offload_capa =
(priv->hw_csum ?
(DEV_RX_OFFLOAD_IPV4_CKSUM |
DEV_TX_OFFLOAD_UDP_CKSUM |
DEV_TX_OFFLOAD_TCP_CKSUM) :
0);
+ if (priv_get_ifname(priv, &ifname) == 0)
+ info->if_index = if_nametoindex(ifname);
+ info->speed_capa =
+ ETH_LINK_SPEED_1G |
+ ETH_LINK_SPEED_10G |
+ ETH_LINK_SPEED_20G |
+ ETH_LINK_SPEED_40G |
+ ETH_LINK_SPEED_56G;
priv_unlock(priv);
}
+/**
+ * DPDK callback to get supported packet types.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   Static array of packet types produced by rxq_cq_to_pkt_type() when a
+ *   mlx4 RX burst function is active, NULL otherwise.
+ */
+static const uint32_t *
+mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+	static const uint32_t ptypes[] = {
+		/* refers to rxq_cq_to_pkt_type() */
+		RTE_PTYPE_L3_IPV4,
+		RTE_PTYPE_L3_IPV6,
+		RTE_PTYPE_INNER_L3_IPV4,
+		RTE_PTYPE_INNER_L3_IPV6,
+		RTE_PTYPE_UNKNOWN
+	};
+
+	if (dev->rx_pkt_burst == mlx4_rx_burst ||
+	    dev->rx_pkt_burst == mlx4_rx_burst_sp)
+		return ptypes;
+	return NULL;
+}
+
/**
* DPDK callback to get device statistics.
*
static void
mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
- struct priv *priv = dev->data->dev_private;
+ struct priv *priv = mlx4_get_priv(dev);
struct rte_eth_stats tmp = {0};
unsigned int i;
unsigned int idx;
+ if (priv == NULL)
+ return;
priv_lock(priv);
/* Add software counters. */
for (i = 0; (i != priv->rxqs_n); ++i) {
static void
mlx4_stats_reset(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
+ struct priv *priv = mlx4_get_priv(dev);
unsigned int i;
unsigned int idx;
+ if (priv == NULL)
+ return;
priv_lock(priv);
for (i = 0; (i != priv->rxqs_n); ++i) {
if ((*priv->rxqs)[i] == NULL)
for (i = 0; (i != priv->txqs_n); ++i) {
if ((*priv->txqs)[i] == NULL)
continue;
- idx = (*priv->rxqs)[i]->stats.idx;
+ idx = (*priv->txqs)[i]->stats.idx;
(*priv->txqs)[i]->stats =
(struct mlx4_txq_stats){ .idx = idx };
}
{
struct priv *priv = dev->data->dev_private;
+ if (mlx4_is_secondary())
+ return;
priv_lock(priv);
DEBUG("%p: removing MAC address from index %" PRIu32,
(void *)dev, index);
- if (index >= MLX4_MAX_MAC_ADDRESSES)
- goto end;
- /* Refuse to remove the broadcast address, this one is special. */
- if (!memcmp(priv->mac[index].addr_bytes, "\xff\xff\xff\xff\xff\xff",
- ETHER_ADDR_LEN))
+ /* Last array entry is reserved for broadcast. */
+ if (index >= (elemof(priv->mac) - 1))
goto end;
priv_mac_addr_del(priv, index);
end:
{
struct priv *priv = dev->data->dev_private;
+ if (mlx4_is_secondary())
+ return;
(void)vmdq;
priv_lock(priv);
DEBUG("%p: adding MAC address at index %" PRIu32,
(void *)dev, index);
- if (index >= MLX4_MAX_MAC_ADDRESSES)
- goto end;
- /* Refuse to add the broadcast address, this one is special. */
- if (!memcmp(mac_addr->addr_bytes, "\xff\xff\xff\xff\xff\xff",
- ETHER_ADDR_LEN))
+ /* Last array entry is reserved for broadcast. */
+ if (index >= (elemof(priv->mac) - 1))
goto end;
priv_mac_addr_add(priv, index,
(const uint8_t (*)[ETHER_ADDR_LEN])
priv_unlock(priv);
}
+/**
+ * DPDK callback to set the primary MAC address.
+ *
+ * Implemented by removing the address at index 0 and re-adding the new
+ * one at the same index (VMDq pool 0).
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param mac_addr
+ *   MAC address to register.
+ */
+static void
+mlx4_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+	DEBUG("%p: setting primary MAC address", (void *)dev);
+	mlx4_mac_addr_remove(dev, 0);
+	mlx4_mac_addr_add(dev, mac_addr, 0, 0);
+}
+
/**
* DPDK callback to enable promiscuous mode.
*
unsigned int i;
int ret;
+ if (mlx4_is_secondary())
+ return;
priv_lock(priv);
if (priv->promisc) {
priv_unlock(priv);
struct priv *priv = dev->data->dev_private;
unsigned int i;
+ if (mlx4_is_secondary())
+ return;
priv_lock(priv);
if (!priv->promisc) {
priv_unlock(priv);
unsigned int i;
int ret;
+ if (mlx4_is_secondary())
+ return;
priv_lock(priv);
if (priv->allmulti) {
priv_unlock(priv);
struct priv *priv = dev->data->dev_private;
unsigned int i;
+ if (mlx4_is_secondary())
+ return;
priv_lock(priv);
if (!priv->allmulti) {
priv_unlock(priv);
static int
mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
{
- struct priv *priv = dev->data->dev_private;
+ struct priv *priv = mlx4_get_priv(dev);
struct ethtool_cmd edata = {
.cmd = ETHTOOL_GSET
};
struct rte_eth_link dev_link;
int link_speed = 0;
+ if (priv == NULL)
+ return -EINVAL;
(void)wait_to_complete;
if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
memset(&dev_link, 0, sizeof(dev_link));
dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
(ifr.ifr_flags & IFF_RUNNING));
- ifr.ifr_data = &edata;
+ ifr.ifr_data = (void *)&edata;
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
strerror(errno));
dev_link.link_speed = link_speed;
dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
+ dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+ ETH_LINK_SPEED_FIXED);
if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
/* Link status changed. */
dev->data->dev_link = dev_link;
static int
mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
- struct priv *priv = dev->data->dev_private;
+ struct priv *priv = mlx4_get_priv(dev);
int ret;
+ if (priv == NULL)
+ return -EINVAL;
priv_lock(priv);
ret = mlx4_link_update_unlocked(dev, wait_to_complete);
priv_unlock(priv);
uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
mlx4_rx_burst;
+ if (mlx4_is_secondary())
+ return -E_RTE_SECONDARY;
priv_lock(priv);
/* Set kernel interface MTU first. */
if (priv_set_mtu(priv, mtu)) {
};
int ret;
- ifr.ifr_data = ðpause;
+ if (mlx4_is_secondary())
+ return -E_RTE_SECONDARY;
+ ifr.ifr_data = (void *)ðpause;
priv_lock(priv);
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
ret = errno;
};
int ret;
- ifr.ifr_data = ðpause;
+ if (mlx4_is_secondary())
+ return -E_RTE_SECONDARY;
+ ifr.ifr_data = (void *)ðpause;
ethpause.autoneg = fc_conf->autoneg;
if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
(fc_conf->mode & RTE_FC_RX_PAUSE))
struct priv *priv = dev->data->dev_private;
int ret;
+ if (mlx4_is_secondary())
+ return -E_RTE_SECONDARY;
priv_lock(priv);
ret = vlan_filter_set(dev, vlan_id, on);
priv_unlock(priv);
.stats_reset = mlx4_stats_reset,
.queue_stats_mapping_set = NULL,
.dev_infos_get = mlx4_dev_infos_get,
+ .dev_supported_ptypes_get = mlx4_dev_supported_ptypes_get,
.vlan_filter_set = mlx4_vlan_filter_set,
.vlan_tpid_set = NULL,
.vlan_strip_queue_set = NULL,
.priority_flow_ctrl_set = NULL,
.mac_addr_remove = mlx4_mac_addr_remove,
.mac_addr_add = mlx4_mac_addr_add,
+ .mac_addr_set = mlx4_mac_addr_set,
.mtu_set = mlx4_dev_set_mtu,
- .udp_tunnel_add = NULL,
- .udp_tunnel_del = NULL,
};
/**
return atoi(val);
}
+/* Forward declarations: the alarm callback and the interrupt callback
+ * below both re-enter the shared link status logic. */
+static void
+mlx4_dev_link_status_handler(void *);
+static void
+mlx4_dev_interrupt_handler(struct rte_intr_handle *, void *);
+
+/**
+ * Link status handler.
+ *
+ * Drains and acknowledges all pending IBV async events, then refreshes
+ * the link status when a port state change was seen or when invoked
+ * from a previously scheduled alarm. If the refreshed status looks
+ * inconsistent, a delayed re-check is scheduled instead of notifying
+ * the application immediately.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param dev
+ *   Pointer to the rte_eth_dev structure.
+ *
+ * @return
+ *   Nonzero if the callback process can be called immediately.
+ */
+static int
+priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
+{
+	struct ibv_async_event event;
+	int port_change = 0;
+	int ret = 0;
+
+	/* Read all messages and acknowledge them. */
+	for (;;) {
+		if (ibv_get_async_event(priv->ctx, &event))
+			break;
+
+		if (event.event_type == IBV_EVENT_PORT_ACTIVE ||
+		    event.event_type == IBV_EVENT_PORT_ERR)
+			port_change = 1;
+		else
+			DEBUG("event type %d on port %d not handled",
+			      event.event_type, event.element.port_num);
+		/* Every retrieved event must be acknowledged. */
+		ibv_ack_async_event(&event);
+	}
+
+	/* Act only when exactly one of "port event seen" and "alarm
+	 * pending" is set: a fresh event with no alarm outstanding, or
+	 * the alarm firing with no newer event. When both are set this
+	 * call does nothing; the still-pending alarm fires later. */
+	if (port_change ^ priv->pending_alarm) {
+		struct rte_eth_link *link = &dev->data->dev_link;
+
+		priv->pending_alarm = 0;
+		mlx4_link_update_unlocked(dev, 0);
+		if (((link->link_speed == 0) && link->link_status) ||
+		    ((link->link_speed != 0) && !link->link_status)) {
+			/* Inconsistent status (speed and up/down flag
+			 * disagree), check again later. */
+			priv->pending_alarm = 1;
+			rte_eal_alarm_set(MLX4_ALARM_TIMEOUT_US,
+					  mlx4_dev_link_status_handler,
+					  dev);
+		} else
+			ret = 1;
+	}
+	return ret;
+}
+
+/**
+ * Handle delayed link status event.
+ *
+ * Alarm callback registered through rte_eal_alarm_set(); re-runs the
+ * link status logic and notifies the application once the status has
+ * settled.
+ *
+ * @param arg
+ *   Registered argument.
+ */
+static void
+mlx4_dev_link_status_handler(void *arg)
+{
+	struct rte_eth_dev *dev = arg;
+	struct priv *priv = dev->data->dev_private;
+	int notify;
+
+	priv_lock(priv);
+	/* This callback only runs while an alarm is outstanding. */
+	assert(priv->pending_alarm == 1);
+	notify = priv_dev_link_status_handler(priv, dev);
+	priv_unlock(priv);
+	/* Invoke the application callback after releasing the lock. */
+	if (notify)
+		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+}
+
+/**
+ * Handle interrupts from the NIC.
+ *
+ * Registered with rte_intr_callback_register(); runs the shared link
+ * status logic and, when it reports a settled change, forwards the LSC
+ * event to the application.
+ *
+ * @param[in] intr_handle
+ *   Interrupt handler.
+ * @param cb_arg
+ *   Callback argument.
+ */
+static void
+mlx4_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg)
+{
+	struct rte_eth_dev *dev = cb_arg;
+	struct priv *priv = dev->data->dev_private;
+	int notify;
+
+	(void)intr_handle;
+	priv_lock(priv);
+	notify = priv_dev_link_status_handler(priv, dev);
+	priv_unlock(priv);
+	/* Invoke the application callback after releasing the lock. */
+	if (notify)
+		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+}
+
+/**
+ * Uninstall interrupt handler.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param dev
+ *   Pointer to the rte_eth_dev structure.
+ */
+static void
+priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
+{
+	/* Nothing was installed if LSC interrupts were not requested. */
+	if (!dev->data->dev_conf.intr_conf.lsc)
+		return;
+	/* NOTE(review): the return value of rte_intr_callback_unregister()
+	 * is ignored; confirm a callback cannot still be in flight here. */
+	rte_intr_callback_unregister(&priv->intr_handle,
+				     mlx4_dev_interrupt_handler,
+				     dev);
+	/* Cancel any delayed link status re-check still queued. */
+	if (priv->pending_alarm)
+		rte_eal_alarm_cancel(mlx4_dev_link_status_handler, dev);
+	priv->pending_alarm = 0;
+	/* Reset the handle so a later install starts from a clean state. */
+	priv->intr_handle.fd = 0;
+	priv->intr_handle.type = 0;
+}
+
+/**
+ * Install interrupt handler.
+ *
+ * Puts the IBV async event file descriptor in non-blocking mode and
+ * registers it with the EAL interrupt framework. On failure, LSC
+ * support is disabled in the device configuration so callers can tell
+ * interrupts are unavailable.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param dev
+ *   Pointer to the rte_eth_dev structure.
+ */
+static void
+priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
+{
+	int rc, flags;
+
+	/* Nothing to do if LSC interrupts were not requested. */
+	if (!dev->data->dev_conf.intr_conf.lsc)
+		return;
+	assert(priv->ctx->async_fd > 0);
+	/* The event queue must be non-blocking so the handler can drain
+	 * it completely. Check F_GETFL for failure first: OR-ing an
+	 * error value (-1) into F_SETFL would set garbage flags. */
+	flags = fcntl(priv->ctx->async_fd, F_GETFL);
+	if (flags == -1)
+		rc = -1;
+	else
+		rc = fcntl(priv->ctx->async_fd, F_SETFL,
+			   flags | O_NONBLOCK);
+	if (rc < 0) {
+		INFO("failed to change file descriptor async event queue");
+		dev->data->dev_conf.intr_conf.lsc = 0;
+	} else {
+		priv->intr_handle.fd = priv->ctx->async_fd;
+		priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
+		rte_intr_callback_register(&priv->intr_handle,
+					   mlx4_dev_interrupt_handler,
+					   dev);
+	}
+}
+
static struct eth_driver mlx4_driver;
/**
struct ibv_port_attr port_attr;
struct ibv_pd *pd = NULL;
struct priv *priv = NULL;
- struct rte_eth_dev *eth_dev;
+ struct rte_eth_dev *eth_dev = NULL;
#ifdef HAVE_EXP_QUERY_DEVICE
struct ibv_exp_device_attr exp_device_attr;
#endif /* HAVE_EXP_QUERY_DEVICE */
ERROR("port query failed: %s", strerror(err));
goto port_error;
}
+
+ if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
+ ERROR("port %d is not configured in Ethernet mode",
+ port);
+ goto port_error;
+ }
+
if (port_attr.state != IBV_PORT_ACTIVE)
- WARN("bad state for port %d: \"%s\" (%d)",
- port, ibv_port_state_str(port_attr.state),
- port_attr.state);
+ DEBUG("port %d is not active: \"%s\" (%d)",
+ port, ibv_port_state_str(port_attr.state),
+ port_attr.state);
/* Allocate protection domain. */
pd = ibv_alloc_pd(ctx);
claim_zero(priv_mac_addr_add(priv, 0,
(const uint8_t (*)[ETHER_ADDR_LEN])
mac.addr_bytes));
- claim_zero(priv_mac_addr_add(priv, 1,
+ claim_zero(priv_mac_addr_add(priv, (elemof(priv->mac) - 1),
&(const uint8_t [ETHER_ADDR_LEN])
{ "\xff\xff\xff\xff\xff\xff" }));
#ifndef NDEBUG
goto port_error;
}
- eth_dev->data->dev_private = priv;
+ /* Secondary processes have to use local storage for their
+ * private data as well as a copy of eth_dev->data, but this
+ * pointer must not be modified before burst functions are
+ * actually called. */
+ if (mlx4_is_secondary()) {
+ struct mlx4_secondary_data *sd =
+ &mlx4_secondary_data[eth_dev->data->port_id];
+
+ sd->primary_priv = eth_dev->data->dev_private;
+ if (sd->primary_priv == NULL) {
+ ERROR("no private data for port %u",
+ eth_dev->data->port_id);
+ err = EINVAL;
+ goto port_error;
+ }
+ sd->shared_dev_data = eth_dev->data;
+ rte_spinlock_init(&sd->lock);
+ memcpy(sd->data.name, sd->shared_dev_data->name,
+ sizeof(sd->data.name));
+ sd->data.dev_private = priv;
+ sd->data.rx_mbuf_alloc_failed = 0;
+ sd->data.mtu = ETHER_MTU;
+ sd->data.port_id = sd->shared_dev_data->port_id;
+ sd->data.mac_addrs = priv->mac;
+ eth_dev->tx_pkt_burst = mlx4_tx_burst_secondary_setup;
+ eth_dev->rx_pkt_burst = mlx4_rx_burst_secondary_setup;
+ } else {
+ eth_dev->data->dev_private = priv;
+ eth_dev->data->rx_mbuf_alloc_failed = 0;
+ eth_dev->data->mtu = ETHER_MTU;
+ eth_dev->data->mac_addrs = priv->mac;
+ }
eth_dev->pci_dev = pci_dev;
+
+ rte_eth_copy_pci_info(eth_dev, pci_dev);
+
eth_dev->driver = &mlx4_driver;
- eth_dev->data->rx_mbuf_alloc_failed = 0;
- eth_dev->data->mtu = ETHER_MTU;
priv->dev = eth_dev;
eth_dev->dev_ops = &mlx4_dev_ops;
- eth_dev->data->mac_addrs = priv->mac;
+ TAILQ_INIT(ð_dev->link_intr_cbs);
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
claim_zero(ibv_dealloc_pd(pd));
if (ctx)
claim_zero(ibv_close_device(ctx));
+ if (eth_dev)
+ rte_eth_dev_release_port(eth_dev);
break;
}
.name = MLX4_DRIVER_NAME,
.id_table = mlx4_pci_id_map,
.devinit = mlx4_pci_devinit,
+ .drv_flags = RTE_PCI_DRV_INTR_LSC,
},
.dev_private_size = sizeof(struct priv)
};
{
(void)name;
(void)args;
+
+ RTE_BUILD_BUG_ON(sizeof(wr_id_t) != sizeof(uint64_t));
/*
* RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
* huge pages. Calling ibv_fork_init() during init allows