#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_malloc.h>
-#include <rte_spinlock.h>
-#include <rte_log.h>
#include <rte_alarm.h>
#include <rte_memory.h>
#include <rte_flow.h>
#include <rte_kvargs.h>
#include <rte_interrupts.h>
#include <rte_branch_prediction.h>
+#include <rte_common.h>
/* Generated configuration header. */
#include "mlx4_autoconf.h"
/* PMD headers. */
#include "mlx4.h"
#include "mlx4_flow.h"
-
-/* Convenience macros for accessing mbuf fields. */
-#define NEXT(m) ((m)->next)
-#define DATA_LEN(m) ((m)->data_len)
-#define PKT_LEN(m) ((m)->pkt_len)
-#define DATA_OFF(m) ((m)->data_off)
-#define SET_DATA_OFF(m, o) ((m)->data_off = (o))
-#define NB_SEGS(m) ((m)->nb_segs)
-#define PORT(m) ((m)->port)
+#include "mlx4_utils.h"
/** Configuration structure for device arguments. */
struct mlx4_conf {
static void
priv_rx_intr_vec_disable(struct priv *priv);
-/**
- * Lock private structure to protect it from concurrent access in the
- * control path.
- *
- * @param priv
- * Pointer to private structure.
- */
-void priv_lock(struct priv *priv)
-{
- rte_spinlock_lock(&priv->lock);
-}
-
-/**
- * Unlock private structure.
- *
- * @param priv
- * Pointer to private structure.
- */
-void priv_unlock(struct priv *priv)
-{
- rte_spinlock_unlock(&priv->lock);
-}
-
/* Allocate a buffer on the stack and fill it with a printf format string. */
#define MKSTR(name, ...) \
char name[snprintf(NULL, 0, __VA_ARGS__) + 1]; \
}
/**
- * Set device MTU.
+ * DPDK callback to change the MTU.
*
- * @param priv
- * Pointer to private structure.
+ * @param dev
+ * Pointer to Ethernet device structure.
* @param mtu
* MTU value to set.
 *
 * @return
 * 0 on success, negative errno value otherwise and rte_errno is set.
*/
static int
-priv_set_mtu(struct priv *priv, uint16_t mtu)
+mlx4_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
+ struct priv *priv = dev->data->dev_private;
uint16_t new_mtu;
int ret = priv_set_sysfs_ulong(priv, "mtu", mtu);
ret = priv_get_mtu(priv, &new_mtu);
if (ret)
return ret;
- if (new_mtu == mtu)
+ if (new_mtu == mtu) {
+ priv->mtu = mtu;
return 0;
+ }
rte_errno = EINVAL;
return -rte_errno;
}
priv_mac_addr_del(struct priv *priv);
/**
- * Ethernet device configuration.
+ * DPDK callback for Ethernet device configuration.
*
* Prepare the driver for a given number of TX and RX queues.
 *
 * @return
 * 0 on success, negative errno value otherwise and rte_errno is set.
*/
static int
-dev_configure(struct rte_eth_dev *dev)
+mlx4_dev_configure(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
unsigned int rxqs_n = dev->data->nb_rx_queues;
return 0;
}
-/**
- * DPDK callback for Ethernet device configuration.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- *
- * @return
- * 0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_dev_configure(struct rte_eth_dev *dev)
-{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
- priv_lock(priv);
- ret = dev_configure(dev);
- priv_unlock(priv);
- return ret;
-}
-
static uint16_t mlx4_tx_burst(void *, struct rte_mbuf **, uint16_t);
static uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
claim_zero(ibv_destroy_qp(txq->qp));
if (txq->cq != NULL)
claim_zero(ibv_destroy_cq(txq->cq));
- for (i = 0; (i != elemof(txq->mp2mr)); ++i) {
+ for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
if (txq->mp2mr[i].mp == NULL)
break;
assert(txq->mp2mr[i].mr != NULL);
unsigned int i;
struct ibv_mr *mr;
- for (i = 0; (i != elemof(txq->mp2mr)); ++i) {
+ for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
if (unlikely(txq->mp2mr[i].mp == NULL)) {
/* Unknown MP, add a new MR for it. */
break;
(void *)txq);
return (uint32_t)-1;
}
- if (unlikely(i == elemof(txq->mp2mr))) {
+ if (unlikely(i == RTE_DIM(txq->mp2mr))) {
/* Table is full, remove oldest entry. */
DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
(void *)txq);
struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
struct txq_elt *elt = &(*txq->elts)[elts_head];
struct ibv_send_wr *wr = &elt->wr;
- unsigned int segs = NB_SEGS(buf);
+ unsigned int segs = buf->nb_segs;
unsigned int sent_size = 0;
uint32_t send_flags = 0;
#endif
/* Faster than rte_pktmbuf_free(). */
do {
- struct rte_mbuf *next = NEXT(tmp);
+ struct rte_mbuf *next = tmp->next;
rte_pktmbuf_free_seg(tmp);
tmp = next;
/* Retrieve buffer information. */
addr = rte_pktmbuf_mtod(buf, uintptr_t);
- length = DATA_LEN(buf);
+ length = buf->data_len;
/* Retrieve Memory Region key for this memory pool. */
lkey = txq_mp2mr(txq, txq_mb2mp(buf));
if (unlikely(lkey == (uint32_t)-1)) {
struct txq *txq = (*priv->txqs)[idx];
int ret;
- priv_lock(priv);
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
if (idx >= priv->txqs_n) {
rte_errno = EOVERFLOW;
ERROR("%p: queue index out of range (%u >= %u)",
(void *)dev, idx, priv->txqs_n);
- priv_unlock(priv);
return -rte_errno;
}
if (txq != NULL) {
(void *)dev, idx, (void *)txq);
if (priv->started) {
rte_errno = EEXIST;
- priv_unlock(priv);
return -rte_errno;
}
(*priv->txqs)[idx] = NULL;
rte_errno = ENOMEM;
ERROR("%p: unable to allocate queue index %u",
(void *)dev, idx);
- priv_unlock(priv);
return -rte_errno;
}
}
/* Update send callback. */
dev->tx_pkt_burst = mlx4_tx_burst;
}
- priv_unlock(priv);
return ret;
}
if (txq == NULL)
return;
priv = txq->priv;
- priv_lock(priv);
for (i = 0; (i != priv->txqs_n); ++i)
if ((*priv->txqs)[i] == txq) {
DEBUG("%p: removing TX queue %p from list",
}
txq_cleanup(txq);
rte_free(txq);
- priv_unlock(priv);
}
/* RX queues handling. */
wr->sg_list = sge;
wr->num_sge = 1;
/* Headroom is reserved by rte_pktmbuf_alloc(). */
- assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
+ assert(buf->data_off == RTE_PKTMBUF_HEADROOM);
/* Buffer is supposed to be empty. */
assert(rte_pktmbuf_data_len(buf) == 0);
assert(rte_pktmbuf_pkt_len(buf) == 0);
return 0;
error:
if (elts != NULL) {
- for (i = 0; (i != elemof(*elts)); ++i)
+ for (i = 0; (i != RTE_DIM(*elts)); ++i)
rte_pktmbuf_free_seg((*elts)[i].buf);
rte_free(elts);
}
rxq->elts = NULL;
if (elts == NULL)
return;
- for (i = 0; (i != elemof(*elts)); ++i)
+ for (i = 0; (i != RTE_DIM(*elts)); ++i)
rte_pktmbuf_free_seg((*elts)[i].buf);
rte_free(elts);
}
assert(elt->sge.lkey == rxq->mr->lkey);
elt->buf = rep;
/* Update seg information. */
- SET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM);
- NB_SEGS(seg) = 1;
- PORT(seg) = rxq->port_id;
- NEXT(seg) = NULL;
- PKT_LEN(seg) = len;
- DATA_LEN(seg) = len;
+ seg->data_off = RTE_PKTMBUF_HEADROOM;
+ seg->nb_segs = 1;
+ seg->port = rxq->port_id;
+ seg->next = NULL;
+ seg->pkt_len = len;
+ seg->data_len = len;
seg->packet_type = 0;
seg->ol_flags = 0;
/* Return packet. */
struct rxq *rxq = (*priv->rxqs)[idx];
int ret;
- priv_lock(priv);
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
if (idx >= priv->rxqs_n) {
rte_errno = EOVERFLOW;
ERROR("%p: queue index out of range (%u >= %u)",
(void *)dev, idx, priv->rxqs_n);
- priv_unlock(priv);
return -rte_errno;
}
if (rxq != NULL) {
(void *)dev, idx, (void *)rxq);
if (priv->started) {
rte_errno = EEXIST;
- priv_unlock(priv);
return -rte_errno;
}
(*priv->rxqs)[idx] = NULL;
rte_errno = ENOMEM;
ERROR("%p: unable to allocate queue index %u",
(void *)dev, idx);
- priv_unlock(priv);
return -rte_errno;
}
}
/* Update receive callback. */
dev->rx_pkt_burst = mlx4_rx_burst;
}
- priv_unlock(priv);
return ret;
}
if (rxq == NULL)
return;
priv = rxq->priv;
- priv_lock(priv);
for (i = 0; (i != priv->rxqs_n); ++i)
if ((*priv->rxqs)[i] == rxq) {
DEBUG("%p: removing RX queue %p from list",
}
rxq_cleanup(rxq);
rte_free(rxq);
- priv_unlock(priv);
}
static int
struct priv *priv = dev->data->dev_private;
int ret;
- priv_lock(priv);
- if (priv->started) {
- priv_unlock(priv);
+ if (priv->started)
return 0;
- }
DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
priv->started = 1;
ret = priv_mac_addr_add(priv);
(void *)dev, strerror(ret));
goto err;
}
- priv_unlock(priv);
return 0;
err:
/* Rollback. */
priv_mac_addr_del(priv);
priv->started = 0;
- priv_unlock(priv);
return ret;
}
{
struct priv *priv = dev->data->dev_private;
- priv_lock(priv);
- if (!priv->started) {
- priv_unlock(priv);
+ if (!priv->started)
return;
- }
DEBUG("%p: detaching flows from all RX queues", (void *)dev);
priv->started = 0;
mlx4_priv_flow_stop(priv);
priv_mac_addr_del(priv);
- priv_unlock(priv);
}
/**
if (priv == NULL)
return;
- priv_lock(priv);
DEBUG("%p: closing device \"%s\"",
(void *)dev,
((priv->ctx != NULL) ? priv->ctx->device->name : ""));
priv_dev_removal_interrupt_handler_uninstall(priv, dev);
priv_dev_link_interrupt_handler_uninstall(priv, dev);
priv_rx_intr_vec_disable(priv);
- priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
mlx4_set_link_down(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
- int err;
- priv_lock(priv);
- err = priv_set_link(priv, 0);
- priv_unlock(priv);
- return err;
+ return priv_set_link(priv, 0);
}
/**
mlx4_set_link_up(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
- int err;
- priv_lock(priv);
- err = priv_set_link(priv, 1);
- priv_unlock(priv);
- return err;
+ return priv_set_link(priv, 1);
}
/**
info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
if (priv == NULL)
return;
- priv_lock(priv);
/* FIXME: we should ask the device for these values. */
info->min_rx_bufsize = 32;
info->max_rx_pktlen = 65536;
ETH_LINK_SPEED_20G |
ETH_LINK_SPEED_40G |
ETH_LINK_SPEED_56G;
- priv_unlock(priv);
}
/**
if (priv == NULL)
return;
- priv_lock(priv);
/* Add software counters. */
for (i = 0; (i != priv->rxqs_n); ++i) {
struct rxq *rxq = (*priv->rxqs)[i];
tmp.oerrors += txq->stats.odropped;
}
*stats = tmp;
- priv_unlock(priv);
}
/**
if (priv == NULL)
return;
- priv_lock(priv);
for (i = 0; (i != priv->rxqs_n); ++i) {
if ((*priv->rxqs)[i] == NULL)
continue;
(*priv->txqs)[i]->stats =
(struct mlx4_txq_stats){ .idx = idx };
}
- priv_unlock(priv);
}
/**
struct rte_eth_link dev_link;
int link_speed = 0;
- /* priv_lock() is not taken to allow concurrent calls. */
if (priv == NULL) {
rte_errno = EINVAL;
return -rte_errno;
return 0;
}
-/**
- * DPDK callback to change the MTU.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param in_mtu
- * New MTU.
- *
- * @return
- * 0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
-{
- struct priv *priv = dev->data->dev_private;
- int ret = 0;
-
- priv_lock(priv);
- /* Set kernel interface MTU first. */
- if (priv_set_mtu(priv, mtu)) {
- ret = rte_errno;
- WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
- strerror(rte_errno));
- goto out;
- } else
- DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
- priv->mtu = mtu;
-out:
- priv_unlock(priv);
- assert(ret >= 0);
- return -ret;
-}
-
/**
* DPDK callback to get flow control status.
*
int ret;
ifr.ifr_data = (void *)ðpause;
- priv_lock(priv);
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
ret = rte_errno;
WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
fc_conf->mode = RTE_FC_NONE;
ret = 0;
out:
- priv_unlock(priv);
assert(ret >= 0);
return -ret;
}
ethpause.tx_pause = 1;
else
ethpause.tx_pause = 0;
- priv_lock(priv);
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
ret = rte_errno;
WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
}
ret = 0;
out:
- priv_unlock(priv);
assert(ret >= 0);
return -ret;
}
uint32_t events;
int ret;
- priv_lock(priv);
assert(priv->pending_alarm == 1);
priv->pending_alarm = 0;
ret = priv_dev_status_handler(priv, dev, &events);
- priv_unlock(priv);
if (ret > 0 && events & (1 << RTE_ETH_EVENT_INTR_LSC))
_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
NULL, NULL);
uint32_t ev;
int i;
- priv_lock(priv);
ret = priv_dev_status_handler(priv, dev, &ev);
- priv_unlock(priv);
if (ret > 0) {
for (i = RTE_ETH_EVENT_UNKNOWN;
i < RTE_ETH_EVENT_MAX;
ERROR("rte_intr_callback_unregister failed with %d %s",
ret, strerror(rte_errno));
}
- priv->intr_handle.fd = 0;
- priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ priv->intr_handle.fd = -1;
return ret;
}
return -rte_errno;
} else {
priv->intr_handle.fd = priv->ctx->async_fd;
- priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
rc = rte_intr_callback_register(&priv->intr_handle,
mlx4_dev_interrupt_handler,
dev);
rte_errno = -rc;
ERROR("rte_intr_callback_register failed "
" (rte_errno: %s)", strerror(rte_errno));
+ priv->intr_handle.fd = -1;
return -rte_errno;
}
}
unsigned int rxqs_n = priv->rxqs_n;
unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
unsigned int count = 0;
- struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ struct rte_intr_handle *intr_handle = &priv->intr_handle;
if (!priv->dev->data->dev_conf.intr_conf.rxq)
return 0;
" Rx interrupts will not be supported");
return -rte_errno;
}
- intr_handle->type = RTE_INTR_HANDLE_EXT;
for (i = 0; i != n; ++i) {
struct rxq *rxq = (*priv->rxqs)[i];
int fd;
static void
priv_rx_intr_vec_disable(struct priv *priv)
{
- struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ struct rte_intr_handle *intr_handle = &priv->intr_handle;
rte_intr_free_epoll_fd(intr_handle);
free(intr_handle->intr_vec);
eth_dev->device = &pci_dev->device;
rte_eth_copy_pci_info(eth_dev, pci_dev);
eth_dev->device->driver = &mlx4_driver.driver;
+ /* Initialize local interrupt handle for current port. */
+ priv->intr_handle = (struct rte_intr_handle){
+ .fd = -1,
+ .type = RTE_INTR_HANDLE_EXT,
+ };
/*
- * Copy and override interrupt handle to prevent it from
- * being shared between all ethdev instances of a given PCI
- * device. This is required to properly handle Rx interrupts
- * on all ports.
+ * Override ethdev interrupt handle pointer with private
+ * handle instead of that of the parent PCI device used by
+ * default. This prevents it from being shared between all
+ * ports of the same PCI device since each of them is
+ * associated with its own Verbs context.
+ *
+ * Rx interrupts in particular require this as the PMD has
+ * no control over the registration of queue interrupts
+ * besides setting up eth_dev->intr_handle, the rest is
+ * handled by rte_intr_rx_ctl().
*/
- priv->intr_handle_dev = *eth_dev->intr_handle;
- eth_dev->intr_handle = &priv->intr_handle_dev;
+ eth_dev->intr_handle = &priv->intr_handle;
priv->dev = eth_dev;
eth_dev->dev_ops = &mlx4_dev_ops;
eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;