net/mlx4: remove Tx inline compilation option
[dpdk.git] / drivers / net / mlx4 / mlx4.c
index 055de49..394b87c 100644 (file)
@@ -1,8 +1,8 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright 2012-2017 6WIND S.A.
- *   Copyright 2012-2017 Mellanox.
+ *   Copyright 2012 6WIND S.A.
+ *   Copyright 2012 Mellanox
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -34,7 +34,6 @@
 /*
  * Known limitations:
  * - RSS hash key and options cannot be modified.
- * - Hardware counters aren't implemented.
  */
 
 /* System headers. */
 #include <rte_malloc.h>
 #include <rte_spinlock.h>
 #include <rte_atomic.h>
-#include <rte_version.h>
 #include <rte_log.h>
 #include <rte_alarm.h>
 #include <rte_memory.h>
 #include <rte_flow.h>
 #include <rte_kvargs.h>
 #include <rte_interrupts.h>
+#include <rte_branch_prediction.h>
 
 /* Generated configuration header. */
 #include "mlx4_autoconf.h"
@@ -110,16 +109,12 @@ typedef union {
         (((val) & (from)) / ((from) / (to))) : \
         (((val) & (from)) * ((to) / (from))))
 
-/* Local storage for secondary process data. */
-struct mlx4_secondary_data {
-       struct rte_eth_dev_data data; /* Local device data. */
-       struct priv *primary_priv; /* Private structure from primary. */
-       struct rte_eth_dev_data *shared_dev_data; /* Shared device data. */
-       rte_spinlock_t lock; /* Port configuration lock. */
-} mlx4_secondary_data[RTE_MAX_ETHPORTS];
-
+/** Configuration structure for device arguments. */
 struct mlx4_conf {
-       uint8_t active_ports;
+       struct {
+               uint32_t present; /**< Bit-field for existing ports. */
+               uint32_t enabled; /**< Bit-field for user-enabled ports. */
+       } ports;
 };
 
 /* Available parameters list. */
@@ -140,38 +135,6 @@ priv_rx_intr_vec_enable(struct priv *priv);
 static void
 priv_rx_intr_vec_disable(struct priv *priv);
 
-/**
- * Check if running as a secondary process.
- *
- * @return
- *   Nonzero if running as a secondary process.
- */
-static inline int
-mlx4_is_secondary(void)
-{
-       return rte_eal_process_type() != RTE_PROC_PRIMARY;
-}
-
-/**
- * Return private structure associated with an Ethernet device.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- *
- * @return
- *   Pointer to private structure.
- */
-static struct priv *
-mlx4_get_priv(struct rte_eth_dev *dev)
-{
-       struct mlx4_secondary_data *sd;
-
-       if (!mlx4_is_secondary())
-               return dev->data->dev_private;
-       sd = &mlx4_secondary_data[dev->data->port_id];
-       return sd->data.dev_private;
-}
-
 /**
  * Lock private structure to protect it from concurrent access in the
  * control path.
@@ -730,8 +693,6 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
        struct priv *priv = dev->data->dev_private;
        int ret;
 
-       if (mlx4_is_secondary())
-               return -E_RTE_SECONDARY;
        priv_lock(priv);
        ret = dev_configure(dev);
        assert(ret >= 0);
@@ -742,157 +703,6 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
 static uint16_t mlx4_tx_burst(void *, struct rte_mbuf **, uint16_t);
 static uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
 
-/**
- * Configure secondary process queues from a private data pointer (primary
- * or secondary) and update burst callbacks. Can take place only once.
- *
- * All queues must have been previously created by the primary process to
- * avoid undefined behavior.
- *
- * @param priv
- *   Private data pointer from either primary or secondary process.
- *
- * @return
- *   Private data pointer from secondary process, NULL in case of error.
- */
-static struct priv *
-mlx4_secondary_data_setup(struct priv *priv)
-{
-       unsigned int port_id = 0;
-       struct mlx4_secondary_data *sd;
-       void **tx_queues;
-       void **rx_queues;
-       unsigned int nb_tx_queues;
-       unsigned int nb_rx_queues;
-       unsigned int i;
-
-       /* priv must be valid at this point. */
-       assert(priv != NULL);
-       /* priv->dev must also be valid but may point to local memory from
-        * another process, possibly with the same address and must not
-        * be dereferenced yet. */
-       assert(priv->dev != NULL);
-       /* Determine port ID by finding out where priv comes from. */
-       while (1) {
-               sd = &mlx4_secondary_data[port_id];
-               rte_spinlock_lock(&sd->lock);
-               /* Primary process? */
-               if (sd->primary_priv == priv)
-                       break;
-               /* Secondary process? */
-               if (sd->data.dev_private == priv)
-                       break;
-               rte_spinlock_unlock(&sd->lock);
-               if (++port_id == RTE_DIM(mlx4_secondary_data))
-                       port_id = 0;
-       }
-       /* Switch to secondary private structure. If private data has already
-        * been updated by another thread, there is nothing else to do. */
-       priv = sd->data.dev_private;
-       if (priv->dev->data == &sd->data)
-               goto end;
-       /* Sanity checks. Secondary private structure is supposed to point
-        * to local eth_dev, itself still pointing to the shared device data
-        * structure allocated by the primary process. */
-       assert(sd->shared_dev_data != &sd->data);
-       assert(sd->data.nb_tx_queues == 0);
-       assert(sd->data.tx_queues == NULL);
-       assert(sd->data.nb_rx_queues == 0);
-       assert(sd->data.rx_queues == NULL);
-       assert(priv != sd->primary_priv);
-       assert(priv->dev->data == sd->shared_dev_data);
-       assert(priv->txqs_n == 0);
-       assert(priv->txqs == NULL);
-       assert(priv->rxqs_n == 0);
-       assert(priv->rxqs == NULL);
-       nb_tx_queues = sd->shared_dev_data->nb_tx_queues;
-       nb_rx_queues = sd->shared_dev_data->nb_rx_queues;
-       /* Allocate local storage for queues. */
-       tx_queues = rte_zmalloc("secondary ethdev->tx_queues",
-                               sizeof(sd->data.tx_queues[0]) * nb_tx_queues,
-                               RTE_CACHE_LINE_SIZE);
-       rx_queues = rte_zmalloc("secondary ethdev->rx_queues",
-                               sizeof(sd->data.rx_queues[0]) * nb_rx_queues,
-                               RTE_CACHE_LINE_SIZE);
-       if (tx_queues == NULL || rx_queues == NULL)
-               goto error;
-       /* Lock to prevent control operations during setup. */
-       priv_lock(priv);
-       /* TX queues. */
-       for (i = 0; i != nb_tx_queues; ++i) {
-               struct txq *primary_txq = (*sd->primary_priv->txqs)[i];
-               struct txq *txq;
-
-               if (primary_txq == NULL)
-                       continue;
-               txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0,
-                                       primary_txq->socket);
-               if (txq != NULL) {
-                       if (txq_setup(priv->dev,
-                                     txq,
-                                     primary_txq->elts_n * MLX4_PMD_SGE_WR_N,
-                                     primary_txq->socket,
-                                     NULL) == 0) {
-                               txq->stats.idx = primary_txq->stats.idx;
-                               tx_queues[i] = txq;
-                               continue;
-                       }
-                       rte_free(txq);
-               }
-               while (i) {
-                       txq = tx_queues[--i];
-                       txq_cleanup(txq);
-                       rte_free(txq);
-               }
-               goto error;
-       }
-       /* RX queues. */
-       for (i = 0; i != nb_rx_queues; ++i) {
-               struct rxq *primary_rxq = (*sd->primary_priv->rxqs)[i];
-
-               if (primary_rxq == NULL)
-                       continue;
-               /* Not supported yet. */
-               rx_queues[i] = NULL;
-       }
-       /* Update everything. */
-       priv->txqs = (void *)tx_queues;
-       priv->txqs_n = nb_tx_queues;
-       priv->rxqs = (void *)rx_queues;
-       priv->rxqs_n = nb_rx_queues;
-       sd->data.rx_queues = rx_queues;
-       sd->data.tx_queues = tx_queues;
-       sd->data.nb_rx_queues = nb_rx_queues;
-       sd->data.nb_tx_queues = nb_tx_queues;
-       sd->data.dev_link = sd->shared_dev_data->dev_link;
-       sd->data.mtu = sd->shared_dev_data->mtu;
-       memcpy(sd->data.rx_queue_state, sd->shared_dev_data->rx_queue_state,
-              sizeof(sd->data.rx_queue_state));
-       memcpy(sd->data.tx_queue_state, sd->shared_dev_data->tx_queue_state,
-              sizeof(sd->data.tx_queue_state));
-       sd->data.dev_flags = sd->shared_dev_data->dev_flags;
-       /* Use local data from now on. */
-       rte_mb();
-       priv->dev->data = &sd->data;
-       rte_mb();
-       priv->dev->tx_pkt_burst = mlx4_tx_burst;
-       priv->dev->rx_pkt_burst = removed_rx_burst;
-       priv_unlock(priv);
-end:
-       /* More sanity checks. */
-       assert(priv->dev->tx_pkt_burst == mlx4_tx_burst);
-       assert(priv->dev->rx_pkt_burst == removed_rx_burst);
-       assert(priv->dev->data == &sd->data);
-       rte_spinlock_unlock(&sd->lock);
-       return priv;
-error:
-       priv_unlock(priv);
-       rte_free(tx_queues);
-       rte_free(rx_queues);
-       rte_spinlock_unlock(&sd->lock);
-       return NULL;
-}
-
 /* TX queues handling. */
 
 /**
@@ -1099,10 +909,6 @@ txq_complete(struct txq *txq)
 
        if (unlikely(elts_comp == 0))
                return 0;
-#ifdef DEBUG_SEND
-       DEBUG("%p: processing %u work requests completions",
-             (void *)txq, elts_comp);
-#endif
        wcs_n = txq->if_cq->poll_cnt(txq->cq, elts_comp);
        if (unlikely(wcs_n == 0))
                return 0;
@@ -1370,8 +1176,6 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
        txq_mp2mr(txq, mp);
 }
 
-#if MLX4_PMD_SGE_WR_N > 1
-
 /**
  * Copy scattered mbuf contents to a single linear buffer.
  *
@@ -1518,8 +1322,6 @@ stop:
        };
 }
 
-#endif /* MLX4_PMD_SGE_WR_N > 1 */
-
 /**
  * DPDK callback for TX.
  *
@@ -1565,9 +1367,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
                struct txq_elt *elt = &(*txq->elts)[elts_head];
                unsigned int segs = NB_SEGS(buf);
-#ifdef MLX4_PMD_SOFT_COUNTERS
                unsigned int sent_size = 0;
-#endif
                uint32_t send_flags = 0;
 
                /* Clean up old buffer. */
@@ -1628,7 +1428,6 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                              (uintptr_t)addr);
                        RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
                        /* Put packet into send queue. */
-#if MLX4_PMD_MAX_INLINE > 0
                        if (length <= txq->max_inline)
                                err = txq->if_qp->send_pending_inline
                                        (txq->qp,
@@ -1636,7 +1435,6 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                         length,
                                         send_flags);
                        else
-#endif
                                err = txq->if_qp->send_pending
                                        (txq->qp,
                                         addr,
@@ -1645,11 +1443,8 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                         send_flags);
                        if (unlikely(err))
                                goto stop;
-#ifdef MLX4_PMD_SOFT_COUNTERS
                        sent_size += length;
-#endif
                } else {
-#if MLX4_PMD_SGE_WR_N > 1
                        struct ibv_sge sges[MLX4_PMD_SGE_WR_N];
                        struct tx_burst_sg_ret ret;
 
@@ -1666,29 +1461,18 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                 send_flags);
                        if (unlikely(err))
                                goto stop;
-#ifdef MLX4_PMD_SOFT_COUNTERS
                        sent_size += ret.length;
-#endif
-#else /* MLX4_PMD_SGE_WR_N > 1 */
-                       DEBUG("%p: TX scattered buffers support not"
-                             " compiled in", (void *)txq);
-                       goto stop;
-#endif /* MLX4_PMD_SGE_WR_N > 1 */
                }
                elts_head = elts_head_next;
-#ifdef MLX4_PMD_SOFT_COUNTERS
                /* Increment sent bytes counter. */
                txq->stats.obytes += sent_size;
-#endif
        }
 stop:
        /* Take a shortcut if nothing must be sent. */
        if (unlikely(i == 0))
                return 0;
-#ifdef MLX4_PMD_SOFT_COUNTERS
        /* Increment sent packets counter. */
        txq->stats.opackets += i;
-#endif
        /* Ring QP doorbell. */
        err = txq->if_qp->send_flush(txq->qp);
        if (unlikely(err)) {
@@ -1703,46 +1487,6 @@ stop:
        return i;
 }
 
-/**
- * DPDK callback for TX in secondary processes.
- *
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal TX burst callback.
- *
- * @param dpdk_txq
- *   Generic pointer to TX queue structure.
- * @param[in] pkts
- *   Packets to transmit.
- * @param pkts_n
- *   Number of packets in array.
- *
- * @return
- *   Number of packets successfully transmitted (<= pkts_n).
- */
-static uint16_t
-mlx4_tx_burst_secondary_setup(void *dpdk_txq, struct rte_mbuf **pkts,
-                             uint16_t pkts_n)
-{
-       struct txq *txq = dpdk_txq;
-       struct priv *priv = mlx4_secondary_data_setup(txq->priv);
-       struct priv *primary_priv;
-       unsigned int index;
-
-       if (priv == NULL)
-               return 0;
-       primary_priv =
-               mlx4_secondary_data[priv->dev->data->port_id].primary_priv;
-       /* Look for queue index in both private structures. */
-       for (index = 0; index != priv->txqs_n; ++index)
-               if (((*primary_priv->txqs)[index] == txq) ||
-                   ((*priv->txqs)[index] == txq))
-                       break;
-       if (index == priv->txqs_n)
-               return 0;
-       txq = (*priv->txqs)[index];
-       return priv->dev->tx_pkt_burst(txq, pkts, pkts_n);
-}
-
 /**
  * Configure a TX queue.
  *
@@ -1764,7 +1508,7 @@ static int
 txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
          unsigned int socket, const struct rte_eth_txconf *conf)
 {
-       struct priv *priv = mlx4_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        struct txq tmpl = {
                .priv = priv,
                .socket = socket
@@ -1832,9 +1576,7 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
                                          MLX4_PMD_SGE_WR_N) ?
                                         priv->device_attr.max_sge :
                                         MLX4_PMD_SGE_WR_N),
-#if MLX4_PMD_MAX_INLINE > 0
                        .max_inline_data = MLX4_PMD_MAX_INLINE,
-#endif
                },
                .qp_type = IBV_QPT_RAW_PACKET,
                /* Do *NOT* enable this, completions events are managed per
@@ -1852,10 +1594,8 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
                      (void *)dev, strerror(ret));
                goto error;
        }
-#if MLX4_PMD_MAX_INLINE > 0
        /* ibv_create_qp() updates this value. */
        tmpl.max_inline = attr.init.cap.max_inline_data;
-#endif
        attr.mod = (struct ibv_exp_qp_attr){
                /* Move the QP to this state. */
                .qp_state = IBV_QPS_INIT,
@@ -1960,8 +1700,6 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        struct txq *txq = (*priv->txqs)[idx];
        int ret;
 
-       if (mlx4_is_secondary())
-               return -E_RTE_SECONDARY;
        priv_lock(priv);
        DEBUG("%p: configuring queue %u for %u descriptors",
              (void *)dev, idx, desc);
@@ -2017,8 +1755,6 @@ mlx4_tx_queue_release(void *dpdk_txq)
        struct priv *priv;
        unsigned int i;
 
-       if (mlx4_is_secondary())
-               return;
        if (txq == NULL)
                return;
        priv = txq->priv;
@@ -3023,10 +2759,8 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                      " completion status (%d): %s",
                                      (void *)rxq, wc.wr_id, wc.status,
                                      ibv_wc_status_str(wc.status));
-#ifdef MLX4_PMD_SOFT_COUNTERS
                                /* Increment dropped packets counter. */
                                ++rxq->stats.idropped;
-#endif
                                /* Link completed WRs together for repost. */
                                *next = wr;
                                next = &wr->next;
@@ -3138,10 +2872,8 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                /* Return packet. */
                *(pkts++) = pkt_buf;
                ++pkts_ret;
-#ifdef MLX4_PMD_SOFT_COUNTERS
                /* Increase bytes counter. */
                rxq->stats.ibytes += pkt_buf_len;
-#endif
 repost:
                if (++elts_head >= elts_n)
                        elts_head = 0;
@@ -3151,9 +2883,6 @@ repost:
                return 0;
        *next = NULL;
        /* Repost WRs. */
-#ifdef DEBUG_RECV
-       DEBUG("%p: reposting %d WRs", (void *)rxq, i);
-#endif
        ret = ibv_post_recv(rxq->qp, head.next, &bad_wr);
        if (unlikely(ret)) {
                /* Inability to repost WRs is fatal. */
@@ -3164,10 +2893,8 @@ repost:
                abort();
        }
        rxq->elts_head = elts_head;
-#ifdef MLX4_PMD_SOFT_COUNTERS
        /* Increase packets counter. */
        rxq->stats.ipackets += pkts_ret;
-#endif
        return pkts_ret;
 }
 
@@ -3248,10 +2975,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                      " completion status (%d): %s",
                                      (void *)rxq, wc.wr_id, wc.status,
                                      ibv_wc_status_str(wc.status));
-#ifdef MLX4_PMD_SOFT_COUNTERS
                                /* Increment dropped packets counter. */
                                ++rxq->stats.idropped;
-#endif
                                /* Add SGE to array for repost. */
                                sges[i] = elt->sge;
                                goto repost;
@@ -3302,10 +3027,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                /* Return packet. */
                *(pkts++) = seg;
                ++pkts_ret;
-#ifdef MLX4_PMD_SOFT_COUNTERS
                /* Increase bytes counter. */
                rxq->stats.ibytes += len;
-#endif
 repost:
                if (++elts_head >= elts_n)
                        elts_head = 0;
@@ -3314,9 +3037,6 @@ repost:
        if (unlikely(i == 0))
                return 0;
        /* Repost WRs. */
-#ifdef DEBUG_RECV
-       DEBUG("%p: reposting %u WRs", (void *)rxq, i);
-#endif
        ret = rxq->if_qp->recv_burst(rxq->qp, sges, i);
        if (unlikely(ret)) {
                /* Inability to repost WRs is fatal. */
@@ -3326,53 +3046,11 @@ repost:
                abort();
        }
        rxq->elts_head = elts_head;
-#ifdef MLX4_PMD_SOFT_COUNTERS
        /* Increase packets counter. */
        rxq->stats.ipackets += pkts_ret;
-#endif
        return pkts_ret;
 }
 
-/**
- * DPDK callback for RX in secondary processes.
- *
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal RX burst callback.
- *
- * @param dpdk_rxq
- *   Generic pointer to RX queue structure.
- * @param[out] pkts
- *   Array to store received packets.
- * @param pkts_n
- *   Maximum number of packets in array.
- *
- * @return
- *   Number of packets successfully received (<= pkts_n).
- */
-static uint16_t
-mlx4_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts,
-                             uint16_t pkts_n)
-{
-       struct rxq *rxq = dpdk_rxq;
-       struct priv *priv = mlx4_secondary_data_setup(rxq->priv);
-       struct priv *primary_priv;
-       unsigned int index;
-
-       if (priv == NULL)
-               return 0;
-       primary_priv =
-               mlx4_secondary_data[priv->dev->data->port_id].primary_priv;
-       /* Look for queue index in both private structures. */
-       for (index = 0; index != priv->rxqs_n; ++index)
-               if (((*primary_priv->rxqs)[index] == rxq) ||
-                   ((*priv->rxqs)[index] == rxq))
-                       break;
-       if (index == priv->rxqs_n)
-               return 0;
-       rxq = (*priv->rxqs)[index];
-       return priv->dev->rx_pkt_burst(rxq, pkts, pkts_n);
-}
-
 /**
  * Allocate a Queue Pair.
  * Optionally setup inline receive if supported.
@@ -3414,15 +3092,11 @@ rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
                .res_domain = rd,
        };
 
-#ifdef INLINE_RECV
        attr.max_inl_recv = priv->inl_recv_size;
        attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
-#endif
        return ibv_exp_create_qp(priv->ctx, &attr);
 }
 
-#ifdef RSS_SUPPORT
-
 /**
  * Allocate a RSS Queue Pair.
  * Optionally setup inline receive if supported.
@@ -3470,10 +3144,8 @@ rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
                .res_domain = rd,
        };
 
-#ifdef INLINE_RECV
        attr.max_inl_recv = priv->inl_recv_size,
        attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
-#endif
        if (children_n > 0) {
                attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
                /* TSS isn't necessary. */
@@ -3489,8 +3161,6 @@ rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
        return ibv_exp_create_qp(priv->ctx, &attr);
 }
 
-#endif /* RSS_SUPPORT */
-
 /**
  * Reconfigure a RX queue with new parameters.
  *
@@ -3724,13 +3394,11 @@ rxq_create_qp(struct rxq *rxq,
        int parent = (children_n > 0);
        struct priv *priv = rxq->priv;
 
-#ifdef RSS_SUPPORT
        if (priv->rss && !inactive && (rxq_parent || parent))
                rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
                                           children_n, rxq->rd,
                                           rxq_parent);
        else
-#endif /* RSS_SUPPORT */
                rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
        if (rxq->qp == NULL) {
                ret = (errno ? errno : EINVAL);
@@ -3746,9 +3414,7 @@ rxq_create_qp(struct rxq *rxq,
        };
        ret = ibv_exp_modify_qp(rxq->qp, &mod,
                                (IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
                                 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
                                 IBV_EXP_QP_PORT));
        if (ret) {
                ERROR("QP state to IBV_QPS_INIT failed: %s",
@@ -4016,8 +3682,6 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        int inactive = 0;
        int ret;
 
-       if (mlx4_is_secondary())
-               return -E_RTE_SECONDARY;
        priv_lock(priv);
        DEBUG("%p: configuring queue %u for %u descriptors",
              (void *)dev, idx, desc);
@@ -4085,8 +3749,6 @@ mlx4_rx_queue_release(void *dpdk_rxq)
        struct priv *priv;
        unsigned int i;
 
-       if (mlx4_is_secondary())
-               return;
        if (rxq == NULL)
                return;
        priv = rxq->priv;
@@ -4132,8 +3794,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
        struct rxq *rxq;
        int ret;
 
-       if (mlx4_is_secondary())
-               return -E_RTE_SECONDARY;
        priv_lock(priv);
        if (priv->started) {
                priv_unlock(priv);
@@ -4224,8 +3884,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
        unsigned int r;
        struct rxq *rxq;
 
-       if (mlx4_is_secondary())
-               return;
        priv_lock(priv);
        if (!priv->started) {
                priv_unlock(priv);
@@ -4327,7 +3985,7 @@ priv_dev_link_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *);
 static void
 mlx4_dev_close(struct rte_eth_dev *dev)
 {
-       struct priv *priv = mlx4_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        void *tmp;
        unsigned int i;
 
@@ -4480,7 +4138,7 @@ mlx4_set_link_up(struct rte_eth_dev *dev)
 static void
 mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 {
-       struct priv *priv = mlx4_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        unsigned int max;
        char ifname[IF_NAMESIZE];
 
@@ -4557,7 +4215,7 @@ mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 static void
 mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-       struct priv *priv = mlx4_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        struct rte_eth_stats tmp = {0};
        unsigned int i;
        unsigned int idx;
@@ -4573,17 +4231,13 @@ mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
                        continue;
                idx = rxq->stats.idx;
                if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-#ifdef MLX4_PMD_SOFT_COUNTERS
                        tmp.q_ipackets[idx] += rxq->stats.ipackets;
                        tmp.q_ibytes[idx] += rxq->stats.ibytes;
-#endif
                        tmp.q_errors[idx] += (rxq->stats.idropped +
                                              rxq->stats.rx_nombuf);
                }
-#ifdef MLX4_PMD_SOFT_COUNTERS
                tmp.ipackets += rxq->stats.ipackets;
                tmp.ibytes += rxq->stats.ibytes;
-#endif
                tmp.ierrors += rxq->stats.idropped;
                tmp.rx_nombuf += rxq->stats.rx_nombuf;
        }
@@ -4594,21 +4248,14 @@ mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
                        continue;
                idx = txq->stats.idx;
                if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-#ifdef MLX4_PMD_SOFT_COUNTERS
                        tmp.q_opackets[idx] += txq->stats.opackets;
                        tmp.q_obytes[idx] += txq->stats.obytes;
-#endif
                        tmp.q_errors[idx] += txq->stats.odropped;
                }
-#ifdef MLX4_PMD_SOFT_COUNTERS
                tmp.opackets += txq->stats.opackets;
                tmp.obytes += txq->stats.obytes;
-#endif
                tmp.oerrors += txq->stats.odropped;
        }
-#ifndef MLX4_PMD_SOFT_COUNTERS
-       /* FIXME: retrieve and add hardware counters. */
-#endif
        *stats = tmp;
        priv_unlock(priv);
 }
@@ -4622,7 +4269,7 @@ mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 static void
 mlx4_stats_reset(struct rte_eth_dev *dev)
 {
-       struct priv *priv = mlx4_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
        unsigned int idx;
 
@@ -4643,9 +4290,6 @@ mlx4_stats_reset(struct rte_eth_dev *dev)
                (*priv->txqs)[i]->stats =
                        (struct mlx4_txq_stats){ .idx = idx };
        }
-#ifndef MLX4_PMD_SOFT_COUNTERS
-       /* FIXME: reset hardware counters. */
-#endif
        priv_unlock(priv);
 }
 
@@ -4662,8 +4306,6 @@ mlx4_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 {
        struct priv *priv = dev->data->dev_private;
 
-       if (mlx4_is_secondary())
-               return;
        priv_lock(priv);
        if (priv->isolated)
                goto end;
@@ -4696,8 +4338,6 @@ mlx4_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
        struct priv *priv = dev->data->dev_private;
        int re;
 
-       if (mlx4_is_secondary())
-               return -ENOTSUP;
        (void)vmdq;
        priv_lock(priv);
        if (priv->isolated) {
@@ -4750,8 +4390,6 @@ mlx4_promiscuous_enable(struct rte_eth_dev *dev)
        unsigned int i;
        int ret;
 
-       if (mlx4_is_secondary())
-               return;
        priv_lock(priv);
        if (priv->isolated) {
                DEBUG("%p: cannot enable promiscuous, "
@@ -4804,8 +4442,6 @@ mlx4_promiscuous_disable(struct rte_eth_dev *dev)
        struct priv *priv = dev->data->dev_private;
        unsigned int i;
 
-       if (mlx4_is_secondary())
-               return;
        priv_lock(priv);
        if (!priv->promisc || priv->isolated) {
                priv_unlock(priv);
@@ -4836,8 +4472,6 @@ mlx4_allmulticast_enable(struct rte_eth_dev *dev)
        unsigned int i;
        int ret;
 
-       if (mlx4_is_secondary())
-               return;
        priv_lock(priv);
        if (priv->isolated) {
                DEBUG("%p: cannot enable allmulticast, "
@@ -4890,8 +4524,6 @@ mlx4_allmulticast_disable(struct rte_eth_dev *dev)
        struct priv *priv = dev->data->dev_private;
        unsigned int i;
 
-       if (mlx4_is_secondary())
-               return;
        priv_lock(priv);
        if (!priv->allmulti || priv->isolated) {
                priv_unlock(priv);
@@ -4920,7 +4552,7 @@ end:
 static int
 mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete)
 {
-       const struct priv *priv = mlx4_get_priv(dev);
+       const struct priv *priv = dev->data->dev_private;
        struct ethtool_cmd edata = {
                .cmd = ETHTOOL_GSET
        };
@@ -4964,10 +4596,6 @@ mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete)
        return -1;
 }
 
-static int
-mlx4_ibv_device_to_pci_addr(const struct ibv_device *device,
-                           struct rte_pci_addr *pci_addr);
-
 /**
  * DPDK callback to change the MTU.
  *
@@ -4994,8 +4622,6 @@ mlx4_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
        uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
                mlx4_rx_burst;
 
-       if (mlx4_is_secondary())
-               return -E_RTE_SECONDARY;
        priv_lock(priv);
        /* Set kernel interface MTU first. */
        if (priv_set_mtu(priv, mtu)) {
@@ -5077,8 +4703,6 @@ mlx4_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        };
        int ret;
 
-       if (mlx4_is_secondary())
-               return -E_RTE_SECONDARY;
        ifr.ifr_data = (void *)&ethpause;
        priv_lock(priv);
        if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
@@ -5127,8 +4751,6 @@ mlx4_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        };
        int ret;
 
-       if (mlx4_is_secondary())
-               return -E_RTE_SECONDARY;
        ifr.ifr_data = (void *)&ethpause;
        ethpause.autoneg = fc_conf->autoneg;
        if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
@@ -5268,8 +4890,6 @@ mlx4_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
        struct priv *priv = dev->data->dev_private;
        int ret;
 
-       if (mlx4_is_secondary())
-               return -E_RTE_SECONDARY;
        priv_lock(priv);
        if (priv->isolated) {
                DEBUG("%p: cannot set vlan filter, "
@@ -5321,10 +4941,6 @@ mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
                        return -EINVAL;
                *(const void **)arg = &mlx4_flow_ops;
                return 0;
-       case RTE_ETH_FILTER_FDIR:
-               DEBUG("%p: filter type FDIR is not supported by this PMD",
-                     (void *)dev);
-               break;
        default:
                ERROR("%p: filter type (%d) not supported",
                      (void *)dev, filter_type);
@@ -5347,22 +4963,15 @@ static const struct eth_dev_ops mlx4_dev_ops = {
        .link_update = mlx4_link_update,
        .stats_get = mlx4_stats_get,
        .stats_reset = mlx4_stats_reset,
-       .queue_stats_mapping_set = NULL,
        .dev_infos_get = mlx4_dev_infos_get,
        .dev_supported_ptypes_get = mlx4_dev_supported_ptypes_get,
        .vlan_filter_set = mlx4_vlan_filter_set,
-       .vlan_tpid_set = NULL,
-       .vlan_strip_queue_set = NULL,
-       .vlan_offload_set = NULL,
        .rx_queue_setup = mlx4_rx_queue_setup,
        .tx_queue_setup = mlx4_tx_queue_setup,
        .rx_queue_release = mlx4_rx_queue_release,
        .tx_queue_release = mlx4_tx_queue_release,
-       .dev_led_on = NULL,
-       .dev_led_off = NULL,
        .flow_ctrl_get = mlx4_dev_get_flow_ctrl,
        .flow_ctrl_set = mlx4_dev_set_flow_ctrl,
-       .priority_flow_ctrl_set = NULL,
        .mac_addr_remove = mlx4_mac_addr_remove,
        .mac_addr_add = mlx4_mac_addr_add,
        .mac_addr_set = mlx4_mac_addr_set,
@@ -5444,40 +5053,6 @@ priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
        return 0;
 }
 
-/* Support up to 32 adapters. */
-static struct {
-       struct rte_pci_addr pci_addr; /* associated PCI address */
-       uint32_t ports; /* physical ports bitfield. */
-} mlx4_dev[32];
-
-/**
- * Get device index in mlx4_dev[] from PCI bus address.
- *
- * @param[in] pci_addr
- *   PCI bus address to look for.
- *
- * @return
- *   mlx4_dev[] index on success, -1 on failure.
- */
-static int
-mlx4_dev_idx(struct rte_pci_addr *pci_addr)
-{
-       unsigned int i;
-       int ret = -1;
-
-       assert(pci_addr != NULL);
-       for (i = 0; (i != elemof(mlx4_dev)); ++i) {
-               if ((mlx4_dev[i].pci_addr.domain == pci_addr->domain) &&
-                   (mlx4_dev[i].pci_addr.bus == pci_addr->bus) &&
-                   (mlx4_dev[i].pci_addr.devid == pci_addr->devid) &&
-                   (mlx4_dev[i].pci_addr.function == pci_addr->function))
-                       return i;
-               if ((mlx4_dev[i].ports == 0) && (ret == -1))
-                       ret = i;
-       }
-       return ret;
-}
-
 /**
  * Retrieve integer value from environment variable.
  *
@@ -5961,16 +5536,15 @@ mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
  *   Key argument to verify.
  * @param[in] val
  *   Value associated with key.
- * @param out
- *   User data.
+ * @param[in, out] conf
+ *   Shared configuration data.
  *
  * @return
  *   0 on success, negative errno value on failure.
  */
 static int
-mlx4_arg_parse(const char *key, const char *val, void *out)
+mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf)
 {
-       struct mlx4_conf *conf = out;
        unsigned long tmp;
 
        errno = 0;
@@ -5980,12 +5554,18 @@ mlx4_arg_parse(const char *key, const char *val, void *out)
                return -errno;
        }
        if (strcmp(MLX4_PMD_PORT_KVARG, key) == 0) {
-               if (tmp >= MLX4_PMD_MAX_PHYS_PORTS) {
-                       ERROR("invalid port index %lu (max: %u)",
-                               tmp, MLX4_PMD_MAX_PHYS_PORTS - 1);
+               uint32_t ports = rte_log2_u32(conf->ports.present + 1);
+
+               if (tmp >= ports) {
+                       ERROR("port index %lu outside range [0,%" PRIu32 ")",
+                             tmp, ports);
+                       return -EINVAL;
+               }
+               if (!(conf->ports.present & (1 << tmp))) {
+                       ERROR("invalid port index %lu", tmp);
                        return -EINVAL;
                }
-               conf->active_ports |= 1 << tmp;
+               conf->ports.enabled |= 1 << tmp;
        } else {
                WARN("%s: unknown parameter", key);
                return -EINVAL;
@@ -6021,8 +5601,13 @@ mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf)
        for (i = 0; pmd_mlx4_init_params[i]; ++i) {
                arg_count = rte_kvargs_count(kvlist, MLX4_PMD_PORT_KVARG);
                while (arg_count-- > 0) {
-                       ret = rte_kvargs_process(kvlist, MLX4_PMD_PORT_KVARG,
-                                       mlx4_arg_parse, conf);
+                       ret = rte_kvargs_process(kvlist,
+                                                MLX4_PMD_PORT_KVARG,
+                                                (int (*)(const char *,
+                                                         const char *,
+                                                         void *))
+                                                mlx4_arg_parse,
+                                                conf);
                        if (ret != 0)
                                goto free_kvlist;
                }
@@ -6057,24 +5642,14 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
        struct ibv_context *attr_ctx = NULL;
        struct ibv_device_attr device_attr;
        struct mlx4_conf conf = {
-               .active_ports = 0,
+               .ports.present = 0,
        };
        unsigned int vf;
-       int idx;
        int i;
 
        (void)pci_drv;
        assert(pci_drv == &mlx4_driver);
-       /* Get mlx4_dev[] index. */
-       idx = mlx4_dev_idx(&pci_dev->addr);
-       if (idx == -1) {
-               ERROR("this driver cannot support any more adapters");
-               return -ENOMEM;
-       }
-       DEBUG("using driver device index %d", idx);
 
-       /* Save PCI address. */
-       mlx4_dev[idx].pci_addr = pci_dev->addr;
        list = ibv_get_device_list(&i);
        if (list == NULL) {
                assert(errno);
@@ -6129,40 +5704,32 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
        }
        INFO("%u port(s) detected", device_attr.phys_port_cnt);
 
+       conf.ports.present |= (UINT64_C(1) << device_attr.phys_port_cnt) - 1;
        if (mlx4_args(pci_dev->device.devargs, &conf)) {
                ERROR("failed to process device arguments");
                err = EINVAL;
                goto error;
        }
        /* Use all ports when none are defined */
-       if (conf.active_ports == 0) {
-               for (i = 0; i < MLX4_PMD_MAX_PHYS_PORTS; i++)
-                       conf.active_ports |= 1 << i;
-       }
+       if (!conf.ports.enabled)
+               conf.ports.enabled = conf.ports.present;
        for (i = 0; i < device_attr.phys_port_cnt; i++) {
                uint32_t port = i + 1; /* ports are indexed from one */
-               uint32_t test = (1 << i);
                struct ibv_context *ctx = NULL;
                struct ibv_port_attr port_attr;
                struct ibv_pd *pd = NULL;
                struct priv *priv = NULL;
                struct rte_eth_dev *eth_dev = NULL;
-#ifdef HAVE_EXP_QUERY_DEVICE
                struct ibv_exp_device_attr exp_device_attr;
-#endif /* HAVE_EXP_QUERY_DEVICE */
                struct ether_addr mac;
 
-               /* If port is not active, skip. */
-               if (!(conf.active_ports & (1 << i)))
+               /* If port is not enabled, skip. */
+               if (!(conf.ports.enabled & (1 << i)))
                        continue;
-#ifdef HAVE_EXP_QUERY_DEVICE
                exp_device_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS;
-#ifdef RSS_SUPPORT
                exp_device_attr.comp_mask |= IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ;
-#endif /* RSS_SUPPORT */
-#endif /* HAVE_EXP_QUERY_DEVICE */
 
-               DEBUG("using port %u (%08" PRIx32 ")", port, test);
+               DEBUG("using port %u", port);
 
                ctx = ibv_open_device(ibv_dev);
                if (ctx == NULL) {
@@ -6198,8 +5765,6 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                        goto port_error;
                }
 
-               mlx4_dev[idx].ports |= test;
-
                /* from rte_ethdev.c */
                priv = rte_zmalloc("ethdev private structure",
                                   sizeof(*priv),
@@ -6215,13 +5780,11 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                priv->port = port;
                priv->pd = pd;
                priv->mtu = ETHER_MTU;
-#ifdef HAVE_EXP_QUERY_DEVICE
                if (ibv_exp_query_device(ctx, &exp_device_attr)) {
                        ERROR("ibv_exp_query_device() failed");
                        err = ENODEV;
                        goto port_error;
                }
-#ifdef RSS_SUPPORT
                if ((exp_device_attr.exp_device_cap_flags &
                     IBV_EXP_DEVICE_QPG) &&
                    (exp_device_attr.exp_device_cap_flags &
@@ -6246,7 +5809,6 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                if (priv->hw_rss)
                        DEBUG("maximum RSS indirection table size: %u",
                              exp_device_attr.max_rss_tbl_sz);
-#endif /* RSS_SUPPORT */
 
                priv->hw_csum =
                        ((exp_device_attr.exp_device_cap_flags &
@@ -6261,7 +5823,6 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                DEBUG("L2 tunnel checksum offloads are %ssupported",
                      (priv->hw_csum_l2tun ? "" : "not "));
 
-#ifdef INLINE_RECV
                priv->inl_recv_size = mlx4_getenv_int("MLX4_INLINE_RECV_SIZE");
 
                if (priv->inl_recv_size) {
@@ -6285,10 +5846,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                        INFO("Set inline receive size to %u",
                             priv->inl_recv_size);
                }
-#endif /* INLINE_RECV */
-#endif /* HAVE_EXP_QUERY_DEVICE */
 
-               (void)mlx4_getenv_int;
                priv->vf = vf;
                /* Configure the first MAC address by default. */
                if (priv_get_mac(priv, &mac.addr_bytes)) {
@@ -6338,36 +5896,8 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                        goto port_error;
                }
 
-               /* Secondary processes have to use local storage for their
-                * private data as well as a copy of eth_dev->data, but this
-                * pointer must not be modified before burst functions are
-                * actually called. */
-               if (mlx4_is_secondary()) {
-                       struct mlx4_secondary_data *sd =
-                               &mlx4_secondary_data[eth_dev->data->port_id];
-
-                       sd->primary_priv = eth_dev->data->dev_private;
-                       if (sd->primary_priv == NULL) {
-                               ERROR("no private data for port %u",
-                                     eth_dev->data->port_id);
-                               err = EINVAL;
-                               goto port_error;
-                       }
-                       sd->shared_dev_data = eth_dev->data;
-                       rte_spinlock_init(&sd->lock);
-                       memcpy(sd->data.name, sd->shared_dev_data->name,
-                              sizeof(sd->data.name));
-                       sd->data.dev_private = priv;
-                       sd->data.rx_mbuf_alloc_failed = 0;
-                       sd->data.mtu = ETHER_MTU;
-                       sd->data.port_id = sd->shared_dev_data->port_id;
-                       sd->data.mac_addrs = priv->mac;
-                       eth_dev->tx_pkt_burst = mlx4_tx_burst_secondary_setup;
-                       eth_dev->rx_pkt_burst = mlx4_rx_burst_secondary_setup;
-               } else {
-                       eth_dev->data->dev_private = priv;
-                       eth_dev->data->mac_addrs = priv->mac;
-               }
+               eth_dev->data->dev_private = priv;
+               eth_dev->data->mac_addrs = priv->mac;
                eth_dev->device = &pci_dev->device;
 
                rte_eth_copy_pci_info(eth_dev, pci_dev);
@@ -6405,6 +5935,8 @@ port_error:
                        rte_eth_dev_release_port(eth_dev);
                break;
        }
+       if (i == device_attr.phys_port_cnt)
+               return 0;
 
        /*
         * XXX if something went wrong in the loop above, there is a resource
@@ -6413,12 +5945,6 @@ port_error:
         * way to enumerate the registered ethdevs to free the previous ones.
         */
 
-       /* no port found, complain */
-       if (!mlx4_dev[idx].ports) {
-               err = ENODEV;
-               goto error;
-       }
-
 error:
        if (attr_ctx)
                claim_zero(ibv_close_device(attr_ctx));