return ret;
}
-/**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx4_uninit_shared_data(void)
-{
- const struct rte_memzone *mz;
-
- rte_spinlock_lock(&mlx4_shared_data_lock);
- if (mlx4_shared_data) {
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA);
- rte_memzone_free(mz);
- } else {
- memset(&mlx4_local_data, 0, sizeof(mlx4_local_data));
- }
- mlx4_shared_data = NULL;
- }
- rte_spinlock_unlock(&mlx4_shared_data_lock);
-}
-
#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
/**
* Verbs callback to allocate a memory. This function should allocate the space
}
#endif
+/**
+ * Initialize process private data structure.
+ *
+ * Allocates the per-process structure (struct mlx4_proc_priv plus the
+ * trailing UAR table, one slot per configured Tx queue) and hooks it into
+ * dev->process_private. Called by each process (primary and secondary)
+ * independently, since the UAR table holds process-local mappings.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_proc_priv_init(struct rte_eth_dev *dev)
+{
+ struct mlx4_proc_priv *ppriv;
+ size_t ppriv_size;
+
+ /*
+ * UAR register table follows the process private structure. BlueFlame
+ * registers for Tx queues are stored in the table.
+ */
+ ppriv_size = sizeof(struct mlx4_proc_priv) +
+ dev->data->nb_tx_queues * sizeof(void *);
+ /* Allocate on the device's NUMA node for doorbell locality. */
+ ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size,
+ RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+ if (!ppriv) {
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ /*
+ * NOTE(review): uar_table_sz records the full allocation size
+ * (header + table), not just the table size — confirm intended.
+ */
+ ppriv->uar_table_sz = ppriv_size;
+ dev->process_private = ppriv;
+ return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * Frees the structure allocated by mlx4_proc_priv_init(). Safe to call
+ * more than once: the pointer is cleared after freeing.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mlx4_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+ if (!dev->process_private)
+ return;
+ rte_free(dev->process_private);
+ /* Clear the pointer so a repeated uninit is a harmless no-op. */
+ dev->process_private = NULL;
+}
+
/**
* DPDK callback for Ethernet device configuration.
*
goto exit;
}
ret = mlx4_intr_install(priv);
- if (ret)
+ if (ret) {
ERROR("%p: interrupt handler installation failed",
(void *)dev);
+ goto exit;
+ }
+ ret = mlx4_proc_priv_init(dev);
+ if (ret) {
+ ERROR("%p: process private data allocation failed",
+ (void *)dev);
+ goto exit;
+ }
exit:
return ret;
}
return 0;
DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
priv->started = 1;
- ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd);
- if (ret) {
- ERROR("%p: cannot remap UAR", (void *)dev);
- goto err;
- }
ret = mlx4_rss_init(priv);
if (ret) {
ERROR("%p: cannot initialize RSS resources: %s",
mlx4_dev_stop(struct rte_eth_dev *dev)
{
struct mlx4_priv *priv = dev->data->dev_private;
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
- const size_t page_size = sysconf(_SC_PAGESIZE);
- int i;
-#endif
if (!priv->started)
return;
mlx4_flow_sync(priv, NULL);
mlx4_rxq_intr_disable(priv);
mlx4_rss_deinit(priv);
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
- for (i = 0; i != dev->data->nb_tx_queues; ++i) {
- struct txq *txq;
-
- txq = dev->data->tx_queues[i];
- if (!txq)
- continue;
- munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db,
- page_size), page_size);
- }
-#endif
}
/**
mlx4_rx_queue_release(dev->data->rx_queues[i]);
for (i = 0; i != dev->data->nb_tx_queues; ++i)
mlx4_tx_queue_release(dev->data->tx_queues[i]);
+ mlx4_proc_priv_uninit(dev);
mlx4_mr_release(dev);
if (priv->pd != NULL) {
assert(priv->ctx != NULL);
static struct rte_pci_driver mlx4_driver;
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
- const struct rte_memseg *ms, void *arg)
-{
- void **addr = arg;
-
- if (msl->external)
- return 0;
- if (*addr == NULL)
- *addr = ms->addr;
- else
- *addr = RTE_MIN(*addr, ms->addr);
-
- return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_primary(void)
-{
- struct mlx4_shared_data *sd = mlx4_shared_data;
- void *addr = (void *)0;
-
- if (sd->uar_base)
- return 0;
- /* find out lower bound of hugepage segments */
- rte_memseg_walk(find_lower_va_bound, &addr);
- /* keep distance to hugepages to minimize potential conflicts. */
- addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE));
- /* anonymous mmap, no real memory consumption. */
- addr = mmap(addr, MLX4_UAR_SIZE,
- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (addr == MAP_FAILED) {
- ERROR("failed to reserve UAR address space, please"
- " adjust MLX4_UAR_SIZE or try --base-virtaddr");
- rte_errno = ENOMEM;
- return -rte_errno;
- }
- /* Accept either same addr or a new addr returned from mmap if target
- * range occupied.
- */
- INFO("reserved UAR address space: %p", addr);
- sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
- return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx4_uar_uninit_primary(void)
-{
- struct mlx4_shared_data *sd = mlx4_shared_data;
-
- if (!sd->uar_base)
- return;
- munmap(sd->uar_base, MLX4_UAR_SIZE);
- sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_uar_init_secondary(void)
-{
- struct mlx4_shared_data *sd = mlx4_shared_data;
- struct mlx4_local_data *ld = &mlx4_local_data;
- void *addr;
-
- if (ld->uar_base) { /* Already reserved. */
- assert(sd->uar_base == ld->uar_base);
- return 0;
- }
- assert(sd->uar_base);
- /* anonymous mmap, no real memory consumption. */
- addr = mmap(sd->uar_base, MLX4_UAR_SIZE,
- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (addr == MAP_FAILED) {
- ERROR("UAR mmap failed: %p size: %llu",
- sd->uar_base, MLX4_UAR_SIZE);
- rte_errno = ENXIO;
- return -rte_errno;
- }
- if (sd->uar_base != addr) {
- ERROR("UAR address %p size %llu occupied, please"
- " adjust MLX4_UAR_OFFSET or try EAL parameter"
- " --base-virtaddr",
- sd->uar_base, MLX4_UAR_SIZE);
- rte_errno = ENXIO;
- return -rte_errno;
- }
- ld->uar_base = addr;
- INFO("reserved UAR address space: %p", addr);
- return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx4_uar_uninit_secondary(void)
-{
- struct mlx4_local_data *ld = &mlx4_local_data;
-
- if (!ld->uar_base)
- return;
- munmap(ld->uar_base, MLX4_UAR_SIZE);
- ld->uar_base = NULL;
-}
-
/**
* PMD global initialization.
*
{
struct mlx4_shared_data *sd;
struct mlx4_local_data *ld = &mlx4_local_data;
- int ret;
if (mlx4_init_shared_data())
return -rte_errno;
rte_mem_event_callback_register("MLX4_MEM_EVENT_CB",
mlx4_mr_mem_event_cb, NULL);
mlx4_mp_init_primary();
- ret = mlx4_uar_init_primary();
- if (ret)
- goto error;
sd->init_done = true;
break;
case RTE_PROC_SECONDARY:
if (ld->init_done)
break;
mlx4_mp_init_secondary();
- ret = mlx4_uar_init_secondary();
- if (ret)
- goto error;
++sd->secondary_cnt;
ld->init_done = true;
break;
}
rte_spinlock_unlock(&sd->lock);
return 0;
-error:
- switch (rte_eal_process_type()) {
- case RTE_PROC_PRIMARY:
- mlx4_uar_uninit_primary();
- mlx4_mp_uninit_primary();
- rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL);
- break;
- case RTE_PROC_SECONDARY:
- mlx4_uar_uninit_secondary();
- mlx4_mp_uninit_secondary();
- break;
- default:
- break;
- }
- rte_spinlock_unlock(&sd->lock);
- mlx4_uninit_shared_data();
- return -rte_errno;
}
/**
}
eth_dev->device = &pci_dev->device;
eth_dev->dev_ops = &mlx4_dev_sec_ops;
+ err = mlx4_proc_priv_init(eth_dev);
+ if (err)
+ goto error;
/* Receive command fd from primary process. */
err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
if (err < 0) {
goto error;
}
/* Remap UAR for Tx queues. */
- err = mlx4_tx_uar_remap(eth_dev, err);
+ err = mlx4_tx_uar_init_secondary(eth_dev, err);
if (err) {
err = rte_errno;
goto error;
/** Enable extending memsegs when creating a MR. */
#define MLX4_MR_EXT_MEMSEG_EN_KVARG "mr_ext_memseg_en"
-/* Reserved address space for UAR mapping. */
-#define MLX4_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX4_UAR_OFFSET (2ULL << (sizeof(uintptr_t) * 4))
-
enum {
PCI_VENDOR_ID_MELLANOX = 0x15b3,
};
/* Global spinlock for primary and secondary processes. */
int init_done; /* Whether primary has done initialization. */
unsigned int secondary_cnt; /* Number of secondary processes init'd. */
- void *uar_base;
- /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
struct mlx4_dev_list mem_event_cb_list;
rte_rwlock_t mem_event_rwlock;
};
/* Per-process data structure, not visible to other processes. */
struct mlx4_local_data {
int init_done; /* Whether a secondary has done initialization. */
- void *uar_base;
- /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
};
extern struct mlx4_shared_data *mlx4_shared_data;
+/* Per-process private structure. */
+struct mlx4_proc_priv {
+ size_t uar_table_sz;
+ /* Size of UAR register table. */
+ void *uar_table[];
+ /* Table of UAR registers for each process. */
+};
+
+/* Fetch this port's per-process private data (NULL before init). */
+#define MLX4_PROC_PRIV(port_id) \
+ ((struct mlx4_proc_priv *)rte_eth_devices[(port_id)].process_private)
+
/** Private data structure. */
struct mlx4_priv {
LIST_ENTRY(mlx4_priv) mem_event_cb;
uint32_t owner_opcode;
/**< Default owner opcode with HW valid owner bit. */
uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */
- volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. */
- volatile uint32_t *db; /**< Pointer to the doorbell remapped. */
+ uint32_t *db; /**< Pointer to the doorbell. */
off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
};
/* Make sure that descriptors are written before doorbell record. */
rte_wmb();
/* Ring QP doorbell. */
- rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+ rte_write32(txq->msq.doorbell_qpn, MLX4_TX_BFREG(txq));
txq->elts_head += i;
return i;
}
struct txq {
struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+ uint16_t port_id; /**< Port ID of device. */
unsigned int elts_head; /**< Current index in (*elts)[]. */
unsigned int elts_tail; /**< First element awaiting completion. */
int elts_comp_cd; /**< Countdown for next completion. */
uint8_t data[]; /**< Remaining queue resources. */
};
+/* Per-process doorbell (BlueFlame) register of a Tx queue, fetched from the
+ * process-private UAR table; stats.idx is asserted equal to the queue index
+ * at init time.
+ */
+#define MLX4_TX_BFREG(txq) \
+ (MLX4_PROC_PRIV((txq)->port_id)->uar_table[(txq)->stats.idx])
+
/* mlx4_rxq.c */
uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE];
/* mlx4_txq.c */
-int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv);
int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
uint16_t desc, unsigned int socket,
#include "mlx4_utils.h"
/**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for primary process.
*
- * @param[in] dev
+ * @param txq
+ * Pointer to Tx queue structure.
+ */
+static void
+txq_uar_init(struct txq *txq)
+{
+ struct mlx4_priv *priv = txq->priv;
+ struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+
+ /* Primary only: the verbs-mapped doorbell address is usable as-is. */
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ assert(ppriv);
+ /* Record the doorbell address in this process's UAR table. */
+ ppriv->uar_table[txq->stats.idx] = txq->msq.db;
+}
+
+#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * Remapped address is stored at the table in the process private structure of
+ * the device, indexed by queue index.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct txq *txq, int fd)
+{
+ struct mlx4_priv *priv = txq->priv;
+ struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
+ void *addr;
+ uintptr_t uar_va;
+ uintptr_t offset;
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+
+ assert(ppriv);
+ /* NOTE(review): sysconf() result is not checked — assumes a valid
+ * page size; confirm this matches the rest of the PMD.
+ */
+ /*
+ * As rdma-core, UARs are mapped in size of OS page
+ * size. Ref to libmlx4 function: mlx4_init_context()
+ */
+ uar_va = (uintptr_t)txq->msq.db;
+ offset = uar_va & (page_size - 1); /* Offset in page. */
+ /* NULL hint: the kernel picks the address, no fixed remap needed. */
+ addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+ txq->msq.uar_mmap_offset);
+ if (addr == MAP_FAILED) {
+ ERROR("port %u mmap failed for BF reg of txq %u",
+ txq->port_id, txq->stats.idx);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
+ /* Store page base + in-page offset of the doorbell. */
+ addr = RTE_PTR_ADD(addr, offset);
+ ppriv->uar_table[txq->stats.idx] = addr;
+ return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ */
+static void
+txq_uar_uninit_secondary(struct txq *txq)
+{
+ struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(txq->priv));
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+ void *addr;
+
+ addr = ppriv->uar_table[txq->stats.idx];
+ /* Recover the page base mapped by txq_uar_init_secondary(). */
+ munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+ /* NOTE(review): the uar_table entry is left dangling after munmap —
+ * confirm callers never dereference it afterwards.
+ */
+}
+
+/**
+ * Initialize Tx UAR registers for secondary process.
+ *
+ * @param dev
* Pointer to Ethernet device.
* @param fd
* Verbs file descriptor to map UAR pages.
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
-	unsigned int i, j;
	const unsigned int txqs_n = dev->data->nb_tx_queues;
-	uintptr_t pages[txqs_n];
-	unsigned int pages_n = 0;
-	uintptr_t uar_va;
-	uintptr_t off;
-	void *addr;
-	void *ret;
	struct txq *txq;
-	int already_mapped;
-	size_t page_size = sysconf(_SC_PAGESIZE);
+	unsigned int i;
+	int ret;
-	memset(pages, 0, txqs_n * sizeof(uintptr_t));
-	/*
-	 * As rdma-core, UARs are mapped in size of OS page size.
-	 * Use aligned address to avoid duplicate mmap.
-	 * Ref to libmlx4 function: mlx4_init_context()
-	 */
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+	/* Remap the doorbell of every configured Tx queue into this process. */
	for (i = 0; i != txqs_n; ++i) {
		txq = dev->data->tx_queues[i];
		if (!txq)
			continue;
-		/* UAR addr form verbs used to find dup and offset in page. */
-		uar_va = (uintptr_t)txq->msq.qp_sdb;
-		off = uar_va & (page_size - 1); /* offset in page. */
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
-		already_mapped = 0;
-		for (j = 0; j != pages_n; ++j) {
-			if (pages[j] == uar_va) {
-				already_mapped = 1;
-				break;
-			}
-		}
-		/* new address in reserved UAR address space. */
-		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
-				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
-		if (!already_mapped) {
-			pages[pages_n++] = uar_va;
-			/* fixed mmap to specified address in reserved
-			 * address space.
-			 */
-			ret = mmap(addr, page_size,
-				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-				   txq->msq.uar_mmap_offset);
-			if (ret != addr) {
-				/* fixed mmap has to return same address. */
-				ERROR("port %u call to mmap failed on UAR"
-				      " for txq %u",
-				      dev->data->port_id, i);
-				rte_errno = ENXIO;
-				return -rte_errno;
-			}
-		}
-		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */
-			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
-		else
-			assert(txq->msq.db ==
-			       RTE_PTR_ADD((void *)addr, off));
+		assert(txq->stats.idx == (uint16_t)i);
+		ret = txq_uar_init_secondary(txq, fd);
+		if (ret)
+			goto error;
	}
	return 0;
+error:
+	/* Rollback. */
+	/* NOTE(review): the rollback starts at the failing index i, whose
+	 * uar_table entry was never populated by txq_uar_init_secondary() —
+	 * confirm that unmapping this stale slot is safe.
+	 */
+	do {
+		txq = dev->data->tx_queues[i];
+		if (!txq)
+			continue;
+		txq_uar_uninit_secondary(txq);
+	/* continue jumps here: i-- still steps the loop down. */
+	} while (i--);
+	return -rte_errno;
}
#else
int
-mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
+mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
+ int fd __rte_unused)
{
- /*
- * Even if rdma-core doesn't support UAR remap, primary process
- * shouldn't be interrupted.
- */
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- return 0;
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
ERROR("UAR remap is not supported");
rte_errno = ENOTSUP;
return -rte_errno;
(0u << MLX4_SQ_OWNER_BIT));
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
sq->uar_mmap_offset = dqp->uar_mmap_offset;
- sq->qp_sdb = dqp->sdb;
#else
sq->uar_mmap_offset = -1; /* Make mmap() fail. */
- sq->db = dqp->sdb;
#endif
+ sq->db = dqp->sdb;
sq->doorbell_qpn = dqp->doorbell_qpn;
cq->buf = dcq->buf.buf;
cq->cqe_cnt = dcq->cqe_cnt;
}
*txq = (struct txq){
.priv = priv,
+ .port_id = dev->data->port_id,
.stats = {
.idx = idx,
},
}
#endif
mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
+ txq_uar_init(txq);
/* Save first wqe pointer in the first element. */
(&(*txq->elts)[0])->wqe =
(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;