return ret;
}
-/**
- * Uninitialize shared data between primary and secondary process.
- *
- * The pointer of secondary process is dereferenced and primary process frees
- * the memzone.
- */
-static void
-mlx5_uninit_shared_data(void)
-{
- const struct rte_memzone *mz;
-
- rte_spinlock_lock(&mlx5_shared_data_lock);
- if (mlx5_shared_data) {
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- mz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);
- rte_memzone_free(mz);
- } else {
- memset(&mlx5_local_data, 0, sizeof(mlx5_local_data));
- }
- mlx5_shared_data = NULL;
- }
- rte_spinlock_unlock(&mlx5_shared_data_lock);
-}
-
/**
* Retrieve integer value from environment variable.
*
rte_free(ptr);
}
+/**
+ * Initialize process private data structure.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_proc_priv_init(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_proc_priv *ppriv;
+ size_t ppriv_size;
+
+ /*
+ * The UAR register table follows the process private structure. The
+ * BlueFlame register of each Tx queue is stored in this table.
+ */
+ ppriv_size =
+ sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *);
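+ /* Allocate on the device NUMA node for data-path locality. */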
+ ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size,
+ RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+ if (!ppriv) {
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ ppriv->uar_table_sz = ppriv_size;
+ dev->process_private = ppriv;
+ return 0;
+}
+
+/**
+ * Un-initialize process private data structure.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
+{
+ if (!dev->process_private)
+ return;
+ rte_free(dev->process_private);
+ dev->process_private = NULL;
+}
+
/**
* DPDK callback to close the device.
*
priv->txqs_n = 0;
priv->txqs = NULL;
}
+ mlx5_proc_priv_uninit(dev);
mlx5_mprq_free_mp(dev);
mlx5_mr_release(dev);
assert(priv->sh);
static struct rte_pci_driver mlx5_driver;
-static int
-find_lower_va_bound(const struct rte_memseg_list *msl,
- const struct rte_memseg *ms, void *arg)
-{
- void **addr = arg;
-
- if (msl->external)
- return 0;
- if (*addr == NULL)
- *addr = ms->addr;
- else
- *addr = RTE_MIN(*addr, ms->addr);
-
- return 0;
-}
-
-/**
- * Reserve UAR address space for primary process.
- *
- * Process local resource is used by both primary and secondary to avoid
- * duplicate reservation. The space has to be available on both primary and
- * secondary process, TXQ UAR maps to this area using fixed mmap w/o double
- * check.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_primary(void)
-{
- struct mlx5_shared_data *sd = mlx5_shared_data;
- void *addr = (void *)0;
-
- if (sd->uar_base)
- return 0;
- /* find out lower bound of hugepage segments */
- rte_memseg_walk(find_lower_va_bound, &addr);
- /* keep distance to hugepages to minimize potential conflicts. */
- addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
- /* anonymous mmap, no real memory consumption. */
- addr = mmap(addr, MLX5_UAR_SIZE,
- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (addr == MAP_FAILED) {
- DRV_LOG(ERR,
- "Failed to reserve UAR address space, please"
- " adjust MLX5_UAR_SIZE or try --base-virtaddr");
- rte_errno = ENOMEM;
- return -rte_errno;
- }
- /* Accept either same addr or a new addr returned from mmap if target
- * range occupied.
- */
- DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
- sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
- return 0;
-}
-
-/**
- * Unmap UAR address space reserved for primary process.
- */
-static void
-mlx5_uar_uninit_primary(void)
-{
- struct mlx5_shared_data *sd = mlx5_shared_data;
-
- if (!sd->uar_base)
- return;
- munmap(sd->uar_base, MLX5_UAR_SIZE);
- sd->uar_base = NULL;
-}
-
-/**
- * Reserve UAR address space for secondary process, align with primary process.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_uar_init_secondary(void)
-{
- struct mlx5_shared_data *sd = mlx5_shared_data;
- struct mlx5_local_data *ld = &mlx5_local_data;
- void *addr;
-
- if (ld->uar_base) { /* Already reserved. */
- assert(sd->uar_base == ld->uar_base);
- return 0;
- }
- assert(sd->uar_base);
- /* anonymous mmap, no real memory consumption. */
- addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (addr == MAP_FAILED) {
- DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
- sd->uar_base, MLX5_UAR_SIZE);
- rte_errno = ENXIO;
- return -rte_errno;
- }
- if (sd->uar_base != addr) {
- DRV_LOG(ERR,
- "UAR address %p size %llu occupied, please"
- " adjust MLX5_UAR_OFFSET or try EAL parameter"
- " --base-virtaddr",
- sd->uar_base, MLX5_UAR_SIZE);
- rte_errno = ENXIO;
- return -rte_errno;
- }
- ld->uar_base = addr;
- DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
- return 0;
-}
-
-/**
- * Unmap UAR address space reserved for secondary process.
- */
-static void
-mlx5_uar_uninit_secondary(void)
-{
- struct mlx5_local_data *ld = &mlx5_local_data;
-
- if (!ld->uar_base)
- return;
- munmap(ld->uar_base, MLX5_UAR_SIZE);
- ld->uar_base = NULL;
-}
-
/**
* PMD global initialization.
*
{
struct mlx5_shared_data *sd;
struct mlx5_local_data *ld = &mlx5_local_data;
- int ret;
if (mlx5_init_shared_data())
return -rte_errno;
rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
mlx5_mr_mem_event_cb, NULL);
mlx5_mp_init_primary();
- ret = mlx5_uar_init_primary();
- if (ret)
- goto error;
sd->init_done = true;
break;
case RTE_PROC_SECONDARY:
if (ld->init_done)
break;
mlx5_mp_init_secondary();
- ret = mlx5_uar_init_secondary();
- if (ret)
- goto error;
++sd->secondary_cnt;
ld->init_done = true;
break;
}
rte_spinlock_unlock(&sd->lock);
return 0;
-error:
- switch (rte_eal_process_type()) {
- case RTE_PROC_PRIMARY:
- mlx5_uar_uninit_primary();
- mlx5_mp_uninit_primary();
- rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", NULL);
- break;
- case RTE_PROC_SECONDARY:
- mlx5_uar_uninit_secondary();
- mlx5_mp_uninit_secondary();
- break;
- default:
- break;
- }
- rte_spinlock_unlock(&sd->lock);
- mlx5_uninit_shared_data();
- return -rte_errno;
}
/**
}
eth_dev->device = dpdk_dev;
eth_dev->dev_ops = &mlx5_dev_sec_ops;
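+ /* Create the secondary's own UAR table before remapping Tx doorbells. */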
+ err = mlx5_proc_priv_init(eth_dev);
+ if (err)
+ return NULL;
/* Receive command fd from primary process */
err = mlx5_mp_req_verbs_cmd_fd(eth_dev);
if (err < 0)
return NULL;
/* Remap UAR for Tx queues. */
- err = mlx5_tx_uar_remap(eth_dev, err);
+ err = mlx5_tx_uar_init_secondary(eth_dev, err);
if (err)
return NULL;
/*
/* Global spinlock for primary and secondary processes. */
int init_done; /* Whether primary has done initialization. */
unsigned int secondary_cnt; /* Number of secondary processes init'd. */
- void *uar_base;
- /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
struct mlx5_dev_list mem_event_cb_list;
rte_rwlock_t mem_event_rwlock;
};
/* Per-process data structure, not visible to other processes. */
struct mlx5_local_data {
int init_done; /* Whether a secondary has done initialization. */
- void *uar_base;
- /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */
};
extern struct mlx5_shared_data *mlx5_shared_data;
struct mlx5_ibv_shared_port port[]; /* per device port data array. */
};
+/* Per-process private structure. */
+struct mlx5_proc_priv {
+ size_t uar_table_sz;
+ /* Size of UAR register table. */
+ void *uar_table[];
+ /* Table of UAR registers, one entry per Tx queue. */
+};
+
+#define MLX5_PROC_PRIV(port_id) \
+ ((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private)
+
struct mlx5_priv {
LIST_ENTRY(mlx5_priv) mem_event_cb;
/**< Called by memory event callback. */
/* mlx5.c */
int mlx5_getenv_int(const char *);
+int mlx5_proc_priv_init(struct rte_eth_dev *dev);
/* mlx5_ethdev.c */
/* Timeout in seconds to get a valid link status. */
#define MLX5_LINK_STATUS_TIMEOUT 10
-/* Reserved address space for UAR mapping. */
-#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
-
-/* Offset of reserved UAR address space to hugepage memory. Offset is used here
- * to minimize possibility of address next to hugepage being used by other code
- * in either primary or secondary process, failing to map TX UAR would make TX
- * packets invisible to HW.
- */
-#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4))
-
/* Maximum number of UAR pages used by a port,
* These are the size and mask for an array of mutexes used to synchronize
* the access to port's UARs on platforms that do not support 64 bit writes.
if (++j == rxqs_n)
j = 0;
}
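+ /* Size the process private structure to the configured Tx queue count. */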
+ ret = mlx5_proc_priv_init(dev);
+ if (ret)
+ return ret;
return 0;
}
volatile void *wqes; /* Work queue (use volatile to write into). */
volatile uint32_t *qp_db; /* Work queue doorbell. */
volatile uint32_t *cq_db; /* Completion queue doorbell. */
- volatile void *bf_reg; /* Blueflame register remapped. */
struct rte_mbuf *(*elts)[]; /* TX elements. */
+ uint16_t port_id; /* Port ID of device. */
uint16_t idx; /* Queue index. */
struct mlx5_txq_stats stats; /* TX queue counters. */
#ifndef RTE_ARCH_64
struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
struct mlx5_priv *priv; /* Back pointer to private data. */
off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
- volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+ void *bf_reg; /* BlueFlame register from Verbs. */
};
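+/* BlueFlame register of a Tx queue, from this process's UAR table. */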
+#define MLX5_TX_BFREG(txq) \
+ (MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx])
+
/* mlx5_rxq.c */
extern uint8_t rss_hash_default_key[];
int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
unsigned int socket, const struct rte_eth_txconf *conf);
void mlx5_tx_queue_release(void *dpdk_txq);
-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
int cond)
{
- uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+ uint64_t *dst = MLX5_TX_BFREG(txq);
volatile uint64_t *src = ((volatile uint64_t *)wqe);
rte_cio_wmb();
goto error;
}
}
- ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
- if (ret) {
- /* Adjust index for rollback. */
- i = priv->txqs_n - 1;
- goto error;
- }
return 0;
error:
ret = rte_errno; /* Save rte_errno before cleanup. */
}
}
+/**
+ * Initialize UAR register of a Tx queue for primary process.
+ *
+ * @param txq_ctrl
+ * Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
+{
+ struct mlx5_priv *priv = txq_ctrl->priv;
+ struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+#ifndef RTE_ARCH_64
+ unsigned int lock_idx;
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+#endif
+
+ assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ assert(ppriv);
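+ /* The primary uses the Verbs BlueFlame address directly, no remapping needed. */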
+ ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
+#ifndef RTE_ARCH_64
+ /* Assign a UAR lock according to the UAR page number. */
+ lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+ MLX5_UAR_PAGE_NUM_MASK;
+ txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx];
+#endif
+}
+
+/**
+ * Remap UAR register of a Tx queue for secondary process.
+ *
+ * The remapped address is stored in the UAR register table of the device's
+ * process private structure, indexed by queue index.
+ *
+ * @param txq_ctrl
+ * Pointer to Tx queue control structure.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
+{
+ struct mlx5_priv *priv = txq_ctrl->priv;
+ struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
+ struct mlx5_txq_data *txq = &txq_ctrl->txq;
+ void *addr;
+ uintptr_t uar_va;
+ uintptr_t offset;
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+
+ assert(ppriv);
+ /*
+ * rdma-core maps UAR registers at OS page size granularity; see the
+ * libmlx5 function mlx5_init_context().
+ */
+ uar_va = (uintptr_t)txq_ctrl->bf_reg;
+ offset = uar_va & (page_size - 1); /* Offset in page. */
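+ /* Map the UAR page containing the register through the Verbs fd. */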
+ addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+ txq_ctrl->uar_mmap_offset);
+ if (addr == MAP_FAILED) {
+ DRV_LOG(ERR,
+ "port %u mmap failed for BF reg of txq %u",
+ txq->port_id, txq->idx);
+ rte_errno = ENXIO;
+ return -rte_errno;
+ }
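+ /* Restore the register's offset within the newly mapped page. */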
+ addr = RTE_PTR_ADD(addr, offset);
+ ppriv->uar_table[txq->idx] = addr;
+ return 0;
+}
+
+/**
+ * Unmap UAR register of a Tx queue for secondary process.
+ *
+ * @param txq_ctrl
+ * Pointer to Tx queue control structure.
+ */
+static void
+txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
+{
+ struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+ void *addr;
+
+ addr = ppriv->uar_table[txq_ctrl->txq.idx];
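+ /* The table holds the register address; unmap its containing page. */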
+ munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
+}
/**
- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
- * Both primary and secondary process do mmap to make UAR address
- * aligned.
+ * Initialize Tx UAR registers for secondary process.
*
- * @param[in] dev
+ * @param dev
* Pointer to Ethernet device.
* @param fd
* Verbs file descriptor to map UAR pages.
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
+mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
struct mlx5_priv *priv = dev->data->dev_private;
- unsigned int i, j;
- uintptr_t pages[priv->txqs_n];
- unsigned int pages_n = 0;
- uintptr_t uar_va;
- uintptr_t off;
- void *addr;
- void *ret;
struct mlx5_txq_data *txq;
struct mlx5_txq_ctrl *txq_ctrl;
- int already_mapped;
- size_t page_size = sysconf(_SC_PAGESIZE);
-#ifndef RTE_ARCH_64
- unsigned int lock_idx;
-#endif
+ unsigned int i;
+ int ret;
- memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
- /*
- * As rdma-core, UARs are mapped in size of OS page size.
- * Use aligned address to avoid duplicate mmap.
- * Ref to libmlx5 function: mlx5_init_context()
- */
+ assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
for (i = 0; i != priv->txqs_n; ++i) {
if (!(*priv->txqs)[i])
continue;
txq = (*priv->txqs)[i];
txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
assert(txq->idx == (uint16_t)i);
- /* UAR addr form verbs used to find dup and offset in page. */
- uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
- off = uar_va & (page_size - 1); /* offset in page. */
- uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
- already_mapped = 0;
- for (j = 0; j != pages_n; ++j) {
- if (pages[j] == uar_va) {
- already_mapped = 1;
- break;
- }
- }
- /* new address in reserved UAR address space. */
- addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
- uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
- if (!already_mapped) {
- pages[pages_n++] = uar_va;
- /* fixed mmap to specified address in reserved
- * address space.
- */
- ret = mmap(addr, page_size,
- PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
- txq_ctrl->uar_mmap_offset);
- if (ret != addr) {
- /* fixed mmap have to return same address */
- DRV_LOG(ERR,
- "port %u call to mmap failed on UAR"
- " for txq %u",
- dev->data->port_id, txq->idx);
- rte_errno = ENXIO;
- return -rte_errno;
- }
- }
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
- txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
- else
- assert(txq_ctrl->txq.bf_reg ==
- RTE_PTR_ADD((void *)addr, off));
-#ifndef RTE_ARCH_64
- /* Assign a UAR lock according to UAR page number */
- lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
- MLX5_UAR_PAGE_NUM_MASK;
- txq->uar_lock = &priv->uar_lock[lock_idx];
-#endif
+ ret = txq_uar_init_secondary(txq_ctrl, fd);
+ if (ret)
+ goto error;
}
return 0;
+error:
+ /* Rollback: unmap only the queues that were set up before the failure. */
+ while (i--) {
+ if (!(*priv->txqs)[i])
+ continue;
+ txq = (*priv->txqs)[i];
+ txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+ txq_uar_uninit_secondary(txq_ctrl);
+ }
+ return -rte_errno;
}
/**
txq_data->wqes = qp.sq.buf;
txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
- txq_ctrl->bf_reg_orig = qp.bf.reg;
txq_data->cq_db = cq_info.dbrec;
txq_data->cqes =
(volatile struct mlx5_cqe (*)[])
txq_ibv->qp = tmpl.qp;
txq_ibv->cq = tmpl.cq;
rte_atomic32_inc(&txq_ibv->refcnt);
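+ /* Save the Verbs BlueFlame register and publish it in this process's UAR table. */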
+ txq_ctrl->bf_reg = qp.bf.reg;
+ txq_uar_init(txq_ctrl);
if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
tmpl->priv = priv;
tmpl->socket = socket;
tmpl->txq.elts_n = log2above(desc);
+ tmpl->txq.port_id = dev->data->port_id;
tmpl->txq.idx = idx;
txq_set_params(tmpl);
DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_txq_ctrl *txq;
- size_t page_size = sysconf(_SC_PAGESIZE);
if (!(*priv->txqs)[idx])
return 0;
txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
txq->ibv = NULL;
- munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, page_size),
- page_size);
if (rte_atomic32_dec_and_test(&txq->refcnt)) {
txq_free_elts(txq);
mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);