/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

/**
 * @file
 * Tx queues configuration for mlx4 driver.
 */

#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <inttypes.h>
#include <unistd.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_errno.h>
#include <rte_ethdev_driver.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#include "mlx4.h"
#include "mlx4_glue.h"
#include "mlx4_prm.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"

/**
 * Mmap TX UAR (HW doorbell) pages into the reserved UAR address space.
 * Both primary and secondary processes perform this mmap so that the
 * UAR addresses match across processes.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
static int
mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd)
{
	unsigned int i, j;
	const unsigned int txqs_n = dev->data->nb_tx_queues;
	uintptr_t pages[txqs_n];
	unsigned int pages_n = 0;
	uintptr_t uar_va;
	uintptr_t off;
	void *addr;
	void *ret;
	struct txq *txq;
	int already_mapped;
	size_t page_size = sysconf(_SC_PAGESIZE);

	memset(pages, 0, txqs_n * sizeof(uintptr_t));
	/*
	 * As in rdma-core, UARs are mapped with OS page size granularity.
	 * Use the page-aligned address to detect and skip duplicate mmaps.
	 * See the libmlx4 function mlx4_init_context().
	 */
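	/*
	 * Illustration only (assuming 4 KiB pages): a verbs UAR address of
	 * 0x7f0000003008 gives off = 0x008 (the offset within the page) and
	 * uar_va = 0x7f0000003000, the page address compared below against
	 * pages that were already mapped.
	 */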
	for (i = 0; i != txqs_n; ++i) {
		txq = dev->data->tx_queues[i];
		if (!txq)
			continue;
		/* UAR address from verbs, used to find dups and the in-page offset. */
		uar_va = (uintptr_t)txq->msq.qp_sdb;
		off = uar_va & (page_size - 1); /* Offset in page. */
		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* Page address. */
		already_mapped = 0;
		for (j = 0; j != pages_n; ++j) {
			if (pages[j] == uar_va) {
				already_mapped = 1;
				break;
			}
		}
		/* New address in the reserved UAR address space. */
		addr = RTE_PTR_ADD(mlx4_shared_data->uar_base,
				   uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1));
		if (!already_mapped) {
			pages[pages_n++] = uar_va;
			/*
			 * Fixed mmap to the specified address in the
			 * reserved address space.
			 */
			ret = mmap(addr, page_size,
				   PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
				   txq->msq.uar_mmap_offset);
			if (ret != addr) {
				/* A fixed mmap must return the requested address. */
				ERROR("port %u call to mmap failed on UAR"
				      " for txq %u",
				      dev->data->port_id, i);
				rte_errno = ENXIO;
				return -rte_errno;
			}
		}
		if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* Save once. */
			txq->msq.db = RTE_PTR_ADD((void *)addr, off);
		else
			assert(txq->msq.db ==
			       RTE_PTR_ADD((void *)addr, off));
	}
	return 0;
}
#else
static int
mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused)
{
	/*
	 * Even if rdma-core does not support UAR remap, the primary
	 * process must not be interrupted.
	 */
	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
		return 0;
	ERROR("UAR remap is not supported");
	rte_errno = ENOTSUP;
	return -rte_errno;
}
#endif

/**
 * Free Tx queue elements.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 */
static void
mlx4_txq_free_elts(struct txq *txq)
{
	unsigned int elts_head = txq->elts_head;
	unsigned int elts_tail = txq->elts_tail;
	struct txq_elt (*elts)[txq->elts_n] = txq->elts;
	unsigned int elts_m = txq->elts_n - 1;

	DEBUG("%p: freeing WRs", (void *)txq);
	while (elts_tail != elts_head) {
		/* elts_n is a power of two; masking wraps the free-running index. */
		struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m];

		assert(elt->buf != NULL);
		rte_pktmbuf_free(elt->buf);
		elt->buf = NULL;
		elt->wqe = NULL;
	}
	txq->elts_tail = txq->elts_head;
}

/**
 * Retrieve information needed to directly access the Tx queue.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param mlxdv
 *   Pointer to device information for this Tx queue.
 */
static void
mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
{
	struct mlx4_sq *sq = &txq->msq;
	struct mlx4_cq *cq = &txq->mcq;
	struct mlx4dv_qp *dqp = mlxdv->qp.out;
	struct mlx4dv_cq *dcq = mlxdv->cq.out;

	/* Total length, including headroom and spare WQEs. */
	sq->size = (uint32_t)dqp->rq.offset - (uint32_t)dqp->sq.offset;
	sq->buf = (uint8_t *)dqp->buf.buf + dqp->sq.offset;
	sq->eob = sq->buf + sq->size;
	uint32_t headroom_size = 2048 + (1 << dqp->sq.wqe_shift);
	/* A contiguous headroom of this size must always remain free. */
	sq->remain_size = sq->size - headroom_size;
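	/*
	 * For illustration: assuming a WQE shift of 6 (64-byte basic
	 * blocks), the headroom is 2048 + 64 = 2112 bytes, leaving
	 * remain_size = sq->size - 2112 usable bytes in the send queue.
	 */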
	sq->owner_opcode = MLX4_OPCODE_SEND | (0u << MLX4_SQ_OWNER_BIT);
	sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL |
				     (0u << MLX4_SQ_OWNER_BIT));
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
	sq->uar_mmap_offset = dqp->uar_mmap_offset;
	sq->qp_sdb = dqp->sdb;
#else
	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
	sq->db = dqp->sdb;
#endif
	sq->doorbell_qpn = dqp->doorbell_qpn;
	cq->buf = dcq->buf.buf;
	cq->cqe_cnt = dcq->cqe_cnt;
	cq->set_ci_db = dcq->set_ci_db;
	/* Non-zero when the device uses 64-byte rather than 32-byte CQEs. */
	cq->cqe_64 = (dcq->cqe_size & 64) ? 1 : 0;
}

/**
 * Returns the per-port supported offloads.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Supported Tx offloads.
 */
uint64_t
mlx4_get_tx_port_offloads(struct mlx4_priv *priv)
{
	uint64_t offloads = DEV_TX_OFFLOAD_MULTI_SEGS;

	if (priv->hw_csum) {
		offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_UDP_CKSUM |
			     DEV_TX_OFFLOAD_TCP_CKSUM);
	}
	if (priv->tso)
		offloads |= DEV_TX_OFFLOAD_TCP_TSO;
	if (priv->hw_csum_l2tun) {
		offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (priv->tso)
			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				     DEV_TX_OFFLOAD_GRE_TNL_TSO);
	}
	return offloads;
}

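/*
 * Usage sketch (illustration only, not code from this file): a device
 * information callback would typically report these capabilities, e.g.:
 *
 *	info->tx_offload_capa = mlx4_get_tx_port_offloads(priv);
 */
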
/**
 * DPDK callback to configure a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4dv_obj mlxdv;
	struct mlx4dv_qp dv_qp;
	struct mlx4dv_cq dv_cq;
	struct txq_elt (*elts)[rte_align32pow2(desc)];
	struct ibv_qp_init_attr qp_init_attr;
	struct txq *txq;
	uint8_t *bounce_buf;
	struct mlx4_malloc_vec vec[] = {
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = sizeof(*txq),
			.addr = (void **)&txq,
		},
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = sizeof(*elts),
			.addr = (void **)&elts,
		},
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = MLX4_MAX_WQE_SIZE,
			.addr = (void **)&bounce_buf,
		},
	};
	int ret;
	uint64_t offloads;

	offloads = conf->offloads | dev->data->dev_conf.txmode.offloads;
	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= dev->data->nb_tx_queues) {
		rte_errno = EOVERFLOW;
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, dev->data->nb_tx_queues);
		return -rte_errno;
	}
	txq = dev->data->tx_queues[idx];
	if (txq) {
		rte_errno = EEXIST;
		DEBUG("%p: Tx queue %u already configured, release it first",
		      (void *)dev, idx);
		return -rte_errno;
	}
	if (!desc) {
		rte_errno = EINVAL;
		ERROR("%p: invalid number of Tx descriptors", (void *)dev);
		return -rte_errno;
	}
	if (desc != RTE_DIM(*elts)) {
		desc = RTE_DIM(*elts);
		WARN("%p: increased number of descriptors in Tx queue %u"
		     " to the next power of two (%u)",
		     (void *)dev, idx, desc);
	}
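	/* E.g. a request for 1000 descriptors is rounded up here to 1024. */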
	/* Allocate and initialize Tx queue. */
	mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket);
	if (!txq) {
		ERROR("%p: unable to allocate queue index %u",
		      (void *)dev, idx);
		return -rte_errno;
	}
	*txq = (struct txq){
		.priv = priv,
		.stats = {
			.idx = idx,
		},
		.socket = socket,
		.elts_n = desc,
		.elts = elts,
		.elts_head = 0,
		.elts_tail = 0,
		/*
		 * Request send completion every MLX4_PMD_TX_PER_COMP_REQ
		 * packets or at least 4 times per ring; see the cadence
		 * note after this initializer.
		 */
		.elts_comp_cd =
			RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4),
		.elts_comp_cd_init =
			RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4),
		.csum = priv->hw_csum &&
			(offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
				     DEV_TX_OFFLOAD_UDP_CKSUM |
				     DEV_TX_OFFLOAD_TCP_CKSUM)),
		.csum_l2tun = priv->hw_csum_l2tun &&
			      (offloads &
			       DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM),
		/* Enable Tx loopback for VF devices. */
		.lb = !!priv->vf,
		.bounce_buf = bounce_buf,
	};
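	/*
	 * Cadence example: assuming MLX4_PMD_TX_PER_COMP_REQ is 64, a
	 * 256-entry ring requests a completion every
	 * RTE_MIN(64, 256 / 4) == 64 packets, i.e. 4 times per ring.
	 */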
	priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_TX_QUEUE;
	priv->verbs_alloc_ctx.obj = txq;
	txq->cq = mlx4_glue->create_cq(priv->ctx, desc, NULL, NULL, 0);
	if (!txq->cq) {
		rte_errno = ENOMEM;
		ERROR("%p: CQ creation failure: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	qp_init_attr = (struct ibv_qp_init_attr){
		.send_cq = txq->cq,
		.recv_cq = txq->cq,
		.cap = {
			.max_send_wr =
				RTE_MIN(priv->device_attr.max_qp_wr, desc),
			.max_send_sge = 1,
			.max_inline_data = MLX4_PMD_MAX_INLINE,
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		/* No completion events are requested by default. */
		.sq_sig_all = 0,
	};
	txq->qp = mlx4_glue->create_qp(priv->pd, &qp_init_attr);
	if (!txq->qp) {
		rte_errno = errno ? errno : EINVAL;
		ERROR("%p: QP creation failure: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	/* Save the inline buffer size actually granted by create_qp(). */
	txq->max_inline = qp_init_attr.cap.max_inline_data;
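	/*
	 * Like any verbs QP, the raw packet QP must walk the standard
	 * INIT -> RTR -> RTS state machine below before it can transmit.
	 */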
	ret = mlx4_glue->modify_qp
		(txq->qp,
		 &(struct ibv_qp_attr){
			.qp_state = IBV_QPS_INIT,
			.port_num = priv->port,
		 },
		 IBV_QP_STATE | IBV_QP_PORT);
	if (ret) {
		rte_errno = ret;
		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	ret = mlx4_glue->modify_qp
		(txq->qp,
		 &(struct ibv_qp_attr){
			.qp_state = IBV_QPS_RTR,
		 },
		 IBV_QP_STATE);
	if (ret) {
		rte_errno = ret;
		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	ret = mlx4_glue->modify_qp
		(txq->qp,
		 &(struct ibv_qp_attr){
			.qp_state = IBV_QPS_RTS,
		 },
		 IBV_QP_STATE);
	if (ret) {
		rte_errno = ret;
		ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	/* Retrieve device queue information. */
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
	dv_qp = (struct mlx4dv_qp){
		.comp_mask = MLX4DV_QP_MASK_UAR_MMAP_OFFSET,
	};
#endif
	mlxdv.cq.in = txq->cq;
	mlxdv.cq.out = &dv_cq;
	mlxdv.qp.in = txq->qp;
	mlxdv.qp.out = &dv_qp;
	ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ);
	if (ret) {
		rte_errno = EINVAL;
		ERROR("%p: failed to obtain information needed for"
		      " accessing the device queues", (void *)dev);
		goto error;
	}
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
	if (!(dv_qp.comp_mask & MLX4DV_QP_MASK_UAR_MMAP_OFFSET)) {
		WARN("%p: failed to obtain UAR mmap offset", (void *)dev);
		dv_qp.uar_mmap_offset = -1; /* Make mmap() fail. */
	}
#endif
	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
	/* Save the first WQE pointer in the first element. */
	(&(*txq->elts)[0])->wqe =
		(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
	if (mlx4_mr_btree_init(&txq->mr_ctrl.cache_bh,
			       MLX4_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	/* Save a pointer to the global generation number for memory events. */
	txq->mr_ctrl.dev_gen_ptr = &priv->mr.dev_gen;
	DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
	dev->data->tx_queues[idx] = txq;
	priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
	return 0;
error:
	dev->data->tx_queues[idx] = NULL;
	ret = rte_errno;
	mlx4_tx_queue_release(txq);
	rte_errno = ret;
	assert(rte_errno > 0);
	priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
	return -rte_errno;
}

/**
 * DPDK callback to release a Tx queue.
 *
 * @param dpdk_txq
 *   Generic Tx queue pointer.
 */
void
mlx4_tx_queue_release(void *dpdk_txq)
{
	struct txq *txq = (struct txq *)dpdk_txq;
	struct mlx4_priv *priv;
	unsigned int i;

	if (txq == NULL)
		return;
	priv = txq->priv;
	for (i = 0; i != ETH_DEV(priv)->data->nb_tx_queues; ++i)
		if (ETH_DEV(priv)->data->tx_queues[i] == txq) {
			DEBUG("%p: removing Tx queue %p from list",
			      (void *)ETH_DEV(priv), (void *)txq);
			ETH_DEV(priv)->data->tx_queues[i] = NULL;
			break;
		}
	mlx4_txq_free_elts(txq);
	if (txq->qp)
		claim_zero(mlx4_glue->destroy_qp(txq->qp));
	if (txq->cq)
		claim_zero(mlx4_glue->destroy_cq(txq->cq));
	mlx4_mr_btree_free(&txq->mr_ctrl.cache_bh);
	rte_free(txq);
}