/*-
 *   BSD LICENSE
 *
 *   Copyright 2017 6WIND S.A.
 *   Copyright 2017 Mellanox
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * @file
 * Tx queues configuration for mlx4 driver.
 */

#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#include "mlx4.h"
#include "mlx4_autoconf.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
/**
 * Allocate Tx queue elements.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_txq_alloc_elts(struct txq *txq, unsigned int elts_n)
{
	unsigned int i;
	struct txq_elt (*elts)[elts_n] =
		rte_calloc_socket("TXQ", 1, sizeof(*elts), 0, txq->socket);
	int ret = 0;

	if (elts == NULL) {
		ERROR("%p: can't allocate packets array", (void *)txq);
		ret = ENOMEM;
		goto error;
	}
	for (i = 0; (i != elts_n); ++i) {
		struct txq_elt *elt = &(*elts)[i];

		elt->buf = NULL;
	}
	DEBUG("%p: allocated and configured %u WRs", (void *)txq, elts_n);
	txq->elts_n = elts_n;
	txq->elts = elts;
	txq->elts_head = 0;
	txq->elts_tail = 0;
	txq->elts_comp = 0;
	/*
	 * Request send completion every MLX4_PMD_TX_PER_COMP_REQ packets or
	 * at least 4 times per ring.
	 */
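	/*
	 * For example, a 512-element ring yields
	 * min(MLX4_PMD_TX_PER_COMP_REQ, 512 / 4), so no more than 128
	 * packets are ever sent between two completion requests. The Tx
	 * burst function decrements elts_comp_cd once per packet and
	 * re-arms it from elts_comp_cd_init when it reaches zero.
	 */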
	txq->elts_comp_cd_init =
		((MLX4_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ?
		 MLX4_PMD_TX_PER_COMP_REQ : (elts_n / 4));
	txq->elts_comp_cd = txq->elts_comp_cd_init;
	return 0;
error:
	rte_free(elts);
	DEBUG("%p: failed, freed everything", (void *)txq);
	assert(ret > 0);
	rte_errno = ret;
	return -rte_errno;
}
/**
 * Free Tx queue elements.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 */
static void
mlx4_txq_free_elts(struct txq *txq)
{
	unsigned int elts_n = txq->elts_n;
	unsigned int elts_head = txq->elts_head;
	unsigned int elts_tail = txq->elts_tail;
	struct txq_elt (*elts)[elts_n] = txq->elts;

	DEBUG("%p: freeing WRs", (void *)txq);
	txq->elts_n = 0;
	txq->elts_head = 0;
	txq->elts_tail = 0;
	txq->elts_comp = 0;
	txq->elts_comp_cd = 0;
	txq->elts_comp_cd_init = 0;
	txq->elts = NULL;
	if (elts == NULL)
		return;
	while (elts_tail != elts_head) {
		struct txq_elt *elt = &(*elts)[elts_tail];

		assert(elt->buf != NULL);
		rte_pktmbuf_free(elt->buf);
#ifndef NDEBUG
		/* Poisoning. */
		memset(elt, 0x77, sizeof(*elt));
#endif
		if (++elts_tail == elts_n)
			elts_tail = 0;
	}
	rte_free(elts);
}
/**
 * Clean up a Tx queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 */
void
mlx4_txq_cleanup(struct txq *txq)
{
171 DEBUG("cleaning up %p", (void *)txq);
172 mlx4_txq_free_elts(txq);
174 claim_zero(ibv_destroy_qp(txq->qp));
176 claim_zero(ibv_destroy_cq(txq->cq));
177 for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
178 if (txq->mp2mr[i].mp == NULL)
180 assert(txq->mp2mr[i].mr != NULL);
181 claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));
183 memset(txq, 0, sizeof(*txq));
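/*
 * The helpers below keep a Tx queue's mp2mr[] cache populated: mempools
 * holding mbufs are registered as Verbs memory regions (see
 * mlx4_txq_mp2mr()) so their buffers can be referenced by send requests.
 */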
struct txq_mp2mr_mbuf_check_data {
	int ret;
};
/**
 * Callback function for rte_mempool_obj_iter() to check whether a given
 * mempool object looks like a mbuf.
 *
 * @param[in] mp
 *   The mempool pointer.
 * @param[in] arg
 *   Context data (struct txq_mp2mr_mbuf_check_data). Contains the
 *   return value.
 * @param[in] obj
 *   Object address.
 * @param index
 *   Object index, unused.
 */
static void
mlx4_txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
			  uint32_t index)
{
	struct txq_mp2mr_mbuf_check_data *data = arg;
	struct rte_mbuf *buf = obj;

	(void)index;
	/*
	 * Check whether mbuf structure fits element size and whether mempool
	 * pointer is valid.
	 */
	if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
		data->ret = -1;
}
/**
 * Iterator function for rte_mempool_walk() to register existing mempools and
 * fill the MP to MR cache of a Tx queue.
 *
 * @param[in] mp
 *   Memory pool to register.
 * @param[in] arg
 *   Pointer to Tx queue structure.
 */
static void
mlx4_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
{
	struct txq *txq = arg;
	struct txq_mp2mr_mbuf_check_data data = {
		.ret = 0,
	};

	/* Register mempool only if the first element looks like a mbuf. */
	if (rte_mempool_obj_iter(mp, mlx4_txq_mp2mr_mbuf_check, &data) == 0 ||
	    data.ret == -1)
		return;
	mlx4_txq_mp2mr(txq, mp);
}
/**
 * Configure a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param txq
 *   Pointer to Tx queue structure.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
	       unsigned int socket, const struct rte_eth_txconf *conf)
{
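	/*
	 * The queue is first assembled in the local "tmpl" copy (CQ, QP and
	 * element array) and only swapped into *txq once every step has
	 * succeeded, so a failed reconfiguration leaves the previous queue
	 * state untouched.
	 */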
	struct priv *priv = dev->data->dev_private;
	struct txq tmpl = {
		.priv = priv,
		.socket = socket
	};
	union {
		struct ibv_qp_init_attr init;
		struct ibv_qp_attr mod;
	} attr;
	int ret;

	(void)conf; /* Thresholds configuration (ignored). */
	if (priv == NULL) {
		rte_errno = EINVAL;
		goto error;
	}
	if (desc == 0) {
		rte_errno = EINVAL;
		ERROR("%p: invalid number of Tx descriptors", (void *)dev);
		goto error;
	}
	/* MRs will be registered in mp2mr[] later. */
	tmpl.cq = ibv_create_cq(priv->ctx, desc, NULL, NULL, 0);
	if (tmpl.cq == NULL) {
		rte_errno = ENOMEM;
		ERROR("%p: CQ creation failure: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	DEBUG("priv->device_attr.max_qp_wr is %d",
	      priv->device_attr.max_qp_wr);
	DEBUG("priv->device_attr.max_sge is %d",
	      priv->device_attr.max_sge);
	attr.init = (struct ibv_qp_init_attr){
		/* CQ to be associated with the send queue. */
		.send_cq = tmpl.cq,
		/* CQ to be associated with the receive queue. */
		.recv_cq = tmpl.cq,
		.cap = {
			/* Max number of outstanding WRs. */
			.max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
					priv->device_attr.max_qp_wr :
					desc),
			/* Max number of scatter/gather elements in a WR. */
			.max_send_sge = 1,
			.max_inline_data = MLX4_PMD_MAX_INLINE,
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		/*
		 * Do *NOT* enable this, completion events are managed per
		 * Tx burst.
		 */
		.sq_sig_all = 0,
	};
	tmpl.qp = ibv_create_qp(priv->pd, &attr.init);
	if (tmpl.qp == NULL) {
		rte_errno = errno ? errno : EINVAL;
		ERROR("%p: QP creation failure: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	/* ibv_create_qp() updates this value. */
	tmpl.max_inline = attr.init.cap.max_inline_data;
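	/*
	 * Walk the QP through the standard Verbs state sequence
	 * (INIT -> RTR -> RTS); sends can only be posted once the QP
	 * reaches RTS.
	 */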
	attr.mod = (struct ibv_qp_attr){
		/* Move the QP to this state. */
		.qp_state = IBV_QPS_INIT,
		/* Primary port number. */
		.port_num = priv->port
	};
	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE | IBV_QP_PORT);
	if (ret) {
		rte_errno = ret;
		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	ret = mlx4_txq_alloc_elts(&tmpl, desc);
	if (ret) {
		ERROR("%p: TXQ allocation failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		rte_errno = ret;
		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	attr.mod.qp_state = IBV_QPS_RTS;
	ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		rte_errno = ret;
		ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	/* Clean up txq in case we're reinitializing it. */
	DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
	mlx4_txq_cleanup(txq);
	*txq = tmpl;
	DEBUG("%p: txq updated with %p", (void *)txq, (void *)&tmpl);
	/* Pre-register known mempools. */
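	/*
	 * Warming the mp2mr[] cache here means the Tx burst path rarely has
	 * to register a memory region on the fly for a known mempool.
	 */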
	rte_mempool_walk(mlx4_txq_mp2mr_iter, txq);
	return 0;
error:
	ret = rte_errno;
	mlx4_txq_cleanup(&tmpl);
	rte_errno = ret;
	assert(rte_errno > 0);
	return -rte_errno;
}
/**
 * DPDK callback to configure a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct priv *priv = dev->data->dev_private;
	struct txq *txq = dev->data->tx_queues[idx];
	int ret;

	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= dev->data->nb_tx_queues) {
		rte_errno = EOVERFLOW;
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, dev->data->nb_tx_queues);
		return -rte_errno;
	}
	if (txq != NULL) {
		DEBUG("%p: reusing already allocated queue index %u (%p)",
		      (void *)dev, idx, (void *)txq);
		if (priv->started) {
			rte_errno = EEXIST;
			return -rte_errno;
		}
		dev->data->tx_queues[idx] = NULL;
		mlx4_txq_cleanup(txq);
	} else {
		txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0, socket);
		if (txq == NULL) {
			rte_errno = ENOMEM;
			ERROR("%p: unable to allocate queue index %u",
			      (void *)dev, idx);
			return -rte_errno;
		}
	}
	ret = mlx4_txq_setup(dev, txq, desc, socket, conf);
	if (ret) {
		rte_free(txq);
	} else {
		txq->stats.idx = idx;
		DEBUG("%p: adding Tx queue %p to list",
		      (void *)dev, (void *)txq);
		dev->data->tx_queues[idx] = txq;
		/* Update send callback. */
		dev->tx_pkt_burst = mlx4_tx_burst;
	}
	return ret;
}
/**
 * DPDK callback to release a Tx queue.
 *
 * @param dpdk_txq
 *   Generic Tx queue pointer.
 */
void
mlx4_tx_queue_release(void *dpdk_txq)
{
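	/*
	 * Unhook the queue from dev->data->tx_queues[] before destroying it
	 * so the array never points at freed memory.
	 */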
	struct txq *txq = (struct txq *)dpdk_txq;
	struct priv *priv;
	unsigned int i;

	if (txq == NULL)
		return;
	priv = txq->priv;
	for (i = 0; i != priv->dev->data->nb_tx_queues; ++i)
		if (priv->dev->data->tx_queues[i] == txq) {
			DEBUG("%p: removing Tx queue %p from list",
			      (void *)priv->dev, (void *)txq);
			priv->dev->data->tx_queues[i] = NULL;
			break;
		}
	mlx4_txq_cleanup(txq);
	rte_free(txq);
}