1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
14 #include <mlx5_malloc.h>
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
24 * Stop traffic on Tx queues.
27 * Pointer to Ethernet device structure.
30 mlx5_txq_stop(struct rte_eth_dev *dev)
32 struct mlx5_priv *priv = dev->data->dev_private;
35 for (i = 0; i != priv->txqs_n; ++i)
36 mlx5_txq_release(dev, i);
40 * Start traffic on Tx queues.
43 * Pointer to Ethernet device structure.
46 * 0 on success, a negative errno value otherwise and rte_errno is set.
49 mlx5_txq_start(struct rte_eth_dev *dev)
51 struct mlx5_priv *priv = dev->data->dev_private;
55 for (i = 0; i != priv->txqs_n; ++i) {
56 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57 struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
58 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
62 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63 txq_alloc_elts(txq_ctrl);
64 MLX5_ASSERT(!txq_ctrl->obj);
65 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
68 DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69 "memory resources.", dev->data->port_id,
74 ret = priv->obj_ops.txq_obj_new(dev, i);
76 mlx5_free(txq_ctrl->obj);
80 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81 size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
83 txq_data->fcqs = mlx5_malloc(flags, size,
86 if (!txq_data->fcqs) {
87 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88 "allocate memory (FCQ).",
89 dev->data->port_id, i);
94 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95 dev->data->port_id, i, (void *)&txq_ctrl->obj);
96 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
100 ret = rte_errno; /* Save rte_errno before cleanup. */
102 mlx5_txq_release(dev, i);
104 rte_errno = ret; /* Restore rte_errno. */
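/*
 * Illustration of the error-unwind pattern used above and repeated in
 * mlx5_rxq_start() below: rte_errno is saved before the partially created
 * queues are released, because the release helpers may overwrite it, and it
 * is restored once the rollback is done.  Minimal self-contained sketch; the
 * example_* helpers are hypothetical stand-ins for the real queue code.
 */
#include <errno.h>
#include <rte_errno.h>

static int
example_setup_one(unsigned int i)
{
        /* Hypothetical setup step: fail on queue 3 to exercise the rollback. */
        if (i == 3) {
                rte_errno = ENOMEM;
                return -rte_errno;
        }
        return 0;
}

static void
example_release_one(unsigned int i)
{
        (void)i;
        /* Hypothetical release step: clobbers rte_errno like a real helper may. */
        rte_errno = EINVAL;
}

static int
example_setup_all(unsigned int n)
{
        unsigned int i;
        int ret;

        for (i = 0; i != n; ++i) {
                if (example_setup_one(i) != 0)
                        goto error;
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        do {
                example_release_one(i);
        } while (i-- != 0);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}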
109 * Translate the chunk address to an MR key in order to put it into the cache.
112 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
113 struct rte_mempool_memhdr *memhdr,
116 struct mlx5_rxq_data *rxq = opaque;
120 mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
124 * Register Rx queue mempools and fill the Rx queue cache.
125 * This function tolerates repeated mempool registration.
127 * @param[in] rxq_ctrl
128 * Rx queue control data.
131 * 0 on success, (-1) on failure and rte_errno is set.
134 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
136 struct mlx5_priv *priv = rxq_ctrl->priv;
137 struct rte_mempool *mp;
141 mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
142 /* MPRQ mempool is registered on creation, just fill the cache. */
143 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
144 rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
145 mlx5_rxq_mempool_register_cb,
149 for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
150 mp = rxq_ctrl->rxq.rxseg[s].mp;
151 ret = mlx5_mr_mempool_register(&priv->sh->share_cache,
152 priv->sh->pd, mp, &priv->mp_id);
153 if (ret < 0 && rte_errno != EEXIST)
155 rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
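/*
 * Illustration of the rte_mempool_mem_iter() pattern used above: the callback
 * runs once per memory chunk of the pool, which is how every chunk start
 * address gets pre-translated into the MR cache.  Minimal sketch with
 * hypothetical example_* names; only the mempool iteration API itself is
 * taken from DPDK.
 */
#include <stdio.h>
#include <rte_mempool.h>

static void
example_chunk_cb(struct rte_mempool *mp, void *opaque,
                 struct rte_mempool_memhdr *memhdr, unsigned int mem_idx)
{
        (void)opaque;
        /* A real callback would translate memhdr->addr here. */
        printf("%s: chunk %u at %p, length %zu\n",
               mp->name, mem_idx, memhdr->addr, memhdr->len);
}

static void
example_walk_chunks(struct rte_mempool *mp)
{
        /* The return value is the number of chunks visited. */
        (void)rte_mempool_mem_iter(mp, example_chunk_cb, NULL);
}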
162 * Stop traffic on Rx queues.
165 * Pointer to Ethernet device structure.
168 mlx5_rxq_stop(struct rte_eth_dev *dev)
170 struct mlx5_priv *priv = dev->data->dev_private;
173 for (i = 0; i != priv->rxqs_n; ++i)
174 mlx5_rxq_release(dev, i);
178 * Start traffic on Rx queues.
181 * Pointer to Ethernet device structure.
184 * 0 on success, a negative errno value otherwise and rte_errno is set.
187 mlx5_rxq_start(struct rte_eth_dev *dev)
189 struct mlx5_priv *priv = dev->data->dev_private;
193 /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
194 if (mlx5_mprq_alloc_mp(dev)) {
195 /* Should not release Rx queues but return immediately. */
198 DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
199 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
200 DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
201 dev->data->port_id, priv->sh->device_attr.max_sge);
202 for (i = 0; i != priv->rxqs_n; ++i) {
203 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
207 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
209 * Pre-register the mempools. Regardless of whether
210 * the implicit registration is enabled or not,
211 * Rx mempool destruction is tracked to free MRs.
213 if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
215 ret = rxq_alloc_elts(rxq_ctrl);
219 MLX5_ASSERT(!rxq_ctrl->obj);
220 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
221 sizeof(*rxq_ctrl->obj), 0,
223 if (!rxq_ctrl->obj) {
225 "Port %u Rx queue %u can't allocate resources.",
226 dev->data->port_id, (*priv->rxqs)[i]->idx);
230 ret = priv->obj_ops.rxq_obj_new(dev, i);
232 mlx5_free(rxq_ctrl->obj);
235 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
236 dev->data->port_id, i, (void *)&rxq_ctrl->obj);
237 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
241 ret = rte_errno; /* Save rte_errno before cleanup. */
243 mlx5_rxq_release(dev, i);
245 rte_errno = ret; /* Restore rte_errno. */
250 * Binds Tx queues to Rx queues for hairpin.
252 * Binds each hairpin Tx queue to its target Rx queue on the same port.
255 * Pointer to Ethernet device structure.
258 * 0 on success, a negative errno value otherwise and rte_errno is set.
261 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
263 struct mlx5_priv *priv = dev->data->dev_private;
264 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
265 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
266 struct mlx5_txq_ctrl *txq_ctrl;
267 struct mlx5_rxq_ctrl *rxq_ctrl;
268 struct mlx5_devx_obj *sq;
269 struct mlx5_devx_obj *rq;
272 bool need_auto = false;
273 uint16_t self_port = dev->data->port_id;
275 for (i = 0; i != priv->txqs_n; ++i) {
276 txq_ctrl = mlx5_txq_get(dev, i);
279 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
280 txq_ctrl->hairpin_conf.peers[0].port != self_port) {
281 mlx5_txq_release(dev, i);
284 if (txq_ctrl->hairpin_conf.manual_bind) {
285 mlx5_txq_release(dev, i);
289 mlx5_txq_release(dev, i);
293 for (i = 0; i != priv->txqs_n; ++i) {
294 txq_ctrl = mlx5_txq_get(dev, i);
297 /* Skip hairpin queues with other peer ports. */
298 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
299 txq_ctrl->hairpin_conf.peers[0].port != self_port) {
300 mlx5_txq_release(dev, i);
303 if (!txq_ctrl->obj) {
305 DRV_LOG(ERR, "port %u no txq object found: %d",
306 dev->data->port_id, i);
307 mlx5_txq_release(dev, i);
310 sq = txq_ctrl->obj->sq;
311 rxq_ctrl = mlx5_rxq_get(dev,
312 txq_ctrl->hairpin_conf.peers[0].queue);
314 mlx5_txq_release(dev, i);
316 DRV_LOG(ERR, "port %u no rxq object found: %d",
318 txq_ctrl->hairpin_conf.peers[0].queue);
321 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
322 rxq_ctrl->hairpin_conf.peers[0].queue != i) {
324 DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
325 "Rx queue %d", dev->data->port_id,
326 i, txq_ctrl->hairpin_conf.peers[0].queue);
329 rq = rxq_ctrl->obj->rq;
332 DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
334 txq_ctrl->hairpin_conf.peers[0].queue);
337 sq_attr.state = MLX5_SQC_STATE_RDY;
338 sq_attr.sq_state = MLX5_SQC_STATE_RST;
339 sq_attr.hairpin_peer_rq = rq->id;
340 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
341 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
344 rq_attr.state = MLX5_SQC_STATE_RDY;
345 rq_attr.rq_state = MLX5_SQC_STATE_RST;
346 rq_attr.hairpin_peer_sq = sq->id;
347 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
348 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
351 /* Qs with auto-bind will be destroyed directly. */
352 rxq_ctrl->hairpin_status = 1;
353 txq_ctrl->hairpin_status = 1;
354 mlx5_txq_release(dev, i);
355 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
359 mlx5_txq_release(dev, i);
360 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
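/*
 * Illustration (application side): a single-port hairpin pair whose Tx and Rx
 * peers point at each other on the same port.  With manual_bind == 0, the
 * binding above is performed automatically inside rte_eth_dev_start().
 * Minimal sketch under the assumption that the hairpin queues at rxq_id and
 * txq_id are already counted in the dev_configure() queue numbers; the
 * descriptor count of 128 is an arbitrary example.
 */
#include <rte_ethdev.h>

static int
example_single_port_hairpin(uint16_t port_id, uint16_t rxq_id, uint16_t txq_id)
{
        struct rte_eth_hairpin_conf conf = {
                .peer_count = 1,
                .manual_bind = 0, /* Let the PMD auto-bind at start. */
                .tx_explicit = 0, /* Implicit Tx flow mode. */
        };
        int ret;

        conf.peers[0].port = port_id;
        conf.peers[0].queue = txq_id;
        ret = rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, 128, &conf);
        if (ret != 0)
                return ret;
        conf.peers[0].queue = rxq_id;
        ret = rte_eth_tx_hairpin_queue_setup(port_id, txq_id, 128, &conf);
        if (ret != 0)
                return ret;
        return rte_eth_dev_start(port_id);
}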
365 * Fetch the peer queue's SW & HW information.
368 * Pointer to Ethernet device structure.
370 * Index of the queue whose information is fetched.
371 * @param current_info
372 * Pointer to the input peer information, not used currently.
374 * Pointer to the structure to store the information, output.
376 * Positive to get the RxQ information, zero to get the TxQ information.
379 * 0 on success, a negative errno value otherwise and rte_errno is set.
382 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
383 struct rte_hairpin_peer_info *current_info,
384 struct rte_hairpin_peer_info *peer_info,
387 struct mlx5_priv *priv = dev->data->dev_private;
388 RTE_SET_USED(current_info);
390 if (dev->data->dev_started == 0) {
392 DRV_LOG(ERR, "peer port %u is not started",
397 * Peer port used as egress. In the current design, hairpin Tx queue
398 * will be bound to the peer Rx queue. Indeed, only the information of
399 * peer Rx queue needs to be fetched.
401 if (direction == 0) {
402 struct mlx5_txq_ctrl *txq_ctrl;
404 txq_ctrl = mlx5_txq_get(dev, peer_queue);
405 if (txq_ctrl == NULL) {
407 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
408 dev->data->port_id, peer_queue);
411 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
413 DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
414 dev->data->port_id, peer_queue);
415 mlx5_txq_release(dev, peer_queue);
418 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
420 DRV_LOG(ERR, "port %u no Txq object found: %d",
421 dev->data->port_id, peer_queue);
422 mlx5_txq_release(dev, peer_queue);
425 peer_info->qp_id = txq_ctrl->obj->sq->id;
426 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
427 /* 1-to-1 mapping, only the first one is used. */
428 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
429 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
430 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
431 mlx5_txq_release(dev, peer_queue);
432 } else { /* Peer port used as ingress. */
433 struct mlx5_rxq_ctrl *rxq_ctrl;
435 rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
436 if (rxq_ctrl == NULL) {
438 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
439 dev->data->port_id, peer_queue);
442 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
444 DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
445 dev->data->port_id, peer_queue);
446 mlx5_rxq_release(dev, peer_queue);
449 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
451 DRV_LOG(ERR, "port %u no Rxq object found: %d",
452 dev->data->port_id, peer_queue);
453 mlx5_rxq_release(dev, peer_queue);
456 peer_info->qp_id = rxq_ctrl->obj->rq->id;
457 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
458 peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
459 peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
460 peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
461 mlx5_rxq_release(dev, peer_queue);
467 * Bind the hairpin queue with the peer HW information.
468 * This needs to be called twice, for both the Tx and Rx queues of a pair.
469 * If the queue is already bound, it is considered successful.
472 * Pointer to Ethernet device structure.
474 * Index of the queue to change the HW configuration to bind.
476 * Pointer to information of the peer queue.
478 * Positive to configure the TxQ, zero to configure the RxQ.
481 * 0 on success, a negative errno value otherwise and rte_errno is set.
484 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
485 struct rte_hairpin_peer_info *peer_info,
491 * Consistency check of the peer queue: the opposite direction is used
492 * to get the peer queue info via the ethdev port ID, so no extra check is needed.
494 if (peer_info->peer_q != cur_queue) {
496 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
497 dev->data->port_id, cur_queue, peer_info->peer_q);
500 if (direction != 0) {
501 struct mlx5_txq_ctrl *txq_ctrl;
502 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
504 txq_ctrl = mlx5_txq_get(dev, cur_queue);
505 if (txq_ctrl == NULL) {
507 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
508 dev->data->port_id, cur_queue);
511 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
513 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
514 dev->data->port_id, cur_queue);
515 mlx5_txq_release(dev, cur_queue);
518 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
520 DRV_LOG(ERR, "port %u no Txq object found: %d",
521 dev->data->port_id, cur_queue);
522 mlx5_txq_release(dev, cur_queue);
525 if (txq_ctrl->hairpin_status != 0) {
526 DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
527 dev->data->port_id, cur_queue);
528 mlx5_txq_release(dev, cur_queue);
532 * Consistency checking of all the queues of one port is done in the
533 * bind() function, and that is optional.
535 if (peer_info->tx_explicit !=
536 txq_ctrl->hairpin_conf.tx_explicit) {
538 DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
539 " mismatch", dev->data->port_id, cur_queue);
540 mlx5_txq_release(dev, cur_queue);
543 if (peer_info->manual_bind !=
544 txq_ctrl->hairpin_conf.manual_bind) {
546 DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
547 " mismatch", dev->data->port_id, cur_queue);
548 mlx5_txq_release(dev, cur_queue);
551 sq_attr.state = MLX5_SQC_STATE_RDY;
552 sq_attr.sq_state = MLX5_SQC_STATE_RST;
553 sq_attr.hairpin_peer_rq = peer_info->qp_id;
554 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
555 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
557 txq_ctrl->hairpin_status = 1;
558 mlx5_txq_release(dev, cur_queue);
560 struct mlx5_rxq_ctrl *rxq_ctrl;
561 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
563 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
564 if (rxq_ctrl == NULL) {
566 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
567 dev->data->port_id, cur_queue);
570 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
572 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
573 dev->data->port_id, cur_queue);
574 mlx5_rxq_release(dev, cur_queue);
577 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
579 DRV_LOG(ERR, "port %u no Rxq object found: %d",
580 dev->data->port_id, cur_queue);
581 mlx5_rxq_release(dev, cur_queue);
584 if (rxq_ctrl->hairpin_status != 0) {
585 DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
586 dev->data->port_id, cur_queue);
587 mlx5_rxq_release(dev, cur_queue);
590 if (peer_info->tx_explicit !=
591 rxq_ctrl->hairpin_conf.tx_explicit) {
593 DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
594 " mismatch", dev->data->port_id, cur_queue);
595 mlx5_rxq_release(dev, cur_queue);
598 if (peer_info->manual_bind !=
599 rxq_ctrl->hairpin_conf.manual_bind) {
601 DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
602 " mismatch", dev->data->port_id, cur_queue);
603 mlx5_rxq_release(dev, cur_queue);
606 rq_attr.state = MLX5_SQC_STATE_RDY;
607 rq_attr.rq_state = MLX5_SQC_STATE_RST;
608 rq_attr.hairpin_peer_sq = peer_info->qp_id;
609 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
610 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
612 rxq_ctrl->hairpin_status = 1;
613 mlx5_rxq_release(dev, cur_queue);
619 * Unbind the hairpin queue and reset its HW configuration.
620 * This needs to be called twice, for both the Tx and Rx queues of a pair.
621 * If the queue is already unbound, it is considered successful.
624 * Pointer to Ethernet device structure.
626 * Index of the queue to change the HW configuration to unbind.
628 * Positive to reset the TxQ, zero to reset the RxQ.
631 * 0 on success, a negative errno value otherwise and rte_errno is set.
634 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
639 if (direction != 0) {
640 struct mlx5_txq_ctrl *txq_ctrl;
641 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
643 txq_ctrl = mlx5_txq_get(dev, cur_queue);
644 if (txq_ctrl == NULL) {
646 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
647 dev->data->port_id, cur_queue);
650 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
652 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
653 dev->data->port_id, cur_queue);
654 mlx5_txq_release(dev, cur_queue);
657 /* Already unbound, return success before obj checking. */
658 if (txq_ctrl->hairpin_status == 0) {
659 DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
660 dev->data->port_id, cur_queue);
661 mlx5_txq_release(dev, cur_queue);
664 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
666 DRV_LOG(ERR, "port %u no Txq object found: %d",
667 dev->data->port_id, cur_queue);
668 mlx5_txq_release(dev, cur_queue);
671 sq_attr.state = MLX5_SQC_STATE_RST;
672 sq_attr.sq_state = MLX5_SQC_STATE_RST;
673 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
675 txq_ctrl->hairpin_status = 0;
676 mlx5_txq_release(dev, cur_queue);
678 struct mlx5_rxq_ctrl *rxq_ctrl;
679 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
681 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
682 if (rxq_ctrl == NULL) {
684 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
685 dev->data->port_id, cur_queue);
688 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
690 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
691 dev->data->port_id, cur_queue);
692 mlx5_rxq_release(dev, cur_queue);
695 if (rxq_ctrl->hairpin_status == 0) {
696 DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
697 dev->data->port_id, cur_queue);
698 mlx5_rxq_release(dev, cur_queue);
701 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
703 DRV_LOG(ERR, "port %u no Rxq object found: %d",
704 dev->data->port_id, cur_queue);
705 mlx5_rxq_release(dev, cur_queue);
708 rq_attr.state = MLX5_SQC_STATE_RST;
709 rq_attr.rq_state = MLX5_SQC_STATE_RST;
710 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
712 rxq_ctrl->hairpin_status = 0;
713 mlx5_rxq_release(dev, cur_queue);
719 * Bind the hairpin port pairs, from the Tx to the peer Rx.
720 * This function only supports binding the Tx side to a single Rx port.
723 * Pointer to Ethernet device structure.
725 * Port identifier of the Rx port.
728 * 0 on success, a negative errno value otherwise and rte_errno is set.
731 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
733 struct mlx5_priv *priv = dev->data->dev_private;
735 struct mlx5_txq_ctrl *txq_ctrl;
737 struct rte_hairpin_peer_info peer = {0xffffff};
738 struct rte_hairpin_peer_info cur;
739 const struct rte_eth_hairpin_conf *conf;
741 uint16_t local_port = priv->dev_data->port_id;
746 if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
748 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
752 * Before binding a TxQ to its peer RxQ, a first pass over the queues checks
753 * their configuration consistency. This takes a little extra time but is
754 * better than having to roll back afterwards.
756 for (i = 0; i != priv->txqs_n; i++) {
757 txq_ctrl = mlx5_txq_get(dev, i);
758 if (txq_ctrl == NULL)
760 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
761 mlx5_txq_release(dev, i);
765 * All hairpin Tx queues of a single port that are connected to the
766 * same peer Rx port should have the same "auto binding" and
767 * "implicit Tx flow" modes.
768 * Peer consistency checking is done in the per-queue binding.
770 conf = &txq_ctrl->hairpin_conf;
771 if (conf->peers[0].port == rx_port) {
773 manual = conf->manual_bind;
774 explicit = conf->tx_explicit;
776 if (manual != conf->manual_bind ||
777 explicit != conf->tx_explicit) {
779 DRV_LOG(ERR, "port %u queue %d mode"
780 " mismatch: %u %u, %u %u",
781 local_port, i, manual,
782 conf->manual_bind, explicit,
784 mlx5_txq_release(dev, i);
790 mlx5_txq_release(dev, i);
792 /* If no queue is configured, success is returned directly. */
795 /* All the hairpin Tx queues need to be traversed again. */
796 for (i = 0; i != priv->txqs_n; i++) {
797 txq_ctrl = mlx5_txq_get(dev, i);
798 if (txq_ctrl == NULL)
800 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
801 mlx5_txq_release(dev, i);
804 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
805 mlx5_txq_release(dev, i);
808 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
810 * Fetch peer RxQ's information.
811 * No need to pass the information of the current queue.
813 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
816 mlx5_txq_release(dev, i);
819 /* Accessing its own device, inside mlx5 PMD. */
820 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
822 mlx5_txq_release(dev, i);
825 /* Pass TxQ's information to peer RxQ and try binding. */
826 cur.peer_q = rx_queue;
827 cur.qp_id = txq_ctrl->obj->sq->id;
828 cur.vhca_id = priv->config.hca_attr.vhca_id;
829 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
830 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
832 * In order to access another device properly, an RTE-level private
833 * function is needed.
835 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
838 mlx5_txq_release(dev, i);
841 mlx5_txq_release(dev, i);
846 * Roll back the queues that were already bound.
847 * No need to check the return values of the queue unbind functions.
850 /* No validation is needed here. */
851 txq_ctrl = mlx5_txq_get(dev, i);
852 if (txq_ctrl == NULL)
854 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
855 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
856 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
857 mlx5_txq_release(dev, i);
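/*
 * Illustration (application side): two-port hairpin with manual binding.
 * rte_eth_hairpin_bind() binds Tx queues of the first port to Rx queues of
 * the second, so it has to be called once per direction if traffic flows
 * both ways.  Minimal sketch; port0/port1 are hypothetical port IDs whose
 * hairpin queues are assumed to be set up with manual_bind = 1.
 */
#include <rte_ethdev.h>

static int
example_manual_bind(uint16_t port0, uint16_t port1)
{
        int ret;

        /* Bind port0 Tx hairpin queues to their peer Rx queues on port1. */
        ret = rte_eth_hairpin_bind(port0, port1);
        if (ret != 0)
                return ret;
        /* And the opposite direction, if port1 also sends to port0. */
        return rte_eth_hairpin_bind(port1, port0);
}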
863 * Unbind the hairpin port pair; the HW configuration of both devices will be
864 * cleared and the status will be reset for all the queues used between them.
865 * This function only supports unbinding the Tx side from a single Rx port.
868 * Pointer to Ethernet device structure.
870 * Port identifier of the Rx port.
873 * 0 on success, a negative errno value otherwise and rte_errno is set.
876 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
878 struct mlx5_priv *priv = dev->data->dev_private;
879 struct mlx5_txq_ctrl *txq_ctrl;
882 uint16_t cur_port = priv->dev_data->port_id;
884 if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
886 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
889 for (i = 0; i != priv->txqs_n; i++) {
892 txq_ctrl = mlx5_txq_get(dev, i);
893 if (txq_ctrl == NULL)
895 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
896 mlx5_txq_release(dev, i);
899 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
900 mlx5_txq_release(dev, i);
903 /* Indeed, only the first used queue needs to be checked. */
904 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
905 if (cur_port != rx_port) {
907 DRV_LOG(ERR, "port %u and port %u are in"
908 " auto-bind mode", cur_port, rx_port);
909 mlx5_txq_release(dev, i);
915 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
916 mlx5_txq_release(dev, i);
917 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
919 DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
923 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
925 DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
934 * Bind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is passed.
935 * @see mlx5_hairpin_bind_single_port()
938 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
944 * If the Rx port has no hairpin configuration with the current port,
945 * the binding is skipped inside the single-port bind function.
946 * The device started status is checked only before updating the queue
947 * information.
949 if (rx_port == RTE_MAX_ETHPORTS) {
950 MLX5_ETH_FOREACH_DEV(p, dev->device) {
951 ret = mlx5_hairpin_bind_single_port(dev, p);
957 return mlx5_hairpin_bind_single_port(dev, rx_port);
960 MLX5_ETH_FOREACH_DEV(pp, dev->device)
962 mlx5_hairpin_unbind_single_port(dev, pp);
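/*
 * Illustration (application side): passing RTE_MAX_ETHPORTS as the Rx port
 * asks the PMD to walk all of its sibling ports, as done by the loops above
 * and in mlx5_hairpin_unbind() below.  Minimal sketch; port_id is assumed to
 * have its hairpin Tx queues configured with manual_bind = 1.
 */
static int
example_bind_unbind_all_peers(uint16_t port_id)
{
        int ret = rte_eth_hairpin_bind(port_id, RTE_MAX_ETHPORTS);

        if (ret != 0)
                return ret;
        /* ... traffic runs ... */
        return rte_eth_hairpin_unbind(port_id, RTE_MAX_ETHPORTS);
}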
967 * Unbind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is passed.
968 * @see mlx5_hairpin_unbind_single_port()
971 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
976 if (rx_port == RTE_MAX_ETHPORTS)
977 MLX5_ETH_FOREACH_DEV(p, dev->device) {
978 ret = mlx5_hairpin_unbind_single_port(dev, p);
983 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
988 * DPDK callback to get the hairpin peer ports list.
989 * This returns the actual number of peer ports and saves the identifiers
990 * into the array (sorted, which may differ from the order used when setting
991 * up the hairpin peer queues).
992 * The peer port ID could be the same as the port ID of the current device.
995 * Pointer to Ethernet device structure.
997 * Pointer to array to save the port identifiers.
999 * The length of the array.
1001 * Current port to peer port direction.
1002 * positive - the current port is used as Tx, to get all peer Rx ports.
1003 * zero - the current port is used as Rx, to get all peer Tx ports.
1006 * 0 or a positive value on success, i.e. the actual number of peer ports;
1007 * a negative errno value otherwise and rte_errno is set.
1010 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1011 size_t len, uint32_t direction)
1013 struct mlx5_priv *priv = dev->data->dev_private;
1014 struct mlx5_txq_ctrl *txq_ctrl;
1015 struct mlx5_rxq_ctrl *rxq_ctrl;
1018 uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1022 for (i = 0; i < priv->txqs_n; i++) {
1023 txq_ctrl = mlx5_txq_get(dev, i);
1026 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1027 mlx5_txq_release(dev, i);
1030 pp = txq_ctrl->hairpin_conf.peers[0].port;
1031 if (pp >= RTE_MAX_ETHPORTS) {
1033 mlx5_txq_release(dev, i);
1034 DRV_LOG(ERR, "port %hu queue %u peer port "
1036 priv->dev_data->port_id, i, pp);
1039 bits[pp / 32] |= 1 << (pp % 32);
1040 mlx5_txq_release(dev, i);
1043 for (i = 0; i < priv->rxqs_n; i++) {
1044 rxq_ctrl = mlx5_rxq_get(dev, i);
1047 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
1048 mlx5_rxq_release(dev, i);
1051 pp = rxq_ctrl->hairpin_conf.peers[0].port;
1052 if (pp >= RTE_MAX_ETHPORTS) {
1054 mlx5_rxq_release(dev, i);
1055 DRV_LOG(ERR, "port %hu queue %u peer port "
1057 priv->dev_data->port_id, i, pp);
1060 bits[pp / 32] |= 1 << (pp % 32);
1061 mlx5_rxq_release(dev, i);
1064 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1065 if (bits[i / 32] & (1 << (i % 32))) {
1066 if ((size_t)ret >= len) {
1070 peer_ports[ret++] = i;
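/*
 * Illustration (application side): querying the peer Rx ports of all hairpin
 * Tx queues on a port through the callback above.  Minimal sketch; direction
 * 1 asks for the peer Rx ports (current port used as Tx), direction 0 for
 * the peer Tx ports.
 */
#include <stdio.h>
#include <rte_ethdev.h>

static void
example_list_peer_rx_ports(uint16_t port_id)
{
        uint16_t peers[RTE_MAX_ETHPORTS];
        int i, n;

        n = rte_eth_hairpin_get_peer_ports(port_id, peers, RTE_DIM(peers), 1);
        for (i = 0; i < n; i++)
                printf("port %u hairpin Tx peer Rx port: %u\n",
                       port_id, peers[i]);
}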
1077 * DPDK callback to start the device.
1079 * Simulate device start by attaching all configured flows.
1082 * Pointer to Ethernet device structure.
1085 * 0 on success, a negative errno value otherwise and rte_errno is set.
1088 mlx5_dev_start(struct rte_eth_dev *dev)
1090 struct mlx5_priv *priv = dev->data->dev_private;
1094 DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1095 fine_inline = rte_mbuf_dynflag_lookup
1096 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1097 if (fine_inline >= 0)
1098 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1100 rte_net_mlx5_dynf_inline_mask = 0;
1101 if (dev->data->nb_rx_queues > 0) {
1102 ret = mlx5_dev_configure_rss_reta(dev);
1104 DRV_LOG(ERR, "port %u reta config failed: %s",
1105 dev->data->port_id, strerror(rte_errno));
1109 ret = mlx5_txpp_start(dev);
1111 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1112 dev->data->port_id, strerror(rte_errno));
1115 if ((priv->config.devx && priv->config.dv_flow_en &&
1116 priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1117 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1121 ret = mlx5_txq_start(dev);
1123 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1124 dev->data->port_id, strerror(rte_errno));
1127 ret = mlx5_rxq_start(dev);
1129 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1130 dev->data->port_id, strerror(rte_errno));
1134 * This step is skipped if there is no hairpin Tx queue configured
1135 * with an Rx peer queue on the same device.
1137 ret = mlx5_hairpin_auto_bind(dev);
1139 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1140 dev->data->port_id, strerror(rte_errno));
1143 /* Set started flag here for the following steps like control flow. */
1144 dev->data->dev_started = 1;
1145 ret = mlx5_rx_intr_vec_enable(dev);
1147 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1148 dev->data->port_id);
1151 mlx5_os_stats_init(dev);
1152 ret = mlx5_traffic_enable(dev);
1154 DRV_LOG(ERR, "port %u failed to set defaults flows",
1155 dev->data->port_id);
1158 /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1159 mlx5_flow_rxq_dynf_metadata_set(dev);
1160 /* Set flags and context to convert Rx timestamps. */
1161 mlx5_rxq_timestamp_set(dev);
1162 /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1163 mlx5_txq_dynf_timestamp_set(dev);
1165 * In non-cached mode, only the default mreg copy action needs to be
1166 * started, since no application-created flow exists anymore at this point.
1167 * Still, it is worth wrapping the interface for further usage.
1169 ret = mlx5_flow_start_default(dev);
1171 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1172 dev->data->port_id, strerror(rte_errno));
1175 if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1176 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1177 dev->data->port_id, rte_strerror(rte_errno));
1181 dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1182 dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1183 /* Enable datapath on secondary process. */
1184 mlx5_mp_os_req_start_rxtx(dev);
1185 if (priv->sh->intr_handle.fd >= 0) {
1186 priv->sh->port[priv->dev_port - 1].ih_port_id =
1187 (uint32_t)dev->data->port_id;
1189 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1190 dev->data->port_id);
1191 dev->data->dev_conf.intr_conf.lsc = 0;
1192 dev->data->dev_conf.intr_conf.rmv = 0;
1194 if (priv->sh->intr_handle_devx.fd >= 0)
1195 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1196 (uint32_t)dev->data->port_id;
1199 ret = rte_errno; /* Save rte_errno before cleanup. */
1201 dev->data->dev_started = 0;
1202 mlx5_flow_stop_default(dev);
1203 mlx5_traffic_disable(dev);
1206 if (priv->obj_ops.lb_dummy_queue_release)
1207 priv->obj_ops.lb_dummy_queue_release(dev);
1208 mlx5_txpp_stop(dev); /* Stop last. */
1209 rte_errno = ret; /* Restore rte_errno. */
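/*
 * Illustration of the dynamic mbuf flag looked up at the top of
 * mlx5_dev_start(): an application wanting per-packet control over data
 * inlining registers the flag by name before starting the port, so the
 * lookup above succeeds, and then sets the returned bit in mbuf->ol_flags
 * for the packets concerned.  Minimal sketch; only the rte_mbuf_dyn API and
 * the RTE_PMD_MLX5_FINE_GRANULARITY_INLINE name come from the included
 * headers.
 */
#include <rte_mbuf_dyn.h>

static uint64_t example_fine_inline_mask;

static int
example_register_fine_inline_flag(void)
{
        const struct rte_mbuf_dynflag flag_desc = {
                .name = RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
        };
        int bitnum = rte_mbuf_dynflag_register(&flag_desc);

        if (bitnum < 0)
                return -rte_errno;
        example_fine_inline_mask = 1ULL << bitnum;
        return 0;
}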
1214 * DPDK callback to stop the device.
1216 * Simulate device stop by detaching all configured flows.
1219 * Pointer to Ethernet device structure.
1222 mlx5_dev_stop(struct rte_eth_dev *dev)
1224 struct mlx5_priv *priv = dev->data->dev_private;
1226 dev->data->dev_started = 0;
1227 /* Prevent crashes when queues are still in use. */
1228 dev->rx_pkt_burst = removed_rx_burst;
1229 dev->tx_pkt_burst = removed_tx_burst;
1231 /* Disable datapath on secondary process. */
1232 mlx5_mp_os_req_stop_rxtx(dev);
1233 rte_delay_us_sleep(1000 * priv->rxqs_n);
1234 DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1235 mlx5_flow_stop_default(dev);
1236 /* Control flows for default traffic can be removed first. */
1237 mlx5_traffic_disable(dev);
1238 /* All RX queue flags will be cleared in the flush interface. */
1239 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1240 mlx5_flow_meter_rxq_flush(dev);
1241 mlx5_rx_intr_vec_disable(dev);
1242 priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1243 priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1246 if (priv->obj_ops.lb_dummy_queue_release)
1247 priv->obj_ops.lb_dummy_queue_release(dev);
1248 mlx5_txpp_stop(dev);
1254 * Enable traffic flows configured by the control plane.
1259 * Pointer to Ethernet device structure.
1262 * 0 on success, a negative errno value otherwise and rte_errno is set.
1265 mlx5_traffic_enable(struct rte_eth_dev *dev)
1267 struct mlx5_priv *priv = dev->data->dev_private;
1268 struct rte_flow_item_eth bcast = {
1269 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1271 struct rte_flow_item_eth ipv6_multi_spec = {
1272 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1274 struct rte_flow_item_eth ipv6_multi_mask = {
1275 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1277 struct rte_flow_item_eth unicast = {
1278 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1280 struct rte_flow_item_eth unicast_mask = {
1281 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1283 const unsigned int vlan_filter_n = priv->vlan_filter_n;
1284 const struct rte_ether_addr cmp = {
1285 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1292 * The hairpin Txq default flow should be created regardless of isolation
1293 * mode. Otherwise, all the packets to be sent would go out directly
1294 * without the Tx flow actions, e.g. encapsulation.
1296 for (i = 0; i != priv->txqs_n; ++i) {
1297 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1300 /* Only Tx implicit mode requires the default Tx flow. */
1301 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1302 txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1303 txq_ctrl->hairpin_conf.peers[0].port ==
1304 priv->dev_data->port_id) {
1305 ret = mlx5_ctrl_flow_source_queue(dev, i);
1307 mlx5_txq_release(dev, i);
1311 mlx5_txq_release(dev, i);
1313 if (priv->config.dv_esw_en && !priv->config.vf && !priv->config.sf) {
1314 if (mlx5_flow_create_esw_table_zero_flow(dev))
1315 priv->fdb_def_rule = 1;
1317 DRV_LOG(INFO, "port %u FDB default rule cannot be"
1318 " configured - only Eswitch group 0 flows are"
1319 " supported.", dev->data->port_id);
1321 if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1322 ret = mlx5_flow_lacp_miss(dev);
1324 DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1325 "forward LACP to kernel.", dev->data->port_id);
1327 DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1328 , dev->data->port_id);
1332 if (dev->data->promiscuous) {
1333 struct rte_flow_item_eth promisc = {
1334 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1335 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1339 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1343 if (dev->data->all_multicast) {
1344 struct rte_flow_item_eth multicast = {
1345 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1346 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1350 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1354 /* Add broadcast/multicast flows. */
1355 for (i = 0; i != vlan_filter_n; ++i) {
1356 uint16_t vlan = priv->vlan_filter[i];
1358 struct rte_flow_item_vlan vlan_spec = {
1359 .tci = rte_cpu_to_be_16(vlan),
1361 struct rte_flow_item_vlan vlan_mask =
1362 rte_flow_item_vlan_mask;
1364 ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1365 &vlan_spec, &vlan_mask);
1368 ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1370 &vlan_spec, &vlan_mask);
1374 if (!vlan_filter_n) {
1375 ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1378 ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1381 /* Do not fail on IPv6 broadcast creation failure. */
1383 "IPv6 broadcast is not supported");
1388 /* Add MAC address flows. */
1389 for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1390 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1392 if (!memcmp(mac, &cmp, sizeof(*mac)))
1394 memcpy(&unicast.dst.addr_bytes,
1396 RTE_ETHER_ADDR_LEN);
1397 for (j = 0; j != vlan_filter_n; ++j) {
1398 uint16_t vlan = priv->vlan_filter[j];
1400 struct rte_flow_item_vlan vlan_spec = {
1401 .tci = rte_cpu_to_be_16(vlan),
1403 struct rte_flow_item_vlan vlan_mask =
1404 rte_flow_item_vlan_mask;
1406 ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1413 if (!vlan_filter_n) {
1414 ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1421 ret = rte_errno; /* Save rte_errno before cleanup. */
1422 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1423 rte_errno = ret; /* Restore rte_errno. */
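/*
 * Illustration of the spec/mask pairs used by the control flows above: an
 * ingress rule matching the IPv6 multicast prefix 33:33:xx:xx:xx:xx on a
 * given VLAN and steering it to one Rx queue.  Minimal application-side
 * sketch of a generic rte_flow rule (mlx5_ctrl_flow_vlan() builds an
 * equivalent pattern internally with its own attributes and actions); the
 * port ID, VLAN and queue values are arbitrary examples.
 */
#include <rte_flow.h>

static struct rte_flow *
example_ipv6_multi_flow(uint16_t port_id, uint16_t vlan_id, uint16_t queue,
                        struct rte_flow_error *error)
{
        const struct rte_flow_attr attr = { .ingress = 1 };
        const struct rte_flow_item_eth eth_spec = {
                .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
        };
        const struct rte_flow_item_eth eth_mask = {
                .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
        };
        const struct rte_flow_item_vlan vlan_spec = {
                .tci = rte_cpu_to_be_16(vlan_id),
        };
        const struct rte_flow_item_vlan vlan_mask = rte_flow_item_vlan_mask;
        const struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH,
                  .spec = &eth_spec, .mask = &eth_mask },
                { .type = RTE_FLOW_ITEM_TYPE_VLAN,
                  .spec = &vlan_spec, .mask = &vlan_mask },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        const struct rte_flow_action_queue queue_conf = { .index = queue };
        const struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };

        return rte_flow_create(port_id, &attr, pattern, actions, error);
}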
1429 * Disable traffic flows configured by the control plane.
1432 * Pointer to Ethernet device structure.
1435 mlx5_traffic_disable(struct rte_eth_dev *dev)
1437 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1441 * Restart traffic flows configured by the control plane.
1444 * Pointer to Ethernet device structure.
1447 * 0 on success, a negative errno value otherwise and rte_errno is set.
1450 mlx5_traffic_restart(struct rte_eth_dev *dev)
1452 if (dev->data->dev_started) {
1453 mlx5_traffic_disable(dev);
1454 return mlx5_traffic_enable(dev);