/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"
/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}
/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
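/*
 * Note on the unwind idiom above: mlx5_txq_release() may itself clobber
 * rte_errno, so the error path saves rte_errno first, releases every queue
 * from the failing index back down to 0, and only then restores the value
 * it reports. mlx5_rxq_start() below follows the same pattern.
 */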
/**
 * Translate the chunk address to MR key in order to put it into the cache.
 */
static void
mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
			     struct rte_mempool_memhdr *memhdr,
			     unsigned int idx)
{
	struct mlx5_rxq_data *rxq = opaque;

	RTE_SET_USED(mp);
	RTE_SET_USED(idx);
	mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
}
/**
 * Register Rx queue mempools and fill the Rx queue cache.
 * This function tolerates repeated mempool registration.
 *
 * @param[in] rxq_ctrl
 *   Rx queue control data.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_priv *priv = rxq_ctrl->priv;
	struct rte_mempool *mp;
	uint32_t s;
	int ret = 0;

	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
	/* MPRQ mempool is registered on creation, just fill the cache. */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
		rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
				     mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
		return 0;
	}
	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
		mp = rxq_ctrl->rxq.rxseg[s].mp;
		ret = mlx5_mr_mempool_register(&priv->sh->cdev->mr_scache,
					       priv->sh->cdev->pd, mp,
					       &priv->mp_id);
		if (ret < 0 && rte_errno != EEXIST)
			return ret;
		rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
	}
	return 0;
}
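/*
 * Illustrative sketch (not part of the driver): rte_mempool_mem_iter()
 * calls the supplied callback once per memory chunk of the mempool, which
 * is how mlx5_rxq_mempool_register_cb() pre-fills the per-queue MR cache:
 *
 *	void cb(struct rte_mempool *mp, void *opaque,
 *		struct rte_mempool_memhdr *memhdr, unsigned int idx);
 *
 *	rte_mempool_mem_iter(mp, cb, &rxq_ctrl->rxq); // cb() per chunk
 */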
/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}
/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

		if (rxq_ctrl == NULL)
			continue;
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
			/*
			 * Pre-register the mempools. Regardless of whether
			 * the implicit registration is enabled or not,
			 * Rx mempool destruction is tracked to free MRs.
			 */
			if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
				goto error;
			ret = rxq_alloc_elts(rxq_ctrl);
			if (ret)
				goto error;
		}
		MLX5_ASSERT(!rxq_ctrl->obj);
		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
					    sizeof(*rxq_ctrl->obj), 0,
					    rxq_ctrl->socket);
		if (!rxq_ctrl->obj) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u can't allocate resources.",
				dev->data->port_id, (*priv->rxqs)[i]->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.rxq_obj_new(dev, i);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds Tx queues to the target Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Qs with auto-bind will be destroyed directly. */
		rxq_ctrl->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}
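/*
 * For reference, the DevX state transition performed by the auto-bind loop
 * above; both queue objects start in RST and are moved to RDY while being
 * pointed at each other:
 *
 *	SQ: RST -> RDY, hairpin_peer_rq = RQ id, hairpin_peer_vhca = VHCA id
 *	RQ: RST -> RDY, hairpin_peer_sq = SQ id, hairpin_peer_vhca = VHCA id
 */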
/**
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, hairpin Tx queue
	 * will be bound to the peer Rx queue. Indeed, only the information of
	 * the peer Rx queue needs to be fetched.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = txq_ctrl->obj->sq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_ctrl *rxq_ctrl;

		rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
		mlx5_rxq_release(dev, peer_queue);
	}
	return 0;
}
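/*
 * Minimal usage sketch with hypothetical values: before binding local Tx
 * queue 3 to Rx queue 3 of peer port 1, the local port fetches the peer
 * RxQ information through the ethdev-internal helper (direction == 1):
 *
 *	struct rte_hairpin_peer_info peer;
 *
 *	rte_eth_hairpin_queue_peer_update(1, 3, NULL, &peer, 1);
 */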
/**
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: opposite direction is used
	 * to get the peer queue info with ethdev port ID, no need to check.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and that check is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 1;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}
/**
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RST;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 0;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}
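/*
 * Bind and unbind are symmetric: peer_bind() moves a queue to RDY with the
 * peer's QP id and VHCA id and sets hairpin_status, peer_unbind() moves it
 * back to RST and clears hairpin_status. Each must be called once for the
 * Tx side and once for the Rx side of a pair.
 */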
/**
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx to one Rx.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding TxQ to peer RxQ, first round loop will be used for
	 * checking the queues' configuration consistency. This would be a
	 * little time consuming but better than doing the rollback.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port that are connected
		 * to the same peer Rx port should have the same "auto
		 * binding" and "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* Once no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin TX queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = txq_ctrl->obj->sq->id;
		cur.vhca_id = priv->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * In order to access another device in a proper way, RTE level
		 * private function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Do roll-back process for the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}
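/*
 * The per-queue sequence above, condensed (the Tx side is local):
 *
 *	rte_eth_hairpin_queue_peer_update(rx_port, rxq, NULL, &peer, 1);
 *	mlx5_hairpin_queue_peer_bind(dev, txq, &peer, 1);	 // local SQ
 *	rte_eth_hairpin_queue_peer_bind(rx_port, rxq, &cur, 0); // peer RQ
 *
 * On any failure, the queues already bound are rolled back in reverse.
 */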
/**
 * Unbind the hairpin port pair, HW configuration of both devices will be
 * cleared and the status will be reset for all the queues used between them.
 * This function only supports unbinding the Tx from one Rx.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Indeed, only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				mlx5_txq_release(dev, i);
				return -rte_errno;
			} else {
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}
/*
 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the called function of single port.
	 * Device started status will be checked only before the queue
	 * information updating.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, dev->device)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}
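/*
 * Illustrative application-level usage (assuming ports 0 and 1 are mlx5
 * devices with hairpin queues configured between them):
 *
 *	rte_eth_hairpin_bind(0, 1);		// Tx of port 0 to Rx of port 1
 *	rte_eth_hairpin_bind(0, RTE_MAX_ETHPORTS); // port 0 to all peers
 */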
/*
 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}
/**
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, which may differ from the order used when setting
 * up the hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or positive value on success, actual number of peer ports.
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			rxq_ctrl = mlx5_rxq_get(dev, i);
			if (!rxq_ctrl)
				continue;
			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
				mlx5_rxq_release(dev, i);
				continue;
			}
			pp = rxq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_rxq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_rxq_release(dev, i);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}
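/*
 * Illustrative usage from the application side (buffer size is arbitrary):
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int n = rte_eth_hairpin_get_peer_ports(port_id, peers,
 *					       RTE_MAX_ETHPORTS, 1);
 *
 * A non-negative n is the number of peer Rx ports stored in peers[],
 * sorted by port identifier.
 */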
/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if ((priv->sh->devx && priv->config.dv_flow_en &&
	     priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
		ret = priv->obj_ops.lb_dummy_queue_create(dev);
		if (ret)
			goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step is skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue from the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/* Attach indirection table objects detached on port stop. */
	ret = mlx5_action_handle_attach(dev);
	if (ret) {
		DRV_LOG(ERR,
			"port %u failed to attach indirect actions: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	/*
	 * In non-cached mode, it only needs to start the default mreg copy
	 * action and no flow created by application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
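/*
 * Illustrative application-level path into this callback (a minimal
 * sketch; queue counts and configuration are placeholders):
 *
 *	struct rte_eth_conf conf = { 0 };
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *	// ... rte_eth_rx_queue_setup() / rte_eth_tx_queue_setup() ...
 *	rte_eth_dev_start(port_id);	// ends up in mlx5_dev_start()
 */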
/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	rte_delay_us_sleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All RX queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_flow_meter_rxq_flush(dev);
	mlx5_action_handle_detach(dev);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev);

	return 0;
}
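/*
 * Teardown order note: the burst callbacks are replaced with stubs and a
 * grace period elapses before any queue state is released, so in-flight
 * datapath calls can drain; packet pacing is stopped last, mirroring the
 * start order in mlx5_dev_start().
 */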
/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin txq default flow should be created regardless of
	 * isolation mode; otherwise all the packets to be sent will be sent
	 * out directly without the TX flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if ((priv->representor || priv->master) &&
		    priv->config.dv_esw_en) {
			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
				DRV_LOG(ERR,
					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
					dev->data->port_id, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret) {
				/* Do not fail on IPv6 broadcast creation failure. */
				DRV_LOG(WARNING,
					"IPv6 broadcast is not supported");
				ret = 0;
			}
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
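/*
 * For reference, the spec/mask convention used by the unicast rules above:
 * matching one destination MAC only constrains the destination bytes
 * ("<MAC>" below is a placeholder for a real address):
 *
 *	struct rte_flow_item_eth spec = { .dst.addr_bytes = "<MAC>" };
 *	struct rte_flow_item_eth mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	mlx5_ctrl_flow(dev, &spec, &mask);
 */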
/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}
/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}
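/*
 * mlx5_traffic_restart() is intended for configuration paths that change
 * what the control flows match (e.g. MAC address, VLAN filter or
 * promiscuous/allmulticast updates): the old control flows are flushed and
 * re-created from the current device state while the port keeps running.
 */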