/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>

#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"
/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}
/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
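/*
 * Note: priv->obj_ops above is the backend vtable (DevX or Verbs) selected
 * at device spawn time; txq_obj_new() creates the HW queue object through
 * whichever backend is active, which keeps this function backend-agnostic.
 */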
/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}
/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

		if (!rxq_ctrl)
			continue;
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
			/* Pre-register Rx mempools. */
			if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
				mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
						  rxq_ctrl->rxq.mprq_mp);
			} else {
				uint32_t s;

				for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
					mlx5_mr_update_mp
						(dev, &rxq_ctrl->rxq.mr_ctrl,
						 rxq_ctrl->rxq.rxseg[s].mp);
			}
			ret = rxq_alloc_elts(rxq_ctrl);
			if (ret)
				goto error;
		}
		MLX5_ASSERT(!rxq_ctrl->obj);
		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
					    sizeof(*rxq_ctrl->obj), 0,
					    rxq_ctrl->socket);
		if (!rxq_ctrl->obj) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u can't allocate resources.",
				dev->data->port_id, (*priv->rxqs)[i]->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.rxq_obj_new(dev, i);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
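/*
 * Pre-registering all Rx mempools above warms the MR (memory region) cache
 * before any packet is received, keeping registration lookups off the
 * datapath fast path.
 */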
/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds each Tx queue to the target Rx queue on the same port (auto bind).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != self_port)
			continue;
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->hairpin_conf.peers[0].port != self_port)
			continue;
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Queues with auto-bind will be destroyed directly. */
		rxq_ctrl->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}
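/*
 * A hairpin pair becomes operational once both sides are moved from RST to
 * RDY with the hairpin_peer_* fields cross-referencing each other: the SQ
 * is given the peer RQ number and VHCA id, and vice versa.
 */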
/**
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue from which the information is fetched.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information (output).
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, a hairpin Tx queue
	 * is bound to the peer Rx queue, so only the peer Rx queue
	 * information needs to be fetched on the other side.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = txq_ctrl->obj->sq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_ctrl *rxq_ctrl;

		rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
		mlx5_rxq_release(dev, peer_queue);
	}
	return 0;
}
/**
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice, once for the Tx queue and once for the Rx
 * queue of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue whose HW configuration is changed for binding.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: the opposite direction is
	 * used to get the peer queue info with an ethdev port ID, so no
	 * extra check is needed here.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and that check is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 1;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}
/**
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice, once for the Tx queue and once for the Rx
 * queue of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue whose HW configuration is changed for unbinding.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RST;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 0;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}
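/*
 * The three helpers above (peer_update/peer_bind/peer_unbind) are the
 * per-queue building blocks behind rte_eth_hairpin_bind() and
 * rte_eth_hairpin_unbind(). Binding one Tx queue manually is roughly
 * (sketch, error handling omitted):
 *
 *   rte_eth_hairpin_queue_peer_update(rx_port, rx_queue, NULL, &peer, 1);
 *   mlx5_hairpin_queue_peer_bind(dev, tx_queue, &peer, 1);
 *   rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue, &cur, 0);
 */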
/**
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx side to one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding a TxQ to its peer RxQ, a first pass over the queues
	 * checks the configuration consistency. This is a little time
	 * consuming, but better than having to roll back.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port connected to the
		 * same peer Rx port should have the same "auto binding" and
		 * "implicit Tx flow" modes.
		 * Peer consistency checking is done during per-queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* If no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin Tx queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch the peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside the mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass the TxQ's information to the peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = txq_ctrl->obj->sq->id;
		cur.vhca_id = priv->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * To access another device properly, the RTE-level private
		 * function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Roll back the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}
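/*
 * Application-level usage sketch (assuming both ports already configured
 * hairpin queues in manual-bind mode via rte_eth_tx_hairpin_queue_setup()
 * and rte_eth_rx_hairpin_queue_setup()):
 *
 *   int ret = rte_eth_hairpin_bind(tx_port, rx_port);
 *   if (ret != 0)
 *       printf("hairpin bind failed: %s\n", rte_strerror(-ret));
 */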
/**
 * Unbind the hairpin port pair; the HW configuration of both devices will
 * be cleared and the status reset for all the queues used between them.
 * This function only supports unbinding the Tx side from one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				mlx5_txq_release(dev, i);
				return -rte_errno;
			}
			/* Same-port auto-bound pairs fall through. */
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret != 0) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret != 0) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}
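/*
 * Pairs in auto-bind mode (same-port loopback) are bound implicitly during
 * device start and torn down together with the queues; only manually bound
 * pairs across two ports are expected to go through this unbind path.
 */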
/**
 * Bind hairpin ports; the Rx port may mean all ports when RTE_MAX_ETHPORTS
 * is used.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;
	struct mlx5_priv *priv = dev->data->dev_private;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the single-port function called
	 * below. The device started status is checked only before updating
	 * the queue information.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, priv->pci_dev)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}
/**
 * Unbind hairpin ports; the Rx port may mean all ports when
 * RTE_MAX_ETHPORTS is used.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}
/**
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, and possibly different from the order in which
 * the hairpin peer queues were set up).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or a positive value on success (the actual number of peer ports),
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %u",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			rxq_ctrl = mlx5_rxq_get(dev, i);
			if (!rxq_ctrl)
				continue;
			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
				mlx5_rxq_release(dev, i);
				continue;
			}
			pp = rxq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_rxq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %u",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_rxq_release(dev, i);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}
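/*
 * Usage sketch from an application (names hypothetical):
 *
 *   uint16_t peers[RTE_MAX_ETHPORTS];
 *   int n = rte_eth_hairpin_get_peer_ports(port_id, peers,
 *                                          RTE_MAX_ETHPORTS, 1);
 *
 * A non-negative n is the number of peer Rx ports found for the Tx side of
 * this port.
 */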
/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step is skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue on the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set the started flag here for the following steps, e.g. control flows. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, only the default mreg copy action needs to be
	 * started here, since no application-created flow exists at this
	 * point. Still, wrapping this in an interface is worthwhile for
	 * further use.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (priv->sh->intr_handle.fd >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (priv->sh->intr_handle_devx.fd >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
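/*
 * The rollback above mirrors the start sequence in reverse order: default
 * flows and control flows first, then the Rx/Tx queues, and packet pacing
 * last, so a failure at any step leaves the port fully stopped.
 */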
/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	usleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic are removed first. */
	mlx5_traffic_disable(dev);
	/* All Rx queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, &priv->flows, true);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev);

	return 0;
}
/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow must be created no matter whether
	 * isolated mode is set. Otherwise, packets to be sent would go out
	 * directly, bypassing the Tx flow actions (e.g. encapsulation).
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (priv->config.dv_esw_en && !priv->config.vf) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret)
				goto error;
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
}
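/*
 * Only the control-plane flow list is flushed here; flows created by the
 * application through rte_flow are left untouched.
 */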
/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}
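/*
 * mlx5_traffic_restart() backs the operations that must rebuild the control
 * flows while the port is running, e.g. a MAC address or VLAN filter update.
 */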