1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
14 #include <mlx5_malloc.h>
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
24 * Stop traffic on Tx queues.
27 * Pointer to Ethernet device structure.
30 mlx5_txq_stop(struct rte_eth_dev *dev)
/* Tx-side stop helper: hands every Tx queue index to mlx5_txq_release(). */
32 struct mlx5_priv *priv = dev->data->dev_private;
/* One release call per index in [0, priv->txqs_n). */
35 for (i = 0; i != priv->txqs_n; ++i)
36 mlx5_txq_release(dev, i);
40 * Start traffic on Tx queues.
43 * Pointer to Ethernet device structure.
46 * 0 on success, a negative errno value otherwise and rte_errno is set.
49 mlx5_txq_start(struct rte_eth_dev *dev)
/*
 * Per Tx queue: fetch the queue control with mlx5_txq_get(), allocate a
 * queue object, let priv->obj_ops.txq_obj_new() build it and link the
 * object on the priv->txqsobj list.
 */
51 struct mlx5_priv *priv = dev->data->dev_private;
55 for (i = 0; i != priv->txqs_n; ++i) {
56 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57 struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
/* Allocation flags shared by both mlx5_malloc() calls below. */
58 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
/*
 * NOTE(review): listing lines 59-61 are not shown; confirm txq_ctrl is
 * NULL-checked before it is dereferenced below.
 */
62 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63 txq_alloc_elts(txq_ctrl);
/* No object may be attached to the queue at this point. */
64 MLX5_ASSERT(!txq_ctrl->obj);
65 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
68 DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69 "memory resources.", dev->data->port_id,
/* Object construction is delegated to the ops table. */
74 ret = priv->obj_ops.txq_obj_new(dev, i);
76 mlx5_free(txq_ctrl->obj);
/* Standard queues also get an fcqs array with cqe_s entries. */
80 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81 size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
83 txq_data->fcqs = mlx5_malloc(flags, size,
86 if (!txq_data->fcqs) {
87 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88 "allocate memory (FCQ).",
89 dev->data->port_id, i);
/*
 * NOTE(review): %p receives &txq_ctrl->obj, i.e. the address of the
 * pointer field rather than the object itself - confirm this is intended.
 */
94 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95 dev->data->port_id, i, (void *)&txq_ctrl->obj);
96 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
/* Error path: rte_errno is preserved across the cleanup calls. */
100 ret = rte_errno; /* Save rte_errno before cleanup. */
102 mlx5_txq_release(dev, i);
104 rte_errno = ret; /* Restore rte_errno. */
109 * Stop traffic on Rx queues.
112 * Pointer to Ethernet device structure.
115 mlx5_rxq_stop(struct rte_eth_dev *dev)
/* Rx-side stop helper: hands every Rx queue index to mlx5_rxq_release(). */
117 struct mlx5_priv *priv = dev->data->dev_private;
/* One release call per index in [0, priv->rxqs_n). */
120 for (i = 0; i != priv->rxqs_n; ++i)
121 mlx5_rxq_release(dev, i);
125 * Start traffic on Rx queues.
128 * Pointer to Ethernet device structure.
131 * 0 on success, a negative errno value otherwise and rte_errno is set.
134 mlx5_rxq_start(struct rte_eth_dev *dev)
/*
 * Per Rx queue: standard queues get their mempools pre-registered and their
 * elements allocated; then a queue object is allocated, built through
 * priv->obj_ops.rxq_obj_new() and linked on the priv->rxqsobj list.
 */
136 struct mlx5_priv *priv = dev->data->dev_private;
140 /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
141 if (mlx5_mprq_alloc_mp(dev)) {
142 /* Should not release Rx queues but return immediately. */
/* Debug-only dump of two device attributes. */
145 DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
146 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
147 DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
148 dev->data->port_id, priv->sh->device_attr.max_sge);
149 for (i = 0; i != priv->rxqs_n; ++i) {
150 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
/*
 * NOTE(review): listing lines 151-153 are not shown; confirm rxq_ctrl is
 * NULL-checked before it is dereferenced below.
 */
154 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
155 /* Pre-register Rx mempools. */
156 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
157 mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
158 rxq_ctrl->rxq.mprq_mp);
162 for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
164 (dev, &rxq_ctrl->rxq.mr_ctrl,
165 rxq_ctrl->rxq.rxseg[s].mp);
167 ret = rxq_alloc_elts(rxq_ctrl);
/* No object may be attached to the queue at this point. */
171 MLX5_ASSERT(!rxq_ctrl->obj);
172 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
173 sizeof(*rxq_ctrl->obj), 0,
175 if (!rxq_ctrl->obj) {
177 "Port %u Rx queue %u can't allocate resources.",
178 dev->data->port_id, (*priv->rxqs)[i]->idx);
/* Object construction is delegated to the ops table. */
182 ret = priv->obj_ops.rxq_obj_new(dev, i);
184 mlx5_free(rxq_ctrl->obj);
/*
 * NOTE(review): %p receives &rxq_ctrl->obj, i.e. the address of the
 * pointer field rather than the object itself - confirm this is intended.
 */
187 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
188 dev->data->port_id, i, (void *)&rxq_ctrl->obj);
189 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
/* Error path: rte_errno is preserved across the cleanup calls. */
193 ret = rte_errno; /* Save rte_errno before cleanup. */
195 mlx5_rxq_release(dev, i);
197 rte_errno = ret; /* Restore rte_errno. */
202 * Binds Tx queues to Rx queues for hairpin.
204 * Binds Tx queues to the target Rx queues.
207 * Pointer to Ethernet device structure.
210 * 0 on success, a negative errno value otherwise and rte_errno is set.
213 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
/*
 * Two passes over the Tx queues. The first only inspects the hairpin
 * configuration (queue type, peer port, manual_bind). The second pairs each
 * hairpin Tx queue peered with this same port with its Rx queue, by
 * programming the SQ and the RQ with each other's id through the DevX
 * modify commands.
 */
215 struct mlx5_priv *priv = dev->data->dev_private;
216 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
217 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
218 struct mlx5_txq_ctrl *txq_ctrl;
219 struct mlx5_rxq_ctrl *rxq_ctrl;
220 struct mlx5_devx_obj *sq;
221 struct mlx5_devx_obj *rq;
224 bool need_auto = false;
225 uint16_t self_port = dev->data->port_id;
/* Pass 1: configuration inspection; no HW command in the visible lines. */
227 for (i = 0; i != priv->txqs_n; ++i) {
228 txq_ctrl = mlx5_txq_get(dev, i);
231 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
232 mlx5_txq_release(dev, i);
235 if (txq_ctrl->hairpin_conf.peers[0].port != self_port)
237 if (txq_ctrl->hairpin_conf.manual_bind) {
238 mlx5_txq_release(dev, i);
242 mlx5_txq_release(dev, i);
/* Pass 2: the actual SQ <-> RQ binding. */
246 for (i = 0; i != priv->txqs_n; ++i) {
247 txq_ctrl = mlx5_txq_get(dev, i);
250 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
251 mlx5_txq_release(dev, i);
254 /* Skip hairpin queues with other peer ports. */
255 if (txq_ctrl->hairpin_conf.peers[0].port != self_port)
257 if (!txq_ctrl->obj) {
259 DRV_LOG(ERR, "port %u no txq object found: %d",
260 dev->data->port_id, i);
261 mlx5_txq_release(dev, i);
264 sq = txq_ctrl->obj->sq;
/* The peer Rx queue index comes from the first hairpin peer entry. */
265 rxq_ctrl = mlx5_rxq_get(dev,
266 txq_ctrl->hairpin_conf.peers[0].queue);
268 mlx5_txq_release(dev, i);
270 DRV_LOG(ERR, "port %u no rxq object found: %d",
272 txq_ctrl->hairpin_conf.peers[0].queue);
/* The Rx queue must be hairpin type and point back at this Tx queue. */
275 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
276 rxq_ctrl->hairpin_conf.peers[0].queue != i) {
278 DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
279 "Rx queue %d", dev->data->port_id,
280 i, txq_ctrl->hairpin_conf.peers[0].queue);
/*
 * NOTE(review): rxq_ctrl->obj is dereferenced without a visible NULL
 * check, unlike txq_ctrl->obj above - confirm it is guaranteed here.
 */
283 rq = rxq_ctrl->obj->rq;
286 DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
288 txq_ctrl->hairpin_conf.peers[0].queue);
/* SQ modify attributes: peer RQ id plus the local vhca_id. */
291 sq_attr.state = MLX5_SQC_STATE_RDY;
292 sq_attr.sq_state = MLX5_SQC_STATE_RST;
293 sq_attr.hairpin_peer_rq = rq->id;
294 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
295 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
/*
 * RQ modify attributes: peer SQ id plus the local vhca_id.
 * NOTE(review): the RQ attributes are filled with MLX5_SQC_STATE_*
 * constants - confirm they match the RQ state encoding.
 */
298 rq_attr.state = MLX5_SQC_STATE_RDY;
299 rq_attr.rq_state = MLX5_SQC_STATE_RST;
300 rq_attr.hairpin_peer_sq = sq->id;
301 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
302 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
305 /* Qs with auto-bind will be destroyed directly. */
306 rxq_ctrl->hairpin_status = 1;
307 txq_ctrl->hairpin_status = 1;
/*
 * NOTE(review): txq_ctrl->hairpin_conf is read right after
 * mlx5_txq_release(dev, i), here and in the pair of calls that follows -
 * confirm the release cannot free txq_ctrl.
 */
308 mlx5_txq_release(dev, i);
309 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
313 mlx5_txq_release(dev, i);
314 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
319 * Fetch the peer queue's SW & HW information.
322 * Pointer to Ethernet device structure.
324 * Index of the queue to fetch the information.
325 * @param current_info
326 * Pointer to the input peer information, not used currently.
328 * Pointer to the structure to store the information, output.
330 * Positive to get the RxQ information, zero to get the TxQ information.
333 * 0 on success, a negative errno value otherwise and rte_errno is set.
336 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
337 struct rte_hairpin_peer_info *current_info,
338 struct rte_hairpin_peer_info *peer_info,
/*
 * Exports the identifiers of local queue peer_queue into *peer_info:
 * direction == 0 reads a hairpin Tx queue (SQ id), any other value reads a
 * hairpin Rx queue (RQ id). current_info is accepted but unused.
 */
341 struct mlx5_priv *priv = dev->data->dev_private;
342 RTE_SET_USED(current_info);
/* A port that is not started is reported as an error. */
344 if (dev->data->dev_started == 0) {
346 DRV_LOG(ERR, "peer port %u is not started",
351 * Peer port used as egress. In the current design, hairpin Tx queue
352 * will be bound to the peer Rx queue. Indeed, only the information of
353 * peer Rx queue needs to be fetched.
355 if (direction == 0) {
356 struct mlx5_txq_ctrl *txq_ctrl;
358 txq_ctrl = mlx5_txq_get(dev, peer_queue);
359 if (txq_ctrl == NULL) {
361 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
362 dev->data->port_id, peer_queue);
/* Type and object checks: each failure releases the reference taken above. */
365 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
367 DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
368 dev->data->port_id, peer_queue);
369 mlx5_txq_release(dev, peer_queue);
372 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
374 DRV_LOG(ERR, "port %u no Txq object found: %d",
375 dev->data->port_id, peer_queue);
376 mlx5_txq_release(dev, peer_queue);
/* Copy the SQ id, vhca_id and hairpin settings, then drop the reference. */
379 peer_info->qp_id = txq_ctrl->obj->sq->id;
380 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
381 /* 1-to-1 mapping, only the first one is used. */
382 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
383 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
384 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
385 mlx5_txq_release(dev, peer_queue);
386 } else { /* Peer port used as ingress. */
387 struct mlx5_rxq_ctrl *rxq_ctrl;
389 rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
390 if (rxq_ctrl == NULL) {
392 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
393 dev->data->port_id, peer_queue);
/* Same checks as the Tx branch, applied to the Rx queue. */
396 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
398 DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
399 dev->data->port_id, peer_queue);
400 mlx5_rxq_release(dev, peer_queue);
403 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
405 DRV_LOG(ERR, "port %u no Rxq object found: %d",
406 dev->data->port_id, peer_queue);
407 mlx5_rxq_release(dev, peer_queue);
/* Copy the RQ id, vhca_id and hairpin settings, then drop the reference. */
410 peer_info->qp_id = rxq_ctrl->obj->rq->id;
411 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
412 peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
413 peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
414 peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
415 mlx5_rxq_release(dev, peer_queue);
421 * Bind the hairpin queue with the peer HW information.
422 * This needs to be called twice both for Tx and Rx queues of a pair.
423 * If the queue is already bound, it is considered successful.
426 * Pointer to Ethernet device structure.
428 * Index of the queue to change the HW configuration to bind.
430 * Pointer to information of the peer queue.
432 * Positive to configure the TxQ, zero to configure the RxQ.
435 * 0 on success, a negative errno value otherwise and rte_errno is set.
438 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
439 struct rte_hairpin_peer_info *peer_info,
/*
 * Programs local hairpin queue cur_queue with the peer's qp_id/vhca_id:
 * a non-zero direction modifies the SQ of a Tx queue, zero modifies the RQ
 * of an Rx queue. hairpin_status is set to 1 after the modify command (the
 * line guarding that assignment is not visible in this listing).
 */
445 * Consistency checking of the peer queue: opposite direction is used
446 * to get the peer queue info with ethdev port ID, no need to check.
448 if (peer_info->peer_q != cur_queue) {
450 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
451 dev->data->port_id, cur_queue, peer_info->peer_q);
454 if (direction != 0) {
455 struct mlx5_txq_ctrl *txq_ctrl;
456 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
458 txq_ctrl = mlx5_txq_get(dev, cur_queue);
459 if (txq_ctrl == NULL) {
461 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
462 dev->data->port_id, cur_queue);
/* Every check below releases the queue reference before leaving. */
465 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
467 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
468 dev->data->port_id, cur_queue);
469 mlx5_txq_release(dev, cur_queue);
472 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
474 DRV_LOG(ERR, "port %u no Txq object found: %d",
475 dev->data->port_id, cur_queue);
476 mlx5_txq_release(dev, cur_queue);
/* An already-bound queue is only logged at DEBUG level, not as an error. */
479 if (txq_ctrl->hairpin_status != 0) {
480 DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
481 dev->data->port_id, cur_queue);
482 mlx5_txq_release(dev, cur_queue);
486 * All queues' of one port consistency checking is done in the
487 * bind() function, and that is optional.
489 if (peer_info->tx_explicit !=
490 txq_ctrl->hairpin_conf.tx_explicit) {
492 DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
493 " mismatch", dev->data->port_id, cur_queue);
494 mlx5_txq_release(dev, cur_queue);
497 if (peer_info->manual_bind !=
498 txq_ctrl->hairpin_conf.manual_bind) {
500 DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
501 " mismatch", dev->data->port_id, cur_queue);
502 mlx5_txq_release(dev, cur_queue);
/* SQ modify attributes take the peer's queue id and vhca_id. */
505 sq_attr.state = MLX5_SQC_STATE_RDY;
506 sq_attr.sq_state = MLX5_SQC_STATE_RST;
507 sq_attr.hairpin_peer_rq = peer_info->qp_id;
508 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
509 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
511 txq_ctrl->hairpin_status = 1;
512 mlx5_txq_release(dev, cur_queue);
514 struct mlx5_rxq_ctrl *rxq_ctrl;
515 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
517 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
518 if (rxq_ctrl == NULL) {
520 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
521 dev->data->port_id, cur_queue);
/* Rx branch: same sequence of checks as the Tx branch. */
524 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
526 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
527 dev->data->port_id, cur_queue);
528 mlx5_rxq_release(dev, cur_queue);
531 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
533 DRV_LOG(ERR, "port %u no Rxq object found: %d",
534 dev->data->port_id, cur_queue);
535 mlx5_rxq_release(dev, cur_queue);
538 if (rxq_ctrl->hairpin_status != 0) {
539 DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
540 dev->data->port_id, cur_queue);
541 mlx5_rxq_release(dev, cur_queue);
544 if (peer_info->tx_explicit !=
545 rxq_ctrl->hairpin_conf.tx_explicit) {
547 DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
548 " mismatch", dev->data->port_id, cur_queue);
549 mlx5_rxq_release(dev, cur_queue);
552 if (peer_info->manual_bind !=
553 rxq_ctrl->hairpin_conf.manual_bind) {
555 DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
556 " mismatch", dev->data->port_id, cur_queue);
557 mlx5_rxq_release(dev, cur_queue);
/*
 * RQ modify attributes take the peer's queue id and vhca_id.
 * NOTE(review): MLX5_SQC_STATE_* constants are used for the RQ attributes -
 * confirm they match the RQ state encoding.
 */
560 rq_attr.state = MLX5_SQC_STATE_RDY;
561 rq_attr.rq_state = MLX5_SQC_STATE_RST;
562 rq_attr.hairpin_peer_sq = peer_info->qp_id;
563 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
564 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
566 rxq_ctrl->hairpin_status = 1;
567 mlx5_rxq_release(dev, cur_queue);
573 * Unbind the hairpin queue and reset its HW configuration.
574 * This needs to be called twice both for Tx and Rx queues of a pair.
575 * If the queue is already unbound, it is considered successful.
578 * Pointer to Ethernet device structure.
580 * Index of the queue to change the HW configuration to unbind.
582 * Positive to reset the TxQ, zero to reset the RxQ.
585 * 0 on success, a negative errno value otherwise and rte_errno is set.
588 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
/*
 * Counterpart of the per-queue bind: a non-zero direction issues an SQ
 * modify command with both state fields set to MLX5_SQC_STATE_RST, zero
 * does the same for the RQ. hairpin_status is cleared afterwards (the line
 * guarding that assignment is not visible in this listing).
 */
593 if (direction != 0) {
594 struct mlx5_txq_ctrl *txq_ctrl;
595 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
597 txq_ctrl = mlx5_txq_get(dev, cur_queue);
598 if (txq_ctrl == NULL) {
600 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
601 dev->data->port_id, cur_queue);
604 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
606 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
607 dev->data->port_id, cur_queue);
608 mlx5_txq_release(dev, cur_queue);
611 /* Already unbound, return success before obj checking. */
612 if (txq_ctrl->hairpin_status == 0) {
613 DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
614 dev->data->port_id, cur_queue);
615 mlx5_txq_release(dev, cur_queue);
618 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
620 DRV_LOG(ERR, "port %u no Txq object found: %d",
621 dev->data->port_id, cur_queue);
622 mlx5_txq_release(dev, cur_queue);
/* Both SQ state fields are RST; no peer fields are filled in. */
625 sq_attr.state = MLX5_SQC_STATE_RST;
626 sq_attr.sq_state = MLX5_SQC_STATE_RST;
627 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
629 txq_ctrl->hairpin_status = 0;
630 mlx5_txq_release(dev, cur_queue);
632 struct mlx5_rxq_ctrl *rxq_ctrl;
633 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
635 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
636 if (rxq_ctrl == NULL) {
638 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
639 dev->data->port_id, cur_queue);
642 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
644 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
645 dev->data->port_id, cur_queue);
646 mlx5_rxq_release(dev, cur_queue);
/* As on the Tx side, the status check comes before the object check. */
649 if (rxq_ctrl->hairpin_status == 0) {
650 DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
651 dev->data->port_id, cur_queue);
652 mlx5_rxq_release(dev, cur_queue);
655 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
657 DRV_LOG(ERR, "port %u no Rxq object found: %d",
658 dev->data->port_id, cur_queue);
659 mlx5_rxq_release(dev, cur_queue);
/*
 * Both RQ state fields are RST.
 * NOTE(review): MLX5_SQC_STATE_* constants are used for the RQ attributes -
 * confirm they match the RQ state encoding.
 */
662 rq_attr.state = MLX5_SQC_STATE_RST;
663 rq_attr.rq_state = MLX5_SQC_STATE_RST;
664 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
666 rxq_ctrl->hairpin_status = 0;
667 mlx5_rxq_release(dev, cur_queue);
673 * Bind the hairpin port pairs, from the Tx to the peer Rx.
674 * This function only supports binding the Tx to one Rx.
677 * Pointer to Ethernet device structure.
679 * Port identifier of the Rx port.
682 * 0 on success, a negative errno value otherwise and rte_errno is set.
685 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
/*
 * Binds the hairpin Tx queues of dev whose first peer is rx_port. A first
 * loop compares the manual_bind/tx_explicit modes of those queues; a second
 * loop fetches the peer Rx queue information, binds the local Tx queue and
 * then asks the peer port to bind its Rx queue. The trailing section
 * unbinds queues again as a roll-back.
 */
687 struct mlx5_priv *priv = dev->data->dev_private;
689 struct mlx5_txq_ctrl *txq_ctrl;
691 struct rte_hairpin_peer_info peer = {0xffffff};
692 struct rte_hairpin_peer_info cur;
693 const struct rte_eth_hairpin_conf *conf;
695 uint16_t local_port = priv->dev_data->port_id;
/* Reject an rx_port that mlx5_eth_find_next() does not return for this device. */
700 if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
702 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
706 * Before binding TxQ to peer RxQ, first round loop will be used for
707 * checking the queues' configuration consistency. This would be a
708 * little time consuming but better than doing the rollback.
710 for (i = 0; i != priv->txqs_n; i++) {
711 txq_ctrl = mlx5_txq_get(dev, i);
712 if (txq_ctrl == NULL)
714 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
715 mlx5_txq_release(dev, i);
719 * All hairpin Tx queues of a single port that connected to the
720 * same peer Rx port should have the same "auto binding" and
721 * "implicit Tx flow" modes.
722 * Peer consistency checking will be done in per queue binding.
724 conf = &txq_ctrl->hairpin_conf;
725 if (conf->peers[0].port == rx_port) {
727 manual = conf->manual_bind;
728 explicit = conf->tx_explicit;
730 if (manual != conf->manual_bind ||
731 explicit != conf->tx_explicit) {
733 DRV_LOG(ERR, "port %u queue %d mode"
734 " mismatch: %u %u, %u %u",
735 local_port, i, manual,
736 conf->manual_bind, explicit,
738 mlx5_txq_release(dev, i);
744 mlx5_txq_release(dev, i);
746 /* If no queue is configured, success is returned directly. */
749 /* All the hairpin TX queues need to be traversed again. */
750 for (i = 0; i != priv->txqs_n; i++) {
751 txq_ctrl = mlx5_txq_get(dev, i);
752 if (txq_ctrl == NULL)
754 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
755 mlx5_txq_release(dev, i);
758 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
759 mlx5_txq_release(dev, i);
762 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
764 * Fetch peer RxQ's information.
765 * No need to pass the information of the current queue.
767 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
770 mlx5_txq_release(dev, i);
773 /* Accessing its own device, inside mlx5 PMD. */
774 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
776 mlx5_txq_release(dev, i);
779 /* Pass TxQ's information to peer RxQ and try binding. */
780 cur.peer_q = rx_queue;
781 cur.qp_id = txq_ctrl->obj->sq->id;
782 cur.vhca_id = priv->config.hca_attr.vhca_id;
783 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
784 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
786 * In order to access another device in a proper way, RTE level
787 * private function is needed.
789 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
792 mlx5_txq_release(dev, i);
795 mlx5_txq_release(dev, i);
800 * Do roll-back process for the queues already bound.
801 * No need to check the return value of the queue unbind function.
804 /* No validation is needed here. */
805 txq_ctrl = mlx5_txq_get(dev, i);
806 if (txq_ctrl == NULL)
808 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
/* Roll-back order: the peer Rx queue is unbound first, then the local Tx queue. */
809 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
810 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
811 mlx5_txq_release(dev, i);
817 * Unbind the hairpin port pair. The HW configuration of both devices will be
818 * cleared and the status will be reset for all the queues used between them.
819 * This function only supports unbinding the Tx from one Rx.
822 * Pointer to Ethernet device structure.
824 * Port identifier of the Rx port.
827 * 0 on success, a negative errno value otherwise and rte_errno is set.
830 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
/*
 * For every hairpin Tx queue of dev whose first peer is rx_port: unbind the
 * peer Rx queue through the ethdev-level call, then unbind the local Tx
 * queue.
 */
832 struct mlx5_priv *priv = dev->data->dev_private;
833 struct mlx5_txq_ctrl *txq_ctrl;
836 uint16_t cur_port = priv->dev_data->port_id;
/* Reject an rx_port that mlx5_eth_find_next() does not return for this device. */
838 if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
840 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
843 for (i = 0; i != priv->txqs_n; i++) {
846 txq_ctrl = mlx5_txq_get(dev, i);
847 if (txq_ctrl == NULL)
849 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
850 mlx5_txq_release(dev, i);
853 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
854 mlx5_txq_release(dev, i);
857 /* Indeed, only the first used queue needs to be checked. */
858 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
859 if (cur_port != rx_port) {
861 DRV_LOG(ERR, "port %u and port %u are in"
862 " auto-bind mode", cur_port, rx_port);
863 mlx5_txq_release(dev, i);
/* The peer queue index is copied out before the reference is dropped. */
869 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
870 mlx5_txq_release(dev, i);
871 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
873 DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
877 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
879 DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
888 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
889 * @see mlx5_hairpin_bind_single_port()
892 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
/*
 * Entry point for binding: a single rx_port is forwarded directly to
 * mlx5_hairpin_bind_single_port(); RTE_MAX_ETHPORTS makes it iterate over
 * the ports yielded by MLX5_ETH_FOREACH_DEV for this device.
 */
898 * If the Rx port has no hairpin configuration with the current port,
899 * the binding will be skipped in the called function of single port.
900 * Device started status will be checked only before the queue
901 * information updating.
903 if (rx_port == RTE_MAX_ETHPORTS) {
904 MLX5_ETH_FOREACH_DEV(p, dev->device) {
905 ret = mlx5_hairpin_bind_single_port(dev, p);
911 return mlx5_hairpin_bind_single_port(dev, rx_port);
/*
 * Roll-back walk: ports are unbound again (a line between the iteration
 * and the unbind call is not visible in this listing).
 */
914 MLX5_ETH_FOREACH_DEV(pp, dev->device)
916 mlx5_hairpin_unbind_single_port(dev, pp);
921 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
922 * @see mlx5_hairpin_unbind_single_port()
925 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
/*
 * Entry point for unbinding: RTE_MAX_ETHPORTS iterates over the ports
 * yielded by MLX5_ETH_FOREACH_DEV, any other value unbinds that single port.
 */
930 if (rx_port == RTE_MAX_ETHPORTS)
931 MLX5_ETH_FOREACH_DEV(p, dev->device) {
932 ret = mlx5_hairpin_unbind_single_port(dev, p);
937 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
942 * DPDK callback to get the hairpin peer ports list.
943 * This will return the actual number of peer ports and save the identifiers
944 * into the array (sorted, may be different from that when setting up the
945 * hairpin peer queues).
946 * The peer port ID could be the same as the port ID of the current device.
949 * Pointer to Ethernet device structure.
951 * Pointer to array to save the port identifiers.
953 * The length of the array.
955 * Current port to peer port direction.
956 * positive - current used as Tx to get all peer Rx ports.
957 * zero - current used as Rx to get all peer Tx ports.
960 * 0 or positive value on success, actual number of peer ports.
961 * a negative errno value otherwise and rte_errno is set.
964 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
965 size_t len, uint32_t direction)
/*
 * Collects the distinct peer port ids of the hairpin queues into a bitmap
 * (one bit per port id below RTE_MAX_ETHPORTS) and then writes the set bits,
 * in ascending order, into peer_ports[].
 */
967 struct mlx5_priv *priv = dev->data->dev_private;
968 struct mlx5_txq_ctrl *txq_ctrl;
969 struct mlx5_rxq_ctrl *rxq_ctrl;
/* 32 port ids per word, rounded up. */
972 uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
976 for (i = 0; i < priv->txqs_n; i++) {
977 txq_ctrl = mlx5_txq_get(dev, i);
980 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
981 mlx5_txq_release(dev, i);
984 pp = txq_ctrl->hairpin_conf.peers[0].port;
985 if (pp >= RTE_MAX_ETHPORTS) {
987 mlx5_txq_release(dev, i);
988 DRV_LOG(ERR, "port %hu queue %u peer port "
990 priv->dev_data->port_id, i, pp);
/*
 * NOTE(review): the literal 1 is a signed int, so the shift reaches the
 * sign bit when pp % 32 == 31; an unsigned literal (1u) would avoid that.
 * The same pattern is used in the Rx loop and in the final scan.
 */
993 bits[pp / 32] |= 1 << (pp % 32);
994 mlx5_txq_release(dev, i);
997 for (i = 0; i < priv->rxqs_n; i++) {
998 rxq_ctrl = mlx5_rxq_get(dev, i);
1001 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
1002 mlx5_rxq_release(dev, i);
1005 pp = rxq_ctrl->hairpin_conf.peers[0].port;
1006 if (pp >= RTE_MAX_ETHPORTS) {
1008 mlx5_rxq_release(dev, i);
1009 DRV_LOG(ERR, "port %hu queue %u peer port "
1011 priv->dev_data->port_id, i, pp);
1014 bits[pp / 32] |= 1 << (pp % 32);
1015 mlx5_rxq_release(dev, i);
/* Emit the collected port ids; ret counts the entries written so far. */
1018 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1019 if (bits[i / 32] & (1 << (i % 32))) {
/* Checked before each store so that at most len entries are written. */
1020 if ((size_t)ret >= len) {
1024 peer_ports[ret++] = i;
1031 * DPDK callback to start the device.
1033 * Simulate device start by attaching all configured flows.
1036 * Pointer to Ethernet device structure.
1039 * 0 on success, a negative errno value otherwise and rte_errno is set.
1042 mlx5_dev_start(struct rte_eth_dev *dev)
/*
 * Start sequence, in the order of the visible calls: RSS RETA configuration
 * (only with Rx queues), Tx packet pacing, optional loopback dummy queue,
 * Tx queues, Rx queues, hairpin auto-bind, Rx interrupt vector, statistics,
 * control flows, default flow actions, and finally the burst functions.
 * The trailing section is the cleanup used when a step fails.
 */
1044 struct mlx5_priv *priv = dev->data->dev_private;
1048 DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
/* A non-negative lookup result is used as the bit position of the mask. */
1049 fine_inline = rte_mbuf_dynflag_lookup
1050 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1051 if (fine_inline >= 0)
1052 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1054 rte_net_mlx5_dynf_inline_mask = 0;
1055 if (dev->data->nb_rx_queues > 0) {
1056 ret = mlx5_dev_configure_rss_reta(dev);
1058 DRV_LOG(ERR, "port %u reta config failed: %s",
1059 dev->data->port_id, strerror(rte_errno));
1063 ret = mlx5_txpp_start(dev);
1065 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1066 dev->data->port_id, strerror(rte_errno));
/* The dummy queue hook is optional: it is only called when the ops table provides it. */
1069 if ((priv->config.devx && priv->config.dv_flow_en &&
1070 priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1071 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1075 ret = mlx5_txq_start(dev);
1077 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1078 dev->data->port_id, strerror(rte_errno));
1081 ret = mlx5_rxq_start(dev);
1083 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1084 dev->data->port_id, strerror(rte_errno));
1088 * Such step will be skipped if there is no hairpin TX queue configured
1089 * with RX peer queue from the same device.
1091 ret = mlx5_hairpin_auto_bind(dev);
1093 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1094 dev->data->port_id, strerror(rte_errno));
1097 /* Set started flag here for the following steps like control flow. */
1098 dev->data->dev_started = 1;
1099 ret = mlx5_rx_intr_vec_enable(dev);
1101 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1102 dev->data->port_id);
1105 mlx5_os_stats_init(dev);
1106 ret = mlx5_traffic_enable(dev);
1108 DRV_LOG(ERR, "port %u failed to set defaults flows",
1109 dev->data->port_id);
1112 /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1113 mlx5_flow_rxq_dynf_metadata_set(dev);
1114 /* Set flags and context to convert Rx timestamps. */
1115 mlx5_rxq_timestamp_set(dev);
1116 /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1117 mlx5_txq_dynf_timestamp_set(dev);
1119 * In non-cached mode, it only needs to start the default mreg copy
1120 * action and no flow created by application exists anymore.
1121 * But it is worth wrapping the interface for further usage.
1123 ret = mlx5_flow_start_default(dev);
/*
 * NOTE(review): this failure is logged at DEBUG level while the sibling
 * failures above use ERR - confirm this is intended.
 */
1125 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1126 dev->data->port_id, strerror(rte_errno));
1130 dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1131 dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1132 /* Enable datapath on secondary process. */
1133 mlx5_mp_os_req_start_rxtx(dev);
/* With a valid interrupt fd, this port id is recorded in the shared port table. */
1134 if (priv->sh->intr_handle.fd >= 0) {
1135 priv->sh->port[priv->dev_port - 1].ih_port_id =
1136 (uint32_t)dev->data->port_id;
1138 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1139 dev->data->port_id);
1140 dev->data->dev_conf.intr_conf.lsc = 0;
1141 dev->data->dev_conf.intr_conf.rmv = 0;
1143 if (priv->sh->intr_handle_devx.fd >= 0)
1144 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1145 (uint32_t)dev->data->port_id;
/* Error path: rte_errno is preserved across the cleanup calls. */
1148 ret = rte_errno; /* Save rte_errno before cleanup. */
1150 dev->data->dev_started = 0;
1151 mlx5_flow_stop_default(dev);
1152 mlx5_traffic_disable(dev);
1155 if (priv->obj_ops.lb_dummy_queue_release)
1156 priv->obj_ops.lb_dummy_queue_release(dev);
1157 mlx5_txpp_stop(dev); /* Stop last. */
1158 rte_errno = ret; /* Restore rte_errno. */
1163 * DPDK callback to stop the device.
1165 * Simulate device stop by detaching all configured flows.
1168 * Pointer to Ethernet device structure.
1171 mlx5_dev_stop(struct rte_eth_dev *dev)
/*
 * Stop sequence: clear the started flag, swap in the "removed" burst
 * functions, stop the secondary-process datapath, then tear down flows,
 * the Rx interrupt vector, the optional dummy queue and Tx packet pacing.
 */
1173 struct mlx5_priv *priv = dev->data->dev_private;
1175 dev->data->dev_started = 0;
1176 /* Prevent crashes when queues are still in use. */
1177 dev->rx_pkt_burst = removed_rx_burst;
1178 dev->tx_pkt_burst = removed_tx_burst;
1180 /* Disable datapath on secondary process. */
1181 mlx5_mp_os_req_stop_rxtx(dev);
/* The delay argument scales with the number of Rx queues (1000 per queue). */
1182 rte_delay_us_sleep(1000 * priv->rxqs_n);
1183 DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1184 mlx5_flow_stop_default(dev);
1185 /* Control flows for default traffic can be removed firstly. */
1186 mlx5_traffic_disable(dev);
1187 /* All RX queue flags will be cleared in the flush interface. */
1188 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1189 mlx5_flow_meter_rxq_flush(dev);
1190 mlx5_rx_intr_vec_disable(dev);
/* Both interrupt port ids of this port's slot are reset to RTE_MAX_ETHPORTS. */
1191 priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1192 priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1195 if (priv->obj_ops.lb_dummy_queue_release)
1196 priv->obj_ops.lb_dummy_queue_release(dev);
1197 mlx5_txpp_stop(dev);
1203 * Enable traffic flows configured by control plane
1206 * Pointer to Ethernet device private data.
1208 * Pointer to Ethernet device structure.
1211 * 0 on success, a negative errno value otherwise and rte_errno is set.
1214 mlx5_traffic_enable(struct rte_eth_dev *dev)
/*
 * Installs the control flows: a source-queue flow for implicit-mode hairpin
 * Tx queues peered with this port, optional E-Switch and LACP rules, then
 * promiscuous / all-multicast / broadcast / IPv6-multicast / unicast MAC
 * flows, with one VLAN variant per configured VLAN filter. On failure the
 * MLX5_FLOW_TYPE_CTL list is flushed.
 */
1216 struct mlx5_priv *priv = dev->data->dev_private;
/* Spec/mask templates reused by the flow-creation calls below. */
1217 struct rte_flow_item_eth bcast = {
1218 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1220 struct rte_flow_item_eth ipv6_multi_spec = {
1221 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1223 struct rte_flow_item_eth ipv6_multi_mask = {
1224 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1226 struct rte_flow_item_eth unicast = {
1227 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1229 struct rte_flow_item_eth unicast_mask = {
1230 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1232 const unsigned int vlan_filter_n = priv->vlan_filter_n;
/* All-zero address, compared against MAC table entries further down. */
1233 const struct rte_ether_addr cmp = {
1234 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1241 * Hairpin txq default flow should be created no matter if it is
1242 * isolation mode. Or else all the packets to be sent will be sent
1243 * out directly without the TX flow actions, e.g. encapsulation.
1245 for (i = 0; i != priv->txqs_n; ++i) {
1246 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1249 /* Only Tx implicit mode requires the default Tx flow. */
1250 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1251 txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1252 txq_ctrl->hairpin_conf.peers[0].port ==
1253 priv->dev_data->port_id) {
1254 ret = mlx5_ctrl_flow_source_queue(dev, i);
1256 mlx5_txq_release(dev, i);
1260 mlx5_txq_release(dev, i);
/* E-Switch default rule: not attempted when the vf or sf flag is set. */
1262 if (priv->config.dv_esw_en && !priv->config.vf && !priv->config.sf) {
1263 if (mlx5_flow_create_esw_table_zero_flow(dev))
1264 priv->fdb_def_rule = 1;
1266 DRV_LOG(INFO, "port %u FDB default rule cannot be"
1267 " configured - only Eswitch group 0 flows are"
1268 " supported.", dev->data->port_id);
/* LACP miss rule: skipped when lacp_by_user is set or pf_bond is negative. */
1270 if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1271 ret = mlx5_flow_lacp_miss(dev);
1273 DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1274 "forward LACP to kernel.", dev->data->port_id);
1276 DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1277 , dev->data->port_id);
/* In the next two flows the same item serves as both spec and mask. */
1281 if (dev->data->promiscuous) {
1282 struct rte_flow_item_eth promisc = {
1283 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1284 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1288 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1292 if (dev->data->all_multicast) {
1293 struct rte_flow_item_eth multicast = {
1294 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1295 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1299 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1303 /* Add broadcast/multicast flows. */
1304 for (i = 0; i != vlan_filter_n; ++i) {
1305 uint16_t vlan = priv->vlan_filter[i];
1307 struct rte_flow_item_vlan vlan_spec = {
1308 .tci = rte_cpu_to_be_16(vlan),
1310 struct rte_flow_item_vlan vlan_mask =
1311 rte_flow_item_vlan_mask;
1313 ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1314 &vlan_spec, &vlan_mask);
1317 ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1319 &vlan_spec, &vlan_mask);
/* Without any VLAN filter the untagged variants are installed instead. */
1323 if (!vlan_filter_n) {
1324 ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1327 ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1330 /* Do not fail on IPv6 broadcast creation failure. */
1332 "IPv6 broadcast is not supported");
1337 /* Add MAC address flows. */
1338 for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1339 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
/* Each MAC table entry is first compared against the all-zero address. */
1341 if (!memcmp(mac, &cmp, sizeof(*mac)))
1343 memcpy(&unicast.dst.addr_bytes,
1345 RTE_ETHER_ADDR_LEN);
1346 for (j = 0; j != vlan_filter_n; ++j) {
1347 uint16_t vlan = priv->vlan_filter[j];
1349 struct rte_flow_item_vlan vlan_spec = {
1350 .tci = rte_cpu_to_be_16(vlan),
1352 struct rte_flow_item_vlan vlan_mask =
1353 rte_flow_item_vlan_mask;
1355 ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1362 if (!vlan_filter_n) {
1363 ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
/* Error path: the control flow list is flushed with rte_errno preserved. */
1370 ret = rte_errno; /* Save rte_errno before cleanup. */
1371 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1372 rte_errno = ret; /* Restore rte_errno. */
1378 * Disable traffic flows configured by control plane
1381 * Pointer to Ethernet device structure.
1384 mlx5_traffic_disable(struct rte_eth_dev *dev)
/* Flushes the MLX5_FLOW_TYPE_CTL flow list, the same list mlx5_traffic_enable() flushes on failure. */
1386 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1390 * Restart traffic flows configured by control plane
1393 * Pointer to Ethernet device structure.
1396 * 0 on success, a negative errno value otherwise and rte_errno is set.
1399 mlx5_traffic_restart(struct rte_eth_dev *dev)
1401 if (dev->data->dev_started) {
1402 mlx5_traffic_disable(dev);
1403 return mlx5_traffic_enable(dev);