/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
14 #include <mlx5_malloc.h>
18 #include "mlx5_rxtx.h"
19 #include "mlx5_utils.h"
20 #include "rte_pmd_mlx5.h"
23 * Stop traffic on Tx queues.
26 * Pointer to Ethernet device structure.
29 mlx5_txq_stop(struct rte_eth_dev *dev)
31 struct mlx5_priv *priv = dev->data->dev_private;
34 for (i = 0; i != priv->txqs_n; ++i)
35 mlx5_txq_release(dev, i);
39 * Start traffic on Tx queues.
42 * Pointer to Ethernet device structure.
45 * 0 on success, a negative errno value otherwise and rte_errno is set.
48 mlx5_txq_start(struct rte_eth_dev *dev)
50 struct mlx5_priv *priv = dev->data->dev_private;
54 for (i = 0; i != priv->txqs_n; ++i) {
55 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
56 struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
57 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
61 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
62 txq_alloc_elts(txq_ctrl);
63 MLX5_ASSERT(!txq_ctrl->obj);
64 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
67 DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
68 "memory resources.", dev->data->port_id,
73 ret = priv->obj_ops.txq_obj_new(dev, i);
75 mlx5_free(txq_ctrl->obj);
79 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
80 size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82 txq_data->fcqs = mlx5_malloc(flags, size,
85 if (!txq_data->fcqs) {
86 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
87 "allocate memory (FCQ).",
88 dev->data->port_id, i);
93 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
94 dev->data->port_id, i, (void *)&txq_ctrl->obj);
95 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
99 ret = rte_errno; /* Save rte_errno before cleanup. */
101 mlx5_txq_release(dev, i);
103 rte_errno = ret; /* Restore rte_errno. */
108 * Stop traffic on Rx queues.
111 * Pointer to Ethernet device structure.
114 mlx5_rxq_stop(struct rte_eth_dev *dev)
116 struct mlx5_priv *priv = dev->data->dev_private;
119 for (i = 0; i != priv->rxqs_n; ++i)
120 mlx5_rxq_release(dev, i);
124 * Start traffic on Rx queues.
127 * Pointer to Ethernet device structure.
130 * 0 on success, a negative errno value otherwise and rte_errno is set.
133 mlx5_rxq_start(struct rte_eth_dev *dev)
135 struct mlx5_priv *priv = dev->data->dev_private;
139 /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
140 if (mlx5_mprq_alloc_mp(dev)) {
141 /* Should not release Rx queues but return immediately. */
144 DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
145 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
146 DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
147 dev->data->port_id, priv->sh->device_attr.max_sge);
148 for (i = 0; i != priv->rxqs_n; ++i) {
149 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
153 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
154 /* Pre-register Rx mempools. */
155 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
156 mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
157 rxq_ctrl->rxq.mprq_mp);
161 for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
163 (dev, &rxq_ctrl->rxq.mr_ctrl,
164 rxq_ctrl->rxq.rxseg[s].mp);
166 ret = rxq_alloc_elts(rxq_ctrl);
170 MLX5_ASSERT(!rxq_ctrl->obj);
171 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
172 sizeof(*rxq_ctrl->obj), 0,
174 if (!rxq_ctrl->obj) {
176 "Port %u Rx queue %u can't allocate resources.",
177 dev->data->port_id, (*priv->rxqs)[i]->idx);
181 ret = priv->obj_ops.rxq_obj_new(dev, i);
183 mlx5_free(rxq_ctrl->obj);
186 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
187 dev->data->port_id, i, (void *)&rxq_ctrl->obj);
188 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
192 ret = rte_errno; /* Save rte_errno before cleanup. */
194 mlx5_rxq_release(dev, i);
196 rte_errno = ret; /* Restore rte_errno. */
201 * Binds Tx queues to Rx queues for hairpin.
203 * Binds Tx queues to the target Rx queues.
206 * Pointer to Ethernet device structure.
209 * 0 on success, a negative errno value otherwise and rte_errno is set.
212 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
214 struct mlx5_priv *priv = dev->data->dev_private;
215 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
216 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
217 struct mlx5_txq_ctrl *txq_ctrl;
218 struct mlx5_rxq_ctrl *rxq_ctrl;
219 struct mlx5_devx_obj *sq;
220 struct mlx5_devx_obj *rq;
223 bool need_auto = false;
224 uint16_t self_port = dev->data->port_id;
226 for (i = 0; i != priv->txqs_n; ++i) {
227 txq_ctrl = mlx5_txq_get(dev, i);
230 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
231 mlx5_txq_release(dev, i);
234 if (txq_ctrl->hairpin_conf.peers[0].port != self_port)
236 if (txq_ctrl->hairpin_conf.manual_bind) {
237 mlx5_txq_release(dev, i);
241 mlx5_txq_release(dev, i);
245 for (i = 0; i != priv->txqs_n; ++i) {
246 txq_ctrl = mlx5_txq_get(dev, i);
249 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
250 mlx5_txq_release(dev, i);
253 /* Skip hairpin queues with other peer ports. */
254 if (txq_ctrl->hairpin_conf.peers[0].port != self_port)
256 if (!txq_ctrl->obj) {
258 DRV_LOG(ERR, "port %u no txq object found: %d",
259 dev->data->port_id, i);
260 mlx5_txq_release(dev, i);
263 sq = txq_ctrl->obj->sq;
264 rxq_ctrl = mlx5_rxq_get(dev,
265 txq_ctrl->hairpin_conf.peers[0].queue);
267 mlx5_txq_release(dev, i);
269 DRV_LOG(ERR, "port %u no rxq object found: %d",
271 txq_ctrl->hairpin_conf.peers[0].queue);
274 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
275 rxq_ctrl->hairpin_conf.peers[0].queue != i) {
277 DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
278 "Rx queue %d", dev->data->port_id,
279 i, txq_ctrl->hairpin_conf.peers[0].queue);
282 rq = rxq_ctrl->obj->rq;
285 DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
287 txq_ctrl->hairpin_conf.peers[0].queue);
290 sq_attr.state = MLX5_SQC_STATE_RDY;
291 sq_attr.sq_state = MLX5_SQC_STATE_RST;
292 sq_attr.hairpin_peer_rq = rq->id;
293 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
294 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
297 rq_attr.state = MLX5_SQC_STATE_RDY;
298 rq_attr.rq_state = MLX5_SQC_STATE_RST;
299 rq_attr.hairpin_peer_sq = sq->id;
300 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
301 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
304 /* Qs with auto-bind will be destroyed directly. */
305 rxq_ctrl->hairpin_status = 1;
306 txq_ctrl->hairpin_status = 1;
307 mlx5_txq_release(dev, i);
308 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
312 mlx5_txq_release(dev, i);
313 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
318 * Fetch the peer queue's SW & HW information.
321 * Pointer to Ethernet device structure.
323 * Index of the queue to fetch the information.
324 * @param current_info
325 * Pointer to the input peer information, not used currently.
327 * Pointer to the structure to store the information, output.
329 * Positive to get the RxQ information, zero to get the TxQ information.
332 * 0 on success, a negative errno value otherwise and rte_errno is set.
335 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
336 struct rte_hairpin_peer_info *current_info,
337 struct rte_hairpin_peer_info *peer_info,
340 struct mlx5_priv *priv = dev->data->dev_private;
341 RTE_SET_USED(current_info);
343 if (dev->data->dev_started == 0) {
345 DRV_LOG(ERR, "peer port %u is not started",
350 * Peer port used as egress. In the current design, hairpin Tx queue
351 * will be bound to the peer Rx queue. Indeed, only the information of
352 * peer Rx queue needs to be fetched.
354 if (direction == 0) {
355 struct mlx5_txq_ctrl *txq_ctrl;
357 txq_ctrl = mlx5_txq_get(dev, peer_queue);
358 if (txq_ctrl == NULL) {
360 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
361 dev->data->port_id, peer_queue);
364 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
366 DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
367 dev->data->port_id, peer_queue);
368 mlx5_txq_release(dev, peer_queue);
371 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
373 DRV_LOG(ERR, "port %u no Txq object found: %d",
374 dev->data->port_id, peer_queue);
375 mlx5_txq_release(dev, peer_queue);
378 peer_info->qp_id = txq_ctrl->obj->sq->id;
379 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
380 /* 1-to-1 mapping, only the first one is used. */
381 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
382 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
383 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
384 mlx5_txq_release(dev, peer_queue);
385 } else { /* Peer port used as ingress. */
386 struct mlx5_rxq_ctrl *rxq_ctrl;
388 rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
389 if (rxq_ctrl == NULL) {
391 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
392 dev->data->port_id, peer_queue);
395 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
397 DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
398 dev->data->port_id, peer_queue);
399 mlx5_rxq_release(dev, peer_queue);
402 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
404 DRV_LOG(ERR, "port %u no Rxq object found: %d",
405 dev->data->port_id, peer_queue);
406 mlx5_rxq_release(dev, peer_queue);
409 peer_info->qp_id = rxq_ctrl->obj->rq->id;
410 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
411 peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
412 peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
413 peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
414 mlx5_rxq_release(dev, peer_queue);
420 * Bind the hairpin queue with the peer HW information.
421 * This needs to be called twice both for Tx and Rx queues of a pair.
422 * If the queue is already bound, it is considered successful.
425 * Pointer to Ethernet device structure.
427 * Index of the queue to change the HW configuration to bind.
429 * Pointer to information of the peer queue.
431 * Positive to configure the TxQ, zero to configure the RxQ.
434 * 0 on success, a negative errno value otherwise and rte_errno is set.
437 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
438 struct rte_hairpin_peer_info *peer_info,
444 * Consistency checking of the peer queue: opposite direction is used
445 * to get the peer queue info with ethdev port ID, no need to check.
447 if (peer_info->peer_q != cur_queue) {
449 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
450 dev->data->port_id, cur_queue, peer_info->peer_q);
453 if (direction != 0) {
454 struct mlx5_txq_ctrl *txq_ctrl;
455 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
457 txq_ctrl = mlx5_txq_get(dev, cur_queue);
458 if (txq_ctrl == NULL) {
460 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
461 dev->data->port_id, cur_queue);
464 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
466 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
467 dev->data->port_id, cur_queue);
468 mlx5_txq_release(dev, cur_queue);
471 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
473 DRV_LOG(ERR, "port %u no Txq object found: %d",
474 dev->data->port_id, cur_queue);
475 mlx5_txq_release(dev, cur_queue);
478 if (txq_ctrl->hairpin_status != 0) {
479 DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
480 dev->data->port_id, cur_queue);
481 mlx5_txq_release(dev, cur_queue);
485 * All queues' of one port consistency checking is done in the
486 * bind() function, and that is optional.
488 if (peer_info->tx_explicit !=
489 txq_ctrl->hairpin_conf.tx_explicit) {
491 DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
492 " mismatch", dev->data->port_id, cur_queue);
493 mlx5_txq_release(dev, cur_queue);
496 if (peer_info->manual_bind !=
497 txq_ctrl->hairpin_conf.manual_bind) {
499 DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
500 " mismatch", dev->data->port_id, cur_queue);
501 mlx5_txq_release(dev, cur_queue);
504 sq_attr.state = MLX5_SQC_STATE_RDY;
505 sq_attr.sq_state = MLX5_SQC_STATE_RST;
506 sq_attr.hairpin_peer_rq = peer_info->qp_id;
507 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
508 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
510 txq_ctrl->hairpin_status = 1;
511 mlx5_txq_release(dev, cur_queue);
513 struct mlx5_rxq_ctrl *rxq_ctrl;
514 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
516 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
517 if (rxq_ctrl == NULL) {
519 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
520 dev->data->port_id, cur_queue);
523 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
525 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
526 dev->data->port_id, cur_queue);
527 mlx5_rxq_release(dev, cur_queue);
530 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
532 DRV_LOG(ERR, "port %u no Rxq object found: %d",
533 dev->data->port_id, cur_queue);
534 mlx5_rxq_release(dev, cur_queue);
537 if (rxq_ctrl->hairpin_status != 0) {
538 DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
539 dev->data->port_id, cur_queue);
540 mlx5_rxq_release(dev, cur_queue);
543 if (peer_info->tx_explicit !=
544 rxq_ctrl->hairpin_conf.tx_explicit) {
546 DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
547 " mismatch", dev->data->port_id, cur_queue);
548 mlx5_rxq_release(dev, cur_queue);
551 if (peer_info->manual_bind !=
552 rxq_ctrl->hairpin_conf.manual_bind) {
554 DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
555 " mismatch", dev->data->port_id, cur_queue);
556 mlx5_rxq_release(dev, cur_queue);
559 rq_attr.state = MLX5_SQC_STATE_RDY;
560 rq_attr.rq_state = MLX5_SQC_STATE_RST;
561 rq_attr.hairpin_peer_sq = peer_info->qp_id;
562 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
563 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
565 rxq_ctrl->hairpin_status = 1;
566 mlx5_rxq_release(dev, cur_queue);
572 * Unbind the hairpin queue and reset its HW configuration.
573 * This needs to be called twice both for Tx and Rx queues of a pair.
574 * If the queue is already unbound, it is considered successful.
577 * Pointer to Ethernet device structure.
579 * Index of the queue to change the HW configuration to unbind.
581 * Positive to reset the TxQ, zero to reset the RxQ.
584 * 0 on success, a negative errno value otherwise and rte_errno is set.
587 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
592 if (direction != 0) {
593 struct mlx5_txq_ctrl *txq_ctrl;
594 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
596 txq_ctrl = mlx5_txq_get(dev, cur_queue);
597 if (txq_ctrl == NULL) {
599 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
600 dev->data->port_id, cur_queue);
603 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
605 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
606 dev->data->port_id, cur_queue);
607 mlx5_txq_release(dev, cur_queue);
610 /* Already unbound, return success before obj checking. */
611 if (txq_ctrl->hairpin_status == 0) {
612 DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
613 dev->data->port_id, cur_queue);
614 mlx5_txq_release(dev, cur_queue);
617 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
619 DRV_LOG(ERR, "port %u no Txq object found: %d",
620 dev->data->port_id, cur_queue);
621 mlx5_txq_release(dev, cur_queue);
624 sq_attr.state = MLX5_SQC_STATE_RST;
625 sq_attr.sq_state = MLX5_SQC_STATE_RST;
626 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
628 txq_ctrl->hairpin_status = 0;
629 mlx5_txq_release(dev, cur_queue);
631 struct mlx5_rxq_ctrl *rxq_ctrl;
632 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
634 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
635 if (rxq_ctrl == NULL) {
637 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
638 dev->data->port_id, cur_queue);
641 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
643 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
644 dev->data->port_id, cur_queue);
645 mlx5_rxq_release(dev, cur_queue);
648 if (rxq_ctrl->hairpin_status == 0) {
649 DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
650 dev->data->port_id, cur_queue);
651 mlx5_rxq_release(dev, cur_queue);
654 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
656 DRV_LOG(ERR, "port %u no Rxq object found: %d",
657 dev->data->port_id, cur_queue);
658 mlx5_rxq_release(dev, cur_queue);
661 rq_attr.state = MLX5_SQC_STATE_RST;
662 rq_attr.rq_state = MLX5_SQC_STATE_RST;
663 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
665 rxq_ctrl->hairpin_status = 0;
666 mlx5_rxq_release(dev, cur_queue);
672 * Bind the hairpin port pairs, from the Tx to the peer Rx.
673 * This function only supports to bind the Tx to one Rx.
676 * Pointer to Ethernet device structure.
678 * Port identifier of the Rx port.
681 * 0 on success, a negative errno value otherwise and rte_errno is set.
684 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
686 struct mlx5_priv *priv = dev->data->dev_private;
688 struct mlx5_txq_ctrl *txq_ctrl;
690 struct rte_hairpin_peer_info peer = {0xffffff};
691 struct rte_hairpin_peer_info cur;
692 const struct rte_eth_hairpin_conf *conf;
694 uint16_t local_port = priv->dev_data->port_id;
699 if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
701 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
705 * Before binding TxQ to peer RxQ, first round loop will be used for
706 * checking the queues' configuration consistency. This would be a
707 * little time consuming but better than doing the rollback.
709 for (i = 0; i != priv->txqs_n; i++) {
710 txq_ctrl = mlx5_txq_get(dev, i);
711 if (txq_ctrl == NULL)
713 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
714 mlx5_txq_release(dev, i);
718 * All hairpin Tx queues of a single port that connected to the
719 * same peer Rx port should have the same "auto binding" and
720 * "implicit Tx flow" modes.
721 * Peer consistency checking will be done in per queue binding.
723 conf = &txq_ctrl->hairpin_conf;
724 if (conf->peers[0].port == rx_port) {
726 manual = conf->manual_bind;
727 explicit = conf->tx_explicit;
729 if (manual != conf->manual_bind ||
730 explicit != conf->tx_explicit) {
732 DRV_LOG(ERR, "port %u queue %d mode"
733 " mismatch: %u %u, %u %u",
734 local_port, i, manual,
735 conf->manual_bind, explicit,
737 mlx5_txq_release(dev, i);
743 mlx5_txq_release(dev, i);
745 /* Once no queue is configured, success is returned directly. */
748 /* All the hairpin TX queues need to be traversed again. */
749 for (i = 0; i != priv->txqs_n; i++) {
750 txq_ctrl = mlx5_txq_get(dev, i);
751 if (txq_ctrl == NULL)
753 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
754 mlx5_txq_release(dev, i);
757 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
758 mlx5_txq_release(dev, i);
761 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
763 * Fetch peer RxQ's information.
764 * No need to pass the information of the current queue.
766 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
769 mlx5_txq_release(dev, i);
772 /* Accessing its own device, inside mlx5 PMD. */
773 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
775 mlx5_txq_release(dev, i);
778 /* Pass TxQ's information to peer RxQ and try binding. */
779 cur.peer_q = rx_queue;
780 cur.qp_id = txq_ctrl->obj->sq->id;
781 cur.vhca_id = priv->config.hca_attr.vhca_id;
782 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
783 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
785 * In order to access another device in a proper way, RTE level
786 * private function is needed.
788 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
791 mlx5_txq_release(dev, i);
794 mlx5_txq_release(dev, i);
799 * Do roll-back process for the queues already bound.
800 * No need to check the return value of the queue unbind function.
803 /* No validation is needed here. */
804 txq_ctrl = mlx5_txq_get(dev, i);
805 if (txq_ctrl == NULL)
807 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
808 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
809 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
810 mlx5_txq_release(dev, i);
816 * Unbind the hairpin port pair, HW configuration of both devices will be clear
817 * and status will be reset for all the queues used between the them.
818 * This function only supports to unbind the Tx from one Rx.
821 * Pointer to Ethernet device structure.
823 * Port identifier of the Rx port.
826 * 0 on success, a negative errno value otherwise and rte_errno is set.
829 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
831 struct mlx5_priv *priv = dev->data->dev_private;
832 struct mlx5_txq_ctrl *txq_ctrl;
835 uint16_t cur_port = priv->dev_data->port_id;
837 if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
839 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
842 for (i = 0; i != priv->txqs_n; i++) {
845 txq_ctrl = mlx5_txq_get(dev, i);
846 if (txq_ctrl == NULL)
848 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
849 mlx5_txq_release(dev, i);
852 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
853 mlx5_txq_release(dev, i);
856 /* Indeed, only the first used queue needs to be checked. */
857 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
858 if (cur_port != rx_port) {
860 DRV_LOG(ERR, "port %u and port %u are in"
861 " auto-bind mode", cur_port, rx_port);
862 mlx5_txq_release(dev, i);
868 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
869 mlx5_txq_release(dev, i);
870 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
872 DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
876 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
878 DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
887 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
888 * @see mlx5_hairpin_bind_single_port()
891 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
895 struct mlx5_priv *priv = dev->data->dev_private;
898 * If the Rx port has no hairpin configuration with the current port,
899 * the binding will be skipped in the called function of single port.
900 * Device started status will be checked only before the queue
901 * information updating.
903 if (rx_port == RTE_MAX_ETHPORTS) {
904 MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
905 ret = mlx5_hairpin_bind_single_port(dev, p);
911 return mlx5_hairpin_bind_single_port(dev, rx_port);
914 MLX5_ETH_FOREACH_DEV(pp, priv->pci_dev)
916 mlx5_hairpin_unbind_single_port(dev, pp);
921 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
922 * @see mlx5_hairpin_unbind_single_port()
925 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
929 struct mlx5_priv *priv = dev->data->dev_private;
931 if (rx_port == RTE_MAX_ETHPORTS)
932 MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
933 ret = mlx5_hairpin_unbind_single_port(dev, p);
938 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
943 * DPDK callback to get the hairpin peer ports list.
944 * This will return the actual number of peer ports and save the identifiers
945 * into the array (sorted, may be different from that when setting up the
946 * hairpin peer queues).
947 * The peer port ID could be the same as the port ID of the current device.
950 * Pointer to Ethernet device structure.
952 * Pointer to array to save the port identifiers.
954 * The length of the array.
956 * Current port to peer port direction.
957 * positive - current used as Tx to get all peer Rx ports.
958 * zero - current used as Rx to get all peer Tx ports.
961 * 0 or positive value on success, actual number of peer ports.
962 * a negative errno value otherwise and rte_errno is set.
965 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
966 size_t len, uint32_t direction)
968 struct mlx5_priv *priv = dev->data->dev_private;
969 struct mlx5_txq_ctrl *txq_ctrl;
970 struct mlx5_rxq_ctrl *rxq_ctrl;
973 uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
977 for (i = 0; i < priv->txqs_n; i++) {
978 txq_ctrl = mlx5_txq_get(dev, i);
981 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
982 mlx5_txq_release(dev, i);
985 pp = txq_ctrl->hairpin_conf.peers[0].port;
986 if (pp >= RTE_MAX_ETHPORTS) {
988 mlx5_txq_release(dev, i);
989 DRV_LOG(ERR, "port %hu queue %u peer port "
991 priv->dev_data->port_id, i, pp);
994 bits[pp / 32] |= 1 << (pp % 32);
995 mlx5_txq_release(dev, i);
998 for (i = 0; i < priv->rxqs_n; i++) {
999 rxq_ctrl = mlx5_rxq_get(dev, i);
1002 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
1003 mlx5_rxq_release(dev, i);
1006 pp = rxq_ctrl->hairpin_conf.peers[0].port;
1007 if (pp >= RTE_MAX_ETHPORTS) {
1009 mlx5_rxq_release(dev, i);
1010 DRV_LOG(ERR, "port %hu queue %u peer port "
1012 priv->dev_data->port_id, i, pp);
1015 bits[pp / 32] |= 1 << (pp % 32);
1016 mlx5_rxq_release(dev, i);
1019 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1020 if (bits[i / 32] & (1 << (i % 32))) {
1021 if ((size_t)ret >= len) {
1025 peer_ports[ret++] = i;
1032 * DPDK callback to start the device.
1034 * Simulate device start by attaching all configured flows.
1037 * Pointer to Ethernet device structure.
1040 * 0 on success, a negative errno value otherwise and rte_errno is set.
1043 mlx5_dev_start(struct rte_eth_dev *dev)
1045 struct mlx5_priv *priv = dev->data->dev_private;
1049 DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1050 fine_inline = rte_mbuf_dynflag_lookup
1051 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1052 if (fine_inline >= 0)
1053 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1055 rte_net_mlx5_dynf_inline_mask = 0;
1056 if (dev->data->nb_rx_queues > 0) {
1057 ret = mlx5_dev_configure_rss_reta(dev);
1059 DRV_LOG(ERR, "port %u reta config failed: %s",
1060 dev->data->port_id, strerror(rte_errno));
1064 ret = mlx5_txpp_start(dev);
1066 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1067 dev->data->port_id, strerror(rte_errno));
1070 ret = mlx5_txq_start(dev);
1072 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1073 dev->data->port_id, strerror(rte_errno));
1076 ret = mlx5_rxq_start(dev);
1078 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1079 dev->data->port_id, strerror(rte_errno));
1083 * Such step will be skipped if there is no hairpin TX queue configured
1084 * with RX peer queue from the same device.
1086 ret = mlx5_hairpin_auto_bind(dev);
1088 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1089 dev->data->port_id, strerror(rte_errno));
1092 /* Set started flag here for the following steps like control flow. */
1093 dev->data->dev_started = 1;
1094 ret = mlx5_rx_intr_vec_enable(dev);
1096 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1097 dev->data->port_id);
1100 mlx5_os_stats_init(dev);
1101 ret = mlx5_traffic_enable(dev);
1103 DRV_LOG(ERR, "port %u failed to set defaults flows",
1104 dev->data->port_id);
1107 /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1108 mlx5_flow_rxq_dynf_metadata_set(dev);
1109 /* Set flags and context to convert Rx timestamps. */
1110 mlx5_rxq_timestamp_set(dev);
1111 /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1112 mlx5_txq_dynf_timestamp_set(dev);
1114 * In non-cached mode, it only needs to start the default mreg copy
1115 * action and no flow created by application exists anymore.
1116 * But it is worth wrapping the interface for further usage.
1118 ret = mlx5_flow_start_default(dev);
1120 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1121 dev->data->port_id, strerror(rte_errno));
1125 dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1126 dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1127 /* Enable datapath on secondary process. */
1128 mlx5_mp_os_req_start_rxtx(dev);
1129 if (priv->sh->intr_handle.fd >= 0) {
1130 priv->sh->port[priv->dev_port - 1].ih_port_id =
1131 (uint32_t)dev->data->port_id;
1133 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1134 dev->data->port_id);
1135 dev->data->dev_conf.intr_conf.lsc = 0;
1136 dev->data->dev_conf.intr_conf.rmv = 0;
1138 if (priv->sh->intr_handle_devx.fd >= 0)
1139 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1140 (uint32_t)dev->data->port_id;
1143 ret = rte_errno; /* Save rte_errno before cleanup. */
1145 dev->data->dev_started = 0;
1146 mlx5_flow_stop_default(dev);
1147 mlx5_traffic_disable(dev);
1150 mlx5_txpp_stop(dev); /* Stop last. */
1151 rte_errno = ret; /* Restore rte_errno. */
1156 * DPDK callback to stop the device.
1158 * Simulate device stop by detaching all configured flows.
1161 * Pointer to Ethernet device structure.
1164 mlx5_dev_stop(struct rte_eth_dev *dev)
1166 struct mlx5_priv *priv = dev->data->dev_private;
1168 dev->data->dev_started = 0;
1169 /* Prevent crashes when queues are still in use. */
1170 dev->rx_pkt_burst = removed_rx_burst;
1171 dev->tx_pkt_burst = removed_tx_burst;
1173 /* Disable datapath on secondary process. */
1174 mlx5_mp_os_req_stop_rxtx(dev);
1175 rte_delay_us_sleep(1000 * priv->rxqs_n);
1176 DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1177 mlx5_flow_stop_default(dev);
1178 /* Control flows for default traffic can be removed firstly. */
1179 mlx5_traffic_disable(dev);
1180 /* All RX queue flags will be cleared in the flush interface. */
1181 mlx5_flow_list_flush(dev, &priv->flows, true);
1182 mlx5_rx_intr_vec_disable(dev);
1183 priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1184 priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1187 mlx5_txpp_stop(dev);
1193 * Enable traffic flows configured by control plane
1196 * Pointer to Ethernet device private data.
1198 * Pointer to Ethernet device structure.
1201 * 0 on success, a negative errno value otherwise and rte_errno is set.
1204 mlx5_traffic_enable(struct rte_eth_dev *dev)
1206 struct mlx5_priv *priv = dev->data->dev_private;
1207 struct rte_flow_item_eth bcast = {
1208 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1210 struct rte_flow_item_eth ipv6_multi_spec = {
1211 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1213 struct rte_flow_item_eth ipv6_multi_mask = {
1214 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1216 struct rte_flow_item_eth unicast = {
1217 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1219 struct rte_flow_item_eth unicast_mask = {
1220 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1222 const unsigned int vlan_filter_n = priv->vlan_filter_n;
1223 const struct rte_ether_addr cmp = {
1224 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1231 * Hairpin txq default flow should be created no matter if it is
1232 * isolation mode. Or else all the packets to be sent will be sent
1233 * out directly without the TX flow actions, e.g. encapsulation.
1235 for (i = 0; i != priv->txqs_n; ++i) {
1236 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1239 /* Only Tx implicit mode requires the default Tx flow. */
1240 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1241 txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1242 txq_ctrl->hairpin_conf.peers[0].port ==
1243 priv->dev_data->port_id) {
1244 ret = mlx5_ctrl_flow_source_queue(dev, i);
1246 mlx5_txq_release(dev, i);
1250 mlx5_txq_release(dev, i);
1252 if (priv->config.dv_esw_en && !priv->config.vf) {
1253 if (mlx5_flow_create_esw_table_zero_flow(dev))
1254 priv->fdb_def_rule = 1;
1256 DRV_LOG(INFO, "port %u FDB default rule cannot be"
1257 " configured - only Eswitch group 0 flows are"
1258 " supported.", dev->data->port_id);
1260 if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1261 ret = mlx5_flow_lacp_miss(dev);
1263 DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1264 "forward LACP to kernel.", dev->data->port_id);
1266 DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1267 , dev->data->port_id);
1271 if (dev->data->promiscuous) {
1272 struct rte_flow_item_eth promisc = {
1273 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1274 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1278 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1282 if (dev->data->all_multicast) {
1283 struct rte_flow_item_eth multicast = {
1284 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1285 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1289 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1293 /* Add broadcast/multicast flows. */
1294 for (i = 0; i != vlan_filter_n; ++i) {
1295 uint16_t vlan = priv->vlan_filter[i];
1297 struct rte_flow_item_vlan vlan_spec = {
1298 .tci = rte_cpu_to_be_16(vlan),
1300 struct rte_flow_item_vlan vlan_mask =
1301 rte_flow_item_vlan_mask;
1303 ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1304 &vlan_spec, &vlan_mask);
1307 ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1309 &vlan_spec, &vlan_mask);
1313 if (!vlan_filter_n) {
1314 ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1317 ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1320 /* Do not fail on IPv6 broadcast creation failure. */
1322 "IPv6 broadcast is not supported");
1327 /* Add MAC address flows. */
1328 for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1329 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1331 if (!memcmp(mac, &cmp, sizeof(*mac)))
1333 memcpy(&unicast.dst.addr_bytes,
1335 RTE_ETHER_ADDR_LEN);
1336 for (j = 0; j != vlan_filter_n; ++j) {
1337 uint16_t vlan = priv->vlan_filter[j];
1339 struct rte_flow_item_vlan vlan_spec = {
1340 .tci = rte_cpu_to_be_16(vlan),
1342 struct rte_flow_item_vlan vlan_mask =
1343 rte_flow_item_vlan_mask;
1345 ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1352 if (!vlan_filter_n) {
1353 ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1360 ret = rte_errno; /* Save rte_errno before cleanup. */
1361 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
1362 rte_errno = ret; /* Restore rte_errno. */
1368 * Disable traffic flows configured by control plane
1371 * Pointer to Ethernet device private data.
1374 mlx5_traffic_disable(struct rte_eth_dev *dev)
1376 struct mlx5_priv *priv = dev->data->dev_private;
1378 mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
1382 * Restart traffic flows configured by control plane
1385 * Pointer to Ethernet device private data.
1388 * 0 on success, a negative errno value otherwise and rte_errno is set.
1391 mlx5_traffic_restart(struct rte_eth_dev *dev)
1393 if (dev->data->dev_started) {
1394 mlx5_traffic_disable(dev);
1395 return mlx5_traffic_enable(dev);