/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Translate the chunk address to an MR key in order to put it into the cache.
 */
static void
mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
			     struct rte_mempool_memhdr *memhdr,
			     unsigned int idx)
{
	struct mlx5_rxq_data *rxq = opaque;

	RTE_SET_USED(mp);
	RTE_SET_USED(idx);
	mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
}

/**
 * Register Rx queue mempools and fill the Rx queue cache.
 * This function tolerates repeated mempool registration.
 *
 * @param[in] rxq_ctrl
 *   Rx queue control data.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_priv *priv = rxq_ctrl->priv;
	struct rte_mempool *mp;
	uint32_t s;
	int ret = 0;

	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
	/* MPRQ mempool is registered on creation, just fill the cache. */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
		rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
				     mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
		return 0;
	}
	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
		mp = rxq_ctrl->rxq.rxseg[s].mp;
		ret = mlx5_mr_mempool_register(&priv->sh->cdev->mr_scache,
					       priv->sh->cdev->pd, mp,
					       &priv->mp_id);
		if (ret < 0 && rte_errno != EEXIST)
			return ret;
		rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
	}
	return 0;
}
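
/*
 * Illustrative sketch (not part of the driver): rte_mempool_mem_iter(), used
 * above, invokes its callback once per contiguous memory chunk of a mempool.
 * A minimal standalone callback, with hypothetical names, would look like:
 *
 *	static void
 *	count_chunk_cb(struct rte_mempool *mp, void *opaque,
 *		       struct rte_mempool_memhdr *memhdr, unsigned int idx)
 *	{
 *		unsigned int *n_chunks = opaque;
 *
 *		RTE_SET_USED(mp);
 *		RTE_SET_USED(memhdr);
 *		RTE_SET_USED(idx);
 *		(*n_chunks)++;
 *	}
 *
 *	unsigned int n = 0;
 *
 *	rte_mempool_mem_iter(mp, count_chunk_cb, &n);
 */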

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

		if (!rxq_ctrl)
			continue;
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
			/*
			 * Pre-register the mempools. Regardless of whether
			 * the implicit registration is enabled or not,
			 * Rx mempool destruction is tracked to free MRs.
			 */
			if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
				goto error;
			ret = rxq_alloc_elts(rxq_ctrl);
			if (ret)
				goto error;
		}
		MLX5_ASSERT(!rxq_ctrl->obj);
		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
					    sizeof(*rxq_ctrl->obj), 0,
					    rxq_ctrl->socket);
		if (!rxq_ctrl->obj) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u can't allocate resources.",
				dev->data->port_id, (*priv->rxqs)[i]->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.rxq_obj_new(dev, i);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds Tx queues to the target Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Qs with auto-bind will be destroyed directly. */
		rxq_ctrl->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}
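
/*
 * Illustrative sketch (not part of the driver): the auto-bind path above is
 * taken when an application sets up a hairpin Tx queue whose peer is an Rx
 * queue of the same port and leaves manual_bind unset. Port/queue identifiers
 * below are hypothetical:
 *
 *	struct rte_eth_hairpin_conf conf = {
 *		.peer_count = 1,
 *		.peers[0] = { .port = port_id, .queue = rxq_id },
 *	};
 *
 *	ret = rte_eth_tx_hairpin_queue_setup(port_id, txq_id, nb_desc, &conf);
 */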

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, hairpin Tx queue
	 * will be bound to the peer Rx queue. Indeed, only the information of
	 * peer Rx queue needs to be fetched.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = txq_ctrl->obj->sq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_ctrl *rxq_ctrl;

		rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
		mlx5_rxq_release(dev, peer_queue);
	}
	return 0;
}

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: the opposite direction is
	 * used to get the peer queue info with the ethdev port ID, so there
	 * is no need to check it again here.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and that is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 1;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}
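
/*
 * Illustrative sketch (not part of the driver): a complete pair bind calls
 * this function once per side, after fetching the peer information in the
 * opposite direction; this is what the port-level bind code below does.
 * Variable names are hypothetical:
 *
 *	// Tx side of the local port, with info fetched from the peer Rx.
 *	ret = mlx5_hairpin_queue_peer_bind(dev, txq_id, &rx_peer_info, 1);
 *	// Rx side of the peer port, with info describing the local Tx queue.
 *	ret = rte_eth_hairpin_queue_peer_bind(rx_port, rxq_id,
 *					      &tx_peer_info, 0);
 */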

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RST;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 0;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx side to one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding TxQ to peer RxQ, a first round loop is used for
	 * checking the queues' configuration consistency. This is a little
	 * time consuming, but better than doing the rollback.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port that are connected
		 * to the same peer Rx port should have the same "auto
		 * binding" and "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* Once no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin Tx queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = txq_ctrl->obj->sq->id;
		cur.vhca_id = priv->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * In order to access another device in a proper way, an RTE
		 * level private function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Do the roll-back process for the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}

/*
 * Unbind the hairpin port pair; the HW configuration of both devices will be
 * cleared and the status will be reset for all the queues used between them.
 * This function only supports unbinding the Tx side from one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Indeed, only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				mlx5_txq_release(dev, i);
				return -rte_errno;
			} else {
				mlx5_txq_release(dev, i);
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}

/*
 * Bind hairpin ports; Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the called function of single port.
	 * Device started status will be checked only before the queue
	 * information updating.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, dev->device)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}
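
/*
 * Illustrative sketch (not part of the driver): with manual_bind set on the
 * hairpin queues, an application drives this path through the generic ethdev
 * API after both ports are started. Port variables are hypothetical:
 *
 *	// Bind Tx of tx_port to Rx of rx_port (one direction only).
 *	ret = rte_eth_hairpin_bind(tx_port, rx_port);
 *	if (ret != 0)
 *		printf("hairpin bind failed: %s\n", rte_strerror(rte_errno));
 *	// Bind the opposite direction as well for bidirectional traffic.
 *	ret = rte_eth_hairpin_bind(rx_port, tx_port);
 */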

/*
 * Unbind hairpin ports; Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, may be different from that when setting up the
 * hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or a positive value on success (the actual number of peer ports),
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			rxq_ctrl = mlx5_rxq_get(dev, i);
			if (!rxq_ctrl)
				continue;
			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
				mlx5_rxq_release(dev, i);
				continue;
			}
			pp = rxq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_rxq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_rxq_release(dev, i);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}
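
/*
 * Illustrative sketch (not part of the driver): applications reach this
 * callback through rte_eth_hairpin_get_peer_ports(). Variable names are
 * hypothetical:
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int i, n;
 *
 *	n = rte_eth_hairpin_get_peer_ports(port_id, peers, RTE_DIM(peers), 1);
 *	for (i = 0; i < n; i++)
 *		printf("Tx port %u peers with Rx port %u\n",
 *		       port_id, peers[i]);
 */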

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if ((priv->sh->devx && priv->config.dv_flow_en &&
	     priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
		ret = priv->obj_ops.lb_dummy_queue_create(dev);
		if (ret)
			goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step will be skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue from the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, it only needs to start the default mreg copy
	 * action and no flow created by application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
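
/*
 * Illustrative sketch (not part of the driver): the usual ethdev sequence
 * that leads into this callback. Descriptor counts and identifiers are
 * hypothetical:
 *
 *	ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 *	ret = rte_eth_rx_queue_setup(port_id, 0, 1024,
 *				     rte_eth_dev_socket_id(port_id),
 *				     NULL, mbuf_pool);
 *	ret = rte_eth_tx_queue_setup(port_id, 0, 1024,
 *				     rte_eth_dev_socket_id(port_id), NULL);
 *	ret = rte_eth_dev_start(port_id);	// Ends up in mlx5_dev_start().
 */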

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	rte_delay_us_sleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All Rx queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_flow_meter_rxq_flush(dev);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev);
	return 0;
}

/**
 * Enable traffic flows configured by the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow should be created regardless of
	 * isolation mode; otherwise all packets to be sent would go out
	 * directly without the Tx flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if ((priv->representor || priv->master) &&
		    priv->config.dv_esw_en) {
			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
				DRV_LOG(ERR,
					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
					dev->data->port_id, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret) {
				/* Do not fail on IPv6 broadcast creation failure. */
				DRV_LOG(WARNING,
					"IPv6 broadcast is not supported");
				ret = 0;
			}
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}

/**
 * Restart traffic flows configured by the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}
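
/*
 * Illustrative sketch (not part of the driver): control-path handlers that
 * change device state on a started port refresh the control flows by calling
 * mlx5_traffic_restart(). The handler name is hypothetical:
 *
 *	static int
 *	example_mac_update(struct rte_eth_dev *dev)
 *	{
 *		// ... update dev->data->mac_addrs[] ...
 *		return mlx5_traffic_restart(dev);
 *	}
 */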