1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
14 #include <mlx5_malloc.h>
17 #include "mlx5_flow.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
/*
 * mlx5_txq_stop: walks every Tx queue index in [0, priv->txqs_n) and calls
 * mlx5_txq_release() on it.
 * NOTE(review): this listing skips source lines (the embedded numbering
 * jumps, e.g. 30 -> 32), so the return type, the braces and the declaration
 * of i are not visible here.
 */
24 * Stop traffic on Tx queues.
27 * Pointer to Ethernet device structure.
30 mlx5_txq_stop(struct rte_eth_dev *dev)
32 struct mlx5_priv *priv = dev->data->dev_private;
/* One release call per configured Tx queue index. */
35 for (i = 0; i != priv->txqs_n; ++i)
36 mlx5_txq_release(dev, i);
/*
 * mlx5_txq_start: brings up the Tx queues of the port, index by index.
 * Visible steps per queue: fetch the control structure with mlx5_txq_get(),
 * allocate elements for standard queues, allocate txq_ctrl->obj, create the
 * queue object through priv->obj_ops.txq_obj_new(), allocate the fcqs array
 * for standard queues and link the object into priv->txqsobj.
 * NOTE(review): the listing skips source lines (embedded numbering jumps),
 * so the checks and jumps between these steps are not all visible.
 */
40 * Start traffic on Tx queues.
43 * Pointer to Ethernet device structure.
46 * 0 on success, a negative errno value otherwise and rte_errno is set.
49 mlx5_txq_start(struct rte_eth_dev *dev)
51 struct mlx5_priv *priv = dev->data->dev_private;
55 for (i = 0; i != priv->txqs_n; ++i) {
56 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
/*
 * NOTE(review): the member address below is formed before any NULL check
 * on txq_ctrl that is visible here; confirm omitted lines 59-61 guard it
 * before txq_data is used.
 */
57 struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
58 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
/* Only standard (non-hairpin) queues get an elements array. */
62 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63 txq_alloc_elts(txq_ctrl);
/* The queue object must not exist yet at this point. */
64 MLX5_ASSERT(!txq_ctrl->obj);
65 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
68 DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69 "memory resources.", dev->data->port_id,
/* Backend-specific object creation through the obj_ops table. */
74 ret = priv->obj_ops.txq_obj_new(dev, i);
76 mlx5_free(txq_ctrl->obj);
80 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
/* One fcqs entry per CQE (cqe_s entries in total). */
81 size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
83 txq_data->fcqs = mlx5_malloc(flags, size,
86 if (!txq_data->fcqs) {
87 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88 "allocate memory (FCQ).",
89 dev->data->port_id, i);
/* NOTE(review): %p receives the address of the obj field, not obj itself. */
94 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95 dev->data->port_id, i, (void *)&txq_ctrl->obj);
96 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
100 ret = rte_errno; /* Save rte_errno before cleanup. */
/*
 * Cleanup releases queue i; presumably repeated for the queues started
 * earlier, but the enclosing loop is not shown in this listing.
 */
102 mlx5_txq_release(dev, i);
104 rte_errno = ret; /* Restore rte_errno. */
109 * Translate the chunk address to MR key in order to put it into the cache.
/*
 * mlx5_rxq_mempool_register_cb: per-chunk callback handed to
 * rte_mempool_mem_iter() by mlx5_rxq_mempool_register(). The opaque argument
 * is the Rx queue data; the chunk address is passed to mlx5_rx_addr2mr().
 * NOTE(review): the remaining parameters and the return type are on lines
 * missing from this listing.
 */
112 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
113 struct rte_mempool_memhdr *memhdr,
116 struct mlx5_rxq_data *rxq = opaque;
/* The returned key is not used here; the call is made for its lookup effect. */
120 mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
/*
 * mlx5_rxq_mempool_register: flushes the local MR cache of the Rx queue and
 * refills it. With MPRQ enabled the MPRQ mempool chunks are iterated through
 * mlx5_rxq_mempool_register_cb(); the per-segment loop registers each
 * segment mempool with mlx5_mr_mempool_register() and iterates its chunks.
 * NOTE(review): the two paths are presumably alternatives, but the return
 * between them is on a line missing from this listing.
 */
124 * Register Rx queue mempools and fill the Rx queue cache.
125 * This function tolerates repeated mempool registration.
127 * @param[in] rxq_ctrl
128 * Rx queue control data.
131 * 0 on success, (-1) on failure and rte_errno is set.
134 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
136 struct rte_mempool *mp;
/* Start from an empty per-queue cache. */
140 mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
141 /* MPRQ mempool is registered on creation, just fill the cache. */
142 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
143 rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
144 mlx5_rxq_mempool_register_cb,
148 for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
151 mp = rxq_ctrl->rxq.rxseg[s].mp;
/* The private flags are only read for pools other than the MPRQ pool. */
152 flags = mp != rxq_ctrl->rxq.mprq_mp ?
153 rte_pktmbuf_priv_flags(mp) : 0;
154 ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp);
/* EEXIST is tolerated, matching the repeated-registration note above. */
155 if (ret < 0 && rte_errno != EEXIST)
/* Pools with pinned external buffers are not iterated chunk by chunk. */
157 if ((flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) == 0)
158 rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
/*
 * mlx5_rxq_stop: walks every Rx queue index in [0, priv->rxqs_n) and calls
 * mlx5_rxq_release() on it.
 * NOTE(review): return type, braces and the declaration of i are on lines
 * missing from this listing.
 */
165 * Stop traffic on Rx queues.
168 * Pointer to Ethernet device structure.
171 mlx5_rxq_stop(struct rte_eth_dev *dev)
173 struct mlx5_priv *priv = dev->data->dev_private;
/* One release call per configured Rx queue index. */
176 for (i = 0; i != priv->rxqs_n; ++i)
177 mlx5_rxq_release(dev, i);
/*
 * mlx5_rxq_ctrl_prepare: prepares one Rx queue control structure before its
 * object is created. Standard queues get their mempools pre-registered and
 * their elements allocated; then rxq_ctrl->obj is allocated with the
 * MLX5_MEM_RTE | MLX5_MEM_ZERO flags.
 * NOTE(review): the last parameter (used below as idx), the return type and
 * the error returns are on lines missing from this listing.
 */
181 mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
186 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
188 * Pre-register the mempools. Regardless of whether
189 * the implicit registration is enabled or not,
190 * Rx mempool destruction is tracked to free MRs.
192 if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
194 ret = rxq_alloc_elts(rxq_ctrl);
/* The queue object must not exist yet at this point. */
198 MLX5_ASSERT(!rxq_ctrl->obj);
199 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
200 sizeof(*rxq_ctrl->obj), 0,
202 if (!rxq_ctrl->obj) {
203 DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
204 dev->data->port_id, idx);
/* NOTE(review): %p receives the address of the obj field, not obj itself. */
208 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
209 idx, (void *)&rxq_ctrl->obj);
/*
 * mlx5_rxq_start: brings up the Rx queues of the port. Visible steps: set up
 * the Multi-Packet RQ mempool with mlx5_mprq_alloc_mp(), then for each queue
 * index take a reference with mlx5_rxq_ref(), prepare the shared control
 * structure once (guarded by rxq_ctrl->started), create the queue object
 * through priv->obj_ops.rxq_obj_new() and mark the control as started.
 * NOTE(review): the listing skips source lines, so the checks and jumps
 * between these steps are not all visible.
 */
214 * Start traffic on Rx queues.
217 * Pointer to Ethernet device structure.
220 * 0 on success, a negative errno value otherwise and rte_errno is set.
223 mlx5_rxq_start(struct rte_eth_dev *dev)
225 struct mlx5_priv *priv = dev->data->dev_private;
229 /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
230 if (mlx5_mprq_alloc_mp(dev)) {
231 /* Should not release Rx queues but return immediately. */
234 DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
235 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
236 DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
237 dev->data->port_id, priv->sh->device_attr.max_sge);
238 for (i = 0; i != priv->rxqs_n; ++i) {
239 struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
240 struct mlx5_rxq_ctrl *rxq_ctrl;
244 rxq_ctrl = rxq->ctrl;
/* The control structure is prepared and listed only on its first start. */
245 if (!rxq_ctrl->started) {
246 if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
248 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
250 ret = priv->obj_ops.rxq_obj_new(rxq);
/*
 * NOTE(review): obj was linked into priv->rxqsobj above and no list removal
 * is visible before this free (lines 251 and 254-255 are not shown); confirm
 * the list cannot keep a dangling entry.
 */
252 mlx5_free(rxq_ctrl->obj);
253 rxq_ctrl->obj = NULL;
256 rxq_ctrl->started = true;
260 ret = rte_errno; /* Save rte_errno before cleanup. */
/*
 * Cleanup releases queue i; presumably repeated for the queues started
 * earlier, but the enclosing loop is not shown in this listing.
 */
262 mlx5_rxq_release(dev, i);
264 rte_errno = ret; /* Restore rte_errno. */
/*
 * mlx5_hairpin_auto_bind: binds hairpin Tx queues to Rx queues of the same
 * port. The first loop scans the Tx queues whose hairpin peer port is this
 * port and looks at their manual_bind setting (need_auto is declared for
 * this purpose). The second loop, for each such Tx queue, takes the SQ from
 * the Tx object, looks up the peer Rx queue, checks that it is a hairpin
 * queue pointing back at Tx queue i, programs SQ and RQ with the peer ids
 * and marks both sides bound (hairpin_status = 1).
 * NOTE(review): the listing skips source lines, so the continue/goto/return
 * statements that connect these steps are not visible.
 */
269 * Binds Tx queues to Rx queues for hairpin.
271 * Binds Tx queues to the target Rx queues.
274 * Pointer to Ethernet device structure.
277 * 0 on success, a negative errno value otherwise and rte_errno is set.
280 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
282 struct mlx5_priv *priv = dev->data->dev_private;
283 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
284 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
285 struct mlx5_txq_ctrl *txq_ctrl;
286 struct mlx5_rxq_priv *rxq;
287 struct mlx5_rxq_ctrl *rxq_ctrl;
288 struct mlx5_devx_obj *sq;
289 struct mlx5_devx_obj *rq;
292 bool need_auto = false;
293 uint16_t self_port = dev->data->port_id;
/* Pass 1: inspect the binding mode of the self-peered hairpin Tx queues. */
295 for (i = 0; i != priv->txqs_n; ++i) {
296 txq_ctrl = mlx5_txq_get(dev, i);
299 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
300 txq_ctrl->hairpin_conf.peers[0].port != self_port) {
301 mlx5_txq_release(dev, i);
304 if (txq_ctrl->hairpin_conf.manual_bind) {
305 mlx5_txq_release(dev, i);
309 mlx5_txq_release(dev, i);
/* Pass 2: program the SQ/RQ pair of each self-peered hairpin Tx queue. */
313 for (i = 0; i != priv->txqs_n; ++i) {
314 txq_ctrl = mlx5_txq_get(dev, i);
317 /* Skip hairpin queues with other peer ports. */
318 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
319 txq_ctrl->hairpin_conf.peers[0].port != self_port) {
320 mlx5_txq_release(dev, i);
323 if (!txq_ctrl->obj) {
325 DRV_LOG(ERR, "port %u no txq object found: %d",
326 dev->data->port_id, i);
327 mlx5_txq_release(dev, i);
330 sq = txq_ctrl->obj->sq;
331 rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
/*
 * NOTE(review): the Tx queue is released before the log below reads
 * txq_ctrl->hairpin_conf; confirm the control structure stays valid
 * after the release.
 */
333 mlx5_txq_release(dev, i);
335 DRV_LOG(ERR, "port %u no rxq object found: %d",
337 txq_ctrl->hairpin_conf.peers[0].queue);
340 rxq_ctrl = rxq->ctrl;
/* The Rx side must be a hairpin queue whose peer is Tx queue i. */
341 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
342 rxq->hairpin_conf.peers[0].queue != i) {
344 DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
345 "Rx queue %d", dev->data->port_id,
346 i, txq_ctrl->hairpin_conf.peers[0].queue);
349 rq = rxq_ctrl->obj->rq;
352 DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
354 txq_ctrl->hairpin_conf.peers[0].queue);
/* SQ transition RST -> RDY, carrying the peer RQ id and the local vhca id. */
357 sq_attr.state = MLX5_SQC_STATE_RDY;
358 sq_attr.sq_state = MLX5_SQC_STATE_RST;
359 sq_attr.hairpin_peer_rq = rq->id;
360 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
361 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
/*
 * NOTE(review): the RQ attributes are filled with MLX5_SQC_STATE_*
 * constants; confirm these values match the RQ state encoding.
 */
364 rq_attr.state = MLX5_SQC_STATE_RDY;
365 rq_attr.rq_state = MLX5_SQC_STATE_RST;
366 rq_attr.hairpin_peer_sq = sq->id;
367 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
368 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
371 /* Qs with auto-bind will be destroyed directly. */
372 rxq->hairpin_status = 1;
373 txq_ctrl->hairpin_status = 1;
374 mlx5_txq_release(dev, i);
378 mlx5_txq_release(dev, i);
/*
 * mlx5_hairpin_queue_peer_update: fills peer_info for one hairpin queue of
 * this port. With direction == 0 the Tx queue peer_queue is described
 * (SQ id), otherwise the Rx queue peer_queue (RQ id). In both cases the
 * local vhca id and the queue hairpin configuration (peer queue,
 * tx_explicit, manual_bind) are copied out. The port must be started.
 * NOTE(review): the last parameter (used below as direction), the return
 * type and the error returns are on lines missing from this listing.
 */
383 * Fetch the peer queue's SW & HW information.
386 * Pointer to Ethernet device structure.
388 * Index of the queue to fetch the information.
389 * @param current_info
390 * Pointer to the input peer information, not used currently.
392 * Pointer to the structure to store the information, output.
394 * Positive to get the RxQ information, zero to get the TxQ information.
397 * 0 on success, a negative errno value otherwise and rte_errno is set.
400 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
401 struct rte_hairpin_peer_info *current_info,
402 struct rte_hairpin_peer_info *peer_info,
405 struct mlx5_priv *priv = dev->data->dev_private;
406 RTE_SET_USED(current_info);
/* Queue objects are only looked up on a started port. */
408 if (dev->data->dev_started == 0) {
410 DRV_LOG(ERR, "peer port %u is not started",
415 * Peer port used as egress. In the current design, hairpin Tx queue
416 * will be bound to the peer Rx queue. Indeed, only the information of
417 * peer Rx queue needs to be fetched.
419 if (direction == 0) {
420 struct mlx5_txq_ctrl *txq_ctrl;
422 txq_ctrl = mlx5_txq_get(dev, peer_queue);
423 if (txq_ctrl == NULL) {
425 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
426 dev->data->port_id, peer_queue);
/* Every exit after a successful mlx5_txq_get() releases the queue again. */
429 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
431 DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
432 dev->data->port_id, peer_queue);
433 mlx5_txq_release(dev, peer_queue);
436 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
438 DRV_LOG(ERR, "port %u no Txq object found: %d",
439 dev->data->port_id, peer_queue);
440 mlx5_txq_release(dev, peer_queue);
443 peer_info->qp_id = txq_ctrl->obj->sq->id;
444 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
445 /* 1-to-1 mapping, only the first one is used. */
446 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
447 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
448 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
449 mlx5_txq_release(dev, peer_queue);
450 } else { /* Peer port used as ingress. */
/* No release call is visible on the Rx paths after mlx5_rxq_get(). */
451 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
452 struct mlx5_rxq_ctrl *rxq_ctrl;
456 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
457 dev->data->port_id, peer_queue);
460 rxq_ctrl = rxq->ctrl;
461 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
463 DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
464 dev->data->port_id, peer_queue);
467 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
469 DRV_LOG(ERR, "port %u no Rxq object found: %d",
470 dev->data->port_id, peer_queue);
473 peer_info->qp_id = rxq_ctrl->obj->rq->id;
474 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
475 peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
476 peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
477 peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
/*
 * mlx5_hairpin_queue_peer_bind: programs one hairpin queue of this port
 * with the HW ids of its peer. With direction != 0 the Tx queue cur_queue
 * is modified (SQ), otherwise the Rx queue cur_queue (RQ). Before touching
 * HW the function checks that the peer points back at cur_queue, that the
 * queue is a hairpin queue with an object, and that tx_explicit and
 * manual_bind agree with the peer. A queue whose hairpin_status is already
 * non-zero is only logged as already bound. On the visible success paths
 * hairpin_status is set to 1.
 * NOTE(review): the last parameter (used below as direction), the return
 * type and the return statements are on lines missing from this listing.
 */
483 * Bind the hairpin queue with the peer HW information.
484 * This needs to be called twice both for Tx and Rx queues of a pair.
485 * If the queue is already bound, it is considered successful.
488 * Pointer to Ethernet device structure.
490 * Index of the queue to change the HW configuration to bind.
492 * Pointer to information of the peer queue.
494 * Positive to configure the TxQ, zero to configure the RxQ.
497 * 0 on success, a negative errno value otherwise and rte_errno is set.
500 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
501 struct rte_hairpin_peer_info *peer_info,
507 * Consistency checking of the peer queue: opposite direction is used
508 * to get the peer queue info with ethdev port ID, no need to check.
510 if (peer_info->peer_q != cur_queue) {
512 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
513 dev->data->port_id, cur_queue, peer_info->peer_q);
/* Tx side: every exit after mlx5_txq_get() releases the queue again. */
516 if (direction != 0) {
517 struct mlx5_txq_ctrl *txq_ctrl;
518 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
520 txq_ctrl = mlx5_txq_get(dev, cur_queue);
521 if (txq_ctrl == NULL) {
523 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
524 dev->data->port_id, cur_queue);
527 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
529 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
530 dev->data->port_id, cur_queue);
531 mlx5_txq_release(dev, cur_queue);
534 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
536 DRV_LOG(ERR, "port %u no Txq object found: %d",
537 dev->data->port_id, cur_queue);
538 mlx5_txq_release(dev, cur_queue);
541 if (txq_ctrl->hairpin_status != 0) {
542 DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
543 dev->data->port_id, cur_queue);
544 mlx5_txq_release(dev, cur_queue);
548 * All queues' of one port consistency checking is done in the
549 * bind() function, and that is optional.
551 if (peer_info->tx_explicit !=
552 txq_ctrl->hairpin_conf.tx_explicit) {
554 DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
555 " mismatch", dev->data->port_id, cur_queue);
556 mlx5_txq_release(dev, cur_queue);
559 if (peer_info->manual_bind !=
560 txq_ctrl->hairpin_conf.manual_bind) {
562 DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
563 " mismatch", dev->data->port_id, cur_queue);
564 mlx5_txq_release(dev, cur_queue);
/* SQ transition RST -> RDY, carrying the peer RQ id and peer vhca id. */
567 sq_attr.state = MLX5_SQC_STATE_RDY;
568 sq_attr.sq_state = MLX5_SQC_STATE_RST;
569 sq_attr.hairpin_peer_rq = peer_info->qp_id;
570 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
571 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
573 txq_ctrl->hairpin_status = 1;
574 mlx5_txq_release(dev, cur_queue);
/* Rx side: same checks as above, applied to the Rx queue and its RQ. */
576 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
577 struct mlx5_rxq_ctrl *rxq_ctrl;
578 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
582 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
583 dev->data->port_id, cur_queue);
586 rxq_ctrl = rxq->ctrl;
587 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
589 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
590 dev->data->port_id, cur_queue);
593 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
595 DRV_LOG(ERR, "port %u no Rxq object found: %d",
596 dev->data->port_id, cur_queue);
599 if (rxq->hairpin_status != 0) {
600 DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
601 dev->data->port_id, cur_queue);
604 if (peer_info->tx_explicit !=
605 rxq->hairpin_conf.tx_explicit) {
607 DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
608 " mismatch", dev->data->port_id, cur_queue);
611 if (peer_info->manual_bind !=
612 rxq->hairpin_conf.manual_bind) {
614 DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
615 " mismatch", dev->data->port_id, cur_queue);
/*
 * NOTE(review): the RQ attributes are filled with MLX5_SQC_STATE_*
 * constants; confirm these values match the RQ state encoding.
 */
618 rq_attr.state = MLX5_SQC_STATE_RDY;
619 rq_attr.rq_state = MLX5_SQC_STATE_RST;
620 rq_attr.hairpin_peer_sq = peer_info->qp_id;
621 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
622 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
624 rxq->hairpin_status = 1;
/*
 * mlx5_hairpin_queue_peer_unbind: resets the HW state of one hairpin queue
 * of this port. With direction != 0 the Tx queue cur_queue is reset (SQ),
 * otherwise the Rx queue cur_queue (RQ). A queue whose hairpin_status is
 * already 0 is only logged as already unbound, and that check comes before
 * the object check. On the visible success paths hairpin_status is
 * cleared to 0.
 * NOTE(review): the last parameter (used below as direction), the return
 * type and the return statements are on lines missing from this listing.
 */
630 * Unbind the hairpin queue and reset its HW configuration.
631 * This needs to be called twice both for Tx and Rx queues of a pair.
632 * If the queue is already unbound, it is considered successful.
635 * Pointer to Ethernet device structure.
637 * Index of the queue to change the HW configuration to unbind.
639 * Positive to reset the TxQ, zero to reset the RxQ.
642 * 0 on success, a negative errno value otherwise and rte_errno is set.
645 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
/* Tx side: every exit after mlx5_txq_get() releases the queue again. */
650 if (direction != 0) {
651 struct mlx5_txq_ctrl *txq_ctrl;
652 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
654 txq_ctrl = mlx5_txq_get(dev, cur_queue);
655 if (txq_ctrl == NULL) {
657 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
658 dev->data->port_id, cur_queue);
661 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
663 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
664 dev->data->port_id, cur_queue);
665 mlx5_txq_release(dev, cur_queue);
668 /* Already unbound, return success before obj checking. */
669 if (txq_ctrl->hairpin_status == 0) {
670 DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
671 dev->data->port_id, cur_queue);
672 mlx5_txq_release(dev, cur_queue);
675 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
677 DRV_LOG(ERR, "port %u no Txq object found: %d",
678 dev->data->port_id, cur_queue);
679 mlx5_txq_release(dev, cur_queue);
/* Both the target and the current state fields are set to the RST value. */
682 sq_attr.state = MLX5_SQC_STATE_RST;
683 sq_attr.sq_state = MLX5_SQC_STATE_RST;
684 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
686 txq_ctrl->hairpin_status = 0;
687 mlx5_txq_release(dev, cur_queue);
/* Rx side: same sequence, applied to the Rx queue and its RQ. */
689 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
690 struct mlx5_rxq_ctrl *rxq_ctrl;
691 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
695 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
696 dev->data->port_id, cur_queue);
699 rxq_ctrl = rxq->ctrl;
700 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
702 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
703 dev->data->port_id, cur_queue);
706 if (rxq->hairpin_status == 0) {
707 DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
708 dev->data->port_id, cur_queue);
711 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
713 DRV_LOG(ERR, "port %u no Rxq object found: %d",
714 dev->data->port_id, cur_queue);
/*
 * NOTE(review): the RQ attributes are filled with MLX5_SQC_STATE_*
 * constants; confirm these values match the RQ state encoding.
 */
717 rq_attr.state = MLX5_SQC_STATE_RST;
718 rq_attr.rq_state = MLX5_SQC_STATE_RST;
719 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
721 rxq->hairpin_status = 0;
/*
 * mlx5_hairpin_bind_single_port: binds every hairpin Tx queue of this port
 * whose peer port is rx_port. The first loop only validates that all such
 * queues share the same manual_bind and tx_explicit settings. The second
 * loop, per queue, fetches the peer Rx queue information, binds the local
 * Tx queue, then hands the local SQ information to the peer Rx queue for
 * its own bind. The trailing statements undo binds already made.
 * NOTE(review): the listing skips source lines, so the declarations of
 * several locals (i, ret, manual, explicit, rx_queue) and the jumps between
 * the phases are not visible.
 */
727 * Bind the hairpin port pairs, from the Tx to the peer Rx.
728 * This function only supports to bind the Tx to one Rx.
731 * Pointer to Ethernet device structure.
733 * Port identifier of the Rx port.
736 * 0 on success, a negative errno value otherwise and rte_errno is set.
739 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
741 struct mlx5_priv *priv = dev->data->dev_private;
743 struct mlx5_txq_ctrl *txq_ctrl;
/*
 * NOTE(review): the initializer sets only the first member of the struct
 * to 0xffffff; which member that is cannot be told from this listing.
 */
745 struct rte_hairpin_peer_info peer = {0xffffff};
746 struct rte_hairpin_peer_info cur;
747 const struct rte_eth_hairpin_conf *conf;
749 uint16_t local_port = priv->dev_data->port_id;
/* rx_port must be a port found by mlx5_eth_find_next() for this device. */
754 if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
756 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
760 * Before binding TxQ to peer RxQ, first round loop will be used for
761 * checking the queues' configuration consistency. This would be a
762 * little time consuming but better than doing the rollback.
764 for (i = 0; i != priv->txqs_n; i++) {
765 txq_ctrl = mlx5_txq_get(dev, i);
766 if (txq_ctrl == NULL)
768 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
769 mlx5_txq_release(dev, i);
773 * All hairpin Tx queues of a single port that connected to the
774 * same peer Rx port should have the same "auto binding" and
775 * "implicit Tx flow" modes.
776 * Peer consistency checking will be done in per queue binding.
778 conf = &txq_ctrl->hairpin_conf;
779 if (conf->peers[0].port == rx_port) {
781 manual = conf->manual_bind;
782 explicit = conf->tx_explicit;
784 if (manual != conf->manual_bind ||
785 explicit != conf->tx_explicit) {
787 DRV_LOG(ERR, "port %u queue %d mode"
788 " mismatch: %u %u, %u %u",
789 local_port, i, manual,
790 conf->manual_bind, explicit,
792 mlx5_txq_release(dev, i);
798 mlx5_txq_release(dev, i);
800 /* Once no queue is configured, success is returned directly. */
803 /* All the hairpin TX queues need to be traversed again. */
804 for (i = 0; i != priv->txqs_n; i++) {
805 txq_ctrl = mlx5_txq_get(dev, i);
806 if (txq_ctrl == NULL)
808 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
809 mlx5_txq_release(dev, i);
812 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
813 mlx5_txq_release(dev, i);
816 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
818 * Fetch peer RxQ's information.
819 * No need to pass the information of the current queue.
821 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
824 mlx5_txq_release(dev, i);
827 /* Accessing its own device, inside mlx5 PMD. */
828 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
830 mlx5_txq_release(dev, i);
833 /* Pass TxQ's information to peer RxQ and try binding. */
834 cur.peer_q = rx_queue;
835 cur.qp_id = txq_ctrl->obj->sq->id;
836 cur.vhca_id = priv->config.hca_attr.vhca_id;
837 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
838 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
840 * In order to access another device in a proper way, RTE level
841 * private function is needed.
843 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
846 mlx5_txq_release(dev, i);
849 mlx5_txq_release(dev, i);
854 * Do roll-back process for the queues already bound.
855 * No need to check the return value of the queue unbind function.
858 /* No validation is needed here. */
859 txq_ctrl = mlx5_txq_get(dev, i);
860 if (txq_ctrl == NULL)
862 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
/* Rx side first (direction 0), then the local Tx side (direction 1). */
863 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
864 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
865 mlx5_txq_release(dev, i);
871 * Unbind the hairpin port pair, HW configuration of both devices will be cleared
872 * and status will be reset for all the queues used between them.
873 * This function only supports unbinding the Tx from one Rx.
876 * Pointer to Ethernet device structure.
878 * Port identifier of the Rx port.
881 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * mlx5_hairpin_unbind_single_port: for every hairpin Tx queue of this port
 * whose peer port is rx_port, unbinds the peer Rx queue through the ethdev
 * layer and then the local Tx queue. Queues in auto-bind mode
 * (manual_bind == 0) get an extra check against a different peer port.
 * NOTE(review): the listing skips source lines, so the return type, several
 * local declarations and the return statements are not visible.
 */
884 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
886 struct mlx5_priv *priv = dev->data->dev_private;
887 struct mlx5_txq_ctrl *txq_ctrl;
890 uint16_t cur_port = priv->dev_data->port_id;
/* rx_port must be a port found by mlx5_eth_find_next() for this device. */
892 if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
894 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
897 for (i = 0; i != priv->txqs_n; i++) {
900 txq_ctrl = mlx5_txq_get(dev, i);
901 if (txq_ctrl == NULL)
903 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
904 mlx5_txq_release(dev, i);
907 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
908 mlx5_txq_release(dev, i);
911 /* Indeed, only the first used queue needs to be checked. */
912 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
913 if (cur_port != rx_port) {
915 DRV_LOG(ERR, "port %u and port %u are in"
916 " auto-bind mode", cur_port, rx_port);
917 mlx5_txq_release(dev, i);
/* The peer queue index is copied out before the Tx queue is released. */
923 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
924 mlx5_txq_release(dev, i);
/* Rx side first (direction 0), then the local Tx side (direction 1). */
925 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
927 DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
931 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
933 DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
/*
 * mlx5_hairpin_bind: entry point for binding hairpin ports. When rx_port is
 * RTE_MAX_ETHPORTS every port yielded by MLX5_ETH_FOREACH_DEV() for this
 * device is bound in turn; otherwise only rx_port is bound. The trailing
 * loop unbinds ports again.
 * NOTE(review): the condition that stops the unbind loop and the jump that
 * reaches it are on lines missing from this listing.
 */
942 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
943 * @see mlx5_hairpin_bind_single_port()
946 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
952 * If the Rx port has no hairpin configuration with the current port,
953 * the binding will be skipped in the called function of single port.
954 * Device started status will be checked only before the queue
955 * information updating.
957 if (rx_port == RTE_MAX_ETHPORTS) {
958 MLX5_ETH_FOREACH_DEV(p, dev->device) {
959 ret = mlx5_hairpin_bind_single_port(dev, p);
965 return mlx5_hairpin_bind_single_port(dev, rx_port);
/* Roll-back: the return value of each unbind call is ignored here. */
968 MLX5_ETH_FOREACH_DEV(pp, dev->device)
970 mlx5_hairpin_unbind_single_port(dev, pp);
/*
 * mlx5_hairpin_unbind: entry point for unbinding hairpin ports. When
 * rx_port is RTE_MAX_ETHPORTS every port yielded by MLX5_ETH_FOREACH_DEV()
 * for this device is unbound in turn; the single-port call below handles
 * rx_port itself.
 * NOTE(review): the else branch, the handling of ret inside the loop and
 * the return statement are on lines missing from this listing.
 */
975 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
976 * @see mlx5_hairpin_unbind_single_port()
979 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
984 if (rx_port == RTE_MAX_ETHPORTS)
985 MLX5_ETH_FOREACH_DEV(p, dev->device) {
986 ret = mlx5_hairpin_unbind_single_port(dev, p);
991 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
/*
 * mlx5_hairpin_get_peer_ports: collects the distinct peer port ids of the
 * hairpin queues of this port. Peer ports are recorded in a bitmap with one
 * bit per possible port id, which is then walked in ascending order to fill
 * peer_ports; this is why the output is sorted. One loop scans the Tx
 * queues and one the Rx queues.
 * NOTE(review): the branch on direction that selects between the two loops,
 * the declarations of i, pp and ret, and the return statements are on lines
 * missing from this listing.
 */
996 * DPDK callback to get the hairpin peer ports list.
997 * This will return the actual number of peer ports and save the identifiers
998 * into the array (sorted, may be different from that when setting up the
999 * hairpin peer queues).
1000 * The peer port ID could be the same as the port ID of the current device.
1003 * Pointer to Ethernet device structure.
1005 * Pointer to array to save the port identifiers.
1007 * The length of the array.
1009 * Current port to peer port direction.
1010 * positive - current used as Tx to get all peer Rx ports.
1011 * zero - current used as Rx to get all peer Tx ports.
1014 * 0 or positive value on success, actual number of peer ports.
1015 * a negative errno value otherwise and rte_errno is set.
1018 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1019 size_t len, uint32_t direction)
1021 struct mlx5_priv *priv = dev->data->dev_private;
1022 struct mlx5_txq_ctrl *txq_ctrl;
/* One bit per port id, rounded up to whole 32-bit words. */
1025 uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1029 for (i = 0; i < priv->txqs_n; i++) {
1030 txq_ctrl = mlx5_txq_get(dev, i);
1033 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1034 mlx5_txq_release(dev, i);
1037 pp = txq_ctrl->hairpin_conf.peers[0].port;
/* Reject peer ids that would index past the bitmap. */
1038 if (pp >= RTE_MAX_ETHPORTS) {
1040 mlx5_txq_release(dev, i);
1041 DRV_LOG(ERR, "port %hu queue %u peer port "
1043 priv->dev_data->port_id, i, pp);
/*
 * NOTE(review): the literal 1 is a signed int, so a shift count of 31
 * shifts into the sign bit, which is undefined in ISO C. An unsigned
 * constant (1u) would avoid it. The same pattern appears in the two other
 * shifts further down.
 */
1046 bits[pp / 32] |= 1 << (pp % 32);
1047 mlx5_txq_release(dev, i);
1050 for (i = 0; i < priv->rxqs_n; i++) {
1051 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1052 struct mlx5_rxq_ctrl *rxq_ctrl;
1056 rxq_ctrl = rxq->ctrl;
1057 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
1059 pp = rxq->hairpin_conf.peers[0].port;
1060 if (pp >= RTE_MAX_ETHPORTS) {
1062 DRV_LOG(ERR, "port %hu queue %u peer port "
1064 priv->dev_data->port_id, i, pp);
1067 bits[pp / 32] |= 1 << (pp % 32);
/* Emit the set bits in ascending port order; ret counts the entries. */
1070 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1071 if (bits[i / 32] & (1 << (i % 32))) {
/* The caller array is full once ret reaches len. */
1072 if ((size_t)ret >= len) {
1076 peer_ports[ret++] = i;
/*
 * mlx5_dev_start: device start callback. Visible order of operations:
 * resolve the fine-granularity inline dynamic flag, configure the RSS
 * redirection table, start Tx packet pacing, optionally create the loopback
 * dummy queue, start the Tx queues, query the dropless-RQ flag when a delay
 * drop option is set, start the Rx queues, auto-bind hairpin queues, mark
 * the port started, enable Rx interrupt vectors, init stats, attach indirect
 * actions, enable control flows, set per-queue dynamic metadata/timestamp
 * settings, start default flows, subscribe to mempool events, select the
 * burst functions and record the interrupt handler port ids.
 * The tail of the function is the error path that undoes these steps.
 * NOTE(review): the listing skips source lines, so the ret checks, the goto
 * targets and parts of the error path are not visible.
 */
1083 * DPDK callback to start the device.
1085 * Simulate device start by attaching all configured flows.
1088 * Pointer to Ethernet device structure.
1091 * 0 on success, a negative errno value otherwise and rte_errno is set.
1094 mlx5_dev_start(struct rte_eth_dev *dev)
1096 struct mlx5_priv *priv = dev->data->dev_private;
1100 DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
/* A negative lookup result means the dynamic flag is not registered. */
1101 fine_inline = rte_mbuf_dynflag_lookup
1102 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1103 if (fine_inline >= 0)
1104 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1106 rte_net_mlx5_dynf_inline_mask = 0;
1107 if (dev->data->nb_rx_queues > 0) {
1108 ret = mlx5_dev_configure_rss_reta(dev);
/*
 * NOTE(review): most failure logs in this function use strerror() while two
 * use rte_strerror(); confirm which one is intended for rte_errno values.
 */
1110 DRV_LOG(ERR, "port %u reta config failed: %s",
1111 dev->data->port_id, strerror(rte_errno));
1115 ret = mlx5_txpp_start(dev);
1117 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1118 dev->data->port_id, strerror(rte_errno));
/* The dummy loopback queue is optional: the op pointer may be NULL. */
1121 if ((priv->sh->devx && priv->config.dv_flow_en &&
1122 priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1123 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1127 ret = mlx5_txq_start(dev);
1129 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1130 dev->data->port_id, strerror(rte_errno));
/* The dropless-RQ flag is only queried on non-VF, non-SF, non-representor ports. */
1133 if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
1134 if (!priv->config.vf && !priv->config.sf &&
1135 !priv->representor) {
1136 ret = mlx5_get_flag_dropless_rq(dev);
1139 "port %u cannot query dropless flag",
1140 dev->data->port_id);
1143 "port %u dropless_rq OFF, no rearming",
1144 dev->data->port_id);
1147 "port %u doesn't support dropless_rq flag",
1148 dev->data->port_id);
1151 ret = mlx5_rxq_start(dev);
1153 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1154 dev->data->port_id, strerror(rte_errno));
1158 * Such step will be skipped if there is no hairpin TX queue configured
1159 * with RX peer queue from the same device.
1161 ret = mlx5_hairpin_auto_bind(dev);
1163 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1164 dev->data->port_id, strerror(rte_errno));
1167 /* Set started flag here for the following steps like control flow. */
1168 dev->data->dev_started = 1;
1169 ret = mlx5_rx_intr_vec_enable(dev);
1171 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1172 dev->data->port_id);
1175 mlx5_os_stats_init(dev);
1177 * Attach indirection table objects detached on port stop.
1178 * They may be needed to create RSS in non-isolated mode.
1180 ret = mlx5_action_handle_attach(dev);
1183 "port %u failed to attach indirect actions: %s",
1184 dev->data->port_id, rte_strerror(rte_errno));
1187 ret = mlx5_traffic_enable(dev);
1189 DRV_LOG(ERR, "port %u failed to set defaults flows",
1190 dev->data->port_id);
1193 /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1194 mlx5_flow_rxq_dynf_metadata_set(dev);
1195 /* Set flags and context to convert Rx timestamps. */
1196 mlx5_rxq_timestamp_set(dev);
1197 /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1198 mlx5_txq_dynf_timestamp_set(dev);
1200 * In non-cached mode, it only needs to start the default mreg copy
1201 * action and no flow created by application exists anymore.
1202 * But it is worth wrapping the interface for further usage.
1204 ret = mlx5_flow_start_default(dev);
/*
 * NOTE(review): this failure is logged at DEBUG level while the other
 * failures in this function use ERR; confirm the level is intended.
 */
1206 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1207 dev->data->port_id, strerror(rte_errno));
1210 if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1211 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1212 dev->data->port_id, rte_strerror(rte_errno));
/* Install the burst functions only after all setup steps above. */
1216 dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1217 dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1218 /* Enable datapath on secondary process. */
1219 mlx5_mp_os_req_start_rxtx(dev);
/* The shared port table is indexed with dev_port - 1. */
1220 if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1221 priv->sh->port[priv->dev_port - 1].ih_port_id =
1222 (uint32_t)dev->data->port_id;
1224 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1225 dev->data->port_id);
1226 dev->data->dev_conf.intr_conf.lsc = 0;
1227 dev->data->dev_conf.intr_conf.rmv = 0;
1229 if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1230 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1231 (uint32_t)dev->data->port_id;
1234 ret = rte_errno; /* Save rte_errno before cleanup. */
1236 dev->data->dev_started = 0;
1237 mlx5_flow_stop_default(dev);
1238 mlx5_traffic_disable(dev);
1241 if (priv->obj_ops.lb_dummy_queue_release)
1242 priv->obj_ops.lb_dummy_queue_release(dev);
1243 mlx5_txpp_stop(dev); /* Stop last. */
1244 rte_errno = ret; /* Restore rte_errno. */
/*
 * mlx5_dev_stop: device stop callback. Visible order of operations: clear
 * the started flag, swap in the removed_* burst functions, stop the
 * datapath in secondary processes, sleep, then tear down default flows,
 * control flows, generic flows, meter Rx queue state, indirect actions and
 * Rx interrupt vectors, reset the interrupt handler port ids, release the
 * optional loopback dummy queue and stop Tx packet pacing.
 * NOTE(review): the listing skips source lines (e.g. 1280-1281 and
 * 1285-1288), so further teardown calls and the return are not visible.
 */
1249 * DPDK callback to stop the device.
1251 * Simulate device stop by detaching all configured flows.
1254 * Pointer to Ethernet device structure.
1257 mlx5_dev_stop(struct rte_eth_dev *dev)
1259 struct mlx5_priv *priv = dev->data->dev_private;
1261 dev->data->dev_started = 0;
1262 /* Prevent crashes when queues are still in use. */
1263 dev->rx_pkt_burst = removed_rx_burst;
1264 dev->tx_pkt_burst = removed_tx_burst;
1266 /* Disable datapath on secondary process. */
1267 mlx5_mp_os_req_stop_rxtx(dev);
/*
 * Sleeps 1000 us per configured Rx queue; presumably this lets bursts that
 * are still running finish before resources are torn down - TODO confirm.
 */
1268 rte_delay_us_sleep(1000 * priv->rxqs_n);
1269 DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1270 mlx5_flow_stop_default(dev);
1271 /* Control flows for default traffic can be removed firstly. */
1272 mlx5_traffic_disable(dev);
1273 /* All RX queue flags will be cleared in the flush interface. */
1274 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1275 mlx5_flow_meter_rxq_flush(dev);
1276 mlx5_action_handle_detach(dev);
1277 mlx5_rx_intr_vec_disable(dev);
/* RTE_MAX_ETHPORTS is stored as the port id once the port is stopped. */
1278 priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1279 priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
/* The dummy loopback queue is optional: the op pointer may be NULL. */
1282 if (priv->obj_ops.lb_dummy_queue_release)
1283 priv->obj_ops.lb_dummy_queue_release(dev);
1284 mlx5_txpp_stop(dev);
1290 * Enable traffic flows configured by control plane
1293 * Pointer to Ethernet device private data.
1295 * Pointer to Ethernet device structure.
1298 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Install the control flows of a port, in this order:
 *   1. per Tx queue: the default flow for implicit-mode hairpin queues whose
 *      peer is this port, and (master/representor with dv_esw_en) the SQ
 *      miss flow;
 *   2. the E-Switch table-zero flow (master/representor with dv_esw_en);
 *   3. the LACP miss flow (bonding, unless lacp_by_user is set);
 *   4. promiscuous, all-multicast, broadcast / IPv6-multicast and per-MAC
 *      unicast flows, one per configured VLAN filter when filters exist.
 * The visible error path flushes all MLX5_FLOW_TYPE_CTL flows while
 * preserving rte_errno.
 *
 * NOTE(review): this listing omits original lines (local declarations,
 * "if (ret)" checks, jumps, braces and return statements are not shown), so
 * the exact branch structure must be confirmed against the full file.
 */
1301 mlx5_traffic_enable(struct rte_eth_dev *dev)
1303 struct mlx5_priv *priv = dev->data->dev_private;
/* Broadcast destination; passed as both spec and mask below. */
1304 struct rte_flow_item_eth bcast = {
1305 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
/* IPv6 multicast: destination 33:33:xx:xx:xx:xx, masked on the first two bytes. */
1307 struct rte_flow_item_eth ipv6_multi_spec = {
1308 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1310 struct rte_flow_item_eth ipv6_multi_mask = {
1311 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
/* Spec for per-MAC flows; its dst bytes are overwritten in the MAC loop below. */
1313 struct rte_flow_item_eth unicast = {
1314 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
/* Full match on the destination address for the per-MAC flows. */
1316 struct rte_flow_item_eth unicast_mask = {
1317 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1319 const unsigned int vlan_filter_n = priv->vlan_filter_n;
/*
 * All-zero address compared against each mac_addrs[] entry below;
 * presumably used to skip unset slots -- the branch body is not visible.
 */
1320 const struct rte_ether_addr cmp = {
1321 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1328 * Hairpin txq default flow should be created no matter if it is
1329 * isolation mode. Or else all the packets to be sent will be sent
1330 * out directly without the TX flow actions, e.g. encapsulation.
1332 for (i = 0; i != priv->txqs_n; ++i) {
1333 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1336 /* Only Tx implicit mode requires the default Tx flow. */
1337 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1338 txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1339 txq_ctrl->hairpin_conf.peers[0].port ==
1340 priv->dev_data->port_id) {
1341 ret = mlx5_ctrl_flow_source_queue(dev, i);
/* Hairpin failure path: the queue obtained by mlx5_txq_get() is released here. */
1343 mlx5_txq_release(dev, i);
1347 if ((priv->representor || priv->master) &&
1348 priv->config.dv_esw_en) {
/*
 * A return value of 0 is treated as failure here (the error message is
 * logged in that case).
 * NOTE(review): unlike the hairpin failure path above (line 1343), no
 * mlx5_txq_release() is visible on this failure path before the release at
 * line 1356; if this branch leaves the loop, the queue obtained by
 * mlx5_txq_get() would presumably leak a reference, and "ret"/rte_errno are
 * not visibly set either -- verify against the full file.
 */
1349 if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1351 "Port %u Tx queue %u SQ create representor devx default miss rule failed.",
1352 dev->data->port_id, i);
/* Normal path: drop the per-iteration queue reference. */
1356 mlx5_txq_release(dev, i);
1358 if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
/* A non-zero result records that the FDB default rule is in place. */
1359 if (mlx5_flow_create_esw_table_zero_flow(dev))
1360 priv->fdb_def_rule = 1;
1362 DRV_LOG(INFO, "port %u FDB default rule cannot be"
1363 " configured - only Eswitch group 0 flows are"
1364 " supported.", dev->data->port_id);
/* LACP miss rule is only attempted on a bonding device (pf_bond >= 0) when the user has not taken over LACP. */
1366 if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1367 ret = mlx5_flow_lacp_miss(dev);
/* Both outcomes are only logged at INFO level in the visible code. */
1369 DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1370 "forward LACP to kernel.", dev->data->port_id);
1372 DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1373 , dev->data->port_id);
1377 if (dev->data->promiscuous) {
/* The same all-zero item is passed as both spec and mask. */
1378 struct rte_flow_item_eth promisc = {
1379 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1380 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1384 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1388 if (dev->data->all_multicast) {
/* Same item as spec and mask: only the 0x01 bit of the first destination byte is set. */
1389 struct rte_flow_item_eth multicast = {
1390 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1391 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1395 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1399 /* Add broadcast/multicast flows. */
/* One broadcast and one IPv6-multicast flow per configured VLAN filter. */
1400 for (i = 0; i != vlan_filter_n; ++i) {
1401 uint16_t vlan = priv->vlan_filter[i];
1403 struct rte_flow_item_vlan vlan_spec = {
1404 .tci = rte_cpu_to_be_16(vlan),
1406 struct rte_flow_item_vlan vlan_mask =
1407 rte_flow_item_vlan_mask;
1409 ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1410 &vlan_spec, &vlan_mask);
1413 ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1415 &vlan_spec, &vlan_mask);
/* No VLAN filter configured: install the same flows without a VLAN item. */
1419 if (!vlan_filter_n) {
1420 ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1423 ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1426 /* Do not fail on IPv6 broadcast creation failure. */
1428 "IPv6 broadcast is not supported");
1433 /* Add MAC address flows. */
1434 for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1435 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1437 if (!memcmp(mac, &cmp, sizeof(*mac)))
/* Load the destination bytes of the unicast spec for this entry. */
1439 memcpy(&unicast.dst.addr_bytes,
1441 RTE_ETHER_ADDR_LEN);
/* One unicast flow per configured VLAN filter for this address. */
1442 for (j = 0; j != vlan_filter_n; ++j) {
1443 uint16_t vlan = priv->vlan_filter[j];
1445 struct rte_flow_item_vlan vlan_spec = {
1446 .tci = rte_cpu_to_be_16(vlan),
1448 struct rte_flow_item_vlan vlan_mask =
1449 rte_flow_item_vlan_mask;
1451 ret = mlx5_ctrl_flow_vlan(dev, &unicast,
/* No VLAN filter configured: a single flow without a VLAN item. */
1458 if (!vlan_filter_n) {
1459 ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
/* Error path: remove every control flow, keeping the original rte_errno. */
1466 ret = rte_errno; /* Save rte_errno before cleanup. */
1467 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1468 rte_errno = ret; /* Restore rte_errno. */
1474 * Disable traffic flows configured by control plane
1477 * Pointer to Ethernet device structure.
/*
 * Remove the control flows of a port by flushing the MLX5_FLOW_TYPE_CTL
 * flow list. The last argument is false here, whereas the
 * MLX5_FLOW_TYPE_GEN flush in mlx5_dev_stop() passes true; its meaning is
 * defined by mlx5_flow_list_flush() and is not visible in this listing.
 */
1480 mlx5_traffic_disable(struct rte_eth_dev *dev)
1482 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1486 * Restart traffic flows configured by control plane
1489 * Pointer to Ethernet device structure.
1492 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Re-create the control flows of a started port: flush the current ones,
 * then install them again and return the result of mlx5_traffic_enable().
 *
 * NOTE(review): the path taken when the port is not started is not visible
 * in this listing -- confirm against the full file.
 */
1495 mlx5_traffic_restart(struct rte_eth_dev *dev)
/* Control flows only exist while the port is started. */
1497 if (dev->data->dev_started) {
1498 mlx5_traffic_disable(dev);
1499 return mlx5_traffic_enable(dev);