net/mlx5: fix queue leaking in hairpin auto bind check
[dpdk.git] drivers/net/mlx5/mlx5_trigger.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->txqs_n; ++i)
                mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;
        int ret;

        for (i = 0; i != priv->txqs_n; ++i) {
                struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
                struct mlx5_txq_data *txq_data;
                uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

                if (!txq_ctrl)
                        continue;
                /* Only dereference the control structure after the check. */
                txq_data = &txq_ctrl->txq;
                if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
                        txq_alloc_elts(txq_ctrl);
                MLX5_ASSERT(!txq_ctrl->obj);
                txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
                                            0, txq_ctrl->socket);
                if (!txq_ctrl->obj) {
                        DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
                                "memory resources.", dev->data->port_id,
                                txq_data->idx);
                        rte_errno = ENOMEM;
                        goto error;
                }
                ret = priv->obj_ops.txq_obj_new(dev, i);
                if (ret < 0) {
                        mlx5_free(txq_ctrl->obj);
                        txq_ctrl->obj = NULL;
                        goto error;
                }
                if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
                        size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

                        txq_data->fcqs = mlx5_malloc(flags, size,
                                                     RTE_CACHE_LINE_SIZE,
                                                     txq_ctrl->socket);
                        if (!txq_data->fcqs) {
                                DRV_LOG(ERR, "Port %u Tx queue %u cannot "
                                        "allocate memory (FCQ).",
                                        dev->data->port_id, i);
                                rte_errno = ENOMEM;
                                goto error;
                        }
                }
                DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
                        dev->data->port_id, i, (void *)&txq_ctrl->obj);
                LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        do {
                mlx5_txq_release(dev, i);
        } while (i-- != 0);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->rxqs_n; ++i)
                mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;
        int ret = 0;

        /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
        if (mlx5_mprq_alloc_mp(dev)) {
                /* Should not release Rx queues but return immediately. */
                return -rte_errno;
        }
        DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
                dev->data->port_id, priv->sh->device_attr.max_qp_wr);
        DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
                dev->data->port_id, priv->sh->device_attr.max_sge);
        for (i = 0; i != priv->rxqs_n; ++i) {
                struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

                if (!rxq_ctrl)
                        continue;
                if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
                        /* Pre-register Rx mempools. */
                        if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
                                mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
                                                  rxq_ctrl->rxq.mprq_mp);
                        } else {
                                uint32_t s;

                                for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
                                        mlx5_mr_update_mp
                                                (dev, &rxq_ctrl->rxq.mr_ctrl,
                                                rxq_ctrl->rxq.rxseg[s].mp);
                        }
                        ret = rxq_alloc_elts(rxq_ctrl);
                        if (ret)
                                goto error;
                }
                MLX5_ASSERT(!rxq_ctrl->obj);
                rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
                                            sizeof(*rxq_ctrl->obj), 0,
                                            rxq_ctrl->socket);
                if (!rxq_ctrl->obj) {
                        DRV_LOG(ERR,
                                "Port %u Rx queue %u can't allocate resources.",
                                dev->data->port_id, (*priv->rxqs)[i]->idx);
                        rte_errno = ENOMEM;
                        goto error;
                }
                ret = priv->obj_ops.rxq_obj_new(dev, i);
                if (ret) {
                        mlx5_free(rxq_ctrl->obj);
                        /* Clear the pointer to avoid a dangling reference. */
                        rxq_ctrl->obj = NULL;
                        goto error;
                }
                DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
                        dev->data->port_id, i, (void *)&rxq_ctrl->obj);
                LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        do {
                mlx5_rxq_release(dev, i);
        } while (i-- != 0);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}

/**
 * Bind Tx queues to their target Rx queues for hairpin auto binding.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
        struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
        struct mlx5_txq_ctrl *txq_ctrl;
        struct mlx5_rxq_ctrl *rxq_ctrl;
        struct mlx5_devx_obj *sq;
        struct mlx5_devx_obj *rq;
        unsigned int i;
        int ret = 0;
        bool need_auto = false;
        uint16_t self_port = dev->data->port_id;

        for (i = 0; i != priv->txqs_n; ++i) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (!txq_ctrl)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
                    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (txq_ctrl->hairpin_conf.manual_bind) {
                        mlx5_txq_release(dev, i);
                        return 0;
                }
                need_auto = true;
                mlx5_txq_release(dev, i);
        }
        if (!need_auto)
                return 0;
        for (i = 0; i != priv->txqs_n; ++i) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (!txq_ctrl)
                        continue;
                /* Skip hairpin queues with other peer ports. */
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
                    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (!txq_ctrl->obj) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no txq object found: %d",
                                dev->data->port_id, i);
                        mlx5_txq_release(dev, i);
                        return -rte_errno;
                }
                sq = txq_ctrl->obj->sq;
                rxq_ctrl = mlx5_rxq_get(dev,
                                        txq_ctrl->hairpin_conf.peers[0].queue);
                if (!rxq_ctrl) {
                        mlx5_txq_release(dev, i);
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u no rxq object found: %d",
                                dev->data->port_id,
                                txq_ctrl->hairpin_conf.peers[0].queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
                    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
                                "Rx queue %d", dev->data->port_id,
                                i, txq_ctrl->hairpin_conf.peers[0].queue);
                        goto error;
                }
                rq = rxq_ctrl->obj->rq;
                if (!rq) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
                                dev->data->port_id,
                                txq_ctrl->hairpin_conf.peers[0].queue);
                        goto error;
                }
                sq_attr.state = MLX5_SQC_STATE_RDY;
                sq_attr.sq_state = MLX5_SQC_STATE_RST;
                sq_attr.hairpin_peer_rq = rq->id;
                sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
                ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
                if (ret)
                        goto error;
                rq_attr.state = MLX5_SQC_STATE_RDY;
                rq_attr.rq_state = MLX5_SQC_STATE_RST;
                rq_attr.hairpin_peer_sq = sq->id;
                rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
                ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
                if (ret)
                        goto error;
                /* Qs with auto-bind will be destroyed directly. */
                rxq_ctrl->hairpin_status = 1;
                txq_ctrl->hairpin_status = 1;
                mlx5_txq_release(dev, i);
                mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
        }
        return 0;
error:
        mlx5_txq_release(dev, i);
        mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
        return -rte_errno;
}
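
/*
 * Illustrative sketch (not used by the driver): the application-side
 * configuration that makes the auto binding above apply. Both queues are
 * created on the same port with manual_bind and tx_explicit left at zero.
 * The queue indexes and descriptor count are hypothetical.
 */
static __rte_unused int
mlx5_hairpin_auto_bind_setup_sketch(uint16_t port_id, uint16_t rxq,
                                    uint16_t txq)
{
        struct rte_eth_hairpin_conf conf = {
                .peer_count = 1,
                /* 0/0 selects auto bind and implicit Tx flow mode. */
                .manual_bind = 0,
                .tx_explicit = 0,
        };
        int ret;

        conf.peers[0].port = port_id; /* Self port: auto bind applies. */
        conf.peers[0].queue = txq;
        ret = rte_eth_rx_hairpin_queue_setup(port_id, rxq, 256, &conf);
        if (ret != 0)
                return ret;
        conf.peers[0].queue = rxq;
        return rte_eth_tx_hairpin_queue_setup(port_id, txq, 256, &conf);
}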

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information for.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
                               struct rte_hairpin_peer_info *current_info,
                               struct rte_hairpin_peer_info *peer_info,
                               uint32_t direction)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        RTE_SET_USED(current_info);

        if (dev->data->dev_started == 0) {
                rte_errno = EBUSY;
                DRV_LOG(ERR, "peer port %u is not started",
                        dev->data->port_id);
                return -rte_errno;
        }
        /*
         * Peer port used as egress. In the current design, the hairpin Tx
         * queue will be bound to the peer Rx queue, so the Tx queue
         * information is what needs to be fetched here.
         */
        if (direction == 0) {
                struct mlx5_txq_ctrl *txq_ctrl;

                txq_ctrl = mlx5_txq_get(dev, peer_queue);
                if (txq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
                                dev->data->port_id, peer_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
                                dev->data->port_id, peer_queue);
                        mlx5_txq_release(dev, peer_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Txq object found: %d",
                                dev->data->port_id, peer_queue);
                        mlx5_txq_release(dev, peer_queue);
                        return -rte_errno;
                }
                peer_info->qp_id = txq_ctrl->obj->sq->id;
                peer_info->vhca_id = priv->config.hca_attr.vhca_id;
                /* 1-to-1 mapping, only the first one is used. */
                peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
                peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
                peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
                mlx5_txq_release(dev, peer_queue);
        } else { /* Peer port used as ingress. */
                struct mlx5_rxq_ctrl *rxq_ctrl;

                rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
                if (rxq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
                                dev->data->port_id, peer_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
                                dev->data->port_id, peer_queue);
                        mlx5_rxq_release(dev, peer_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Rxq object found: %d",
                                dev->data->port_id, peer_queue);
                        mlx5_rxq_release(dev, peer_queue);
                        return -rte_errno;
                }
                peer_info->qp_id = rxq_ctrl->obj->rq->id;
                peer_info->vhca_id = priv->config.hca_attr.vhca_id;
                peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
                peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
                peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
                mlx5_rxq_release(dev, peer_queue);
        }
        return 0;
}

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice, for both the Tx and the Rx queue of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
                             struct rte_hairpin_peer_info *peer_info,
                             uint32_t direction)
{
        int ret = 0;

        /*
         * Consistency check of the peer queue: the opposite direction was
         * used when fetching the peer queue info by ethdev port ID, so only
         * the queue index needs to be checked here.
         */
        if (peer_info->peer_q != cur_queue) {
                rte_errno = EINVAL;
                DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
                        dev->data->port_id, cur_queue, peer_info->peer_q);
                return -rte_errno;
        }
        if (direction != 0) {
                struct mlx5_txq_ctrl *txq_ctrl;
                struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

                txq_ctrl = mlx5_txq_get(dev, cur_queue);
                if (txq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Txq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->hairpin_status != 0) {
                        DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return 0;
                }
                /*
                 * Consistency checking of all the queues of one port is done
                 * in the bind() function, and it is optional.
                 */
                if (peer_info->tx_explicit !=
                    txq_ctrl->hairpin_conf.tx_explicit) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (peer_info->manual_bind !=
                    txq_ctrl->hairpin_conf.manual_bind) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                sq_attr.state = MLX5_SQC_STATE_RDY;
                sq_attr.sq_state = MLX5_SQC_STATE_RST;
                sq_attr.hairpin_peer_rq = peer_info->qp_id;
                sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
                ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
                if (ret == 0)
                        txq_ctrl->hairpin_status = 1;
                mlx5_txq_release(dev, cur_queue);
        } else {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

                rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
                if (rxq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Rxq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->hairpin_status != 0) {
                        DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return 0;
                }
                if (peer_info->tx_explicit !=
                    rxq_ctrl->hairpin_conf.tx_explicit) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (peer_info->manual_bind !=
                    rxq_ctrl->hairpin_conf.manual_bind) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                rq_attr.state = MLX5_SQC_STATE_RDY;
                rq_attr.rq_state = MLX5_SQC_STATE_RST;
                rq_attr.hairpin_peer_sq = peer_info->qp_id;
                rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
                ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
                if (ret == 0)
                        rxq_ctrl->hairpin_status = 1;
                mlx5_rxq_release(dev, cur_queue);
        }
        return ret;
}
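
/*
 * A minimal sketch (not used by the driver) of how the generic hairpin
 * bind flow drives the helpers above for one queue pair: first fetch the
 * peer Rx queue information, then configure the local Tx queue with it.
 * It mirrors the calls made in mlx5_hairpin_bind_single_port() below;
 * the port and queue indexes are hypothetical.
 */
static __rte_unused int
mlx5_hairpin_peer_bind_sketch(struct rte_eth_dev *tx_dev, uint16_t tx_queue,
                              uint16_t rx_port, uint16_t rx_queue)
{
        struct rte_hairpin_peer_info rx_info = { 0 };
        int ret;

        /* Direction 1: fetch the RxQ information from the peer port. */
        ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue, NULL,
                                                &rx_info, 1);
        if (ret != 0)
                return ret;
        /* Direction 1: configure the TxQ with the fetched peer info. */
        return mlx5_hairpin_queue_peer_bind(tx_dev, tx_queue, &rx_info, 1);
}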

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice, for both the Tx and the Rx queue of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
                               uint32_t direction)
{
        int ret = 0;

        if (direction != 0) {
                struct mlx5_txq_ctrl *txq_ctrl;
                struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

                txq_ctrl = mlx5_txq_get(dev, cur_queue);
                if (txq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                /* Already unbound, return success before obj checking. */
                if (txq_ctrl->hairpin_status == 0) {
                        DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return 0;
                }
                if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Txq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                sq_attr.state = MLX5_SQC_STATE_RST;
                sq_attr.sq_state = MLX5_SQC_STATE_RST;
                ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
                if (ret == 0)
                        txq_ctrl->hairpin_status = 0;
                mlx5_txq_release(dev, cur_queue);
        } else {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

                rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
                if (rxq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->hairpin_status == 0) {
                        DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return 0;
                }
                if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Rxq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                rq_attr.state = MLX5_SQC_STATE_RST;
                rq_attr.rq_state = MLX5_SQC_STATE_RST;
                ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
                if (ret == 0)
                        rxq_ctrl->hairpin_status = 0;
                mlx5_rxq_release(dev, cur_queue);
        }
        return ret;
}

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx to one Rx.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        int ret = 0;
        struct mlx5_txq_ctrl *txq_ctrl;
        uint32_t i;
        struct rte_hairpin_peer_info peer = {0xffffff};
        struct rte_hairpin_peer_info cur;
        const struct rte_eth_hairpin_conf *conf;
        uint16_t num_q = 0;
        uint16_t local_port = priv->dev_data->port_id;
        uint32_t manual;
        uint32_t explicit;
        uint16_t rx_queue;

        if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
                rte_errno = ENODEV;
                DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
                return -rte_errno;
        }
        /*
         * Before binding a TxQ to its peer RxQ, a first pass over the queues
         * checks their configuration consistency. This costs a little time,
         * but is better than having to roll back afterwards.
         */
        for (i = 0; i != priv->txqs_n; i++) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                /*
                 * All hairpin Tx queues of a single port that are connected
                 * to the same peer Rx port should have the same "auto
                 * binding" and "implicit Tx flow" modes.
                 * Peer consistency checking will be done in per queue binding.
                 */
                conf = &txq_ctrl->hairpin_conf;
                if (conf->peers[0].port == rx_port) {
                        if (num_q == 0) {
                                manual = conf->manual_bind;
                                explicit = conf->tx_explicit;
                        } else {
                                if (manual != conf->manual_bind ||
                                    explicit != conf->tx_explicit) {
                                        rte_errno = EINVAL;
                                        DRV_LOG(ERR, "port %u queue %d mode"
                                                " mismatch: %u %u, %u %u",
                                                local_port, i, manual,
                                                conf->manual_bind, explicit,
                                                conf->tx_explicit);
                                        mlx5_txq_release(dev, i);
                                        return -rte_errno;
                                }
                        }
                        num_q++;
                }
                mlx5_txq_release(dev, i);
        }
        /* If no queue is configured, success is returned directly. */
        if (num_q == 0)
                return ret;
        /* All the hairpin Tx queues need to be traversed again. */
        for (i = 0; i != priv->txqs_n; i++) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
                /*
                 * Fetch peer RxQ's information.
                 * No need to pass the information of the current queue.
                 */
                ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
                                                        NULL, &peer, 1);
                if (ret != 0) {
                        mlx5_txq_release(dev, i);
                        goto error;
                }
                /* Accessing its own device, inside mlx5 PMD. */
                ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
                if (ret != 0) {
                        mlx5_txq_release(dev, i);
                        goto error;
                }
                /* Pass TxQ's information to peer RxQ and try binding. */
                cur.peer_q = rx_queue;
                cur.qp_id = txq_ctrl->obj->sq->id;
                cur.vhca_id = priv->config.hca_attr.vhca_id;
                cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
                cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
                /*
                 * To access another device in a proper way, the RTE-level
                 * private function is needed.
                 */
                ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
                                                      &cur, 0);
                if (ret != 0) {
                        mlx5_txq_release(dev, i);
                        goto error;
                }
                mlx5_txq_release(dev, i);
        }
        return 0;
error:
        /*
         * Roll back the queues that were already bound.
         * No need to check the return value of the queue unbind function.
         */
        do {
                /* No validation is needed here. */
                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
                rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
                mlx5_hairpin_queue_peer_unbind(dev, i, 1);
                mlx5_txq_release(dev, i);
        } while (i--);
        return ret;
}

/*
 * Unbind the hairpin port pair; the HW configuration of both devices will
 * be cleared and the status will be reset for all the queues used between
 * them. This function only supports unbinding the Tx from one Rx.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *txq_ctrl;
        uint32_t i;
        int ret;
        uint16_t cur_port = priv->dev_data->port_id;

        if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
                rte_errno = ENODEV;
                DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
                return -rte_errno;
        }
        for (i = 0; i != priv->txqs_n; i++) {
                uint16_t rx_queue;

                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                /* Indeed, only the first used queue needs to be checked. */
                if (txq_ctrl->hairpin_conf.manual_bind == 0) {
                        /* Release the queue before returning to avoid a leak. */
                        mlx5_txq_release(dev, i);
                        if (cur_port != rx_port) {
                                rte_errno = EINVAL;
                                DRV_LOG(ERR, "port %u and port %u are in"
                                        " auto-bind mode", cur_port, rx_port);
                                return -rte_errno;
                        } else {
                                return 0;
                        }
                }
                rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
                mlx5_txq_release(dev, i);
                ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
                if (ret) {
                        DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
                                rx_port, rx_queue);
                        return ret;
                }
                ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
                if (ret) {
                        DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
                                cur_port, i);
                        return ret;
                }
        }
        return 0;
}

/*
 * Bind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
        int ret = 0;
        uint16_t p, pp;

        /*
         * If the Rx port has no hairpin configuration with the current port,
         * the binding will be skipped in the single port function called
         * below. The device started status will be checked only before the
         * queue information is updated.
         */
        if (rx_port == RTE_MAX_ETHPORTS) {
                MLX5_ETH_FOREACH_DEV(p, dev->device) {
                        ret = mlx5_hairpin_bind_single_port(dev, p);
                        if (ret != 0)
                                goto unbind;
                }
                return ret;
        } else {
                return mlx5_hairpin_bind_single_port(dev, rx_port);
        }
unbind:
        MLX5_ETH_FOREACH_DEV(pp, dev->device)
                if (pp < p)
                        mlx5_hairpin_unbind_single_port(dev, pp);
        return ret;
}
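
/*
 * Illustrative usage (hypothetical application code, not part of the
 * driver): with hairpin queues created in manual_bind mode and all ports
 * started, the application binds this port's Tx side to every configured
 * peer Rx port in one call.
 */
static __rte_unused int
mlx5_hairpin_manual_bind_all_sketch(uint16_t port_id)
{
        /* RTE_MAX_ETHPORTS means "bind to all possible peer Rx ports". */
        return rte_eth_hairpin_bind(port_id, RTE_MAX_ETHPORTS);
}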

/*
 * Unbind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
        int ret = 0;
        uint16_t p;

        if (rx_port == RTE_MAX_ETHPORTS)
                MLX5_ETH_FOREACH_DEV(p, dev->device) {
                        ret = mlx5_hairpin_unbind_single_port(dev, p);
                        if (ret != 0)
                                return ret;
                }
        else
                ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
        return ret;
}

/*
 * DPDK callback to get the hairpin peer ports list.
 * This returns the actual number of peer ports and saves the identifiers
 * into the array (sorted, so the order may differ from the one used when
 * setting up the hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or positive value on success, actual number of peer ports.
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
                            size_t len, uint32_t direction)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *txq_ctrl;
        struct mlx5_rxq_ctrl *rxq_ctrl;
        uint32_t i;
        uint16_t pp;
        uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
        int ret = 0;

        if (direction) {
                for (i = 0; i < priv->txqs_n; i++) {
                        txq_ctrl = mlx5_txq_get(dev, i);
                        if (!txq_ctrl)
                                continue;
                        if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                                mlx5_txq_release(dev, i);
                                continue;
                        }
                        pp = txq_ctrl->hairpin_conf.peers[0].port;
                        if (pp >= RTE_MAX_ETHPORTS) {
                                rte_errno = ERANGE;
                                mlx5_txq_release(dev, i);
                                DRV_LOG(ERR, "port %hu queue %u peer port "
                                        "out of range %hu",
                                        priv->dev_data->port_id, i, pp);
                                return -rte_errno;
                        }
                        bits[pp / 32] |= 1u << (pp % 32);
                        mlx5_txq_release(dev, i);
                }
        } else {
                for (i = 0; i < priv->rxqs_n; i++) {
                        rxq_ctrl = mlx5_rxq_get(dev, i);
                        if (!rxq_ctrl)
                                continue;
                        if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                                mlx5_rxq_release(dev, i);
                                continue;
                        }
                        pp = rxq_ctrl->hairpin_conf.peers[0].port;
                        if (pp >= RTE_MAX_ETHPORTS) {
                                rte_errno = ERANGE;
                                mlx5_rxq_release(dev, i);
                                DRV_LOG(ERR, "port %hu queue %u peer port "
                                        "out of range %hu",
                                        priv->dev_data->port_id, i, pp);
                                return -rte_errno;
                        }
                        bits[pp / 32] |= 1u << (pp % 32);
                        mlx5_rxq_release(dev, i);
                }
        }
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (bits[i / 32] & (1u << (i % 32))) {
                        if ((size_t)ret >= len) {
                                rte_errno = E2BIG;
                                return -rte_errno;
                        }
                        peer_ports[ret++] = i;
                }
        }
        return ret;
}
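
/*
 * Illustrative usage (hypothetical application code): query all peer Rx
 * ports of a given port and unbind them one by one, e.g. before stopping
 * the port, the way testpmd drives the hairpin API.
 */
static __rte_unused int
mlx5_hairpin_unbind_peers_sketch(uint16_t port_id)
{
        uint16_t peer_ports[RTE_MAX_ETHPORTS];
        int n, i, ret;

        /* Direction 1: the current port acts as Tx, fetch peer Rx ports. */
        n = rte_eth_hairpin_get_peer_ports(port_id, peer_ports,
                                           RTE_MAX_ETHPORTS, 1);
        if (n < 0)
                return n;
        for (i = 0; i < n; i++) {
                ret = rte_eth_hairpin_unbind(port_id, peer_ports[i]);
                if (ret != 0)
                        return ret;
        }
        return 0;
}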

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        int ret;
        int fine_inline;

        DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
        fine_inline = rte_mbuf_dynflag_lookup
                (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
        if (fine_inline >= 0)
                rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
        else
                rte_net_mlx5_dynf_inline_mask = 0;
        if (dev->data->nb_rx_queues > 0) {
                ret = mlx5_dev_configure_rss_reta(dev);
                if (ret) {
                        DRV_LOG(ERR, "port %u reta config failed: %s",
                                dev->data->port_id, strerror(rte_errno));
                        return -rte_errno;
                }
        }
        ret = mlx5_txpp_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        if ((priv->config.devx && priv->config.dv_flow_en &&
            priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
                ret = priv->obj_ops.lb_dummy_queue_create(dev);
                if (ret)
                        goto error;
        }
        ret = mlx5_txq_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        ret = mlx5_rxq_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        /*
         * This step will be skipped if there is no hairpin Tx queue
         * configured with an Rx peer queue from the same device.
         */
        ret = mlx5_hairpin_auto_bind(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        /* Set started flag here for the following steps like control flow. */
        dev->data->dev_started = 1;
        ret = mlx5_rx_intr_vec_enable(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
                        dev->data->port_id);
                goto error;
        }
        mlx5_os_stats_init(dev);
        ret = mlx5_traffic_enable(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u failed to set default flows",
                        dev->data->port_id);
                goto error;
        }
        /* Set a mask and offset of dynamic metadata flows into Rx queues. */
        mlx5_flow_rxq_dynf_metadata_set(dev);
        /* Set flags and context to convert Rx timestamps. */
        mlx5_rxq_timestamp_set(dev);
        /* Set a mask and offset of scheduling on timestamp into Tx queues. */
        mlx5_txq_dynf_timestamp_set(dev);
        /*
         * In non-cached mode, it only needs to start the default mreg copy
         * action, since no flow created by an application exists anymore.
         * But it is worth wrapping the interface for further usage.
         */
        ret = mlx5_flow_start_default(dev);
        if (ret) {
                DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        rte_wmb();
        dev->tx_pkt_burst = mlx5_select_tx_function(dev);
        dev->rx_pkt_burst = mlx5_select_rx_function(dev);
        /* Enable datapath on secondary process. */
        mlx5_mp_os_req_start_rxtx(dev);
        if (priv->sh->intr_handle.fd >= 0) {
                priv->sh->port[priv->dev_port - 1].ih_port_id =
                                        (uint32_t)dev->data->port_id;
        } else {
                DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
                        dev->data->port_id);
                dev->data->dev_conf.intr_conf.lsc = 0;
                dev->data->dev_conf.intr_conf.rmv = 0;
        }
        if (priv->sh->intr_handle_devx.fd >= 0)
                priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
                                        (uint32_t)dev->data->port_id;
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        /* Rollback. */
        dev->data->dev_started = 0;
        mlx5_flow_stop_default(dev);
        mlx5_traffic_disable(dev);
        mlx5_txq_stop(dev);
        mlx5_rxq_stop(dev);
        if (priv->obj_ops.lb_dummy_queue_release)
                priv->obj_ops.lb_dummy_queue_release(dev);
        mlx5_txpp_stop(dev); /* Stop last. */
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;

        dev->data->dev_started = 0;
        /* Prevent crashes when queues are still in use. */
        dev->rx_pkt_burst = removed_rx_burst;
        dev->tx_pkt_burst = removed_tx_burst;
        rte_wmb();
        /* Disable datapath on secondary process. */
        mlx5_mp_os_req_stop_rxtx(dev);
        rte_delay_us_sleep(1000 * priv->rxqs_n);
        DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
        mlx5_flow_stop_default(dev);
        /* Control flows for default traffic can be removed first. */
        mlx5_traffic_disable(dev);
        /* All RX queue flags will be cleared in the flush interface. */
        mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
        mlx5_flow_meter_rxq_flush(dev);
        mlx5_rx_intr_vec_disable(dev);
        priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
        priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
        mlx5_txq_stop(dev);
        mlx5_rxq_stop(dev);
        if (priv->obj_ops.lb_dummy_queue_release)
                priv->obj_ops.lb_dummy_queue_release(dev);
        mlx5_txpp_stop(dev);

        return 0;
}

/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct rte_flow_item_eth bcast = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        struct rte_flow_item_eth ipv6_multi_spec = {
                .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
        };
        struct rte_flow_item_eth ipv6_multi_mask = {
                .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
        };
        struct rte_flow_item_eth unicast = {
                .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
        };
        struct rte_flow_item_eth unicast_mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        const unsigned int vlan_filter_n = priv->vlan_filter_n;
        const struct rte_ether_addr cmp = {
                .addr_bytes = "\x00\x00\x00\x00\x00\x00",
        };
        unsigned int i;
        unsigned int j;
        int ret;

        /*
         * The hairpin Txq default flow should be created no matter whether
         * isolation mode is in use. Otherwise, all the packets to be sent
         * would go out directly without the Tx flow actions, e.g.
         * encapsulation.
         */
        for (i = 0; i != priv->txqs_n; ++i) {
                struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
                if (!txq_ctrl)
                        continue;
                /* Only Tx implicit mode requires the default Tx flow. */
                if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
                    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
                    txq_ctrl->hairpin_conf.peers[0].port ==
                    priv->dev_data->port_id) {
                        ret = mlx5_ctrl_flow_source_queue(dev, i);
                        if (ret) {
                                mlx5_txq_release(dev, i);
                                goto error;
                        }
                }
                mlx5_txq_release(dev, i);
        }
        if (priv->config.dv_esw_en && !priv->config.vf && !priv->config.sf) {
                if (mlx5_flow_create_esw_table_zero_flow(dev))
                        priv->fdb_def_rule = 1;
                else
                        DRV_LOG(INFO, "port %u FDB default rule cannot be"
                                " configured - only Eswitch group 0 flows are"
                                " supported.", dev->data->port_id);
        }
        if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
                ret = mlx5_flow_lacp_miss(dev);
                if (ret)
                        DRV_LOG(INFO, "port %u LACP rule cannot be created - "
                                "forward LACP to kernel.", dev->data->port_id);
                else
                        DRV_LOG(INFO, "LACP traffic will be missed in port %u."
                                , dev->data->port_id);
        }
        if (priv->isolated)
                return 0;
        if (dev->data->promiscuous) {
                struct rte_flow_item_eth promisc = {
                        .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .type = 0,
                };

                ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
                if (ret)
                        goto error;
        }
        if (dev->data->all_multicast) {
                struct rte_flow_item_eth multicast = {
                        .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
                        .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .type = 0,
                };

                ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
                if (ret)
                        goto error;
        } else {
                /* Add broadcast/multicast flows. */
                for (i = 0; i != vlan_filter_n; ++i) {
                        uint16_t vlan = priv->vlan_filter[i];

                        struct rte_flow_item_vlan vlan_spec = {
                                .tci = rte_cpu_to_be_16(vlan),
                        };
                        struct rte_flow_item_vlan vlan_mask =
                                rte_flow_item_vlan_mask;

                        ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
                                                  &vlan_spec, &vlan_mask);
                        if (ret)
                                goto error;
                        ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
                                                  &ipv6_multi_mask,
                                                  &vlan_spec, &vlan_mask);
                        if (ret)
                                goto error;
                }
                if (!vlan_filter_n) {
                        ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
                        if (ret)
                                goto error;
                        ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
                                             &ipv6_multi_mask);
                        if (ret) {
                                /* Do not fail on IPv6 broadcast creation failure. */
                                DRV_LOG(WARNING,
                                        "IPv6 broadcast is not supported");
                                ret = 0;
                        }
                }
        }
        /* Add MAC address flows. */
        for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
                struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

                if (!memcmp(mac, &cmp, sizeof(*mac)))
                        continue;
                memcpy(&unicast.dst.addr_bytes,
                       mac->addr_bytes,
                       RTE_ETHER_ADDR_LEN);
                for (j = 0; j != vlan_filter_n; ++j) {
                        uint16_t vlan = priv->vlan_filter[j];

                        struct rte_flow_item_vlan vlan_spec = {
                                .tci = rte_cpu_to_be_16(vlan),
                        };
                        struct rte_flow_item_vlan vlan_mask =
                                rte_flow_item_vlan_mask;

                        ret = mlx5_ctrl_flow_vlan(dev, &unicast,
                                                  &unicast_mask,
                                                  &vlan_spec,
                                                  &vlan_mask);
                        if (ret)
                                goto error;
                }
                if (!vlan_filter_n) {
                        ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
                        if (ret)
                                goto error;
                }
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
        mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
        if (dev->data->dev_started) {
                mlx5_traffic_disable(dev);
                return mlx5_traffic_enable(dev);
        }
        return 0;
}
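
/*
 * Illustrative trigger (hypothetical application code): control-plane
 * updates such as adding a VLAN filter end up re-creating the control
 * flows through mlx5_traffic_restart() when the port is already started.
 * The VLAN ID is arbitrary.
 */
static __rte_unused int
mlx5_traffic_restart_trigger_sketch(uint16_t port_id)
{
        /* Enable filtering of VLAN ID 100; the PMD rebuilds control flows. */
        return rte_eth_dev_vlan_filter(port_id, 100, 1);
}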