net/mlx5: replace flow list with indexed pool
[dpdk.git] / drivers / net / mlx5 / mlx5_trigger.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_mr.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32         struct mlx5_priv *priv = dev->data->dev_private;
33         unsigned int i;
34
35         for (i = 0; i != priv->txqs_n; ++i)
36                 mlx5_txq_release(dev, i);
37 }
38
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51         struct mlx5_priv *priv = dev->data->dev_private;
52         unsigned int i;
53         int ret;
54
55         for (i = 0; i != priv->txqs_n; ++i) {
56                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57                 struct mlx5_txq_data *txq_data;
58                 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59
60                 if (!txq_ctrl)
61                         continue;
62                 txq_data = &txq_ctrl->txq;
62                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63                         txq_alloc_elts(txq_ctrl);
64                 MLX5_ASSERT(!txq_ctrl->obj);
65                 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66                                             0, txq_ctrl->socket);
67                 if (!txq_ctrl->obj) {
68                         DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69                                 "memory resources.", dev->data->port_id,
70                                 txq_data->idx);
71                         rte_errno = ENOMEM;
72                         goto error;
73                 }
74                 ret = priv->obj_ops.txq_obj_new(dev, i);
75                 if (ret < 0) {
76                         mlx5_free(txq_ctrl->obj);
77                         txq_ctrl->obj = NULL;
78                         goto error;
79                 }
80                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81                         size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82
83                         txq_data->fcqs = mlx5_malloc(flags, size,
84                                                      RTE_CACHE_LINE_SIZE,
85                                                      txq_ctrl->socket);
86                         if (!txq_data->fcqs) {
87                                 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88                                         "allocate memory (FCQ).",
89                                         dev->data->port_id, i);
90                                 rte_errno = ENOMEM;
91                                 goto error;
92                         }
93                 }
94                 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95                         dev->data->port_id, i, (void *)&txq_ctrl->obj);
96                 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97         }
98         return 0;
99 error:
100         ret = rte_errno; /* Save rte_errno before cleanup. */
101         do {
102                 mlx5_txq_release(dev, i);
103         } while (i-- != 0);
104         rte_errno = ret; /* Restore rte_errno. */
105         return -rte_errno;
106 }
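/*
 * A minimal caller-side sketch of the 0/-rte_errno convention used by
 * mlx5_txq_start() above (illustrative only, mirroring what
 * mlx5_dev_start() below actually does):
 *
 * @code
 * ret = mlx5_txq_start(dev);
 * if (ret) {
 *         // rte_errno holds the failure reason; the helper has already
 *         // rolled back every queue it managed to start.
 *         DRV_LOG(ERR, "Tx queue allocation failed: %s",
 *                 strerror(rte_errno));
 *         return -rte_errno;
 * }
 * @endcode
 */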
107
108 /**
109  * Stop traffic on Rx queues.
110  *
111  * @param dev
112  *   Pointer to Ethernet device structure.
113  */
114 static void
115 mlx5_rxq_stop(struct rte_eth_dev *dev)
116 {
117         struct mlx5_priv *priv = dev->data->dev_private;
118         unsigned int i;
119
120         for (i = 0; i != priv->rxqs_n; ++i)
121                 mlx5_rxq_release(dev, i);
122 }
123
124 /**
125  * Start traffic on Rx queues.
126  *
127  * @param dev
128  *   Pointer to Ethernet device structure.
129  *
130  * @return
131  *   0 on success, a negative errno value otherwise and rte_errno is set.
132  */
133 static int
134 mlx5_rxq_start(struct rte_eth_dev *dev)
135 {
136         struct mlx5_priv *priv = dev->data->dev_private;
137         unsigned int i;
138         int ret = 0;
139
140         /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
141         if (mlx5_mprq_alloc_mp(dev)) {
142                 /* Should not release Rx queues but return immediately. */
143                 return -rte_errno;
144         }
145         DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
146                 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
147         DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
148                 dev->data->port_id, priv->sh->device_attr.max_sge);
149         for (i = 0; i != priv->rxqs_n; ++i) {
150                 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
151
152                 if (!rxq_ctrl)
153                         continue;
154                 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
155                         /* Pre-register Rx mempools. */
156                         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
157                                 mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
158                                                   rxq_ctrl->rxq.mprq_mp);
159                         } else {
160                                 uint32_t s;
161
162                                 for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
163                                         mlx5_mr_update_mp
164                                                 (dev, &rxq_ctrl->rxq.mr_ctrl,
165                                                 rxq_ctrl->rxq.rxseg[s].mp);
166                         }
167                         ret = rxq_alloc_elts(rxq_ctrl);
168                         if (ret)
169                                 goto error;
170                 }
171                 MLX5_ASSERT(!rxq_ctrl->obj);
172                 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
173                                             sizeof(*rxq_ctrl->obj), 0,
174                                             rxq_ctrl->socket);
175                 if (!rxq_ctrl->obj) {
176                         DRV_LOG(ERR,
177                                 "Port %u Rx queue %u can't allocate resources.",
178                                 dev->data->port_id, (*priv->rxqs)[i]->idx);
179                         rte_errno = ENOMEM;
180                         goto error;
181                 }
182                 ret = priv->obj_ops.rxq_obj_new(dev, i);
183                 if (ret) {
184                         mlx5_free(rxq_ctrl->obj);
185                         rxq_ctrl->obj = NULL;
186                         goto error;
186                 }
187                 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
188                         dev->data->port_id, i, (void *)&rxq_ctrl->obj);
189                 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
190         }
191         return 0;
192 error:
193         ret = rte_errno; /* Save rte_errno before cleanup. */
194         do {
195                 mlx5_rxq_release(dev, i);
196         } while (i-- != 0);
197         rte_errno = ret; /* Restore rte_errno. */
198         return -rte_errno;
199 }
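/*
 * The device_attr limits logged above bound what a queue may request. A
 * minimal sketch of such a check (illustrative only; "desc" is a
 * hypothetical requested descriptor count, not a variable of this file):
 *
 * @code
 * if (desc > (unsigned int)priv->sh->device_attr.max_qp_wr) {
 *         // More WQEs requested than the HCA supports per queue.
 *         rte_errno = EINVAL;
 *         return -rte_errno;
 * }
 * @endcode
 */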
200
201 /**
202  * Binds Tx queues to Rx queues for hairpin.
203  *
204  * Binds Tx queues to the target Rx queues.
205  *
206  * @param dev
207  *   Pointer to Ethernet device structure.
208  *
209  * @return
210  *   0 on success, a negative errno value otherwise and rte_errno is set.
211  */
212 static int
213 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
214 {
215         struct mlx5_priv *priv = dev->data->dev_private;
216         struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
217         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
218         struct mlx5_txq_ctrl *txq_ctrl;
219         struct mlx5_rxq_ctrl *rxq_ctrl;
220         struct mlx5_devx_obj *sq;
221         struct mlx5_devx_obj *rq;
222         unsigned int i;
223         int ret = 0;
224         bool need_auto = false;
225         uint16_t self_port = dev->data->port_id;
226
227         for (i = 0; i != priv->txqs_n; ++i) {
228                 txq_ctrl = mlx5_txq_get(dev, i);
229                 if (!txq_ctrl)
230                         continue;
231                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
232                         mlx5_txq_release(dev, i);
233                         continue;
234                 }
235                 if (txq_ctrl->hairpin_conf.peers[0].port != self_port) {
236                         mlx5_txq_release(dev, i);
237                         continue;
238                 }
237                 if (txq_ctrl->hairpin_conf.manual_bind) {
238                         mlx5_txq_release(dev, i);
239                         return 0;
240                 }
241                 need_auto = true;
242                 mlx5_txq_release(dev, i);
243         }
244         if (!need_auto)
245                 return 0;
246         for (i = 0; i != priv->txqs_n; ++i) {
247                 txq_ctrl = mlx5_txq_get(dev, i);
248                 if (!txq_ctrl)
249                         continue;
250                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
251                         mlx5_txq_release(dev, i);
252                         continue;
253                 }
254                 /* Skip hairpin queues with other peer ports. */
255                 if (txq_ctrl->hairpin_conf.peers[0].port != self_port) {
256                         mlx5_txq_release(dev, i);
257                         continue;
258                 }
257                 if (!txq_ctrl->obj) {
258                         rte_errno = ENOMEM;
259                         DRV_LOG(ERR, "port %u no txq object found: %d",
260                                 dev->data->port_id, i);
261                         mlx5_txq_release(dev, i);
262                         return -rte_errno;
263                 }
264                 sq = txq_ctrl->obj->sq;
265                 rxq_ctrl = mlx5_rxq_get(dev,
266                                         txq_ctrl->hairpin_conf.peers[0].queue);
267                 if (!rxq_ctrl) {
268                         mlx5_txq_release(dev, i);
269                         rte_errno = EINVAL;
270                         DRV_LOG(ERR, "port %u no rxq object found: %d",
271                                 dev->data->port_id,
272                                 txq_ctrl->hairpin_conf.peers[0].queue);
273                         return -rte_errno;
274                 }
275                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
276                     rxq_ctrl->hairpin_conf.peers[0].queue != i) {
277                         rte_errno = ENOMEM;
278                         DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
279                                 "Rx queue %d", dev->data->port_id,
280                                 i, txq_ctrl->hairpin_conf.peers[0].queue);
281                         goto error;
282                 }
283                 rq = rxq_ctrl->obj->rq;
284                 if (!rq) {
285                         rte_errno = ENOMEM;
286                         DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
287                                 dev->data->port_id,
288                                 txq_ctrl->hairpin_conf.peers[0].queue);
289                         goto error;
290                 }
291                 sq_attr.state = MLX5_SQC_STATE_RDY;
292                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
293                 sq_attr.hairpin_peer_rq = rq->id;
294                 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
295                 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
296                 if (ret)
297                         goto error;
298                 rq_attr.state = MLX5_SQC_STATE_RDY;
299                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
300                 rq_attr.hairpin_peer_sq = sq->id;
301                 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
302                 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
303                 if (ret)
304                         goto error;
305                 /* Qs with auto-bind will be destroyed directly. */
306                 rxq_ctrl->hairpin_status = 1;
307                 txq_ctrl->hairpin_status = 1;
308                 mlx5_txq_release(dev, i);
309                 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
310         }
311         return 0;
312 error:
313         mlx5_txq_release(dev, i);
314         mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
315         return -rte_errno;
316 }
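/*
 * The essence of the auto-binding above: the SQ and RQ are cross-wired
 * through DevX, each side receiving the peer object ID and vHCA ID while
 * moving from RST to RDY state. Condensed (illustrative only):
 *
 * @code
 * sq_attr.hairpin_peer_rq = rq->id;    // Tx side points at the peer RQ.
 * rq_attr.hairpin_peer_sq = sq->id;    // Rx side points at the peer SQ.
 * // Both sides carry the same vHCA ID for a single-device bind.
 * @endcode
 *
 * Unbinding is the reverse transition back to RST, see
 * mlx5_hairpin_queue_peer_unbind() below.
 */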
317
318 /*
319  * Fetch the peer queue's SW & HW information.
320  *
321  * @param dev
322  *   Pointer to Ethernet device structure.
323  * @param peer_queue
324  *   Index of the queue to fetch the information from.
325  * @param current_info
326  *   Pointer to the input peer information, not used currently.
327  * @param peer_info
328  *   Pointer to the structure to store the information, output.
329  * @param direction
330  *   Positive to get the RxQ information, zero to get the TxQ information.
331  *
332  * @return
333  *   0 on success, a negative errno value otherwise and rte_errno is set.
334  */
335 int
336 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
337                                struct rte_hairpin_peer_info *current_info,
338                                struct rte_hairpin_peer_info *peer_info,
339                                uint32_t direction)
340 {
341         struct mlx5_priv *priv = dev->data->dev_private;
342         RTE_SET_USED(current_info);
343
344         if (dev->data->dev_started == 0) {
345                 rte_errno = EBUSY;
346                 DRV_LOG(ERR, "peer port %u is not started",
347                         dev->data->port_id);
348                 return -rte_errno;
349         }
350         /*
351          * Peer port used as egress. In the current design, hairpin Tx queue
352          * will be bound to the peer Rx queue. Indeed, only the information of
353          * peer Rx queue needs to be fetched.
354          */
355         if (direction == 0) {
356                 struct mlx5_txq_ctrl *txq_ctrl;
357
358                 txq_ctrl = mlx5_txq_get(dev, peer_queue);
359                 if (txq_ctrl == NULL) {
360                         rte_errno = EINVAL;
361                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
362                                 dev->data->port_id, peer_queue);
363                         return -rte_errno;
364                 }
365                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
366                         rte_errno = EINVAL;
367                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
368                                 dev->data->port_id, peer_queue);
369                         mlx5_txq_release(dev, peer_queue);
370                         return -rte_errno;
371                 }
372                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
373                         rte_errno = ENOMEM;
374                         DRV_LOG(ERR, "port %u no Txq object found: %d",
375                                 dev->data->port_id, peer_queue);
376                         mlx5_txq_release(dev, peer_queue);
377                         return -rte_errno;
378                 }
379                 peer_info->qp_id = txq_ctrl->obj->sq->id;
380                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
381                 /* 1-to-1 mapping, only the first one is used. */
382                 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
383                 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
384                 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
385                 mlx5_txq_release(dev, peer_queue);
386         } else { /* Peer port used as ingress. */
387                 struct mlx5_rxq_ctrl *rxq_ctrl;
388
389                 rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
390                 if (rxq_ctrl == NULL) {
391                         rte_errno = EINVAL;
392                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
393                                 dev->data->port_id, peer_queue);
394                         return -rte_errno;
395                 }
396                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
397                         rte_errno = EINVAL;
398                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
399                                 dev->data->port_id, peer_queue);
400                         mlx5_rxq_release(dev, peer_queue);
401                         return -rte_errno;
402                 }
403                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
404                         rte_errno = ENOMEM;
405                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
406                                 dev->data->port_id, peer_queue);
407                         mlx5_rxq_release(dev, peer_queue);
408                         return -rte_errno;
409                 }
410                 peer_info->qp_id = rxq_ctrl->obj->rq->id;
411                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
412                 peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
413                 peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
414                 peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
415                 mlx5_rxq_release(dev, peer_queue);
416         }
417         return 0;
418 }
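/*
 * An illustrative call of the helper above through the ethdev private API
 * (this is how mlx5_hairpin_bind_single_port() below fetches the peer Rx
 * queue information, direction = 1):
 *
 * @code
 * struct rte_hairpin_peer_info peer;
 *
 * ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
 *                                         NULL, &peer, 1);
 * @endcode
 */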
419
420 /*
421  * Bind the hairpin queue with the peer HW information.
422  * This needs to be called twice both for Tx and Rx queues of a pair.
423  * If the queue is already bound, it is considered successful.
424  *
425  * @param dev
426  *   Pointer to Ethernet device structure.
427  * @param cur_queue
428  *   Index of the queue to change the HW configuration to bind.
429  * @param peer_info
430  *   Pointer to information of the peer queue.
431  * @param direction
432  *   Positive to configure the TxQ, zero to configure the RxQ.
433  *
434  * @return
435  *   0 on success, a negative errno value otherwise and rte_errno is set.
436  */
437 int
438 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
439                              struct rte_hairpin_peer_info *peer_info,
440                              uint32_t direction)
441 {
442         int ret = 0;
443
444         /*
445          * Consistency check of the peer queue: the opposite direction is used
446          * to get the peer queue info with the ethdev port ID, no need to recheck.
447          */
448         if (peer_info->peer_q != cur_queue) {
449                 rte_errno = EINVAL;
450                 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
451                         dev->data->port_id, cur_queue, peer_info->peer_q);
452                 return -rte_errno;
453         }
454         if (direction != 0) {
455                 struct mlx5_txq_ctrl *txq_ctrl;
456                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
457
458                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
459                 if (txq_ctrl == NULL) {
460                         rte_errno = EINVAL;
461                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
462                                 dev->data->port_id, cur_queue);
463                         return -rte_errno;
464                 }
465                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
466                         rte_errno = EINVAL;
467                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
468                                 dev->data->port_id, cur_queue);
469                         mlx5_txq_release(dev, cur_queue);
470                         return -rte_errno;
471                 }
472                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
473                         rte_errno = ENOMEM;
474                         DRV_LOG(ERR, "port %u no Txq object found: %d",
475                                 dev->data->port_id, cur_queue);
476                         mlx5_txq_release(dev, cur_queue);
477                         return -rte_errno;
478                 }
479                 if (txq_ctrl->hairpin_status != 0) {
480                         DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
481                                 dev->data->port_id, cur_queue);
482                         mlx5_txq_release(dev, cur_queue);
483                         return 0;
484                 }
485                 /*
486                  * Consistency checking of all queues of one port is done in
487                  * the bind() function, and that check is optional.
488                  */
489                 if (peer_info->tx_explicit !=
490                     txq_ctrl->hairpin_conf.tx_explicit) {
491                         rte_errno = EINVAL;
492                         DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
493                                 " mismatch", dev->data->port_id, cur_queue);
494                         mlx5_txq_release(dev, cur_queue);
495                         return -rte_errno;
496                 }
497                 if (peer_info->manual_bind !=
498                     txq_ctrl->hairpin_conf.manual_bind) {
499                         rte_errno = EINVAL;
500                         DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
501                                 " mismatch", dev->data->port_id, cur_queue);
502                         mlx5_txq_release(dev, cur_queue);
503                         return -rte_errno;
504                 }
505                 sq_attr.state = MLX5_SQC_STATE_RDY;
506                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
507                 sq_attr.hairpin_peer_rq = peer_info->qp_id;
508                 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
509                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
510                 if (ret == 0)
511                         txq_ctrl->hairpin_status = 1;
512                 mlx5_txq_release(dev, cur_queue);
513         } else {
514                 struct mlx5_rxq_ctrl *rxq_ctrl;
515                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
516
517                 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
518                 if (rxq_ctrl == NULL) {
519                         rte_errno = EINVAL;
520                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
521                                 dev->data->port_id, cur_queue);
522                         return -rte_errno;
523                 }
524                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
525                         rte_errno = EINVAL;
526                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
527                                 dev->data->port_id, cur_queue);
528                         mlx5_rxq_release(dev, cur_queue);
529                         return -rte_errno;
530                 }
531                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
532                         rte_errno = ENOMEM;
533                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
534                                 dev->data->port_id, cur_queue);
535                         mlx5_rxq_release(dev, cur_queue);
536                         return -rte_errno;
537                 }
538                 if (rxq_ctrl->hairpin_status != 0) {
539                         DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
540                                 dev->data->port_id, cur_queue);
541                         mlx5_rxq_release(dev, cur_queue);
542                         return 0;
543                 }
544                 if (peer_info->tx_explicit !=
545                     rxq_ctrl->hairpin_conf.tx_explicit) {
546                         rte_errno = EINVAL;
547                         DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
548                                 " mismatch", dev->data->port_id, cur_queue);
549                         mlx5_rxq_release(dev, cur_queue);
550                         return -rte_errno;
551                 }
552                 if (peer_info->manual_bind !=
553                     rxq_ctrl->hairpin_conf.manual_bind) {
554                         rte_errno = EINVAL;
555                         DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
556                                 " mismatch", dev->data->port_id, cur_queue);
557                         mlx5_rxq_release(dev, cur_queue);
558                         return -rte_errno;
559                 }
560                 rq_attr.state = MLX5_SQC_STATE_RDY;
561                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
562                 rq_attr.hairpin_peer_sq = peer_info->qp_id;
563                 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
564                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
565                 if (ret == 0)
566                         rxq_ctrl->hairpin_status = 1;
567                 mlx5_rxq_release(dev, cur_queue);
568         }
569         return ret;
570 }
571
572 /*
573  * Unbind the hairpin queue and reset its HW configuration.
574  * This needs to be called twice both for Tx and Rx queues of a pair.
575  * If the queue is already unbound, it is considered successful.
576  *
577  * @param dev
578  *   Pointer to Ethernet device structure.
579  * @param cur_queue
580  *   Index of the queue to change the HW configuration to unbind.
581  * @param direction
582  *   Positive to reset the TxQ, zero to reset the RxQ.
583  *
584  * @return
585  *   0 on success, a negative errno value otherwise and rte_errno is set.
586  */
587 int
588 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
589                                uint32_t direction)
590 {
591         int ret = 0;
592
593         if (direction != 0) {
594                 struct mlx5_txq_ctrl *txq_ctrl;
595                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
596
597                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
598                 if (txq_ctrl == NULL) {
599                         rte_errno = EINVAL;
600                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
601                                 dev->data->port_id, cur_queue);
602                         return -rte_errno;
603                 }
604                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
605                         rte_errno = EINVAL;
606                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
607                                 dev->data->port_id, cur_queue);
608                         mlx5_txq_release(dev, cur_queue);
609                         return -rte_errno;
610                 }
611                 /* Already unbound, return success before obj checking. */
612                 if (txq_ctrl->hairpin_status == 0) {
613                         DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
614                                 dev->data->port_id, cur_queue);
615                         mlx5_txq_release(dev, cur_queue);
616                         return 0;
617                 }
618                 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
619                         rte_errno = ENOMEM;
620                         DRV_LOG(ERR, "port %u no Txq object found: %d",
621                                 dev->data->port_id, cur_queue);
622                         mlx5_txq_release(dev, cur_queue);
623                         return -rte_errno;
624                 }
625                 sq_attr.state = MLX5_SQC_STATE_RST;
626                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
627                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
628                 if (ret == 0)
629                         txq_ctrl->hairpin_status = 0;
630                 mlx5_txq_release(dev, cur_queue);
631         } else {
632                 struct mlx5_rxq_ctrl *rxq_ctrl;
633                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
634
635                 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
636                 if (rxq_ctrl == NULL) {
637                         rte_errno = EINVAL;
638                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
639                                 dev->data->port_id, cur_queue);
640                         return -rte_errno;
641                 }
642                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
643                         rte_errno = EINVAL;
644                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
645                                 dev->data->port_id, cur_queue);
646                         mlx5_rxq_release(dev, cur_queue);
647                         return -rte_errno;
648                 }
649                 if (rxq_ctrl->hairpin_status == 0) {
650                         DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
651                                 dev->data->port_id, cur_queue);
652                         mlx5_rxq_release(dev, cur_queue);
653                         return 0;
654                 }
655                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
656                         rte_errno = ENOMEM;
657                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
658                                 dev->data->port_id, cur_queue);
659                         mlx5_rxq_release(dev, cur_queue);
660                         return -rte_errno;
661                 }
662                 rq_attr.state = MLX5_SQC_STATE_RST;
663                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
664                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
665                 if (ret == 0)
666                         rxq_ctrl->hairpin_status = 0;
667                 mlx5_rxq_release(dev, cur_queue);
668         }
669         return ret;
670 }
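/*
 * When a bound pair is torn down, the Rx side is reset first and the Tx
 * side second, as mlx5_hairpin_unbind_single_port() below does. Condensed
 * (illustrative only):
 *
 * @code
 * rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0); // RQ to RST.
 * mlx5_hairpin_queue_peer_unbind(dev, tx_queue, 1);        // SQ to RST.
 * @endcode
 */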
671
672 /*
673  * Bind the hairpin port pairs, from the Tx to the peer Rx.
674  * This function only supports binding the Tx to one Rx.
675  *
676  * @param dev
677  *   Pointer to Ethernet device structure.
678  * @param rx_port
679  *   Port identifier of the Rx port.
680  *
681  * @return
682  *   0 on success, a negative errno value otherwise and rte_errno is set.
683  */
684 static int
685 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
686 {
687         struct mlx5_priv *priv = dev->data->dev_private;
688         int ret = 0;
689         struct mlx5_txq_ctrl *txq_ctrl;
690         uint32_t i;
691         struct rte_hairpin_peer_info peer = {0xffffff};
692         struct rte_hairpin_peer_info cur;
693         const struct rte_eth_hairpin_conf *conf;
694         uint16_t num_q = 0;
695         uint16_t local_port = priv->dev_data->port_id;
696         uint32_t manual;
697         uint32_t explicit;
698         uint16_t rx_queue;
699
700         if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
701                 rte_errno = ENODEV;
702                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
703                 return -rte_errno;
704         }
705         /*
706          * Before binding TxQ to peer RxQ, a first pass over the queues checks
707          * their configuration consistency. This takes a little time but is
708          * better than having to roll back.
709          */
710         for (i = 0; i != priv->txqs_n; i++) {
711                 txq_ctrl = mlx5_txq_get(dev, i);
712                 if (txq_ctrl == NULL)
713                         continue;
714                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
715                         mlx5_txq_release(dev, i);
716                         continue;
717                 }
718                 /*
719                  * All hairpin Tx queues of a single port that connected to the
720                  * same peer Rx port should have the same "auto binding" and
721                  * "implicit Tx flow" modes.
722                  * Peer consistency checking will be done in per queue binding.
723                  */
724                 conf = &txq_ctrl->hairpin_conf;
725                 if (conf->peers[0].port == rx_port) {
726                         if (num_q == 0) {
727                                 manual = conf->manual_bind;
728                                 explicit = conf->tx_explicit;
729                         } else {
730                                 if (manual != conf->manual_bind ||
731                                     explicit != conf->tx_explicit) {
732                                         rte_errno = EINVAL;
733                                         DRV_LOG(ERR, "port %u queue %d mode"
734                                                 " mismatch: %u %u, %u %u",
735                                                 local_port, i, manual,
736                                                 conf->manual_bind, explicit,
737                                                 conf->tx_explicit);
738                                         mlx5_txq_release(dev, i);
739                                         return -rte_errno;
740                                 }
741                         }
742                         num_q++;
743                 }
744                 mlx5_txq_release(dev, i);
745         }
746         /* If no queue is configured, return success directly. */
747         if (num_q == 0)
748                 return ret;
749         /* All the hairpin TX queues need to be traversed again. */
750         for (i = 0; i != priv->txqs_n; i++) {
751                 txq_ctrl = mlx5_txq_get(dev, i);
752                 if (txq_ctrl == NULL)
753                         continue;
754                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
755                         mlx5_txq_release(dev, i);
756                         continue;
757                 }
758                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
759                         mlx5_txq_release(dev, i);
760                         continue;
761                 }
762                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
763                 /*
764                  * Fetch peer RxQ's information.
765                  * No need to pass the information of the current queue.
766                  */
767                 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
768                                                         NULL, &peer, 1);
769                 if (ret != 0) {
770                         mlx5_txq_release(dev, i);
771                         goto error;
772                 }
773                 /* Accessing its own device, inside mlx5 PMD. */
774                 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
775                 if (ret != 0) {
776                         mlx5_txq_release(dev, i);
777                         goto error;
778                 }
779                 /* Pass TxQ's information to peer RxQ and try binding. */
780                 cur.peer_q = rx_queue;
781                 cur.qp_id = txq_ctrl->obj->sq->id;
782                 cur.vhca_id = priv->config.hca_attr.vhca_id;
783                 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
784                 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
785                 /*
786                  * In order to access another device in a proper way, an RTE-level
787                  * private function is needed.
788                  */
789                 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
790                                                       &cur, 0);
791                 if (ret != 0) {
792                         mlx5_txq_release(dev, i);
793                         goto error;
794                 }
795                 mlx5_txq_release(dev, i);
796         }
797         return 0;
798 error:
799         /*
800          * Do roll-back process for the queues already bound.
801          * No need to check the return value of the queue unbind function.
802          */
803         do {
804                 /* No validation is needed here. */
805                 txq_ctrl = mlx5_txq_get(dev, i);
806                 if (txq_ctrl == NULL)
807                         continue;
808                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
809                 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
810                 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
811                 mlx5_txq_release(dev, i);
812         } while (i--);
813         return ret;
814 }
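/*
 * The function above follows a two-pass validate-then-commit pattern, so
 * that most failures are caught before any HW state needs rolling back. A
 * generic sketch (consistent() and bind() are hypothetical placeholders):
 *
 * @code
 * for (i = 0; i != n; i++)        // Pass 1: validation only.
 *         if (!consistent(i))
 *                 return -rte_errno;
 * for (i = 0; i != n; i++)        // Pass 2: commit, roll back on error.
 *         if (bind(i))
 *                 goto rollback;
 * @endcode
 */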
815
816 /*
817  * Unbind the hairpin port pair; the HW configuration of both devices will be
818  * cleared and the status reset for all the queues used between them.
819  * This function only supports unbinding the Tx from one Rx.
820  *
821  * @param dev
822  *   Pointer to Ethernet device structure.
823  * @param rx_port
824  *   Port identifier of the Rx port.
825  *
826  * @return
827  *   0 on success, a negative errno value otherwise and rte_errno is set.
828  */
829 static int
830 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
831 {
832         struct mlx5_priv *priv = dev->data->dev_private;
833         struct mlx5_txq_ctrl *txq_ctrl;
834         uint32_t i;
835         int ret;
836         uint16_t cur_port = priv->dev_data->port_id;
837
838         if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
839                 rte_errno = ENODEV;
840                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
841                 return -rte_errno;
842         }
843         for (i = 0; i != priv->txqs_n; i++) {
844                 uint16_t rx_queue;
845
846                 txq_ctrl = mlx5_txq_get(dev, i);
847                 if (txq_ctrl == NULL)
848                         continue;
849                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
850                         mlx5_txq_release(dev, i);
851                         continue;
852                 }
853                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
854                         mlx5_txq_release(dev, i);
855                         continue;
856                 }
857                 /* Indeed, only the first used queue needs to be checked. */
858                 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
859                         if (cur_port != rx_port) {
860                                 rte_errno = EINVAL;
861                                 DRV_LOG(ERR, "port %u and port %u are in"
862                                         " auto-bind mode", cur_port, rx_port);
863                                 mlx5_txq_release(dev, i);
864                                 return -rte_errno;
865                         } else {
866                                 return 0;
867                         }
868                 }
869                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
870                 mlx5_txq_release(dev, i);
871                 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
872                 if (ret) {
873                         DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
874                                 rx_port, rx_queue);
875                         return ret;
876                 }
877                 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
878                 if (ret) {
879                         DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
880                                 cur_port, i);
881                         return ret;
882                 }
883         }
884         return 0;
885 }
886
887 /*
888  * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
889  * @see mlx5_hairpin_bind_single_port()
890  */
891 int
892 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
893 {
894         int ret = 0;
895         uint16_t p, pp;
896         struct mlx5_priv *priv = dev->data->dev_private;
897
898         /*
899          * If the Rx port has no hairpin configuration with the current port,
900          * the binding is skipped inside the single-port bind function.
901          * The device started status is checked only right before updating
902          * the queue information.
903          */
904         if (rx_port == RTE_MAX_ETHPORTS) {
905                 MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
906                         ret = mlx5_hairpin_bind_single_port(dev, p);
907                         if (ret != 0)
908                                 goto unbind;
909                 }
910                 return ret;
911         } else {
912                 return mlx5_hairpin_bind_single_port(dev, rx_port);
913         }
914 unbind:
915         MLX5_ETH_FOREACH_DEV(pp, priv->pci_dev)
916                 if (pp < p)
917                         mlx5_hairpin_unbind_single_port(dev, pp);
918         return ret;
919 }
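/*
 * From the application side this entry point is reached through the public
 * ethdev API. A minimal usage sketch (tx_port/rx_port are application
 * choices):
 *
 * @code
 * // Bind one Tx port to one Rx port...
 * ret = rte_eth_hairpin_bind(tx_port, rx_port);
 * // ...or to all of its configured peer Rx ports at once.
 * ret = rte_eth_hairpin_bind(tx_port, RTE_MAX_ETHPORTS);
 * @endcode
 */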
920
921 /*
922  * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
923  * @see mlx5_hairpin_unbind_single_port()
924  */
925 int
926 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
927 {
928         int ret = 0;
929         uint16_t p;
930         struct mlx5_priv *priv = dev->data->dev_private;
931
932         if (rx_port == RTE_MAX_ETHPORTS)
933                 MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
934                         ret = mlx5_hairpin_unbind_single_port(dev, p);
935                         if (ret != 0)
936                                 return ret;
937                 }
938         else
939                 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
940         return ret;
941 }
942
943 /*
944  * DPDK callback to get the hairpin peer ports list.
945  * This returns the actual number of peer ports and saves the identifiers
946  * into the array (sorted, so the order may differ from the one used when
947  * setting up the hairpin peer queues).
948  * The peer port ID could be the same as the port ID of the current device.
949  *
950  * @param dev
951  *   Pointer to Ethernet device structure.
952  * @param peer_ports
953  *   Pointer to array to save the port identifiers.
954  * @param len
955  *   The length of the array.
956  * @param direction
957  *   Current port to peer port direction.
958  *   positive - current used as Tx to get all peer Rx ports.
959  *   zero - current used as Rx to get all peer Tx ports.
960  *
961  * @return
962  *   0 or positive value on success, actual number of peer ports.
963  *   a negative errno value otherwise and rte_errno is set.
964  */
965 int
966 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
967                             size_t len, uint32_t direction)
968 {
969         struct mlx5_priv *priv = dev->data->dev_private;
970         struct mlx5_txq_ctrl *txq_ctrl;
971         struct mlx5_rxq_ctrl *rxq_ctrl;
972         uint32_t i;
973         uint16_t pp;
974         uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
975         int ret = 0;
976
977         if (direction) {
978                 for (i = 0; i < priv->txqs_n; i++) {
979                         txq_ctrl = mlx5_txq_get(dev, i);
980                         if (!txq_ctrl)
981                                 continue;
982                         if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
983                                 mlx5_txq_release(dev, i);
984                                 continue;
985                         }
986                         pp = txq_ctrl->hairpin_conf.peers[0].port;
987                         if (pp >= RTE_MAX_ETHPORTS) {
988                                 rte_errno = ERANGE;
989                                 mlx5_txq_release(dev, i);
990                                 DRV_LOG(ERR, "port %hu queue %u peer port "
991                                         "out of range %hu",
992                                         priv->dev_data->port_id, i, pp);
993                                 return -rte_errno;
994                         }
995                         bits[pp / 32] |= 1 << (pp % 32);
996                         mlx5_txq_release(dev, i);
997                 }
998         } else {
999                 for (i = 0; i < priv->rxqs_n; i++) {
1000                         rxq_ctrl = mlx5_rxq_get(dev, i);
1001                         if (!rxq_ctrl)
1002                                 continue;
1003                         if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
1004                                 mlx5_rxq_release(dev, i);
1005                                 continue;
1006                         }
1007                         pp = rxq_ctrl->hairpin_conf.peers[0].port;
1008                         if (pp >= RTE_MAX_ETHPORTS) {
1009                                 rte_errno = ERANGE;
1010                                 mlx5_rxq_release(dev, i);
1011                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1012                                         "out of range %hu",
1013                                         priv->dev_data->port_id, i, pp);
1014                                 return -rte_errno;
1015                         }
1016                         bits[pp / 32] |= 1 << (pp % 32);
1017                         mlx5_rxq_release(dev, i);
1018                 }
1019         }
1020         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1021                 if (bits[i / 32] & (1 << (i % 32))) {
1022                         if ((size_t)ret >= len) {
1023                                 rte_errno = E2BIG;
1024                                 return -rte_errno;
1025                         }
1026                         peer_ports[ret++] = i;
1027                 }
1028         }
1029         return ret;
1030 }
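/*
 * An illustrative application-side call of the callback above:
 *
 * @code
 * uint16_t peers[RTE_MAX_ETHPORTS];
 * int n;
 *
 * // List all peer Rx ports of tx_port's hairpin Tx queues.
 * n = rte_eth_hairpin_get_peer_ports(tx_port, peers, RTE_DIM(peers), 1);
 * if (n < 0)
 *         return n;
 * @endcode
 */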
1031
1032 /**
1033  * DPDK callback to start the device.
1034  *
1035  * Simulate device start by attaching all configured flows.
1036  *
1037  * @param dev
1038  *   Pointer to Ethernet device structure.
1039  *
1040  * @return
1041  *   0 on success, a negative errno value otherwise and rte_errno is set.
1042  */
1043 int
1044 mlx5_dev_start(struct rte_eth_dev *dev)
1045 {
1046         struct mlx5_priv *priv = dev->data->dev_private;
1047         int ret;
1048         int fine_inline;
1049
1050         DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1051         fine_inline = rte_mbuf_dynflag_lookup
1052                 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1053         if (fine_inline >= 0)
1054                 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1055         else
1056                 rte_net_mlx5_dynf_inline_mask = 0;
1057         if (dev->data->nb_rx_queues > 0) {
1058                 ret = mlx5_dev_configure_rss_reta(dev);
1059                 if (ret) {
1060                         DRV_LOG(ERR, "port %u reta config failed: %s",
1061                                 dev->data->port_id, strerror(rte_errno));
1062                         return -rte_errno;
1063                 }
1064         }
1065         ret = mlx5_txpp_start(dev);
1066         if (ret) {
1067                 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1068                         dev->data->port_id, strerror(rte_errno));
1069                 goto error;
1070         }
1071         if ((priv->config.devx && priv->config.dv_flow_en &&
1072             priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1073                 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1074                 if (ret)
1075                         goto error;
1076         }
1077         ret = mlx5_txq_start(dev);
1078         if (ret) {
1079                 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1080                         dev->data->port_id, strerror(rte_errno));
1081                 goto error;
1082         }
1083         ret = mlx5_rxq_start(dev);
1084         if (ret) {
1085                 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1086                         dev->data->port_id, strerror(rte_errno));
1087                 goto error;
1088         }
1089         /*
1090          * Such step will be skipped if there is no hairpin TX queue configured
1091          * with RX peer queue from the same device.
1092          */
1093         ret = mlx5_hairpin_auto_bind(dev);
1094         if (ret) {
1095                 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1096                         dev->data->port_id, strerror(rte_errno));
1097                 goto error;
1098         }
1099         /* Set started flag here for the following steps like control flow. */
1100         dev->data->dev_started = 1;
1101         ret = mlx5_rx_intr_vec_enable(dev);
1102         if (ret) {
1103                 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1104                         dev->data->port_id);
1105                 goto error;
1106         }
1107         mlx5_os_stats_init(dev);
1108         ret = mlx5_traffic_enable(dev);
1109         if (ret) {
1110                 DRV_LOG(ERR, "port %u failed to set defaults flows",
1111                         dev->data->port_id);
1112                 goto error;
1113         }
1114         /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1115         mlx5_flow_rxq_dynf_metadata_set(dev);
1116         /* Set flags and context to convert Rx timestamps. */
1117         mlx5_rxq_timestamp_set(dev);
1118         /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1119         mlx5_txq_dynf_timestamp_set(dev);
1120         /*
1121          * In non-cached mode, only the default mreg copy action needs to be
1122          * started, as no flow created by an application exists anymore.
1123          * But it is worth wrapping the interface for further usage.
1124          */
1125         ret = mlx5_flow_start_default(dev);
1126         if (ret) {
1127                 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1128                         dev->data->port_id, strerror(rte_errno));
1129                 goto error;
1130         }
1131         rte_wmb();
1132         dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1133         dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1134         /* Enable datapath on secondary process. */
1135         mlx5_mp_os_req_start_rxtx(dev);
1136         if (priv->sh->intr_handle.fd >= 0) {
1137                 priv->sh->port[priv->dev_port - 1].ih_port_id =
1138                                         (uint32_t)dev->data->port_id;
1139         } else {
1140                 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1141                         dev->data->port_id);
1142                 dev->data->dev_conf.intr_conf.lsc = 0;
1143                 dev->data->dev_conf.intr_conf.rmv = 0;
1144         }
1145         if (priv->sh->intr_handle_devx.fd >= 0)
1146                 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1147                                         (uint32_t)dev->data->port_id;
1148         return 0;
1149 error:
1150         ret = rte_errno; /* Save rte_errno before cleanup. */
1151         /* Rollback. */
1152         dev->data->dev_started = 0;
1153         mlx5_flow_stop_default(dev);
1154         mlx5_traffic_disable(dev);
1155         mlx5_txq_stop(dev);
1156         mlx5_rxq_stop(dev);
1157         if (priv->obj_ops.lb_dummy_queue_release)
1158                 priv->obj_ops.lb_dummy_queue_release(dev);
1159         mlx5_txpp_stop(dev); /* Stop last. */
1160         rte_errno = ret; /* Restore rte_errno. */
1161         return -rte_errno;
1162 }
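/*
 * A condensed sketch of the application-side sequence that ends up in
 * mlx5_dev_start() above (illustrative only; "conf", "nb_rxq" and "nb_txq"
 * are application choices):
 *
 * @code
 * ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 * // ... per-queue rte_eth_rx_queue_setup()/rte_eth_tx_queue_setup() ...
 * ret = rte_eth_dev_start(port_id); // Dispatches to mlx5_dev_start().
 * @endcode
 */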
1163
1164 /**
1165  * DPDK callback to stop the device.
1166  *
1167  * Simulate device stop by detaching all configured flows.
1168  *
1169  * @param dev
1170  *   Pointer to Ethernet device structure.
1171  */
1172 int
1173 mlx5_dev_stop(struct rte_eth_dev *dev)
1174 {
1175         struct mlx5_priv *priv = dev->data->dev_private;
1176
1177         dev->data->dev_started = 0;
1178         /* Prevent crashes when queues are still in use. */
1179         dev->rx_pkt_burst = removed_rx_burst;
1180         dev->tx_pkt_burst = removed_tx_burst;
1181         rte_wmb();
1182         /* Disable datapath on secondary process. */
1183         mlx5_mp_os_req_stop_rxtx(dev);
1184         rte_delay_us_sleep(1000 * priv->rxqs_n);
1185         DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1186         mlx5_flow_stop_default(dev);
1187         /* Control flows for default traffic can be removed firstly. */
1188         mlx5_traffic_disable(dev);
1189         /* All RX queue flags will be cleared in the flush interface. */
1190         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1191         mlx5_flow_meter_rxq_flush(dev);
1192         mlx5_rx_intr_vec_disable(dev);
1193         priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1194         priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1195         mlx5_txq_stop(dev);
1196         mlx5_rxq_stop(dev);
1197         if (priv->obj_ops.lb_dummy_queue_release)
1198                 priv->obj_ops.lb_dummy_queue_release(dev);
1199         mlx5_txpp_stop(dev);
1200
1201         return 0;
1202 }
1203
1204 /**
1205  * Enable traffic flows configured by control plane
1206  *
1207  * @param dev
1208  *   Pointer to Ethernet device structure.
1211  *
1212  * @return
1213  *   0 on success, a negative errno value otherwise and rte_errno is set.
1214  */
1215 int
1216 mlx5_traffic_enable(struct rte_eth_dev *dev)
1217 {
1218         struct mlx5_priv *priv = dev->data->dev_private;
1219         struct rte_flow_item_eth bcast = {
1220                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1221         };
1222         struct rte_flow_item_eth ipv6_multi_spec = {
1223                 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1224         };
1225         struct rte_flow_item_eth ipv6_multi_mask = {
1226                 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1227         };
1228         struct rte_flow_item_eth unicast = {
1229                 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1230         };
1231         struct rte_flow_item_eth unicast_mask = {
1232                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1233         };
1234         const unsigned int vlan_filter_n = priv->vlan_filter_n;
1235         const struct rte_ether_addr cmp = {
1236                 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1237         };
1238         unsigned int i;
1239         unsigned int j;
1240         int ret;
1241
1242         /*
1243          * The hairpin Txq default flow should be created regardless of the
1244          * isolation mode; otherwise, all packets to be sent would go out
1245          * directly without the Tx flow actions, e.g. encapsulation.
1246          */
1247         for (i = 0; i != priv->txqs_n; ++i) {
1248                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1249                 if (!txq_ctrl)
1250                         continue;
1251                 /* Only Tx implicit mode requires the default Tx flow. */
1252                 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1253                     txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1254                     txq_ctrl->hairpin_conf.peers[0].port ==
1255                     priv->dev_data->port_id) {
1256                         ret = mlx5_ctrl_flow_source_queue(dev, i);
1257                         if (ret) {
1258                                 mlx5_txq_release(dev, i);
1259                                 goto error;
1260                         }
1261                 }
1262                 mlx5_txq_release(dev, i);
1263         }
1264         if (priv->config.dv_esw_en && !priv->config.vf) {
1265                 if (mlx5_flow_create_esw_table_zero_flow(dev))
1266                         priv->fdb_def_rule = 1;
1267                 else
1268                         DRV_LOG(INFO, "port %u FDB default rule cannot be"
1269                                 " configured - only Eswitch group 0 flows are"
1270                                 " supported.", dev->data->port_id);
1271         }
1272         if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1273                 ret = mlx5_flow_lacp_miss(dev);
1274                 if (ret)
1275                         DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1276                                 "forward LACP to kernel.", dev->data->port_id);
1277                 else
1278                         DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1279                                 , dev->data->port_id);
1280         }
1281         if (priv->isolated)
1282                 return 0;
1283         if (dev->data->promiscuous) {
1284                 struct rte_flow_item_eth promisc = {
1285                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1286                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1287                         .type = 0,
1288                 };
1289
1290                 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1291                 if (ret)
1292                         goto error;
1293         }
1294         if (dev->data->all_multicast) {
1295                 struct rte_flow_item_eth multicast = {
1296                         .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1297                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1298                         .type = 0,
1299                 };
1300
1301                 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1302                 if (ret)
1303                         goto error;
1304         } else {
1305                 /* Add broadcast/multicast flows. */
1306                 for (i = 0; i != vlan_filter_n; ++i) {
1307                         uint16_t vlan = priv->vlan_filter[i];
1308
1309                         struct rte_flow_item_vlan vlan_spec = {
1310                                 .tci = rte_cpu_to_be_16(vlan),
1311                         };
1312                         struct rte_flow_item_vlan vlan_mask =
1313                                 rte_flow_item_vlan_mask;
1314
1315                         ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1316                                                   &vlan_spec, &vlan_mask);
1317                         if (ret)
1318                                 goto error;
1319                         ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1320                                                   &ipv6_multi_mask,
1321                                                   &vlan_spec, &vlan_mask);
1322                         if (ret)
1323                                 goto error;
1324                 }
1325                 if (!vlan_filter_n) {
1326                         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1327                         if (ret)
1328                                 goto error;
1329                         ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1330                                              &ipv6_multi_mask);
1331                         if (ret) {
1332                                 /* Do not fail on IPv6 broadcast creation failure. */
1333                                 DRV_LOG(WARNING,
1334                                         "IPv6 broadcast is not supported");
1335                                 ret = 0;
1336                         }
1337                 }
1338         }
1339         /* Add MAC address flows. */
1340         for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1341                 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1342
1343                 if (!memcmp(mac, &cmp, sizeof(*mac)))
1344                         continue;
1345                 memcpy(&unicast.dst.addr_bytes,
1346                        mac->addr_bytes,
1347                        RTE_ETHER_ADDR_LEN);
1348                 for (j = 0; j != vlan_filter_n; ++j) {
1349                         uint16_t vlan = priv->vlan_filter[j];
1350
1351                         struct rte_flow_item_vlan vlan_spec = {
1352                                 .tci = rte_cpu_to_be_16(vlan),
1353                         };
1354                         struct rte_flow_item_vlan vlan_mask =
1355                                 rte_flow_item_vlan_mask;
1356
1357                         ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1358                                                   &unicast_mask,
1359                                                   &vlan_spec,
1360                                                   &vlan_mask);
1361                         if (ret)
1362                                 goto error;
1363                 }
1364                 if (!vlan_filter_n) {
1365                         ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1366                         if (ret)
1367                                 goto error;
1368                 }
1369         }
1370         return 0;
1371 error:
1372         ret = rte_errno; /* Save rte_errno before cleanup. */
1373         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1374         rte_errno = ret; /* Restore rte_errno. */
1375         return -rte_errno;
1376 }
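/*
 * An illustrative expansion of one unicast control flow from the loop
 * above: the spec carries the device MAC as destination and the mask
 * selects the full destination address:
 *
 * @code
 * struct rte_flow_item_eth spec = unicast;      // dst = device MAC.
 * struct rte_flow_item_eth mask = unicast_mask; // dst = ff:ff:ff:ff:ff:ff.
 *
 * ret = mlx5_ctrl_flow(dev, &spec, &mask);
 * @endcode
 */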
1377
1378
1379 /**
1380  * Disable traffic flows configured by control plane
1381  *
1382  * @param dev
1383  *   Pointer to Ethernet device structure.
1384  */
1385 void
1386 mlx5_traffic_disable(struct rte_eth_dev *dev)
1387 {
1388         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1389 }
1390
1391 /**
1392  * Restart traffic flows configured by control plane
1393  *
1394  * @param dev
1395  *   Pointer to Ethernet device structure.
1396  *
1397  * @return
1398  *   0 on success, a negative errno value otherwise and rte_errno is set.
1399  */
1400 int
1401 mlx5_traffic_restart(struct rte_eth_dev *dev)
1402 {
1403         if (dev->data->dev_started) {
1404                 mlx5_traffic_disable(dev);
1405                 return mlx5_traffic_enable(dev);
1406         }
1407         return 0;
1408 }