drivers/net/mlx5/mlx5_trigger.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->txqs_n; ++i)
                mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;
        int ret;

        for (i = 0; i != priv->txqs_n; ++i) {
                struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
                struct mlx5_txq_data *txq_data;
                uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

                if (!txq_ctrl)
                        continue;
                txq_data = &txq_ctrl->txq;
                if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
                        txq_alloc_elts(txq_ctrl);
                MLX5_ASSERT(!txq_ctrl->obj);
                txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
                                            0, txq_ctrl->socket);
                if (!txq_ctrl->obj) {
                        DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
                                "memory resources.", dev->data->port_id,
                                txq_data->idx);
                        rte_errno = ENOMEM;
                        goto error;
                }
                ret = priv->obj_ops.txq_obj_new(dev, i);
                if (ret < 0) {
                        mlx5_free(txq_ctrl->obj);
                        txq_ctrl->obj = NULL;
                        goto error;
                }
                if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
                        size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

                        txq_data->fcqs = mlx5_malloc(flags, size,
                                                     RTE_CACHE_LINE_SIZE,
                                                     txq_ctrl->socket);
                        if (!txq_data->fcqs) {
                                DRV_LOG(ERR, "Port %u Tx queue %u cannot "
                                        "allocate memory (FCQ).",
                                        dev->data->port_id, i);
                                rte_errno = ENOMEM;
                                goto error;
                        }
                }
                DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
                        dev->data->port_id, i, (void *)&txq_ctrl->obj);
                LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        do {
                mlx5_txq_release(dev, i);
        } while (i-- != 0);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}
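
/*
 * Illustrative sketch, compiled out (the guard macro below is never
 * defined): the rte_errno save/restore convention used by mlx5_txq_start()
 * and mlx5_rxq_start() above. resource_acquire() and resource_release() are
 * hypothetical helpers, not part of this driver; like mlx5_txq_release(),
 * resource_release() is assumed to tolerate an index that was never
 * acquired.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLES
static int
example_start_all(unsigned int n)
{
        unsigned int i;
        int ret;

        for (i = 0; i != n; ++i) {
                if (resource_acquire(i) != 0)
                        goto error;
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup may clobber it. */
        do {
                resource_release(i);
        } while (i-- != 0);
        rte_errno = ret; /* Restore rte_errno for the caller. */
        return -rte_errno;
}
#endif /* MLX5_TRIGGER_DOC_EXAMPLES */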

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->rxqs_n; ++i)
                mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;
        int ret = 0;

        /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
        if (mlx5_mprq_alloc_mp(dev)) {
                /* Should not release Rx queues but return immediately. */
                return -rte_errno;
        }
        DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
                dev->data->port_id, priv->sh->device_attr.max_qp_wr);
        DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
                dev->data->port_id, priv->sh->device_attr.max_sge);
        for (i = 0; i != priv->rxqs_n; ++i) {
                struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

                if (!rxq_ctrl)
                        continue;
                if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
                        /* Pre-register Rx mempools. */
                        if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
                                mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
                                                  rxq_ctrl->rxq.mprq_mp);
                        } else {
                                uint32_t s;

                                for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
                                        mlx5_mr_update_mp
                                                (dev, &rxq_ctrl->rxq.mr_ctrl,
                                                 rxq_ctrl->rxq.rxseg[s].mp);
                        }
                        ret = rxq_alloc_elts(rxq_ctrl);
                        if (ret)
                                goto error;
                }
                MLX5_ASSERT(!rxq_ctrl->obj);
                rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
                                            sizeof(*rxq_ctrl->obj), 0,
                                            rxq_ctrl->socket);
                if (!rxq_ctrl->obj) {
                        DRV_LOG(ERR,
                                "Port %u Rx queue %u cannot allocate resources.",
                                dev->data->port_id, (*priv->rxqs)[i]->idx);
                        rte_errno = ENOMEM;
                        goto error;
                }
                ret = priv->obj_ops.rxq_obj_new(dev, i);
                if (ret) {
                        mlx5_free(rxq_ctrl->obj);
                        rxq_ctrl->obj = NULL;
                        goto error;
                }
                DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
                        dev->data->port_id, i, (void *)&rxq_ctrl->obj);
                LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        do {
                mlx5_rxq_release(dev, i);
        } while (i-- != 0);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}

/**
 * Bind Tx queues to the target Rx queues for hairpin.
 *
 * This performs the automatic binding done at device start.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
        struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
        struct mlx5_txq_ctrl *txq_ctrl;
        struct mlx5_rxq_ctrl *rxq_ctrl;
        struct mlx5_devx_obj *sq;
        struct mlx5_devx_obj *rq;
        unsigned int i;
        int ret = 0;

        for (i = 0; i != priv->txqs_n; ++i) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (!txq_ctrl)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (!txq_ctrl->obj) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no txq object found: %d",
                                dev->data->port_id, i);
                        mlx5_txq_release(dev, i);
                        return -rte_errno;
                }
                sq = txq_ctrl->obj->sq;
                rxq_ctrl = mlx5_rxq_get(dev,
                                        txq_ctrl->hairpin_conf.peers[0].queue);
                if (!rxq_ctrl) {
                        mlx5_txq_release(dev, i);
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u no rxq object found: %d",
                                dev->data->port_id,
                                txq_ctrl->hairpin_conf.peers[0].queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
                    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
                                "Rx queue %d", dev->data->port_id,
                                i, txq_ctrl->hairpin_conf.peers[0].queue);
                        goto error;
                }
                rq = rxq_ctrl->obj->rq;
                if (!rq) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
                                dev->data->port_id,
                                txq_ctrl->hairpin_conf.peers[0].queue);
                        goto error;
                }
                sq_attr.state = MLX5_SQC_STATE_RDY;
                sq_attr.sq_state = MLX5_SQC_STATE_RST;
                sq_attr.hairpin_peer_rq = rq->id;
                sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
                ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
                if (ret)
                        goto error;
                rq_attr.state = MLX5_SQC_STATE_RDY;
                rq_attr.rq_state = MLX5_SQC_STATE_RST;
                rq_attr.hairpin_peer_sq = sq->id;
                rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
                ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
                if (ret)
                        goto error;
                mlx5_txq_release(dev, i);
                mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
        }
        return 0;
error:
        mlx5_txq_release(dev, i);
        mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
        return -rte_errno;
}
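
/*
 * Illustrative sketch, compiled out: how an application would set up the
 * single-port hairpin queue pair that the auto binding above expects. The
 * port and queue identifiers and the descriptor count (256) are
 * hypothetical.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLES
static int
example_hairpin_queue_setup(uint16_t port_id, uint16_t txq, uint16_t rxq)
{
        /* 1-to-1 mapping: Tx queue txq of port_id peers Rx queue rxq. */
        struct rte_eth_hairpin_conf conf = {
                .peer_count = 1,
                .manual_bind = 0, /* Let mlx5_hairpin_auto_bind() run. */
                .tx_explicit = 0,
        };
        int ret;

        conf.peers[0].port = port_id;
        conf.peers[0].queue = rxq;
        ret = rte_eth_tx_hairpin_queue_setup(port_id, txq, 256, &conf);
        if (ret != 0)
                return ret;
        conf.peers[0].queue = txq;
        return rte_eth_rx_hairpin_queue_setup(port_id, rxq, 256, &conf);
}
#endif /* MLX5_TRIGGER_DOC_EXAMPLES */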

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information from.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
                               struct rte_hairpin_peer_info *current_info,
                               struct rte_hairpin_peer_info *peer_info,
                               uint32_t direction)
{
        struct mlx5_priv *priv = dev->data->dev_private;

        RTE_SET_USED(current_info);
        if (dev->data->dev_started == 0) {
                rte_errno = EBUSY;
                DRV_LOG(ERR, "peer port %u is not started",
                        dev->data->port_id);
                return -rte_errno;
        }
        /*
         * Peer port used as egress. In the current design, the hairpin Tx
         * queue will be bound to the peer Rx queue, so only the information
         * of the peer Rx queue needs to be fetched.
         */
        if (direction == 0) {
                struct mlx5_txq_ctrl *txq_ctrl;

                txq_ctrl = mlx5_txq_get(dev, peer_queue);
                if (txq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
                                dev->data->port_id, peer_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
                                dev->data->port_id, peer_queue);
                        mlx5_txq_release(dev, peer_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Txq object found: %d",
                                dev->data->port_id, peer_queue);
                        mlx5_txq_release(dev, peer_queue);
                        return -rte_errno;
                }
                peer_info->qp_id = txq_ctrl->obj->sq->id;
                peer_info->vhca_id = priv->config.hca_attr.vhca_id;
                /* 1-to-1 mapping, only the first one is used. */
                peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
                peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
                peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
                mlx5_txq_release(dev, peer_queue);
        } else { /* Peer port used as ingress. */
                struct mlx5_rxq_ctrl *rxq_ctrl;

                rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
                if (rxq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
                                dev->data->port_id, peer_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
                                dev->data->port_id, peer_queue);
                        mlx5_rxq_release(dev, peer_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Rxq object found: %d",
                                dev->data->port_id, peer_queue);
                        mlx5_rxq_release(dev, peer_queue);
                        return -rte_errno;
                }
                peer_info->qp_id = rxq_ctrl->obj->rq->id;
                peer_info->vhca_id = priv->config.hca_attr.vhca_id;
                peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
                peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
                peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
                mlx5_rxq_release(dev, peer_queue);
        }
        return 0;
}
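
/*
 * Illustrative sketch, compiled out: fetching the peer Rx queue information
 * through the ethdev-internal helper, as done below in
 * mlx5_hairpin_bind_single_port(). rx_port and rx_queue are hypothetical.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLES
static int
example_fetch_peer_rxq_info(uint16_t rx_port, uint16_t rx_queue,
                            struct rte_hairpin_peer_info *peer)
{
        /* direction = 1: fetch the Rx queue information of the peer port. */
        return rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
                                                 NULL, peer, 1);
}
#endif /* MLX5_TRIGGER_DOC_EXAMPLES */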

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
                             struct rte_hairpin_peer_info *peer_info,
                             uint32_t direction)
{
        int ret = 0;

        /*
         * Consistency checking of the peer queue: the opposite direction is
         * used to get the peer queue info with the ethdev port ID, so there
         * is no need to check it again here.
         */
        if (peer_info->peer_q != cur_queue) {
                rte_errno = EINVAL;
                DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
                        dev->data->port_id, cur_queue, peer_info->peer_q);
                return -rte_errno;
        }
        if (direction != 0) {
                struct mlx5_txq_ctrl *txq_ctrl;
                struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

                txq_ctrl = mlx5_txq_get(dev, cur_queue);
                if (txq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Txq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->hairpin_status != 0) {
                        DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return 0;
                }
                /*
                 * Consistency checking of all queues of one port is done in
                 * the bind() function, and that is optional.
                 */
                if (peer_info->tx_explicit !=
                    txq_ctrl->hairpin_conf.tx_explicit) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (peer_info->manual_bind !=
                    txq_ctrl->hairpin_conf.manual_bind) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                sq_attr.state = MLX5_SQC_STATE_RDY;
                sq_attr.sq_state = MLX5_SQC_STATE_RST;
                sq_attr.hairpin_peer_rq = peer_info->qp_id;
                sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
                ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
                if (ret == 0)
                        txq_ctrl->hairpin_status = 1;
                mlx5_txq_release(dev, cur_queue);
        } else {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

                rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
                if (rxq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Rxq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->hairpin_status != 0) {
                        DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return 0;
                }
                if (peer_info->tx_explicit !=
                    rxq_ctrl->hairpin_conf.tx_explicit) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (peer_info->manual_bind !=
                    rxq_ctrl->hairpin_conf.manual_bind) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                rq_attr.state = MLX5_SQC_STATE_RDY;
                rq_attr.rq_state = MLX5_SQC_STATE_RST;
                rq_attr.hairpin_peer_sq = peer_info->qp_id;
                rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
                ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
                if (ret == 0)
                        rxq_ctrl->hairpin_status = 1;
                mlx5_rxq_release(dev, cur_queue);
        }
        return ret;
}

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
                               uint32_t direction)
{
        int ret = 0;

        if (direction != 0) {
                struct mlx5_txq_ctrl *txq_ctrl;
                struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

                txq_ctrl = mlx5_txq_get(dev, cur_queue);
                if (txq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                /* Already unbound, return success before obj checking. */
                if (txq_ctrl->hairpin_status == 0) {
                        DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return 0;
                }
                if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Txq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                sq_attr.state = MLX5_SQC_STATE_RST;
                sq_attr.sq_state = MLX5_SQC_STATE_RST;
                ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
                if (ret == 0)
                        txq_ctrl->hairpin_status = 0;
                mlx5_txq_release(dev, cur_queue);
        } else {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

                rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
                if (rxq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->hairpin_status == 0) {
                        DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return 0;
                }
                if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Rxq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                rq_attr.state = MLX5_SQC_STATE_RST;
                rq_attr.rq_state = MLX5_SQC_STATE_RST;
                ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
                if (ret == 0)
                        rxq_ctrl->hairpin_status = 0;
                mlx5_rxq_release(dev, cur_queue);
        }
        return ret;
}

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx side to one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        int ret = 0;
        struct mlx5_txq_ctrl *txq_ctrl;
        uint32_t i;
        struct rte_hairpin_peer_info peer = {0xffffff};
        struct rte_hairpin_peer_info cur;
        const struct rte_eth_hairpin_conf *conf;
        uint16_t num_q = 0;
        uint16_t local_port = priv->dev_data->port_id;
        uint32_t manual;
        uint32_t explicit;
        uint16_t rx_queue;

        if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
                rte_errno = ENODEV;
                DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
                return -rte_errno;
        }
        /*
         * Before binding TxQ to the peer RxQ, a first pass over the queues
         * checks their configuration consistency. This costs a little time
         * but is better than having to do a rollback.
         */
        for (i = 0; i != priv->txqs_n; i++) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                /*
                 * All hairpin Tx queues of a single port that are connected
                 * to the same peer Rx port should have the same "auto
                 * binding" and "implicit Tx flow" modes.
                 * Peer consistency checking will be done in per-queue binding.
                 */
                conf = &txq_ctrl->hairpin_conf;
                if (conf->peers[0].port == rx_port) {
                        if (num_q == 0) {
                                manual = conf->manual_bind;
                                explicit = conf->tx_explicit;
                        } else {
                                if (manual != conf->manual_bind ||
                                    explicit != conf->tx_explicit) {
                                        rte_errno = EINVAL;
                                        DRV_LOG(ERR, "port %u queue %d mode"
                                                " mismatch: %u %u, %u %u",
                                                local_port, i, manual,
                                                conf->manual_bind, explicit,
                                                conf->tx_explicit);
                                        mlx5_txq_release(dev, i);
                                        return -rte_errno;
                                }
                        }
                        num_q++;
                }
                mlx5_txq_release(dev, i);
        }
        /* If no queue is configured, success is returned directly. */
        if (num_q == 0)
                return ret;
        /* All the hairpin Tx queues need to be traversed again. */
        for (i = 0; i != priv->txqs_n; i++) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
                /*
                 * Fetch the peer RxQ's information.
                 * No need to pass the information of the current queue.
                 */
                ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
                                                        NULL, &peer, 1);
                if (ret != 0) {
                        mlx5_txq_release(dev, i);
                        goto error;
                }
                /* Accessing its own device, inside mlx5 PMD. */
                ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
                if (ret != 0) {
                        mlx5_txq_release(dev, i);
                        goto error;
                }
                /* Pass TxQ's information to the peer RxQ and try binding. */
                cur.peer_q = rx_queue;
                cur.qp_id = txq_ctrl->obj->sq->id;
                cur.vhca_id = priv->config.hca_attr.vhca_id;
                cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
                cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
                /*
                 * To access another device in a proper way, an RTE-level
                 * private function is needed.
                 */
                ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
                                                      &cur, 0);
                if (ret != 0) {
                        mlx5_txq_release(dev, i);
                        goto error;
                }
                mlx5_txq_release(dev, i);
        }
        return 0;
error:
        /*
         * Do the rollback process for the queues already bound.
         * No need to check the return value of the queue unbind function.
         */
        do {
                /* No validation is needed here. */
                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
                rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
                mlx5_hairpin_queue_peer_unbind(dev, i, 1);
                mlx5_txq_release(dev, i);
        } while (i--);
        return ret;
}
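
/*
 * Illustrative sketch, compiled out: a two-port hairpin configuration that
 * requires the manual binding path implemented above (manual_bind = 1) with
 * application-managed Tx flow rules (tx_explicit = 1). The ports, queues
 * and descriptor count are hypothetical.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLES
static int
example_two_port_hairpin_setup(uint16_t tx_port, uint16_t rx_port,
                               uint16_t txq, uint16_t rxq)
{
        struct rte_eth_hairpin_conf conf = {
                .peer_count = 1,
                .manual_bind = 1, /* Bound later by rte_eth_hairpin_bind(). */
                .tx_explicit = 1, /* Application inserts the Tx flow rules. */
        };
        int ret;

        conf.peers[0].port = rx_port;
        conf.peers[0].queue = rxq;
        ret = rte_eth_tx_hairpin_queue_setup(tx_port, txq, 256, &conf);
        if (ret != 0)
                return ret;
        conf.peers[0].port = tx_port;
        conf.peers[0].queue = txq;
        return rte_eth_rx_hairpin_queue_setup(rx_port, rxq, 256, &conf);
}
#endif /* MLX5_TRIGGER_DOC_EXAMPLES */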

/*
 * Unbind the hairpin port pair; the HW configuration of both devices will be
 * cleared and the status reset for all the queues used between them.
 * This function only supports unbinding the Tx side from one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *txq_ctrl;
        uint32_t i;
        int ret;
        uint16_t cur_port = priv->dev_data->port_id;

        if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
                rte_errno = ENODEV;
                DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
                return -rte_errno;
        }
        for (i = 0; i != priv->txqs_n; i++) {
                uint16_t rx_queue;

                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                /* Only the first used queue needs to be checked. */
                if (txq_ctrl->hairpin_conf.manual_bind == 0) {
                        if (cur_port != rx_port) {
                                rte_errno = EINVAL;
                                DRV_LOG(ERR, "port %u and port %u are in"
                                        " auto-bind mode", cur_port, rx_port);
                                mlx5_txq_release(dev, i);
                                return -rte_errno;
                        } else {
                                mlx5_txq_release(dev, i);
                                return 0;
                        }
                }
                rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
                mlx5_txq_release(dev, i);
                ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
                if (ret) {
                        DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
                                rx_port, rx_queue);
                        return ret;
                }
                ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
                if (ret) {
                        DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
                                cur_port, i);
                        return ret;
                }
        }
        return 0;
}

/*
 * Bind hairpin ports; Rx can be all ports when rx_port is RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
        int ret = 0;
        uint16_t p, pp;
        struct mlx5_priv *priv = dev->data->dev_private;

        /*
         * If the Rx port has no hairpin configuration with the current port,
         * the binding will be skipped in the single-port function called
         * below. The device started status is checked only right before the
         * queue information is updated.
         */
        if (rx_port == RTE_MAX_ETHPORTS) {
                MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
                        ret = mlx5_hairpin_bind_single_port(dev, p);
                        if (ret != 0)
                                goto unbind;
                }
                return ret;
        } else {
                return mlx5_hairpin_bind_single_port(dev, rx_port);
        }
unbind:
        MLX5_ETH_FOREACH_DEV(pp, priv->pci_dev)
                if (pp < p)
                        mlx5_hairpin_unbind_single_port(dev, pp);
        return ret;
}

/*
 * Unbind hairpin ports; Rx can be all ports when rx_port is
 * RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
        int ret = 0;
        uint16_t p;
        struct mlx5_priv *priv = dev->data->dev_private;

        if (rx_port == RTE_MAX_ETHPORTS)
                MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
                        ret = mlx5_hairpin_unbind_single_port(dev, p);
                        if (ret != 0)
                                return ret;
                }
        else
                ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
        return ret;
}
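
/*
 * Illustrative sketch, compiled out: how an application drives the manual
 * binding entry points above through the public ethdev API. Port numbers
 * are hypothetical; each call binds (or unbinds) the Tx side only, so a
 * bidirectional pair needs two calls with the ports swapped.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLES
static int
example_manual_bind_cycle(uint16_t tx_port, uint16_t rx_port)
{
        int ret;

        ret = rte_eth_hairpin_bind(tx_port, rx_port);
        if (ret != 0)
                return ret;
        /* ... traffic runs through the hairpin path here ... */
        return rte_eth_hairpin_unbind(tx_port, rx_port);
}
#endif /* MLX5_TRIGGER_DOC_EXAMPLES */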

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, and possibly different from the order used when
 * setting up the hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or a positive value on success, the actual number of peer ports;
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
                            size_t len, uint32_t direction)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *txq_ctrl;
        struct mlx5_rxq_ctrl *rxq_ctrl;
        uint32_t i;
        uint16_t pp;
        uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
        int ret = 0;

        if (direction) {
                for (i = 0; i < priv->txqs_n; i++) {
                        txq_ctrl = mlx5_txq_get(dev, i);
                        if (!txq_ctrl)
                                continue;
                        if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                                mlx5_txq_release(dev, i);
                                continue;
                        }
                        pp = txq_ctrl->hairpin_conf.peers[0].port;
                        if (pp >= RTE_MAX_ETHPORTS) {
                                rte_errno = ERANGE;
                                mlx5_txq_release(dev, i);
                                DRV_LOG(ERR, "port %hu queue %u peer port "
                                        "out of range %hu",
                                        priv->dev_data->port_id, i, pp);
                                return -rte_errno;
                        }
                        bits[pp / 32] |= 1u << (pp % 32);
                        mlx5_txq_release(dev, i);
                }
        } else {
                for (i = 0; i < priv->rxqs_n; i++) {
                        rxq_ctrl = mlx5_rxq_get(dev, i);
                        if (!rxq_ctrl)
                                continue;
                        if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                                mlx5_rxq_release(dev, i);
                                continue;
                        }
                        pp = rxq_ctrl->hairpin_conf.peers[0].port;
                        if (pp >= RTE_MAX_ETHPORTS) {
                                rte_errno = ERANGE;
                                mlx5_rxq_release(dev, i);
                                DRV_LOG(ERR, "port %hu queue %u peer port "
                                        "out of range %hu",
                                        priv->dev_data->port_id, i, pp);
                                return -rte_errno;
                        }
                        bits[pp / 32] |= 1u << (pp % 32);
                        mlx5_rxq_release(dev, i);
                }
        }
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (bits[i / 32] & (1u << (i % 32))) {
                        if ((size_t)ret >= len) {
                                rte_errno = E2BIG;
                                return -rte_errno;
                        }
                        peer_ports[ret++] = i;
                }
        }
        return ret;
}
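
/*
 * Illustrative sketch, compiled out: listing all peer Rx ports of a given
 * Tx port through the public API that lands in the callback above. port_id
 * is hypothetical.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLES
static void
example_list_peer_rx_ports(uint16_t port_id)
{
        uint16_t peers[RTE_MAX_ETHPORTS];
        int n, i;

        /* direction = 1: this port is used as Tx, list peer Rx ports. */
        n = rte_eth_hairpin_get_peer_ports(port_id, peers, RTE_DIM(peers), 1);
        for (i = 0; i < n; i++)
                DRV_LOG(INFO, "port %u peer Rx port: %u", port_id, peers[i]);
}
#endif /* MLX5_TRIGGER_DOC_EXAMPLES */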

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        int ret;
        int fine_inline;

        DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
        fine_inline = rte_mbuf_dynflag_lookup
                (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
        if (fine_inline >= 0)
                rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
        else
                rte_net_mlx5_dynf_inline_mask = 0;
        if (dev->data->nb_rx_queues > 0) {
                ret = mlx5_dev_configure_rss_reta(dev);
                if (ret) {
                        DRV_LOG(ERR, "port %u reta config failed: %s",
                                dev->data->port_id, strerror(rte_errno));
                        return -rte_errno;
                }
        }
        ret = mlx5_txpp_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        ret = mlx5_txq_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        ret = mlx5_rxq_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        ret = mlx5_hairpin_auto_bind(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u hairpin binding failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        /* Set started flag here for the following steps like control flow. */
        dev->data->dev_started = 1;
        ret = mlx5_rx_intr_vec_enable(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
                        dev->data->port_id);
                goto error;
        }
        mlx5_os_stats_init(dev);
        ret = mlx5_traffic_enable(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u failed to set default flows",
                        dev->data->port_id);
                goto error;
        }
        /* Set a mask and offset of dynamic metadata flows into Rx queues. */
        mlx5_flow_rxq_dynf_metadata_set(dev);
        /* Set flags and context to convert Rx timestamps. */
        mlx5_rxq_timestamp_set(dev);
        /* Set a mask and offset of scheduling on timestamp into Tx queues. */
        mlx5_txq_dynf_timestamp_set(dev);
        /*
         * In non-cached mode, only the default mreg copy action needs to be
         * started, and no application-created flow exists anymore.
         * But it is worth wrapping the interface for further usage.
         */
        ret = mlx5_flow_start_default(dev);
        if (ret) {
                DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        rte_wmb();
        dev->tx_pkt_burst = mlx5_select_tx_function(dev);
        dev->rx_pkt_burst = mlx5_select_rx_function(dev);
        /* Enable datapath on secondary process. */
        mlx5_mp_os_req_start_rxtx(dev);
        if (priv->sh->intr_handle.fd >= 0) {
                priv->sh->port[priv->dev_port - 1].ih_port_id =
                                        (uint32_t)dev->data->port_id;
        } else {
                DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
                        dev->data->port_id);
                dev->data->dev_conf.intr_conf.lsc = 0;
                dev->data->dev_conf.intr_conf.rmv = 0;
        }
        if (priv->sh->intr_handle_devx.fd >= 0)
                priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
                                        (uint32_t)dev->data->port_id;
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        /* Rollback. */
        dev->data->dev_started = 0;
        mlx5_flow_stop_default(dev);
        mlx5_traffic_disable(dev);
        mlx5_txq_stop(dev);
        mlx5_rxq_stop(dev);
        mlx5_txpp_stop(dev); /* Stop last. */
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}
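
/*
 * Illustrative sketch, compiled out: the usual ethdev sequence on the
 * application side that ends up in mlx5_dev_start() above for a mlx5 port.
 * The queue counts, descriptor numbers and mempool are hypothetical.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLES
static int
example_port_start(uint16_t port_id, struct rte_mempool *mp)
{
        struct rte_eth_conf conf = { 0 };
        int ret;

        /* 1 Rx + 1 Tx queue with the default device configuration. */
        ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
        if (ret != 0)
                return ret;
        ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
                                     NULL, mp);
        if (ret != 0)
                return ret;
        ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), NULL);
        if (ret != 0)
                return ret;
        return rte_eth_dev_start(port_id);
}
#endif /* MLX5_TRIGGER_DOC_EXAMPLES */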

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 always (this callback does not fail).
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;

        dev->data->dev_started = 0;
        /* Prevent crashes when queues are still in use. */
        dev->rx_pkt_burst = removed_rx_burst;
        dev->tx_pkt_burst = removed_tx_burst;
        rte_wmb();
        /* Disable datapath on secondary process. */
        mlx5_mp_os_req_stop_rxtx(dev);
        usleep(1000 * priv->rxqs_n);
        DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
        mlx5_flow_stop_default(dev);
        /* Control flows for default traffic can be removed first. */
        mlx5_traffic_disable(dev);
        /* All Rx queue flags will be cleared in the flush interface. */
        mlx5_flow_list_flush(dev, &priv->flows, true);
        mlx5_rx_intr_vec_disable(dev);
        priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
        priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
        mlx5_txq_stop(dev);
        mlx5_rxq_stop(dev);
        mlx5_txpp_stop(dev);

        return 0;
}
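
/*
 * Illustrative sketch, compiled out: a stop/start cycle from the
 * application, which ends up in mlx5_dev_stop() and mlx5_dev_start() above.
 * port_id is hypothetical.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLES
static int
example_port_restart(uint16_t port_id)
{
        int ret;

        ret = rte_eth_dev_stop(port_id);
        if (ret != 0)
                return ret;
        return rte_eth_dev_start(port_id);
}
#endif /* MLX5_TRIGGER_DOC_EXAMPLES */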

/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct rte_flow_item_eth bcast = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        struct rte_flow_item_eth ipv6_multi_spec = {
                .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
        };
        struct rte_flow_item_eth ipv6_multi_mask = {
                .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
        };
        struct rte_flow_item_eth unicast = {
                .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
        };
        struct rte_flow_item_eth unicast_mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        const unsigned int vlan_filter_n = priv->vlan_filter_n;
        const struct rte_ether_addr cmp = {
                .addr_bytes = "\x00\x00\x00\x00\x00\x00",
        };
        unsigned int i;
        unsigned int j;
        int ret;

        /*
         * The hairpin Tx queue default flow should be created no matter
         * whether isolated mode is enabled; otherwise all packets to be
         * sent will go out directly without the Tx flow actions, e.g.
         * encapsulation.
         */
        for (i = 0; i != priv->txqs_n; ++i) {
                struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

                if (!txq_ctrl)
                        continue;
                if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
                        ret = mlx5_ctrl_flow_source_queue(dev, i);
                        if (ret) {
                                mlx5_txq_release(dev, i);
                                goto error;
                        }
                }
                mlx5_txq_release(dev, i);
        }
        if (priv->config.dv_esw_en && !priv->config.vf) {
                if (mlx5_flow_create_esw_table_zero_flow(dev))
                        priv->fdb_def_rule = 1;
                else
                        DRV_LOG(INFO, "port %u FDB default rule cannot be"
                                " configured - only Eswitch group 0 flows are"
                                " supported.", dev->data->port_id);
        }
        if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
                ret = mlx5_flow_lacp_miss(dev);
                if (ret)
                        DRV_LOG(INFO, "port %u LACP rule cannot be created - "
                                "forward LACP to kernel.", dev->data->port_id);
                else
                        DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
                                dev->data->port_id);
        }
        if (priv->isolated)
                return 0;
        if (dev->data->promiscuous) {
                struct rte_flow_item_eth promisc = {
                        .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .type = 0,
                };

                ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
                if (ret)
                        goto error;
        }
        if (dev->data->all_multicast) {
                struct rte_flow_item_eth multicast = {
                        .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
                        .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .type = 0,
                };

                ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
                if (ret)
                        goto error;
        } else {
                /* Add broadcast/multicast flows. */
                for (i = 0; i != vlan_filter_n; ++i) {
                        uint16_t vlan = priv->vlan_filter[i];

                        struct rte_flow_item_vlan vlan_spec = {
                                .tci = rte_cpu_to_be_16(vlan),
                        };
                        struct rte_flow_item_vlan vlan_mask =
                                rte_flow_item_vlan_mask;

                        ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
                                                  &vlan_spec, &vlan_mask);
                        if (ret)
                                goto error;
                        ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
                                                  &ipv6_multi_mask,
                                                  &vlan_spec, &vlan_mask);
                        if (ret)
                                goto error;
                }
                if (!vlan_filter_n) {
                        ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
                        if (ret)
                                goto error;
                        ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
                                             &ipv6_multi_mask);
                        if (ret)
                                goto error;
                }
        }
        /* Add MAC address flows. */
        for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
                struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

                if (!memcmp(mac, &cmp, sizeof(*mac)))
                        continue;
                memcpy(&unicast.dst.addr_bytes,
                       mac->addr_bytes,
                       RTE_ETHER_ADDR_LEN);
                for (j = 0; j != vlan_filter_n; ++j) {
                        uint16_t vlan = priv->vlan_filter[j];

                        struct rte_flow_item_vlan vlan_spec = {
                                .tci = rte_cpu_to_be_16(vlan),
                        };
                        struct rte_flow_item_vlan vlan_mask =
                                rte_flow_item_vlan_mask;

                        ret = mlx5_ctrl_flow_vlan(dev, &unicast,
                                                  &unicast_mask,
                                                  &vlan_spec,
                                                  &vlan_mask);
                        if (ret)
                                goto error;
                }
                if (!vlan_filter_n) {
                        ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
                        if (ret)
                                goto error;
                }
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}
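
/*
 * Illustrative sketch, compiled out: the broadcast control flow installed
 * above, spelled out as a generic rte_flow pattern (match destination MAC
 * ff:ff:ff:ff:ff:ff on ingress). This is a documentation aid only; the
 * driver builds the rule through its internal mlx5_ctrl_flow() helpers.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLES
static const struct rte_flow_item_eth example_bcast_eth = {
        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
};
static const struct rte_flow_item example_bcast_pattern[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &example_bcast_eth,
                .mask = &example_bcast_eth,
        },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_TRIGGER_DOC_EXAMPLES */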

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;

        mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
        if (dev->data->dev_started) {
                mlx5_traffic_disable(dev);
                return mlx5_traffic_enable(dev);
        }
        return 0;
}