/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->txqs_n; ++i)
                mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;
        int ret;

        for (i = 0; i != priv->txqs_n; ++i) {
                struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
                struct mlx5_txq_data *txq_data;
                uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

                if (!txq_ctrl)
                        continue;
                /* Dereference only after the NULL check above. */
                txq_data = &txq_ctrl->txq;
                if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
                        txq_alloc_elts(txq_ctrl);
                MLX5_ASSERT(!txq_ctrl->obj);
                txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
                                            0, txq_ctrl->socket);
                if (!txq_ctrl->obj) {
                        DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
                                "memory resources.", dev->data->port_id,
                                txq_data->idx);
                        rte_errno = ENOMEM;
                        goto error;
                }
                ret = priv->obj_ops.txq_obj_new(dev, i);
                if (ret < 0) {
                        mlx5_free(txq_ctrl->obj);
                        txq_ctrl->obj = NULL;
                        goto error;
                }
                if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
                        size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

                        txq_data->fcqs = mlx5_malloc(flags, size,
                                                     RTE_CACHE_LINE_SIZE,
                                                     txq_ctrl->socket);
                        if (!txq_data->fcqs) {
                                DRV_LOG(ERR, "Port %u Tx queue %u cannot "
                                        "allocate memory (FCQ).",
                                        dev->data->port_id, i);
                                rte_errno = ENOMEM;
                                goto error;
                        }
                }
                DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
                        dev->data->port_id, i, (void *)&txq_ctrl->obj);
                LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        do {
                mlx5_txq_release(dev, i);
        } while (i-- != 0);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}
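
/*
 * Illustrative sketch, not part of the driver: the save/cleanup/restore
 * error-handling idiom used by mlx5_txq_start() above and mlx5_rxq_start()
 * below. rte_errno is saved before cleanup because the release calls may
 * overwrite it, and restored so the caller still sees the original failure.
 * example_queue_setup()/example_queue_release() are hypothetical helpers.
 */
#if 0
static int
example_start_with_rollback(struct rte_eth_dev *dev, unsigned int n)
{
        unsigned int i;
        int ret;

        for (i = 0; i != n; ++i) {
                if (example_queue_setup(dev, i) != 0)
                        goto error;
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        do {
                /* Walk back from the failed index down to 0. */
                example_queue_release(dev, i);
        } while (i-- != 0);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}
#endif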

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->rxqs_n; ++i)
                mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;
        int ret = 0;

        /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
        if (mlx5_mprq_alloc_mp(dev)) {
                /* Should not release Rx queues but return immediately. */
                return -rte_errno;
        }
        DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
                dev->data->port_id, priv->sh->device_attr.max_qp_wr);
        DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
                dev->data->port_id, priv->sh->device_attr.max_sge);
        for (i = 0; i != priv->rxqs_n; ++i) {
                struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

                if (!rxq_ctrl)
                        continue;
                if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
                        /* Pre-register Rx mempools. */
                        if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
                                mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
                                                  rxq_ctrl->rxq.mprq_mp);
                        } else {
                                uint32_t s;

                                for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
                                        mlx5_mr_update_mp
                                                (dev, &rxq_ctrl->rxq.mr_ctrl,
                                                rxq_ctrl->rxq.rxseg[s].mp);
                        }
                        ret = rxq_alloc_elts(rxq_ctrl);
                        if (ret)
                                goto error;
                }
                MLX5_ASSERT(!rxq_ctrl->obj);
                rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
                                            sizeof(*rxq_ctrl->obj), 0,
                                            rxq_ctrl->socket);
                if (!rxq_ctrl->obj) {
                        DRV_LOG(ERR,
                                "Port %u Rx queue %u can't allocate resources.",
                                dev->data->port_id, (*priv->rxqs)[i]->idx);
                        rte_errno = ENOMEM;
                        goto error;
                }
                ret = priv->obj_ops.rxq_obj_new(dev, i);
                if (ret) {
                        mlx5_free(rxq_ctrl->obj);
                        rxq_ctrl->obj = NULL;
                        goto error;
                }
                DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
                        dev->data->port_id, i, (void *)&rxq_ctrl->obj);
                LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        do {
                mlx5_rxq_release(dev, i);
        } while (i-- != 0);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}

/**
 * Bind Tx queues to their target Rx queues for hairpin.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
        struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
        struct mlx5_txq_ctrl *txq_ctrl;
        struct mlx5_rxq_ctrl *rxq_ctrl;
        struct mlx5_devx_obj *sq;
        struct mlx5_devx_obj *rq;
        unsigned int i;
        int ret = 0;
        bool need_auto = false;
        uint16_t self_port = dev->data->port_id;

        for (i = 0; i != priv->txqs_n; ++i) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (!txq_ctrl)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (txq_ctrl->hairpin_conf.peers[0].port != self_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (txq_ctrl->hairpin_conf.manual_bind) {
                        mlx5_txq_release(dev, i);
                        return 0;
                }
                need_auto = true;
                mlx5_txq_release(dev, i);
        }
        if (!need_auto)
                return 0;
        for (i = 0; i != priv->txqs_n; ++i) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (!txq_ctrl)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                /* Skip hairpin queues with other peer ports. */
                if (txq_ctrl->hairpin_conf.peers[0].port != self_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (!txq_ctrl->obj) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no txq object found: %d",
                                dev->data->port_id, i);
                        mlx5_txq_release(dev, i);
                        return -rte_errno;
                }
                sq = txq_ctrl->obj->sq;
                rxq_ctrl = mlx5_rxq_get(dev,
                                        txq_ctrl->hairpin_conf.peers[0].queue);
                if (!rxq_ctrl) {
                        mlx5_txq_release(dev, i);
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u no rxq object found: %d",
                                dev->data->port_id,
                                txq_ctrl->hairpin_conf.peers[0].queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
                    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
                                "Rx queue %d", dev->data->port_id,
                                i, txq_ctrl->hairpin_conf.peers[0].queue);
                        goto error;
                }
                rq = rxq_ctrl->obj->rq;
                if (!rq) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
                                dev->data->port_id,
                                txq_ctrl->hairpin_conf.peers[0].queue);
                        goto error;
                }
                sq_attr.state = MLX5_SQC_STATE_RDY;
                sq_attr.sq_state = MLX5_SQC_STATE_RST;
                sq_attr.hairpin_peer_rq = rq->id;
                sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
                ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
                if (ret)
                        goto error;
                rq_attr.state = MLX5_SQC_STATE_RDY;
                rq_attr.rq_state = MLX5_SQC_STATE_RST;
                rq_attr.hairpin_peer_sq = sq->id;
                rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
                ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
                if (ret)
                        goto error;
                /* Qs with auto-bind will be destroyed directly. */
                rxq_ctrl->hairpin_status = 1;
                txq_ctrl->hairpin_status = 1;
                mlx5_txq_release(dev, i);
                mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
        }
        return 0;
error:
        mlx5_txq_release(dev, i);
        mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
        return -rte_errno;
}
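
/*
 * Illustrative application-side sketch, not part of the driver: configuring
 * a same-port hairpin queue pair that mlx5_hairpin_auto_bind() above binds
 * at device start. manual_bind and tx_explicit are left at 0 so the
 * auto-bind path is taken; the descriptor count and the
 * example_hairpin_queue_setup() name are hypothetical.
 */
#if 0
static int
example_hairpin_queue_setup(uint16_t port_id, uint16_t rxq_id,
                            uint16_t txq_id)
{
        struct rte_eth_hairpin_conf rx_conf = {
                .peer_count = 1,
                .peers[0] = { .port = port_id, .queue = txq_id },
        };
        struct rte_eth_hairpin_conf tx_conf = {
                .peer_count = 1,
                .peers[0] = { .port = port_id, .queue = rxq_id },
        };
        int ret;

        ret = rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, 512, &rx_conf);
        if (ret != 0)
                return ret;
        return rte_eth_tx_hairpin_queue_setup(port_id, txq_id, 512, &tx_conf);
}
#endif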

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information from.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the output structure storing the fetched information.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
                               struct rte_hairpin_peer_info *current_info,
                               struct rte_hairpin_peer_info *peer_info,
                               uint32_t direction)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        RTE_SET_USED(current_info);

        if (dev->data->dev_started == 0) {
                rte_errno = EBUSY;
                DRV_LOG(ERR, "peer port %u is not started",
                        dev->data->port_id);
                return -rte_errno;
        }
        /*
         * Peer port used as egress. In the current design, the hairpin Tx
         * queue will be bound to the peer Rx queue, so only the information
         * of the peer Rx queue needs to be fetched.
         */
        if (direction == 0) {
                struct mlx5_txq_ctrl *txq_ctrl;

                txq_ctrl = mlx5_txq_get(dev, peer_queue);
                if (txq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
                                dev->data->port_id, peer_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
                                dev->data->port_id, peer_queue);
                        mlx5_txq_release(dev, peer_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Txq object found: %d",
                                dev->data->port_id, peer_queue);
                        mlx5_txq_release(dev, peer_queue);
                        return -rte_errno;
                }
                peer_info->qp_id = txq_ctrl->obj->sq->id;
                peer_info->vhca_id = priv->config.hca_attr.vhca_id;
                /* 1-to-1 mapping, only the first one is used. */
                peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
                peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
                peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
                mlx5_txq_release(dev, peer_queue);
        } else { /* Peer port used as ingress. */
                struct mlx5_rxq_ctrl *rxq_ctrl;

                rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
                if (rxq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
                                dev->data->port_id, peer_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
                                dev->data->port_id, peer_queue);
                        mlx5_rxq_release(dev, peer_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Rxq object found: %d",
                                dev->data->port_id, peer_queue);
                        mlx5_rxq_release(dev, peer_queue);
                        return -rte_errno;
                }
                peer_info->qp_id = rxq_ctrl->obj->rq->id;
                peer_info->vhca_id = priv->config.hca_attr.vhca_id;
                peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
                peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
                peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
                mlx5_rxq_release(dev, peer_queue);
        }
        return 0;
}

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice, once for the Tx queue and once for the Rx
 * queue of a pair. If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
                             struct rte_hairpin_peer_info *peer_info,
                             uint32_t direction)
{
        int ret = 0;

        /*
         * Consistency checking of the peer queue: the peer queue info was
         * fetched with the opposite direction and the ethdev port ID, so no
         * further check is needed here.
         */
        if (peer_info->peer_q != cur_queue) {
                rte_errno = EINVAL;
                DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
                        dev->data->port_id, cur_queue, peer_info->peer_q);
                return -rte_errno;
        }
        if (direction != 0) {
                struct mlx5_txq_ctrl *txq_ctrl;
                struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

                txq_ctrl = mlx5_txq_get(dev, cur_queue);
                if (txq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Txq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->hairpin_status != 0) {
                        DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return 0;
                }
                /*
                 * Consistency checking of all queues of one port is done in
                 * the bind() function, and that check is optional.
                 */
                if (peer_info->tx_explicit !=
                    txq_ctrl->hairpin_conf.tx_explicit) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (peer_info->manual_bind !=
                    txq_ctrl->hairpin_conf.manual_bind) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                sq_attr.state = MLX5_SQC_STATE_RDY;
                sq_attr.sq_state = MLX5_SQC_STATE_RST;
                sq_attr.hairpin_peer_rq = peer_info->qp_id;
                sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
                ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
                if (ret == 0)
                        txq_ctrl->hairpin_status = 1;
                mlx5_txq_release(dev, cur_queue);
        } else {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

                rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
                if (rxq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Rxq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->hairpin_status != 0) {
                        DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return 0;
                }
                if (peer_info->tx_explicit !=
                    rxq_ctrl->hairpin_conf.tx_explicit) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (peer_info->manual_bind !=
                    rxq_ctrl->hairpin_conf.manual_bind) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
                                " mismatch", dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                rq_attr.state = MLX5_SQC_STATE_RDY;
                rq_attr.rq_state = MLX5_SQC_STATE_RST;
                rq_attr.hairpin_peer_sq = peer_info->qp_id;
                rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
                ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
                if (ret == 0)
                        rxq_ctrl->hairpin_status = 1;
                mlx5_rxq_release(dev, cur_queue);
        }
        return ret;
}

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice, once for the Tx queue and once for the Rx
 * queue of a pair. If the queue is already unbound, it is considered
 * successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
                               uint32_t direction)
{
        int ret = 0;

        if (direction != 0) {
                struct mlx5_txq_ctrl *txq_ctrl;
                struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

                txq_ctrl = mlx5_txq_get(dev, cur_queue);
                if (txq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                /* Already unbound, return success before obj checking. */
                if (txq_ctrl->hairpin_status == 0) {
                        DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return 0;
                }
                if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Txq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_txq_release(dev, cur_queue);
                        return -rte_errno;
                }
                sq_attr.state = MLX5_SQC_STATE_RST;
                sq_attr.sq_state = MLX5_SQC_STATE_RST;
                ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
                if (ret == 0)
                        txq_ctrl->hairpin_status = 0;
                mlx5_txq_release(dev, cur_queue);
        } else {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

                rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
                if (rxq_ctrl == NULL) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
                                dev->data->port_id, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                        rte_errno = EINVAL;
                        DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                if (rxq_ctrl->hairpin_status == 0) {
                        DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return 0;
                }
                if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
                        rte_errno = ENOMEM;
                        DRV_LOG(ERR, "port %u no Rxq object found: %d",
                                dev->data->port_id, cur_queue);
                        mlx5_rxq_release(dev, cur_queue);
                        return -rte_errno;
                }
                rq_attr.state = MLX5_SQC_STATE_RST;
                rq_attr.rq_state = MLX5_SQC_STATE_RST;
                ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
                if (ret == 0)
                        rxq_ctrl->hairpin_status = 0;
                mlx5_rxq_release(dev, cur_queue);
        }
        return ret;
}
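
/*
 * Illustrative sketch, not part of the driver: the per-queue handshake that
 * mlx5_hairpin_bind_single_port() below drives with the helpers above.
 * The peer Rx queue information is fetched first, programmed into the local
 * Tx queue, and the Tx queue information is then pushed to the peer Rx
 * queue. The function name and parameters are hypothetical, and tx_info
 * would additionally carry the local SQ id, vhca id and binding modes.
 */
#if 0
static int
example_hairpin_pair_bind(struct rte_eth_dev *tx_dev, uint16_t tx_q,
                          uint16_t rx_port, uint16_t rx_q)
{
        struct rte_hairpin_peer_info rx_info = { 0 };
        struct rte_hairpin_peer_info tx_info = { 0 };
        int ret;

        /* Direction 1: fetch the peer port's Rx queue information. */
        ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_q, NULL,
                                                &rx_info, 1);
        if (ret != 0)
                return ret;
        /* Direction 1: program the peer Rx info into the local Tx queue. */
        ret = mlx5_hairpin_queue_peer_bind(tx_dev, tx_q, &rx_info, 1);
        if (ret != 0)
                return ret;
        tx_info.peer_q = rx_q;
        /* Direction 0: program the local Tx info into the peer Rx queue. */
        return rte_eth_hairpin_queue_peer_bind(rx_port, rx_q, &tx_info, 0);
}
#endif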

/*
 * Bind the hairpin port pair, from the Tx side to the peer Rx side.
 * This function only supports binding the Tx side to a single Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        int ret = 0;
        struct mlx5_txq_ctrl *txq_ctrl;
        uint32_t i;
        struct rte_hairpin_peer_info peer = {0xffffff};
        struct rte_hairpin_peer_info cur;
        const struct rte_eth_hairpin_conf *conf;
        uint16_t num_q = 0;
        uint16_t local_port = priv->dev_data->port_id;
        uint32_t manual;
        uint32_t explicit;
        uint16_t rx_queue;

        if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
                rte_errno = ENODEV;
                DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
                return -rte_errno;
        }
        /*
         * Before binding TxQ to peer RxQ, a first pass over the queues checks
         * their configuration consistency. This costs a little time but is
         * better than having to roll back afterwards.
         */
        for (i = 0; i != priv->txqs_n; i++) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                /*
                 * All hairpin Tx queues of a single port connected to the
                 * same peer Rx port should have the same "auto binding" and
                 * "implicit Tx flow" modes.
                 * Peer consistency checking will be done in per queue binding.
                 */
                conf = &txq_ctrl->hairpin_conf;
                if (conf->peers[0].port == rx_port) {
                        if (num_q == 0) {
                                manual = conf->manual_bind;
                                explicit = conf->tx_explicit;
                        } else {
                                if (manual != conf->manual_bind ||
                                    explicit != conf->tx_explicit) {
                                        rte_errno = EINVAL;
                                        DRV_LOG(ERR, "port %u queue %d mode"
                                                " mismatch: %u %u, %u %u",
                                                local_port, i, manual,
                                                conf->manual_bind, explicit,
                                                conf->tx_explicit);
                                        mlx5_txq_release(dev, i);
                                        return -rte_errno;
                                }
                        }
                        num_q++;
                }
                mlx5_txq_release(dev, i);
        }
        /* If no queue is configured, return success directly. */
        if (num_q == 0)
                return ret;
        /* All the hairpin Tx queues need to be traversed again. */
        for (i = 0; i != priv->txqs_n; i++) {
                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
                /*
                 * Fetch peer RxQ's information.
                 * No need to pass the information of the current queue.
                 */
                ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
                                                        NULL, &peer, 1);
                if (ret != 0) {
                        mlx5_txq_release(dev, i);
                        goto error;
                }
                /* Accessing its own device, inside mlx5 PMD. */
                ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
                if (ret != 0) {
                        mlx5_txq_release(dev, i);
                        goto error;
                }
                /* Pass TxQ's information to peer RxQ and try binding. */
                cur.peer_q = rx_queue;
                cur.qp_id = txq_ctrl->obj->sq->id;
                cur.vhca_id = priv->config.hca_attr.vhca_id;
                cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
                cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
                /*
                 * Accessing another device properly requires the RTE-level
                 * private function.
                 */
                ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
                                                      &cur, 0);
                if (ret != 0) {
                        mlx5_txq_release(dev, i);
                        goto error;
                }
                mlx5_txq_release(dev, i);
        }
        return 0;
error:
        /*
         * Do roll-back process for the queues already bound.
         * No need to check the return value of the queue unbind function.
         */
        do {
                /* No validation is needed here. */
                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
                rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
                mlx5_hairpin_queue_peer_unbind(dev, i, 1);
                mlx5_txq_release(dev, i);
        } while (i--);
        return ret;
}

/*
 * Unbind the hairpin port pair; the HW configuration of both devices will be
 * cleared and the status reset for all the queues used between them.
 * This function only supports unbinding the Tx side from a single Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *txq_ctrl;
        uint32_t i;
        int ret;
        uint16_t cur_port = priv->dev_data->port_id;

        if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
                rte_errno = ENODEV;
                DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
                return -rte_errno;
        }
        for (i = 0; i != priv->txqs_n; i++) {
                uint16_t rx_queue;

                txq_ctrl = mlx5_txq_get(dev, i);
                if (txq_ctrl == NULL)
                        continue;
                if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
                        mlx5_txq_release(dev, i);
                        continue;
                }
                /* Indeed, only the first used queue needs to be checked. */
                if (txq_ctrl->hairpin_conf.manual_bind == 0) {
                        if (cur_port != rx_port) {
                                rte_errno = EINVAL;
                                DRV_LOG(ERR, "port %u and port %u are in"
                                        " auto-bind mode", cur_port, rx_port);
                                mlx5_txq_release(dev, i);
                                return -rte_errno;
                        } else {
                                return 0;
                        }
                }
                rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
                mlx5_txq_release(dev, i);
                ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
                if (ret) {
                        DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
                                rx_port, rx_queue);
                        return ret;
                }
                ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
                if (ret) {
                        DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
                                cur_port, i);
                        return ret;
                }
        }
        return 0;
}

/*
 * Bind hairpin ports; Rx may be all ports when RTE_MAX_ETHPORTS is used.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
        int ret = 0;
        uint16_t p, pp;
        struct mlx5_priv *priv = dev->data->dev_private;

        /*
         * If the Rx port has no hairpin configuration with the current port,
         * the binding will be skipped in the called function of single port.
         * Device started status will be checked only before the queue
         * information updating.
         */
        if (rx_port == RTE_MAX_ETHPORTS) {
                MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
                        ret = mlx5_hairpin_bind_single_port(dev, p);
                        if (ret != 0)
                                goto unbind;
                }
                return ret;
        } else {
                return mlx5_hairpin_bind_single_port(dev, rx_port);
        }
unbind:
        MLX5_ETH_FOREACH_DEV(pp, priv->pci_dev)
                if (pp < p)
                        mlx5_hairpin_unbind_single_port(dev, pp);
        return ret;
}
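
/*
 * Illustrative application-side sketch, not part of the driver: binding two
 * ports in both directions for bidirectional hairpin traffic after both
 * were started in manual-bind mode. rte_eth_hairpin_bind() dispatches to
 * mlx5_hairpin_bind() above; p0/p1 and the function name are hypothetical.
 */
#if 0
static int
example_hairpin_ports_bind(uint16_t p0, uint16_t p1)
{
        int ret;

        ret = rte_eth_hairpin_bind(p0, p1); /* Tx side of p0 to Rx of p1. */
        if (ret != 0)
                return ret;
        ret = rte_eth_hairpin_bind(p1, p0); /* Tx side of p1 to Rx of p0. */
        if (ret != 0)
                rte_eth_hairpin_unbind(p0, p1);
        return ret;
}
#endif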

/*
 * Unbind hairpin ports; Rx may be all ports when RTE_MAX_ETHPORTS is used.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
        int ret = 0;
        uint16_t p;
        struct mlx5_priv *priv = dev->data->dev_private;

        if (rx_port == RTE_MAX_ETHPORTS)
                MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
                        ret = mlx5_hairpin_unbind_single_port(dev, p);
                        if (ret != 0)
                                return ret;
                }
        else
                ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
        return ret;
}

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, and possibly in a different order from that used
 * when setting up the hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current port used as Tx to get all peer Rx ports.
 *   zero - current port used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or positive value on success, actual number of peer ports.
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
                            size_t len, uint32_t direction)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *txq_ctrl;
        struct mlx5_rxq_ctrl *rxq_ctrl;
        uint32_t i;
        uint16_t pp;
        uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
        int ret = 0;

        if (direction) {
                for (i = 0; i < priv->txqs_n; i++) {
                        txq_ctrl = mlx5_txq_get(dev, i);
                        if (!txq_ctrl)
                                continue;
                        if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
                                mlx5_txq_release(dev, i);
                                continue;
                        }
                        pp = txq_ctrl->hairpin_conf.peers[0].port;
                        if (pp >= RTE_MAX_ETHPORTS) {
                                rte_errno = ERANGE;
                                mlx5_txq_release(dev, i);
                                DRV_LOG(ERR, "port %hu queue %u peer port "
                                        "out of range %hu",
                                        priv->dev_data->port_id, i, pp);
                                return -rte_errno;
                        }
                        bits[pp / 32] |= 1 << (pp % 32);
                        mlx5_txq_release(dev, i);
                }
        } else {
                for (i = 0; i < priv->rxqs_n; i++) {
                        rxq_ctrl = mlx5_rxq_get(dev, i);
                        if (!rxq_ctrl)
                                continue;
                        if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
                                mlx5_rxq_release(dev, i);
                                continue;
                        }
                        pp = rxq_ctrl->hairpin_conf.peers[0].port;
                        if (pp >= RTE_MAX_ETHPORTS) {
                                rte_errno = ERANGE;
                                mlx5_rxq_release(dev, i);
                                DRV_LOG(ERR, "port %hu queue %u peer port "
                                        "out of range %hu",
                                        priv->dev_data->port_id, i, pp);
                                return -rte_errno;
                        }
                        bits[pp / 32] |= 1 << (pp % 32);
                        mlx5_rxq_release(dev, i);
                }
        }
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (bits[i / 32] & (1 << (i % 32))) {
                        if ((size_t)ret >= len) {
                                rte_errno = E2BIG;
                                return -rte_errno;
                        }
                        peer_ports[ret++] = i;
                }
        }
        return ret;
}
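
/*
 * Illustrative application-side sketch, not part of the driver: listing the
 * peer Rx ports of a port's hairpin Tx queues and unbinding them one by
 * one. rte_eth_hairpin_get_peer_ports() dispatches to the callback above;
 * the function name is hypothetical.
 */
#if 0
static int
example_hairpin_unbind_all_peers(uint16_t port_id)
{
        uint16_t peers[RTE_MAX_ETHPORTS];
        int n;
        int k;

        /* Direction 1: the current port is used as Tx. */
        n = rte_eth_hairpin_get_peer_ports(port_id, peers, RTE_DIM(peers), 1);
        if (n < 0)
                return n;
        for (k = 0; k < n; k++)
                rte_eth_hairpin_unbind(port_id, peers[k]);
        return 0;
}
#endif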

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        int ret;
        int fine_inline;

        DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
        fine_inline = rte_mbuf_dynflag_lookup
                (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
        if (fine_inline >= 0)
                rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
        else
                rte_net_mlx5_dynf_inline_mask = 0;
        if (dev->data->nb_rx_queues > 0) {
                ret = mlx5_dev_configure_rss_reta(dev);
                if (ret) {
                        DRV_LOG(ERR, "port %u reta config failed: %s",
                                dev->data->port_id, strerror(rte_errno));
                        return -rte_errno;
                }
        }
        ret = mlx5_txpp_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        ret = mlx5_txq_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        ret = mlx5_rxq_start(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        /*
         * This step will be skipped if no hairpin Tx queue is configured
         * with an Rx peer queue from the same device.
         */
        ret = mlx5_hairpin_auto_bind(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        /* Set started flag here for the following steps like control flow. */
        dev->data->dev_started = 1;
        ret = mlx5_rx_intr_vec_enable(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
                        dev->data->port_id);
                goto error;
        }
        mlx5_os_stats_init(dev);
        ret = mlx5_traffic_enable(dev);
        if (ret) {
                DRV_LOG(ERR, "port %u failed to set default flows",
                        dev->data->port_id);
                goto error;
        }
        /* Set a mask and offset of dynamic metadata flows into Rx queues. */
        mlx5_flow_rxq_dynf_metadata_set(dev);
        /* Set flags and context to convert Rx timestamps. */
        mlx5_rxq_timestamp_set(dev);
        /* Set a mask and offset of scheduling on timestamp into Tx queues. */
        mlx5_txq_dynf_timestamp_set(dev);
        /*
         * In non-cached mode, only the default mreg copy action needs to be
         * started, since no application-created flow exists anymore.
         * But it is worth wrapping the interface for further usage.
         */
        ret = mlx5_flow_start_default(dev);
        if (ret) {
                DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
                        dev->data->port_id, strerror(rte_errno));
                goto error;
        }
        rte_wmb();
        dev->tx_pkt_burst = mlx5_select_tx_function(dev);
        dev->rx_pkt_burst = mlx5_select_rx_function(dev);
        /* Enable datapath on secondary process. */
        mlx5_mp_os_req_start_rxtx(dev);
        if (priv->sh->intr_handle.fd >= 0) {
                priv->sh->port[priv->dev_port - 1].ih_port_id =
                                        (uint32_t)dev->data->port_id;
        } else {
                DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
                        dev->data->port_id);
                dev->data->dev_conf.intr_conf.lsc = 0;
                dev->data->dev_conf.intr_conf.rmv = 0;
        }
        if (priv->sh->intr_handle_devx.fd >= 0)
                priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
                                        (uint32_t)dev->data->port_id;
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        /* Rollback. */
        dev->data->dev_started = 0;
        mlx5_flow_stop_default(dev);
        mlx5_traffic_disable(dev);
        mlx5_txq_stop(dev);
        mlx5_rxq_stop(dev);
        mlx5_txpp_stop(dev); /* Stop last. */
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}
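
/*
 * Illustrative application-side sketch, an assumption rather than a
 * documented recipe: registering the fine-granularity inline dynamic flag
 * before starting the port, so that the rte_mbuf_dynflag_lookup() call at
 * the top of mlx5_dev_start() above succeeds and sets
 * rte_net_mlx5_dynf_inline_mask. The function name is hypothetical.
 */
#if 0
static int
example_register_fine_inline_flag(uint16_t port_id)
{
        static const struct rte_mbuf_dynflag flag_desc = {
                .name = RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
        };
        int bit = rte_mbuf_dynflag_register(&flag_desc);

        if (bit < 0)
                return -rte_errno;
        /* The flag (1ULL << bit) can then be set in mbuf->ol_flags. */
        return rte_eth_dev_start(port_id);
}
#endif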

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;

        dev->data->dev_started = 0;
        /* Prevent crashes when queues are still in use. */
        dev->rx_pkt_burst = removed_rx_burst;
        dev->tx_pkt_burst = removed_tx_burst;
        rte_wmb();
        /* Disable datapath on secondary process. */
        mlx5_mp_os_req_stop_rxtx(dev);
        rte_delay_us_sleep(1000 * priv->rxqs_n);
        DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
        mlx5_flow_stop_default(dev);
        /* Control flows for default traffic can be removed first. */
        mlx5_traffic_disable(dev);
        /* All Rx queue flags will be cleared in the flush interface. */
        mlx5_flow_list_flush(dev, &priv->flows, true);
        mlx5_rx_intr_vec_disable(dev);
        priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
        priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
        mlx5_txq_stop(dev);
        mlx5_rxq_stop(dev);
        mlx5_txpp_stop(dev);

        return 0;
}
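
/*
 * Illustrative application-side sketch, not part of the driver: the ethdev
 * calls that land in mlx5_dev_start() and mlx5_dev_stop() above. The
 * function name is hypothetical; configuration and queue setup must have
 * been done already.
 */
#if 0
static int
example_port_start_stop(uint16_t port_id)
{
        int ret;

        ret = rte_eth_dev_start(port_id);
        if (ret < 0)
                return ret;
        /* ... datapath runs here ... */
        return rte_eth_dev_stop(port_id);
}
#endif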

/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct rte_flow_item_eth bcast = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        struct rte_flow_item_eth ipv6_multi_spec = {
                .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
        };
        struct rte_flow_item_eth ipv6_multi_mask = {
                .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
        };
        struct rte_flow_item_eth unicast = {
                .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
        };
        struct rte_flow_item_eth unicast_mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        const unsigned int vlan_filter_n = priv->vlan_filter_n;
        const struct rte_ether_addr cmp = {
                .addr_bytes = "\x00\x00\x00\x00\x00\x00",
        };
        unsigned int i;
        unsigned int j;
        int ret;

        /*
         * The hairpin Tx queue default flow should be created no matter
         * whether isolation mode is enabled. Otherwise all the packets to
         * be sent would go out directly without the Tx flow actions, e.g.
         * encapsulation.
         */
        for (i = 0; i != priv->txqs_n; ++i) {
                struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
                if (!txq_ctrl)
                        continue;
                /* Only Tx implicit mode requires the default Tx flow. */
                if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
                    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
                    txq_ctrl->hairpin_conf.peers[0].port ==
                    priv->dev_data->port_id) {
                        ret = mlx5_ctrl_flow_source_queue(dev, i);
                        if (ret) {
                                mlx5_txq_release(dev, i);
                                goto error;
                        }
                }
                mlx5_txq_release(dev, i);
        }
        if (priv->config.dv_esw_en && !priv->config.vf) {
                if (mlx5_flow_create_esw_table_zero_flow(dev))
                        priv->fdb_def_rule = 1;
                else
                        DRV_LOG(INFO, "port %u FDB default rule cannot be"
                                " configured - only Eswitch group 0 flows are"
                                " supported.", dev->data->port_id);
        }
        if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
                ret = mlx5_flow_lacp_miss(dev);
                if (ret)
                        DRV_LOG(INFO, "port %u LACP rule cannot be created - "
                                "forward LACP to kernel.", dev->data->port_id);
                else
                        DRV_LOG(INFO, "LACP traffic will be missed in port %u."
                                , dev->data->port_id);
        }
        if (priv->isolated)
                return 0;
        if (dev->data->promiscuous) {
                struct rte_flow_item_eth promisc = {
                        .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .type = 0,
                };

                ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
                if (ret)
                        goto error;
        }
        if (dev->data->all_multicast) {
                struct rte_flow_item_eth multicast = {
                        .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
                        .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .type = 0,
                };

                ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
                if (ret)
                        goto error;
        } else {
                /* Add broadcast/multicast flows. */
                for (i = 0; i != vlan_filter_n; ++i) {
                        uint16_t vlan = priv->vlan_filter[i];

                        struct rte_flow_item_vlan vlan_spec = {
                                .tci = rte_cpu_to_be_16(vlan),
                        };
                        struct rte_flow_item_vlan vlan_mask =
                                rte_flow_item_vlan_mask;

                        ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
                                                  &vlan_spec, &vlan_mask);
                        if (ret)
                                goto error;
                        ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
                                                  &ipv6_multi_mask,
                                                  &vlan_spec, &vlan_mask);
                        if (ret)
                                goto error;
                }
                if (!vlan_filter_n) {
                        ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
                        if (ret)
                                goto error;
                        ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
                                             &ipv6_multi_mask);
                        if (ret)
                                goto error;
                }
        }
        /* Add MAC address flows. */
        for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
                struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

                if (!memcmp(mac, &cmp, sizeof(*mac)))
                        continue;
                memcpy(&unicast.dst.addr_bytes,
                       mac->addr_bytes,
                       RTE_ETHER_ADDR_LEN);
                for (j = 0; j != vlan_filter_n; ++j) {
                        uint16_t vlan = priv->vlan_filter[j];

                        struct rte_flow_item_vlan vlan_spec = {
                                .tci = rte_cpu_to_be_16(vlan),
                        };
                        struct rte_flow_item_vlan vlan_mask =
                                rte_flow_item_vlan_mask;

                        ret = mlx5_ctrl_flow_vlan(dev, &unicast,
                                                  &unicast_mask,
                                                  &vlan_spec,
                                                  &vlan_mask);
                        if (ret)
                                goto error;
                }
                if (!vlan_filter_n) {
                        ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
                        if (ret)
                                goto error;
                }
        }
        return 0;
error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
        mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
}
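
/*
 * Illustrative sketch, not part of the driver: a rough rte_flow-level
 * equivalent of the broadcast control flow installed above with
 * mlx5_ctrl_flow(dev, &bcast, &bcast). The real control flows are created
 * through the PMD-internal path, not rte_flow_create(); the function name
 * is hypothetical.
 */
#if 0
static void
example_broadcast_pattern(void)
{
        static struct rte_flow_item_eth bcast_spec = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        static const struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &bcast_spec,
                  .mask = &bcast_spec, },
                { .type = RTE_FLOW_ITEM_TYPE_END, },
        };

        (void)pattern; /* Would be passed to rte_flow_create(). */
}
#endif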

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;

        mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
        if (dev->data->dev_started) {
                mlx5_traffic_disable(dev);
                return mlx5_traffic_enable(dev);
        }
        return 0;
}