net/mlx5: add conditional hairpin auto bind
drivers/net/mlx5/mlx5_trigger.c (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		/* Check the control structure before touching its members. */
		if (!txq_ctrl)
			continue;
		txq_data = &txq_ctrl->txq;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
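
/*
 * The error paths above and below share a common rollback idiom: the first
 * failure reason is saved before releasing resources, because the queue
 * release helpers may themselves modify rte_errno. A sketch of the pattern:
 *
 *	err = rte_errno;            save why the start failed
 *	... release the queues ...  may clobber rte_errno
 *	rte_errno = err;            restore for the caller
 *	return -rte_errno;
 */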

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

		if (!rxq_ctrl)
			continue;
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
			/* Pre-register Rx mempools. */
			if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
				mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
						  rxq_ctrl->rxq.mprq_mp);
			} else {
				uint32_t s;

				for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
					mlx5_mr_update_mp
						(dev, &rxq_ctrl->rxq.mr_ctrl,
						rxq_ctrl->rxq.rxseg[s].mp);
			}
			ret = rxq_alloc_elts(rxq_ctrl);
			if (ret)
				goto error;
		}
		MLX5_ASSERT(!rxq_ctrl->obj);
		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
					    sizeof(*rxq_ctrl->obj), 0,
					    rxq_ctrl->socket);
		if (!rxq_ctrl->obj) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u can't allocate resources.",
				dev->data->port_id, (*priv->rxqs)[i]->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.rxq_obj_new(dev, i);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Bind Tx queues to the target Rx queues for hairpin.
 *
 * Only queue pairs whose peer is the same device and which are configured
 * for automatic binding are handled here.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Qs with auto-bind will be destroyed directly. */
		rxq_ctrl->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}
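
/*
 * For reference, a minimal application-level sketch (hypothetical queue IDs
 * and descriptor count) of the single-port hairpin setup that triggers the
 * automatic binding above at device start; rte_eth_rx_hairpin_queue_setup(),
 * rte_eth_tx_hairpin_queue_setup() and struct rte_eth_hairpin_conf are the
 * generic ethdev APIs:
 *
 *	struct rte_eth_hairpin_conf conf = {
 *		.peer_count = 1,
 *		.manual_bind = 0,      automatic binding at start
 *		.tx_explicit = 0,      implicit Tx flow mode
 *	};
 *	conf.peers[0].port = port_id;  peer is the same port
 *	conf.peers[0].queue = txq_id;  Rx side points back to the Tx queue
 *	rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, nb_desc, &conf);
 *	conf.peers[0].queue = rxq_id;  Tx side points to the Rx queue
 *	rte_eth_tx_hairpin_queue_setup(port_id, txq_id, nb_desc, &conf);
 *	rte_eth_dev_start(port_id);    mlx5_hairpin_auto_bind() runs here
 */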

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue whose information will be fetched.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the output structure to store the information.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * This port is used as egress. In the current design, the hairpin
	 * Tx queue will be bound to the peer Rx queue, so only the local
	 * Tx queue information has to be fetched and returned.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = txq_ctrl->obj->sq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_ctrl *rxq_ctrl;

		rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
		mlx5_rxq_release(dev, peer_queue);
	}
	return 0;
}
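
/*
 * A minimal sketch of how this helper pairs with the bind step (hypothetical
 * port and queue variables; this mirrors mlx5_hairpin_bind_single_port()
 * below): the peer Rx queue attributes are first fetched through the generic
 * ethdev layer, then the local Tx queue is programmed with them.
 *
 *	struct rte_hairpin_peer_info peer;
 *
 *	rte_eth_hairpin_queue_peer_update(rx_port, rx_queue, NULL, &peer, 1);
 *	mlx5_hairpin_queue_peer_bind(dev, tx_queue, &peer, 1);
 */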

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice, once for the Tx queue and once for the Rx
 * queue of a pair. If the queue is already bound, it is considered
 * successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: opposite direction is used
	 * to get the peer queue info with ethdev port ID, no need to check.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and that check is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 1;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}
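
/*
 * A sketch of a complete pair binding (hypothetical variables): the Tx side
 * of the local port is bound first, then the Rx side of the peer port is
 * programmed through the generic layer, exactly as done in
 * mlx5_hairpin_bind_single_port() below:
 *
 *	ret = mlx5_hairpin_queue_peer_bind(dev, tx_queue, &rx_info, 1);
 *	if (ret == 0)
 *		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
 *						      &tx_info, 0);
 */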

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice, once for the Tx queue and once for the Rx
 * queue of a pair. If the queue is already unbound, it is considered
 * successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RST;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 0;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}
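
/*
 * A sketch of a complete pair unbinding (hypothetical variables), mirroring
 * mlx5_hairpin_unbind_single_port() below: the peer Rx queue is reset first
 * through the generic layer, then the local Tx queue:
 *
 *	ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
 *	if (ret == 0)
 *		ret = mlx5_hairpin_queue_peer_unbind(dev, tx_queue, 1);
 */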

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx queues to one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding TxQ to peer RxQ, the first loop checks the queues'
	 * configuration consistency. This is a little time consuming, but
	 * better than having to roll back.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port that are connected
		 * to the same peer Rx port should have the same "auto
		 * binding" and "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* If no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin Tx queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = txq_ctrl->obj->sq->id;
		cur.vhca_id = priv->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * In order to access another device in a proper way, RTE level
		 * private function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Do roll-back process for the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}

/*
 * Unbind the hairpin port pair; the HW configuration of both devices will be
 * cleared and the status will be reset for all the queues used between them.
 * This function only supports unbinding the Tx queues from one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Indeed, only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				mlx5_txq_release(dev, i);
				return -rte_errno;
			} else {
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}

/*
 * Bind hairpin ports; Rx can be all ports when rx_port is RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;
	struct mlx5_priv *priv = dev->data->dev_private;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the called function of single port.
	 * Device started status will be checked only before the queue
	 * information updating.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, priv->pci_dev)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}
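
/*
 * A minimal application-level usage sketch (hypothetical port IDs) of the
 * manual binding path that ends up in the callback above; both calls are
 * generic ethdev APIs:
 *
 *	rte_eth_hairpin_bind(tx_port, rx_port);	   bind one direction
 *	rte_eth_hairpin_bind(rx_port, tx_port);	   and the reverse, if used
 *	...
 *	rte_eth_hairpin_unbind(tx_port, rx_port);  tear down before stop
 */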

/*
 * Unbind hairpin ports; Rx can be all ports when rx_port is RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted; the order may differ from the one used when
 * setting up the hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or positive value on success, actual number of peer ports.
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			rxq_ctrl = mlx5_rxq_get(dev, i);
			if (!rxq_ctrl)
				continue;
			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
				mlx5_rxq_release(dev, i);
				continue;
			}
			pp = rxq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_rxq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_rxq_release(dev, i);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}
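
/*
 * A minimal usage sketch (hypothetical port ID) through the generic API
 * rte_eth_hairpin_get_peer_ports(), which dispatches to the callback above;
 * the bitmap in the callback deduplicates peers, so each port shows up once:
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int n;
 *
 *	n = rte_eth_hairpin_get_peer_ports(port_id, peers,
 *					   RTE_DIM(peers), 1);
 *	if (n > 0)
 *		... peers[0..n-1] hold the Rx peer port IDs, sorted ...
 */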

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step is skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue from the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, it only needs to start the default mreg copy
	 * action, since no flow created by an application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (priv->sh->intr_handle.fd >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (priv->sh->intr_handle_devx.fd >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
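
/*
 * For context, a minimal application-level sketch (hypothetical IDs and
 * counts) of the sequence that reaches the callback above through the
 * generic ethdev layer:
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 *	... rte_eth_rx_queue_setup() / rte_eth_tx_queue_setup() ...
 *	... optional hairpin queue setup, see the sketch above ...
 *	rte_eth_dev_start(port_id);	invokes mlx5_dev_start()
 */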

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	usleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All Rx queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, &priv->flows, true);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev);

	return 0;
}

/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow should be created no matter
	 * whether isolation mode is enabled. Otherwise, all the packets to
	 * be sent will go out directly without the Tx flow actions, e.g.
	 * encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (priv->config.dv_esw_en && !priv->config.vf) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret)
				goto error;
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
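
/*
 * Conceptually, each control flow above corresponds to an rte_flow rule
 * matching on the Ethernet destination and spreading traffic to all
 * configured Rx queues. A hypothetical application-level equivalent of the
 * broadcast rule would look roughly like this (illustrative only; the PMD
 * builds these rules internally through mlx5_ctrl_flow()):
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH,
 *		  .spec = &bcast, .mask = &bcast },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	... actions: RSS over all configured Rx queues ...
 *	rte_flow_create(port_id, &attr, pattern, actions, &err);
 */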

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}