dpdk.git: drivers/net/mlx5/mlx5_trigger.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32         struct mlx5_priv *priv = dev->data->dev_private;
33         unsigned int i;
34
35         for (i = 0; i != priv->txqs_n; ++i)
36                 mlx5_txq_release(dev, i);
37 }
38
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51         struct mlx5_priv *priv = dev->data->dev_private;
52         unsigned int i;
53         int ret;
54
55         for (i = 0; i != priv->txqs_n; ++i) {
56                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57                 struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
58                 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59
60                 if (!txq_ctrl)
61                         continue;
62                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63                         txq_alloc_elts(txq_ctrl);
64                 MLX5_ASSERT(!txq_ctrl->obj);
65                 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66                                             0, txq_ctrl->socket);
67                 if (!txq_ctrl->obj) {
68                         DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69                                 "memory resources.", dev->data->port_id,
70                                 txq_data->idx);
71                         rte_errno = ENOMEM;
72                         goto error;
73                 }
74                 ret = priv->obj_ops.txq_obj_new(dev, i);
75                 if (ret < 0) {
76                         mlx5_free(txq_ctrl->obj);
77                         txq_ctrl->obj = NULL;
78                         goto error;
79                 }
80                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81                         size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82
83                         txq_data->fcqs = mlx5_malloc(flags, size,
84                                                      RTE_CACHE_LINE_SIZE,
85                                                      txq_ctrl->socket);
86                         if (!txq_data->fcqs) {
87                                 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88                                         "allocate memory (FCQ).",
89                                         dev->data->port_id, i);
90                                 rte_errno = ENOMEM;
91                                 goto error;
92                         }
93                 }
94                 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95                         dev->data->port_id, i, (void *)&txq_ctrl->obj);
96                 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97         }
98         return 0;
99 error:
100         ret = rte_errno; /* Save rte_errno before cleanup. */
101         do {
102                 mlx5_txq_release(dev, i);
103         } while (i-- != 0);
104         rte_errno = ret; /* Restore rte_errno. */
105         return -rte_errno;
106 }
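
Example (editorial sketch, not part of the driver): the per-queue objects above are allocated with mlx5_malloc(), the common mlx5 allocation wrapper. MLX5_MEM_RTE selects rte_malloc-backed memory, MLX5_MEM_ZERO zeroes it, and the last two arguments are the alignment and the NUMA socket. A minimal sketch of the same pattern, with a hypothetical example_ctx structure:

#include <rte_common.h>
#include <mlx5_malloc.h>

struct example_ctx {
        uint32_t state;
};

static struct example_ctx *
example_ctx_alloc(int socket)
{
        /* Zeroed, rte_malloc-backed, cache-line aligned, on the given socket. */
        return mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
                           sizeof(struct example_ctx),
                           RTE_CACHE_LINE_SIZE, socket);
}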
107
108 /**
109  * Translate the chunk address to MR key in order to put it into the cache.
110  */
111 static void
112 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
113                              struct rte_mempool_memhdr *memhdr,
114                              unsigned int idx)
115 {
116         struct mlx5_rxq_data *rxq = opaque;
117
118         RTE_SET_USED(mp);
119         RTE_SET_USED(idx);
120         mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
121 }
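
Example (editorial sketch, not part of the driver): rte_mempool_mem_iter() calls a callback with the signature above once per memory chunk backing the mempool, which is how the MR cache is primed here. A self-contained illustration of the same mechanism; count_chunk_cb and count_mempool_chunks are hypothetical names:

#include <rte_common.h>
#include <rte_mempool.h>

static void
count_chunk_cb(struct rte_mempool *mp, void *opaque,
               struct rte_mempool_memhdr *memhdr, unsigned int idx)
{
        unsigned int *chunks = opaque;

        RTE_SET_USED(mp);
        RTE_SET_USED(memhdr);
        RTE_SET_USED(idx);
        (*chunks)++;
}

static unsigned int
count_mempool_chunks(struct rte_mempool *mp)
{
        unsigned int chunks = 0;

        rte_mempool_mem_iter(mp, count_chunk_cb, &chunks);
        return chunks;
}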
122
123 /**
124  * Register Rx queue mempools and fill the Rx queue cache.
125  * This function tolerates repeated mempool registration.
126  *
127  * @param[in] rxq_ctrl
128  *   Rx queue control data.
129  *
130  * @return
131  *   0 on success, (-1) on failure and rte_errno is set.
132  */
133 static int
134 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
135 {
136         struct mlx5_priv *priv = rxq_ctrl->priv;
137         struct rte_mempool *mp;
138         uint32_t s;
139         int ret = 0;
140
141         mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
142         /* MPRQ mempool is registered on creation, just fill the cache. */
143         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
144                 rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
145                                      mlx5_rxq_mempool_register_cb,
146                                      &rxq_ctrl->rxq);
147                 return 0;
148         }
149         for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
150                 mp = rxq_ctrl->rxq.rxseg[s].mp;
151                 ret = mlx5_mr_mempool_register(&priv->sh->cdev->mr_scache,
152                                                priv->sh->cdev->pd, mp,
153                                                &priv->mp_id);
154                 if (ret < 0 && rte_errno != EEXIST)
155                         return ret;
156                 rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
157                                      &rxq_ctrl->rxq);
158         }
159         return 0;
160 }
161
162 /**
163  * Stop traffic on Rx queues.
164  *
165  * @param dev
166  *   Pointer to Ethernet device structure.
167  */
168 static void
169 mlx5_rxq_stop(struct rte_eth_dev *dev)
170 {
171         struct mlx5_priv *priv = dev->data->dev_private;
172         unsigned int i;
173
174         for (i = 0; i != priv->rxqs_n; ++i)
175                 mlx5_rxq_release(dev, i);
176 }
177
178 /**
179  * Start traffic on Rx queues.
180  *
181  * @param dev
182  *   Pointer to Ethernet device structure.
183  *
184  * @return
185  *   0 on success, a negative errno value otherwise and rte_errno is set.
186  */
187 static int
188 mlx5_rxq_start(struct rte_eth_dev *dev)
189 {
190         struct mlx5_priv *priv = dev->data->dev_private;
191         unsigned int i;
192         int ret = 0;
193
194         /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
195         if (mlx5_mprq_alloc_mp(dev)) {
196                 /* Should not release Rx queues but return immediately. */
197                 return -rte_errno;
198         }
199         DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
200                 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
201         DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
202                 dev->data->port_id, priv->sh->device_attr.max_sge);
203         for (i = 0; i != priv->rxqs_n; ++i) {
204                 struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
205                 struct mlx5_rxq_ctrl *rxq_ctrl;
206
207                 if (rxq == NULL)
208                         continue;
209                 rxq_ctrl = rxq->ctrl;
210                 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
211                         /*
212                          * Pre-register the mempools. Regardless of whether
213                          * the implicit registration is enabled or not,
214                          * Rx mempool destruction is tracked to free MRs.
215                          */
216                         if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
217                                 goto error;
218                         ret = rxq_alloc_elts(rxq_ctrl);
219                         if (ret)
220                                 goto error;
221                 }
222                 MLX5_ASSERT(!rxq_ctrl->obj);
223                 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
224                                             sizeof(*rxq_ctrl->obj), 0,
225                                             rxq_ctrl->socket);
226                 if (!rxq_ctrl->obj) {
227                         DRV_LOG(ERR,
228                                 "Port %u Rx queue %u can't allocate resources.",
229                                 dev->data->port_id, (*priv->rxqs)[i]->idx);
230                         rte_errno = ENOMEM;
231                         goto error;
232                 }
233                 ret = priv->obj_ops.rxq_obj_new(dev, i);
234                 if (ret) {
235                         mlx5_free(rxq_ctrl->obj);
236                         rxq_ctrl->obj = NULL;
237                         goto error;
238                 }
239                 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
240                         dev->data->port_id, i, (void *)&rxq_ctrl->obj);
241                 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
242         }
243         return 0;
244 error:
245         ret = rte_errno; /* Save rte_errno before cleanup. */
246         do {
247                 mlx5_rxq_release(dev, i);
248         } while (i-- != 0);
249         rte_errno = ret; /* Restore rte_errno. */
250         return -rte_errno;
251 }
252
253 /**
254  * Binds Tx queues to Rx queues for hairpin.
255  *
256  * Binds each hairpin Tx queue to its target Rx queue on the same device.
257  *
258  * @param dev
259  *   Pointer to Ethernet device structure.
260  *
261  * @return
262  *   0 on success, a negative errno value otherwise and rte_errno is set.
263  */
264 static int
265 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
266 {
267         struct mlx5_priv *priv = dev->data->dev_private;
268         struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
269         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
270         struct mlx5_txq_ctrl *txq_ctrl;
271         struct mlx5_rxq_priv *rxq;
272         struct mlx5_rxq_ctrl *rxq_ctrl;
273         struct mlx5_devx_obj *sq;
274         struct mlx5_devx_obj *rq;
275         unsigned int i;
276         int ret = 0;
277         bool need_auto = false;
278         uint16_t self_port = dev->data->port_id;
279
280         for (i = 0; i != priv->txqs_n; ++i) {
281                 txq_ctrl = mlx5_txq_get(dev, i);
282                 if (!txq_ctrl)
283                         continue;
284                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
285                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
286                         mlx5_txq_release(dev, i);
287                         continue;
288                 }
289                 if (txq_ctrl->hairpin_conf.manual_bind) {
290                         mlx5_txq_release(dev, i);
291                         return 0;
292                 }
293                 need_auto = true;
294                 mlx5_txq_release(dev, i);
295         }
296         if (!need_auto)
297                 return 0;
298         for (i = 0; i != priv->txqs_n; ++i) {
299                 txq_ctrl = mlx5_txq_get(dev, i);
300                 if (!txq_ctrl)
301                         continue;
302                 /* Skip hairpin queues with other peer ports. */
303                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
304                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
305                         mlx5_txq_release(dev, i);
306                         continue;
307                 }
308                 if (!txq_ctrl->obj) {
309                         rte_errno = ENOMEM;
310                         DRV_LOG(ERR, "port %u no txq object found: %d",
311                                 dev->data->port_id, i);
312                         mlx5_txq_release(dev, i);
313                         return -rte_errno;
314                 }
315                 sq = txq_ctrl->obj->sq;
316                 rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
317                 if (rxq == NULL) {
318                         mlx5_txq_release(dev, i);
319                         rte_errno = EINVAL;
320                         DRV_LOG(ERR, "port %u no rxq object found: %d",
321                                 dev->data->port_id,
322                                 txq_ctrl->hairpin_conf.peers[0].queue);
323                         return -rte_errno;
324                 }
325                 rxq_ctrl = rxq->ctrl;
326                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
327                     rxq_ctrl->hairpin_conf.peers[0].queue != i) {
328                         rte_errno = ENOMEM;
329                         DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
330                                 "Rx queue %d", dev->data->port_id,
331                                 i, txq_ctrl->hairpin_conf.peers[0].queue);
332                         goto error;
333                 }
334                 rq = rxq_ctrl->obj->rq;
335                 if (!rq) {
336                         rte_errno = ENOMEM;
337                         DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
338                                 dev->data->port_id,
339                                 txq_ctrl->hairpin_conf.peers[0].queue);
340                         goto error;
341                 }
342                 sq_attr.state = MLX5_SQC_STATE_RDY;
343                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
344                 sq_attr.hairpin_peer_rq = rq->id;
345                 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
346                 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
347                 if (ret)
348                         goto error;
349                 rq_attr.state = MLX5_SQC_STATE_RDY;
350                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
351                 rq_attr.hairpin_peer_sq = sq->id;
352                 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
353                 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
354                 if (ret)
355                         goto error;
356                 /* Qs with auto-bind will be destroyed directly. */
357                 rxq_ctrl->hairpin_status = 1;
358                 txq_ctrl->hairpin_status = 1;
359                 mlx5_txq_release(dev, i);
360         }
361         return 0;
362 error:
363         mlx5_txq_release(dev, i);
364         return -rte_errno;
365 }
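
Example (editorial sketch, not part of the driver): the auto-bind path above is taken when the application configures hairpin queues whose peer is the same port and leaves manual_bind unset. Assuming the generic ethdev hairpin API, a minimal single-port setup could look like the following; setup_hairpin_pair and its parameters are illustrative, and the queue indexes are expected to be within the counts passed to rte_eth_dev_configure():

#include <rte_ethdev.h>

static int
setup_hairpin_pair(uint16_t port_id, uint16_t rxq, uint16_t txq,
                   uint16_t nb_desc)
{
        struct rte_eth_hairpin_conf conf = {
                .peer_count = 1,
                .manual_bind = 0,       /* Bound automatically on device start. */
                .tx_explicit = 0,       /* PMD inserts the implicit Tx flow. */
        };
        int ret;

        /* Rx hairpin queue, peer is the Tx hairpin queue of the same port. */
        conf.peers[0].port = port_id;
        conf.peers[0].queue = txq;
        ret = rte_eth_rx_hairpin_queue_setup(port_id, rxq, nb_desc, &conf);
        if (ret != 0)
                return ret;
        /* Tx hairpin queue, peer is the Rx hairpin queue configured above. */
        conf.peers[0].queue = rxq;
        return rte_eth_tx_hairpin_queue_setup(port_id, txq, nb_desc, &conf);
}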
366
367 /*
368  * Fetch the peer queue's SW & HW information.
369  *
370  * @param dev
371  *   Pointer to Ethernet device structure.
372  * @param peer_queue
373  *   Index of the queue to fetch the information for.
374  * @param current_info
375  *   Pointer to the input peer information, not used currently.
376  * @param peer_info
377  *   Pointer to the structure to store the information, output.
378  * @param direction
379  *   Positive to get the RxQ information, zero to get the TxQ information.
380  *
381  * @return
382  *   0 on success, a negative errno value otherwise and rte_errno is set.
383  */
384 int
385 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
386                                struct rte_hairpin_peer_info *current_info,
387                                struct rte_hairpin_peer_info *peer_info,
388                                uint32_t direction)
389 {
390         struct mlx5_priv *priv = dev->data->dev_private;
391         RTE_SET_USED(current_info);
392
393         if (dev->data->dev_started == 0) {
394                 rte_errno = EBUSY;
395                 DRV_LOG(ERR, "peer port %u is not started",
396                         dev->data->port_id);
397                 return -rte_errno;
398         }
399         /*
400          * Peer port used as egress. In the current design, hairpin Tx queue
401          * will be bound to the peer Rx queue. Indeed, only the information of
402          * peer Rx queue needs to be fetched.
403          */
404         if (direction == 0) {
405                 struct mlx5_txq_ctrl *txq_ctrl;
406
407                 txq_ctrl = mlx5_txq_get(dev, peer_queue);
408                 if (txq_ctrl == NULL) {
409                         rte_errno = EINVAL;
410                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
411                                 dev->data->port_id, peer_queue);
412                         return -rte_errno;
413                 }
414                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
415                         rte_errno = EINVAL;
416                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
417                                 dev->data->port_id, peer_queue);
418                         mlx5_txq_release(dev, peer_queue);
419                         return -rte_errno;
420                 }
421                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
422                         rte_errno = ENOMEM;
423                         DRV_LOG(ERR, "port %u no Txq object found: %d",
424                                 dev->data->port_id, peer_queue);
425                         mlx5_txq_release(dev, peer_queue);
426                         return -rte_errno;
427                 }
428                 peer_info->qp_id = txq_ctrl->obj->sq->id;
429                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
430                 /* 1-to-1 mapping, only the first one is used. */
431                 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
432                 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
433                 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
434                 mlx5_txq_release(dev, peer_queue);
435         } else { /* Peer port used as ingress. */
436                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
437                 struct mlx5_rxq_ctrl *rxq_ctrl;
438
439                 if (rxq == NULL) {
440                         rte_errno = EINVAL;
441                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
442                                 dev->data->port_id, peer_queue);
443                         return -rte_errno;
444                 }
445                 rxq_ctrl = rxq->ctrl;
446                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
447                         rte_errno = EINVAL;
448                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
449                                 dev->data->port_id, peer_queue);
450                         return -rte_errno;
451                 }
452                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
453                         rte_errno = ENOMEM;
454                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
455                                 dev->data->port_id, peer_queue);
456                         return -rte_errno;
457                 }
458                 peer_info->qp_id = rxq_ctrl->obj->rq->id;
459                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
460                 peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
461                 peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
462                 peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
463         }
464         return 0;
465 }
466
467 /*
468  * Bind the hairpin queue with the peer HW information.
469  * This needs to be called twice, for both the Tx and Rx queues of a pair.
470  * If the queue is already bound, it is considered successful.
471  *
472  * @param dev
473  *   Pointer to Ethernet device structure.
474  * @param cur_queue
475  *   Index of the queue to change the HW configuration to bind.
476  * @param peer_info
477  *   Pointer to information of the peer queue.
478  * @param direction
479  *   Positive to configure the TxQ, zero to configure the RxQ.
480  *
481  * @return
482  *   0 on success, a negative errno value otherwise and rte_errno is set.
483  */
484 int
485 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
486                              struct rte_hairpin_peer_info *peer_info,
487                              uint32_t direction)
488 {
489         int ret = 0;
490
491         /*
492          * Consistency checking of the peer queue: the opposite direction was
493          * used to get the peer queue info with the ethdev port ID, no need to check it.
494          */
495         if (peer_info->peer_q != cur_queue) {
496                 rte_errno = EINVAL;
497                 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
498                         dev->data->port_id, cur_queue, peer_info->peer_q);
499                 return -rte_errno;
500         }
501         if (direction != 0) {
502                 struct mlx5_txq_ctrl *txq_ctrl;
503                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
504
505                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
506                 if (txq_ctrl == NULL) {
507                         rte_errno = EINVAL;
508                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
509                                 dev->data->port_id, cur_queue);
510                         return -rte_errno;
511                 }
512                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
513                         rte_errno = EINVAL;
514                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
515                                 dev->data->port_id, cur_queue);
516                         mlx5_txq_release(dev, cur_queue);
517                         return -rte_errno;
518                 }
519                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
520                         rte_errno = ENOMEM;
521                         DRV_LOG(ERR, "port %u no Txq object found: %d",
522                                 dev->data->port_id, cur_queue);
523                         mlx5_txq_release(dev, cur_queue);
524                         return -rte_errno;
525                 }
526                 if (txq_ctrl->hairpin_status != 0) {
527                         DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
528                                 dev->data->port_id, cur_queue);
529                         mlx5_txq_release(dev, cur_queue);
530                         return 0;
531                 }
532                 /*
533                  * Consistency checking across all the queues of one port is done
534                  * in the bind() function, and that is optional.
535                  */
536                 if (peer_info->tx_explicit !=
537                     txq_ctrl->hairpin_conf.tx_explicit) {
538                         rte_errno = EINVAL;
539                         DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
540                                 " mismatch", dev->data->port_id, cur_queue);
541                         mlx5_txq_release(dev, cur_queue);
542                         return -rte_errno;
543                 }
544                 if (peer_info->manual_bind !=
545                     txq_ctrl->hairpin_conf.manual_bind) {
546                         rte_errno = EINVAL;
547                         DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
548                                 " mismatch", dev->data->port_id, cur_queue);
549                         mlx5_txq_release(dev, cur_queue);
550                         return -rte_errno;
551                 }
552                 sq_attr.state = MLX5_SQC_STATE_RDY;
553                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
554                 sq_attr.hairpin_peer_rq = peer_info->qp_id;
555                 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
556                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
557                 if (ret == 0)
558                         txq_ctrl->hairpin_status = 1;
559                 mlx5_txq_release(dev, cur_queue);
560         } else {
561                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
562                 struct mlx5_rxq_ctrl *rxq_ctrl;
563                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
564
565                 if (rxq == NULL) {
566                         rte_errno = EINVAL;
567                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
568                                 dev->data->port_id, cur_queue);
569                         return -rte_errno;
570                 }
571                 rxq_ctrl = rxq->ctrl;
572                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
573                         rte_errno = EINVAL;
574                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
575                                 dev->data->port_id, cur_queue);
576                         return -rte_errno;
577                 }
578                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
579                         rte_errno = ENOMEM;
580                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
581                                 dev->data->port_id, cur_queue);
582                         return -rte_errno;
583                 }
584                 if (rxq_ctrl->hairpin_status != 0) {
585                         DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
586                                 dev->data->port_id, cur_queue);
587                         return 0;
588                 }
589                 if (peer_info->tx_explicit !=
590                     rxq_ctrl->hairpin_conf.tx_explicit) {
591                         rte_errno = EINVAL;
592                         DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
593                                 " mismatch", dev->data->port_id, cur_queue);
594                         return -rte_errno;
595                 }
596                 if (peer_info->manual_bind !=
597                     rxq_ctrl->hairpin_conf.manual_bind) {
598                         rte_errno = EINVAL;
599                         DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
600                                 " mismatch", dev->data->port_id, cur_queue);
601                         return -rte_errno;
602                 }
603                 rq_attr.state = MLX5_SQC_STATE_RDY;
604                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
605                 rq_attr.hairpin_peer_sq = peer_info->qp_id;
606                 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
607                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
608                 if (ret == 0)
609                         rxq_ctrl->hairpin_status = 1;
610         }
611         return ret;
612 }
613
614 /*
615  * Unbind the hairpin queue and reset its HW configuration.
616  * This needs to be called twice, for both the Tx and Rx queues of a pair.
617  * If the queue is already unbound, it is considered successful.
618  *
619  * @param dev
620  *   Pointer to Ethernet device structure.
621  * @param cur_queue
622  *   Index of the queue to change the HW configuration to unbind.
623  * @param direction
624  *   Positive to reset the TxQ, zero to reset the RxQ.
625  *
626  * @return
627  *   0 on success, a negative errno value otherwise and rte_errno is set.
628  */
629 int
630 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
631                                uint32_t direction)
632 {
633         int ret = 0;
634
635         if (direction != 0) {
636                 struct mlx5_txq_ctrl *txq_ctrl;
637                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
638
639                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
640                 if (txq_ctrl == NULL) {
641                         rte_errno = EINVAL;
642                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
643                                 dev->data->port_id, cur_queue);
644                         return -rte_errno;
645                 }
646                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
647                         rte_errno = EINVAL;
648                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
649                                 dev->data->port_id, cur_queue);
650                         mlx5_txq_release(dev, cur_queue);
651                         return -rte_errno;
652                 }
653                 /* Already unbound, return success before obj checking. */
654                 if (txq_ctrl->hairpin_status == 0) {
655                         DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
656                                 dev->data->port_id, cur_queue);
657                         mlx5_txq_release(dev, cur_queue);
658                         return 0;
659                 }
660                 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
661                         rte_errno = ENOMEM;
662                         DRV_LOG(ERR, "port %u no Txq object found: %d",
663                                 dev->data->port_id, cur_queue);
664                         mlx5_txq_release(dev, cur_queue);
665                         return -rte_errno;
666                 }
667                 sq_attr.state = MLX5_SQC_STATE_RST;
668                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
669                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
670                 if (ret == 0)
671                         txq_ctrl->hairpin_status = 0;
672                 mlx5_txq_release(dev, cur_queue);
673         } else {
674                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
675                 struct mlx5_rxq_ctrl *rxq_ctrl;
676                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
677
678                 if (rxq == NULL) {
679                         rte_errno = EINVAL;
680                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
681                                 dev->data->port_id, cur_queue);
682                         return -rte_errno;
683                 }
684                 rxq_ctrl = rxq->ctrl;
685                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
686                         rte_errno = EINVAL;
687                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
688                                 dev->data->port_id, cur_queue);
689                         return -rte_errno;
690                 }
691                 if (rxq_ctrl->hairpin_status == 0) {
692                         DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
693                                 dev->data->port_id, cur_queue);
694                         return 0;
695                 }
696                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
697                         rte_errno = ENOMEM;
698                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
699                                 dev->data->port_id, cur_queue);
700                         return -rte_errno;
701                 }
702                 rq_attr.state = MLX5_SQC_STATE_RST;
703                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
704                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
705                 if (ret == 0)
706                         rxq_ctrl->hairpin_status = 0;
707         }
708         return ret;
709 }
710
711 /*
712  * Bind the hairpin port pairs, from the Tx to the peer Rx.
713  * This function only supports binding the Tx to one Rx port.
714  *
715  * @param dev
716  *   Pointer to Ethernet device structure.
717  * @param rx_port
718  *   Port identifier of the Rx port.
719  *
720  * @return
721  *   0 on success, a negative errno value otherwise and rte_errno is set.
722  */
723 static int
724 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
725 {
726         struct mlx5_priv *priv = dev->data->dev_private;
727         int ret = 0;
728         struct mlx5_txq_ctrl *txq_ctrl;
729         uint32_t i;
730         struct rte_hairpin_peer_info peer = {0xffffff};
731         struct rte_hairpin_peer_info cur;
732         const struct rte_eth_hairpin_conf *conf;
733         uint16_t num_q = 0;
734         uint16_t local_port = priv->dev_data->port_id;
735         uint32_t manual;
736         uint32_t explicit;
737         uint16_t rx_queue;
738
739         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
740                 rte_errno = ENODEV;
741                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
742                 return -rte_errno;
743         }
744         /*
745          * Before binding a TxQ to its peer RxQ, a first-round loop is used to
746          * check the queues' configuration consistency. This is a little time
747          * consuming but better than having to roll back afterwards.
748          */
749         for (i = 0; i != priv->txqs_n; i++) {
750                 txq_ctrl = mlx5_txq_get(dev, i);
751                 if (txq_ctrl == NULL)
752                         continue;
753                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
754                         mlx5_txq_release(dev, i);
755                         continue;
756                 }
757                 /*
758                  * All hairpin Tx queues of a single port that connected to the
759                  * same peer Rx port should have the same "auto binding" and
760                  * "implicit Tx flow" modes.
761                  * Peer consistency checking will be done in per queue binding.
762                  */
763                 conf = &txq_ctrl->hairpin_conf;
764                 if (conf->peers[0].port == rx_port) {
765                         if (num_q == 0) {
766                                 manual = conf->manual_bind;
767                                 explicit = conf->tx_explicit;
768                         } else {
769                                 if (manual != conf->manual_bind ||
770                                     explicit != conf->tx_explicit) {
771                                         rte_errno = EINVAL;
772                                         DRV_LOG(ERR, "port %u queue %d mode"
773                                                 " mismatch: %u %u, %u %u",
774                                                 local_port, i, manual,
775                                                 conf->manual_bind, explicit,
776                                                 conf->tx_explicit);
777                                         mlx5_txq_release(dev, i);
778                                         return -rte_errno;
779                                 }
780                         }
781                         num_q++;
782                 }
783                 mlx5_txq_release(dev, i);
784         }
785         /* If no queue is configured, success is returned directly. */
786         if (num_q == 0)
787                 return ret;
788         /* All the hairpin TX queues need to be traversed again. */
789         for (i = 0; i != priv->txqs_n; i++) {
790                 txq_ctrl = mlx5_txq_get(dev, i);
791                 if (txq_ctrl == NULL)
792                         continue;
793                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
794                         mlx5_txq_release(dev, i);
795                         continue;
796                 }
797                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
798                         mlx5_txq_release(dev, i);
799                         continue;
800                 }
801                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
802                 /*
803                  * Fetch peer RxQ's information.
804                  * No need to pass the information of the current queue.
805                  */
806                 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
807                                                         NULL, &peer, 1);
808                 if (ret != 0) {
809                         mlx5_txq_release(dev, i);
810                         goto error;
811                 }
812                 /* Accessing its own device, inside mlx5 PMD. */
813                 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
814                 if (ret != 0) {
815                         mlx5_txq_release(dev, i);
816                         goto error;
817                 }
818                 /* Pass TxQ's information to peer RxQ and try binding. */
819                 cur.peer_q = rx_queue;
820                 cur.qp_id = txq_ctrl->obj->sq->id;
821                 cur.vhca_id = priv->config.hca_attr.vhca_id;
822                 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
823                 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
824                 /*
825                  * To access another device in a proper way, the RTE-level
826                  * private function is needed.
827                  */
828                 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
829                                                       &cur, 0);
830                 if (ret != 0) {
831                         mlx5_txq_release(dev, i);
832                         goto error;
833                 }
834                 mlx5_txq_release(dev, i);
835         }
836         return 0;
837 error:
838         /*
839          * Roll back the queues that were already bound.
840          * No need to check the return value of the queue unbind function.
841          */
842         do {
843                 /* No validation is needed here. */
844                 txq_ctrl = mlx5_txq_get(dev, i);
845                 if (txq_ctrl == NULL)
846                         continue;
847                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
848                 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
849                 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
850                 mlx5_txq_release(dev, i);
851         } while (i--);
852         return ret;
853 }
854
855 /*
856  * Unbind the hairpin port pair. The HW configuration of both devices will be
857  * cleared and the status will be reset for all the queues used between them.
858  * This function only supports unbinding the Tx from one Rx port.
859  *
860  * @param dev
861  *   Pointer to Ethernet device structure.
862  * @param rx_port
863  *   Port identifier of the Rx port.
864  *
865  * @return
866  *   0 on success, a negative errno value otherwise and rte_errno is set.
867  */
868 static int
869 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
870 {
871         struct mlx5_priv *priv = dev->data->dev_private;
872         struct mlx5_txq_ctrl *txq_ctrl;
873         uint32_t i;
874         int ret;
875         uint16_t cur_port = priv->dev_data->port_id;
876
877         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
878                 rte_errno = ENODEV;
879                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
880                 return -rte_errno;
881         }
882         for (i = 0; i != priv->txqs_n; i++) {
883                 uint16_t rx_queue;
884
885                 txq_ctrl = mlx5_txq_get(dev, i);
886                 if (txq_ctrl == NULL)
887                         continue;
888                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
889                         mlx5_txq_release(dev, i);
890                         continue;
891                 }
892                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
893                         mlx5_txq_release(dev, i);
894                         continue;
895                 }
896                 /* Indeed, only the first used queue needs to be checked. */
897                 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
898                         if (cur_port != rx_port) {
899                                 rte_errno = EINVAL;
900                                 DRV_LOG(ERR, "port %u and port %u are in"
901                                         " auto-bind mode", cur_port, rx_port);
902                                 mlx5_txq_release(dev, i);
903                                 return -rte_errno;
904                         } else {
905                                 return 0;
906                         }
907                 }
908                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
909                 mlx5_txq_release(dev, i);
910                 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
911                 if (ret) {
912                         DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
913                                 rx_port, rx_queue);
914                         return ret;
915                 }
916                 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
917                 if (ret) {
918                         DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
919                                 cur_port, i);
920                         return ret;
921                 }
922         }
923         return 0;
924 }
925
926 /*
927  * Bind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
928  * @see mlx5_hairpin_bind_single_port()
929  */
930 int
931 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
932 {
933         int ret = 0;
934         uint16_t p, pp;
935
936         /*
937          * If the Rx port has no hairpin configuration with the current port,
938          * the binding will be skipped by the single-port bind function.
939          * The device started status is checked only before the queue
940          * information is updated.
941          */
942         if (rx_port == RTE_MAX_ETHPORTS) {
943                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
944                         ret = mlx5_hairpin_bind_single_port(dev, p);
945                         if (ret != 0)
946                                 goto unbind;
947                 }
948                 return ret;
949         } else {
950                 return mlx5_hairpin_bind_single_port(dev, rx_port);
951         }
952 unbind:
953         MLX5_ETH_FOREACH_DEV(pp, dev->device)
954                 if (pp < p)
955                         mlx5_hairpin_unbind_single_port(dev, pp);
956         return ret;
957 }
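
Example (editorial sketch, not part of the driver): when manual_bind is set in the hairpin queue configuration, the application drives this callback through rte_eth_hairpin_bind() after both ports are started; RTE_MAX_ETHPORTS can be passed as the Rx port to bind all peers at once. A two-port sketch, with manual_hairpin_bind as an illustrative name:

#include <rte_ethdev.h>

static int
manual_hairpin_bind(uint16_t tx_port, uint16_t rx_port)
{
        int ret;

        /* Bind the Tx side of tx_port to the Rx side of rx_port. */
        ret = rte_eth_hairpin_bind(tx_port, rx_port);
        if (ret != 0)
                return ret;
        /* And the reverse direction, if traffic flows both ways. */
        return rte_eth_hairpin_bind(rx_port, tx_port);
}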
958
959 /*
960  * Unbind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
961  * @see mlx5_hairpin_unbind_single_port()
962  */
963 int
964 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
965 {
966         int ret = 0;
967         uint16_t p;
968
969         if (rx_port == RTE_MAX_ETHPORTS)
970                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
971                         ret = mlx5_hairpin_unbind_single_port(dev, p);
972                         if (ret != 0)
973                                 return ret;
974                 }
975         else
976                 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
977         return ret;
978 }
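
Example (editorial sketch, not part of the driver): the matching application-side teardown goes through rte_eth_hairpin_unbind(), typically before the ports are stopped; manual_hairpin_unbind is an illustrative name:

#include <rte_ethdev.h>

static void
manual_hairpin_unbind(uint16_t tx_port, uint16_t rx_port)
{
        /* Errors are ignored here; each call resets whatever it can. */
        (void)rte_eth_hairpin_unbind(tx_port, rx_port);
        (void)rte_eth_hairpin_unbind(rx_port, tx_port);
}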
979
980 /*
981  * DPDK callback to get the hairpin peer ports list.
982  * This will return the actual number of peer ports and save the identifiers
983  * into the array (sorted; the order may differ from the one used when setting
984  * up the hairpin peer queues).
985  * The peer port ID could be the same as the port ID of the current device.
986  *
987  * @param dev
988  *   Pointer to Ethernet device structure.
989  * @param peer_ports
990  *   Pointer to array to save the port identifiers.
991  * @param len
992  *   The length of the array.
993  * @param direction
994  *   Current port to peer port direction.
995  *   Positive - the current port is used as Tx to get all peer Rx ports.
996  *   Zero - the current port is used as Rx to get all peer Tx ports.
997  *
998  * @return
999  *   0 or a positive value on success (the actual number of peer ports),
1000  *   a negative errno value otherwise and rte_errno is set.
1001  */
1002 int
1003 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1004                             size_t len, uint32_t direction)
1005 {
1006         struct mlx5_priv *priv = dev->data->dev_private;
1007         struct mlx5_txq_ctrl *txq_ctrl;
1008         uint32_t i;
1009         uint16_t pp;
1010         uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1011         int ret = 0;
1012
1013         if (direction) {
1014                 for (i = 0; i < priv->txqs_n; i++) {
1015                         txq_ctrl = mlx5_txq_get(dev, i);
1016                         if (!txq_ctrl)
1017                                 continue;
1018                         if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1019                                 mlx5_txq_release(dev, i);
1020                                 continue;
1021                         }
1022                         pp = txq_ctrl->hairpin_conf.peers[0].port;
1023                         if (pp >= RTE_MAX_ETHPORTS) {
1024                                 rte_errno = ERANGE;
1025                                 mlx5_txq_release(dev, i);
1026                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1027                                         "out of range %hu",
1028                                         priv->dev_data->port_id, i, pp);
1029                                 return -rte_errno;
1030                         }
1031                         bits[pp / 32] |= 1 << (pp % 32);
1032                         mlx5_txq_release(dev, i);
1033                 }
1034         } else {
1035                 for (i = 0; i < priv->rxqs_n; i++) {
1036                         struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1037                         struct mlx5_rxq_ctrl *rxq_ctrl;
1038
1039                         if (rxq == NULL)
1040                                 continue;
1041                         rxq_ctrl = rxq->ctrl;
1042                         if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
1043                                 continue;
1044                         pp = rxq_ctrl->hairpin_conf.peers[0].port;
1045                         if (pp >= RTE_MAX_ETHPORTS) {
1046                                 rte_errno = ERANGE;
1047                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1048                                         "out of range %hu",
1049                                         priv->dev_data->port_id, i, pp);
1050                                 return -rte_errno;
1051                         }
1052                         bits[pp / 32] |= 1 << (pp % 32);
1053                 }
1054         }
1055         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1056                 if (bits[i / 32] & (1 << (i % 32))) {
1057                         if ((size_t)ret >= len) {
1058                                 rte_errno = E2BIG;
1059                                 return -rte_errno;
1060                         }
1061                         peer_ports[ret++] = i;
1062                 }
1063         }
1064         return ret;
1065 }
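
Example (editorial sketch, not part of the driver): applications reach this callback through rte_eth_hairpin_get_peer_ports(). A small sketch that lists the peer Rx ports of a given Tx port; show_hairpin_peer_rx_ports is an illustrative name:

#include <stdio.h>

#include <rte_common.h>
#include <rte_ethdev.h>

static int
show_hairpin_peer_rx_ports(uint16_t port_id)
{
        uint16_t peers[RTE_MAX_ETHPORTS];
        int n, i;

        /* direction == 1: port_id is used as Tx, the peers are Rx ports. */
        n = rte_eth_hairpin_get_peer_ports(port_id, peers, RTE_DIM(peers), 1);
        if (n < 0)
                return n;
        for (i = 0; i < n; i++)
                printf("port %u hairpin peer Rx port: %u\n", port_id, peers[i]);
        return 0;
}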
1066
1067 /**
1068  * DPDK callback to start the device.
1069  *
1070  * Simulate device start by attaching all configured flows.
1071  *
1072  * @param dev
1073  *   Pointer to Ethernet device structure.
1074  *
1075  * @return
1076  *   0 on success, a negative errno value otherwise and rte_errno is set.
1077  */
1078 int
1079 mlx5_dev_start(struct rte_eth_dev *dev)
1080 {
1081         struct mlx5_priv *priv = dev->data->dev_private;
1082         int ret;
1083         int fine_inline;
1084
1085         DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1086         fine_inline = rte_mbuf_dynflag_lookup
1087                 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1088         if (fine_inline >= 0)
1089                 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1090         else
1091                 rte_net_mlx5_dynf_inline_mask = 0;
1092         if (dev->data->nb_rx_queues > 0) {
1093                 ret = mlx5_dev_configure_rss_reta(dev);
1094                 if (ret) {
1095                         DRV_LOG(ERR, "port %u reta config failed: %s",
1096                                 dev->data->port_id, strerror(rte_errno));
1097                         return -rte_errno;
1098                 }
1099         }
1100         ret = mlx5_txpp_start(dev);
1101         if (ret) {
1102                 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1103                         dev->data->port_id, strerror(rte_errno));
1104                 goto error;
1105         }
1106         if ((priv->sh->devx && priv->config.dv_flow_en &&
1107             priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1108                 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1109                 if (ret)
1110                         goto error;
1111         }
1112         ret = mlx5_txq_start(dev);
1113         if (ret) {
1114                 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1115                         dev->data->port_id, strerror(rte_errno));
1116                 goto error;
1117         }
1118         ret = mlx5_rxq_start(dev);
1119         if (ret) {
1120                 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1121                         dev->data->port_id, strerror(rte_errno));
1122                 goto error;
1123         }
1124         /*
1125          * This step is skipped if there is no hairpin Tx queue configured with
1126          * an Rx peer queue on the same device.
1127          */
1128         ret = mlx5_hairpin_auto_bind(dev);
1129         if (ret) {
1130                 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1131                         dev->data->port_id, strerror(rte_errno));
1132                 goto error;
1133         }
1134         /* Set started flag here for the following steps like control flow. */
1135         dev->data->dev_started = 1;
1136         ret = mlx5_rx_intr_vec_enable(dev);
1137         if (ret) {
1138                 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1139                         dev->data->port_id);
1140                 goto error;
1141         }
1142         mlx5_os_stats_init(dev);
1143         ret = mlx5_traffic_enable(dev);
1144         if (ret) {
1145                 DRV_LOG(ERR, "port %u failed to set default flows",
1146                         dev->data->port_id);
1147                 goto error;
1148         }
1149         /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1150         mlx5_flow_rxq_dynf_metadata_set(dev);
1151         /* Set flags and context to convert Rx timestamps. */
1152         mlx5_rxq_timestamp_set(dev);
1153         /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1154         mlx5_txq_dynf_timestamp_set(dev);
1155         /* Attach indirection table objects detached on port stop. */
1156         ret = mlx5_action_handle_attach(dev);
1157         if (ret) {
1158                 DRV_LOG(ERR,
1159                         "port %u failed to attach indirect actions: %s",
1160                         dev->data->port_id, rte_strerror(rte_errno));
1161                 goto error;
1162         }
1163         /*
1164          * In non-cached mode, only the default mreg copy action needs to be
1165          * started, since no flow created by the application exists anymore.
1166          * But it is worth wrapping the interface for further usage.
1167          */
1168         ret = mlx5_flow_start_default(dev);
1169         if (ret) {
1170                 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1171                         dev->data->port_id, strerror(rte_errno));
1172                 goto error;
1173         }
1174         if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1175                 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1176                         dev->data->port_id, rte_strerror(rte_errno));
1177                 goto error;
1178         }
1179         rte_wmb();
1180         dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1181         dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1182         /* Enable datapath on secondary process. */
1183         mlx5_mp_os_req_start_rxtx(dev);
1184         if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1185                 priv->sh->port[priv->dev_port - 1].ih_port_id =
1186                                         (uint32_t)dev->data->port_id;
1187         } else {
1188                 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1189                         dev->data->port_id);
1190                 dev->data->dev_conf.intr_conf.lsc = 0;
1191                 dev->data->dev_conf.intr_conf.rmv = 0;
1192         }
1193         if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1194                 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1195                                         (uint32_t)dev->data->port_id;
1196         return 0;
1197 error:
1198         ret = rte_errno; /* Save rte_errno before cleanup. */
1199         /* Rollback. */
1200         dev->data->dev_started = 0;
1201         mlx5_flow_stop_default(dev);
1202         mlx5_traffic_disable(dev);
1203         mlx5_txq_stop(dev);
1204         mlx5_rxq_stop(dev);
1205         if (priv->obj_ops.lb_dummy_queue_release)
1206                 priv->obj_ops.lb_dummy_queue_release(dev);
1207         mlx5_txpp_stop(dev); /* Stop last. */
1208         rte_errno = ret; /* Restore rte_errno. */
1209         return -rte_errno;
1210 }
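
Example (editorial sketch, not part of the driver): mlx5_dev_start() is reached through the generic ethdev start path. A minimal application-side bring-up sequence, with arbitrary queue and descriptor counts; bring_up_port is an illustrative name:

#include <rte_ethdev.h>

static int
bring_up_port(uint16_t port_id, struct rte_mempool *mb_pool)
{
        struct rte_eth_conf conf = {0};
        const uint16_t nb_desc = 512;
        int ret;

        ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
        if (ret != 0)
                return ret;
        ret = rte_eth_rx_queue_setup(port_id, 0, nb_desc,
                                     rte_eth_dev_socket_id(port_id),
                                     NULL, mb_pool);
        if (ret != 0)
                return ret;
        ret = rte_eth_tx_queue_setup(port_id, 0, nb_desc,
                                     rte_eth_dev_socket_id(port_id), NULL);
        if (ret != 0)
                return ret;
        /* Invokes the dev_start callback, i.e. mlx5_dev_start() for mlx5. */
        return rte_eth_dev_start(port_id);
}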
1211
1212 /**
1213  * DPDK callback to stop the device.
1214  *
1215  * Simulate device stop by detaching all configured flows.
1216  *
1217  * @param dev
1218  *   Pointer to Ethernet device structure.
1219  */
1220 int
1221 mlx5_dev_stop(struct rte_eth_dev *dev)
1222 {
1223         struct mlx5_priv *priv = dev->data->dev_private;
1224
1225         dev->data->dev_started = 0;
1226         /* Prevent crashes when queues are still in use. */
1227         dev->rx_pkt_burst = removed_rx_burst;
1228         dev->tx_pkt_burst = removed_tx_burst;
1229         rte_wmb();
1230         /* Disable datapath on secondary process. */
1231         mlx5_mp_os_req_stop_rxtx(dev);
1232         rte_delay_us_sleep(1000 * priv->rxqs_n);
1233         DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1234         mlx5_flow_stop_default(dev);
1235         /* Control flows for default traffic can be removed first. */
1236         mlx5_traffic_disable(dev);
1237         /* All RX queue flags will be cleared in the flush interface. */
1238         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1239         mlx5_flow_meter_rxq_flush(dev);
1240         mlx5_action_handle_detach(dev);
1241         mlx5_rx_intr_vec_disable(dev);
1242         priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1243         priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1244         mlx5_txq_stop(dev);
1245         mlx5_rxq_stop(dev);
1246         if (priv->obj_ops.lb_dummy_queue_release)
1247                 priv->obj_ops.lb_dummy_queue_release(dev);
1248         mlx5_txpp_stop(dev);
1249
1250         return 0;
1251 }
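
Example (editorial sketch, not part of the driver): the corresponding teardown calls rte_eth_dev_stop(), which lands in mlx5_dev_stop(), before the port is closed; tear_down_port is an illustrative name:

#include <rte_ethdev.h>

static int
tear_down_port(uint16_t port_id)
{
        int ret;

        ret = rte_eth_dev_stop(port_id);        /* Invokes mlx5_dev_stop(). */
        if (ret != 0)
                return ret;
        return rte_eth_dev_close(port_id);
}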
1252
1253 /**
1254  * Enable traffic flows configured by the control plane.
1255  *
1256  * @param dev
1257  *   Pointer to Ethernet device structure.
1260  *
1261  * @return
1262  *   0 on success, a negative errno value otherwise and rte_errno is set.
1263  */
1264 int
1265 mlx5_traffic_enable(struct rte_eth_dev *dev)
1266 {
1267         struct mlx5_priv *priv = dev->data->dev_private;
1268         struct rte_flow_item_eth bcast = {
1269                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1270         };
1271         struct rte_flow_item_eth ipv6_multi_spec = {
1272                 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1273         };
1274         struct rte_flow_item_eth ipv6_multi_mask = {
1275                 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1276         };
1277         struct rte_flow_item_eth unicast = {
1278                 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1279         };
1280         struct rte_flow_item_eth unicast_mask = {
1281                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1282         };
1283         const unsigned int vlan_filter_n = priv->vlan_filter_n;
1284         const struct rte_ether_addr cmp = {
1285                 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1286         };
1287         unsigned int i;
1288         unsigned int j;
1289         int ret;
1290
1291         /*
1292          * The hairpin Tx queue default flow should be created regardless of
1293          * isolation mode; otherwise all packets to be sent would go out
1294          * directly without the Tx flow actions, e.g. encapsulation.
1295          */
1296         for (i = 0; i != priv->txqs_n; ++i) {
1297                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1298                 if (!txq_ctrl)
1299                         continue;
1300                 /* Only Tx implicit mode requires the default Tx flow. */
1301                 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1302                     txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1303                     txq_ctrl->hairpin_conf.peers[0].port ==
1304                     priv->dev_data->port_id) {
1305                         ret = mlx5_ctrl_flow_source_queue(dev, i);
1306                         if (ret) {
1307                                 mlx5_txq_release(dev, i);
1308                                 goto error;
1309                         }
1310                 }
1311                 if ((priv->representor || priv->master) &&
1312                     priv->config.dv_esw_en) {
1313                         if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1314                                 DRV_LOG(ERR,
1315                                         "Port %u Tx queue %u: failed to create representor DevX SQ default miss rule.",
1316                                         dev->data->port_id, i);
1317                                 goto error;
1318                         }
1319                 }
1320                 mlx5_txq_release(dev, i);
1321         }
1322         if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1323                 if (mlx5_flow_create_esw_table_zero_flow(dev))
1324                         priv->fdb_def_rule = 1;
1325                 else
1326                         DRV_LOG(INFO, "port %u FDB default rule cannot be"
1327                                 " configured - only Eswitch group 0 flows are"
1328                                 " supported.", dev->data->port_id);
1329         }
1330         if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1331                 ret = mlx5_flow_lacp_miss(dev);
1332                 if (ret)
1333                         DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1334                                 "forward LACP to kernel.", dev->data->port_id);
1335                 else
1336                         DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1337                                 dev->data->port_id);
1338         }
1339         if (priv->isolated)
1340                 return 0;
1341         if (dev->data->promiscuous) {
1342                 struct rte_flow_item_eth promisc = {
1343                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1344                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1345                         .type = 0,
1346                 };
1347
1348                 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1349                 if (ret)
1350                         goto error;
1351         }
1352         if (dev->data->all_multicast) {
1353                 struct rte_flow_item_eth multicast = {
1354                         .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1355                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1356                         .type = 0,
1357                 };
1358
1359                 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1360                 if (ret)
1361                         goto error;
1362         } else {
1363                 /* Add broadcast/multicast flows. */
1364                 for (i = 0; i != vlan_filter_n; ++i) {
1365                         uint16_t vlan = priv->vlan_filter[i];
1366
1367                         struct rte_flow_item_vlan vlan_spec = {
1368                                 .tci = rte_cpu_to_be_16(vlan),
1369                         };
1370                         struct rte_flow_item_vlan vlan_mask =
1371                                 rte_flow_item_vlan_mask;
1372
1373                         ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1374                                                   &vlan_spec, &vlan_mask);
1375                         if (ret)
1376                                 goto error;
1377                         ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1378                                                   &ipv6_multi_mask,
1379                                                   &vlan_spec, &vlan_mask);
1380                         if (ret)
1381                                 goto error;
1382                 }
1383                 if (!vlan_filter_n) {
1384                         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1385                         if (ret)
1386                                 goto error;
1387                         ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1388                                              &ipv6_multi_mask);
1389                         if (ret) {
1390                                 /* Do not fail on IPv6 multicast creation failure. */
1391                                 DRV_LOG(WARNING,
1392                                         "IPv6 multicast is not supported");
1393                                 ret = 0;
1394                         }
1395                 }
1396         }
1397         /* Add MAC address flows. */
1398         for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1399                 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1400
1401                 if (!memcmp(mac, &cmp, sizeof(*mac)))
1402                         continue;
1403                 memcpy(&unicast.dst.addr_bytes,
1404                        mac->addr_bytes,
1405                        RTE_ETHER_ADDR_LEN);
1406                 for (j = 0; j != vlan_filter_n; ++j) {
1407                         uint16_t vlan = priv->vlan_filter[j];
1408
1409                         struct rte_flow_item_vlan vlan_spec = {
1410                                 .tci = rte_cpu_to_be_16(vlan),
1411                         };
1412                         struct rte_flow_item_vlan vlan_mask =
1413                                 rte_flow_item_vlan_mask;
1414
1415                         ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1416                                                   &unicast_mask,
1417                                                   &vlan_spec,
1418                                                   &vlan_mask);
1419                         if (ret)
1420                                 goto error;
1421                 }
1422                 if (!vlan_filter_n) {
1423                         ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1424                         if (ret)
1425                                 goto error;
1426                 }
1427         }
1428         return 0;
1429 error:
1430         ret = rte_errno; /* Save rte_errno before cleanup. */
1431         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1432         rte_errno = ret; /* Restore rte_errno. */
1433         return -rte_errno;
1434 }
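
/*
 * Editor's note: a minimal sketch (not driver code) showing how one more
 * control flow could be installed with the same mlx5_ctrl_flow() helper used
 * above, here matching a single multicast destination MAC. The function name
 * example_enable_lldp_flow() and the guard macro are hypothetical.
 */
#ifdef MLX5_TRIGGER_USAGE_EXAMPLES
static int
example_enable_lldp_flow(struct rte_eth_dev *dev)
{
        /* LLDP nearest-bridge multicast address 01:80:c2:00:00:0e. */
        struct rte_flow_item_eth spec = {
                .dst.addr_bytes = "\x01\x80\xc2\x00\x00\x0e",
        };
        struct rte_flow_item_eth mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };

        return mlx5_ctrl_flow(dev, &spec, &mask);
}
#endif /* MLX5_TRIGGER_USAGE_EXAMPLES */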
1435
1437 /**
1438  * Disable traffic flows configured by control plane
1439  *
1440  * @param dev
1441  *   Pointer to Ethernet device structure.
1442  */
1443 void
1444 mlx5_traffic_disable(struct rte_eth_dev *dev)
1445 {
1446         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1447 }
1448
1449 /**
1450  * Restart traffic flows configured by control plane
1451  *
1452  * @param dev
1453  *   Pointer to Ethernet device structure.
1454  *
1455  * @return
1456  *   0 on success, a negative errno value otherwise and rte_errno is set.
1457  */
1458 int
1459 mlx5_traffic_restart(struct rte_eth_dev *dev)
1460 {
1461         if (dev->data->dev_started) {
1462                 mlx5_traffic_disable(dev);
1463                 return mlx5_traffic_enable(dev);
1464         }
1465         return 0;
1466 }
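
/*
 * Editor's note: a hedged usage sketch (not driver code). Configuration
 * callbacks that change MAC addresses, VLAN filters or promiscuous mode on a
 * running port typically rebuild the control flows through
 * mlx5_traffic_restart() above; the call is a no-op while the port is
 * stopped. example_apply_vlan_filter() and the guard macro are hypothetical.
 */
#ifdef MLX5_TRIGGER_USAGE_EXAMPLES
static int
example_apply_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
        struct mlx5_priv *priv = dev->data->dev_private;

        /* The real callback would update priv->vlan_filter[] here (elided). */
        (void)priv;
        (void)vlan_id;
        (void)on;
        /* Drop and re-create all control flows for the new filter set. */
        return mlx5_traffic_restart(dev);
}
#endif /* MLX5_TRIGGER_USAGE_EXAMPLES */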