drivers/net/mlx5/mlx5_trigger.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32         struct mlx5_priv *priv = dev->data->dev_private;
33         unsigned int i;
34
35         for (i = 0; i != priv->txqs_n; ++i)
36                 mlx5_txq_release(dev, i);
37 }
38
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51         struct mlx5_priv *priv = dev->data->dev_private;
52         unsigned int i;
53         int ret;
54
55         for (i = 0; i != priv->txqs_n; ++i) {
56                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57                 struct mlx5_txq_data *txq_data;
58                 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59                 if (!txq_ctrl)
60                         continue;
61                 txq_data = &txq_ctrl->txq;
62                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63                         txq_alloc_elts(txq_ctrl);
64                 MLX5_ASSERT(!txq_ctrl->obj);
65                 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66                                             0, txq_ctrl->socket);
67                 if (!txq_ctrl->obj) {
68                         DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69                                 "memory resources.", dev->data->port_id,
70                                 txq_data->idx);
71                         rte_errno = ENOMEM;
72                         goto error;
73                 }
74                 ret = priv->obj_ops.txq_obj_new(dev, i);
75                 if (ret < 0) {
76                         mlx5_free(txq_ctrl->obj);
77                         txq_ctrl->obj = NULL;
78                         goto error;
79                 }
80                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81                         size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82
83                         txq_data->fcqs = mlx5_malloc(flags, size,
84                                                      RTE_CACHE_LINE_SIZE,
85                                                      txq_ctrl->socket);
86                         if (!txq_data->fcqs) {
87                                 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88                                         "allocate memory (FCQ).",
89                                         dev->data->port_id, i);
90                                 rte_errno = ENOMEM;
91                                 goto error;
92                         }
93                 }
94                 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95                         dev->data->port_id, i, (void *)&txq_ctrl->obj);
96                 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97         }
98         return 0;
99 error:
100         ret = rte_errno; /* Save rte_errno before cleanup. */
101         do {
102                 mlx5_txq_release(dev, i);
103         } while (i-- != 0);
104         rte_errno = ret; /* Restore rte_errno. */
105         return -rte_errno;
106 }
107
108 /**
109  * Register Rx queue mempools and fill the Rx queue cache.
110  * This function tolerates repeated mempool registration.
111  *
112  * @param[in] rxq_ctrl
113  *   Rx queue control data.
114  *
115  * @return
116  *   0 on success, (-1) on failure and rte_errno is set.
117  */
118 static int
119 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
120 {
121         struct rte_mempool *mp;
122         uint32_t s;
123         int ret = 0;
124
125         mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
126         /* MPRQ mempool is registered on creation, just fill the cache. */
127         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
128                 return mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
129                                                       rxq_ctrl->rxq.mprq_mp);
130         for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
131                 bool is_extmem;
132
133                 mp = rxq_ctrl->rxq.rxseg[s].mp;
134                 is_extmem = (rte_pktmbuf_priv_flags(mp) &
135                              RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
136                 ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
137                                                is_extmem);
138                 if (ret < 0 && rte_errno != EEXIST)
139                         return ret;
140                 ret = mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
141                                                      mp);
142                 if (ret < 0)
143                         return ret;
144         }
145         return 0;
146 }
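
/*
 * Illustrative sketch, not part of the driver: the is_extmem check above
 * keys off RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF, which an application would
 * typically get by creating the Rx mempool from pinned external memory,
 * e.g. with rte_pktmbuf_pool_create_extbuf(). The helper name and the
 * sizes below are arbitrary example values.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLE /* documentation only, never compiled */
static struct rte_mempool *
example_create_pinned_extbuf_pool(void *ext_va, rte_iova_t ext_iova,
                                  size_t ext_len, int socket)
{
        struct rte_pktmbuf_extmem ext = {
                .buf_ptr = ext_va,
                .buf_iova = ext_iova,
                .buf_len = ext_len,
                .elt_size = 2048,
        };

        /* Mbuf data buffers live in the pinned external area; the PMD
         * detects this via rte_pktmbuf_priv_flags() as shown above. */
        return rte_pktmbuf_pool_create_extbuf("rx_ext_pool", 8192, 256, 0,
                                              2048, socket, &ext, 1);
}
#endif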
147
148 /**
149  * Stop traffic on Rx queues.
150  *
151  * @param dev
152  *   Pointer to Ethernet device structure.
153  */
154 static void
155 mlx5_rxq_stop(struct rte_eth_dev *dev)
156 {
157         struct mlx5_priv *priv = dev->data->dev_private;
158         unsigned int i;
159
160         for (i = 0; i != priv->rxqs_n; ++i)
161                 mlx5_rxq_release(dev, i);
162 }
163
164 static int
165 mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
166                       unsigned int idx)
167 {
168         int ret = 0;
169
170         if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
171                 /*
172                  * Pre-register the mempools. Regardless of whether
173                  * the implicit registration is enabled or not,
174                  * Rx mempool destruction is tracked to free MRs.
175                  */
176                 if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
177                         return -rte_errno;
178                 ret = rxq_alloc_elts(rxq_ctrl);
179                 if (ret)
180                         return ret;
181         }
182         MLX5_ASSERT(!rxq_ctrl->obj);
183         rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
184                                     sizeof(*rxq_ctrl->obj), 0,
185                                     rxq_ctrl->socket);
186         if (!rxq_ctrl->obj) {
187                 DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
188                         dev->data->port_id, idx);
189                 rte_errno = ENOMEM;
190                 return -rte_errno;
191         }
192         DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
193                 idx, (void *)&rxq_ctrl->obj);
194         return 0;
195 }
196
197 /**
198  * Start traffic on Rx queues.
199  *
200  * @param dev
201  *   Pointer to Ethernet device structure.
202  *
203  * @return
204  *   0 on success, a negative errno value otherwise and rte_errno is set.
205  */
206 static int
207 mlx5_rxq_start(struct rte_eth_dev *dev)
208 {
209         struct mlx5_priv *priv = dev->data->dev_private;
210         unsigned int i;
211         int ret = 0;
212
213         /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
214         if (mlx5_mprq_alloc_mp(dev)) {
215                 /* Should not release Rx queues but return immediately. */
216                 return -rte_errno;
217         }
218         DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
219                 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
220         DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
221                 dev->data->port_id, priv->sh->device_attr.max_sge);
222         for (i = 0; i != priv->rxqs_n; ++i) {
223                 struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
224                 struct mlx5_rxq_ctrl *rxq_ctrl;
225
226                 if (rxq == NULL)
227                         continue;
228                 rxq_ctrl = rxq->ctrl;
229                 if (!rxq_ctrl->started) {
230                         if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
231                                 goto error;
232                         LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
233                 }
234                 ret = priv->obj_ops.rxq_obj_new(rxq);
235                 if (ret) {
236                         mlx5_free(rxq_ctrl->obj);
237                         rxq_ctrl->obj = NULL;
238                         goto error;
239                 }
240                 rxq_ctrl->started = true;
241         }
242         return 0;
243 error:
244         ret = rte_errno; /* Save rte_errno before cleanup. */
245         do {
246                 mlx5_rxq_release(dev, i);
247         } while (i-- != 0);
248         rte_errno = ret; /* Restore rte_errno. */
249         return -rte_errno;
250 }
251
252 /**
253  * Binds Tx queues to Rx queues for hairpin.
254  *
255  * Binds Tx queues to the target Rx queues.
256  *
257  * @param dev
258  *   Pointer to Ethernet device structure.
259  *
260  * @return
261  *   0 on success, a negative errno value otherwise and rte_errno is set.
262  */
263 static int
264 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
265 {
266         struct mlx5_priv *priv = dev->data->dev_private;
267         struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
268         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
269         struct mlx5_txq_ctrl *txq_ctrl;
270         struct mlx5_rxq_priv *rxq;
271         struct mlx5_rxq_ctrl *rxq_ctrl;
272         struct mlx5_devx_obj *sq;
273         struct mlx5_devx_obj *rq;
274         unsigned int i;
275         int ret = 0;
276         bool need_auto = false;
277         uint16_t self_port = dev->data->port_id;
278
279         for (i = 0; i != priv->txqs_n; ++i) {
280                 txq_ctrl = mlx5_txq_get(dev, i);
281                 if (!txq_ctrl)
282                         continue;
283                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
284                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
285                         mlx5_txq_release(dev, i);
286                         continue;
287                 }
288                 if (txq_ctrl->hairpin_conf.manual_bind) {
289                         mlx5_txq_release(dev, i);
290                         return 0;
291                 }
292                 need_auto = true;
293                 mlx5_txq_release(dev, i);
294         }
295         if (!need_auto)
296                 return 0;
297         for (i = 0; i != priv->txqs_n; ++i) {
298                 txq_ctrl = mlx5_txq_get(dev, i);
299                 if (!txq_ctrl)
300                         continue;
301                 /* Skip hairpin queues with other peer ports. */
302                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
303                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
304                         mlx5_txq_release(dev, i);
305                         continue;
306                 }
307                 if (!txq_ctrl->obj) {
308                         rte_errno = ENOMEM;
309                         DRV_LOG(ERR, "port %u no txq object found: %d",
310                                 dev->data->port_id, i);
311                         mlx5_txq_release(dev, i);
312                         return -rte_errno;
313                 }
314                 sq = txq_ctrl->obj->sq;
315                 rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
316                 if (rxq == NULL) {
317                         mlx5_txq_release(dev, i);
318                         rte_errno = EINVAL;
319                         DRV_LOG(ERR, "port %u no rxq object found: %d",
320                                 dev->data->port_id,
321                                 txq_ctrl->hairpin_conf.peers[0].queue);
322                         return -rte_errno;
323                 }
324                 rxq_ctrl = rxq->ctrl;
325                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
326                     rxq->hairpin_conf.peers[0].queue != i) {
327                         rte_errno = ENOMEM;
328                         DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
329                                 "Rx queue %d", dev->data->port_id,
330                                 i, txq_ctrl->hairpin_conf.peers[0].queue);
331                         goto error;
332                 }
333                 rq = rxq_ctrl->obj->rq;
334                 if (!rq) {
335                         rte_errno = ENOMEM;
336                         DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
337                                 dev->data->port_id,
338                                 txq_ctrl->hairpin_conf.peers[0].queue);
339                         goto error;
340                 }
341                 sq_attr.state = MLX5_SQC_STATE_RDY;
342                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
343                 sq_attr.hairpin_peer_rq = rq->id;
344                 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
345                 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
346                 if (ret)
347                         goto error;
348                 rq_attr.state = MLX5_SQC_STATE_RDY;
349                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
350                 rq_attr.hairpin_peer_sq = sq->id;
351                 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
352                 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
353                 if (ret)
354                         goto error;
355                 /* Qs with auto-bind will be destroyed directly. */
356                 rxq->hairpin_status = 1;
357                 txq_ctrl->hairpin_status = 1;
358                 mlx5_txq_release(dev, i);
359         }
360         return 0;
361 error:
362         mlx5_txq_release(dev, i);
363         return -rte_errno;
364 }
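
/*
 * Illustrative sketch, not part of the driver: the single-port hairpin
 * configuration that the auto-bind path above handles. With manual_bind
 * equal to 0 and the peer port equal to the local port, the SQ and RQ are
 * paired during rte_eth_dev_start(). The helper name and the descriptor
 * count are arbitrary example values.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLE /* documentation only, never compiled */
static int
example_setup_loopback_hairpin(uint16_t port_id, uint16_t rxq_id,
                               uint16_t txq_id)
{
        struct rte_eth_hairpin_conf conf = {
                .peer_count = 1,
                .manual_bind = 0,       /* let the PMD auto-bind on start */
                .tx_explicit = 0,       /* PMD inserts the implicit Tx flow */
        };
        int ret;

        conf.peers[0].port = port_id;   /* same port: loopback hairpin */
        conf.peers[0].queue = txq_id;
        ret = rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, 512, &conf);
        if (ret != 0)
                return ret;
        conf.peers[0].queue = rxq_id;
        return rte_eth_tx_hairpin_queue_setup(port_id, txq_id, 512, &conf);
}
#endif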
365
366 /*
367  * Fetch the peer queue's SW & HW information.
368  *
369  * @param dev
370  *   Pointer to Ethernet device structure.
371  * @param peer_queue
372  *   Index of the queue to fetch the information.
373  * @param current_info
374  *   Pointer to the input peer information, not used currently.
375  * @param peer_info
376  *   Pointer to the structure to store the information, output.
377  * @param direction
378  *   Positive to get the RxQ information, zero to get the TxQ information.
379  *
380  * @return
381  *   0 on success, a negative errno value otherwise and rte_errno is set.
382  */
383 int
384 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
385                                struct rte_hairpin_peer_info *current_info,
386                                struct rte_hairpin_peer_info *peer_info,
387                                uint32_t direction)
388 {
389         struct mlx5_priv *priv = dev->data->dev_private;
390         RTE_SET_USED(current_info);
391
392         if (dev->data->dev_started == 0) {
393                 rte_errno = EBUSY;
394                 DRV_LOG(ERR, "peer port %u is not started",
395                         dev->data->port_id);
396                 return -rte_errno;
397         }
398         /*
399          * Peer port used as egress. In the current design, hairpin Tx queue
400          * will be bound to the peer Rx queue. Indeed, only the information of
401          * peer Rx queue needs to be fetched.
402          */
403         if (direction == 0) {
404                 struct mlx5_txq_ctrl *txq_ctrl;
405
406                 txq_ctrl = mlx5_txq_get(dev, peer_queue);
407                 if (txq_ctrl == NULL) {
408                         rte_errno = EINVAL;
409                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
410                                 dev->data->port_id, peer_queue);
411                         return -rte_errno;
412                 }
413                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
414                         rte_errno = EINVAL;
415                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
416                                 dev->data->port_id, peer_queue);
417                         mlx5_txq_release(dev, peer_queue);
418                         return -rte_errno;
419                 }
420                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
421                         rte_errno = ENOMEM;
422                         DRV_LOG(ERR, "port %u no Txq object found: %d",
423                                 dev->data->port_id, peer_queue);
424                         mlx5_txq_release(dev, peer_queue);
425                         return -rte_errno;
426                 }
427                 peer_info->qp_id = txq_ctrl->obj->sq->id;
428                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
429                 /* 1-to-1 mapping, only the first one is used. */
430                 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
431                 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
432                 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
433                 mlx5_txq_release(dev, peer_queue);
434         } else { /* Peer port used as ingress. */
435                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
436                 struct mlx5_rxq_ctrl *rxq_ctrl;
437
438                 if (rxq == NULL) {
439                         rte_errno = EINVAL;
440                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
441                                 dev->data->port_id, peer_queue);
442                         return -rte_errno;
443                 }
444                 rxq_ctrl = rxq->ctrl;
445                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
446                         rte_errno = EINVAL;
447                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
448                                 dev->data->port_id, peer_queue);
449                         return -rte_errno;
450                 }
451                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
452                         rte_errno = ENOMEM;
453                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
454                                 dev->data->port_id, peer_queue);
455                         return -rte_errno;
456                 }
457                 peer_info->qp_id = rxq_ctrl->obj->rq->id;
458                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
459                 peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
460                 peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
461                 peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
462         }
463         return 0;
464 }
465
466 /*
467  * Bind the hairpin queue with the peer HW information.
468  * This needs to be called twice both for Tx and Rx queues of a pair.
469  * This needs to be called twice, for both the Tx and Rx queues of a pair.
470  *
471  * @param dev
472  *   Pointer to Ethernet device structure.
473  * @param cur_queue
474  *   Index of the queue to change the HW configuration to bind.
475  * @param peer_info
476  *   Pointer to information of the peer queue.
477  * @param direction
478  *   Positive to configure the TxQ, zero to configure the RxQ.
479  *
480  * @return
481  *   0 on success, a negative errno value otherwise and rte_errno is set.
482  */
483 int
484 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
485                              struct rte_hairpin_peer_info *peer_info,
486                              uint32_t direction)
487 {
488         int ret = 0;
489
490         /*
491          * Consistency checking of the peer queue: opposite direction is used
492          * to get the peer queue info with ethdev port ID, no need to check.
493          */
494         if (peer_info->peer_q != cur_queue) {
495                 rte_errno = EINVAL;
496                 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
497                         dev->data->port_id, cur_queue, peer_info->peer_q);
498                 return -rte_errno;
499         }
500         if (direction != 0) {
501                 struct mlx5_txq_ctrl *txq_ctrl;
502                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
503
504                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
505                 if (txq_ctrl == NULL) {
506                         rte_errno = EINVAL;
507                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
508                                 dev->data->port_id, cur_queue);
509                         return -rte_errno;
510                 }
511                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
512                         rte_errno = EINVAL;
513                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
514                                 dev->data->port_id, cur_queue);
515                         mlx5_txq_release(dev, cur_queue);
516                         return -rte_errno;
517                 }
518                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
519                         rte_errno = ENOMEM;
520                         DRV_LOG(ERR, "port %u no Txq object found: %d",
521                                 dev->data->port_id, cur_queue);
522                         mlx5_txq_release(dev, cur_queue);
523                         return -rte_errno;
524                 }
525                 if (txq_ctrl->hairpin_status != 0) {
526                         DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
527                                 dev->data->port_id, cur_queue);
528                         mlx5_txq_release(dev, cur_queue);
529                         return 0;
530                 }
531                 /*
532                  * Consistency checking of all queues of one port is done in
533                  * the bind() function, and that check is optional.
534                  */
535                 if (peer_info->tx_explicit !=
536                     txq_ctrl->hairpin_conf.tx_explicit) {
537                         rte_errno = EINVAL;
538                         DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
539                                 " mismatch", dev->data->port_id, cur_queue);
540                         mlx5_txq_release(dev, cur_queue);
541                         return -rte_errno;
542                 }
543                 if (peer_info->manual_bind !=
544                     txq_ctrl->hairpin_conf.manual_bind) {
545                         rte_errno = EINVAL;
546                         DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
547                                 " mismatch", dev->data->port_id, cur_queue);
548                         mlx5_txq_release(dev, cur_queue);
549                         return -rte_errno;
550                 }
551                 sq_attr.state = MLX5_SQC_STATE_RDY;
552                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
553                 sq_attr.hairpin_peer_rq = peer_info->qp_id;
554                 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
555                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
556                 if (ret == 0)
557                         txq_ctrl->hairpin_status = 1;
558                 mlx5_txq_release(dev, cur_queue);
559         } else {
560                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
561                 struct mlx5_rxq_ctrl *rxq_ctrl;
562                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
563
564                 if (rxq == NULL) {
565                         rte_errno = EINVAL;
566                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
567                                 dev->data->port_id, cur_queue);
568                         return -rte_errno;
569                 }
570                 rxq_ctrl = rxq->ctrl;
571                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
572                         rte_errno = EINVAL;
573                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
574                                 dev->data->port_id, cur_queue);
575                         return -rte_errno;
576                 }
577                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
578                         rte_errno = ENOMEM;
579                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
580                                 dev->data->port_id, cur_queue);
581                         return -rte_errno;
582                 }
583                 if (rxq->hairpin_status != 0) {
584                         DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
585                                 dev->data->port_id, cur_queue);
586                         return 0;
587                 }
588                 if (peer_info->tx_explicit !=
589                     rxq->hairpin_conf.tx_explicit) {
590                         rte_errno = EINVAL;
591                         DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
592                                 " mismatch", dev->data->port_id, cur_queue);
593                         return -rte_errno;
594                 }
595                 if (peer_info->manual_bind !=
596                     rxq->hairpin_conf.manual_bind) {
597                         rte_errno = EINVAL;
598                         DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
599                                 " mismatch", dev->data->port_id, cur_queue);
600                         return -rte_errno;
601                 }
602                 rq_attr.state = MLX5_SQC_STATE_RDY;
603                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
604                 rq_attr.hairpin_peer_sq = peer_info->qp_id;
605                 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
606                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
607                 if (ret == 0)
608                         rxq->hairpin_status = 1;
609         }
610         return ret;
611 }
612
613 /*
614  * Unbind the hairpin queue and reset its HW configuration.
615  * This needs to be called twice, for both the Tx and Rx queues of a pair.
616  * If the queue is already unbound, it is considered successful.
617  *
618  * @param dev
619  *   Pointer to Ethernet device structure.
620  * @param cur_queue
621  *   Index of the queue to change the HW configuration to unbind.
622  * @param direction
623  *   Positive to reset the TxQ, zero to reset the RxQ.
624  *
625  * @return
626  *   0 on success, a negative errno value otherwise and rte_errno is set.
627  */
628 int
629 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
630                                uint32_t direction)
631 {
632         int ret = 0;
633
634         if (direction != 0) {
635                 struct mlx5_txq_ctrl *txq_ctrl;
636                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
637
638                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
639                 if (txq_ctrl == NULL) {
640                         rte_errno = EINVAL;
641                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
642                                 dev->data->port_id, cur_queue);
643                         return -rte_errno;
644                 }
645                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
646                         rte_errno = EINVAL;
647                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
648                                 dev->data->port_id, cur_queue);
649                         mlx5_txq_release(dev, cur_queue);
650                         return -rte_errno;
651                 }
652                 /* Already unbound, return success before obj checking. */
653                 if (txq_ctrl->hairpin_status == 0) {
654                         DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
655                                 dev->data->port_id, cur_queue);
656                         mlx5_txq_release(dev, cur_queue);
657                         return 0;
658                 }
659                 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
660                         rte_errno = ENOMEM;
661                         DRV_LOG(ERR, "port %u no Txq object found: %d",
662                                 dev->data->port_id, cur_queue);
663                         mlx5_txq_release(dev, cur_queue);
664                         return -rte_errno;
665                 }
666                 sq_attr.state = MLX5_SQC_STATE_RST;
667                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
668                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
669                 if (ret == 0)
670                         txq_ctrl->hairpin_status = 0;
671                 mlx5_txq_release(dev, cur_queue);
672         } else {
673                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
674                 struct mlx5_rxq_ctrl *rxq_ctrl;
675                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
676
677                 if (rxq == NULL) {
678                         rte_errno = EINVAL;
679                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
680                                 dev->data->port_id, cur_queue);
681                         return -rte_errno;
682                 }
683                 rxq_ctrl = rxq->ctrl;
684                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
685                         rte_errno = EINVAL;
686                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
687                                 dev->data->port_id, cur_queue);
688                         return -rte_errno;
689                 }
690                 if (rxq->hairpin_status == 0) {
691                         DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
692                                 dev->data->port_id, cur_queue);
693                         return 0;
694                 }
695                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
696                         rte_errno = ENOMEM;
697                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
698                                 dev->data->port_id, cur_queue);
699                         return -rte_errno;
700                 }
701                 rq_attr.state = MLX5_SQC_STATE_RST;
702                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
703                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
704                 if (ret == 0)
705                         rxq->hairpin_status = 0;
706         }
707         return ret;
708 }
709
710 /*
711  * Bind the hairpin port pairs, from the Tx to the peer Rx.
712  * This function only supports binding the Tx to one Rx.
713  *
714  * @param dev
715  *   Pointer to Ethernet device structure.
716  * @param rx_port
717  *   Port identifier of the Rx port.
718  *
719  * @return
720  *   0 on success, a negative errno value otherwise and rte_errno is set.
721  */
722 static int
723 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
724 {
725         struct mlx5_priv *priv = dev->data->dev_private;
726         int ret = 0;
727         struct mlx5_txq_ctrl *txq_ctrl;
728         uint32_t i;
729         struct rte_hairpin_peer_info peer = {0xffffff};
730         struct rte_hairpin_peer_info cur;
731         const struct rte_eth_hairpin_conf *conf;
732         uint16_t num_q = 0;
733         uint16_t local_port = priv->dev_data->port_id;
734         uint32_t manual;
735         uint32_t explicit;
736         uint16_t rx_queue;
737
738         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
739                 rte_errno = ENODEV;
740                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
741                 return -rte_errno;
742         }
743         /*
744          * Before binding a TxQ to its peer RxQ, a first pass over the queues
745          * checks their configuration consistency. This costs a little time
746          * but is better than having to roll back afterwards.
747          */
748         for (i = 0; i != priv->txqs_n; i++) {
749                 txq_ctrl = mlx5_txq_get(dev, i);
750                 if (txq_ctrl == NULL)
751                         continue;
752                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
753                         mlx5_txq_release(dev, i);
754                         continue;
755                 }
756                 /*
757                  * All hairpin Tx queues of a single port that are connected to
758                  * the same peer Rx port should have the same "auto binding" and
759                  * "implicit Tx flow" modes.
760                  * Peer consistency checking is done during per-queue binding.
761                  */
762                 conf = &txq_ctrl->hairpin_conf;
763                 if (conf->peers[0].port == rx_port) {
764                         if (num_q == 0) {
765                                 manual = conf->manual_bind;
766                                 explicit = conf->tx_explicit;
767                         } else {
768                                 if (manual != conf->manual_bind ||
769                                     explicit != conf->tx_explicit) {
770                                         rte_errno = EINVAL;
771                                         DRV_LOG(ERR, "port %u queue %d mode"
772                                                 " mismatch: %u %u, %u %u",
773                                                 local_port, i, manual,
774                                                 conf->manual_bind, explicit,
775                                                 conf->tx_explicit);
776                                         mlx5_txq_release(dev, i);
777                                         return -rte_errno;
778                                 }
779                         }
780                         num_q++;
781                 }
782                 mlx5_txq_release(dev, i);
783         }
784         /* If no queue is configured, return success directly. */
785         if (num_q == 0)
786                 return ret;
787         /* All the hairpin TX queues need to be traversed again. */
788         for (i = 0; i != priv->txqs_n; i++) {
789                 txq_ctrl = mlx5_txq_get(dev, i);
790                 if (txq_ctrl == NULL)
791                         continue;
792                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
793                         mlx5_txq_release(dev, i);
794                         continue;
795                 }
796                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
797                         mlx5_txq_release(dev, i);
798                         continue;
799                 }
800                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
801                 /*
802                  * Fetch peer RxQ's information.
803                  * No need to pass the information of the current queue.
804                  */
805                 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
806                                                         NULL, &peer, 1);
807                 if (ret != 0) {
808                         mlx5_txq_release(dev, i);
809                         goto error;
810                 }
811                 /* Accessing its own device, inside mlx5 PMD. */
812                 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
813                 if (ret != 0) {
814                         mlx5_txq_release(dev, i);
815                         goto error;
816                 }
817                 /* Pass TxQ's information to peer RxQ and try binding. */
818                 cur.peer_q = rx_queue;
819                 cur.qp_id = txq_ctrl->obj->sq->id;
820                 cur.vhca_id = priv->config.hca_attr.vhca_id;
821                 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
822                 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
823                 /*
824                  * To access another device in a proper way, an RTE-level
825                  * private function is needed.
826                  */
827                 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
828                                                       &cur, 0);
829                 if (ret != 0) {
830                         mlx5_txq_release(dev, i);
831                         goto error;
832                 }
833                 mlx5_txq_release(dev, i);
834         }
835         return 0;
836 error:
837         /*
838          * Roll back the queues that were already bound.
839          * No need to check the return value of the queue unbind function.
840          */
841         do {
842                 /* No validation is needed here. */
843                 txq_ctrl = mlx5_txq_get(dev, i);
844                 if (txq_ctrl == NULL)
845                         continue;
846                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
847                 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
848                 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
849                 mlx5_txq_release(dev, i);
850         } while (i--);
851         return ret;
852 }
853
854 /*
855  * Unbind the hairpin port pair; the HW configuration of both devices will be
856  * cleared and the status reset for all the queues used between them.
857  * This function only supports unbinding the Tx from one Rx.
858  *
859  * @param dev
860  *   Pointer to Ethernet device structure.
861  * @param rx_port
862  *   Port identifier of the Rx port.
863  *
864  * @return
865  *   0 on success, a negative errno value otherwise and rte_errno is set.
866  */
867 static int
868 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
869 {
870         struct mlx5_priv *priv = dev->data->dev_private;
871         struct mlx5_txq_ctrl *txq_ctrl;
872         uint32_t i;
873         int ret;
874         uint16_t cur_port = priv->dev_data->port_id;
875
876         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
877                 rte_errno = ENODEV;
878                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
879                 return -rte_errno;
880         }
881         for (i = 0; i != priv->txqs_n; i++) {
882                 uint16_t rx_queue;
883
884                 txq_ctrl = mlx5_txq_get(dev, i);
885                 if (txq_ctrl == NULL)
886                         continue;
887                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
888                         mlx5_txq_release(dev, i);
889                         continue;
890                 }
891                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
892                         mlx5_txq_release(dev, i);
893                         continue;
894                 }
895                 /* Only the first used queue needs to be checked. */
896                 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
897                         if (cur_port != rx_port) {
898                                 rte_errno = EINVAL;
899                                 DRV_LOG(ERR, "port %u and port %u are in"
900                                         " auto-bind mode", cur_port, rx_port);
901                                 mlx5_txq_release(dev, i);
902                                 return -rte_errno;
903                         } else {
904                                 return 0;
905                         }
906                 }
907                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
908                 mlx5_txq_release(dev, i);
909                 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
910                 if (ret) {
911                         DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
912                                 rx_port, rx_queue);
913                         return ret;
914                 }
915                 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
916                 if (ret) {
917                         DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
918                                 cur_port, i);
919                         return ret;
920                 }
921         }
922         return 0;
923 }
924
925 /*
926  * Bind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
927  * @see mlx5_hairpin_bind_single_port()
928  */
929 int
930 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
931 {
932         int ret = 0;
933         uint16_t p, pp;
934
935         /*
936          * If the Rx port has no hairpin configuration with the current port,
937          * the binding is skipped inside the single-port bind function.
938          * The device started status is checked only before the queue
939          * information is updated.
940          */
941         if (rx_port == RTE_MAX_ETHPORTS) {
942                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
943                         ret = mlx5_hairpin_bind_single_port(dev, p);
944                         if (ret != 0)
945                                 goto unbind;
946                 }
947                 return ret;
948         } else {
949                 return mlx5_hairpin_bind_single_port(dev, rx_port);
950         }
951 unbind:
952         MLX5_ETH_FOREACH_DEV(pp, dev->device)
953                 if (pp < p)
954                         mlx5_hairpin_unbind_single_port(dev, pp);
955         return ret;
956 }
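
/*
 * Illustrative sketch, not part of the driver: with manual_bind set on
 * both sides, an application triggers the binding explicitly after both
 * ports are started. rte_eth_hairpin_bind() is the public ethdev call
 * that reaches mlx5_hairpin_bind() above; the helper name is an
 * arbitrary example.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLE /* documentation only, never compiled */
static int
example_manual_bind_pair(uint16_t tx_port, uint16_t rx_port)
{
        int ret;

        /* Bind the Tx side of tx_port to the Rx side of rx_port. */
        ret = rte_eth_hairpin_bind(tx_port, rx_port);
        if (ret != 0)
                return ret;
        /* Bind the reverse direction if traffic flows both ways. */
        return rte_eth_hairpin_bind(rx_port, tx_port);
}
#endif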
957
958 /*
959  * Unbind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
960  * @see mlx5_hairpin_unbind_single_port()
961  */
962 int
963 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
964 {
965         int ret = 0;
966         uint16_t p;
967
968         if (rx_port == RTE_MAX_ETHPORTS)
969                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
970                         ret = mlx5_hairpin_unbind_single_port(dev, p);
971                         if (ret != 0)
972                                 return ret;
973                 }
974         else
975                 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
976         return ret;
977 }
978
979 /*
980  * DPDK callback to get the hairpin peer ports list.
981  * This will return the actual number of peer ports and save the identifiers
982  * into the array (sorted; the order may differ from the one used when
983  * setting up the hairpin peer queues).
984  * The peer port ID could be the same as the port ID of the current device.
985  *
986  * @param dev
987  *   Pointer to Ethernet device structure.
988  * @param peer_ports
989  *   Pointer to array to save the port identifiers.
990  * @param len
991  *   The length of the array.
992  * @param direction
993  *   Current port to peer port direction.
994  *   positive - current used as Tx to get all peer Rx ports.
995  *   zero - current used as Rx to get all peer Tx ports.
996  *
997  * @return
998  *   0 or positive value on success, actual number of peer ports.
999  *   a negative errno value otherwise and rte_errno is set.
1000  */
1001 int
1002 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1003                             size_t len, uint32_t direction)
1004 {
1005         struct mlx5_priv *priv = dev->data->dev_private;
1006         struct mlx5_txq_ctrl *txq_ctrl;
1007         uint32_t i;
1008         uint16_t pp;
1009         uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1010         int ret = 0;
1011
1012         if (direction) {
1013                 for (i = 0; i < priv->txqs_n; i++) {
1014                         txq_ctrl = mlx5_txq_get(dev, i);
1015                         if (!txq_ctrl)
1016                                 continue;
1017                         if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1018                                 mlx5_txq_release(dev, i);
1019                                 continue;
1020                         }
1021                         pp = txq_ctrl->hairpin_conf.peers[0].port;
1022                         if (pp >= RTE_MAX_ETHPORTS) {
1023                                 rte_errno = ERANGE;
1024                                 mlx5_txq_release(dev, i);
1025                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1026                                         "out of range %hu",
1027                                         priv->dev_data->port_id, i, pp);
1028                                 return -rte_errno;
1029                         }
1030                         bits[pp / 32] |= 1 << (pp % 32);
1031                         mlx5_txq_release(dev, i);
1032                 }
1033         } else {
1034                 for (i = 0; i < priv->rxqs_n; i++) {
1035                         struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1036                         struct mlx5_rxq_ctrl *rxq_ctrl;
1037
1038                         if (rxq == NULL)
1039                                 continue;
1040                         rxq_ctrl = rxq->ctrl;
1041                         if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
1042                                 continue;
1043                         pp = rxq->hairpin_conf.peers[0].port;
1044                         if (pp >= RTE_MAX_ETHPORTS) {
1045                                 rte_errno = ERANGE;
1046                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1047                                         "out of range %hu",
1048                                         priv->dev_data->port_id, i, pp);
1049                                 return -rte_errno;
1050                         }
1051                         bits[pp / 32] |= 1 << (pp % 32);
1052                 }
1053         }
1054         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1055                 if (bits[i / 32] & (1 << (i % 32))) {
1056                         if ((size_t)ret >= len) {
1057                                 rte_errno = E2BIG;
1058                                 return -rte_errno;
1059                         }
1060                         peer_ports[ret++] = i;
1061                 }
1062         }
1063         return ret;
1064 }
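
/*
 * Illustrative sketch, not part of the driver: a typical caller of the
 * callback above is an application tearing down hairpin bindings before
 * stopping a port. The helper name is an arbitrary example;
 * rte_eth_hairpin_get_peer_ports() and rte_eth_hairpin_unbind() are the
 * public ethdev wrappers.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLE /* documentation only, never compiled */
static void
example_unbind_all_tx_peers(uint16_t port_id)
{
        uint16_t peers[RTE_MAX_ETHPORTS];
        int n, i;

        /* direction == 1: this port is the Tx side, list its Rx peers. */
        n = rte_eth_hairpin_get_peer_ports(port_id, peers, RTE_DIM(peers), 1);
        for (i = 0; i < n; i++)
                rte_eth_hairpin_unbind(port_id, peers[i]);
}
#endif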
1065
1066 /**
1067  * DPDK callback to start the device.
1068  *
1069  * Simulate device start by attaching all configured flows.
1070  *
1071  * @param dev
1072  *   Pointer to Ethernet device structure.
1073  *
1074  * @return
1075  *   0 on success, a negative errno value otherwise and rte_errno is set.
1076  */
1077 int
1078 mlx5_dev_start(struct rte_eth_dev *dev)
1079 {
1080         struct mlx5_priv *priv = dev->data->dev_private;
1081         int ret;
1082         int fine_inline;
1083
1084         DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1085         fine_inline = rte_mbuf_dynflag_lookup
1086                 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1087         if (fine_inline >= 0)
1088                 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1089         else
1090                 rte_net_mlx5_dynf_inline_mask = 0;
1091         if (dev->data->nb_rx_queues > 0) {
1092                 ret = mlx5_dev_configure_rss_reta(dev);
1093                 if (ret) {
1094                         DRV_LOG(ERR, "port %u reta config failed: %s",
1095                                 dev->data->port_id, strerror(rte_errno));
1096                         return -rte_errno;
1097                 }
1098         }
1099         ret = mlx5_txpp_start(dev);
1100         if (ret) {
1101                 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1102                         dev->data->port_id, strerror(rte_errno));
1103                 goto error;
1104         }
1105         if ((priv->sh->devx && priv->config.dv_flow_en &&
1106             priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1107                 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1108                 if (ret)
1109                         goto error;
1110         }
1111         ret = mlx5_txq_start(dev);
1112         if (ret) {
1113                 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1114                         dev->data->port_id, strerror(rte_errno));
1115                 goto error;
1116         }
1117         if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
1118                 if (!priv->config.vf && !priv->config.sf &&
1119                     !priv->representor) {
1120                         ret = mlx5_get_flag_dropless_rq(dev);
1121                         if (ret < 0)
1122                                 DRV_LOG(WARNING,
1123                                         "port %u cannot query dropless flag",
1124                                         dev->data->port_id);
1125                         else if (!ret)
1126                                 DRV_LOG(WARNING,
1127                                         "port %u dropless_rq OFF, no rearming",
1128                                         dev->data->port_id);
1129                 } else {
1130                         DRV_LOG(DEBUG,
1131                                 "port %u doesn't support dropless_rq flag",
1132                                 dev->data->port_id);
1133                 }
1134         }
1135         ret = mlx5_rxq_start(dev);
1136         if (ret) {
1137                 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1138                         dev->data->port_id, strerror(rte_errno));
1139                 goto error;
1140         }
1141         /*
1142          * This step is skipped if there is no hairpin Tx queue configured
1143          * with an Rx peer queue on the same device.
1144          */
1145         ret = mlx5_hairpin_auto_bind(dev);
1146         if (ret) {
1147                 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1148                         dev->data->port_id, strerror(rte_errno));
1149                 goto error;
1150         }
1151         /* Set started flag here for the following steps like control flow. */
1152         dev->data->dev_started = 1;
1153         ret = mlx5_rx_intr_vec_enable(dev);
1154         if (ret) {
1155                 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1156                         dev->data->port_id);
1157                 goto error;
1158         }
1159         mlx5_os_stats_init(dev);
1160         /*
1161          * Attach indirection table objects detached on port stop.
1162          * They may be needed to create RSS in non-isolated mode.
1163          */
1164         ret = mlx5_action_handle_attach(dev);
1165         if (ret) {
1166                 DRV_LOG(ERR,
1167                         "port %u failed to attach indirect actions: %s",
1168                         dev->data->port_id, rte_strerror(rte_errno));
1169                 goto error;
1170         }
1171         ret = mlx5_traffic_enable(dev);
1172         if (ret) {
1173                 DRV_LOG(ERR, "port %u failed to set default flows",
1174                         dev->data->port_id);
1175                 goto error;
1176         }
1177         /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1178         mlx5_flow_rxq_dynf_metadata_set(dev);
1179         /* Set flags and context to convert Rx timestamps. */
1180         mlx5_rxq_timestamp_set(dev);
1181         /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1182         mlx5_txq_dynf_timestamp_set(dev);
1183         /*
1184          * In non-cached mode, only the default mreg copy action needs to be
1185          * started, and no application-created flow exists anymore.
1186          * But it is worth wrapping the interface for further usage.
1187          */
1188         ret = mlx5_flow_start_default(dev);
1189         if (ret) {
1190                 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1191                         dev->data->port_id, strerror(rte_errno));
1192                 goto error;
1193         }
1194         if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1195                 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1196                         dev->data->port_id, rte_strerror(rte_errno));
1197                 goto error;
1198         }
1199         rte_wmb();
1200         dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1201         dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1202         /* Enable datapath on secondary process. */
1203         mlx5_mp_os_req_start_rxtx(dev);
1204         if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1205                 priv->sh->port[priv->dev_port - 1].ih_port_id =
1206                                         (uint32_t)dev->data->port_id;
1207         } else {
1208                 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1209                         dev->data->port_id);
1210                 dev->data->dev_conf.intr_conf.lsc = 0;
1211                 dev->data->dev_conf.intr_conf.rmv = 0;
1212         }
1213         if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1214                 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1215                                         (uint32_t)dev->data->port_id;
1216         return 0;
1217 error:
1218         ret = rte_errno; /* Save rte_errno before cleanup. */
1219         /* Rollback. */
1220         dev->data->dev_started = 0;
1221         mlx5_flow_stop_default(dev);
1222         mlx5_traffic_disable(dev);
1223         mlx5_txq_stop(dev);
1224         mlx5_rxq_stop(dev);
1225         if (priv->obj_ops.lb_dummy_queue_release)
1226                 priv->obj_ops.lb_dummy_queue_release(dev);
1227         mlx5_txpp_stop(dev); /* Stop last. */
1228         rte_errno = ret; /* Restore rte_errno. */
1229         return -rte_errno;
1230 }
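
/*
 * Illustrative sketch, not part of the driver: the ethdev-level sequence
 * that ends up in mlx5_dev_start() above for an mlx5 port. The helper
 * name, queue counts and descriptor numbers are arbitrary example values.
 */
#ifdef MLX5_TRIGGER_DOC_EXAMPLE /* documentation only, never compiled */
static int
example_start_port(uint16_t port_id, struct rte_mempool *mp)
{
        struct rte_eth_conf conf = { 0 };
        int socket = rte_eth_dev_socket_id(port_id);
        int ret;

        ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
        if (ret != 0)
                return ret;
        ret = rte_eth_rx_queue_setup(port_id, 0, 512, socket, NULL, mp);
        if (ret != 0)
                return ret;
        ret = rte_eth_tx_queue_setup(port_id, 0, 512, socket, NULL);
        if (ret != 0)
                return ret;
        /* rte_eth_dev_start() invokes the PMD dev_start callback. */
        return rte_eth_dev_start(port_id);
}
#endif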
1231
1232 /**
1233  * DPDK callback to stop the device.
1234  *
1235  * Simulate device stop by detaching all configured flows.
1236  *
1237  * @param dev
1238  *   Pointer to Ethernet device structure.
1239  */
1240 int
1241 mlx5_dev_stop(struct rte_eth_dev *dev)
1242 {
1243         struct mlx5_priv *priv = dev->data->dev_private;
1244
1245         dev->data->dev_started = 0;
1246         /* Prevent crashes when queues are still in use. */
1247         dev->rx_pkt_burst = removed_rx_burst;
1248         dev->tx_pkt_burst = removed_tx_burst;
1249         rte_wmb();
1250         /* Disable datapath on secondary process. */
1251         mlx5_mp_os_req_stop_rxtx(dev);
1252         rte_delay_us_sleep(1000 * priv->rxqs_n);
1253         DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1254         mlx5_flow_stop_default(dev);
1255         /* Control flows for default traffic can be removed first. */
1256         mlx5_traffic_disable(dev);
1257         /* All RX queue flags will be cleared in the flush interface. */
1258         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1259         mlx5_flow_meter_rxq_flush(dev);
1260         mlx5_action_handle_detach(dev);
1261         mlx5_rx_intr_vec_disable(dev);
1262         priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1263         priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1264         mlx5_txq_stop(dev);
1265         mlx5_rxq_stop(dev);
1266         if (priv->obj_ops.lb_dummy_queue_release)
1267                 priv->obj_ops.lb_dummy_queue_release(dev);
1268         mlx5_txpp_stop(dev);
1269
1270         return 0;
1271 }
1272
1273 /**
1274  * Enable traffic flows configured by control plane
1275  *
1276  * @param dev
1277  *   Pointer to Ethernet device structure. Its private data holds the
1278  *   control-plane configuration (MAC/VLAN filters, promiscuous and
1279  *   all-multicast modes) used to build the default flows.
1280  *
1281  * @return
1282  *   0 on success, a negative errno value otherwise and rte_errno is set.
1283  */
1284 int
1285 mlx5_traffic_enable(struct rte_eth_dev *dev)
1286 {
1287         struct mlx5_priv *priv = dev->data->dev_private;
1288         struct rte_flow_item_eth bcast = {
1289                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1290         };
1291         struct rte_flow_item_eth ipv6_multi_spec = {
1292                 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1293         };
1294         struct rte_flow_item_eth ipv6_multi_mask = {
1295                 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1296         };
1297         struct rte_flow_item_eth unicast = {
1298                 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1299         };
1300         struct rte_flow_item_eth unicast_mask = {
1301                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1302         };
1303         const unsigned int vlan_filter_n = priv->vlan_filter_n;
1304         const struct rte_ether_addr cmp = {
1305                 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1306         };
1307         unsigned int i;
1308         unsigned int j;
1309         int ret;
1310
1311         /*
1312          * The hairpin Tx queue default flow should be created regardless
1313          * of isolation mode. Otherwise all packets to be sent would go out
1314          * directly without the Tx flow actions, e.g. encapsulation.
1315          */
1316         for (i = 0; i != priv->txqs_n; ++i) {
1317                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1318                 if (!txq_ctrl)
1319                         continue;
1320                 /* Only Tx implicit mode requires the default Tx flow. */
1321                 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1322                     txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1323                     txq_ctrl->hairpin_conf.peers[0].port ==
1324                     priv->dev_data->port_id) {
1325                         ret = mlx5_ctrl_flow_source_queue(dev, i);
1326                         if (ret) {
1327                                 mlx5_txq_release(dev, i);
1328                                 goto error;
1329                         }
1330                 }
1331                 if ((priv->representor || priv->master) &&
1332                     priv->config.dv_esw_en) {
1333                         if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1334                                 DRV_LOG(ERR,
1335                                         "Port %u Tx queue %u: failed to create representor DevX SQ default miss rule.",
1336                                         dev->data->port_id, i);
1337                                 goto error;
1338                         }
1339                 }
1340                 mlx5_txq_release(dev, i);
1341         }
1342         if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1343                 if (mlx5_flow_create_esw_table_zero_flow(dev))
1344                         priv->fdb_def_rule = 1;
1345                 else
1346                         DRV_LOG(INFO, "port %u FDB default rule cannot be"
1347                                 " configured - only Eswitch group 0 flows are"
1348                                 " supported.", dev->data->port_id);
1349         }
1350         if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1351                 ret = mlx5_flow_lacp_miss(dev);
1352                 if (ret)
1353                         DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1354                                 "forward LACP to kernel.", dev->data->port_id);
1355                 else
1356                         DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1357                                 dev->data->port_id);
1358         }
1359         if (priv->isolated)
1360                 return 0;
1361         if (dev->data->promiscuous) {
1362                 struct rte_flow_item_eth promisc = {
1363                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1364                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1365                         .type = 0,
1366                 };
1367
1368                 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1369                 if (ret)
1370                         goto error;
1371         }
1372         if (dev->data->all_multicast) {
1373                 struct rte_flow_item_eth multicast = {
1374                         .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1375                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1376                         .type = 0,
1377                 };
1378
1379                 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1380                 if (ret)
1381                         goto error;
1382         } else {
1383                 /* Add broadcast/multicast flows. */
1384                 for (i = 0; i != vlan_filter_n; ++i) {
1385                         uint16_t vlan = priv->vlan_filter[i];
1386
1387                         struct rte_flow_item_vlan vlan_spec = {
1388                                 .tci = rte_cpu_to_be_16(vlan),
1389                         };
1390                         struct rte_flow_item_vlan vlan_mask =
1391                                 rte_flow_item_vlan_mask;
1392
1393                         ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1394                                                   &vlan_spec, &vlan_mask);
1395                         if (ret)
1396                                 goto error;
1397                         ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1398                                                   &ipv6_multi_mask,
1399                                                   &vlan_spec, &vlan_mask);
1400                         if (ret)
1401                                 goto error;
1402                 }
1403                 if (!vlan_filter_n) {
1404                         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1405                         if (ret)
1406                                 goto error;
1407                         ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1408                                              &ipv6_multi_mask);
1409                         if (ret) {
1410                                 /* Do not fail on IPv6 multicast creation failure. */
1411                                 DRV_LOG(WARNING,
1412                                         "IPv6 multicast is not supported");
1413                                 ret = 0;
1414                         }
1415                 }
1416         }
1417         /* Add MAC address flows. */
1418         for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1419                 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1420
1421                 if (!memcmp(mac, &cmp, sizeof(*mac)))
1422                         continue;
1423                 memcpy(&unicast.dst.addr_bytes,
1424                        mac->addr_bytes,
1425                        RTE_ETHER_ADDR_LEN);
1426                 for (j = 0; j != vlan_filter_n; ++j) {
1427                         uint16_t vlan = priv->vlan_filter[j];
1428
1429                         struct rte_flow_item_vlan vlan_spec = {
1430                                 .tci = rte_cpu_to_be_16(vlan),
1431                         };
1432                         struct rte_flow_item_vlan vlan_mask =
1433                                 rte_flow_item_vlan_mask;
1434
1435                         ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1436                                                   &unicast_mask,
1437                                                   &vlan_spec,
1438                                                   &vlan_mask);
1439                         if (ret)
1440                                 goto error;
1441                 }
1442                 if (!vlan_filter_n) {
1443                         ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1444                         if (ret)
1445                                 goto error;
1446                 }
1447         }
1448         return 0;
1449 error:
1450         ret = rte_errno; /* Save rte_errno before cleanup. */
1451         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1452         rte_errno = ret; /* Restore rte_errno. */
1453         return -rte_errno;
1454 }
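
/*
 * Hedged sketch, an assumption rather than code taken from this driver: a
 * control flow installed above conceptually matches on the destination MAC,
 * optionally combined with a VLAN TCI, before steering traffic to the Rx
 * queues. The hypothetical helper below shows only the matching side for one
 * unicast address and one VLAN filter entry.
 */
static __rte_unused void
example_fill_unicast_vlan_pattern(const struct rte_ether_addr *mac,
                                  uint16_t vlan_id,
                                  struct rte_flow_item_eth *eth_spec,
                                  struct rte_flow_item_vlan *vlan_spec,
                                  struct rte_flow_item pattern[3])
{
        /* Match the exact destination MAC address and ignore the rest. */
        static const struct rte_flow_item_eth eth_mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };

        *eth_spec = (struct rte_flow_item_eth){ .dst = *mac };
        /* The VLAN TCI is carried in network byte order in the flow item. */
        *vlan_spec = (struct rte_flow_item_vlan){
                .tci = rte_cpu_to_be_16(vlan_id),
        };
        pattern[0] = (struct rte_flow_item){
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = eth_spec,
                .mask = &eth_mask,
        };
        pattern[1] = (struct rte_flow_item){
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
                .spec = vlan_spec,
                .mask = &rte_flow_item_vlan_mask,
        };
        pattern[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_END };
}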
1455
1457 /**
1458  * Disable traffic flows configured by control plane.
1459  *
1460  * @param dev
1461  *   Pointer to Ethernet device structure.
1462  */
1463 void
1464 mlx5_traffic_disable(struct rte_eth_dev *dev)
1465 {
1466         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1467 }
1468
1469 /**
1470  * Restart traffic flows configured by control plane.
1471  *
1472  * @param dev
1473  *   Pointer to Ethernet device structure.
1474  *
1475  * @return
1476  *   0 on success, a negative errno value otherwise and rte_errno is set.
1477  */
1478 int
1479 mlx5_traffic_restart(struct rte_eth_dev *dev)
1480 {
1481         if (dev->data->dev_started) {
1482                 mlx5_traffic_disable(dev);
1483                 return mlx5_traffic_enable(dev);
1484         }
1485         return 0;
1486 }
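
/*
 * Hedged usage sketch, an assumption simplified from other PMD source files:
 * configuration-change handlers typically update dev->data first and then
 * rebuild the control flows through mlx5_traffic_restart(). The handler name
 * below is hypothetical.
 */
static __rte_unused int
example_promiscuous_enable(struct rte_eth_dev *dev)
{
        /*
         * Record the new mode so that mlx5_traffic_enable() also installs
         * the match-all promiscuous control flow.
         */
        dev->data->promiscuous = 1;
        return mlx5_traffic_restart(dev);
}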