net/mlx5: remove HCA attribute structure duplication
[dpdk.git] drivers/net/mlx5/mlx5_trigger.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32         struct mlx5_priv *priv = dev->data->dev_private;
33         unsigned int i;
34
35         for (i = 0; i != priv->txqs_n; ++i)
36                 mlx5_txq_release(dev, i);
37 }
38
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51         struct mlx5_priv *priv = dev->data->dev_private;
52         unsigned int i;
53         int ret;
54
55         for (i = 0; i != priv->txqs_n; ++i) {
56                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57                 struct mlx5_txq_data *txq_data;
58                 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59                 if (!txq_ctrl)
60                         continue;
61                 txq_data = &txq_ctrl->txq;
62                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63                         txq_alloc_elts(txq_ctrl);
64                 MLX5_ASSERT(!txq_ctrl->obj);
65                 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66                                             0, txq_ctrl->socket);
67                 if (!txq_ctrl->obj) {
68                         DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69                                 "memory resources.", dev->data->port_id,
70                                 txq_data->idx);
71                         rte_errno = ENOMEM;
72                         goto error;
73                 }
74                 ret = priv->obj_ops.txq_obj_new(dev, i);
75                 if (ret < 0) {
76                         mlx5_free(txq_ctrl->obj);
77                         txq_ctrl->obj = NULL;
78                         goto error;
79                 }
80                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81                         size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82
83                         txq_data->fcqs = mlx5_malloc(flags, size,
84                                                      RTE_CACHE_LINE_SIZE,
85                                                      txq_ctrl->socket);
86                         if (!txq_data->fcqs) {
87                                 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88                                         "allocate memory (FCQ).",
89                                         dev->data->port_id, i);
90                                 rte_errno = ENOMEM;
91                                 goto error;
92                         }
93                 }
94                 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95                         dev->data->port_id, i, (void *)&txq_ctrl->obj);
96                 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97         }
98         return 0;
99 error:
100         ret = rte_errno; /* Save rte_errno before cleanup. */
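        /* Release the queues from the failed index down to index 0. */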
101         do {
102                 mlx5_txq_release(dev, i);
103         } while (i-- != 0);
104         rte_errno = ret; /* Restore rte_errno. */
105         return -rte_errno;
106 }
107
108 /**
109  * Register Rx queue mempools and fill the Rx queue cache.
110  * This function tolerates repeated mempool registration.
111  *
112  * @param[in] rxq_ctrl
113  *   Rx queue control data.
114  *
115  * @return
116  *   0 on success, (-1) on failure and rte_errno is set.
117  */
118 static int
119 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
120 {
121         struct rte_mempool *mp;
122         uint32_t s;
123         int ret = 0;
124
125         mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
126         /* MPRQ mempool is registered on creation, just fill the cache. */
127         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
128                 return mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
129                                                       rxq_ctrl->rxq.mprq_mp);
130         for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
131                 bool is_extmem;
132
133                 mp = rxq_ctrl->rxq.rxseg[s].mp;
134                 is_extmem = (rte_pktmbuf_priv_flags(mp) &
135                              RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
136                 ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
137                                                is_extmem);
138                 if (ret < 0 && rte_errno != EEXIST)
139                         return ret;
140                 ret = mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
141                                                      mp);
142                 if (ret < 0)
143                         return ret;
144         }
145         return 0;
146 }
147
148 /**
149  * Stop traffic on Rx queues.
150  *
151  * @param dev
152  *   Pointer to Ethernet device structure.
153  */
154 static void
155 mlx5_rxq_stop(struct rte_eth_dev *dev)
156 {
157         struct mlx5_priv *priv = dev->data->dev_private;
158         unsigned int i;
159
160         for (i = 0; i != priv->rxqs_n; ++i)
161                 mlx5_rxq_release(dev, i);
162 }
163
164 static int
165 mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
166                       unsigned int idx)
167 {
168         int ret = 0;
169
170         if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
171                 /*
172                  * Pre-register the mempools. Regardless of whether
173                  * the implicit registration is enabled or not,
174                  * Rx mempool destruction is tracked to free MRs.
175                  */
176                 if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
177                         return -rte_errno;
178                 ret = rxq_alloc_elts(rxq_ctrl);
179                 if (ret)
180                         return ret;
181         }
182         MLX5_ASSERT(!rxq_ctrl->obj);
183         rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
184                                     sizeof(*rxq_ctrl->obj), 0,
185                                     rxq_ctrl->socket);
186         if (!rxq_ctrl->obj) {
187                 DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
188                         dev->data->port_id, idx);
189                 rte_errno = ENOMEM;
190                 return -rte_errno;
191         }
192         DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
193                 idx, (void *)&rxq_ctrl->obj);
194         return 0;
195 }
196
197 /**
198  * Start traffic on Rx queues.
199  *
200  * @param dev
201  *   Pointer to Ethernet device structure.
202  *
203  * @return
204  *   0 on success, a negative errno value otherwise and rte_errno is set.
205  */
206 static int
207 mlx5_rxq_start(struct rte_eth_dev *dev)
208 {
209         struct mlx5_priv *priv = dev->data->dev_private;
210         unsigned int i;
211         int ret = 0;
212
213         /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
214         if (mlx5_mprq_alloc_mp(dev)) {
215                 /* Should not release Rx queues but return immediately. */
216                 return -rte_errno;
217         }
218         DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
219                 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
220         DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
221                 dev->data->port_id, priv->sh->device_attr.max_sge);
222         for (i = 0; i != priv->rxqs_n; ++i) {
223                 struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
224                 struct mlx5_rxq_ctrl *rxq_ctrl;
225
226                 if (rxq == NULL)
227                         continue;
228                 rxq_ctrl = rxq->ctrl;
229                 if (!rxq_ctrl->started) {
230                         if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
231                                 goto error;
232                         LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
233                 }
234                 ret = priv->obj_ops.rxq_obj_new(rxq);
235                 if (ret) {
236                         mlx5_free(rxq_ctrl->obj);
237                         rxq_ctrl->obj = NULL;
238                         goto error;
239                 }
240                 rxq_ctrl->started = true;
241         }
242         return 0;
243 error:
244         ret = rte_errno; /* Save rte_errno before cleanup. */
245         do {
246                 mlx5_rxq_release(dev, i);
247         } while (i-- != 0);
248         rte_errno = ret; /* Restore rte_errno. */
249         return -rte_errno;
250 }
251
252 /**
253  * Binds Tx queues to Rx queues for hairpin.
254  *
255  * Binds Tx queues to the target Rx queues.
256  *
257  * @param dev
258  *   Pointer to Ethernet device structure.
259  *
260  * @return
261  *   0 on success, a negative errno value otherwise and rte_errno is set.
262  */
263 static int
264 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
265 {
266         struct mlx5_priv *priv = dev->data->dev_private;
267         struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
268         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
269         struct mlx5_txq_ctrl *txq_ctrl;
270         struct mlx5_rxq_priv *rxq;
271         struct mlx5_rxq_ctrl *rxq_ctrl;
272         struct mlx5_devx_obj *sq;
273         struct mlx5_devx_obj *rq;
274         unsigned int i;
275         int ret = 0;
276         bool need_auto = false;
277         uint16_t self_port = dev->data->port_id;
278
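        /*
         * First pass: check whether any hairpin Tx queue with a local peer
         * requires automatic binding. If any such queue uses manual bind,
         * the whole auto binding step is skipped.
         */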
279         for (i = 0; i != priv->txqs_n; ++i) {
280                 txq_ctrl = mlx5_txq_get(dev, i);
281                 if (!txq_ctrl)
282                         continue;
283                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
284                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
285                         mlx5_txq_release(dev, i);
286                         continue;
287                 }
288                 if (txq_ctrl->hairpin_conf.manual_bind) {
289                         mlx5_txq_release(dev, i);
290                         return 0;
291                 }
292                 need_auto = true;
293                 mlx5_txq_release(dev, i);
294         }
295         if (!need_auto)
296                 return 0;
297         for (i = 0; i != priv->txqs_n; ++i) {
298                 txq_ctrl = mlx5_txq_get(dev, i);
299                 if (!txq_ctrl)
300                         continue;
301                 /* Skip hairpin queues with other peer ports. */
302                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
303                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
304                         mlx5_txq_release(dev, i);
305                         continue;
306                 }
307                 if (!txq_ctrl->obj) {
308                         rte_errno = ENOMEM;
309                         DRV_LOG(ERR, "port %u no txq object found: %d",
310                                 dev->data->port_id, i);
311                         mlx5_txq_release(dev, i);
312                         return -rte_errno;
313                 }
314                 sq = txq_ctrl->obj->sq;
315                 rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
316                 if (rxq == NULL) {
317                         mlx5_txq_release(dev, i);
318                         rte_errno = EINVAL;
319                         DRV_LOG(ERR, "port %u no rxq object found: %d",
320                                 dev->data->port_id,
321                                 txq_ctrl->hairpin_conf.peers[0].queue);
322                         return -rte_errno;
323                 }
324                 rxq_ctrl = rxq->ctrl;
325                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
326                     rxq->hairpin_conf.peers[0].queue != i) {
327                         rte_errno = ENOMEM;
328                         DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
329                                 "Rx queue %d", dev->data->port_id,
330                                 i, txq_ctrl->hairpin_conf.peers[0].queue);
331                         goto error;
332                 }
333                 rq = rxq_ctrl->obj->rq;
334                 if (!rq) {
335                         rte_errno = ENOMEM;
336                         DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
337                                 dev->data->port_id,
338                                 txq_ctrl->hairpin_conf.peers[0].queue);
339                         goto error;
340                 }
341                 sq_attr.state = MLX5_SQC_STATE_RDY;
342                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
343                 sq_attr.hairpin_peer_rq = rq->id;
344                 sq_attr.hairpin_peer_vhca =
345                                 priv->sh->cdev->config.hca_attr.vhca_id;
346                 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
347                 if (ret)
348                         goto error;
349                 rq_attr.state = MLX5_SQC_STATE_RDY;
350                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
351                 rq_attr.hairpin_peer_sq = sq->id;
352                 rq_attr.hairpin_peer_vhca =
353                                 priv->sh->cdev->config.hca_attr.vhca_id;
354                 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
355                 if (ret)
356                         goto error;
357                 /* Qs with auto-bind will be destroyed directly. */
358                 rxq->hairpin_status = 1;
359                 txq_ctrl->hairpin_status = 1;
360                 mlx5_txq_release(dev, i);
361         }
362         return 0;
363 error:
364         mlx5_txq_release(dev, i);
365         return -rte_errno;
366 }
367
368 /*
369  * Fetch the peer queue's SW & HW information.
370  *
371  * @param dev
372  *   Pointer to Ethernet device structure.
373  * @param peer_queue
374  *   Index of the queue to fetch the information.
375  * @param current_info
376  *   Pointer to the input peer information, not used currently.
377  * @param peer_info
378  *   Pointer to the structure to store the information, output.
379  * @param direction
380  *   Positive to get the RxQ information, zero to get the TxQ information.
381  *
382  * @return
383  *   0 on success, a negative errno value otherwise and rte_errno is set.
384  */
385 int
386 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
387                                struct rte_hairpin_peer_info *current_info,
388                                struct rte_hairpin_peer_info *peer_info,
389                                uint32_t direction)
390 {
391         struct mlx5_priv *priv = dev->data->dev_private;
392         RTE_SET_USED(current_info);
393
394         if (dev->data->dev_started == 0) {
395                 rte_errno = EBUSY;
396                 DRV_LOG(ERR, "peer port %u is not started",
397                         dev->data->port_id);
398                 return -rte_errno;
399         }
400         /*
401          * Peer port used as egress. In the current design, a hairpin Tx queue
402          * is bound to the peer Rx queue, so only the information of this Tx
403          * queue needs to be fetched here.
404          */
405         if (direction == 0) {
406                 struct mlx5_txq_ctrl *txq_ctrl;
407
408                 txq_ctrl = mlx5_txq_get(dev, peer_queue);
409                 if (txq_ctrl == NULL) {
410                         rte_errno = EINVAL;
411                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
412                                 dev->data->port_id, peer_queue);
413                         return -rte_errno;
414                 }
415                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
416                         rte_errno = EINVAL;
417                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
418                                 dev->data->port_id, peer_queue);
419                         mlx5_txq_release(dev, peer_queue);
420                         return -rte_errno;
421                 }
422                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
423                         rte_errno = ENOMEM;
424                         DRV_LOG(ERR, "port %u no Txq object found: %d",
425                                 dev->data->port_id, peer_queue);
426                         mlx5_txq_release(dev, peer_queue);
427                         return -rte_errno;
428                 }
429                 peer_info->qp_id = txq_ctrl->obj->sq->id;
430                 peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
431                 /* 1-to-1 mapping, only the first one is used. */
432                 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
433                 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
434                 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
435                 mlx5_txq_release(dev, peer_queue);
436         } else { /* Peer port used as ingress. */
437                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
438                 struct mlx5_rxq_ctrl *rxq_ctrl;
439
440                 if (rxq == NULL) {
441                         rte_errno = EINVAL;
442                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
443                                 dev->data->port_id, peer_queue);
444                         return -rte_errno;
445                 }
446                 rxq_ctrl = rxq->ctrl;
447                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
448                         rte_errno = EINVAL;
449                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
450                                 dev->data->port_id, peer_queue);
451                         return -rte_errno;
452                 }
453                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
454                         rte_errno = ENOMEM;
455                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
456                                 dev->data->port_id, peer_queue);
457                         return -rte_errno;
458                 }
459                 peer_info->qp_id = rxq_ctrl->obj->rq->id;
460                 peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
461                 peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
462                 peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
463                 peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
464         }
465         return 0;
466 }
467
468 /*
469  * Bind the hairpin queue with the peer HW information.
470  * This needs to be called twice, for the Tx and the Rx queues of a pair.
471  * If the queue is already bound, it is considered successful.
472  *
473  * @param dev
474  *   Pointer to Ethernet device structure.
475  * @param cur_queue
476  *   Index of the queue to change the HW configuration to bind.
477  * @param peer_info
478  *   Pointer to information of the peer queue.
479  * @param direction
480  *   Positive to configure the TxQ, zero to configure the RxQ.
481  *
482  * @return
483  *   0 on success, a negative errno value otherwise and rte_errno is set.
484  */
485 int
486 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
487                              struct rte_hairpin_peer_info *peer_info,
488                              uint32_t direction)
489 {
490         int ret = 0;
491
492         /*
493          * Consistency checking of the peer queue: opposite direction is used
494          * to get the peer queue info with ethdev port ID, no need to check.
495          */
496         if (peer_info->peer_q != cur_queue) {
497                 rte_errno = EINVAL;
498                 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
499                         dev->data->port_id, cur_queue, peer_info->peer_q);
500                 return -rte_errno;
501         }
502         if (direction != 0) {
503                 struct mlx5_txq_ctrl *txq_ctrl;
504                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
505
506                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
507                 if (txq_ctrl == NULL) {
508                         rte_errno = EINVAL;
509                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
510                                 dev->data->port_id, cur_queue);
511                         return -rte_errno;
512                 }
513                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
514                         rte_errno = EINVAL;
515                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
516                                 dev->data->port_id, cur_queue);
517                         mlx5_txq_release(dev, cur_queue);
518                         return -rte_errno;
519                 }
520                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
521                         rte_errno = ENOMEM;
522                         DRV_LOG(ERR, "port %u no Txq object found: %d",
523                                 dev->data->port_id, cur_queue);
524                         mlx5_txq_release(dev, cur_queue);
525                         return -rte_errno;
526                 }
527                 if (txq_ctrl->hairpin_status != 0) {
528                         DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
529                                 dev->data->port_id, cur_queue);
530                         mlx5_txq_release(dev, cur_queue);
531                         return 0;
532                 }
533                 /*
534                  * Consistency checking of all queues of one port is done in
535                  * the bind() function, and it is optional.
536                  */
537                 if (peer_info->tx_explicit !=
538                     txq_ctrl->hairpin_conf.tx_explicit) {
539                         rte_errno = EINVAL;
540                         DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
541                                 " mismatch", dev->data->port_id, cur_queue);
542                         mlx5_txq_release(dev, cur_queue);
543                         return -rte_errno;
544                 }
545                 if (peer_info->manual_bind !=
546                     txq_ctrl->hairpin_conf.manual_bind) {
547                         rte_errno = EINVAL;
548                         DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
549                                 " mismatch", dev->data->port_id, cur_queue);
550                         mlx5_txq_release(dev, cur_queue);
551                         return -rte_errno;
552                 }
553                 sq_attr.state = MLX5_SQC_STATE_RDY;
554                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
555                 sq_attr.hairpin_peer_rq = peer_info->qp_id;
556                 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
557                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
558                 if (ret == 0)
559                         txq_ctrl->hairpin_status = 1;
560                 mlx5_txq_release(dev, cur_queue);
561         } else {
562                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
563                 struct mlx5_rxq_ctrl *rxq_ctrl;
564                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
565
566                 if (rxq == NULL) {
567                         rte_errno = EINVAL;
568                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
569                                 dev->data->port_id, cur_queue);
570                         return -rte_errno;
571                 }
572                 rxq_ctrl = rxq->ctrl;
573                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
574                         rte_errno = EINVAL;
575                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
576                                 dev->data->port_id, cur_queue);
577                         return -rte_errno;
578                 }
579                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
580                         rte_errno = ENOMEM;
581                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
582                                 dev->data->port_id, cur_queue);
583                         return -rte_errno;
584                 }
585                 if (rxq->hairpin_status != 0) {
586                         DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
587                                 dev->data->port_id, cur_queue);
588                         return 0;
589                 }
590                 if (peer_info->tx_explicit !=
591                     rxq->hairpin_conf.tx_explicit) {
592                         rte_errno = EINVAL;
593                         DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
594                                 " mismatch", dev->data->port_id, cur_queue);
595                         return -rte_errno;
596                 }
597                 if (peer_info->manual_bind !=
598                     rxq->hairpin_conf.manual_bind) {
599                         rte_errno = EINVAL;
600                         DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
601                                 " mismatch", dev->data->port_id, cur_queue);
602                         return -rte_errno;
603                 }
604                 rq_attr.state = MLX5_SQC_STATE_RDY;
605                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
606                 rq_attr.hairpin_peer_sq = peer_info->qp_id;
607                 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
608                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
609                 if (ret == 0)
610                         rxq->hairpin_status = 1;
611         }
612         return ret;
613 }
614
615 /*
616  * Unbind the hairpin queue and reset its HW configuration.
617  * This needs to be called twice, for the Tx and the Rx queues of a pair.
618  * If the queue is already unbound, it is considered successful.
619  *
620  * @param dev
621  *   Pointer to Ethernet device structure.
622  * @param cur_queue
623  *   Index of the queue to change the HW configuration to unbind.
624  * @param direction
625  *   Positive to reset the TxQ, zero to reset the RxQ.
626  *
627  * @return
628  *   0 on success, a negative errno value otherwise and rte_errno is set.
629  */
630 int
631 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
632                                uint32_t direction)
633 {
634         int ret = 0;
635
636         if (direction != 0) {
637                 struct mlx5_txq_ctrl *txq_ctrl;
638                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
639
640                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
641                 if (txq_ctrl == NULL) {
642                         rte_errno = EINVAL;
643                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
644                                 dev->data->port_id, cur_queue);
645                         return -rte_errno;
646                 }
647                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
648                         rte_errno = EINVAL;
649                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
650                                 dev->data->port_id, cur_queue);
651                         mlx5_txq_release(dev, cur_queue);
652                         return -rte_errno;
653                 }
654                 /* Already unbound, return success before obj checking. */
655                 if (txq_ctrl->hairpin_status == 0) {
656                         DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
657                                 dev->data->port_id, cur_queue);
658                         mlx5_txq_release(dev, cur_queue);
659                         return 0;
660                 }
661                 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
662                         rte_errno = ENOMEM;
663                         DRV_LOG(ERR, "port %u no Txq object found: %d",
664                                 dev->data->port_id, cur_queue);
665                         mlx5_txq_release(dev, cur_queue);
666                         return -rte_errno;
667                 }
668                 sq_attr.state = MLX5_SQC_STATE_RST;
669                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
670                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
671                 if (ret == 0)
672                         txq_ctrl->hairpin_status = 0;
673                 mlx5_txq_release(dev, cur_queue);
674         } else {
675                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
676                 struct mlx5_rxq_ctrl *rxq_ctrl;
677                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
678
679                 if (rxq == NULL) {
680                         rte_errno = EINVAL;
681                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
682                                 dev->data->port_id, cur_queue);
683                         return -rte_errno;
684                 }
685                 rxq_ctrl = rxq->ctrl;
686                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
687                         rte_errno = EINVAL;
688                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
689                                 dev->data->port_id, cur_queue);
690                         return -rte_errno;
691                 }
692                 if (rxq->hairpin_status == 0) {
693                         DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
694                                 dev->data->port_id, cur_queue);
695                         return 0;
696                 }
697                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
698                         rte_errno = ENOMEM;
699                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
700                                 dev->data->port_id, cur_queue);
701                         return -rte_errno;
702                 }
703                 rq_attr.state = MLX5_SQC_STATE_RST;
704                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
705                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
706                 if (ret == 0)
707                         rxq->hairpin_status = 0;
708         }
709         return ret;
710 }
711
712 /*
713  * Bind the hairpin port pairs, from the Tx to the peer Rx.
714  * This function only supports binding the Tx to one Rx port.
715  *
716  * @param dev
717  *   Pointer to Ethernet device structure.
718  * @param rx_port
719  *   Port identifier of the Rx port.
720  *
721  * @return
722  *   0 on success, a negative errno value otherwise and rte_errno is set.
723  */
724 static int
725 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
726 {
727         struct mlx5_priv *priv = dev->data->dev_private;
728         int ret = 0;
729         struct mlx5_txq_ctrl *txq_ctrl;
730         uint32_t i;
731         struct rte_hairpin_peer_info peer = {0xffffff};
732         struct rte_hairpin_peer_info cur;
733         const struct rte_eth_hairpin_conf *conf;
734         uint16_t num_q = 0;
735         uint16_t local_port = priv->dev_data->port_id;
736         uint32_t manual;
737         uint32_t explicit;
738         uint16_t rx_queue;
739
740         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
741                 rte_errno = ENODEV;
742                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
743                 return -rte_errno;
744         }
745         /*
746          * Before binding TxQ to peer RxQ, a first pass over the queues checks
747          * their configuration consistency. This is a bit time consuming, but
748          * better than having to roll back afterwards.
749          */
750         for (i = 0; i != priv->txqs_n; i++) {
751                 txq_ctrl = mlx5_txq_get(dev, i);
752                 if (txq_ctrl == NULL)
753                         continue;
754                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
755                         mlx5_txq_release(dev, i);
756                         continue;
757                 }
758                 /*
759                  * All hairpin Tx queues of a single port that connected to the
760                  * same peer Rx port should have the same "auto binding" and
761                  * "implicit Tx flow" modes.
762                  * Peer consistency checking will be done in per queue binding.
763                  */
764                 conf = &txq_ctrl->hairpin_conf;
765                 if (conf->peers[0].port == rx_port) {
766                         if (num_q == 0) {
767                                 manual = conf->manual_bind;
768                                 explicit = conf->tx_explicit;
769                         } else {
770                                 if (manual != conf->manual_bind ||
771                                     explicit != conf->tx_explicit) {
772                                         rte_errno = EINVAL;
773                                         DRV_LOG(ERR, "port %u queue %d mode"
774                                                 " mismatch: %u %u, %u %u",
775                                                 local_port, i, manual,
776                                                 conf->manual_bind, explicit,
777                                                 conf->tx_explicit);
778                                         mlx5_txq_release(dev, i);
779                                         return -rte_errno;
780                                 }
781                         }
782                         num_q++;
783                 }
784                 mlx5_txq_release(dev, i);
785         }
786         /* If no queue is configured, success is returned directly. */
787         if (num_q == 0)
788                 return ret;
789         /* All the hairpin TX queues need to be traversed again. */
790         for (i = 0; i != priv->txqs_n; i++) {
791                 txq_ctrl = mlx5_txq_get(dev, i);
792                 if (txq_ctrl == NULL)
793                         continue;
794                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
795                         mlx5_txq_release(dev, i);
796                         continue;
797                 }
798                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
799                         mlx5_txq_release(dev, i);
800                         continue;
801                 }
802                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
803                 /*
804                  * Fetch peer RxQ's information.
805                  * No need to pass the information of the current queue.
806                  */
807                 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
808                                                         NULL, &peer, 1);
809                 if (ret != 0) {
810                         mlx5_txq_release(dev, i);
811                         goto error;
812                 }
813                 /* Accessing its own device, inside mlx5 PMD. */
814                 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
815                 if (ret != 0) {
816                         mlx5_txq_release(dev, i);
817                         goto error;
818                 }
819                 /* Pass TxQ's information to peer RxQ and try binding. */
820                 cur.peer_q = rx_queue;
821                 cur.qp_id = txq_ctrl->obj->sq->id;
822                 cur.vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
823                 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
824                 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
825                 /*
826                  * In order to access another device in a proper way, RTE level
827                  * private function is needed.
828                  */
829                 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
830                                                       &cur, 0);
831                 if (ret != 0) {
832                         mlx5_txq_release(dev, i);
833                         goto error;
834                 }
835                 mlx5_txq_release(dev, i);
836         }
837         return 0;
838 error:
839         /*
840          * Do roll-back process for the queues already bound.
841          * No need to check the return value of the queue unbind function.
842          */
843         do {
844                 /* No validation is needed here. */
845                 txq_ctrl = mlx5_txq_get(dev, i);
846                 if (txq_ctrl == NULL)
847                         continue;
848                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
849                 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
850                 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
851                 mlx5_txq_release(dev, i);
852         } while (i--);
853         return ret;
854 }
855
856 /*
857  * Unbind the hairpin port pair. The HW configuration of both devices will be
858  * cleared and the status will be reset for all the queues used between them.
859  * This function only supports unbinding the Tx from one Rx port.
860  *
861  * @param dev
862  *   Pointer to Ethernet device structure.
863  * @param rx_port
864  *   Port identifier of the Rx port.
865  *
866  * @return
867  *   0 on success, a negative errno value otherwise and rte_errno is set.
868  */
869 static int
870 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
871 {
872         struct mlx5_priv *priv = dev->data->dev_private;
873         struct mlx5_txq_ctrl *txq_ctrl;
874         uint32_t i;
875         int ret;
876         uint16_t cur_port = priv->dev_data->port_id;
877
878         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
879                 rte_errno = ENODEV;
880                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
881                 return -rte_errno;
882         }
883         for (i = 0; i != priv->txqs_n; i++) {
884                 uint16_t rx_queue;
885
886                 txq_ctrl = mlx5_txq_get(dev, i);
887                 if (txq_ctrl == NULL)
888                         continue;
889                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
890                         mlx5_txq_release(dev, i);
891                         continue;
892                 }
893                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
894                         mlx5_txq_release(dev, i);
895                         continue;
896                 }
897                 /* Indeed, only the first used queue needs to be checked. */
898                 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
899                         if (cur_port != rx_port) {
900                                 rte_errno = EINVAL;
901                                 DRV_LOG(ERR, "port %u and port %u are in"
902                                         " auto-bind mode", cur_port, rx_port);
903                                 mlx5_txq_release(dev, i);
904                                 return -rte_errno;
905                         } else {
906                                 return 0;
907                         }
908                 }
909                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
910                 mlx5_txq_release(dev, i);
911                 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
912                 if (ret) {
913                         DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
914                                 rx_port, rx_queue);
915                         return ret;
916                 }
917                 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
918                 if (ret) {
919                         DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
920                                 cur_port, i);
921                         return ret;
922                 }
923         }
924         return 0;
925 }
926
927 /*
928  * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
929  * @see mlx5_hairpin_bind_single_port()
930  */
931 int
932 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
933 {
934         int ret = 0;
935         uint16_t p, pp;
936
937         /*
938          * If the Rx port has no hairpin configuration with the current port,
939          * the binding will be skipped in the single port bind function.
940          * The device started status is only checked before updating the
941          * queue information.
942          */
943         if (rx_port == RTE_MAX_ETHPORTS) {
944                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
945                         ret = mlx5_hairpin_bind_single_port(dev, p);
946                         if (ret != 0)
947                                 goto unbind;
948                 }
949                 return ret;
950         } else {
951                 return mlx5_hairpin_bind_single_port(dev, rx_port);
952         }
953 unbind:
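        /* Roll back: unbind only the ports bound before the failure. */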
954         MLX5_ETH_FOREACH_DEV(pp, dev->device)
955                 if (pp < p)
956                         mlx5_hairpin_unbind_single_port(dev, pp);
957         return ret;
958 }
959
960 /*
961  * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
962  * @see mlx5_hairpin_unbind_single_port()
963  */
964 int
965 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
966 {
967         int ret = 0;
968         uint16_t p;
969
970         if (rx_port == RTE_MAX_ETHPORTS)
971                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
972                         ret = mlx5_hairpin_unbind_single_port(dev, p);
973                         if (ret != 0)
974                                 return ret;
975                 }
976         else
977                 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
978         return ret;
979 }
980
981 /*
982  * DPDK callback to get the hairpin peer ports list.
983  * This will return the actual number of peer ports and save the identifiers
984  * into the array (sorted, and possibly in a different order than when the
985  * hairpin peer queues were set up).
986  * The peer port ID could be the same as the port ID of the current device.
987  *
988  * @param dev
989  *   Pointer to Ethernet device structure.
990  * @param peer_ports
991  *   Pointer to array to save the port identifiers.
992  * @param len
993  *   The length of the array.
994  * @param direction
995  *   Current port to peer port direction.
996  *   positive - current used as Tx to get all peer Rx ports.
997  *   zero - current used as Rx to get all peer Tx ports.
998  *
999  * @return
1000  *   0 or positive value on success, actual number of peer ports.
1001  *   a negative errno value otherwise and rte_errno is set.
1002  */
1003 int
1004 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1005                             size_t len, uint32_t direction)
1006 {
1007         struct mlx5_priv *priv = dev->data->dev_private;
1008         struct mlx5_txq_ctrl *txq_ctrl;
1009         uint32_t i;
1010         uint16_t pp;
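        /* Bitmap of peer port IDs, used to report each port only once. */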
1011         uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1012         int ret = 0;
1013
1014         if (direction) {
1015                 for (i = 0; i < priv->txqs_n; i++) {
1016                         txq_ctrl = mlx5_txq_get(dev, i);
1017                         if (!txq_ctrl)
1018                                 continue;
1019                         if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1020                                 mlx5_txq_release(dev, i);
1021                                 continue;
1022                         }
1023                         pp = txq_ctrl->hairpin_conf.peers[0].port;
1024                         if (pp >= RTE_MAX_ETHPORTS) {
1025                                 rte_errno = ERANGE;
1026                                 mlx5_txq_release(dev, i);
1027                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1028                                         "out of range %hu",
1029                                         priv->dev_data->port_id, i, pp);
1030                                 return -rte_errno;
1031                         }
1032                         bits[pp / 32] |= 1 << (pp % 32);
1033                         mlx5_txq_release(dev, i);
1034                 }
1035         } else {
1036                 for (i = 0; i < priv->rxqs_n; i++) {
1037                         struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1038                         struct mlx5_rxq_ctrl *rxq_ctrl;
1039
1040                         if (rxq == NULL)
1041                                 continue;
1042                         rxq_ctrl = rxq->ctrl;
1043                         if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
1044                                 continue;
1045                         pp = rxq->hairpin_conf.peers[0].port;
1046                         if (pp >= RTE_MAX_ETHPORTS) {
1047                                 rte_errno = ERANGE;
1048                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1049                                         "out of range %hu",
1050                                         priv->dev_data->port_id, i, pp);
1051                                 return -rte_errno;
1052                         }
1053                         bits[pp / 32] |= 1 << (pp % 32);
1054                 }
1055         }
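        /* Translate the bitmap into a sorted list of peer port IDs. */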
1056         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1057                 if (bits[i / 32] & (1 << (i % 32))) {
1058                         if ((size_t)ret >= len) {
1059                                 rte_errno = E2BIG;
1060                                 return -rte_errno;
1061                         }
1062                         peer_ports[ret++] = i;
1063                 }
1064         }
1065         return ret;
1066 }
1067
1068 /**
1069  * DPDK callback to start the device.
1070  *
1071  * Simulate device start by attaching all configured flows.
1072  *
1073  * @param dev
1074  *   Pointer to Ethernet device structure.
1075  *
1076  * @return
1077  *   0 on success, a negative errno value otherwise and rte_errno is set.
1078  */
1079 int
1080 mlx5_dev_start(struct rte_eth_dev *dev)
1081 {
1082         struct mlx5_priv *priv = dev->data->dev_private;
1083         int ret;
1084         int fine_inline;
1085
1086         DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1087         fine_inline = rte_mbuf_dynflag_lookup
1088                 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1089         if (fine_inline >= 0)
1090                 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1091         else
1092                 rte_net_mlx5_dynf_inline_mask = 0;
1093         if (dev->data->nb_rx_queues > 0) {
1094                 ret = mlx5_dev_configure_rss_reta(dev);
1095                 if (ret) {
1096                         DRV_LOG(ERR, "port %u reta config failed: %s",
1097                                 dev->data->port_id, strerror(rte_errno));
1098                         return -rte_errno;
1099                 }
1100         }
1101         ret = mlx5_txpp_start(dev);
1102         if (ret) {
1103                 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1104                         dev->data->port_id, strerror(rte_errno));
1105                 goto error;
1106         }
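        /* Create the dummy queue used for loopback with the DevX flow engine, if supported. */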
1107         if ((priv->sh->devx && priv->config.dv_flow_en &&
1108             priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1109                 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1110                 if (ret)
1111                         goto error;
1112         }
1113         ret = mlx5_txq_start(dev);
1114         if (ret) {
1115                 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1116                         dev->data->port_id, strerror(rte_errno));
1117                 goto error;
1118         }
1119         if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
1120                 if (!priv->config.vf && !priv->config.sf &&
1121                     !priv->representor) {
1122                         ret = mlx5_get_flag_dropless_rq(dev);
1123                         if (ret < 0)
1124                                 DRV_LOG(WARNING,
1125                                         "port %u cannot query dropless flag",
1126                                         dev->data->port_id);
1127                         else if (!ret)
1128                                 DRV_LOG(WARNING,
1129                                         "port %u dropless_rq OFF, no rearming",
1130                                         dev->data->port_id);
1131                 } else {
1132                         DRV_LOG(DEBUG,
1133                                 "port %u doesn't support dropless_rq flag",
1134                                 dev->data->port_id);
1135                 }
1136         }
1137         ret = mlx5_rxq_start(dev);
1138         if (ret) {
1139                 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1140                         dev->data->port_id, strerror(rte_errno));
1141                 goto error;
1142         }
1143         /*
1144          * This step is skipped if there is no hairpin Tx queue configured
1145          * with an Rx peer queue from the same device.
1146          */
1147         ret = mlx5_hairpin_auto_bind(dev);
1148         if (ret) {
1149                 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1150                         dev->data->port_id, strerror(rte_errno));
1151                 goto error;
1152         }
1153         /* Set started flag here for the following steps like control flow. */
1154         dev->data->dev_started = 1;
1155         ret = mlx5_rx_intr_vec_enable(dev);
1156         if (ret) {
1157                 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1158                         dev->data->port_id);
1159                 goto error;
1160         }
1161         mlx5_os_stats_init(dev);
1162         /*
1163          * Attach indirection table objects detached on port stop.
1164          * They may be needed to create RSS in non-isolated mode.
1165          */
1166         ret = mlx5_action_handle_attach(dev);
1167         if (ret) {
1168                 DRV_LOG(ERR,
1169                         "port %u failed to attach indirect actions: %s",
1170                         dev->data->port_id, rte_strerror(rte_errno));
1171                 goto error;
1172         }
1173         ret = mlx5_traffic_enable(dev);
1174         if (ret) {
1175                 DRV_LOG(ERR, "port %u failed to set defaults flows",
1176                         dev->data->port_id);
1177                 goto error;
1178         }
1179         /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1180         mlx5_flow_rxq_dynf_metadata_set(dev);
1181         /* Set flags and context to convert Rx timestamps. */
1182         mlx5_rxq_timestamp_set(dev);
1183         /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1184         mlx5_txq_dynf_timestamp_set(dev);
1185         /*
1186          * In non-cached mode, only the default mreg copy action needs to be
1187          * started, since no flow created by an application exists anymore.
1188          * But it is worth wrapping the interface for further usage.
1189          */
1190         ret = mlx5_flow_start_default(dev);
1191         if (ret) {
1192                 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1193                         dev->data->port_id, strerror(rte_errno));
1194                 goto error;
1195         }
1196         if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1197                 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1198                         dev->data->port_id, rte_strerror(rte_errno));
1199                 goto error;
1200         }
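        /* Make sure the configuration above is visible before enabling the datapath. */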
1201         rte_wmb();
1202         dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1203         dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1204         /* Enable datapath on secondary process. */
1205         mlx5_mp_os_req_start_rxtx(dev);
1206         if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1207                 priv->sh->port[priv->dev_port - 1].ih_port_id =
1208                                         (uint32_t)dev->data->port_id;
1209         } else {
1210                 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1211                         dev->data->port_id);
1212                 dev->data->dev_conf.intr_conf.lsc = 0;
1213                 dev->data->dev_conf.intr_conf.rmv = 0;
1214         }
1215         if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1216                 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1217                                         (uint32_t)dev->data->port_id;
1218         return 0;
1219 error:
1220         ret = rte_errno; /* Save rte_errno before cleanup. */
1221         /* Rollback. */
1222         dev->data->dev_started = 0;
1223         mlx5_flow_stop_default(dev);
1224         mlx5_traffic_disable(dev);
1225         mlx5_txq_stop(dev);
1226         mlx5_rxq_stop(dev);
1227         if (priv->obj_ops.lb_dummy_queue_release)
1228                 priv->obj_ops.lb_dummy_queue_release(dev);
1229         mlx5_txpp_stop(dev); /* Stop last. */
1230         rte_errno = ret; /* Restore rte_errno. */
1231         return -rte_errno;
1232 }
1233
1234 /**
1235  * DPDK callback to stop the device.
1236  *
1237  * Simulate device stop by detaching all configured flows.
1238  *
1239  * @param dev
1240  *   Pointer to Ethernet device structure.
1241  */
1242 int
1243 mlx5_dev_stop(struct rte_eth_dev *dev)
1244 {
1245         struct mlx5_priv *priv = dev->data->dev_private;
1246
1247         dev->data->dev_started = 0;
1248         /* Prevent crashes when queues are still in use. */
1249         dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
1250         dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
1251         rte_wmb();
1252         /* Disable datapath on secondary process. */
1253         mlx5_mp_os_req_stop_rxtx(dev);
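        /* Give the datapath time to complete the bursts already in progress. */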
1254         rte_delay_us_sleep(1000 * priv->rxqs_n);
1255         DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1256         mlx5_flow_stop_default(dev);
1257         /* Control flows for default traffic can be removed first. */
1258         mlx5_traffic_disable(dev);
1259         /* All RX queue flags will be cleared in the flush interface. */
1260         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1261         mlx5_flow_meter_rxq_flush(dev);
1262         mlx5_action_handle_detach(dev);
1263         mlx5_rx_intr_vec_disable(dev);
1264         priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1265         priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1266         mlx5_txq_stop(dev);
1267         mlx5_rxq_stop(dev);
1268         if (priv->obj_ops.lb_dummy_queue_release)
1269                 priv->obj_ops.lb_dummy_queue_release(dev);
1270         mlx5_txpp_stop(dev);
1271
1272         return 0;
1273 }
1274
1275 /**
1276  * Enable traffic flows configured by control plane
1277  *
1278  * @param dev
1279  *   Pointer to Ethernet device structure.
1282  *
1283  * @return
1284  *   0 on success, a negative errno value otherwise and rte_errno is set.
1285  */
1286 int
1287 mlx5_traffic_enable(struct rte_eth_dev *dev)
1288 {
1289         struct mlx5_priv *priv = dev->data->dev_private;
1290         struct rte_flow_item_eth bcast = {
1291                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1292         };
1293         struct rte_flow_item_eth ipv6_multi_spec = {
1294                 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1295         };
1296         struct rte_flow_item_eth ipv6_multi_mask = {
1297                 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1298         };
1299         struct rte_flow_item_eth unicast = {
1300                 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1301         };
1302         struct rte_flow_item_eth unicast_mask = {
1303                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1304         };
1305         const unsigned int vlan_filter_n = priv->vlan_filter_n;
1306         const struct rte_ether_addr cmp = {
1307                 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1308         };
1309         unsigned int i;
1310         unsigned int j;
1311         int ret;
1312
1313         /*
1314          * The hairpin Tx queue default flow should be created regardless of
1315          * isolation mode; otherwise all packets to be sent would go out
1316          * directly without the Tx flow actions, e.g. encapsulation.
1317          */
1318         for (i = 0; i != priv->txqs_n; ++i) {
1319                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1320                 if (!txq_ctrl)
1321                         continue;
1322                 /* Only Tx implicit mode requires the default Tx flow. */
1323                 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1324                     txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1325                     txq_ctrl->hairpin_conf.peers[0].port ==
1326                     priv->dev_data->port_id) {
1327                         ret = mlx5_ctrl_flow_source_queue(dev, i);
1328                         if (ret) {
1329                                 mlx5_txq_release(dev, i);
1330                                 goto error;
1331                         }
1332                 }
1333                 if ((priv->representor || priv->master) &&
1334                     priv->config.dv_esw_en) {
1335                         if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1336                                 DRV_LOG(ERR,
1337                                         "Port %u Tx queue %u SQ create representor devx default miss rule failed.",
1338                                         dev->data->port_id, i);
1339                                 mlx5_txq_release(dev, i);
                                 goto error;
1340                         }
1341                 }
1342                 mlx5_txq_release(dev, i);
1343         }
1344         if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1345                 if (mlx5_flow_create_esw_table_zero_flow(dev))
1346                         priv->fdb_def_rule = 1;
1347                 else
1348                         DRV_LOG(INFO, "port %u FDB default rule cannot be"
1349                                 " configured - only Eswitch group 0 flows are"
1350                                 " supported.", dev->data->port_id);
1351         }
1352         if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1353                 ret = mlx5_flow_lacp_miss(dev);
1354                 if (ret)
1355                         DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1356                                 "forward LACP to kernel.", dev->data->port_id);
1357                 else
1358                         DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1359                                 , dev->data->port_id);
1360         }
1361         if (priv->isolated)
1362                 return 0;
1363         if (dev->data->promiscuous) {
1364                 struct rte_flow_item_eth promisc = {
1365                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1366                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1367                         .type = 0,
1368                 };
1369
1370                 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1371                 if (ret)
1372                         goto error;
1373         }
1374         if (dev->data->all_multicast) {
1375                 struct rte_flow_item_eth multicast = {
1376                         .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1377                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1378                         .type = 0,
1379                 };
1380
1381                 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1382                 if (ret)
1383                         goto error;
1384         } else {
1385                 /* Add broadcast/multicast flows. */
1386                 for (i = 0; i != vlan_filter_n; ++i) {
1387                         uint16_t vlan = priv->vlan_filter[i];
1388
1389                         struct rte_flow_item_vlan vlan_spec = {
1390                                 .tci = rte_cpu_to_be_16(vlan),
1391                         };
1392                         struct rte_flow_item_vlan vlan_mask =
1393                                 rte_flow_item_vlan_mask;
1394
1395                         ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1396                                                   &vlan_spec, &vlan_mask);
1397                         if (ret)
1398                                 goto error;
1399                         ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1400                                                   &ipv6_multi_mask,
1401                                                   &vlan_spec, &vlan_mask);
1402                         if (ret)
1403                                 goto error;
1404                 }
1405                 if (!vlan_filter_n) {
1406                         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1407                         if (ret)
1408                                 goto error;
1409                         ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1410                                              &ipv6_multi_mask);
1411                         if (ret) {
1412                                 /* Do not fail on IPv6 multicast flow creation failure. */
1413                                 DRV_LOG(WARNING,
1414                                         "IPv6 multicast flow is not supported");
1415                                 ret = 0;
1416                         }
1417                 }
1418         }
1419         /* Add MAC address flows. */
1420         for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1421                 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1422
1423                 if (!memcmp(mac, &cmp, sizeof(*mac)))
1424                         continue;
1425                 memcpy(&unicast.dst.addr_bytes,
1426                        mac->addr_bytes,
1427                        RTE_ETHER_ADDR_LEN);
1428                 for (j = 0; j != vlan_filter_n; ++j) {
1429                         uint16_t vlan = priv->vlan_filter[j];
1430
1431                         struct rte_flow_item_vlan vlan_spec = {
1432                                 .tci = rte_cpu_to_be_16(vlan),
1433                         };
1434                         struct rte_flow_item_vlan vlan_mask =
1435                                 rte_flow_item_vlan_mask;
1436
1437                         ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1438                                                   &unicast_mask,
1439                                                   &vlan_spec,
1440                                                   &vlan_mask);
1441                         if (ret)
1442                                 goto error;
1443                 }
1444                 if (!vlan_filter_n) {
1445                         ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1446                         if (ret)
1447                                 goto error;
1448                 }
1449         }
1450         return 0;
1451 error:
1452         ret = rte_errno; /* Save rte_errno before cleanup. */
1453         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1454         rte_errno = ret; /* Restore rte_errno. */
1455         return -rte_errno;
1456 }
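
/*
 * Illustrative only: the control flows above are derived from the current
 * device state (promiscuous/all-multicast flags, MAC addresses, VLAN
 * filters), so an application changes them indirectly through the generic
 * ethdev calls, e.g.:
 *
 *   rte_eth_promiscuous_enable(port_id);
 *   rte_eth_allmulticast_enable(port_id);
 */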
1457
1459 /**
1460  * Disable traffic flows configured by control plane
1461  *
1462  * @param dev
1463  *   Pointer to Ethernet device structure.
1464  */
1465 void
1466 mlx5_traffic_disable(struct rte_eth_dev *dev)
1467 {
1468         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1469 }
1470
1471 /**
1472  * Restart traffic flows configured by control plane
1473  *
1474  * @param dev
1475  *   Pointer to Ethernet device structure.
1476  *
1477  * @return
1478  *   0 on success, a negative errno value otherwise and rte_errno is set.
1479  */
1480 int
1481 mlx5_traffic_restart(struct rte_eth_dev *dev)
1482 {
1483         if (dev->data->dev_started) {
1484                 mlx5_traffic_disable(dev);
1485                 return mlx5_traffic_enable(dev);
1486         }
1487         return 0;
1488 }
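
/*
 * Illustrative only: configuration paths that change which control flows
 * are needed (MAC address, VLAN filter, promiscuous or all-multicast mode)
 * are expected to update the relevant device state and then rebuild the
 * control flows through mlx5_traffic_restart(), roughly (vlan_id being the
 * filter to add):
 *
 *   priv->vlan_filter[priv->vlan_filter_n++] = vlan_id;
 *   return mlx5_traffic_restart(dev);
 */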