1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32         struct mlx5_priv *priv = dev->data->dev_private;
33         unsigned int i;
34
35         for (i = 0; i != priv->txqs_n; ++i)
36                 mlx5_txq_release(dev, i);
37 }
38
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51         struct mlx5_priv *priv = dev->data->dev_private;
52         unsigned int i;
53         int ret;
54
55         for (i = 0; i != priv->txqs_n; ++i) {
56                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57                 struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
58                 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59
60                 if (!txq_ctrl)
61                         continue;
62                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63                         txq_alloc_elts(txq_ctrl);
64                 MLX5_ASSERT(!txq_ctrl->obj);
65                 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66                                             0, txq_ctrl->socket);
67                 if (!txq_ctrl->obj) {
68                         DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69                                 "memory resources.", dev->data->port_id,
70                                 txq_data->idx);
71                         rte_errno = ENOMEM;
72                         goto error;
73                 }
74                 ret = priv->obj_ops.txq_obj_new(dev, i);
75                 if (ret < 0) {
76                         mlx5_free(txq_ctrl->obj);
77                         txq_ctrl->obj = NULL;
78                         goto error;
79                 }
80                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81                         size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82
83                         txq_data->fcqs = mlx5_malloc(flags, size,
84                                                      RTE_CACHE_LINE_SIZE,
85                                                      txq_ctrl->socket);
86                         if (!txq_data->fcqs) {
87                                 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88                                         "allocate memory (FCQ).",
89                                         dev->data->port_id, i);
90                                 rte_errno = ENOMEM;
91                                 goto error;
92                         }
93                 }
94                 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95                         dev->data->port_id, i, (void *)&txq_ctrl->obj);
96                 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97         }
98         return 0;
99 error:
100         ret = rte_errno; /* Save rte_errno before cleanup. */
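        /* Release every Tx queue acquired so far, from index i down to 0. */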
101         do {
102                 mlx5_txq_release(dev, i);
103         } while (i-- != 0);
104         rte_errno = ret; /* Restore rte_errno. */
105         return -rte_errno;
106 }
107
108 /**
109  * Translate the chunk address to MR key in order to put it into the cache.
110  */
111 static void
112 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
113                              struct rte_mempool_memhdr *memhdr,
114                              unsigned int idx)
115 {
116         struct mlx5_rxq_data *rxq = opaque;
117
118         RTE_SET_USED(mp);
119         RTE_SET_USED(idx);
120         mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
121 }
122
123 /**
124  * Register Rx queue mempools and fill the Rx queue cache.
125  * This function tolerates repeated mempool registration.
126  *
127  * @param[in] rxq_ctrl
128  *   Rx queue control data.
129  *
130  * @return
131  *   0 on success, (-1) on failure and rte_errno is set.
132  */
133 static int
134 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
135 {
136         struct mlx5_priv *priv = rxq_ctrl->priv;
137         struct rte_mempool *mp;
138         uint32_t s;
139         int ret = 0;
140
141         mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
142         /* MPRQ mempool is registered on creation, just fill the cache. */
143         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
144                 rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
145                                      mlx5_rxq_mempool_register_cb,
146                                      &rxq_ctrl->rxq);
147                 return 0;
148         }
149         for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
150                 mp = rxq_ctrl->rxq.rxseg[s].mp;
151                 ret = mlx5_mr_mempool_register(&priv->sh->cdev->mr_scache,
152                                                priv->sh->cdev->pd, mp,
153                                                &priv->mp_id);
154                 if (ret < 0 && rte_errno != EEXIST)
155                         return ret;
156                 rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
157                                      &rxq_ctrl->rxq);
158         }
159         return 0;
160 }
161
162 /**
163  * Stop traffic on Rx queues.
164  *
165  * @param dev
166  *   Pointer to Ethernet device structure.
167  */
168 static void
169 mlx5_rxq_stop(struct rte_eth_dev *dev)
170 {
171         struct mlx5_priv *priv = dev->data->dev_private;
172         unsigned int i;
173
174         for (i = 0; i != priv->rxqs_n; ++i)
175                 mlx5_rxq_release(dev, i);
176 }
177
178 /**
179  * Start traffic on Rx queues.
180  *
181  * @param dev
182  *   Pointer to Ethernet device structure.
183  *
184  * @return
185  *   0 on success, a negative errno value otherwise and rte_errno is set.
186  */
187 static int
188 mlx5_rxq_start(struct rte_eth_dev *dev)
189 {
190         struct mlx5_priv *priv = dev->data->dev_private;
191         unsigned int i;
192         int ret = 0;
193
194         /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
195         if (mlx5_mprq_alloc_mp(dev)) {
196                 /* Should not release Rx queues but return immediately. */
197                 return -rte_errno;
198         }
199         DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
200                 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
201         DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
202                 dev->data->port_id, priv->sh->device_attr.max_sge);
203         for (i = 0; i != priv->rxqs_n; ++i) {
204                 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
205
206                 if (!rxq_ctrl)
207                         continue;
208                 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
209                         /*
210                          * Pre-register the mempools. Regardless of whether
211                          * the implicit registration is enabled or not,
212                          * Rx mempool destruction is tracked to free MRs.
213                          */
214                         if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
215                                 goto error;
216                         ret = rxq_alloc_elts(rxq_ctrl);
217                         if (ret)
218                                 goto error;
219                 }
220                 MLX5_ASSERT(!rxq_ctrl->obj);
221                 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
222                                             sizeof(*rxq_ctrl->obj), 0,
223                                             rxq_ctrl->socket);
224                 if (!rxq_ctrl->obj) {
225                         DRV_LOG(ERR,
226                                 "Port %u Rx queue %u can't allocate resources.",
227                                 dev->data->port_id, (*priv->rxqs)[i]->idx);
228                         rte_errno = ENOMEM;
229                         goto error;
230                 }
231                 ret = priv->obj_ops.rxq_obj_new(dev, i);
232                 if (ret) {
233                         mlx5_free(rxq_ctrl->obj);
234                         rxq_ctrl->obj = NULL;
235                         goto error;
236                 }
237                 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
238                         dev->data->port_id, i, (void *)&rxq_ctrl->obj);
239                 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
240         }
241         return 0;
242 error:
243         ret = rte_errno; /* Save rte_errno before cleanup. */
244         do {
245                 mlx5_rxq_release(dev, i);
246         } while (i-- != 0);
247         rte_errno = ret; /* Restore rte_errno. */
248         return -rte_errno;
249 }
250
251 /**
252  * Binds Tx queues to Rx queues for hairpin.
253  *
254  * Binds Tx queues to the target Rx queues.
255  *
256  * @param dev
257  *   Pointer to Ethernet device structure.
258  *
259  * @return
260  *   0 on success, a negative errno value otherwise and rte_errno is set.
261  */
262 static int
263 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
264 {
265         struct mlx5_priv *priv = dev->data->dev_private;
266         struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
267         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
268         struct mlx5_txq_ctrl *txq_ctrl;
269         struct mlx5_rxq_ctrl *rxq_ctrl;
270         struct mlx5_devx_obj *sq;
271         struct mlx5_devx_obj *rq;
272         unsigned int i;
273         int ret = 0;
274         bool need_auto = false;
275         uint16_t self_port = dev->data->port_id;
276
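        /*
         * First pass: check whether any hairpin Tx queue targeting this port
         * needs automatic binding; manual binding disables auto-bind entirely.
         */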
277         for (i = 0; i != priv->txqs_n; ++i) {
278                 txq_ctrl = mlx5_txq_get(dev, i);
279                 if (!txq_ctrl)
280                         continue;
281                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
282                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
283                         mlx5_txq_release(dev, i);
284                         continue;
285                 }
286                 if (txq_ctrl->hairpin_conf.manual_bind) {
287                         mlx5_txq_release(dev, i);
288                         return 0;
289                 }
290                 need_auto = true;
291                 mlx5_txq_release(dev, i);
292         }
293         if (!need_auto)
294                 return 0;
295         for (i = 0; i != priv->txqs_n; ++i) {
296                 txq_ctrl = mlx5_txq_get(dev, i);
297                 if (!txq_ctrl)
298                         continue;
299                 /* Skip hairpin queues with other peer ports. */
300                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
301                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
302                         mlx5_txq_release(dev, i);
303                         continue;
304                 }
305                 if (!txq_ctrl->obj) {
306                         rte_errno = ENOMEM;
307                         DRV_LOG(ERR, "port %u no txq object found: %d",
308                                 dev->data->port_id, i);
309                         mlx5_txq_release(dev, i);
310                         return -rte_errno;
311                 }
312                 sq = txq_ctrl->obj->sq;
313                 rxq_ctrl = mlx5_rxq_get(dev,
314                                         txq_ctrl->hairpin_conf.peers[0].queue);
315                 if (!rxq_ctrl) {
316                         mlx5_txq_release(dev, i);
317                         rte_errno = EINVAL;
318                         DRV_LOG(ERR, "port %u no rxq object found: %d",
319                                 dev->data->port_id,
320                                 txq_ctrl->hairpin_conf.peers[0].queue);
321                         return -rte_errno;
322                 }
323                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
324                     rxq_ctrl->hairpin_conf.peers[0].queue != i) {
325                         rte_errno = ENOMEM;
326                 DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
327                                 "Rx queue %d", dev->data->port_id,
328                                 i, txq_ctrl->hairpin_conf.peers[0].queue);
329                         goto error;
330                 }
331                 rq = rxq_ctrl->obj->rq;
332                 if (!rq) {
333                         rte_errno = ENOMEM;
334                         DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
335                                 dev->data->port_id,
336                                 txq_ctrl->hairpin_conf.peers[0].queue);
337                         goto error;
338                 }
339                 sq_attr.state = MLX5_SQC_STATE_RDY;
340                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
341                 sq_attr.hairpin_peer_rq = rq->id;
342                 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
343                 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
344                 if (ret)
345                         goto error;
346                 rq_attr.state = MLX5_SQC_STATE_RDY;
347                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
348                 rq_attr.hairpin_peer_sq = sq->id;
349                 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
350                 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
351                 if (ret)
352                         goto error;
353                 /* Qs with auto-bind will be destroyed directly. */
354                 rxq_ctrl->hairpin_status = 1;
355                 txq_ctrl->hairpin_status = 1;
356                 mlx5_txq_release(dev, i);
357                 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
358         }
359         return 0;
360 error:
361         mlx5_txq_release(dev, i);
362         mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
363         return -rte_errno;
364 }
365
366 /*
367  * Fetch the peer queue's SW & HW information.
368  *
369  * @param dev
370  *   Pointer to Ethernet device structure.
371  * @param peer_queue
372  *   Index of the queue to fetch the information.
373  * @param current_info
374  *   Pointer to the input peer information, not used currently.
375  * @param peer_info
376  *   Pointer to the structure to store the information, output.
377  * @param direction
378  *   Positive to get the RxQ information, zero to get the TxQ information.
379  *
380  * @return
381  *   0 on success, a negative errno value otherwise and rte_errno is set.
382  */
383 int
384 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
385                                struct rte_hairpin_peer_info *current_info,
386                                struct rte_hairpin_peer_info *peer_info,
387                                uint32_t direction)
388 {
389         struct mlx5_priv *priv = dev->data->dev_private;
390         RTE_SET_USED(current_info);
391
392         if (dev->data->dev_started == 0) {
393                 rte_errno = EBUSY;
394                 DRV_LOG(ERR, "peer port %u is not started",
395                         dev->data->port_id);
396                 return -rte_errno;
397         }
398         /*
399          * Peer port used as egress. In the current design, hairpin Tx queue
400          * will be bound to the peer Rx queue. Indeed, only the information of
401          * peer Rx queue needs to be fetched.
402          */
403         if (direction == 0) {
404                 struct mlx5_txq_ctrl *txq_ctrl;
405
406                 txq_ctrl = mlx5_txq_get(dev, peer_queue);
407                 if (txq_ctrl == NULL) {
408                         rte_errno = EINVAL;
409                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
410                                 dev->data->port_id, peer_queue);
411                         return -rte_errno;
412                 }
413                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
414                         rte_errno = EINVAL;
415                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
416                                 dev->data->port_id, peer_queue);
417                         mlx5_txq_release(dev, peer_queue);
418                         return -rte_errno;
419                 }
420                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
421                         rte_errno = ENOMEM;
422                         DRV_LOG(ERR, "port %u no Txq object found: %d",
423                                 dev->data->port_id, peer_queue);
424                         mlx5_txq_release(dev, peer_queue);
425                         return -rte_errno;
426                 }
427                 peer_info->qp_id = txq_ctrl->obj->sq->id;
428                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
429                 /* 1-to-1 mapping, only the first one is used. */
430                 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
431                 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
432                 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
433                 mlx5_txq_release(dev, peer_queue);
434         } else { /* Peer port used as ingress. */
435                 struct mlx5_rxq_ctrl *rxq_ctrl;
436
437                 rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
438                 if (rxq_ctrl == NULL) {
439                         rte_errno = EINVAL;
440                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
441                                 dev->data->port_id, peer_queue);
442                         return -rte_errno;
443                 }
444                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
445                         rte_errno = EINVAL;
446                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
447                                 dev->data->port_id, peer_queue);
448                         mlx5_rxq_release(dev, peer_queue);
449                         return -rte_errno;
450                 }
451                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
452                         rte_errno = ENOMEM;
453                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
454                                 dev->data->port_id, peer_queue);
455                         mlx5_rxq_release(dev, peer_queue);
456                         return -rte_errno;
457                 }
458                 peer_info->qp_id = rxq_ctrl->obj->rq->id;
459                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
460                 peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
461                 peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
462                 peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
463                 mlx5_rxq_release(dev, peer_queue);
464         }
465         return 0;
466 }
467
468 /*
469  * Bind the hairpin queue with the peer HW information.
470  * This needs to be called twice both for Tx and Rx queues of a pair.
471  * If the queue is already bound, it is considered successful.
472  *
473  * @param dev
474  *   Pointer to Ethernet device structure.
475  * @param cur_queue
476  *   Index of the queue to change the HW configuration to bind.
477  * @param peer_info
478  *   Pointer to information of the peer queue.
479  * @param direction
480  *   Positive to configure the TxQ, zero to configure the RxQ.
481  *
482  * @return
483  *   0 on success, a negative errno value otherwise and rte_errno is set.
484  */
485 int
486 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
487                              struct rte_hairpin_peer_info *peer_info,
488                              uint32_t direction)
489 {
490         int ret = 0;
491
492         /*
493          * Consistency checking of the peer queue: opposite direction is used
494          * to get the peer queue info with ethdev port ID, no need to check.
495          */
496         if (peer_info->peer_q != cur_queue) {
497                 rte_errno = EINVAL;
498                 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
499                         dev->data->port_id, cur_queue, peer_info->peer_q);
500                 return -rte_errno;
501         }
502         if (direction != 0) {
503                 struct mlx5_txq_ctrl *txq_ctrl;
504                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
505
506                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
507                 if (txq_ctrl == NULL) {
508                         rte_errno = EINVAL;
509                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
510                                 dev->data->port_id, cur_queue);
511                         return -rte_errno;
512                 }
513                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
514                         rte_errno = EINVAL;
515                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
516                                 dev->data->port_id, cur_queue);
517                         mlx5_txq_release(dev, cur_queue);
518                         return -rte_errno;
519                 }
520                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
521                         rte_errno = ENOMEM;
522                         DRV_LOG(ERR, "port %u no Txq object found: %d",
523                                 dev->data->port_id, cur_queue);
524                         mlx5_txq_release(dev, cur_queue);
525                         return -rte_errno;
526                 }
527                 if (txq_ctrl->hairpin_status != 0) {
528                         DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
529                                 dev->data->port_id, cur_queue);
530                         mlx5_txq_release(dev, cur_queue);
531                         return 0;
532                 }
533                 /*
534                  * Consistency checking of all queues of one port is done in
535                  * the bind() function, and it is optional.
536                  */
537                 if (peer_info->tx_explicit !=
538                     txq_ctrl->hairpin_conf.tx_explicit) {
539                         rte_errno = EINVAL;
540                         DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
541                                 " mismatch", dev->data->port_id, cur_queue);
542                         mlx5_txq_release(dev, cur_queue);
543                         return -rte_errno;
544                 }
545                 if (peer_info->manual_bind !=
546                     txq_ctrl->hairpin_conf.manual_bind) {
547                         rte_errno = EINVAL;
548                         DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
549                                 " mismatch", dev->data->port_id, cur_queue);
550                         mlx5_txq_release(dev, cur_queue);
551                         return -rte_errno;
552                 }
553                 sq_attr.state = MLX5_SQC_STATE_RDY;
554                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
555                 sq_attr.hairpin_peer_rq = peer_info->qp_id;
556                 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
557                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
558                 if (ret == 0)
559                         txq_ctrl->hairpin_status = 1;
560                 mlx5_txq_release(dev, cur_queue);
561         } else {
562                 struct mlx5_rxq_ctrl *rxq_ctrl;
563                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
564
565                 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
566                 if (rxq_ctrl == NULL) {
567                         rte_errno = EINVAL;
568                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
569                                 dev->data->port_id, cur_queue);
570                         return -rte_errno;
571                 }
572                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
573                         rte_errno = EINVAL;
574                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
575                                 dev->data->port_id, cur_queue);
576                         mlx5_rxq_release(dev, cur_queue);
577                         return -rte_errno;
578                 }
579                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
580                         rte_errno = ENOMEM;
581                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
582                                 dev->data->port_id, cur_queue);
583                         mlx5_rxq_release(dev, cur_queue);
584                         return -rte_errno;
585                 }
586                 if (rxq_ctrl->hairpin_status != 0) {
587                         DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
588                                 dev->data->port_id, cur_queue);
589                         mlx5_rxq_release(dev, cur_queue);
590                         return 0;
591                 }
592                 if (peer_info->tx_explicit !=
593                     rxq_ctrl->hairpin_conf.tx_explicit) {
594                         rte_errno = EINVAL;
595                         DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
596                                 " mismatch", dev->data->port_id, cur_queue);
597                         mlx5_rxq_release(dev, cur_queue);
598                         return -rte_errno;
599                 }
600                 if (peer_info->manual_bind !=
601                     rxq_ctrl->hairpin_conf.manual_bind) {
602                         rte_errno = EINVAL;
603                         DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
604                                 " mismatch", dev->data->port_id, cur_queue);
605                         mlx5_rxq_release(dev, cur_queue);
606                         return -rte_errno;
607                 }
608                 rq_attr.state = MLX5_SQC_STATE_RDY;
609                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
610                 rq_attr.hairpin_peer_sq = peer_info->qp_id;
611                 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
612                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
613                 if (ret == 0)
614                         rxq_ctrl->hairpin_status = 1;
615                 mlx5_rxq_release(dev, cur_queue);
616         }
617         return ret;
618 }
619
620 /*
621  * Unbind the hairpin queue and reset its HW configuration.
622  * This needs to be called twice both for Tx and Rx queues of a pair.
623  * If the queue is already unbound, it is considered successful.
624  *
625  * @param dev
626  *   Pointer to Ethernet device structure.
627  * @param cur_queue
628  *   Index of the queue to change the HW configuration to unbind.
629  * @param direction
630  *   Positive to reset the TxQ, zero to reset the RxQ.
631  *
632  * @return
633  *   0 on success, a negative errno value otherwise and rte_errno is set.
634  */
635 int
636 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
637                                uint32_t direction)
638 {
639         int ret = 0;
640
641         if (direction != 0) {
642                 struct mlx5_txq_ctrl *txq_ctrl;
643                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
644
645                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
646                 if (txq_ctrl == NULL) {
647                         rte_errno = EINVAL;
648                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
649                                 dev->data->port_id, cur_queue);
650                         return -rte_errno;
651                 }
652                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
653                         rte_errno = EINVAL;
654                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
655                                 dev->data->port_id, cur_queue);
656                         mlx5_txq_release(dev, cur_queue);
657                         return -rte_errno;
658                 }
659                 /* Already unbound, return success before obj checking. */
660                 if (txq_ctrl->hairpin_status == 0) {
661                         DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
662                                 dev->data->port_id, cur_queue);
663                         mlx5_txq_release(dev, cur_queue);
664                         return 0;
665                 }
666                 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
667                         rte_errno = ENOMEM;
668                         DRV_LOG(ERR, "port %u no Txq object found: %d",
669                                 dev->data->port_id, cur_queue);
670                         mlx5_txq_release(dev, cur_queue);
671                         return -rte_errno;
672                 }
673                 sq_attr.state = MLX5_SQC_STATE_RST;
674                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
675                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
676                 if (ret == 0)
677                         txq_ctrl->hairpin_status = 0;
678                 mlx5_txq_release(dev, cur_queue);
679         } else {
680                 struct mlx5_rxq_ctrl *rxq_ctrl;
681                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
682
683                 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
684                 if (rxq_ctrl == NULL) {
685                         rte_errno = EINVAL;
686                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
687                                 dev->data->port_id, cur_queue);
688                         return -rte_errno;
689                 }
690                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
691                         rte_errno = EINVAL;
692                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
693                                 dev->data->port_id, cur_queue);
694                         mlx5_rxq_release(dev, cur_queue);
695                         return -rte_errno;
696                 }
697                 if (rxq_ctrl->hairpin_status == 0) {
698                         DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
699                                 dev->data->port_id, cur_queue);
700                         mlx5_rxq_release(dev, cur_queue);
701                         return 0;
702                 }
703                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
704                         rte_errno = ENOMEM;
705                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
706                                 dev->data->port_id, cur_queue);
707                         mlx5_rxq_release(dev, cur_queue);
708                         return -rte_errno;
709                 }
710                 rq_attr.state = MLX5_SQC_STATE_RST;
711                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
712                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
713                 if (ret == 0)
714                         rxq_ctrl->hairpin_status = 0;
715                 mlx5_rxq_release(dev, cur_queue);
716         }
717         return ret;
718 }
719
720 /*
721  * Bind the hairpin port pairs, from the Tx to the peer Rx.
722  * This function only supports binding the Tx to one Rx.
723  *
724  * @param dev
725  *   Pointer to Ethernet device structure.
726  * @param rx_port
727  *   Port identifier of the Rx port.
728  *
729  * @return
730  *   0 on success, a negative errno value otherwise and rte_errno is set.
731  */
732 static int
733 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
734 {
735         struct mlx5_priv *priv = dev->data->dev_private;
736         int ret = 0;
737         struct mlx5_txq_ctrl *txq_ctrl;
738         uint32_t i;
739         struct rte_hairpin_peer_info peer = {0xffffff};
740         struct rte_hairpin_peer_info cur;
741         const struct rte_eth_hairpin_conf *conf;
742         uint16_t num_q = 0;
743         uint16_t local_port = priv->dev_data->port_id;
744         uint32_t manual;
745         uint32_t explicit;
746         uint16_t rx_queue;
747
748         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
749                 rte_errno = ENODEV;
750                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
751                 return -rte_errno;
752         }
753         /*
754          * Before binding TxQ to peer RxQ, a first pass over the queues is
755          * used to check their configuration consistency. This costs a little
756          * time but is better than having to do a rollback afterwards.
757          */
758         for (i = 0; i != priv->txqs_n; i++) {
759                 txq_ctrl = mlx5_txq_get(dev, i);
760                 if (txq_ctrl == NULL)
761                         continue;
762                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
763                         mlx5_txq_release(dev, i);
764                         continue;
765                 }
766                 /*
767                  * All hairpin Tx queues of a single port that are connected to
768                  * the same peer Rx port should have the same "auto binding" and
769                  * "implicit Tx flow" modes.
770                  * Peer consistency checking will be done per queue during binding.
771                  */
772                 conf = &txq_ctrl->hairpin_conf;
773                 if (conf->peers[0].port == rx_port) {
774                         if (num_q == 0) {
775                                 manual = conf->manual_bind;
776                                 explicit = conf->tx_explicit;
777                         } else {
778                                 if (manual != conf->manual_bind ||
779                                     explicit != conf->tx_explicit) {
780                                         rte_errno = EINVAL;
781                                         DRV_LOG(ERR, "port %u queue %d mode"
782                                                 " mismatch: %u %u, %u %u",
783                                                 local_port, i, manual,
784                                                 conf->manual_bind, explicit,
785                                                 conf->tx_explicit);
786                                         mlx5_txq_release(dev, i);
787                                         return -rte_errno;
788                                 }
789                         }
790                         num_q++;
791                 }
792                 mlx5_txq_release(dev, i);
793         }
794         /* If no queue is configured, success is returned directly. */
795         if (num_q == 0)
796                 return ret;
797         /* All the hairpin TX queues need to be traversed again. */
798         for (i = 0; i != priv->txqs_n; i++) {
799                 txq_ctrl = mlx5_txq_get(dev, i);
800                 if (txq_ctrl == NULL)
801                         continue;
802                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
803                         mlx5_txq_release(dev, i);
804                         continue;
805                 }
806                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
807                         mlx5_txq_release(dev, i);
808                         continue;
809                 }
810                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
811                 /*
812                  * Fetch peer RxQ's information.
813                  * No need to pass the information of the current queue.
814                  */
815                 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
816                                                         NULL, &peer, 1);
817                 if (ret != 0) {
818                         mlx5_txq_release(dev, i);
819                         goto error;
820                 }
821                 /* Accessing its own device, inside mlx5 PMD. */
822                 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
823                 if (ret != 0) {
824                         mlx5_txq_release(dev, i);
825                         goto error;
826                 }
827                 /* Pass TxQ's information to peer RxQ and try binding. */
828                 cur.peer_q = rx_queue;
829                 cur.qp_id = txq_ctrl->obj->sq->id;
830                 cur.vhca_id = priv->config.hca_attr.vhca_id;
831                 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
832                 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
833                 /*
834                  * In order to access another device in a proper way, an RTE-level
835                  * private function is needed.
836                  */
837                 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
838                                                       &cur, 0);
839                 if (ret != 0) {
840                         mlx5_txq_release(dev, i);
841                         goto error;
842                 }
843                 mlx5_txq_release(dev, i);
844         }
845         return 0;
846 error:
847         /*
848          * Do the rollback process for the queues already bound.
849          * No need to check the return value of the queue unbind function.
850          */
851         do {
852                 /* No validation is needed here. */
853                 txq_ctrl = mlx5_txq_get(dev, i);
854                 if (txq_ctrl == NULL)
855                         continue;
856                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
857                 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
858                 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
859                 mlx5_txq_release(dev, i);
860         } while (i--);
861         return ret;
862 }
863
864 /*
865  * Unbind the hairpin port pair; the HW configuration of both devices will be
866  * cleared and the status will be reset for all the queues used between them.
867  * This function only supports unbinding the Tx from one Rx.
868  *
869  * @param dev
870  *   Pointer to Ethernet device structure.
871  * @param rx_port
872  *   Port identifier of the Rx port.
873  *
874  * @return
875  *   0 on success, a negative errno value otherwise and rte_errno is set.
876  */
877 static int
878 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
879 {
880         struct mlx5_priv *priv = dev->data->dev_private;
881         struct mlx5_txq_ctrl *txq_ctrl;
882         uint32_t i;
883         int ret;
884         uint16_t cur_port = priv->dev_data->port_id;
885
886         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
887                 rte_errno = ENODEV;
888                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
889                 return -rte_errno;
890         }
891         for (i = 0; i != priv->txqs_n; i++) {
892                 uint16_t rx_queue;
893
894                 txq_ctrl = mlx5_txq_get(dev, i);
895                 if (txq_ctrl == NULL)
896                         continue;
897                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
898                         mlx5_txq_release(dev, i);
899                         continue;
900                 }
901                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
902                         mlx5_txq_release(dev, i);
903                         continue;
904                 }
905                 /* Indeed, only the first used queue needs to be checked. */
906                 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
907                         if (cur_port != rx_port) {
908                                 rte_errno = EINVAL;
909                                 DRV_LOG(ERR, "port %u and port %u are in"
910                                         " auto-bind mode", cur_port, rx_port);
911                                 mlx5_txq_release(dev, i);
912                                 return -rte_errno;
913                         } else {
914                                 return 0;
915                         }
916                 }
917                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
918                 mlx5_txq_release(dev, i);
919                 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
920                 if (ret) {
921                         DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
922                                 rx_port, rx_queue);
923                         return ret;
924                 }
925                 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
926                 if (ret) {
927                         DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
928                                 cur_port, i);
929                         return ret;
930                 }
931         }
932         return 0;
933 }
934
935 /*
936  * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
937  * @see mlx5_hairpin_bind_single_port()
938  */
939 int
940 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
941 {
942         int ret = 0;
943         uint16_t p, pp;
944
945         /*
946          * If the Rx port has no hairpin configuration with the current port,
947          * the binding will be skipped in the single port bind function.
948          * The device started status will be checked only before the queue
949          * information is updated.
950          */
951         if (rx_port == RTE_MAX_ETHPORTS) {
952                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
953                         ret = mlx5_hairpin_bind_single_port(dev, p);
954                         if (ret != 0)
955                                 goto unbind;
956                 }
957                 return ret;
958         } else {
959                 return mlx5_hairpin_bind_single_port(dev, rx_port);
960         }
961 unbind:
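        /* Roll back: unbind the ports already bound before the failing port. */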
962         MLX5_ETH_FOREACH_DEV(pp, dev->device)
963                 if (pp < p)
964                         mlx5_hairpin_unbind_single_port(dev, pp);
965         return ret;
966 }
967
968 /*
969  * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
970  * @see mlx5_hairpin_unbind_single_port()
971  */
972 int
973 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
974 {
975         int ret = 0;
976         uint16_t p;
977
978         if (rx_port == RTE_MAX_ETHPORTS)
979                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
980                         ret = mlx5_hairpin_unbind_single_port(dev, p);
981                         if (ret != 0)
982                                 return ret;
983                 }
984         else
985                 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
986         return ret;
987 }
988
989 /*
990  * DPDK callback to get the hairpin peer ports list.
991  * This will return the actual number of peer ports and save the identifiers
992  * into the array (sorted; the order may differ from the one used when setting
993  * up the hairpin peer queues).
994  * The peer port ID could be the same as the port ID of the current device.
995  *
996  * @param dev
997  *   Pointer to Ethernet device structure.
998  * @param peer_ports
999  *   Pointer to array to save the port identifiers.
1000  * @param len
1001  *   The length of the array.
1002  * @param direction
1003  *   Current port to peer port direction.
1004  *   positive - current used as Tx to get all peer Rx ports.
1005  *   zero - current used as Rx to get all peer Tx ports.
1006  *
1007  * @return
1008  *   0 or positive value on success, actual number of peer ports.
1009  *   a negative errno value otherwise and rte_errno is set.
1010  */
1011 int
1012 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1013                             size_t len, uint32_t direction)
1014 {
1015         struct mlx5_priv *priv = dev->data->dev_private;
1016         struct mlx5_txq_ctrl *txq_ctrl;
1017         struct mlx5_rxq_ctrl *rxq_ctrl;
1018         uint32_t i;
1019         uint16_t pp;
1020         uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1021         int ret = 0;
1022
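        /* Collect peer ports in a bitmap so that each port is reported only once. */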
1023         if (direction) {
1024                 for (i = 0; i < priv->txqs_n; i++) {
1025                         txq_ctrl = mlx5_txq_get(dev, i);
1026                         if (!txq_ctrl)
1027                                 continue;
1028                         if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1029                                 mlx5_txq_release(dev, i);
1030                                 continue;
1031                         }
1032                         pp = txq_ctrl->hairpin_conf.peers[0].port;
1033                         if (pp >= RTE_MAX_ETHPORTS) {
1034                                 rte_errno = ERANGE;
1035                                 mlx5_txq_release(dev, i);
1036                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1037                                         "out of range %hu",
1038                                         priv->dev_data->port_id, i, pp);
1039                                 return -rte_errno;
1040                         }
1041                         bits[pp / 32] |= 1 << (pp % 32);
1042                         mlx5_txq_release(dev, i);
1043                 }
1044         } else {
1045                 for (i = 0; i < priv->rxqs_n; i++) {
1046                         rxq_ctrl = mlx5_rxq_get(dev, i);
1047                         if (!rxq_ctrl)
1048                                 continue;
1049                         if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
1050                                 mlx5_rxq_release(dev, i);
1051                                 continue;
1052                         }
1053                         pp = rxq_ctrl->hairpin_conf.peers[0].port;
1054                         if (pp >= RTE_MAX_ETHPORTS) {
1055                                 rte_errno = ERANGE;
1056                                 mlx5_rxq_release(dev, i);
1057                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1058                                         "out of range %hu",
1059                                         priv->dev_data->port_id, i, pp);
1060                                 return -rte_errno;
1061                         }
1062                         bits[pp / 32] |= 1 << (pp % 32);
1063                         mlx5_rxq_release(dev, i);
1064                 }
1065         }
1066         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1067                 if (bits[i / 32] & (1 << (i % 32))) {
1068                         if ((size_t)ret >= len) {
1069                                 rte_errno = E2BIG;
1070                                 return -rte_errno;
1071                         }
1072                         peer_ports[ret++] = i;
1073                 }
1074         }
1075         return ret;
1076 }
1077
1078 /**
1079  * DPDK callback to start the device.
1080  *
1081  * Simulate device start by attaching all configured flows.
1082  *
1083  * @param dev
1084  *   Pointer to Ethernet device structure.
1085  *
1086  * @return
1087  *   0 on success, a negative errno value otherwise and rte_errno is set.
1088  */
1089 int
1090 mlx5_dev_start(struct rte_eth_dev *dev)
1091 {
1092         struct mlx5_priv *priv = dev->data->dev_private;
1093         int ret;
1094         int fine_inline;
1095
1096         DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1097         fine_inline = rte_mbuf_dynflag_lookup
1098                 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1099         if (fine_inline >= 0)
1100                 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1101         else
1102                 rte_net_mlx5_dynf_inline_mask = 0;
1103         if (dev->data->nb_rx_queues > 0) {
1104                 ret = mlx5_dev_configure_rss_reta(dev);
1105                 if (ret) {
1106                         DRV_LOG(ERR, "port %u reta config failed: %s",
1107                                 dev->data->port_id, strerror(rte_errno));
1108                         return -rte_errno;
1109                 }
1110         }
1111         ret = mlx5_txpp_start(dev);
1112         if (ret) {
1113                 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1114                         dev->data->port_id, strerror(rte_errno));
1115                 goto error;
1116         }
1117         if ((priv->sh->devx && priv->config.dv_flow_en &&
1118             priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1119                 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1120                 if (ret)
1121                         goto error;
1122         }
1123         ret = mlx5_txq_start(dev);
1124         if (ret) {
1125                 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1126                         dev->data->port_id, strerror(rte_errno));
1127                 goto error;
1128         }
1129         ret = mlx5_rxq_start(dev);
1130         if (ret) {
1131                 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1132                         dev->data->port_id, strerror(rte_errno));
1133                 goto error;
1134         }
1135         /*
1136          * This step will be skipped if there is no hairpin Tx queue configured
1137          * with an Rx peer queue from the same device.
1138          */
1139         ret = mlx5_hairpin_auto_bind(dev);
1140         if (ret) {
1141                 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1142                         dev->data->port_id, strerror(rte_errno));
1143                 goto error;
1144         }
1145         /* Set started flag here for the following steps like control flow. */
1146         dev->data->dev_started = 1;
1147         ret = mlx5_rx_intr_vec_enable(dev);
1148         if (ret) {
1149                 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1150                         dev->data->port_id);
1151                 goto error;
1152         }
1153         mlx5_os_stats_init(dev);
1154         ret = mlx5_traffic_enable(dev);
1155         if (ret) {
1156                 DRV_LOG(ERR, "port %u failed to set default flows",
1157                         dev->data->port_id);
1158                 goto error;
1159         }
1160         /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1161         mlx5_flow_rxq_dynf_metadata_set(dev);
1162         /* Set flags and context to convert Rx timestamps. */
1163         mlx5_rxq_timestamp_set(dev);
1164         /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1165         mlx5_txq_dynf_timestamp_set(dev);
1166         /* Attach indirection table objects detached on port stop. */
1167         ret = mlx5_action_handle_attach(dev);
1168         if (ret) {
1169                 DRV_LOG(ERR,
1170                         "port %u failed to attach indirect actions: %s",
1171                         dev->data->port_id, rte_strerror(rte_errno));
1172                 goto error;
1173         }
1174         /*
1175          * In non-cached mode, only the default mreg copy action needs to be
1176          * started, and no flow created by the application exists anymore.
1177          * But it is worth wrapping the interface for further usage.
1178          */
1179         ret = mlx5_flow_start_default(dev);
1180         if (ret) {
1181                 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1182                         dev->data->port_id, strerror(rte_errno));
1183                 goto error;
1184         }
1185         if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1186                 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1187                         dev->data->port_id, rte_strerror(rte_errno));
1188                 goto error;
1189         }
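        /* Ensure the stores above are visible before switching burst functions. */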
1190         rte_wmb();
1191         dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1192         dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1193         /* Enable datapath on secondary process. */
1194         mlx5_mp_os_req_start_rxtx(dev);
1195         if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1196                 priv->sh->port[priv->dev_port - 1].ih_port_id =
1197                                         (uint32_t)dev->data->port_id;
1198         } else {
1199                 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1200                         dev->data->port_id);
1201                 dev->data->dev_conf.intr_conf.lsc = 0;
1202                 dev->data->dev_conf.intr_conf.rmv = 0;
1203         }
1204         if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1205                 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1206                                         (uint32_t)dev->data->port_id;
1207         return 0;
1208 error:
1209         ret = rte_errno; /* Save rte_errno before cleanup. */
1210         /* Rollback. */
1211         dev->data->dev_started = 0;
1212         mlx5_flow_stop_default(dev);
1213         mlx5_traffic_disable(dev);
1214         mlx5_txq_stop(dev);
1215         mlx5_rxq_stop(dev);
1216         if (priv->obj_ops.lb_dummy_queue_release)
1217                 priv->obj_ops.lb_dummy_queue_release(dev);
1218         mlx5_txpp_stop(dev); /* Stop last. */
1219         rte_errno = ret; /* Restore rte_errno. */
1220         return -rte_errno;
1221 }
1222
1223 /**
1224  * DPDK callback to stop the device.
1225  *
1226  * Simulate device stop by detaching all configured flows.
1227  *
1228  * @param dev
1229  *   Pointer to Ethernet device structure.
1230  */
1231 int
1232 mlx5_dev_stop(struct rte_eth_dev *dev)
1233 {
1234         struct mlx5_priv *priv = dev->data->dev_private;
1235
1236         dev->data->dev_started = 0;
1237         /* Prevent crashes when queues are still in use. */
1238         dev->rx_pkt_burst = removed_rx_burst;
1239         dev->tx_pkt_burst = removed_tx_burst;
1240         rte_wmb();
1241         /* Disable datapath on secondary process. */
1242         mlx5_mp_os_req_stop_rxtx(dev);
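        /* Give the datapath time to stop using the queues: 1 ms per Rx queue. */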
1243         rte_delay_us_sleep(1000 * priv->rxqs_n);
1244         DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1245         mlx5_flow_stop_default(dev);
1246         /* Control flows for default traffic can be removed first. */
1247         mlx5_traffic_disable(dev);
1248         /* All RX queue flags will be cleared in the flush interface. */
1249         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1250         mlx5_flow_meter_rxq_flush(dev);
1251         mlx5_action_handle_detach(dev);
1252         mlx5_rx_intr_vec_disable(dev);
1253         priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1254         priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1255         mlx5_txq_stop(dev);
1256         mlx5_rxq_stop(dev);
1257         if (priv->obj_ops.lb_dummy_queue_release)
1258                 priv->obj_ops.lb_dummy_queue_release(dev);
1259         mlx5_txpp_stop(dev);
1260
1261         return 0;
1262 }
1263
1264 /**
1265  * Enable traffic flows configured by control plane.
1266  *
1267  * @param dev
1268  *   Pointer to Ethernet device structure.
1271  *
1272  * @return
1273  *   0 on success, a negative errno value otherwise and rte_errno is set.
1274  */
1275 int
1276 mlx5_traffic_enable(struct rte_eth_dev *dev)
1277 {
1278         struct mlx5_priv *priv = dev->data->dev_private;
1279         struct rte_flow_item_eth bcast = {
1280                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1281         };
1282         struct rte_flow_item_eth ipv6_multi_spec = {
1283                 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1284         };
1285         struct rte_flow_item_eth ipv6_multi_mask = {
1286                 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1287         };
1288         struct rte_flow_item_eth unicast = {
1289                 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1290         };
1291         struct rte_flow_item_eth unicast_mask = {
1292                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1293         };
1294         const unsigned int vlan_filter_n = priv->vlan_filter_n;
1295         const struct rte_ether_addr cmp = {
1296                 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1297         };
1298         unsigned int i;
1299         unsigned int j;
1300         int ret;
1301
1302         /*
1303          * The hairpin Tx queue default flow must be created regardless of
1304          * isolation mode; otherwise packets would be sent out directly,
1305          * bypassing the Tx flow actions, e.g. encapsulation.
1306          */
1307         for (i = 0; i != priv->txqs_n; ++i) {
1308                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1309                 if (!txq_ctrl)
1310                         continue;
1311                 /* Only Tx implicit mode requires the default Tx flow. */
1312                 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1313                     txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1314                     txq_ctrl->hairpin_conf.peers[0].port ==
1315                     priv->dev_data->port_id) {
1316                         ret = mlx5_ctrl_flow_source_queue(dev, i);
1317                         if (ret) {
1318                                 mlx5_txq_release(dev, i);
1319                                 goto error;
1320                         }
1321                 }
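                /*
                 * On E-Switch (master/representor) ports, also install the
                 * devx default miss rule for this Tx queue's SQ.
                 */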
1322                 if ((priv->representor || priv->master) &&
1323                     priv->config.dv_esw_en) {
1324                         if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1325                                 DRV_LOG(ERR,
1326                                         "Port %u Tx queue %u: failed to create devx default miss rule for representor SQ.",
1327                                         dev->data->port_id, i);
1328                                 goto error;
1329                         }
1330                 }
1331                 mlx5_txq_release(dev, i);
1332         }
1333         if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1334                 if (mlx5_flow_create_esw_table_zero_flow(dev))
1335                         priv->fdb_def_rule = 1;
1336                 else
1337                         DRV_LOG(INFO, "port %u FDB default rule cannot be"
1338                                 " configured - only Eswitch group 0 flows are"
1339                                 " supported.", dev->data->port_id);
1340         }
1341         if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1342                 ret = mlx5_flow_lacp_miss(dev);
1343                 if (ret)
1344                         DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1345                                 "forward LACP to kernel.", dev->data->port_id);
1346                 else
1347                         DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1348                                 dev->data->port_id);
1349         }
1350         if (priv->isolated)
1351                 return 0;
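        /* Promiscuous mode: an all-zero spec and mask match every packet. */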
1352         if (dev->data->promiscuous) {
1353                 struct rte_flow_item_eth promisc = {
1354                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1355                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1356                         .type = 0,
1357                 };
1358
1359                 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1360                 if (ret)
1361                         goto error;
1362         }
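        /* All-multicast mode: match only the Ethernet multicast (group) bit. */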
1363         if (dev->data->all_multicast) {
1364                 struct rte_flow_item_eth multicast = {
1365                         .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1366                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1367                         .type = 0,
1368                 };
1369
1370                 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1371                 if (ret)
1372                         goto error;
1373         } else {
1374                 /* Add broadcast/multicast flows. */
1375                 for (i = 0; i != vlan_filter_n; ++i) {
1376                         uint16_t vlan = priv->vlan_filter[i];
1377
1378                         struct rte_flow_item_vlan vlan_spec = {
1379                                 .tci = rte_cpu_to_be_16(vlan),
1380                         };
1381                         struct rte_flow_item_vlan vlan_mask =
1382                                 rte_flow_item_vlan_mask;
1383
1384                         ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1385                                                   &vlan_spec, &vlan_mask);
1386                         if (ret)
1387                                 goto error;
1388                         ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1389                                                   &ipv6_multi_mask,
1390                                                   &vlan_spec, &vlan_mask);
1391                         if (ret)
1392                                 goto error;
1393                 }
1394                 if (!vlan_filter_n) {
1395                         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1396                         if (ret)
1397                                 goto error;
1398                         ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1399                                              &ipv6_multi_mask);
1400                         if (ret) {
1401                                 /* Do not fail on IPv6 multicast flow creation failure. */
1402                                 DRV_LOG(WARNING,
1403                                         "IPv6 multicast is not supported");
1404                                 ret = 0;
1405                         }
1406                 }
1407         }
1408         /* Add MAC address flows. */
1409         for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1410                 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1411
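                /* Skip unassigned (all-zero) MAC address slots. */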
1412                 if (!memcmp(mac, &cmp, sizeof(*mac)))
1413                         continue;
1414                 memcpy(&unicast.dst.addr_bytes,
1415                        mac->addr_bytes,
1416                        RTE_ETHER_ADDR_LEN);
1417                 for (j = 0; j != vlan_filter_n; ++j) {
1418                         uint16_t vlan = priv->vlan_filter[j];
1419
1420                         struct rte_flow_item_vlan vlan_spec = {
1421                                 .tci = rte_cpu_to_be_16(vlan),
1422                         };
1423                         struct rte_flow_item_vlan vlan_mask =
1424                                 rte_flow_item_vlan_mask;
1425
1426                         ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1427                                                   &unicast_mask,
1428                                                   &vlan_spec,
1429                                                   &vlan_mask);
1430                         if (ret)
1431                                 goto error;
1432                 }
1433                 if (!vlan_filter_n) {
1434                         ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1435                         if (ret)
1436                                 goto error;
1437                 }
1438         }
1439         return 0;
1440 error:
1441         ret = rte_errno; /* Save rte_errno before cleanup. */
1442         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1443         rte_errno = ret; /* Restore rte_errno. */
1444         return -rte_errno;
1445 }
1446
1447
1448 /**
1449  * Disable traffic flows configured by control plane
1450  *
1451  * @param dev
1452  *   Pointer to Ethernet device structure.
1453  */
1454 void
1455 mlx5_traffic_disable(struct rte_eth_dev *dev)
1456 {
1457         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1458 }
1459
1460 /**
1461  * Restart traffic flows configured by control plane
1462  *
1463  * @param dev
1464  *   Pointer to Ethernet device structure.
1465  *
1466  * @return
1467  *   0 on success, a negative errno value otherwise and rte_errno is set.
1468  */
1469 int
1470 mlx5_traffic_restart(struct rte_eth_dev *dev)
1471 {
1472         if (dev->data->dev_started) {
1473                 mlx5_traffic_disable(dev);
1474                 return mlx5_traffic_enable(dev);
1475         }
1476         return 0;
1477 }
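
/*
 * Usage sketch (illustrative only, not part of the driver): Rx mode and
 * MAC/VLAN filter callbacks rebuild the control flows by updating the
 * relevant field in dev->data and then calling mlx5_traffic_restart().
 * The helper below is a hypothetical example of that pattern for
 * promiscuous mode; its name and the guard macro are placeholders.
 */
#ifdef MLX5_TRIGGER_USAGE_SKETCH
static int
example_promiscuous_enable(struct rte_eth_dev *dev)
{
        /* Record the new Rx mode, then re-install the control flows. */
        dev->data->promiscuous = 1;
        return mlx5_traffic_restart(dev);
}
#endif /* MLX5_TRIGGER_USAGE_SKETCH */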