[dpdk.git] / drivers / net / mlx5 / mlx5_trigger.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32         struct mlx5_priv *priv = dev->data->dev_private;
33         unsigned int i;
34
35         for (i = 0; i != priv->txqs_n; ++i)
36                 mlx5_txq_release(dev, i);
37 }
38
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51         struct mlx5_priv *priv = dev->data->dev_private;
52         unsigned int i;
53         int ret;
54
55         for (i = 0; i != priv->txqs_n; ++i) {
56                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57                 struct mlx5_txq_data *txq_data;
58                 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59
60                 if (!txq_ctrl)
61                         continue;
                   txq_data = &txq_ctrl->txq;
62                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63                         txq_alloc_elts(txq_ctrl);
64                 MLX5_ASSERT(!txq_ctrl->obj);
65                 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66                                             0, txq_ctrl->socket);
67                 if (!txq_ctrl->obj) {
68                         DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69                                 "memory resources.", dev->data->port_id,
70                                 txq_data->idx);
71                         rte_errno = ENOMEM;
72                         goto error;
73                 }
74                 ret = priv->obj_ops.txq_obj_new(dev, i);
75                 if (ret < 0) {
76                         mlx5_free(txq_ctrl->obj);
77                         txq_ctrl->obj = NULL;
78                         goto error;
79                 }
80                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81                         size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82
83                         txq_data->fcqs = mlx5_malloc(flags, size,
84                                                      RTE_CACHE_LINE_SIZE,
85                                                      txq_ctrl->socket);
86                         if (!txq_data->fcqs) {
87                                 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88                                         "allocate memory (FCQ).",
89                                         dev->data->port_id, i);
90                                 rte_errno = ENOMEM;
91                                 goto error;
92                         }
93                 }
94                 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95                         dev->data->port_id, i, (void *)&txq_ctrl->obj);
96                 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97         }
98         return 0;
99 error:
100         ret = rte_errno; /* Save rte_errno before cleanup. */
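        /*
         * Roll back: release the queue that failed (index i) and then every
         * queue started before it, walking back down to index 0.
         */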
101         do {
102                 mlx5_txq_release(dev, i);
103         } while (i-- != 0);
104         rte_errno = ret; /* Restore rte_errno. */
105         return -rte_errno;
106 }
107
108 /**
109  * Translate the chunk address to MR key in order to put it into the cache.
110  */
111 static void
112 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
113                              struct rte_mempool_memhdr *memhdr,
114                              unsigned int idx)
115 {
116         struct mlx5_rxq_data *rxq = opaque;
117
118         RTE_SET_USED(mp);
119         RTE_SET_USED(idx);
120         mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
121 }
122
123 /**
124  * Register Rx queue mempools and fill the Rx queue cache.
125  * This function tolerates repeated mempool registration.
126  *
127  * @param[in] rxq_ctrl
128  *   Rx queue control data.
129  *
130  * @return
131  *   0 on success, (-1) on failure and rte_errno is set.
132  */
133 static int
134 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
135 {
136         struct rte_mempool *mp;
137         uint32_t s;
138         int ret = 0;
139
140         mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
141         /* MPRQ mempool is registered on creation, just fill the cache. */
142         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
143                 rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
144                                      mlx5_rxq_mempool_register_cb,
145                                      &rxq_ctrl->rxq);
146                 return 0;
147         }
148         for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
149                 uint32_t flags;
150
151                 mp = rxq_ctrl->rxq.rxseg[s].mp;
152                 flags = rte_pktmbuf_priv_flags(mp);
153                 ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp);
154                 if (ret < 0 && rte_errno != EEXIST)
155                         return ret;
156                 if ((flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) == 0)
157                         rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
158                                         &rxq_ctrl->rxq);
159         }
160         return 0;
161 }
162
163 /**
164  * Stop traffic on Rx queues.
165  *
166  * @param dev
167  *   Pointer to Ethernet device structure.
168  */
169 static void
170 mlx5_rxq_stop(struct rte_eth_dev *dev)
171 {
172         struct mlx5_priv *priv = dev->data->dev_private;
173         unsigned int i;
174
175         for (i = 0; i != priv->rxqs_n; ++i)
176                 mlx5_rxq_release(dev, i);
177 }
178
179 static int
180 mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
181                       unsigned int idx)
182 {
183         int ret = 0;
184
185         if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
186                 /*
187                  * Pre-register the mempools. Regardless of whether
188                  * the implicit registration is enabled or not,
189                  * Rx mempool destruction is tracked to free MRs.
190                  */
191                 if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
192                         return -rte_errno;
193                 ret = rxq_alloc_elts(rxq_ctrl);
194                 if (ret)
195                         return ret;
196         }
197         MLX5_ASSERT(!rxq_ctrl->obj);
198         rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
199                                     sizeof(*rxq_ctrl->obj), 0,
200                                     rxq_ctrl->socket);
201         if (!rxq_ctrl->obj) {
202                 DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
203                         dev->data->port_id, idx);
204                 rte_errno = ENOMEM;
205                 return -rte_errno;
206         }
207         DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
208                 idx, (void *)&rxq_ctrl->obj);
209         return 0;
210 }
211
212 /**
213  * Start traffic on Rx queues.
214  *
215  * @param dev
216  *   Pointer to Ethernet device structure.
217  *
218  * @return
219  *   0 on success, a negative errno value otherwise and rte_errno is set.
220  */
221 static int
222 mlx5_rxq_start(struct rte_eth_dev *dev)
223 {
224         struct mlx5_priv *priv = dev->data->dev_private;
225         unsigned int i;
226         int ret = 0;
227
228         /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
229         if (mlx5_mprq_alloc_mp(dev)) {
230                 /* Should not release Rx queues but return immediately. */
231                 return -rte_errno;
232         }
233         DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
234                 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
235         DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
236                 dev->data->port_id, priv->sh->device_attr.max_sge);
237         for (i = 0; i != priv->rxqs_n; ++i) {
238                 struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
239                 struct mlx5_rxq_ctrl *rxq_ctrl;
240
241                 if (rxq == NULL)
242                         continue;
243                 rxq_ctrl = rxq->ctrl;
244                 if (!rxq_ctrl->started) {
245                         if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
246                                 goto error;
247                         LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
248                 }
249                 ret = priv->obj_ops.rxq_obj_new(rxq);
250                 if (ret) {
251                         mlx5_free(rxq_ctrl->obj);
252                         rxq_ctrl->obj = NULL;
253                         goto error;
254                 }
255                 rxq_ctrl->started = true;
256         }
257         return 0;
258 error:
259         ret = rte_errno; /* Save rte_errno before cleanup. */
260         do {
261                 mlx5_rxq_release(dev, i);
262         } while (i-- != 0);
263         rte_errno = ret; /* Restore rte_errno. */
264         return -rte_errno;
265 }
266
267 /**
268  * Bind Tx queues to Rx queues for hairpin.
269  *
270  * Binds each hairpin Tx queue to its target Rx queue on the same device.
271  *
272  * @param dev
273  *   Pointer to Ethernet device structure.
274  *
275  * @return
276  *   0 on success, a negative errno value otherwise and rte_errno is set.
277  */
278 static int
279 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
280 {
281         struct mlx5_priv *priv = dev->data->dev_private;
282         struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
283         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
284         struct mlx5_txq_ctrl *txq_ctrl;
285         struct mlx5_rxq_priv *rxq;
286         struct mlx5_rxq_ctrl *rxq_ctrl;
287         struct mlx5_devx_obj *sq;
288         struct mlx5_devx_obj *rq;
289         unsigned int i;
290         int ret = 0;
291         bool need_auto = false;
292         uint16_t self_port = dev->data->port_id;
293
294         for (i = 0; i != priv->txqs_n; ++i) {
295                 txq_ctrl = mlx5_txq_get(dev, i);
296                 if (!txq_ctrl)
297                         continue;
298                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
299                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
300                         mlx5_txq_release(dev, i);
301                         continue;
302                 }
303                 if (txq_ctrl->hairpin_conf.manual_bind) {
304                         mlx5_txq_release(dev, i);
305                         return 0;
306                 }
307                 need_auto = true;
308                 mlx5_txq_release(dev, i);
309         }
310         if (!need_auto)
311                 return 0;
312         for (i = 0; i != priv->txqs_n; ++i) {
313                 txq_ctrl = mlx5_txq_get(dev, i);
314                 if (!txq_ctrl)
315                         continue;
316                 /* Skip hairpin queues with other peer ports. */
317                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
318                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
319                         mlx5_txq_release(dev, i);
320                         continue;
321                 }
322                 if (!txq_ctrl->obj) {
323                         rte_errno = ENOMEM;
324                         DRV_LOG(ERR, "port %u no txq object found: %d",
325                                 dev->data->port_id, i);
326                         mlx5_txq_release(dev, i);
327                         return -rte_errno;
328                 }
329                 sq = txq_ctrl->obj->sq;
330                 rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
331                 if (rxq == NULL) {
332                         mlx5_txq_release(dev, i);
333                         rte_errno = EINVAL;
334                         DRV_LOG(ERR, "port %u no rxq object found: %d",
335                                 dev->data->port_id,
336                                 txq_ctrl->hairpin_conf.peers[0].queue);
337                         return -rte_errno;
338                 }
339                 rxq_ctrl = rxq->ctrl;
340                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
341                     rxq->hairpin_conf.peers[0].queue != i) {
342                         rte_errno = ENOMEM;
343                         DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
344                                 "Rx queue %d", dev->data->port_id,
345                                 i, txq_ctrl->hairpin_conf.peers[0].queue);
346                         goto error;
347                 }
348                 rq = rxq_ctrl->obj->rq;
349                 if (!rq) {
350                         rte_errno = ENOMEM;
351                         DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
352                                 dev->data->port_id,
353                                 txq_ctrl->hairpin_conf.peers[0].queue);
354                         goto error;
355                 }
356                 sq_attr.state = MLX5_SQC_STATE_RDY;
357                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
358                 sq_attr.hairpin_peer_rq = rq->id;
359                 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
360                 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
361                 if (ret)
362                         goto error;
363                 rq_attr.state = MLX5_SQC_STATE_RDY;
364                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
365                 rq_attr.hairpin_peer_sq = sq->id;
366                 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
367                 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
368                 if (ret)
369                         goto error;
370                 /* Qs with auto-bind will be destroyed directly. */
371                 rxq->hairpin_status = 1;
372                 txq_ctrl->hairpin_status = 1;
373                 mlx5_txq_release(dev, i);
374         }
375         return 0;
376 error:
377         mlx5_txq_release(dev, i);
378         return -rte_errno;
379 }
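/*
 * Illustrative sketch (not part of the driver): a single-port hairpin setup
 * that relies on the automatic binding performed above when the port starts.
 * "port_id", the queue indexes and the descriptor counts are placeholders;
 * error handling is omitted.
 *
 *	struct rte_eth_hairpin_conf hconf = {
 *		.peer_count = 1,
 *		.manual_bind = 0,	// auto bind on device start
 *		.tx_explicit = 0,	// implicit Tx flow mode
 *	};
 *	hconf.peers[0].port = port_id;		// same port: loopback hairpin
 *	hconf.peers[0].queue = hairpin_txq;	// peer Tx queue of this Rx queue
 *	rte_eth_rx_hairpin_queue_setup(port_id, hairpin_rxq, 512, &hconf);
 *	hconf.peers[0].queue = hairpin_rxq;	// peer Rx queue of this Tx queue
 *	rte_eth_tx_hairpin_queue_setup(port_id, hairpin_txq, 512, &hconf);
 *	rte_eth_dev_start(port_id);		// triggers the auto binding
 */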
380
381 /*
382  * Fetch the peer queue's SW & HW information.
383  *
384  * @param dev
385  *   Pointer to Ethernet device structure.
386  * @param peer_queue
387  *   Index of the queue to fetch the information from.
388  * @param current_info
389  *   Pointer to the input peer information, not used currently.
390  * @param peer_info
391  *   Pointer to the structure to store the information, output.
392  * @param direction
393  *   Positive to get the RxQ information, zero to get the TxQ information.
394  *
395  * @return
396  *   0 on success, a negative errno value otherwise and rte_errno is set.
397  */
398 int
399 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
400                                struct rte_hairpin_peer_info *current_info,
401                                struct rte_hairpin_peer_info *peer_info,
402                                uint32_t direction)
403 {
404         struct mlx5_priv *priv = dev->data->dev_private;
405         RTE_SET_USED(current_info);
406
407         if (dev->data->dev_started == 0) {
408                 rte_errno = EBUSY;
409                 DRV_LOG(ERR, "peer port %u is not started",
410                         dev->data->port_id);
411                 return -rte_errno;
412         }
413         /*
414          * Peer port used as egress. In the current design, a hairpin Tx queue
415          * is bound to the peer Rx queue, so only the information of the peer
416          * Rx queue needs to be fetched.
417          */
418         if (direction == 0) {
419                 struct mlx5_txq_ctrl *txq_ctrl;
420
421                 txq_ctrl = mlx5_txq_get(dev, peer_queue);
422                 if (txq_ctrl == NULL) {
423                         rte_errno = EINVAL;
424                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
425                                 dev->data->port_id, peer_queue);
426                         return -rte_errno;
427                 }
428                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
429                         rte_errno = EINVAL;
430                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
431                                 dev->data->port_id, peer_queue);
432                         mlx5_txq_release(dev, peer_queue);
433                         return -rte_errno;
434                 }
435                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
436                         rte_errno = ENOMEM;
437                         DRV_LOG(ERR, "port %u no Txq object found: %d",
438                                 dev->data->port_id, peer_queue);
439                         mlx5_txq_release(dev, peer_queue);
440                         return -rte_errno;
441                 }
442                 peer_info->qp_id = txq_ctrl->obj->sq->id;
443                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
444                 /* 1-to-1 mapping, only the first one is used. */
445                 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
446                 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
447                 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
448                 mlx5_txq_release(dev, peer_queue);
449         } else { /* Peer port used as ingress. */
450                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
451                 struct mlx5_rxq_ctrl *rxq_ctrl;
452
453                 if (rxq == NULL) {
454                         rte_errno = EINVAL;
455                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
456                                 dev->data->port_id, peer_queue);
457                         return -rte_errno;
458                 }
459                 rxq_ctrl = rxq->ctrl;
460                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
461                         rte_errno = EINVAL;
462                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
463                                 dev->data->port_id, peer_queue);
464                         return -rte_errno;
465                 }
466                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
467                         rte_errno = ENOMEM;
468                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
469                                 dev->data->port_id, peer_queue);
470                         return -rte_errno;
471                 }
472                 peer_info->qp_id = rxq_ctrl->obj->rq->id;
473                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
474                 peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
475                 peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
476                 peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
477         }
478         return 0;
479 }
480
481 /*
482  * Bind the hairpin queue with the peer HW information.
483  * This needs to be called twice, once for each of the Tx and Rx queues of a pair.
484  * If the queue is already bound, it is considered successful.
485  *
486  * @param dev
487  *   Pointer to Ethernet device structure.
488  * @param cur_queue
489  *   Index of the queue to change the HW configuration to bind.
490  * @param peer_info
491  *   Pointer to information of the peer queue.
492  * @param direction
493  *   Positive to configure the TxQ, zero to configure the RxQ.
494  *
495  * @return
496  *   0 on success, a negative errno value otherwise and rte_errno is set.
497  */
498 int
499 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
500                              struct rte_hairpin_peer_info *peer_info,
501                              uint32_t direction)
502 {
503         int ret = 0;
504
505         /*
506          * Consistency check of the peer queue: the opposite direction is used
507          * to get the peer queue info via the ethdev port ID, no need to check it.
508          */
509         if (peer_info->peer_q != cur_queue) {
510                 rte_errno = EINVAL;
511                 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
512                         dev->data->port_id, cur_queue, peer_info->peer_q);
513                 return -rte_errno;
514         }
515         if (direction != 0) {
516                 struct mlx5_txq_ctrl *txq_ctrl;
517                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
518
519                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
520                 if (txq_ctrl == NULL) {
521                         rte_errno = EINVAL;
522                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
523                                 dev->data->port_id, cur_queue);
524                         return -rte_errno;
525                 }
526                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
527                         rte_errno = EINVAL;
528                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
529                                 dev->data->port_id, cur_queue);
530                         mlx5_txq_release(dev, cur_queue);
531                         return -rte_errno;
532                 }
533                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
534                         rte_errno = ENOMEM;
535                         DRV_LOG(ERR, "port %u no Txq object found: %d",
536                                 dev->data->port_id, cur_queue);
537                         mlx5_txq_release(dev, cur_queue);
538                         return -rte_errno;
539                 }
540                 if (txq_ctrl->hairpin_status != 0) {
541                         DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
542                                 dev->data->port_id, cur_queue);
543                         mlx5_txq_release(dev, cur_queue);
544                         return 0;
545                 }
546                 /*
547                  * Consistency checking of all queues of one port is done in the
548                  * bind() function, and that is optional.
549                  */
550                 if (peer_info->tx_explicit !=
551                     txq_ctrl->hairpin_conf.tx_explicit) {
552                         rte_errno = EINVAL;
553                         DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
554                                 " mismatch", dev->data->port_id, cur_queue);
555                         mlx5_txq_release(dev, cur_queue);
556                         return -rte_errno;
557                 }
558                 if (peer_info->manual_bind !=
559                     txq_ctrl->hairpin_conf.manual_bind) {
560                         rte_errno = EINVAL;
561                         DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
562                                 " mismatch", dev->data->port_id, cur_queue);
563                         mlx5_txq_release(dev, cur_queue);
564                         return -rte_errno;
565                 }
566                 sq_attr.state = MLX5_SQC_STATE_RDY;
567                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
568                 sq_attr.hairpin_peer_rq = peer_info->qp_id;
569                 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
570                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
571                 if (ret == 0)
572                         txq_ctrl->hairpin_status = 1;
573                 mlx5_txq_release(dev, cur_queue);
574         } else {
575                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
576                 struct mlx5_rxq_ctrl *rxq_ctrl;
577                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
578
579                 if (rxq == NULL) {
580                         rte_errno = EINVAL;
581                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
582                                 dev->data->port_id, cur_queue);
583                         return -rte_errno;
584                 }
585                 rxq_ctrl = rxq->ctrl;
586                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
587                         rte_errno = EINVAL;
588                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
589                                 dev->data->port_id, cur_queue);
590                         return -rte_errno;
591                 }
592                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
593                         rte_errno = ENOMEM;
594                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
595                                 dev->data->port_id, cur_queue);
596                         return -rte_errno;
597                 }
598                 if (rxq->hairpin_status != 0) {
599                         DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
600                                 dev->data->port_id, cur_queue);
601                         return 0;
602                 }
603                 if (peer_info->tx_explicit !=
604                     rxq->hairpin_conf.tx_explicit) {
605                         rte_errno = EINVAL;
606                         DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
607                                 " mismatch", dev->data->port_id, cur_queue);
608                         return -rte_errno;
609                 }
610                 if (peer_info->manual_bind !=
611                     rxq->hairpin_conf.manual_bind) {
612                         rte_errno = EINVAL;
613                         DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
614                                 " mismatch", dev->data->port_id, cur_queue);
615                         return -rte_errno;
616                 }
617                 rq_attr.state = MLX5_SQC_STATE_RDY;
618                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
619                 rq_attr.hairpin_peer_sq = peer_info->qp_id;
620                 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
621                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
622                 if (ret == 0)
623                         rxq->hairpin_status = 1;
624         }
625         return ret;
626 }
627
628 /*
629  * Unbind the hairpin queue and reset its HW configuration.
630  * This needs to be called twice, once for each of the Tx and Rx queues of a pair.
631  * If the queue is already unbound, it is considered successful.
632  *
633  * @param dev
634  *   Pointer to Ethernet device structure.
635  * @param cur_queue
636  *   Index of the queue to change the HW configuration to unbind.
637  * @param direction
638  *   Positive to reset the TxQ, zero to reset the RxQ.
639  *
640  * @return
641  *   0 on success, a negative errno value otherwise and rte_errno is set.
642  */
643 int
644 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
645                                uint32_t direction)
646 {
647         int ret = 0;
648
649         if (direction != 0) {
650                 struct mlx5_txq_ctrl *txq_ctrl;
651                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
652
653                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
654                 if (txq_ctrl == NULL) {
655                         rte_errno = EINVAL;
656                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
657                                 dev->data->port_id, cur_queue);
658                         return -rte_errno;
659                 }
660                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
661                         rte_errno = EINVAL;
662                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
663                                 dev->data->port_id, cur_queue);
664                         mlx5_txq_release(dev, cur_queue);
665                         return -rte_errno;
666                 }
667                 /* Already unbound, return success before obj checking. */
668                 if (txq_ctrl->hairpin_status == 0) {
669                         DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
670                                 dev->data->port_id, cur_queue);
671                         mlx5_txq_release(dev, cur_queue);
672                         return 0;
673                 }
674                 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
675                         rte_errno = ENOMEM;
676                         DRV_LOG(ERR, "port %u no Txq object found: %d",
677                                 dev->data->port_id, cur_queue);
678                         mlx5_txq_release(dev, cur_queue);
679                         return -rte_errno;
680                 }
681                 sq_attr.state = MLX5_SQC_STATE_RST;
682                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
683                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
684                 if (ret == 0)
685                         txq_ctrl->hairpin_status = 0;
686                 mlx5_txq_release(dev, cur_queue);
687         } else {
688                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
689                 struct mlx5_rxq_ctrl *rxq_ctrl;
690                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
691
692                 if (rxq == NULL) {
693                         rte_errno = EINVAL;
694                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
695                                 dev->data->port_id, cur_queue);
696                         return -rte_errno;
697                 }
698                 rxq_ctrl = rxq->ctrl;
699                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
700                         rte_errno = EINVAL;
701                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
702                                 dev->data->port_id, cur_queue);
703                         return -rte_errno;
704                 }
705                 if (rxq->hairpin_status == 0) {
706                         DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
707                                 dev->data->port_id, cur_queue);
708                         return 0;
709                 }
710                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
711                         rte_errno = ENOMEM;
712                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
713                                 dev->data->port_id, cur_queue);
714                         return -rte_errno;
715                 }
716                 rq_attr.state = MLX5_SQC_STATE_RST;
717                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
718                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
719                 if (ret == 0)
720                         rxq->hairpin_status = 0;
721         }
722         return ret;
723 }
724
725 /*
726  * Bind the hairpin port pairs, from the Tx to the peer Rx.
727  * This function only supports binding the Tx side to one Rx port.
728  *
729  * @param dev
730  *   Pointer to Ethernet device structure.
731  * @param rx_port
732  *   Port identifier of the Rx port.
733  *
734  * @return
735  *   0 on success, a negative errno value otherwise and rte_errno is set.
736  */
737 static int
738 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
739 {
740         struct mlx5_priv *priv = dev->data->dev_private;
741         int ret = 0;
742         struct mlx5_txq_ctrl *txq_ctrl;
743         uint32_t i;
744         struct rte_hairpin_peer_info peer = {0xffffff};
745         struct rte_hairpin_peer_info cur;
746         const struct rte_eth_hairpin_conf *conf;
747         uint16_t num_q = 0;
748         uint16_t local_port = priv->dev_data->port_id;
749         uint32_t manual;
750         uint32_t explicit;
751         uint16_t rx_queue;
752
753         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
754                 rte_errno = ENODEV;
755                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
756                 return -rte_errno;
757         }
758         /*
759          * Before binding TxQ to peer RxQ, a first pass over the queues checks
760          * their configuration consistency. This is a little time consuming
761          * but better than having to roll back afterwards.
762          */
763         for (i = 0; i != priv->txqs_n; i++) {
764                 txq_ctrl = mlx5_txq_get(dev, i);
765                 if (txq_ctrl == NULL)
766                         continue;
767                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
768                         mlx5_txq_release(dev, i);
769                         continue;
770                 }
771                 /*
772                  * All hairpin Tx queues of a single port that are connected to
773                  * the same peer Rx port should have the same "auto binding" and
774                  * "implicit Tx flow" modes.
775                  * Peer consistency checking will be done during per-queue binding.
776                  */
777                 conf = &txq_ctrl->hairpin_conf;
778                 if (conf->peers[0].port == rx_port) {
779                         if (num_q == 0) {
780                                 manual = conf->manual_bind;
781                                 explicit = conf->tx_explicit;
782                         } else {
783                                 if (manual != conf->manual_bind ||
784                                     explicit != conf->tx_explicit) {
785                                         rte_errno = EINVAL;
786                                         DRV_LOG(ERR, "port %u queue %d mode"
787                                                 " mismatch: %u %u, %u %u",
788                                                 local_port, i, manual,
789                                                 conf->manual_bind, explicit,
790                                                 conf->tx_explicit);
791                                         mlx5_txq_release(dev, i);
792                                         return -rte_errno;
793                                 }
794                         }
795                         num_q++;
796                 }
797                 mlx5_txq_release(dev, i);
798         }
799         /* If no queue is configured, return success directly. */
800         if (num_q == 0)
801                 return ret;
802         /* All the hairpin TX queues need to be traversed again. */
803         for (i = 0; i != priv->txqs_n; i++) {
804                 txq_ctrl = mlx5_txq_get(dev, i);
805                 if (txq_ctrl == NULL)
806                         continue;
807                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
808                         mlx5_txq_release(dev, i);
809                         continue;
810                 }
811                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
812                         mlx5_txq_release(dev, i);
813                         continue;
814                 }
815                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
816                 /*
817                  * Fetch peer RxQ's information.
818                  * No need to pass the information of the current queue.
819                  */
820                 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
821                                                         NULL, &peer, 1);
822                 if (ret != 0) {
823                         mlx5_txq_release(dev, i);
824                         goto error;
825                 }
826                 /* Accessing its own device, inside mlx5 PMD. */
827                 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
828                 if (ret != 0) {
829                         mlx5_txq_release(dev, i);
830                         goto error;
831                 }
832                 /* Pass TxQ's information to peer RxQ and try binding. */
833                 cur.peer_q = rx_queue;
834                 cur.qp_id = txq_ctrl->obj->sq->id;
835                 cur.vhca_id = priv->config.hca_attr.vhca_id;
836                 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
837                 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
838                 /*
839                  * In order to access another device in a proper way, an RTE-level
840                  * private function is needed.
841                  */
842                 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
843                                                       &cur, 0);
844                 if (ret != 0) {
845                         mlx5_txq_release(dev, i);
846                         goto error;
847                 }
848                 mlx5_txq_release(dev, i);
849         }
850         return 0;
851 error:
852         /*
853          * Roll back the queues that were already bound.
854          * No need to check the return value of the queue unbind function.
855          */
856         do {
857                 /* No validation is needed here. */
858                 txq_ctrl = mlx5_txq_get(dev, i);
859                 if (txq_ctrl == NULL)
860                         continue;
861                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
862                 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
863                 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
864                 mlx5_txq_release(dev, i);
865         } while (i--);
866         return ret;
867 }
868
869 /*
870  * Unbind the hairpin port pair. The HW configuration of both devices will be
871  * cleared and the status will be reset for all the queues used between them.
872  * This function only supports unbinding the Tx side from one Rx port.
873  *
874  * @param dev
875  *   Pointer to Ethernet device structure.
876  * @param rx_port
877  *   Port identifier of the Rx port.
878  *
879  * @return
880  *   0 on success, a negative errno value otherwise and rte_errno is set.
881  */
882 static int
883 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
884 {
885         struct mlx5_priv *priv = dev->data->dev_private;
886         struct mlx5_txq_ctrl *txq_ctrl;
887         uint32_t i;
888         int ret;
889         uint16_t cur_port = priv->dev_data->port_id;
890
891         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
892                 rte_errno = ENODEV;
893                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
894                 return -rte_errno;
895         }
896         for (i = 0; i != priv->txqs_n; i++) {
897                 uint16_t rx_queue;
898
899                 txq_ctrl = mlx5_txq_get(dev, i);
900                 if (txq_ctrl == NULL)
901                         continue;
902                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
903                         mlx5_txq_release(dev, i);
904                         continue;
905                 }
906                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
907                         mlx5_txq_release(dev, i);
908                         continue;
909                 }
910                 /* Indeed, only the first used queue needs to be checked. */
911                 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
912                         if (cur_port != rx_port) {
913                                 rte_errno = EINVAL;
914                                 DRV_LOG(ERR, "port %u and port %u are in"
915                                         " auto-bind mode", cur_port, rx_port);
916                                 mlx5_txq_release(dev, i);
917                                 return -rte_errno;
918                         } else {
919                                 return 0;
920                         }
921                 }
922                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
923                 mlx5_txq_release(dev, i);
924                 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
925                 if (ret) {
926                         DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
927                                 rx_port, rx_queue);
928                         return ret;
929                 }
930                 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
931                 if (ret) {
932                         DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
933                                 cur_port, i);
934                         return ret;
935                 }
936         }
937         return 0;
938 }
939
940 /*
941  * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
942  * @see mlx5_hairpin_bind_single_port()
943  */
944 int
945 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
946 {
947         int ret = 0;
948         uint16_t p, pp;
949
950         /*
951          * If the Rx port has no hairpin configuration with the current port,
952          * the binding will be skipped inside the single-port bind function.
953          * The device started status will be checked only before updating the
954          * queue information.
955          */
956         if (rx_port == RTE_MAX_ETHPORTS) {
957                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
958                         ret = mlx5_hairpin_bind_single_port(dev, p);
959                         if (ret != 0)
960                                 goto unbind;
961                 }
962                 return ret;
963         } else {
964                 return mlx5_hairpin_bind_single_port(dev, rx_port);
965         }
966 unbind:
967         MLX5_ETH_FOREACH_DEV(pp, dev->device)
968                 if (pp < p)
969                         mlx5_hairpin_unbind_single_port(dev, pp);
970         return ret;
971 }
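/*
 * Illustrative sketch (not part of the driver): manual binding of a two-port
 * hairpin after both ports have been started. Port identifiers are
 * placeholders; error handling is omitted. The hairpin queues are assumed to
 * have been set up with manual_bind = 1.
 *
 *	rte_eth_dev_start(port0);
 *	rte_eth_dev_start(port1);
 *	// Bind the egress of port0 to the ingress of port1 and vice versa.
 *	rte_eth_hairpin_bind(port0, port1);
 *	rte_eth_hairpin_bind(port1, port0);
 */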
972
973 /*
974  * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
975  * @see mlx5_hairpin_unbind_single_port()
976  */
977 int
978 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
979 {
980         int ret = 0;
981         uint16_t p;
982
983         if (rx_port == RTE_MAX_ETHPORTS)
984                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
985                         ret = mlx5_hairpin_unbind_single_port(dev, p);
986                         if (ret != 0)
987                                 return ret;
988                 }
989         else
990                 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
991         return ret;
992 }
993
994 /*
995  * DPDK callback to get the hairpin peer ports list.
996  * This will return the actual number of peer ports and save the identifiers
997  * into the array (sorted; the order may differ from the one used when
998  * setting up the hairpin peer queues).
999  * The peer port ID could be the same as the port ID of the current device.
1000  *
1001  * @param dev
1002  *   Pointer to Ethernet device structure.
1003  * @param peer_ports
1004  *   Pointer to array to save the port identifiers.
1005  * @param len
1006  *   The length of the array.
1007  * @param direction
1008  *   Current port to peer port direction.
1009  *   positive - current used as Tx to get all peer Rx ports.
1010  *   zero - current used as Rx to get all peer Tx ports.
1011  *
1012  * @return
1013  *   0 or positive value on success, actual number of peer ports.
1014  *   a negative errno value otherwise and rte_errno is set.
1015  */
1016 int
1017 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1018                             size_t len, uint32_t direction)
1019 {
1020         struct mlx5_priv *priv = dev->data->dev_private;
1021         struct mlx5_txq_ctrl *txq_ctrl;
1022         uint32_t i;
1023         uint16_t pp;
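        /* Bitmap of peer port IDs, used to report each peer port only once. */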
1024         uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1025         int ret = 0;
1026
1027         if (direction) {
1028                 for (i = 0; i < priv->txqs_n; i++) {
1029                         txq_ctrl = mlx5_txq_get(dev, i);
1030                         if (!txq_ctrl)
1031                                 continue;
1032                         if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1033                                 mlx5_txq_release(dev, i);
1034                                 continue;
1035                         }
1036                         pp = txq_ctrl->hairpin_conf.peers[0].port;
1037                         if (pp >= RTE_MAX_ETHPORTS) {
1038                                 rte_errno = ERANGE;
1039                                 mlx5_txq_release(dev, i);
1040                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1041                                         "out of range %hu",
1042                                         priv->dev_data->port_id, i, pp);
1043                                 return -rte_errno;
1044                         }
1045                         bits[pp / 32] |= 1 << (pp % 32);
1046                         mlx5_txq_release(dev, i);
1047                 }
1048         } else {
1049                 for (i = 0; i < priv->rxqs_n; i++) {
1050                         struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1051                         struct mlx5_rxq_ctrl *rxq_ctrl;
1052
1053                         if (rxq == NULL)
1054                                 continue;
1055                         rxq_ctrl = rxq->ctrl;
1056                         if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
1057                                 continue;
1058                         pp = rxq->hairpin_conf.peers[0].port;
1059                         if (pp >= RTE_MAX_ETHPORTS) {
1060                                 rte_errno = ERANGE;
1061                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1062                                         "out of range %hu",
1063                                         priv->dev_data->port_id, i, pp);
1064                                 return -rte_errno;
1065                         }
1066                         bits[pp / 32] |= 1 << (pp % 32);
1067                 }
1068         }
1069         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1070                 if (bits[i / 32] & (1 << (i % 32))) {
1071                         if ((size_t)ret >= len) {
1072                                 rte_errno = E2BIG;
1073                                 return -rte_errno;
1074                         }
1075                         peer_ports[ret++] = i;
1076                 }
1077         }
1078         return ret;
1079 }
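/*
 * Illustrative sketch (not part of the driver): an application querying the
 * Rx peer ports of a Tx port and unbinding them all. "port_id" is a
 * placeholder; error handling is omitted.
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int n = rte_eth_hairpin_get_peer_ports(port_id, peers,
 *					       RTE_MAX_ETHPORTS, 1);
 *	for (int i = 0; i < n; i++)
 *		rte_eth_hairpin_unbind(port_id, peers[i]);
 */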
1080
1081 /**
1082  * DPDK callback to start the device.
1083  *
1084  * Simulate device start by attaching all configured flows.
1085  *
1086  * @param dev
1087  *   Pointer to Ethernet device structure.
1088  *
1089  * @return
1090  *   0 on success, a negative errno value otherwise and rte_errno is set.
1091  */
1092 int
1093 mlx5_dev_start(struct rte_eth_dev *dev)
1094 {
1095         struct mlx5_priv *priv = dev->data->dev_private;
1096         int ret;
1097         int fine_inline;
1098
1099         DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1100         fine_inline = rte_mbuf_dynflag_lookup
1101                 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1102         if (fine_inline >= 0)
1103                 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1104         else
1105                 rte_net_mlx5_dynf_inline_mask = 0;
1106         if (dev->data->nb_rx_queues > 0) {
1107                 ret = mlx5_dev_configure_rss_reta(dev);
1108                 if (ret) {
1109                         DRV_LOG(ERR, "port %u reta config failed: %s",
1110                                 dev->data->port_id, strerror(rte_errno));
1111                         return -rte_errno;
1112                 }
1113         }
1114         ret = mlx5_txpp_start(dev);
1115         if (ret) {
1116                 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1117                         dev->data->port_id, strerror(rte_errno));
1118                 goto error;
1119         }
1120         if ((priv->sh->devx && priv->config.dv_flow_en &&
1121             priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1122                 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1123                 if (ret)
1124                         goto error;
1125         }
1126         ret = mlx5_txq_start(dev);
1127         if (ret) {
1128                 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1129                         dev->data->port_id, strerror(rte_errno));
1130                 goto error;
1131         }
1132         if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
1133                 if (!priv->config.vf && !priv->config.sf &&
1134                     !priv->representor) {
1135                         ret = mlx5_get_flag_dropless_rq(dev);
1136                         if (ret < 0)
1137                                 DRV_LOG(WARNING,
1138                                         "port %u cannot query dropless flag",
1139                                         dev->data->port_id);
1140                         else if (!ret)
1141                                 DRV_LOG(WARNING,
1142                                         "port %u dropless_rq OFF, no rearming",
1143                                         dev->data->port_id);
1144                 } else {
1145                         DRV_LOG(DEBUG,
1146                                 "port %u doesn't support dropless_rq flag",
1147                                 dev->data->port_id);
1148                 }
1149         }
1150         ret = mlx5_rxq_start(dev);
1151         if (ret) {
1152                 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1153                         dev->data->port_id, strerror(rte_errno));
1154                 goto error;
1155         }
1156         /*
1157          * This step will be skipped if there is no hairpin Tx queue configured
1158          * with an Rx peer queue on the same device.
1159          */
1160         ret = mlx5_hairpin_auto_bind(dev);
1161         if (ret) {
1162                 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1163                         dev->data->port_id, strerror(rte_errno));
1164                 goto error;
1165         }
1166         /* Set started flag here for the following steps like control flow. */
1167         dev->data->dev_started = 1;
1168         ret = mlx5_rx_intr_vec_enable(dev);
1169         if (ret) {
1170                 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1171                         dev->data->port_id);
1172                 goto error;
1173         }
1174         mlx5_os_stats_init(dev);
1175         ret = mlx5_traffic_enable(dev);
1176         if (ret) {
1177                 DRV_LOG(ERR, "port %u failed to set default flows",
1178                         dev->data->port_id);
1179                 goto error;
1180         }
1181         /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1182         mlx5_flow_rxq_dynf_metadata_set(dev);
1183         /* Set flags and context to convert Rx timestamps. */
1184         mlx5_rxq_timestamp_set(dev);
1185         /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1186         mlx5_txq_dynf_timestamp_set(dev);
1187         /* Attach indirection table objects detached on port stop. */
1188         ret = mlx5_action_handle_attach(dev);
1189         if (ret) {
1190                 DRV_LOG(ERR,
1191                         "port %u failed to attach indirect actions: %s",
1192                         dev->data->port_id, rte_strerror(rte_errno));
1193                 goto error;
1194         }
1195         /*
1196          * In non-cached mode, only the default mreg copy action needs to be
1197          * started, since no flow created by an application exists anymore.
1198          * But it is worth wrapping the interface for further usage.
1199          */
1200         ret = mlx5_flow_start_default(dev);
1201         if (ret) {
1202                 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1203                         dev->data->port_id, strerror(rte_errno));
1204                 goto error;
1205         }
1206         if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1207                 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1208                         dev->data->port_id, rte_strerror(rte_errno));
1209                 goto error;
1210         }
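        /*
         * Make sure the configuration updates above are globally visible
         * before the datapath burst functions are installed below.
         */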
1211         rte_wmb();
1212         dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1213         dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1214         /* Enable datapath on secondary process. */
1215         mlx5_mp_os_req_start_rxtx(dev);
1216         if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1217                 priv->sh->port[priv->dev_port - 1].ih_port_id =
1218                                         (uint32_t)dev->data->port_id;
1219         } else {
1220                 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1221                         dev->data->port_id);
1222                 dev->data->dev_conf.intr_conf.lsc = 0;
1223                 dev->data->dev_conf.intr_conf.rmv = 0;
1224         }
1225         if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1226                 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1227                                         (uint32_t)dev->data->port_id;
1228         return 0;
1229 error:
1230         ret = rte_errno; /* Save rte_errno before cleanup. */
1231         /* Rollback. */
1232         dev->data->dev_started = 0;
1233         mlx5_flow_stop_default(dev);
1234         mlx5_traffic_disable(dev);
1235         mlx5_txq_stop(dev);
1236         mlx5_rxq_stop(dev);
1237         if (priv->obj_ops.lb_dummy_queue_release)
1238                 priv->obj_ops.lb_dummy_queue_release(dev);
1239         mlx5_txpp_stop(dev); /* Stop last. */
1240         rte_errno = ret; /* Restore rte_errno. */
1241         return -rte_errno;
1242 }
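/*
 * Illustrative sketch (not part of the driver): the usual ethdev sequence on
 * the application side that ends up invoking this callback. Identifiers are
 * placeholders; error handling is omitted.
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 *	for (q = 0; q < nb_rxq; q++)
 *		rte_eth_rx_queue_setup(port_id, q, 512, socket, NULL, mb_pool);
 *	for (q = 0; q < nb_txq; q++)
 *		rte_eth_tx_queue_setup(port_id, q, 512, socket, NULL);
 *	rte_eth_dev_start(port_id);	// calls mlx5_dev_start() for mlx5 ports
 */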
1243
1244 /**
1245  * DPDK callback to stop the device.
1246  *
1247  * Simulate device stop by detaching all configured flows.
1248  *
1249  * @param dev
1250  *   Pointer to Ethernet device structure.
1251  */
1252 int
1253 mlx5_dev_stop(struct rte_eth_dev *dev)
1254 {
1255         struct mlx5_priv *priv = dev->data->dev_private;
1256
1257         dev->data->dev_started = 0;
1258         /* Prevent crashes when queues are still in use. */
1259         dev->rx_pkt_burst = removed_rx_burst;
1260         dev->tx_pkt_burst = removed_tx_burst;
1261         rte_wmb();
1262         /* Disable datapath on secondary process. */
1263         mlx5_mp_os_req_stop_rxtx(dev);
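        /*
         * Give the datapath a grace period: Rx/Tx bursts that started before
         * the burst function pointers were replaced above may still be running.
         */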
1264         rte_delay_us_sleep(1000 * priv->rxqs_n);
1265         DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1266         mlx5_flow_stop_default(dev);
1267         /* Control flows for default traffic can be removed first. */
1268         mlx5_traffic_disable(dev);
1269         /* All RX queue flags will be cleared in the flush interface. */
1270         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1271         mlx5_flow_meter_rxq_flush(dev);
1272         mlx5_action_handle_detach(dev);
1273         mlx5_rx_intr_vec_disable(dev);
1274         priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1275         priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1276         mlx5_txq_stop(dev);
1277         mlx5_rxq_stop(dev);
1278         if (priv->obj_ops.lb_dummy_queue_release)
1279                 priv->obj_ops.lb_dummy_queue_release(dev);
1280         mlx5_txpp_stop(dev);
1281
1282         return 0;
1283 }
1284
1285 /**
1286  * Enable traffic flows configured by the control plane.
1287  *
1288  * @param dev
1289  *   Pointer to Ethernet device structure.
1292  *
1293  * @return
1294  *   0 on success, a negative errno value otherwise and rte_errno is set.
1295  */
1296 int
1297 mlx5_traffic_enable(struct rte_eth_dev *dev)
1298 {
1299         struct mlx5_priv *priv = dev->data->dev_private;
1300         struct rte_flow_item_eth bcast = {
1301                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1302         };
1303         struct rte_flow_item_eth ipv6_multi_spec = {
1304                 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1305         };
1306         struct rte_flow_item_eth ipv6_multi_mask = {
1307                 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1308         };
1309         struct rte_flow_item_eth unicast = {
1310                 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1311         };
1312         struct rte_flow_item_eth unicast_mask = {
1313                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1314         };
1315         const unsigned int vlan_filter_n = priv->vlan_filter_n;
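             /* All-zero MAC address, used below to skip unset MAC table entries. */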
1316         const struct rte_ether_addr cmp = {
1317                 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1318         };
1319         unsigned int i;
1320         unsigned int j;
1321         int ret;
1322
1323         /*
1324          * The hairpin Tx queue default flow must be created regardless of
1325          * isolation mode. Otherwise all packets to be sent would go out
1326          * directly, bypassing the Tx flow actions, e.g. encapsulation.
1327          */
1328         for (i = 0; i != priv->txqs_n; ++i) {
1329                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1330                 if (!txq_ctrl)
1331                         continue;
1332                 /* Only Tx implicit mode requires the default Tx flow. */
1333                 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1334                     txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1335                     txq_ctrl->hairpin_conf.peers[0].port ==
1336                     priv->dev_data->port_id) {
1337                         ret = mlx5_ctrl_flow_source_queue(dev, i);
1338                         if (ret) {
1339                                 mlx5_txq_release(dev, i);
1340                                 goto error;
1341                         }
1342                 }
1343                 if ((priv->representor || priv->master) &&
1344                     priv->config.dv_esw_en) {
1345                         if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1346                                 DRV_LOG(ERR, "Port %u Tx queue %u: failed to create representor DevX SQ default miss rule.",
1347                                         dev->data->port_id, i);
1348                                 mlx5_txq_release(dev, i);
1349                                 goto error;
1350                         }
1351                 }
1352                 mlx5_txq_release(dev, i);
1353         }
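             /*
              * Try to install the default FDB rule jumping from group 0 to
              * group 1; without it only E-Switch group 0 flows are usable.
              */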
1354         if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1355                 if (mlx5_flow_create_esw_table_zero_flow(dev))
1356                         priv->fdb_def_rule = 1;
1357                 else
1358                         DRV_LOG(INFO, "port %u FDB default rule cannot be"
1359                                 " configured - only Eswitch group 0 flows are"
1360                                 " supported.", dev->data->port_id);
1361         }
1362         if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1363                 ret = mlx5_flow_lacp_miss(dev);
1364                 if (ret)
1365                         DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1366                                 "forward LACP to kernel.", dev->data->port_id);
1367                 else
1368                         DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1369                                 dev->data->port_id);
1370         }
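             /*
              * In isolated mode the application manages all flows itself,
              * so no default control flows are installed below.
              */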
1371         if (priv->isolated)
1372                 return 0;
1373         if (dev->data->promiscuous) {
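                     /* An all-zero spec and mask match every Ethernet frame. */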
1374                 struct rte_flow_item_eth promisc = {
1375                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1376                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1377                         .type = 0,
1378                 };
1379
1380                 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1381                 if (ret)
1382                         goto error;
1383         }
1384         if (dev->data->all_multicast) {
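                     /* Match any frame with the multicast bit set in the destination MAC. */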
1385                 struct rte_flow_item_eth multicast = {
1386                         .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1387                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1388                         .type = 0,
1389                 };
1390
1391                 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1392                 if (ret)
1393                         goto error;
1394         } else {
1395                 /* Add broadcast/multicast flows. */
1396                 for (i = 0; i != vlan_filter_n; ++i) {
1397                         uint16_t vlan = priv->vlan_filter[i];
1398
1399                         struct rte_flow_item_vlan vlan_spec = {
1400                                 .tci = rte_cpu_to_be_16(vlan),
1401                         };
1402                         struct rte_flow_item_vlan vlan_mask =
1403                                 rte_flow_item_vlan_mask;
1404
1405                         ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1406                                                   &vlan_spec, &vlan_mask);
1407                         if (ret)
1408                                 goto error;
1409                         ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1410                                                   &ipv6_multi_mask,
1411                                                   &vlan_spec, &vlan_mask);
1412                         if (ret)
1413                                 goto error;
1414                 }
1415                 if (!vlan_filter_n) {
1416                         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1417                         if (ret)
1418                                 goto error;
1419                         ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1420                                              &ipv6_multi_mask);
1421                         if (ret) {
1422                                 /* Do not fail on IPv6 multicast rule creation failure. */
1423                                 DRV_LOG(WARNING,
1424                                         "IPv6 multicast is not supported");
1425                                 ret = 0;
1426                         }
1427                 }
1428         }
1429         /* Add MAC address flows. */
1430         for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1431                 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1432
1433                 if (!memcmp(mac, &cmp, sizeof(*mac)))
1434                         continue;
1435                 memcpy(&unicast.dst.addr_bytes,
1436                        mac->addr_bytes,
1437                        RTE_ETHER_ADDR_LEN);
1438                 for (j = 0; j != vlan_filter_n; ++j) {
1439                         uint16_t vlan = priv->vlan_filter[j];
1440
1441                         struct rte_flow_item_vlan vlan_spec = {
1442                                 .tci = rte_cpu_to_be_16(vlan),
1443                         };
1444                         struct rte_flow_item_vlan vlan_mask =
1445                                 rte_flow_item_vlan_mask;
1446
1447                         ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1448                                                   &unicast_mask,
1449                                                   &vlan_spec,
1450                                                   &vlan_mask);
1451                         if (ret)
1452                                 goto error;
1453                 }
1454                 if (!vlan_filter_n) {
1455                         ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1456                         if (ret)
1457                                 goto error;
1458                 }
1459         }
1460         return 0;
1461 error:
1462         ret = rte_errno; /* Save rte_errno before cleanup. */
1463         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1464         rte_errno = ret; /* Restore rte_errno. */
1465         return -rte_errno;
1466 }
1467
1469 /**
1470  * Disable traffic flows configured by control plane
1471  *
1472  * @param dev
1473  *   Pointer to Ethernet device structure.
1474  */
1475 void
1476 mlx5_traffic_disable(struct rte_eth_dev *dev)
1477 {
1478         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1479 }
1480
1481 /**
1482  * Restart traffic flows configured by control plane
1483  *
1484  * @param dev
1485  *   Pointer to Ethernet device structure.
1486  *
1487  * @return
1488  *   0 on success, a negative errno value otherwise and rte_errno is set.
1489  */
1490 int
1491 mlx5_traffic_restart(struct rte_eth_dev *dev)
1492 {
1493         if (dev->data->dev_started) {
1494                 mlx5_traffic_disable(dev);
1495                 return mlx5_traffic_enable(dev);
1496         }
1497         return 0;
1498 }