1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_rx.h"
18 #include "mlx5_tx.h"
19 #include "mlx5_utils.h"
20 #include "rte_pmd_mlx5.h"
21
22 /**
23  * Stop traffic on Tx queues.
24  *
25  * @param dev
26  *   Pointer to Ethernet device structure.
27  */
28 static void
29 mlx5_txq_stop(struct rte_eth_dev *dev)
30 {
31         struct mlx5_priv *priv = dev->data->dev_private;
32         unsigned int i;
33
34         for (i = 0; i != priv->txqs_n; ++i)
35                 mlx5_txq_release(dev, i);
36 }
37
38 /**
39  * Start traffic on Tx queues.
40  *
41  * @param dev
42  *   Pointer to Ethernet device structure.
43  *
44  * @return
45  *   0 on success, a negative errno value otherwise and rte_errno is set.
46  */
47 static int
48 mlx5_txq_start(struct rte_eth_dev *dev)
49 {
50         struct mlx5_priv *priv = dev->data->dev_private;
51         unsigned int i;
52         int ret;
53
54         for (i = 0; i != priv->txqs_n; ++i) {
55                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
56                 struct mlx5_txq_data *txq_data;
57                 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
58
59                 if (!txq_ctrl)
60                         continue;
                   txq_data = &txq_ctrl->txq;
61                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
62                         txq_alloc_elts(txq_ctrl);
63                 MLX5_ASSERT(!txq_ctrl->obj);
64                 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
65                                             0, txq_ctrl->socket);
66                 if (!txq_ctrl->obj) {
67                         DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
68                                 "memory resources.", dev->data->port_id,
69                                 txq_data->idx);
70                         rte_errno = ENOMEM;
71                         goto error;
72                 }
73                 ret = priv->obj_ops.txq_obj_new(dev, i);
74                 if (ret < 0) {
75                         mlx5_free(txq_ctrl->obj);
76                         txq_ctrl->obj = NULL;
77                         goto error;
78                 }
79                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
80                         size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
81
82                         txq_data->fcqs = mlx5_malloc(flags, size,
83                                                      RTE_CACHE_LINE_SIZE,
84                                                      txq_ctrl->socket);
85                         if (!txq_data->fcqs) {
86                                 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
87                                         "allocate memory (FCQ).",
88                                         dev->data->port_id, i);
89                                 rte_errno = ENOMEM;
90                                 goto error;
91                         }
92                 }
93                 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
94                         dev->data->port_id, i, (void *)&txq_ctrl->obj);
95                 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
96         }
97         return 0;
98 error:
99         ret = rte_errno; /* Save rte_errno before cleanup. */
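            /*
             * Walk back from the failing queue down to index 0, dropping the
             * references taken by mlx5_txq_get() in the loop above.
             */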
100         do {
101                 mlx5_txq_release(dev, i);
102         } while (i-- != 0);
103         rte_errno = ret; /* Restore rte_errno. */
104         return -rte_errno;
105 }
106
107 /**
108  * Translate the chunk address to an MR key in order to put it into the cache.
109  */
110 static void
111 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
112                              struct rte_mempool_memhdr *memhdr,
113                              unsigned int idx)
114 {
115         struct mlx5_rxq_data *rxq = opaque;
116
117         RTE_SET_USED(mp);
118         RTE_SET_USED(idx);
119         mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
120 }
121
122 /**
123  * Register Rx queue mempools and fill the Rx queue cache.
124  * This function tolerates repeated mempool registration.
125  *
126  * @param[in] rxq_ctrl
127  *   Rx queue control data.
128  *
129  * @return
130  *   0 on success, (-1) on failure and rte_errno is set.
131  */
132 static int
133 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
134 {
135         struct mlx5_priv *priv = rxq_ctrl->priv;
136         struct rte_mempool *mp;
137         uint32_t s;
138         int ret = 0;
139
140         mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
141         /* MPRQ mempool is registered on creation, just fill the cache. */
142         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
143                 rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
144                                      mlx5_rxq_mempool_register_cb,
145                                      &rxq_ctrl->rxq);
146                 return 0;
147         }
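            /*
             * Register the mempool of each Rx segment; a repeated registration
             * (EEXIST) is tolerated, then the queue MR cache is pre-filled.
             */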
148         for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
149                 mp = rxq_ctrl->rxq.rxseg[s].mp;
150                 ret = mlx5_mr_mempool_register(&priv->sh->cdev->mr_scache,
151                                                priv->sh->cdev->pd, mp,
152                                                &priv->mp_id);
153                 if (ret < 0 && rte_errno != EEXIST)
154                         return ret;
155                 rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
156                                      &rxq_ctrl->rxq);
157         }
158         return 0;
159 }
160
161 /**
162  * Stop traffic on Rx queues.
163  *
164  * @param dev
165  *   Pointer to Ethernet device structure.
166  */
167 static void
168 mlx5_rxq_stop(struct rte_eth_dev *dev)
169 {
170         struct mlx5_priv *priv = dev->data->dev_private;
171         unsigned int i;
172
173         for (i = 0; i != priv->rxqs_n; ++i)
174                 mlx5_rxq_release(dev, i);
175 }
176
177 /**
178  * Start traffic on Rx queues.
179  *
180  * @param dev
181  *   Pointer to Ethernet device structure.
182  *
183  * @return
184  *   0 on success, a negative errno value otherwise and rte_errno is set.
185  */
186 static int
187 mlx5_rxq_start(struct rte_eth_dev *dev)
188 {
189         struct mlx5_priv *priv = dev->data->dev_private;
190         unsigned int i;
191         int ret = 0;
192
193         /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
194         if (mlx5_mprq_alloc_mp(dev)) {
195                 /* Should not release Rx queues but return immediately. */
196                 return -rte_errno;
197         }
198         DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
199                 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
200         DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
201                 dev->data->port_id, priv->sh->device_attr.max_sge);
202         for (i = 0; i != priv->rxqs_n; ++i) {
203                 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
204
205                 if (!rxq_ctrl)
206                         continue;
207                 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
208                         /*
209                          * Pre-register the mempools. Regardless of whether
210                          * the implicit registration is enabled or not,
211                          * Rx mempool destruction is tracked to free MRs.
212                          */
213                         if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
214                                 goto error;
215                         ret = rxq_alloc_elts(rxq_ctrl);
216                         if (ret)
217                                 goto error;
218                 }
219                 MLX5_ASSERT(!rxq_ctrl->obj);
220                 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
221                                             sizeof(*rxq_ctrl->obj), 0,
222                                             rxq_ctrl->socket);
223                 if (!rxq_ctrl->obj) {
224                         DRV_LOG(ERR,
225                                 "Port %u Rx queue %u can't allocate resources.",
226                                 dev->data->port_id, (*priv->rxqs)[i]->idx);
227                         rte_errno = ENOMEM;
228                         goto error;
229                 }
230                 ret = priv->obj_ops.rxq_obj_new(dev, i);
231                 if (ret) {
232                         mlx5_free(rxq_ctrl->obj);
233                         goto error;
234                 }
235                 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
236                         dev->data->port_id, i, (void *)&rxq_ctrl->obj);
237                 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
238         }
239         return 0;
240 error:
241         ret = rte_errno; /* Save rte_errno before cleanup. */
242         do {
243                 mlx5_rxq_release(dev, i);
244         } while (i-- != 0);
245         rte_errno = ret; /* Restore rte_errno. */
246         return -rte_errno;
247 }
248
249 /**
250  * Binds Tx queues to Rx queues for hairpin.
251  *
252  * Each hairpin Tx queue is bound to its peer Rx queue on the same port.
253  *
254  * @param dev
255  *   Pointer to Ethernet device structure.
256  *
257  * @return
258  *   0 on success, a negative errno value otherwise and rte_errno is set.
259  */
260 static int
261 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
262 {
263         struct mlx5_priv *priv = dev->data->dev_private;
264         struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
265         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
266         struct mlx5_txq_ctrl *txq_ctrl;
267         struct mlx5_rxq_ctrl *rxq_ctrl;
268         struct mlx5_devx_obj *sq;
269         struct mlx5_devx_obj *rq;
270         unsigned int i;
271         int ret = 0;
272         bool need_auto = false;
273         uint16_t self_port = dev->data->port_id;
274
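            /*
             * First pass: check whether any hairpin Tx queue targeting this
             * same port still requires automatic binding.
             */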
275         for (i = 0; i != priv->txqs_n; ++i) {
276                 txq_ctrl = mlx5_txq_get(dev, i);
277                 if (!txq_ctrl)
278                         continue;
279                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
280                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
281                         mlx5_txq_release(dev, i);
282                         continue;
283                 }
284                 if (txq_ctrl->hairpin_conf.manual_bind) {
285                         mlx5_txq_release(dev, i);
286                         return 0;
287                 }
288                 need_auto = true;
289                 mlx5_txq_release(dev, i);
290         }
291         if (!need_auto)
292                 return 0;
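            /*
             * Second pass: bind each self-port hairpin Tx queue to its peer
             * Rx queue by moving both SQ and RQ from RST to RDY state.
             */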
293         for (i = 0; i != priv->txqs_n; ++i) {
294                 txq_ctrl = mlx5_txq_get(dev, i);
295                 if (!txq_ctrl)
296                         continue;
297                 /* Skip hairpin queues with other peer ports. */
298                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
299                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
300                         mlx5_txq_release(dev, i);
301                         continue;
302                 }
303                 if (!txq_ctrl->obj) {
304                         rte_errno = ENOMEM;
305                         DRV_LOG(ERR, "port %u no txq object found: %d",
306                                 dev->data->port_id, i);
307                         mlx5_txq_release(dev, i);
308                         return -rte_errno;
309                 }
310                 sq = txq_ctrl->obj->sq;
311                 rxq_ctrl = mlx5_rxq_get(dev,
312                                         txq_ctrl->hairpin_conf.peers[0].queue);
313                 if (!rxq_ctrl) {
314                         mlx5_txq_release(dev, i);
315                         rte_errno = EINVAL;
316                         DRV_LOG(ERR, "port %u no rxq object found: %d",
317                                 dev->data->port_id,
318                                 txq_ctrl->hairpin_conf.peers[0].queue);
319                         return -rte_errno;
320                 }
321                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
322                     rxq_ctrl->hairpin_conf.peers[0].queue != i) {
323                         rte_errno = ENOMEM;
324                         DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
325                                 "Rx queue %d", dev->data->port_id,
326                                 i, txq_ctrl->hairpin_conf.peers[0].queue);
327                         goto error;
328                 }
329                 rq = rxq_ctrl->obj->rq;
330                 if (!rq) {
331                         rte_errno = ENOMEM;
332                         DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
333                                 dev->data->port_id,
334                                 txq_ctrl->hairpin_conf.peers[0].queue);
335                         goto error;
336                 }
337                 sq_attr.state = MLX5_SQC_STATE_RDY;
338                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
339                 sq_attr.hairpin_peer_rq = rq->id;
340                 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
341                 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
342                 if (ret)
343                         goto error;
344                 rq_attr.state = MLX5_SQC_STATE_RDY;
345                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
346                 rq_attr.hairpin_peer_sq = sq->id;
347                 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
348                 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
349                 if (ret)
350                         goto error;
351                 /* Qs with auto-bind will be destroyed directly. */
352                 rxq_ctrl->hairpin_status = 1;
353                 txq_ctrl->hairpin_status = 1;
354                 mlx5_txq_release(dev, i);
355                 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
356         }
357         return 0;
358 error:
359         mlx5_txq_release(dev, i);
360         mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
361         return -rte_errno;
362 }
363
364 /*
365  * Fetch the peer queue's SW & HW information.
366  *
367  * @param dev
368  *   Pointer to Ethernet device structure.
369  * @param peer_queue
370  *   Index of the queue to fetch the information.
371  * @param current_info
372  *   Pointer to the input peer information, not used currently.
373  * @param peer_info
374  *   Pointer to the structure to store the information, output.
375  * @param direction
376  *   Positive to get the RxQ information, zero to get the TxQ information.
377  *
378  * @return
379  *   0 on success, a negative errno value otherwise and rte_errno is set.
380  */
381 int
382 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
383                                struct rte_hairpin_peer_info *current_info,
384                                struct rte_hairpin_peer_info *peer_info,
385                                uint32_t direction)
386 {
387         struct mlx5_priv *priv = dev->data->dev_private;
388         RTE_SET_USED(current_info);
389
390         if (dev->data->dev_started == 0) {
391                 rte_errno = EBUSY;
392                 DRV_LOG(ERR, "peer port %u is not started",
393                         dev->data->port_id);
394                 return -rte_errno;
395         }
396         /*
397          * Peer port used as egress. In the current design, hairpin Tx queue
398          * will be bound to the peer Rx queue. Indeed, only the information of
399          * peer Rx queue needs to be fetched.
400          */
401         if (direction == 0) {
402                 struct mlx5_txq_ctrl *txq_ctrl;
403
404                 txq_ctrl = mlx5_txq_get(dev, peer_queue);
405                 if (txq_ctrl == NULL) {
406                         rte_errno = EINVAL;
407                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
408                                 dev->data->port_id, peer_queue);
409                         return -rte_errno;
410                 }
411                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
412                         rte_errno = EINVAL;
413                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
414                                 dev->data->port_id, peer_queue);
415                         mlx5_txq_release(dev, peer_queue);
416                         return -rte_errno;
417                 }
418                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
419                         rte_errno = ENOMEM;
420                         DRV_LOG(ERR, "port %u no Txq object found: %d",
421                                 dev->data->port_id, peer_queue);
422                         mlx5_txq_release(dev, peer_queue);
423                         return -rte_errno;
424                 }
425                 peer_info->qp_id = txq_ctrl->obj->sq->id;
426                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
427                 /* 1-to-1 mapping, only the first one is used. */
428                 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
429                 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
430                 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
431                 mlx5_txq_release(dev, peer_queue);
432         } else { /* Peer port used as ingress. */
433                 struct mlx5_rxq_ctrl *rxq_ctrl;
434
435                 rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
436                 if (rxq_ctrl == NULL) {
437                         rte_errno = EINVAL;
438                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
439                                 dev->data->port_id, peer_queue);
440                         return -rte_errno;
441                 }
442                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
443                         rte_errno = EINVAL;
444                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
445                                 dev->data->port_id, peer_queue);
446                         mlx5_rxq_release(dev, peer_queue);
447                         return -rte_errno;
448                 }
449                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
450                         rte_errno = ENOMEM;
451                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
452                                 dev->data->port_id, peer_queue);
453                         mlx5_rxq_release(dev, peer_queue);
454                         return -rte_errno;
455                 }
456                 peer_info->qp_id = rxq_ctrl->obj->rq->id;
457                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
458                 peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
459                 peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
460                 peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
461                 mlx5_rxq_release(dev, peer_queue);
462         }
463         return 0;
464 }
465
466 /*
467  * Bind the hairpin queue with the peer HW information.
468  * This needs to be called twice both for Tx and Rx queues of a pair.
469  * If the queue is already bound, it is considered successful.
470  *
471  * @param dev
472  *   Pointer to Ethernet device structure.
473  * @param cur_queue
474  *   Index of the queue to change the HW configuration to bind.
475  * @param peer_info
476  *   Pointer to information of the peer queue.
477  * @param direction
478  *   Positive to configure the TxQ, zero to configure the RxQ.
479  *
480  * @return
481  *   0 on success, a negative errno value otherwise and rte_errno is set.
482  */
483 int
484 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
485                              struct rte_hairpin_peer_info *peer_info,
486                              uint32_t direction)
487 {
488         int ret = 0;
489
490         /*
491          * Consistency checking of the peer queue: opposite direction is used
492          * to get the peer queue info with ethdev port ID, no need to check.
493          */
494         if (peer_info->peer_q != cur_queue) {
495                 rte_errno = EINVAL;
496                 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
497                         dev->data->port_id, cur_queue, peer_info->peer_q);
498                 return -rte_errno;
499         }
500         if (direction != 0) {
501                 struct mlx5_txq_ctrl *txq_ctrl;
502                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
503
504                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
505                 if (txq_ctrl == NULL) {
506                         rte_errno = EINVAL;
507                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
508                                 dev->data->port_id, cur_queue);
509                         return -rte_errno;
510                 }
511                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
512                         rte_errno = EINVAL;
513                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
514                                 dev->data->port_id, cur_queue);
515                         mlx5_txq_release(dev, cur_queue);
516                         return -rte_errno;
517                 }
518                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
519                         rte_errno = ENOMEM;
520                         DRV_LOG(ERR, "port %u no Txq object found: %d",
521                                 dev->data->port_id, cur_queue);
522                         mlx5_txq_release(dev, cur_queue);
523                         return -rte_errno;
524                 }
525                 if (txq_ctrl->hairpin_status != 0) {
526                         DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
527                                 dev->data->port_id, cur_queue);
528                         mlx5_txq_release(dev, cur_queue);
529                         return 0;
530                 }
531                 /*
532                  * Consistency checking across all queues of one port is done
533                  * in the bind() function, and that is optional.
534                  */
535                 if (peer_info->tx_explicit !=
536                     txq_ctrl->hairpin_conf.tx_explicit) {
537                         rte_errno = EINVAL;
538                         DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
539                                 " mismatch", dev->data->port_id, cur_queue);
540                         mlx5_txq_release(dev, cur_queue);
541                         return -rte_errno;
542                 }
543                 if (peer_info->manual_bind !=
544                     txq_ctrl->hairpin_conf.manual_bind) {
545                         rte_errno = EINVAL;
546                         DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
547                                 " mismatch", dev->data->port_id, cur_queue);
548                         mlx5_txq_release(dev, cur_queue);
549                         return -rte_errno;
550                 }
551                 sq_attr.state = MLX5_SQC_STATE_RDY;
552                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
553                 sq_attr.hairpin_peer_rq = peer_info->qp_id;
554                 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
555                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
556                 if (ret == 0)
557                         txq_ctrl->hairpin_status = 1;
558                 mlx5_txq_release(dev, cur_queue);
559         } else {
560                 struct mlx5_rxq_ctrl *rxq_ctrl;
561                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
562
563                 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
564                 if (rxq_ctrl == NULL) {
565                         rte_errno = EINVAL;
566                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
567                                 dev->data->port_id, cur_queue);
568                         return -rte_errno;
569                 }
570                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
571                         rte_errno = EINVAL;
572                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
573                                 dev->data->port_id, cur_queue);
574                         mlx5_rxq_release(dev, cur_queue);
575                         return -rte_errno;
576                 }
577                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
578                         rte_errno = ENOMEM;
579                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
580                                 dev->data->port_id, cur_queue);
581                         mlx5_rxq_release(dev, cur_queue);
582                         return -rte_errno;
583                 }
584                 if (rxq_ctrl->hairpin_status != 0) {
585                         DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
586                                 dev->data->port_id, cur_queue);
587                         mlx5_rxq_release(dev, cur_queue);
588                         return 0;
589                 }
590                 if (peer_info->tx_explicit !=
591                     rxq_ctrl->hairpin_conf.tx_explicit) {
592                         rte_errno = EINVAL;
593                         DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
594                                 " mismatch", dev->data->port_id, cur_queue);
595                         mlx5_rxq_release(dev, cur_queue);
596                         return -rte_errno;
597                 }
598                 if (peer_info->manual_bind !=
599                     rxq_ctrl->hairpin_conf.manual_bind) {
600                         rte_errno = EINVAL;
601                         DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
602                                 " mismatch", dev->data->port_id, cur_queue);
603                         mlx5_rxq_release(dev, cur_queue);
604                         return -rte_errno;
605                 }
606                 rq_attr.state = MLX5_SQC_STATE_RDY;
607                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
608                 rq_attr.hairpin_peer_sq = peer_info->qp_id;
609                 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
610                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
611                 if (ret == 0)
612                         rxq_ctrl->hairpin_status = 1;
613                 mlx5_rxq_release(dev, cur_queue);
614         }
615         return ret;
616 }
617
618 /*
619  * Unbind the hairpin queue and reset its HW configuration.
620  * This needs to be called twice both for Tx and Rx queues of a pair.
621  * If the queue is already unbound, it is considered successful.
622  *
623  * @param dev
624  *   Pointer to Ethernet device structure.
625  * @param cur_queue
626  *   Index of the queue to change the HW configuration to unbind.
627  * @param direction
628  *   Positive to reset the TxQ, zero to reset the RxQ.
629  *
630  * @return
631  *   0 on success, a negative errno value otherwise and rte_errno is set.
632  */
633 int
634 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
635                                uint32_t direction)
636 {
637         int ret = 0;
638
639         if (direction != 0) {
640                 struct mlx5_txq_ctrl *txq_ctrl;
641                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
642
643                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
644                 if (txq_ctrl == NULL) {
645                         rte_errno = EINVAL;
646                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
647                                 dev->data->port_id, cur_queue);
648                         return -rte_errno;
649                 }
650                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
651                         rte_errno = EINVAL;
652                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
653                                 dev->data->port_id, cur_queue);
654                         mlx5_txq_release(dev, cur_queue);
655                         return -rte_errno;
656                 }
657                 /* Already unbound, return success before obj checking. */
658                 if (txq_ctrl->hairpin_status == 0) {
659                         DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
660                                 dev->data->port_id, cur_queue);
661                         mlx5_txq_release(dev, cur_queue);
662                         return 0;
663                 }
664                 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
665                         rte_errno = ENOMEM;
666                         DRV_LOG(ERR, "port %u no Txq object found: %d",
667                                 dev->data->port_id, cur_queue);
668                         mlx5_txq_release(dev, cur_queue);
669                         return -rte_errno;
670                 }
671                 sq_attr.state = MLX5_SQC_STATE_RST;
672                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
673                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
674                 if (ret == 0)
675                         txq_ctrl->hairpin_status = 0;
676                 mlx5_txq_release(dev, cur_queue);
677         } else {
678                 struct mlx5_rxq_ctrl *rxq_ctrl;
679                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
680
681                 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
682                 if (rxq_ctrl == NULL) {
683                         rte_errno = EINVAL;
684                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
685                                 dev->data->port_id, cur_queue);
686                         return -rte_errno;
687                 }
688                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
689                         rte_errno = EINVAL;
690                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
691                                 dev->data->port_id, cur_queue);
692                         mlx5_rxq_release(dev, cur_queue);
693                         return -rte_errno;
694                 }
695                 if (rxq_ctrl->hairpin_status == 0) {
696                         DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
697                                 dev->data->port_id, cur_queue);
698                         mlx5_rxq_release(dev, cur_queue);
699                         return 0;
700                 }
701                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
702                         rte_errno = ENOMEM;
703                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
704                                 dev->data->port_id, cur_queue);
705                         mlx5_rxq_release(dev, cur_queue);
706                         return -rte_errno;
707                 }
708                 rq_attr.state = MLX5_SQC_STATE_RST;
709                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
710                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
711                 if (ret == 0)
712                         rxq_ctrl->hairpin_status = 0;
713                 mlx5_rxq_release(dev, cur_queue);
714         }
715         return ret;
716 }
717
718 /*
719  * Bind the hairpin port pairs, from the Tx to the peer Rx.
720  * This function only supports binding the Tx side to one Rx port.
721  *
722  * @param dev
723  *   Pointer to Ethernet device structure.
724  * @param rx_port
725  *   Port identifier of the Rx port.
726  *
727  * @return
728  *   0 on success, a negative errno value otherwise and rte_errno is set.
729  */
730 static int
731 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
732 {
733         struct mlx5_priv *priv = dev->data->dev_private;
734         int ret = 0;
735         struct mlx5_txq_ctrl *txq_ctrl;
736         uint32_t i;
737         struct rte_hairpin_peer_info peer = {0xffffff};
738         struct rte_hairpin_peer_info cur;
739         const struct rte_eth_hairpin_conf *conf;
740         uint16_t num_q = 0;
741         uint16_t local_port = priv->dev_data->port_id;
742         uint32_t manual;
743         uint32_t explicit;
744         uint16_t rx_queue;
745
746         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
747                 rte_errno = ENODEV;
748                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
749                 return -rte_errno;
750         }
751         /*
752          * Before binding TxQ to peer RxQ, a first pass over the queues checks
753          * their configuration consistency. This is a little time consuming,
754          * but better than having to roll back afterwards.
755          */
756         for (i = 0; i != priv->txqs_n; i++) {
757                 txq_ctrl = mlx5_txq_get(dev, i);
758                 if (txq_ctrl == NULL)
759                         continue;
760                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
761                         mlx5_txq_release(dev, i);
762                         continue;
763                 }
764                 /*
765                  * All hairpin Tx queues of a single port connected to the same
766                  * peer Rx port should have the same "auto binding" and
767                  * "implicit Tx flow" modes.
768                  * Peer consistency checking will be done in per queue binding.
769                  */
770                 conf = &txq_ctrl->hairpin_conf;
771                 if (conf->peers[0].port == rx_port) {
772                         if (num_q == 0) {
773                                 manual = conf->manual_bind;
774                                 explicit = conf->tx_explicit;
775                         } else {
776                                 if (manual != conf->manual_bind ||
777                                     explicit != conf->tx_explicit) {
778                                         rte_errno = EINVAL;
779                                         DRV_LOG(ERR, "port %u queue %d mode"
780                                                 " mismatch: %u %u, %u %u",
781                                                 local_port, i, manual,
782                                                 conf->manual_bind, explicit,
783                                                 conf->tx_explicit);
784                                         mlx5_txq_release(dev, i);
785                                         return -rte_errno;
786                                 }
787                         }
788                         num_q++;
789                 }
790                 mlx5_txq_release(dev, i);
791         }
792         /* If no queue is configured, success is returned directly. */
793         if (num_q == 0)
794                 return ret;
795         /* All the hairpin TX queues need to be traversed again. */
796         for (i = 0; i != priv->txqs_n; i++) {
797                 txq_ctrl = mlx5_txq_get(dev, i);
798                 if (txq_ctrl == NULL)
799                         continue;
800                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
801                         mlx5_txq_release(dev, i);
802                         continue;
803                 }
804                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
805                         mlx5_txq_release(dev, i);
806                         continue;
807                 }
808                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
809                 /*
810                  * Fetch peer RxQ's information.
811                  * No need to pass the information of the current queue.
812                  */
813                 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
814                                                         NULL, &peer, 1);
815                 if (ret != 0) {
816                         mlx5_txq_release(dev, i);
817                         goto error;
818                 }
819                 /* Accessing its own device, inside mlx5 PMD. */
820                 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
821                 if (ret != 0) {
822                         mlx5_txq_release(dev, i);
823                         goto error;
824                 }
825                 /* Pass TxQ's information to peer RxQ and try binding. */
826                 cur.peer_q = rx_queue;
827                 cur.qp_id = txq_ctrl->obj->sq->id;
828                 cur.vhca_id = priv->config.hca_attr.vhca_id;
829                 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
830                 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
831                 /*
832                  * In order to access another device in a proper way, the RTE
833                  * level private function is needed.
834                  */
835                 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
836                                                       &cur, 0);
837                 if (ret != 0) {
838                         mlx5_txq_release(dev, i);
839                         goto error;
840                 }
841                 mlx5_txq_release(dev, i);
842         }
843         return 0;
844 error:
845         /*
846          * Roll back the queues that were already bound.
847          * No need to check the return value of the queue unbind function.
848          */
849         do {
850                 /* No validation is needed here. */
851                 txq_ctrl = mlx5_txq_get(dev, i);
852                 if (txq_ctrl == NULL)
853                         continue;
854                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
855                 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
856                 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
857                 mlx5_txq_release(dev, i);
858         } while (i--);
859         return ret;
860 }
861
862 /*
863  * Unbind the hairpin port pair. The HW configuration of both devices will be
864  * cleared and the status will be reset for all the queues used between them.
865  * This function only supports unbinding the Tx side from one Rx port.
866  *
867  * @param dev
868  *   Pointer to Ethernet device structure.
869  * @param rx_port
870  *   Port identifier of the Rx port.
871  *
872  * @return
873  *   0 on success, a negative errno value otherwise and rte_errno is set.
874  */
875 static int
876 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
877 {
878         struct mlx5_priv *priv = dev->data->dev_private;
879         struct mlx5_txq_ctrl *txq_ctrl;
880         uint32_t i;
881         int ret;
882         uint16_t cur_port = priv->dev_data->port_id;
883
884         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
885                 rte_errno = ENODEV;
886                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
887                 return -rte_errno;
888         }
889         for (i = 0; i != priv->txqs_n; i++) {
890                 uint16_t rx_queue;
891
892                 txq_ctrl = mlx5_txq_get(dev, i);
893                 if (txq_ctrl == NULL)
894                         continue;
895                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
896                         mlx5_txq_release(dev, i);
897                         continue;
898                 }
899                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
900                         mlx5_txq_release(dev, i);
901                         continue;
902                 }
903                 /* Indeed, only the first used queue needs to be checked. */
904                 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
905                         if (cur_port != rx_port) {
906                                 rte_errno = EINVAL;
907                                 DRV_LOG(ERR, "port %u and port %u are in"
908                                         " auto-bind mode", cur_port, rx_port);
909                                 mlx5_txq_release(dev, i);
910                                 return -rte_errno;
911                         } else {
912                                 return 0;
913                         }
914                 }
915                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
916                 mlx5_txq_release(dev, i);
917                 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
918                 if (ret) {
919                         DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
920                                 rx_port, rx_queue);
921                         return ret;
922                 }
923                 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
924                 if (ret) {
925                         DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
926                                 cur_port, i);
927                         return ret;
928                 }
929         }
930         return 0;
931 }
932
933 /*
934  * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
935  * @see mlx5_hairpin_bind_single_port()
936  */
937 int
938 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
939 {
940         int ret = 0;
941         uint16_t p, pp;
942
943         /*
944          * If the Rx port has no hairpin configuration with the current port,
945          * the binding will be skipped by the single-port bind function.
946          * The device started status is checked only before updating the
947          * queue information.
948          */
949         if (rx_port == RTE_MAX_ETHPORTS) {
950                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
951                         ret = mlx5_hairpin_bind_single_port(dev, p);
952                         if (ret != 0)
953                                 goto unbind;
954                 }
955                 return ret;
956         } else {
957                 return mlx5_hairpin_bind_single_port(dev, rx_port);
958         }
959 unbind:
960         MLX5_ETH_FOREACH_DEV(pp, dev->device)
961                 if (pp < p)
962                         mlx5_hairpin_unbind_single_port(dev, pp);
963         return ret;
964 }
965
966 /*
967  * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
968  * @see mlx5_hairpin_unbind_single_port()
969  */
970 int
971 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
972 {
973         int ret = 0;
974         uint16_t p;
975
976         if (rx_port == RTE_MAX_ETHPORTS)
977                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
978                         ret = mlx5_hairpin_unbind_single_port(dev, p);
979                         if (ret != 0)
980                                 return ret;
981                 }
982         else
983                 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
984         return ret;
985 }
986
987 /*
988  * DPDK callback to get the hairpin peer ports list.
989  * This will return the actual number of peer ports and save the identifiers
990  * into the array (sorted; the order may differ from the one used when
991  * setting up the hairpin peer queues).
992  * The peer port ID could be the same as the port ID of the current device.
993  *
994  * @param dev
995  *   Pointer to Ethernet device structure.
996  * @param peer_ports
997  *   Pointer to array to save the port identifiers.
998  * @param len
999  *   The length of the array.
1000  * @param direction
1001  *   Current port to peer port direction.
1002  *   positive - current used as Tx to get all peer Rx ports.
1003  *   zero - current used as Rx to get all peer Tx ports.
1004  *
1005  * @return
1006  *   0 or positive value on success, actual number of peer ports.
1007  *   a negative errno value otherwise and rte_errno is set.
1008  */
1009 int
1010 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1011                             size_t len, uint32_t direction)
1012 {
1013         struct mlx5_priv *priv = dev->data->dev_private;
1014         struct mlx5_txq_ctrl *txq_ctrl;
1015         struct mlx5_rxq_ctrl *rxq_ctrl;
1016         uint32_t i;
1017         uint16_t pp;
1018         uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1019         int ret = 0;
1020
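             /* Collect each peer port identifier only once in the bitmap. */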
1021         if (direction) {
1022                 for (i = 0; i < priv->txqs_n; i++) {
1023                         txq_ctrl = mlx5_txq_get(dev, i);
1024                         if (!txq_ctrl)
1025                                 continue;
1026                         if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1027                                 mlx5_txq_release(dev, i);
1028                                 continue;
1029                         }
1030                         pp = txq_ctrl->hairpin_conf.peers[0].port;
1031                         if (pp >= RTE_MAX_ETHPORTS) {
1032                                 rte_errno = ERANGE;
1033                                 mlx5_txq_release(dev, i);
1034                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1035                                         "out of range %hu",
1036                                         priv->dev_data->port_id, i, pp);
1037                                 return -rte_errno;
1038                         }
1039                         bits[pp / 32] |= 1 << (pp % 32);
1040                         mlx5_txq_release(dev, i);
1041                 }
1042         } else {
1043                 for (i = 0; i < priv->rxqs_n; i++) {
1044                         rxq_ctrl = mlx5_rxq_get(dev, i);
1045                         if (!rxq_ctrl)
1046                                 continue;
1047                         if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
1048                                 mlx5_rxq_release(dev, i);
1049                                 continue;
1050                         }
1051                         pp = rxq_ctrl->hairpin_conf.peers[0].port;
1052                         if (pp >= RTE_MAX_ETHPORTS) {
1053                                 rte_errno = ERANGE;
1054                                 mlx5_rxq_release(dev, i);
1055                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1056                                         "out of range %hu",
1057                                         priv->dev_data->port_id, i, pp);
1058                                 return -rte_errno;
1059                         }
1060                         bits[pp / 32] |= 1 << (pp % 32);
1061                         mlx5_rxq_release(dev, i);
1062                 }
1063         }
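             /*
              * Copy the collected port IDs to the output array; fail with
              * E2BIG if there are more peer ports than the array can hold.
              */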
1064         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1065                 if (bits[i / 32] & (1 << (i % 32))) {
1066                         if ((size_t)ret >= len) {
1067                                 rte_errno = E2BIG;
1068                                 return -rte_errno;
1069                         }
1070                         peer_ports[ret++] = i;
1071                 }
1072         }
1073         return ret;
1074 }
1075
1076 /**
1077  * DPDK callback to start the device.
1078  *
1079  * Simulate device start by attaching all configured flows.
1080  *
1081  * @param dev
1082  *   Pointer to Ethernet device structure.
1083  *
1084  * @return
1085  *   0 on success, a negative errno value otherwise and rte_errno is set.
1086  */
1087 int
1088 mlx5_dev_start(struct rte_eth_dev *dev)
1089 {
1090         struct mlx5_priv *priv = dev->data->dev_private;
1091         int ret;
1092         int fine_inline;
1093
1094         DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
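             /*
              * Look up the fine-granularity inline dynamic mbuf flag, if the
              * application registered it, and cache its mask for the Tx path.
              */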
1095         fine_inline = rte_mbuf_dynflag_lookup
1096                 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1097         if (fine_inline >= 0)
1098                 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1099         else
1100                 rte_net_mlx5_dynf_inline_mask = 0;
1101         if (dev->data->nb_rx_queues > 0) {
1102                 ret = mlx5_dev_configure_rss_reta(dev);
1103                 if (ret) {
1104                         DRV_LOG(ERR, "port %u reta config failed: %s",
1105                                 dev->data->port_id, strerror(rte_errno));
1106                         return -rte_errno;
1107                 }
1108         }
1109         ret = mlx5_txpp_start(dev);
1110         if (ret) {
1111                 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1112                         dev->data->port_id, strerror(rte_errno));
1113                 goto error;
1114         }
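             /*
              * Dummy queue resources for loopback (LB) traffic are created
              * here when the DevX/DV flow engine provides the callback.
              */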
1115         if ((priv->sh->devx && priv->config.dv_flow_en &&
1116             priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1117                 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1118                 if (ret)
1119                         goto error;
1120         }
1121         ret = mlx5_txq_start(dev);
1122         if (ret) {
1123                 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1124                         dev->data->port_id, strerror(rte_errno));
1125                 goto error;
1126         }
1127         ret = mlx5_rxq_start(dev);
1128         if (ret) {
1129                 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1130                         dev->data->port_id, strerror(rte_errno));
1131                 goto error;
1132         }
1133         /*
1134          * This step is skipped if no hairpin Tx queue is configured with an
1135          * Rx peer queue from the same device.
1136          */
1137         ret = mlx5_hairpin_auto_bind(dev);
1138         if (ret) {
1139                 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1140                         dev->data->port_id, strerror(rte_errno));
1141                 goto error;
1142         }
1143         /* Set started flag here for the following steps like control flow. */
1144         dev->data->dev_started = 1;
1145         ret = mlx5_rx_intr_vec_enable(dev);
1146         if (ret) {
1147                 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1148                         dev->data->port_id);
1149                 goto error;
1150         }
1151         mlx5_os_stats_init(dev);
1152         ret = mlx5_traffic_enable(dev);
1153         if (ret) {
1154                 DRV_LOG(ERR, "port %u failed to set default flows",
1155                         dev->data->port_id);
1156                 goto error;
1157         }
1158         /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1159         mlx5_flow_rxq_dynf_metadata_set(dev);
1160         /* Set flags and context to convert Rx timestamps. */
1161         mlx5_rxq_timestamp_set(dev);
1162         /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1163         mlx5_txq_dynf_timestamp_set(dev);
1164         /*
1165          * In non-cached mode, only the default mreg copy action needs to be
1166          * started, since no flow created by the application exists anymore.
1167          * But it is worth wrapping the interface for further usage.
1168          */
1169         ret = mlx5_flow_start_default(dev);
1170         if (ret) {
1171                 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1172                         dev->data->port_id, strerror(rte_errno));
1173                 goto error;
1174         }
1175         if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1176                 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1177                         dev->data->port_id, rte_strerror(rte_errno));
1178                 goto error;
1179         }
1180         rte_wmb();
1181         dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1182         dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1183         /* Enable datapath on secondary process. */
1184         mlx5_mp_os_req_start_rxtx(dev);
1185         if (priv->sh->intr_handle.fd >= 0) {
1186                 priv->sh->port[priv->dev_port - 1].ih_port_id =
1187                                         (uint32_t)dev->data->port_id;
1188         } else {
1189                 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1190                         dev->data->port_id);
1191                 dev->data->dev_conf.intr_conf.lsc = 0;
1192                 dev->data->dev_conf.intr_conf.rmv = 0;
1193         }
1194         if (priv->sh->intr_handle_devx.fd >= 0)
1195                 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1196                                         (uint32_t)dev->data->port_id;
1197         return 0;
1198 error:
1199         ret = rte_errno; /* Save rte_errno before cleanup. */
1200         /* Rollback. */
1201         dev->data->dev_started = 0;
1202         mlx5_flow_stop_default(dev);
1203         mlx5_traffic_disable(dev);
1204         mlx5_txq_stop(dev);
1205         mlx5_rxq_stop(dev);
1206         if (priv->obj_ops.lb_dummy_queue_release)
1207                 priv->obj_ops.lb_dummy_queue_release(dev);
1208         mlx5_txpp_stop(dev); /* Stop last. */
1209         rte_errno = ret; /* Restore rte_errno. */
1210         return -rte_errno;
1211 }
1212
1213 /**
1214  * DPDK callback to stop the device.
1215  *
1216  * Simulate device stop by detaching all configured flows.
1217  *
1218  * @param dev
1219  *   Pointer to Ethernet device structure.
1220  */
1221 int
1222 mlx5_dev_stop(struct rte_eth_dev *dev)
1223 {
1224         struct mlx5_priv *priv = dev->data->dev_private;
1225
1226         dev->data->dev_started = 0;
1227         /* Prevent crashes when queues are still in use. */
1228         dev->rx_pkt_burst = removed_rx_burst;
1229         dev->tx_pkt_burst = removed_tx_burst;
1230         rte_wmb();
1231         /* Disable datapath on secondary process. */
1232         mlx5_mp_os_req_stop_rxtx(dev);
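             /*
              * Give the datapath time to settle, roughly 1 ms per Rx queue,
              * so that in-flight bursts complete before queues are destroyed.
              */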
1233         rte_delay_us_sleep(1000 * priv->rxqs_n);
1234         DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1235         mlx5_flow_stop_default(dev);
1236         /* Control flows for default traffic can be removed first. */
1237         mlx5_traffic_disable(dev);
1238         /* All RX queue flags will be cleared in the flush interface. */
1239         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1240         mlx5_flow_meter_rxq_flush(dev);
1241         mlx5_rx_intr_vec_disable(dev);
1242         priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1243         priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1244         mlx5_txq_stop(dev);
1245         mlx5_rxq_stop(dev);
1246         if (priv->obj_ops.lb_dummy_queue_release)
1247                 priv->obj_ops.lb_dummy_queue_release(dev);
1248         mlx5_txpp_stop(dev);
1249
1250         return 0;
1251 }
1252
1253 /**
1254  * Enable traffic flows configured by control plane.
1255  *
1256  * @param dev
1257  *   Pointer to Ethernet device structure.
1260  *
1261  * @return
1262  *   0 on success, a negative errno value otherwise and rte_errno is set.
1263  */
1264 int
1265 mlx5_traffic_enable(struct rte_eth_dev *dev)
1266 {
1267         struct mlx5_priv *priv = dev->data->dev_private;
1268         struct rte_flow_item_eth bcast = {
1269                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1270         };
1271         struct rte_flow_item_eth ipv6_multi_spec = {
1272                 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1273         };
1274         struct rte_flow_item_eth ipv6_multi_mask = {
1275                 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1276         };
1277         struct rte_flow_item_eth unicast = {
1278                 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1279         };
1280         struct rte_flow_item_eth unicast_mask = {
1281                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1282         };
1283         const unsigned int vlan_filter_n = priv->vlan_filter_n;
1284         const struct rte_ether_addr cmp = {
1285                 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1286         };
1287         unsigned int i;
1288         unsigned int j;
1289         int ret;
1290
1291         /*
1292          * The hairpin Tx queue default flow must be created regardless of
1293          * isolated mode, otherwise packets sent through the queue would go
1294          * out without the Tx flow actions, e.g. encapsulation.
1295          */
1296         for (i = 0; i != priv->txqs_n; ++i) {
1297                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1298                 if (!txq_ctrl)
1299                         continue;
1300                 /* Only Tx implicit mode requires the default Tx flow. */
1301                 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1302                     txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1303                     txq_ctrl->hairpin_conf.peers[0].port ==
1304                     priv->dev_data->port_id) {
1305                         ret = mlx5_ctrl_flow_source_queue(dev, i);
1306                         if (ret) {
1307                                 mlx5_txq_release(dev, i);
1308                                 goto error;
1309                         }
1310                 }
1311                 if ((priv->representor || priv->master) &&
1312                     priv->config.dv_esw_en) {
1313                         if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1314                                 DRV_LOG(ERR,
1315                                         "Port %u Tx queue %u: failed to create the representor devx default SQ miss rule.",
1316                                         dev->data->port_id, i);
                                mlx5_txq_release(dev, i);
1317                                 goto error;
1318                         }
1319                 }
1320                 mlx5_txq_release(dev, i);
1321         }
1322         if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1323                 if (mlx5_flow_create_esw_table_zero_flow(dev))
1324                         priv->fdb_def_rule = 1;
1325                 else
1326                         DRV_LOG(INFO, "port %u FDB default rule cannot be"
1327                                 " configured - only Eswitch group 0 flows are"
1328                                 " supported.", dev->data->port_id);
1329         }
1330         if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1331                 ret = mlx5_flow_lacp_miss(dev);
1332                 if (ret)
1333                         DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1334                                 "forward LACP to kernel.", dev->data->port_id);
1335                 else
1336                         DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1337                                 dev->data->port_id);
1338         }
1339         if (priv->isolated)
1340                 return 0;
1341         if (dev->data->promiscuous) {
1342                 struct rte_flow_item_eth promisc = {
1343                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1344                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1345                         .type = 0,
1346                 };
1347
1348                 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1349                 if (ret)
1350                         goto error;
1351         }
1352         if (dev->data->all_multicast) {
1353                 struct rte_flow_item_eth multicast = {
1354                         .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1355                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1356                         .type = 0,
1357                 };
1358
1359                 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1360                 if (ret)
1361                         goto error;
1362         } else {
1363                 /* Add broadcast/multicast flows. */
1364                 for (i = 0; i != vlan_filter_n; ++i) {
1365                         uint16_t vlan = priv->vlan_filter[i];
1366
1367                         struct rte_flow_item_vlan vlan_spec = {
1368                                 .tci = rte_cpu_to_be_16(vlan),
1369                         };
1370                         struct rte_flow_item_vlan vlan_mask =
1371                                 rte_flow_item_vlan_mask;
1372
1373                         ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1374                                                   &vlan_spec, &vlan_mask);
1375                         if (ret)
1376                                 goto error;
1377                         ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1378                                                   &ipv6_multi_mask,
1379                                                   &vlan_spec, &vlan_mask);
1380                         if (ret)
1381                                 goto error;
1382                 }
1383                 if (!vlan_filter_n) {
1384                         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1385                         if (ret)
1386                                 goto error;
1387                         ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1388                                              &ipv6_multi_mask);
1389                         if (ret) {
1390                                 /* Do not fail on IPv6 multicast creation failure. */
1391                                 DRV_LOG(WARNING,
1392                                         "IPv6 multicast is not supported");
1393                                 ret = 0;
1394                         }
1395                 }
1396         }
1397         /* Add MAC address flows. */
1398         for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1399                 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1400
1401                 if (!memcmp(mac, &cmp, sizeof(*mac)))
1402                         continue;
1403                 memcpy(&unicast.dst.addr_bytes,
1404                        mac->addr_bytes,
1405                        RTE_ETHER_ADDR_LEN);
1406                 for (j = 0; j != vlan_filter_n; ++j) {
1407                         uint16_t vlan = priv->vlan_filter[j];
1408
1409                         struct rte_flow_item_vlan vlan_spec = {
1410                                 .tci = rte_cpu_to_be_16(vlan),
1411                         };
1412                         struct rte_flow_item_vlan vlan_mask =
1413                                 rte_flow_item_vlan_mask;
1414
1415                         ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1416                                                   &unicast_mask,
1417                                                   &vlan_spec,
1418                                                   &vlan_mask);
1419                         if (ret)
1420                                 goto error;
1421                 }
1422                 if (!vlan_filter_n) {
1423                         ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1424                         if (ret)
1425                                 goto error;
1426                 }
1427         }
1428         return 0;
1429 error:
1430         ret = rte_errno; /* Save rte_errno before cleanup. */
1431         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1432         rte_errno = ret; /* Restore rte_errno. */
1433         return -rte_errno;
1434 }
1435
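/*
 * Illustrative sketch, not part of mlx5_trigger.c: roughly what the broadcast
 * control flow installed above expresses when written through the public
 * rte_flow API. The internal helper builds the rule directly, so the
 * attribute, the single-queue action and the example function itself are
 * assumptions made only for this illustration.
 */
static struct rte_flow *
example_broadcast_rule(uint16_t port_id, uint16_t rx_queue)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_eth bcast_spec = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	/* Using the spec as its own mask mirrors mlx5_ctrl_flow(dev, &bcast, &bcast). */
	struct rte_flow_item pattern[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = &bcast_spec,
			.mask = &bcast_spec,
		},
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = rx_queue };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	return rte_flow_create(port_id, &attr, pattern, actions, &error);
}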
1436
1437 /**
1438  * Disable traffic flows configured by control plane
1439  *
1440  * @param dev
1441  *   Pointer to Ethernet device private data.
1442  *   Pointer to Ethernet device structure.
1443 void
1444 mlx5_traffic_disable(struct rte_eth_dev *dev)
1445 {
1446         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1447 }
1448
1449 /**
1450  * Restart traffic flows configured by control plane
1451  *
1452  * @param dev
1453  *   Pointer to Ethernet device structure.
1454  *
1455  * @return
1456  *   0 on success, a negative errno value otherwise and rte_errno is set.
1457  */
1458 int
1459 mlx5_traffic_restart(struct rte_eth_dev *dev)
1460 {
1461         if (dev->data->dev_started) {
1462                 mlx5_traffic_disable(dev);
1463                 return mlx5_traffic_enable(dev);
1464         }
1465         return 0;
1466 }
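
/*
 * Illustrative sketch, not part of mlx5_trigger.c: the kind of caller that
 * relies on mlx5_traffic_restart(). Toggling a configuration bit such as
 * dev->data->promiscuous and restarting the control flows lets
 * mlx5_traffic_enable() re-evaluate the corresponding branch above. The
 * function below is an assumption for illustration only; the PMD's real
 * rxmode handlers live in another file.
 */
static int
example_toggle_promiscuous(struct rte_eth_dev *dev, int enable)
{
	dev->data->promiscuous = enable ? 1 : 0;
	/* Drop all control flows and re-create them for the new state. */
	return mlx5_traffic_restart(dev);
}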