[dpdk.git] drivers/net/mlx5/mlx5_trigger.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_mr.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32         struct mlx5_priv *priv = dev->data->dev_private;
33         unsigned int i;
34
35         for (i = 0; i != priv->txqs_n; ++i)
36                 mlx5_txq_release(dev, i);
37 }
38
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51         struct mlx5_priv *priv = dev->data->dev_private;
52         unsigned int i;
53         int ret;
54
55         for (i = 0; i != priv->txqs_n; ++i) {
56                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57                 struct mlx5_txq_data *txq_data = txq_ctrl ? &txq_ctrl->txq : NULL;
58                 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59
60                 if (!txq_ctrl)
61                         continue;
62                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63                         txq_alloc_elts(txq_ctrl);
64                 MLX5_ASSERT(!txq_ctrl->obj);
65                 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66                                             0, txq_ctrl->socket);
67                 if (!txq_ctrl->obj) {
68                         DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69                                 "memory resources.", dev->data->port_id,
70                                 txq_data->idx);
71                         rte_errno = ENOMEM;
72                         goto error;
73                 }
74                 ret = priv->obj_ops.txq_obj_new(dev, i);
75                 if (ret < 0) {
76                         mlx5_free(txq_ctrl->obj);
77                         txq_ctrl->obj = NULL;
78                         goto error;
79                 }
80                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81                         size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82
83                         txq_data->fcqs = mlx5_malloc(flags, size,
84                                                      RTE_CACHE_LINE_SIZE,
85                                                      txq_ctrl->socket);
86                         if (!txq_data->fcqs) {
87                                 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88                                         "allocate memory (FCQ).",
89                                         dev->data->port_id, i);
90                                 rte_errno = ENOMEM;
91                                 goto error;
92                         }
93                 }
94                 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95                         dev->data->port_id, i, (void *)&txq_ctrl->obj);
96                 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97         }
98         return 0;
99 error:
100         ret = rte_errno; /* Save rte_errno before cleanup. */
101         do {
102                 mlx5_txq_release(dev, i);
103         } while (i-- != 0);
104         rte_errno = ret; /* Restore rte_errno. */
105         return -rte_errno;
106 }
107
108 /**
109  * Translate the chunk address to MR key in order to put it into the cache.
110  */
111 static void
112 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
113                              struct rte_mempool_memhdr *memhdr,
114                              unsigned int idx)
115 {
116         struct mlx5_rxq_data *rxq = opaque;
117
118         RTE_SET_USED(mp);
119         RTE_SET_USED(idx);
120         mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
121 }
122
123 /**
124  * Register Rx queue mempools and fill the Rx queue cache.
125  * This function tolerates repeated mempool registration.
126  *
127  * @param[in] rxq_ctrl
128  *   Rx queue control data.
129  *
130  * @return
131  *   0 on success, (-1) on failure and rte_errno is set.
132  */
133 static int
134 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
135 {
136         struct mlx5_priv *priv = rxq_ctrl->priv;
137         struct rte_mempool *mp;
138         uint32_t s;
139         int ret = 0;
140
141         mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
142         /* MPRQ mempool is registered on creation, just fill the cache. */
143         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
144                 rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
145                                      mlx5_rxq_mempool_register_cb,
146                                      &rxq_ctrl->rxq);
147                 return 0;
148         }
149         for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
150                 mp = rxq_ctrl->rxq.rxseg[s].mp;
151                 ret = mlx5_mr_mempool_register(&priv->sh->share_cache,
152                                                priv->sh->cdev->pd, mp,
153                                                &priv->mp_id);
154                 if (ret < 0 && rte_errno != EEXIST)
155                         return ret;
156                 rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
157                                      &rxq_ctrl->rxq);
158         }
159         return 0;
160 }
161
162 /**
163  * Stop traffic on Rx queues.
164  *
165  * @param dev
166  *   Pointer to Ethernet device structure.
167  */
168 static void
169 mlx5_rxq_stop(struct rte_eth_dev *dev)
170 {
171         struct mlx5_priv *priv = dev->data->dev_private;
172         unsigned int i;
173
174         for (i = 0; i != priv->rxqs_n; ++i)
175                 mlx5_rxq_release(dev, i);
176 }
177
178 /**
179  * Start traffic on Rx queues.
180  *
181  * @param dev
182  *   Pointer to Ethernet device structure.
183  *
184  * @return
185  *   0 on success, a negative errno value otherwise and rte_errno is set.
186  */
187 static int
188 mlx5_rxq_start(struct rte_eth_dev *dev)
189 {
190         struct mlx5_priv *priv = dev->data->dev_private;
191         unsigned int i;
192         int ret = 0;
193
194         /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
195         if (mlx5_mprq_alloc_mp(dev)) {
196                 /* Should not release Rx queues but return immediately. */
197                 return -rte_errno;
198         }
199         DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
200                 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
201         DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
202                 dev->data->port_id, priv->sh->device_attr.max_sge);
203         for (i = 0; i != priv->rxqs_n; ++i) {
204                 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
205
206                 if (!rxq_ctrl)
207                         continue;
208                 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
209                         /*
210                          * Pre-register the mempools. Regardless of whether
211                          * the implicit registration is enabled or not,
212                          * Rx mempool destruction is tracked to free MRs.
213                          */
214                         if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
215                                 goto error;
216                         ret = rxq_alloc_elts(rxq_ctrl);
217                         if (ret)
218                                 goto error;
219                 }
220                 MLX5_ASSERT(!rxq_ctrl->obj);
221                 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
222                                             sizeof(*rxq_ctrl->obj), 0,
223                                             rxq_ctrl->socket);
224                 if (!rxq_ctrl->obj) {
225                         DRV_LOG(ERR,
226                                 "Port %u Rx queue %u can't allocate resources.",
227                                 dev->data->port_id, (*priv->rxqs)[i]->idx);
228                         rte_errno = ENOMEM;
229                         goto error;
230                 }
231                 ret = priv->obj_ops.rxq_obj_new(dev, i);
232                 if (ret) {
233                         mlx5_free(rxq_ctrl->obj);
234                         goto error;
235                 }
236                 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
237                         dev->data->port_id, i, (void *)&rxq_ctrl->obj);
238                 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
239         }
240         return 0;
241 error:
242         ret = rte_errno; /* Save rte_errno before cleanup. */
243         do {
244                 mlx5_rxq_release(dev, i);
245         } while (i-- != 0);
246         rte_errno = ret; /* Restore rte_errno. */
247         return -rte_errno;
248 }
249
250 /**
251  * Binds Tx queues to Rx queues for hairpin.
252  *
253  * Binds Tx queues to the target Rx queues.
254  *
255  * @param dev
256  *   Pointer to Ethernet device structure.
257  *
258  * @return
259  *   0 on success, a negative errno value otherwise and rte_errno is set.
260  */
261 static int
262 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
263 {
264         struct mlx5_priv *priv = dev->data->dev_private;
265         struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
266         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
267         struct mlx5_txq_ctrl *txq_ctrl;
268         struct mlx5_rxq_ctrl *rxq_ctrl;
269         struct mlx5_devx_obj *sq;
270         struct mlx5_devx_obj *rq;
271         unsigned int i;
272         int ret = 0;
273         bool need_auto = false;
274         uint16_t self_port = dev->data->port_id;
275
276         for (i = 0; i != priv->txqs_n; ++i) {
277                 txq_ctrl = mlx5_txq_get(dev, i);
278                 if (!txq_ctrl)
279                         continue;
280                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
281                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
282                         mlx5_txq_release(dev, i);
283                         continue;
284                 }
285                 if (txq_ctrl->hairpin_conf.manual_bind) {
286                         mlx5_txq_release(dev, i);
287                         return 0;
288                 }
289                 need_auto = true;
290                 mlx5_txq_release(dev, i);
291         }
292         if (!need_auto)
293                 return 0;
294         for (i = 0; i != priv->txqs_n; ++i) {
295                 txq_ctrl = mlx5_txq_get(dev, i);
296                 if (!txq_ctrl)
297                         continue;
298                 /* Skip hairpin queues with other peer ports. */
299                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
300                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
301                         mlx5_txq_release(dev, i);
302                         continue;
303                 }
304                 if (!txq_ctrl->obj) {
305                         rte_errno = ENOMEM;
306                         DRV_LOG(ERR, "port %u no txq object found: %d",
307                                 dev->data->port_id, i);
308                         mlx5_txq_release(dev, i);
309                         return -rte_errno;
310                 }
311                 sq = txq_ctrl->obj->sq;
312                 rxq_ctrl = mlx5_rxq_get(dev,
313                                         txq_ctrl->hairpin_conf.peers[0].queue);
314                 if (!rxq_ctrl) {
315                         mlx5_txq_release(dev, i);
316                         rte_errno = EINVAL;
317                         DRV_LOG(ERR, "port %u no rxq object found: %d",
318                                 dev->data->port_id,
319                                 txq_ctrl->hairpin_conf.peers[0].queue);
320                         return -rte_errno;
321                 }
322                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
323                     rxq_ctrl->hairpin_conf.peers[0].queue != i) {
324                         rte_errno = ENOMEM;
325                         DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
326                                 "Rx queue %d", dev->data->port_id,
327                                 i, txq_ctrl->hairpin_conf.peers[0].queue);
328                         goto error;
329                 }
330                 rq = rxq_ctrl->obj->rq;
331                 if (!rq) {
332                         rte_errno = ENOMEM;
333                         DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
334                                 dev->data->port_id,
335                                 txq_ctrl->hairpin_conf.peers[0].queue);
336                         goto error;
337                 }
338                 sq_attr.state = MLX5_SQC_STATE_RDY;
339                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
340                 sq_attr.hairpin_peer_rq = rq->id;
341                 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
342                 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
343                 if (ret)
344                         goto error;
345                 rq_attr.state = MLX5_SQC_STATE_RDY;
346                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
347                 rq_attr.hairpin_peer_sq = sq->id;
348                 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
349                 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
350                 if (ret)
351                         goto error;
352                 /* Qs with auto-bind will be destroyed directly. */
353                 rxq_ctrl->hairpin_status = 1;
354                 txq_ctrl->hairpin_status = 1;
355                 mlx5_txq_release(dev, i);
356                 mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
357         }
358         return 0;
359 error:
360         mlx5_txq_release(dev, i);
361         mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
362         return -rte_errno;
363 }
364
365 /*
366  * Fetch the peer queue's SW & HW information.
367  *
368  * @param dev
369  *   Pointer to Ethernet device structure.
370  * @param peer_queue
371  *   Index of the queue to fetch the information.
372  * @param current_info
373  *   Pointer to the input peer information, not used currently.
374  * @param peer_info
375  *   Pointer to the structure to store the information, output.
376  * @param direction
377  *   Positive to get the RxQ information, zero to get the TxQ information.
378  *
379  * @return
380  *   0 on success, a negative errno value otherwise and rte_errno is set.
381  */
382 int
383 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
384                                struct rte_hairpin_peer_info *current_info,
385                                struct rte_hairpin_peer_info *peer_info,
386                                uint32_t direction)
387 {
388         struct mlx5_priv *priv = dev->data->dev_private;
389         RTE_SET_USED(current_info);
390
391         if (dev->data->dev_started == 0) {
392                 rte_errno = EBUSY;
393                 DRV_LOG(ERR, "peer port %u is not started",
394                         dev->data->port_id);
395                 return -rte_errno;
396         }
397         /*
398          * Peer port used as egress. In the current design, hairpin Tx queue
399          * will be bound to the peer Rx queue. Indeed, only the information of
400          * peer Rx queue needs to be fetched.
401          */
402         if (direction == 0) {
403                 struct mlx5_txq_ctrl *txq_ctrl;
404
405                 txq_ctrl = mlx5_txq_get(dev, peer_queue);
406                 if (txq_ctrl == NULL) {
407                         rte_errno = EINVAL;
408                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
409                                 dev->data->port_id, peer_queue);
410                         return -rte_errno;
411                 }
412                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
413                         rte_errno = EINVAL;
414                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
415                                 dev->data->port_id, peer_queue);
416                         mlx5_txq_release(dev, peer_queue);
417                         return -rte_errno;
418                 }
419                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
420                         rte_errno = ENOMEM;
421                         DRV_LOG(ERR, "port %u no Txq object found: %d",
422                                 dev->data->port_id, peer_queue);
423                         mlx5_txq_release(dev, peer_queue);
424                         return -rte_errno;
425                 }
426                 peer_info->qp_id = txq_ctrl->obj->sq->id;
427                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
428                 /* 1-to-1 mapping, only the first one is used. */
429                 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
430                 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
431                 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
432                 mlx5_txq_release(dev, peer_queue);
433         } else { /* Peer port used as ingress. */
434                 struct mlx5_rxq_ctrl *rxq_ctrl;
435
436                 rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
437                 if (rxq_ctrl == NULL) {
438                         rte_errno = EINVAL;
439                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
440                                 dev->data->port_id, peer_queue);
441                         return -rte_errno;
442                 }
443                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
444                         rte_errno = EINVAL;
445                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
446                                 dev->data->port_id, peer_queue);
447                         mlx5_rxq_release(dev, peer_queue);
448                         return -rte_errno;
449                 }
450                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
451                         rte_errno = ENOMEM;
452                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
453                                 dev->data->port_id, peer_queue);
454                         mlx5_rxq_release(dev, peer_queue);
455                         return -rte_errno;
456                 }
457                 peer_info->qp_id = rxq_ctrl->obj->rq->id;
458                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
459                 peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
460                 peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
461                 peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
462                 mlx5_rxq_release(dev, peer_queue);
463         }
464         return 0;
465 }
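
/*
 * Illustrative sketch (not part of the upstream code): the handshake used by
 * mlx5_hairpin_bind_single_port() below to bind one Tx/Rx hairpin queue pair.
 * The names "dev", "tx_queue", "rx_port" and "rx_queue" are hypothetical.
 *
 *   struct rte_hairpin_peer_info peer, cur;
 *
 *   // 1. Fetch the peer Rx queue HW info from the Rx port (direction = 1).
 *   rte_eth_hairpin_queue_peer_update(rx_port, rx_queue, NULL, &peer, 1);
 *   // 2. Bind the local Tx queue to that peer (direction = 1).
 *   mlx5_hairpin_queue_peer_bind(dev, tx_queue, &peer, 1);
 *   // 3. Fill "cur" with the local SQ id, vhca id and conf flags, then pass
 *   //    it to the Rx port to bind the peer Rx queue (direction = 0).
 *   rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue, &cur, 0);
 */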
466
467 /*
468  * Bind the hairpin queue with the peer HW information.
469  * This needs to be called twice both for Tx and Rx queues of a pair.
470  * If the queue is already bound, it is considered successful.
471  *
472  * @param dev
473  *   Pointer to Ethernet device structure.
474  * @param cur_queue
475  *   Index of the queue to change the HW configuration to bind.
476  * @param peer_info
477  *   Pointer to information of the peer queue.
478  * @param direction
479  *   Positive to configure the TxQ, zero to configure the RxQ.
480  *
481  * @return
482  *   0 on success, a negative errno value otherwise and rte_errno is set.
483  */
484 int
485 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
486                              struct rte_hairpin_peer_info *peer_info,
487                              uint32_t direction)
488 {
489         int ret = 0;
490
491         /*
492          * Consistency check of the peer queue: the opposite direction is used
493          * to get the peer queue info with the ethdev port ID, no need to check.
494          */
495         if (peer_info->peer_q != cur_queue) {
496                 rte_errno = EINVAL;
497                 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
498                         dev->data->port_id, cur_queue, peer_info->peer_q);
499                 return -rte_errno;
500         }
501         if (direction != 0) {
502                 struct mlx5_txq_ctrl *txq_ctrl;
503                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
504
505                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
506                 if (txq_ctrl == NULL) {
507                         rte_errno = EINVAL;
508                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
509                                 dev->data->port_id, cur_queue);
510                         return -rte_errno;
511                 }
512                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
513                         rte_errno = EINVAL;
514                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
515                                 dev->data->port_id, cur_queue);
516                         mlx5_txq_release(dev, cur_queue);
517                         return -rte_errno;
518                 }
519                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
520                         rte_errno = ENOMEM;
521                         DRV_LOG(ERR, "port %u no Txq object found: %d",
522                                 dev->data->port_id, cur_queue);
523                         mlx5_txq_release(dev, cur_queue);
524                         return -rte_errno;
525                 }
526                 if (txq_ctrl->hairpin_status != 0) {
527                         DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
528                                 dev->data->port_id, cur_queue);
529                         mlx5_txq_release(dev, cur_queue);
530                         return 0;
531                 }
532                 /*
533                  * Consistency checking for all queues of one port is done in
534                  * the bind() function, and that check is optional.
535                  */
536                 if (peer_info->tx_explicit !=
537                     txq_ctrl->hairpin_conf.tx_explicit) {
538                         rte_errno = EINVAL;
539                         DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
540                                 " mismatch", dev->data->port_id, cur_queue);
541                         mlx5_txq_release(dev, cur_queue);
542                         return -rte_errno;
543                 }
544                 if (peer_info->manual_bind !=
545                     txq_ctrl->hairpin_conf.manual_bind) {
546                         rte_errno = EINVAL;
547                         DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
548                                 " mismatch", dev->data->port_id, cur_queue);
549                         mlx5_txq_release(dev, cur_queue);
550                         return -rte_errno;
551                 }
552                 sq_attr.state = MLX5_SQC_STATE_RDY;
553                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
554                 sq_attr.hairpin_peer_rq = peer_info->qp_id;
555                 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
556                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
557                 if (ret == 0)
558                         txq_ctrl->hairpin_status = 1;
559                 mlx5_txq_release(dev, cur_queue);
560         } else {
561                 struct mlx5_rxq_ctrl *rxq_ctrl;
562                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
563
564                 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
565                 if (rxq_ctrl == NULL) {
566                         rte_errno = EINVAL;
567                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
568                                 dev->data->port_id, cur_queue);
569                         return -rte_errno;
570                 }
571                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
572                         rte_errno = EINVAL;
573                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
574                                 dev->data->port_id, cur_queue);
575                         mlx5_rxq_release(dev, cur_queue);
576                         return -rte_errno;
577                 }
578                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
579                         rte_errno = ENOMEM;
580                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
581                                 dev->data->port_id, cur_queue);
582                         mlx5_rxq_release(dev, cur_queue);
583                         return -rte_errno;
584                 }
585                 if (rxq_ctrl->hairpin_status != 0) {
586                         DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
587                                 dev->data->port_id, cur_queue);
588                         mlx5_rxq_release(dev, cur_queue);
589                         return 0;
590                 }
591                 if (peer_info->tx_explicit !=
592                     rxq_ctrl->hairpin_conf.tx_explicit) {
593                         rte_errno = EINVAL;
594                         DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
595                                 " mismatch", dev->data->port_id, cur_queue);
596                         mlx5_rxq_release(dev, cur_queue);
597                         return -rte_errno;
598                 }
599                 if (peer_info->manual_bind !=
600                     rxq_ctrl->hairpin_conf.manual_bind) {
601                         rte_errno = EINVAL;
602                         DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
603                                 " mismatch", dev->data->port_id, cur_queue);
604                         mlx5_rxq_release(dev, cur_queue);
605                         return -rte_errno;
606                 }
607                 rq_attr.state = MLX5_SQC_STATE_RDY;
608                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
609                 rq_attr.hairpin_peer_sq = peer_info->qp_id;
610                 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
611                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
612                 if (ret == 0)
613                         rxq_ctrl->hairpin_status = 1;
614                 mlx5_rxq_release(dev, cur_queue);
615         }
616         return ret;
617 }
618
619 /*
620  * Unbind the hairpin queue and reset its HW configuration.
621  * This needs to be called twice both for Tx and Rx queues of a pair.
622  * If the queue is already unbound, it is considered successful.
623  *
624  * @param dev
625  *   Pointer to Ethernet device structure.
626  * @param cur_queue
627  *   Index of the queue to change the HW configuration to unbind.
628  * @param direction
629  *   Positive to reset the TxQ, zero to reset the RxQ.
630  *
631  * @return
632  *   0 on success, a negative errno value otherwise and rte_errno is set.
633  */
634 int
635 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
636                                uint32_t direction)
637 {
638         int ret = 0;
639
640         if (direction != 0) {
641                 struct mlx5_txq_ctrl *txq_ctrl;
642                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
643
644                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
645                 if (txq_ctrl == NULL) {
646                         rte_errno = EINVAL;
647                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
648                                 dev->data->port_id, cur_queue);
649                         return -rte_errno;
650                 }
651                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
652                         rte_errno = EINVAL;
653                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
654                                 dev->data->port_id, cur_queue);
655                         mlx5_txq_release(dev, cur_queue);
656                         return -rte_errno;
657                 }
658                 /* Already unbound, return success before obj checking. */
659                 if (txq_ctrl->hairpin_status == 0) {
660                         DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
661                                 dev->data->port_id, cur_queue);
662                         mlx5_txq_release(dev, cur_queue);
663                         return 0;
664                 }
665                 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
666                         rte_errno = ENOMEM;
667                         DRV_LOG(ERR, "port %u no Txq object found: %d",
668                                 dev->data->port_id, cur_queue);
669                         mlx5_txq_release(dev, cur_queue);
670                         return -rte_errno;
671                 }
672                 sq_attr.state = MLX5_SQC_STATE_RST;
673                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
674                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
675                 if (ret == 0)
676                         txq_ctrl->hairpin_status = 0;
677                 mlx5_txq_release(dev, cur_queue);
678         } else {
679                 struct mlx5_rxq_ctrl *rxq_ctrl;
680                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
681
682                 rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
683                 if (rxq_ctrl == NULL) {
684                         rte_errno = EINVAL;
685                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
686                                 dev->data->port_id, cur_queue);
687                         return -rte_errno;
688                 }
689                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
690                         rte_errno = EINVAL;
691                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
692                                 dev->data->port_id, cur_queue);
693                         mlx5_rxq_release(dev, cur_queue);
694                         return -rte_errno;
695                 }
696                 if (rxq_ctrl->hairpin_status == 0) {
697                         DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
698                                 dev->data->port_id, cur_queue);
699                         mlx5_rxq_release(dev, cur_queue);
700                         return 0;
701                 }
702                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
703                         rte_errno = ENOMEM;
704                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
705                                 dev->data->port_id, cur_queue);
706                         mlx5_rxq_release(dev, cur_queue);
707                         return -rte_errno;
708                 }
709                 rq_attr.state = MLX5_SQC_STATE_RST;
710                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
711                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
712                 if (ret == 0)
713                         rxq_ctrl->hairpin_status = 0;
714                 mlx5_rxq_release(dev, cur_queue);
715         }
716         return ret;
717 }
718
719 /*
720  * Bind the hairpin port pairs, from the Tx to the peer Rx.
721  * This function only supports binding the Tx port to a single Rx port.
722  *
723  * @param dev
724  *   Pointer to Ethernet device structure.
725  * @param rx_port
726  *   Port identifier of the Rx port.
727  *
728  * @return
729  *   0 on success, a negative errno value otherwise and rte_errno is set.
730  */
731 static int
732 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
733 {
734         struct mlx5_priv *priv = dev->data->dev_private;
735         int ret = 0;
736         struct mlx5_txq_ctrl *txq_ctrl;
737         uint32_t i;
738         struct rte_hairpin_peer_info peer = {0xffffff};
739         struct rte_hairpin_peer_info cur;
740         const struct rte_eth_hairpin_conf *conf;
741         uint16_t num_q = 0;
742         uint16_t local_port = priv->dev_data->port_id;
743         uint32_t manual;
744         uint32_t explicit;
745         uint16_t rx_queue;
746
747         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
748                 rte_errno = ENODEV;
749                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
750                 return -rte_errno;
751         }
752         /*
753          * Before binding TxQ to peer RxQ, a first pass over the queues checks
754          * the configuration consistency. This takes a little extra time but
755          * is better than having to roll back afterwards.
756          */
757         for (i = 0; i != priv->txqs_n; i++) {
758                 txq_ctrl = mlx5_txq_get(dev, i);
759                 if (txq_ctrl == NULL)
760                         continue;
761                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
762                         mlx5_txq_release(dev, i);
763                         continue;
764                 }
765                 /*
766                  * All hairpin Tx queues of a single port connected to the
767                  * same peer Rx port should have the same "auto binding" and
768                  * "implicit Tx flow" modes.
769                  * Peer consistency checking is done during per-queue binding.
770                  */
771                 conf = &txq_ctrl->hairpin_conf;
772                 if (conf->peers[0].port == rx_port) {
773                         if (num_q == 0) {
774                                 manual = conf->manual_bind;
775                                 explicit = conf->tx_explicit;
776                         } else {
777                                 if (manual != conf->manual_bind ||
778                                     explicit != conf->tx_explicit) {
779                                         rte_errno = EINVAL;
780                                         DRV_LOG(ERR, "port %u queue %d mode"
781                                                 " mismatch: %u %u, %u %u",
782                                                 local_port, i, manual,
783                                                 conf->manual_bind, explicit,
784                                                 conf->tx_explicit);
785                                         mlx5_txq_release(dev, i);
786                                         return -rte_errno;
787                                 }
788                         }
789                         num_q++;
790                 }
791                 mlx5_txq_release(dev, i);
792         }
793         /* If no queue is configured, success is returned directly. */
794         if (num_q == 0)
795                 return ret;
796         /* All the hairpin TX queues need to be traversed again. */
797         for (i = 0; i != priv->txqs_n; i++) {
798                 txq_ctrl = mlx5_txq_get(dev, i);
799                 if (txq_ctrl == NULL)
800                         continue;
801                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
802                         mlx5_txq_release(dev, i);
803                         continue;
804                 }
805                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
806                         mlx5_txq_release(dev, i);
807                         continue;
808                 }
809                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
810                 /*
811                  * Fetch peer RxQ's information.
812                  * No need to pass the information of the current queue.
813                  */
814                 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
815                                                         NULL, &peer, 1);
816                 if (ret != 0) {
817                         mlx5_txq_release(dev, i);
818                         goto error;
819                 }
820                 /* Accessing its own device, inside mlx5 PMD. */
821                 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
822                 if (ret != 0) {
823                         mlx5_txq_release(dev, i);
824                         goto error;
825                 }
826                 /* Pass TxQ's information to peer RxQ and try binding. */
827                 cur.peer_q = rx_queue;
828                 cur.qp_id = txq_ctrl->obj->sq->id;
829                 cur.vhca_id = priv->config.hca_attr.vhca_id;
830                 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
831                 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
832                 /*
833                  * To access another device in a proper way, an RTE-level
834                  * private function is needed.
835                  */
836                 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
837                                                       &cur, 0);
838                 if (ret != 0) {
839                         mlx5_txq_release(dev, i);
840                         goto error;
841                 }
842                 mlx5_txq_release(dev, i);
843         }
844         return 0;
845 error:
846         /*
847          * Roll back the queues that were already bound.
848          * No need to check the return value of the queue unbind function.
849          */
850         do {
851                 /* No validation is needed here. */
852                 txq_ctrl = mlx5_txq_get(dev, i);
853                 if (txq_ctrl == NULL)
854                         continue;
855                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
856                 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
857                 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
858                 mlx5_txq_release(dev, i);
859         } while (i--);
860         return ret;
861 }
862
863 /*
864  * Unbind the hairpin port pair; the HW configuration of both devices will be
865  * cleared and the status will be reset for all the queues used between them.
866  * This function only supports unbinding the Tx port from a single Rx port.
867  *
868  * @param dev
869  *   Pointer to Ethernet device structure.
870  * @param rx_port
871  *   Port identifier of the Rx port.
872  *
873  * @return
874  *   0 on success, a negative errno value otherwise and rte_errno is set.
875  */
876 static int
877 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
878 {
879         struct mlx5_priv *priv = dev->data->dev_private;
880         struct mlx5_txq_ctrl *txq_ctrl;
881         uint32_t i;
882         int ret;
883         uint16_t cur_port = priv->dev_data->port_id;
884
885         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
886                 rte_errno = ENODEV;
887                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
888                 return -rte_errno;
889         }
890         for (i = 0; i != priv->txqs_n; i++) {
891                 uint16_t rx_queue;
892
893                 txq_ctrl = mlx5_txq_get(dev, i);
894                 if (txq_ctrl == NULL)
895                         continue;
896                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
897                         mlx5_txq_release(dev, i);
898                         continue;
899                 }
900                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
901                         mlx5_txq_release(dev, i);
902                         continue;
903                 }
904                 /* Indeed, only the first used queue needs to be checked. */
905                 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
906                         if (cur_port != rx_port) {
907                                 rte_errno = EINVAL;
908                                 DRV_LOG(ERR, "port %u and port %u are in"
909                                         " auto-bind mode", cur_port, rx_port);
910                                 mlx5_txq_release(dev, i);
911                                 return -rte_errno;
912                         } else {
913                                 return 0;
914                         }
915                 }
916                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
917                 mlx5_txq_release(dev, i);
918                 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
919                 if (ret) {
920                         DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
921                                 rx_port, rx_queue);
922                         return ret;
923                 }
924                 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
925                 if (ret) {
926                         DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
927                                 cur_port, i);
928                         return ret;
929                 }
930         }
931         return 0;
932 }
933
934 /*
935  * Bind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
936  * @see mlx5_hairpin_bind_single_port()
937  */
938 int
939 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
940 {
941         int ret = 0;
942         uint16_t p, pp;
943
944         /*
945          * If the Rx port has no hairpin configuration with the current port,
946          * the binding will be skipped in the single-port bind function.
947          * The device started status is checked only before updating the
948          * queue information.
949          */
950         if (rx_port == RTE_MAX_ETHPORTS) {
951                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
952                         ret = mlx5_hairpin_bind_single_port(dev, p);
953                         if (ret != 0)
954                                 goto unbind;
955                 }
956                 return ret;
957         } else {
958                 return mlx5_hairpin_bind_single_port(dev, rx_port);
959         }
960 unbind:
961         MLX5_ETH_FOREACH_DEV(pp, dev->device)
962                 if (pp < p)
963                         mlx5_hairpin_unbind_single_port(dev, pp);
964         return ret;
965 }
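
/*
 * Illustrative application-level sketch (an assumption, not the driver's own
 * code): with manual-bind hairpin queues, the public experimental API
 * rte_eth_hairpin_bind() is expected to land in mlx5_hairpin_bind() above.
 * "tx_port_id" and "rx_port_id" are hypothetical variables.
 *
 *   // Bind the Tx side of tx_port_id to rx_port_id ...
 *   ret = rte_eth_hairpin_bind(tx_port_id, rx_port_id);
 *   // ... and, for bidirectional traffic, the other direction as well.
 *   if (ret == 0)
 *           ret = rte_eth_hairpin_bind(rx_port_id, tx_port_id);
 *   // Passing RTE_MAX_ETHPORTS as the Rx port binds to all peer ports.
 */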
966
967 /*
968  * Unbind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
969  * @see mlx5_hairpin_unbind_single_port()
970  */
971 int
972 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
973 {
974         int ret = 0;
975         uint16_t p;
976
977         if (rx_port == RTE_MAX_ETHPORTS)
978                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
979                         ret = mlx5_hairpin_unbind_single_port(dev, p);
980                         if (ret != 0)
981                                 return ret;
982                 }
983         else
984                 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
985         return ret;
986 }
987
988 /*
989  * DPDK callback to get the hairpin peer ports list.
990  * This will return the actual number of peer ports and save the identifiers
991  * into the array (sorted; the order may differ from the one used when
992  * setting up the hairpin peer queues).
993  * The peer port ID could be the same as the port ID of the current device.
994  *
995  * @param dev
996  *   Pointer to Ethernet device structure.
997  * @param peer_ports
998  *   Pointer to array to save the port identifiers.
999  * @param len
1000  *   The length of the array.
1001  * @param direction
1002  *   Current port to peer port direction.
1003  *   positive - current used as Tx to get all peer Rx ports.
1004  *   zero - current used as Rx to get all peer Tx ports.
1005  *
1006  * @return
1007  *   0 or a positive value on success (the actual number of peer ports),
1008  *   a negative errno value otherwise and rte_errno is set.
1009  */
1010 int
1011 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1012                             size_t len, uint32_t direction)
1013 {
1014         struct mlx5_priv *priv = dev->data->dev_private;
1015         struct mlx5_txq_ctrl *txq_ctrl;
1016         struct mlx5_rxq_ctrl *rxq_ctrl;
1017         uint32_t i;
1018         uint16_t pp;
1019         uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1020         int ret = 0;
1021
1022         if (direction) {
1023                 for (i = 0; i < priv->txqs_n; i++) {
1024                         txq_ctrl = mlx5_txq_get(dev, i);
1025                         if (!txq_ctrl)
1026                                 continue;
1027                         if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1028                                 mlx5_txq_release(dev, i);
1029                                 continue;
1030                         }
1031                         pp = txq_ctrl->hairpin_conf.peers[0].port;
1032                         if (pp >= RTE_MAX_ETHPORTS) {
1033                                 rte_errno = ERANGE;
1034                                 mlx5_txq_release(dev, i);
1035                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1036                                         "out of range %hu",
1037                                         priv->dev_data->port_id, i, pp);
1038                                 return -rte_errno;
1039                         }
1040                         bits[pp / 32] |= 1 << (pp % 32);
1041                         mlx5_txq_release(dev, i);
1042                 }
1043         } else {
1044                 for (i = 0; i < priv->rxqs_n; i++) {
1045                         rxq_ctrl = mlx5_rxq_get(dev, i);
1046                         if (!rxq_ctrl)
1047                                 continue;
1048                         if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
1049                                 mlx5_rxq_release(dev, i);
1050                                 continue;
1051                         }
1052                         pp = rxq_ctrl->hairpin_conf.peers[0].port;
1053                         if (pp >= RTE_MAX_ETHPORTS) {
1054                                 rte_errno = ERANGE;
1055                                 mlx5_rxq_release(dev, i);
1056                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1057                                         "out of range %hu",
1058                                         priv->dev_data->port_id, i, pp);
1059                                 return -rte_errno;
1060                         }
1061                         bits[pp / 32] |= 1 << (pp % 32);
1062                         mlx5_rxq_release(dev, i);
1063                 }
1064         }
1065         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1066                 if (bits[i / 32] & (1 << (i % 32))) {
1067                         if ((size_t)ret >= len) {
1068                                 rte_errno = E2BIG;
1069                                 return -rte_errno;
1070                         }
1071                         peer_ports[ret++] = i;
1072                 }
1073         }
1074         return ret;
1075 }
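
/*
 * Illustrative sketch (an assumption, not part of the upstream code): an
 * application can reach the callback above through the public API
 * rte_eth_hairpin_get_peer_ports() and then bind each Rx peer port.
 * "port_id" is a hypothetical variable.
 *
 *   uint16_t peers[RTE_MAX_ETHPORTS];
 *   int n, i;
 *
 *   // direction = 1: the current port acts as Tx, peers are the Rx ports.
 *   n = rte_eth_hairpin_get_peer_ports(port_id, peers, RTE_DIM(peers), 1);
 *   for (i = 0; i < n; i++)
 *           rte_eth_hairpin_bind(port_id, peers[i]);
 */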
1076
1077 /**
1078  * DPDK callback to start the device.
1079  *
1080  * Simulate device start by attaching all configured flows.
1081  *
1082  * @param dev
1083  *   Pointer to Ethernet device structure.
1084  *
1085  * @return
1086  *   0 on success, a negative errno value otherwise and rte_errno is set.
1087  */
1088 int
1089 mlx5_dev_start(struct rte_eth_dev *dev)
1090 {
1091         struct mlx5_priv *priv = dev->data->dev_private;
1092         int ret;
1093         int fine_inline;
1094
1095         DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1096         fine_inline = rte_mbuf_dynflag_lookup
1097                 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1098         if (fine_inline >= 0)
1099                 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1100         else
1101                 rte_net_mlx5_dynf_inline_mask = 0;
1102         if (dev->data->nb_rx_queues > 0) {
1103                 ret = mlx5_dev_configure_rss_reta(dev);
1104                 if (ret) {
1105                         DRV_LOG(ERR, "port %u reta config failed: %s",
1106                                 dev->data->port_id, strerror(rte_errno));
1107                         return -rte_errno;
1108                 }
1109         }
1110         ret = mlx5_txpp_start(dev);
1111         if (ret) {
1112                 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1113                         dev->data->port_id, strerror(rte_errno));
1114                 goto error;
1115         }
1116         if ((priv->sh->devx && priv->config.dv_flow_en &&
1117             priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1118                 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1119                 if (ret)
1120                         goto error;
1121         }
1122         ret = mlx5_txq_start(dev);
1123         if (ret) {
1124                 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1125                         dev->data->port_id, strerror(rte_errno));
1126                 goto error;
1127         }
1128         ret = mlx5_rxq_start(dev);
1129         if (ret) {
1130                 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1131                         dev->data->port_id, strerror(rte_errno));
1132                 goto error;
1133         }
1134         /*
1135          * This step is skipped if there is no hairpin Tx queue configured
1136          * with an Rx peer queue on the same device.
1137          */
1138         ret = mlx5_hairpin_auto_bind(dev);
1139         if (ret) {
1140                 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1141                         dev->data->port_id, strerror(rte_errno));
1142                 goto error;
1143         }
1144         /* Set started flag here for the following steps like control flow. */
1145         dev->data->dev_started = 1;
1146         ret = mlx5_rx_intr_vec_enable(dev);
1147         if (ret) {
1148                 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1149                         dev->data->port_id);
1150                 goto error;
1151         }
1152         mlx5_os_stats_init(dev);
1153         ret = mlx5_traffic_enable(dev);
1154         if (ret) {
1155                 DRV_LOG(ERR, "port %u failed to set defaults flows",
1156                         dev->data->port_id);
1157                 goto error;
1158         }
1159         /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1160         mlx5_flow_rxq_dynf_metadata_set(dev);
1161         /* Set flags and context to convert Rx timestamps. */
1162         mlx5_rxq_timestamp_set(dev);
1163         /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1164         mlx5_txq_dynf_timestamp_set(dev);
1165         /*
1166          * In non-cached mode, only the default mreg copy action needs to
1167          * be started, and no flow created by the application exists anymore.
1168          * But it is worth wrapping the interface for further usage.
1169          */
1170         ret = mlx5_flow_start_default(dev);
1171         if (ret) {
1172                 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1173                         dev->data->port_id, strerror(rte_errno));
1174                 goto error;
1175         }
1176         if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1177                 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1178                         dev->data->port_id, rte_strerror(rte_errno));
1179                 goto error;
1180         }
1181         rte_wmb();
1182         dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1183         dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1184         /* Enable datapath on secondary process. */
1185         mlx5_mp_os_req_start_rxtx(dev);
1186         if (priv->sh->intr_handle.fd >= 0) {
1187                 priv->sh->port[priv->dev_port - 1].ih_port_id =
1188                                         (uint32_t)dev->data->port_id;
1189         } else {
1190                 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1191                         dev->data->port_id);
1192                 dev->data->dev_conf.intr_conf.lsc = 0;
1193                 dev->data->dev_conf.intr_conf.rmv = 0;
1194         }
1195         if (priv->sh->intr_handle_devx.fd >= 0)
1196                 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1197                                         (uint32_t)dev->data->port_id;
1198         return 0;
1199 error:
1200         ret = rte_errno; /* Save rte_errno before cleanup. */
1201         /* Rollback. */
1202         dev->data->dev_started = 0;
1203         mlx5_flow_stop_default(dev);
1204         mlx5_traffic_disable(dev);
1205         mlx5_txq_stop(dev);
1206         mlx5_rxq_stop(dev);
1207         if (priv->obj_ops.lb_dummy_queue_release)
1208                 priv->obj_ops.lb_dummy_queue_release(dev);
1209         mlx5_txpp_stop(dev); /* Stop last. */
1210         rte_errno = ret; /* Restore rte_errno. */
1211         return -rte_errno;
1212 }
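
/*
 * Illustrative application-level sketch (an assumption, not the driver's own
 * code): a single same-port hairpin queue pair set up before the start call
 * that ends up in mlx5_dev_start() above. With the default (auto-bind)
 * configuration, mlx5_hairpin_auto_bind() is run during start. "port_id",
 * "n_rxq", "n_txq" and "dev_conf" are hypothetical.
 *
 *   struct rte_eth_hairpin_conf hp = { .peer_count = 1 };
 *
 *   rte_eth_dev_configure(port_id, n_rxq + 1, n_txq + 1, &dev_conf);
 *   // Hairpin Rx queue n_rxq is peered with hairpin Tx queue n_txq.
 *   hp.peers[0].port = port_id;
 *   hp.peers[0].queue = n_txq;
 *   rte_eth_rx_hairpin_queue_setup(port_id, n_rxq, 512, &hp);
 *   hp.peers[0].queue = n_rxq;
 *   rte_eth_tx_hairpin_queue_setup(port_id, n_txq, 512, &hp);
 *   rte_eth_dev_start(port_id); // Same-port pairs are auto-bound here.
 */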
1213
1214 /**
1215  * DPDK callback to stop the device.
1216  *
1217  * Simulate device stop by detaching all configured flows.
1218  *
1219  * @param dev
1220  *   Pointer to Ethernet device structure.
1221  */
1222 int
1223 mlx5_dev_stop(struct rte_eth_dev *dev)
1224 {
1225         struct mlx5_priv *priv = dev->data->dev_private;
1226
1227         dev->data->dev_started = 0;
1228         /* Prevent crashes when queues are still in use. */
1229         dev->rx_pkt_burst = removed_rx_burst;
1230         dev->tx_pkt_burst = removed_tx_burst;
1231         rte_wmb();
1232         /* Disable datapath on secondary process. */
1233         mlx5_mp_os_req_stop_rxtx(dev);
1234         rte_delay_us_sleep(1000 * priv->rxqs_n);
1235         DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1236         mlx5_flow_stop_default(dev);
1237         /* Control flows for default traffic can be removed first. */
1238         mlx5_traffic_disable(dev);
1239         /* All RX queue flags will be cleared in the flush interface. */
1240         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1241         mlx5_flow_meter_rxq_flush(dev);
1242         mlx5_rx_intr_vec_disable(dev);
1243         priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1244         priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1245         mlx5_txq_stop(dev);
1246         mlx5_rxq_stop(dev);
1247         if (priv->obj_ops.lb_dummy_queue_release)
1248                 priv->obj_ops.lb_dummy_queue_release(dev);
1249         mlx5_txpp_stop(dev);
1250
1251         return 0;
1252 }
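
/*
 * Illustrative sketch (an assumption, not the driver's own code): the stop
 * path above is normally reached through the public ethdev API; "port_id"
 * is a hypothetical variable.
 *
 *   ret = rte_eth_dev_stop(port_id);    // Ends up in mlx5_dev_stop().
 *   if (ret == 0)
 *           rte_eth_dev_close(port_id); // Release the port resources.
 */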
1253
1254 /**
1255  * Enable traffic flows configured by control plane
1256  *
1257  * @param dev
1260  *   Pointer to Ethernet device structure.
1261  *
1262  * @return
1263  *   0 on success, a negative errno value otherwise and rte_errno is set.
1264  */
1265 int
1266 mlx5_traffic_enable(struct rte_eth_dev *dev)
1267 {
1268         struct mlx5_priv *priv = dev->data->dev_private;
1269         struct rte_flow_item_eth bcast = {
1270                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1271         };
1272         struct rte_flow_item_eth ipv6_multi_spec = {
1273                 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1274         };
1275         struct rte_flow_item_eth ipv6_multi_mask = {
1276                 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1277         };
1278         struct rte_flow_item_eth unicast = {
1279                 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1280         };
1281         struct rte_flow_item_eth unicast_mask = {
1282                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1283         };
1284         const unsigned int vlan_filter_n = priv->vlan_filter_n;
1285         const struct rte_ether_addr cmp = {
1286                 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1287         };
1288         unsigned int i;
1289         unsigned int j;
1290         int ret;
1291
1292         /*
1293          * The hairpin Tx queue default flow must be created regardless of
1294          * isolation mode; otherwise packets would be sent out directly,
1295          * bypassing the Tx flow actions, e.g. encapsulation.
1296          */
1297         for (i = 0; i != priv->txqs_n; ++i) {
1298                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1299                 if (!txq_ctrl)
1300                         continue;
1301                 /* Only Tx implicit mode requires the default Tx flow. */
1302                 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1303                     txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1304                     txq_ctrl->hairpin_conf.peers[0].port ==
1305                     priv->dev_data->port_id) {
1306                         ret = mlx5_ctrl_flow_source_queue(dev, i);
1307                         if (ret) {
1308                                 mlx5_txq_release(dev, i);
1309                                 goto error;
1310                         }
1311                 }
1312                 mlx5_txq_release(dev, i);
1313         }
1314         if (priv->config.dv_esw_en && !priv->config.vf && !priv->config.sf) {
1315                 if (mlx5_flow_create_esw_table_zero_flow(dev))
1316                         priv->fdb_def_rule = 1;
1317                 else
1318                         DRV_LOG(INFO, "port %u FDB default rule cannot be"
1319                                 " configured - only Eswitch group 0 flows are"
1320                                 " supported.", dev->data->port_id);
1321         }
1322         if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1323                 ret = mlx5_flow_lacp_miss(dev);
1324                 if (ret)
1325                         DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1326                                 "forward LACP to kernel.", dev->data->port_id);
1327                 else
1328                         DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1329                                 dev->data->port_id);
1330         }
1331         if (priv->isolated)
1332                 return 0;
1333         if (dev->data->promiscuous) {
1334                 struct rte_flow_item_eth promisc = {
1335                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1336                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1337                         .type = 0,
1338                 };
1339
1340                 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1341                 if (ret)
1342                         goto error;
1343         }
1344         if (dev->data->all_multicast) {
1345                 struct rte_flow_item_eth multicast = {
1346                         .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1347                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1348                         .type = 0,
1349                 };
1350
1351                 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1352                 if (ret)
1353                         goto error;
1354         } else {
1355                 /* Add broadcast/multicast flows. */
1356                 for (i = 0; i != vlan_filter_n; ++i) {
1357                         uint16_t vlan = priv->vlan_filter[i];
1358
1359                         struct rte_flow_item_vlan vlan_spec = {
1360                                 .tci = rte_cpu_to_be_16(vlan),
1361                         };
1362                         struct rte_flow_item_vlan vlan_mask =
1363                                 rte_flow_item_vlan_mask;
1364
1365                         ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1366                                                   &vlan_spec, &vlan_mask);
1367                         if (ret)
1368                                 goto error;
1369                         ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1370                                                   &ipv6_multi_mask,
1371                                                   &vlan_spec, &vlan_mask);
1372                         if (ret)
1373                                 goto error;
1374                 }
1375                 if (!vlan_filter_n) {
1376                         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1377                         if (ret)
1378                                 goto error;
1379                         ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1380                                              &ipv6_multi_mask);
1381                         if (ret) {
1382                                 /* Do not fail on IPv6 multicast flow creation failure. */
1383                                 DRV_LOG(WARNING,
1384                                         "IPv6 multicast is not supported");
1385                                 ret = 0;
1386                         }
1387                 }
1388         }
1389         /* Add MAC address flows. */
1390         for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1391                 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1392
1393                 if (!memcmp(mac, &cmp, sizeof(*mac)))
1394                         continue;
1395                 memcpy(&unicast.dst.addr_bytes,
1396                        mac->addr_bytes,
1397                        RTE_ETHER_ADDR_LEN);
1398                 for (j = 0; j != vlan_filter_n; ++j) {
1399                         uint16_t vlan = priv->vlan_filter[j];
1400
1401                         struct rte_flow_item_vlan vlan_spec = {
1402                                 .tci = rte_cpu_to_be_16(vlan),
1403                         };
1404                         struct rte_flow_item_vlan vlan_mask =
1405                                 rte_flow_item_vlan_mask;
1406
1407                         ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1408                                                   &unicast_mask,
1409                                                   &vlan_spec,
1410                                                   &vlan_mask);
1411                         if (ret)
1412                                 goto error;
1413                 }
1414                 if (!vlan_filter_n) {
1415                         ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1416                         if (ret)
1417                                 goto error;
1418                 }
1419         }
1420         return 0;
1421 error:
1422         ret = rte_errno; /* Save rte_errno before cleanup. */
1423         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1424         rte_errno = ret; /* Restore rte_errno. */
1425         return -rte_errno;
1426 }
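
/*
 * Illustrative sketch (editor's addition, not the PMD-internal helper): the
 * unicast MAC + VLAN control flows installed above roughly correspond to the
 * following application-level rte_flow rule. The function name, the rx_queue
 * parameter and the QUEUE action are assumptions made for this example only;
 * mlx5_ctrl_flow_vlan() builds its rules through internal driver paths.
 */
#include <rte_flow.h>	/* would normally sit at the top of the file */

static int
example_mac_vlan_rule(uint16_t port_id,
		      const struct rte_flow_item_eth *eth_spec,
		      const struct rte_flow_item_eth *eth_mask,
		      const struct rte_flow_item_vlan *vlan_spec,
		      const struct rte_flow_item_vlan *vlan_mask,
		      uint16_t rx_queue)
{
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_action_queue queue = { .index = rx_queue };
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH,
		  .spec = eth_spec, .mask = eth_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_VLAN,
		  .spec = vlan_spec, .mask = vlan_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	/* A NULL return means failure; error.message describes the cause. */
	return rte_flow_create(port_id, &attr, pattern, actions, &error) ==
	       NULL ? -1 : 0;
}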
1427
1429 /**
1430  * Disable traffic flows configured by control plane.
1431  *
1432  * @param dev
1433  *   Pointer to Ethernet device structure.
1434  */
1435 void
1436 mlx5_traffic_disable(struct rte_eth_dev *dev)
1437 {
1438         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1439 }
1440
1441 /**
1442  * Restart traffic flows configured by control plane.
1443  *
1444  * @param dev
1445  *   Pointer to Ethernet device structure.
1446  *
1447  * @return
1448  *   0 on success, a negative errno value otherwise and rte_errno is set.
1449  */
1450 int
1451 mlx5_traffic_restart(struct rte_eth_dev *dev)
1452 {
1453         if (dev->data->dev_started) {
1454                 mlx5_traffic_disable(dev);
1455                 return mlx5_traffic_enable(dev);
1456         }
1457         return 0;
1458 }
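
/*
 * Usage sketch (editor's addition): Rx-mode changes that affect the control
 * flows, e.g. toggling all-multicast, are typically applied by rebuilding
 * them through mlx5_traffic_restart(). The helper below is hypothetical and
 * only illustrates that pattern.
 */
static int
example_set_allmulticast(struct rte_eth_dev *dev, int enable)
{
	dev->data->all_multicast = enable ? 1 : 0;
	/* Tear down and re-create the control flows under the new Rx mode. */
	return mlx5_traffic_restart(dev);
}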