[dpdk.git] drivers/net/mlx5/mlx5_trigger.c (as of commit "common/mlx5: fix MPRQ mempool registration")
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32         struct mlx5_priv *priv = dev->data->dev_private;
33         unsigned int i;
34
35         for (i = 0; i != priv->txqs_n; ++i)
36                 mlx5_txq_release(dev, i);
37 }
38
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51         struct mlx5_priv *priv = dev->data->dev_private;
52         unsigned int i;
53         int ret;
54
55         for (i = 0; i != priv->txqs_n; ++i) {
56                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57                 struct mlx5_txq_data *txq_data = txq_ctrl ? &txq_ctrl->txq : NULL;
58                 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59
60                 if (!txq_ctrl)
61                         continue;
62                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63                         txq_alloc_elts(txq_ctrl);
64                 MLX5_ASSERT(!txq_ctrl->obj);
65                 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66                                             0, txq_ctrl->socket);
67                 if (!txq_ctrl->obj) {
68                         DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69                                 "memory resources.", dev->data->port_id,
70                                 txq_data->idx);
71                         rte_errno = ENOMEM;
72                         goto error;
73                 }
74                 ret = priv->obj_ops.txq_obj_new(dev, i);
75                 if (ret < 0) {
76                         mlx5_free(txq_ctrl->obj);
77                         txq_ctrl->obj = NULL;
78                         goto error;
79                 }
80                 if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81                         size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82
83                         txq_data->fcqs = mlx5_malloc(flags, size,
84                                                      RTE_CACHE_LINE_SIZE,
85                                                      txq_ctrl->socket);
86                         if (!txq_data->fcqs) {
87                                 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88                                         "allocate memory (FCQ).",
89                                         dev->data->port_id, i);
90                                 rte_errno = ENOMEM;
91                                 goto error;
92                         }
93                 }
94                 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95                         dev->data->port_id, i, (void *)&txq_ctrl->obj);
96                 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97         }
98         return 0;
99 error:
100         ret = rte_errno; /* Save rte_errno before cleanup. */
101         do {
102                 mlx5_txq_release(dev, i);
103         } while (i-- != 0);
104         rte_errno = ret; /* Restore rte_errno. */
105         return -rte_errno;
106 }
107
108 /**
109  * Translate the chunk address to MR key in order to put in into the cache.
110  */
111 static void
112 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
113                              struct rte_mempool_memhdr *memhdr,
114                              unsigned int idx)
115 {
116         struct mlx5_rxq_data *rxq = opaque;
117
118         RTE_SET_USED(mp);
119         RTE_SET_USED(idx);
120         mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
121 }
122
123 /**
124  * Register Rx queue mempools and fill the Rx queue cache.
125  * This function tolerates repeated mempool registration.
126  *
127  * @param[in] rxq_ctrl
128  *   Rx queue control data.
129  *
130  * @return
131  *   0 on success, (-1) on failure and rte_errno is set.
132  */
133 static int
134 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
135 {
136         struct rte_mempool *mp;
137         uint32_t s;
138         int ret = 0;
139
140         mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
141         /* MPRQ mempool is registered on creation, just fill the cache. */
142         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
143                 rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
144                                      mlx5_rxq_mempool_register_cb,
145                                      &rxq_ctrl->rxq);
146                 return 0;
147         }
148         for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
149                 uint32_t flags;
150
151                 mp = rxq_ctrl->rxq.rxseg[s].mp;
152                 flags = mp != rxq_ctrl->rxq.mprq_mp ?
153                         rte_pktmbuf_priv_flags(mp) : 0;
154                 ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp);
155                 if (ret < 0 && rte_errno != EEXIST)
156                         return ret;
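                /*
                 * Fill the MR cache only when the mbuf data resides in the
                 * mempool memory itself (no pinned external buffers).
                 */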
157                 if ((flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) == 0)
158                         rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
159                                              &rxq_ctrl->rxq);
160         }
161         return 0;
162 }
163
164 /**
165  * Stop traffic on Rx queues.
166  *
167  * @param dev
168  *   Pointer to Ethernet device structure.
169  */
170 static void
171 mlx5_rxq_stop(struct rte_eth_dev *dev)
172 {
173         struct mlx5_priv *priv = dev->data->dev_private;
174         unsigned int i;
175
176         for (i = 0; i != priv->rxqs_n; ++i)
177                 mlx5_rxq_release(dev, i);
178 }
179
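/**
 * Prepare an Rx queue to be started: for a standard queue, register its
 * mempools and allocate the queue elements, then allocate the queue object.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq_ctrl
 *   Rx queue control data.
 * @param idx
 *   Queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */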
180 static int
181 mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
182                       unsigned int idx)
183 {
184         int ret = 0;
185
186         if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
187                 /*
188                  * Pre-register the mempools. Regardless of whether
189                  * the implicit registration is enabled or not,
190                  * Rx mempool destruction is tracked to free MRs.
191                  */
192                 if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
193                         return -rte_errno;
194                 ret = rxq_alloc_elts(rxq_ctrl);
195                 if (ret)
196                         return ret;
197         }
198         MLX5_ASSERT(!rxq_ctrl->obj);
199         rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
200                                     sizeof(*rxq_ctrl->obj), 0,
201                                     rxq_ctrl->socket);
202         if (!rxq_ctrl->obj) {
203                 DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
204                         dev->data->port_id, idx);
205                 rte_errno = ENOMEM;
206                 return -rte_errno;
207         }
208         DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
209                 idx, (void *)&rxq_ctrl->obj);
210         return 0;
211 }
212
213 /**
214  * Start traffic on Rx queues.
215  *
216  * @param dev
217  *   Pointer to Ethernet device structure.
218  *
219  * @return
220  *   0 on success, a negative errno value otherwise and rte_errno is set.
221  */
222 static int
223 mlx5_rxq_start(struct rte_eth_dev *dev)
224 {
225         struct mlx5_priv *priv = dev->data->dev_private;
226         unsigned int i;
227         int ret = 0;
228
229         /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
230         if (mlx5_mprq_alloc_mp(dev)) {
231                 /* Should not release Rx queues but return immediately. */
232                 return -rte_errno;
233         }
234         DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
235                 dev->data->port_id, priv->sh->device_attr.max_qp_wr);
236         DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
237                 dev->data->port_id, priv->sh->device_attr.max_sge);
238         for (i = 0; i != priv->rxqs_n; ++i) {
239                 struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
240                 struct mlx5_rxq_ctrl *rxq_ctrl;
241
242                 if (rxq == NULL)
243                         continue;
244                 rxq_ctrl = rxq->ctrl;
245                 if (!rxq_ctrl->started) {
246                         if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
247                                 goto error;
248                         LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
249                 }
250                 ret = priv->obj_ops.rxq_obj_new(rxq);
251                 if (ret) {
252                         mlx5_free(rxq_ctrl->obj);
253                         rxq_ctrl->obj = NULL;
254                         goto error;
255                 }
256                 rxq_ctrl->started = true;
257         }
258         return 0;
259 error:
260         ret = rte_errno; /* Save rte_errno before cleanup. */
261         do {
262                 mlx5_rxq_release(dev, i);
263         } while (i-- != 0);
264         rte_errno = ret; /* Restore rte_errno. */
265         return -rte_errno;
266 }
267
268 /**
269  * Bind hairpin Tx queues to their peer Rx queues automatically.
270  *
271  * Only Tx queues whose peer port is this same device are handled here.
272  *
273  * @param dev
274  *   Pointer to Ethernet device structure.
275  *
276  * @return
277  *   0 on success, a negative errno value otherwise and rte_errno is set.
278  */
279 static int
280 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
281 {
282         struct mlx5_priv *priv = dev->data->dev_private;
283         struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
284         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
285         struct mlx5_txq_ctrl *txq_ctrl;
286         struct mlx5_rxq_priv *rxq;
287         struct mlx5_rxq_ctrl *rxq_ctrl;
288         struct mlx5_devx_obj *sq;
289         struct mlx5_devx_obj *rq;
290         unsigned int i;
291         int ret = 0;
292         bool need_auto = false;
293         uint16_t self_port = dev->data->port_id;
294
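        /*
         * First pass: check whether any hairpin Tx queue peered with this same
         * port requires automatic binding; manual binding means no work here.
         */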
295         for (i = 0; i != priv->txqs_n; ++i) {
296                 txq_ctrl = mlx5_txq_get(dev, i);
297                 if (!txq_ctrl)
298                         continue;
299                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
300                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
301                         mlx5_txq_release(dev, i);
302                         continue;
303                 }
304                 if (txq_ctrl->hairpin_conf.manual_bind) {
305                         mlx5_txq_release(dev, i);
306                         return 0;
307                 }
308                 need_auto = true;
309                 mlx5_txq_release(dev, i);
310         }
311         if (!need_auto)
312                 return 0;
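        /* Second pass: bind each self-peered hairpin Tx queue to its Rx queue. */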
313         for (i = 0; i != priv->txqs_n; ++i) {
314                 txq_ctrl = mlx5_txq_get(dev, i);
315                 if (!txq_ctrl)
316                         continue;
317                 /* Skip hairpin queues with other peer ports. */
318                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
319                     txq_ctrl->hairpin_conf.peers[0].port != self_port) {
320                         mlx5_txq_release(dev, i);
321                         continue;
322                 }
323                 if (!txq_ctrl->obj) {
324                         rte_errno = ENOMEM;
325                         DRV_LOG(ERR, "port %u no txq object found: %d",
326                                 dev->data->port_id, i);
327                         mlx5_txq_release(dev, i);
328                         return -rte_errno;
329                 }
330                 sq = txq_ctrl->obj->sq;
331                 rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
332                 if (rxq == NULL) {
333                         mlx5_txq_release(dev, i);
334                         rte_errno = EINVAL;
335                         DRV_LOG(ERR, "port %u no rxq object found: %d",
336                                 dev->data->port_id,
337                                 txq_ctrl->hairpin_conf.peers[0].queue);
338                         return -rte_errno;
339                 }
340                 rxq_ctrl = rxq->ctrl;
341                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
342                     rxq->hairpin_conf.peers[0].queue != i) {
343                         rte_errno = ENOMEM;
344                         DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
345                                 "Rx queue %d", dev->data->port_id,
346                                 i, txq_ctrl->hairpin_conf.peers[0].queue);
347                         goto error;
348                 }
349                 rq = rxq_ctrl->obj->rq;
350                 if (!rq) {
351                         rte_errno = ENOMEM;
352                         DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
353                                 dev->data->port_id,
354                                 txq_ctrl->hairpin_conf.peers[0].queue);
355                         goto error;
356                 }
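                /*
                 * Move the SQ and its peer RQ from reset to ready state with
                 * the peer object and VHCA IDs set on each side.
                 */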
357                 sq_attr.state = MLX5_SQC_STATE_RDY;
358                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
359                 sq_attr.hairpin_peer_rq = rq->id;
360                 sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
361                 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
362                 if (ret)
363                         goto error;
364                 rq_attr.state = MLX5_SQC_STATE_RDY;
365                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
366                 rq_attr.hairpin_peer_sq = sq->id;
367                 rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
368                 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
369                 if (ret)
370                         goto error;
371                 /* Queues with auto-bind will be destroyed directly (no explicit unbind). */
372                 rxq->hairpin_status = 1;
373                 txq_ctrl->hairpin_status = 1;
374                 mlx5_txq_release(dev, i);
375         }
376         return 0;
377 error:
378         mlx5_txq_release(dev, i);
379         return -rte_errno;
380 }
381
382 /*
383  * Fetch the peer queue's SW & HW information.
384  *
385  * @param dev
386  *   Pointer to Ethernet device structure.
387  * @param peer_queue
388  *   Index of the queue to fetch the information.
389  * @param current_info
390  *   Pointer to the input peer information, not used currently.
391  * @param peer_info
392  *   Pointer to the structure to store the information, output.
393  * @param direction
394  *   Positive to get the RxQ information, zero to get the TxQ information.
395  *
396  * @return
397  *   0 on success, a negative errno value otherwise and rte_errno is set.
398  */
399 int
400 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
401                                struct rte_hairpin_peer_info *current_info,
402                                struct rte_hairpin_peer_info *peer_info,
403                                uint32_t direction)
404 {
405         struct mlx5_priv *priv = dev->data->dev_private;
406         RTE_SET_USED(current_info);
407
408         if (dev->data->dev_started == 0) {
409                 rte_errno = EBUSY;
410                 DRV_LOG(ERR, "peer port %u is not started",
411                         dev->data->port_id);
412                 return -rte_errno;
413         }
414         /*
415          * Peer port used as egress. In the current design, a hairpin Tx queue
416          * is bound to the peer Rx queue, so when this device is the egress
417          * side only its own Tx queue information needs to be fetched.
418          */
419         if (direction == 0) {
420                 struct mlx5_txq_ctrl *txq_ctrl;
421
422                 txq_ctrl = mlx5_txq_get(dev, peer_queue);
423                 if (txq_ctrl == NULL) {
424                         rte_errno = EINVAL;
425                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
426                                 dev->data->port_id, peer_queue);
427                         return -rte_errno;
428                 }
429                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
430                         rte_errno = EINVAL;
431                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
432                                 dev->data->port_id, peer_queue);
433                         mlx5_txq_release(dev, peer_queue);
434                         return -rte_errno;
435                 }
436                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
437                         rte_errno = ENOMEM;
438                         DRV_LOG(ERR, "port %u no Txq object found: %d",
439                                 dev->data->port_id, peer_queue);
440                         mlx5_txq_release(dev, peer_queue);
441                         return -rte_errno;
442                 }
443                 peer_info->qp_id = txq_ctrl->obj->sq->id;
444                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
445                 /* 1-to-1 mapping, only the first one is used. */
446                 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
447                 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
448                 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
449                 mlx5_txq_release(dev, peer_queue);
450         } else { /* Peer port used as ingress. */
451                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
452                 struct mlx5_rxq_ctrl *rxq_ctrl;
453
454                 if (rxq == NULL) {
455                         rte_errno = EINVAL;
456                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
457                                 dev->data->port_id, peer_queue);
458                         return -rte_errno;
459                 }
460                 rxq_ctrl = rxq->ctrl;
461                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
462                         rte_errno = EINVAL;
463                         DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
464                                 dev->data->port_id, peer_queue);
465                         return -rte_errno;
466                 }
467                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
468                         rte_errno = ENOMEM;
469                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
470                                 dev->data->port_id, peer_queue);
471                         return -rte_errno;
472                 }
473                 peer_info->qp_id = rxq_ctrl->obj->rq->id;
474                 peer_info->vhca_id = priv->config.hca_attr.vhca_id;
475                 peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
476                 peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
477                 peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
478         }
479         return 0;
480 }
481
482 /*
483  * Bind the hairpin queue with the peer HW information.
484  * This needs to be called twice, once for the Tx and once for the Rx queue of a pair.
485  * If the queue is already bound, it is considered successful.
486  *
487  * @param dev
488  *   Pointer to Ethernet device structure.
489  * @param cur_queue
490  *   Index of the queue to change the HW configuration to bind.
491  * @param peer_info
492  *   Pointer to information of the peer queue.
493  * @param direction
494  *   Positive to configure the TxQ, zero to configure the RxQ.
495  *
496  * @return
497  *   0 on success, a negative errno value otherwise and rte_errno is set.
498  */
499 int
500 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
501                              struct rte_hairpin_peer_info *peer_info,
502                              uint32_t direction)
503 {
504         int ret = 0;
505
506         /*
507          * Consistency check of the peer queue: the info was fetched from the
508          * peer via its ethdev port ID, so only the queue index needs checking.
509          */
510         if (peer_info->peer_q != cur_queue) {
511                 rte_errno = EINVAL;
512                 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
513                         dev->data->port_id, cur_queue, peer_info->peer_q);
514                 return -rte_errno;
515         }
516         if (direction != 0) {
517                 struct mlx5_txq_ctrl *txq_ctrl;
518                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
519
520                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
521                 if (txq_ctrl == NULL) {
522                         rte_errno = EINVAL;
523                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
524                                 dev->data->port_id, cur_queue);
525                         return -rte_errno;
526                 }
527                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
528                         rte_errno = EINVAL;
529                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
530                                 dev->data->port_id, cur_queue);
531                         mlx5_txq_release(dev, cur_queue);
532                         return -rte_errno;
533                 }
534                 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
535                         rte_errno = ENOMEM;
536                         DRV_LOG(ERR, "port %u no Txq object found: %d",
537                                 dev->data->port_id, cur_queue);
538                         mlx5_txq_release(dev, cur_queue);
539                         return -rte_errno;
540                 }
541                 if (txq_ctrl->hairpin_status != 0) {
542                         DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
543                                 dev->data->port_id, cur_queue);
544                         mlx5_txq_release(dev, cur_queue);
545                         return 0;
546                 }
547                 /*
548                  * Consistency checking across all queues of one port is done
549                  * in the bind() function, and that is optional.
550                  */
551                 if (peer_info->tx_explicit !=
552                     txq_ctrl->hairpin_conf.tx_explicit) {
553                         rte_errno = EINVAL;
554                         DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
555                                 " mismatch", dev->data->port_id, cur_queue);
556                         mlx5_txq_release(dev, cur_queue);
557                         return -rte_errno;
558                 }
559                 if (peer_info->manual_bind !=
560                     txq_ctrl->hairpin_conf.manual_bind) {
561                         rte_errno = EINVAL;
562                         DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
563                                 " mismatch", dev->data->port_id, cur_queue);
564                         mlx5_txq_release(dev, cur_queue);
565                         return -rte_errno;
566                 }
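                /* Point the SQ at the peer RQ/VHCA and move it to ready state. */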
567                 sq_attr.state = MLX5_SQC_STATE_RDY;
568                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
569                 sq_attr.hairpin_peer_rq = peer_info->qp_id;
570                 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
571                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
572                 if (ret == 0)
573                         txq_ctrl->hairpin_status = 1;
574                 mlx5_txq_release(dev, cur_queue);
575         } else {
576                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
577                 struct mlx5_rxq_ctrl *rxq_ctrl;
578                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
579
580                 if (rxq == NULL) {
581                         rte_errno = EINVAL;
582                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
583                                 dev->data->port_id, cur_queue);
584                         return -rte_errno;
585                 }
586                 rxq_ctrl = rxq->ctrl;
587                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
588                         rte_errno = EINVAL;
589                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
590                                 dev->data->port_id, cur_queue);
591                         return -rte_errno;
592                 }
593                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
594                         rte_errno = ENOMEM;
595                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
596                                 dev->data->port_id, cur_queue);
597                         return -rte_errno;
598                 }
599                 if (rxq->hairpin_status != 0) {
600                         DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
601                                 dev->data->port_id, cur_queue);
602                         return 0;
603                 }
604                 if (peer_info->tx_explicit !=
605                     rxq->hairpin_conf.tx_explicit) {
606                         rte_errno = EINVAL;
607                         DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
608                                 " mismatch", dev->data->port_id, cur_queue);
609                         return -rte_errno;
610                 }
611                 if (peer_info->manual_bind !=
612                     rxq->hairpin_conf.manual_bind) {
613                         rte_errno = EINVAL;
614                         DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
615                                 " mismatch", dev->data->port_id, cur_queue);
616                         return -rte_errno;
617                 }
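                /* Point the RQ at the peer SQ/VHCA and move it to ready state. */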
618                 rq_attr.state = MLX5_SQC_STATE_RDY;
619                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
620                 rq_attr.hairpin_peer_sq = peer_info->qp_id;
621                 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
622                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
623                 if (ret == 0)
624                         rxq->hairpin_status = 1;
625         }
626         return ret;
627 }
628
629 /*
630  * Unbind the hairpin queue and reset its HW configuration.
631  * This needs to be called twice, once for the Tx and once for the Rx queue of a pair.
632  * If the queue is already unbound, it is considered successful.
633  *
634  * @param dev
635  *   Pointer to Ethernet device structure.
636  * @param cur_queue
637  *   Index of the queue to change the HW configuration to unbind.
638  * @param direction
639  *   Positive to reset the TxQ, zero to reset the RxQ.
640  *
641  * @return
642  *   0 on success, a negative errno value otherwise and rte_errno is set.
643  */
644 int
645 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
646                                uint32_t direction)
647 {
648         int ret = 0;
649
650         if (direction != 0) {
651                 struct mlx5_txq_ctrl *txq_ctrl;
652                 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
653
654                 txq_ctrl = mlx5_txq_get(dev, cur_queue);
655                 if (txq_ctrl == NULL) {
656                         rte_errno = EINVAL;
657                         DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
658                                 dev->data->port_id, cur_queue);
659                         return -rte_errno;
660                 }
661                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
662                         rte_errno = EINVAL;
663                         DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
664                                 dev->data->port_id, cur_queue);
665                         mlx5_txq_release(dev, cur_queue);
666                         return -rte_errno;
667                 }
668                 /* Already unbound, return success before obj checking. */
669                 if (txq_ctrl->hairpin_status == 0) {
670                         DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
671                                 dev->data->port_id, cur_queue);
672                         mlx5_txq_release(dev, cur_queue);
673                         return 0;
674                 }
675                 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
676                         rte_errno = ENOMEM;
677                         DRV_LOG(ERR, "port %u no Txq object found: %d",
678                                 dev->data->port_id, cur_queue);
679                         mlx5_txq_release(dev, cur_queue);
680                         return -rte_errno;
681                 }
682                 sq_attr.state = MLX5_SQC_STATE_RST;
683                 sq_attr.sq_state = MLX5_SQC_STATE_RST;
684                 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
685                 if (ret == 0)
686                         txq_ctrl->hairpin_status = 0;
687                 mlx5_txq_release(dev, cur_queue);
688         } else {
689                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
690                 struct mlx5_rxq_ctrl *rxq_ctrl;
691                 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
692
693                 if (rxq == NULL) {
694                         rte_errno = EINVAL;
695                         DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
696                                 dev->data->port_id, cur_queue);
697                         return -rte_errno;
698                 }
699                 rxq_ctrl = rxq->ctrl;
700                 if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
701                         rte_errno = EINVAL;
702                         DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
703                                 dev->data->port_id, cur_queue);
704                         return -rte_errno;
705                 }
706                 if (rxq->hairpin_status == 0) {
707                         DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
708                                 dev->data->port_id, cur_queue);
709                         return 0;
710                 }
711                 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
712                         rte_errno = ENOMEM;
713                         DRV_LOG(ERR, "port %u no Rxq object found: %d",
714                                 dev->data->port_id, cur_queue);
715                         return -rte_errno;
716                 }
717                 rq_attr.state = MLX5_SQC_STATE_RST;
718                 rq_attr.rq_state = MLX5_SQC_STATE_RST;
719                 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
720                 if (ret == 0)
721                         rxq->hairpin_status = 0;
722         }
723         return ret;
724 }
725
726 /*
727  * Bind the hairpin port pairs, from the Tx to the peer Rx.
728  * This function only supports binding the Tx to one Rx port.
729  *
730  * @param dev
731  *   Pointer to Ethernet device structure.
732  * @param rx_port
733  *   Port identifier of the Rx port.
734  *
735  * @return
736  *   0 on success, a negative errno value otherwise and rte_errno is set.
737  */
738 static int
739 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
740 {
741         struct mlx5_priv *priv = dev->data->dev_private;
742         int ret = 0;
743         struct mlx5_txq_ctrl *txq_ctrl;
744         uint32_t i;
745         struct rte_hairpin_peer_info peer = {0xffffff};
746         struct rte_hairpin_peer_info cur;
747         const struct rte_eth_hairpin_conf *conf;
748         uint16_t num_q = 0;
749         uint16_t local_port = priv->dev_data->port_id;
750         uint32_t manual;
751         uint32_t explicit;
752         uint16_t rx_queue;
753
754         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
755                 rte_errno = ENODEV;
756                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
757                 return -rte_errno;
758         }
759         /*
760          * Before binding a TxQ to its peer RxQ, a first pass over the queues
761          * checks the configuration consistency. This costs a little time but
762          * is better than having to roll back afterwards.
763          */
764         for (i = 0; i != priv->txqs_n; i++) {
765                 txq_ctrl = mlx5_txq_get(dev, i);
766                 if (txq_ctrl == NULL)
767                         continue;
768                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
769                         mlx5_txq_release(dev, i);
770                         continue;
771                 }
772                 /*
773                  * All hairpin Tx queues of a single port that connected to the
774                  * same peer Rx port should have the same "auto binding" and
775                  * "implicit Tx flow" modes.
776                  * Peer consistency checking will be done in per queue binding.
777                  */
778                 conf = &txq_ctrl->hairpin_conf;
779                 if (conf->peers[0].port == rx_port) {
780                         if (num_q == 0) {
781                                 manual = conf->manual_bind;
782                                 explicit = conf->tx_explicit;
783                         } else {
784                                 if (manual != conf->manual_bind ||
785                                     explicit != conf->tx_explicit) {
786                                         rte_errno = EINVAL;
787                                         DRV_LOG(ERR, "port %u queue %d mode"
788                                                 " mismatch: %u %u, %u %u",
789                                                 local_port, i, manual,
790                                                 conf->manual_bind, explicit,
791                                                 conf->tx_explicit);
792                                         mlx5_txq_release(dev, i);
793                                         return -rte_errno;
794                                 }
795                         }
796                         num_q++;
797                 }
798                 mlx5_txq_release(dev, i);
799         }
800         /* If no queue is configured, return success directly. */
801         if (num_q == 0)
802                 return ret;
803         /* All the hairpin TX queues need to be traversed again. */
804         for (i = 0; i != priv->txqs_n; i++) {
805                 txq_ctrl = mlx5_txq_get(dev, i);
806                 if (txq_ctrl == NULL)
807                         continue;
808                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
809                         mlx5_txq_release(dev, i);
810                         continue;
811                 }
812                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
813                         mlx5_txq_release(dev, i);
814                         continue;
815                 }
816                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
817                 /*
818                  * Fetch peer RxQ's information.
819                  * No need to pass the information of the current queue.
820                  */
821                 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
822                                                         NULL, &peer, 1);
823                 if (ret != 0) {
824                         mlx5_txq_release(dev, i);
825                         goto error;
826                 }
827                 /* Accessing its own device, inside mlx5 PMD. */
828                 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
829                 if (ret != 0) {
830                         mlx5_txq_release(dev, i);
831                         goto error;
832                 }
833                 /* Pass TxQ's information to peer RxQ and try binding. */
834                 cur.peer_q = rx_queue;
835                 cur.qp_id = txq_ctrl->obj->sq->id;
836                 cur.vhca_id = priv->config.hca_attr.vhca_id;
837                 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
838                 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
839                 /*
840                  * In order to access another device properly, an RTE-level
841                  * private function is needed.
842                  */
843                 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
844                                                       &cur, 0);
845                 if (ret != 0) {
846                         mlx5_txq_release(dev, i);
847                         goto error;
848                 }
849                 mlx5_txq_release(dev, i);
850         }
851         return 0;
852 error:
853         /*
854          * Roll back the queues that were already bound.
855          * No need to check the return value of the queue unbind function.
856          */
857         do {
858                 /* No validation is needed here. */
859                 txq_ctrl = mlx5_txq_get(dev, i);
860                 if (txq_ctrl == NULL)
861                         continue;
862                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
863                 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
864                 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
865                 mlx5_txq_release(dev, i);
866         } while (i--);
867         return ret;
868 }
869
870 /*
871  * Unbind the hairpin port pair: the HW configuration of both devices is
872  * cleared and the status is reset for all the queues used between them.
873  * This function only supports unbinding the Tx from one Rx port.
874  *
875  * @param dev
876  *   Pointer to Ethernet device structure.
877  * @param rx_port
878  *   Port identifier of the Rx port.
879  *
880  * @return
881  *   0 on success, a negative errno value otherwise and rte_errno is set.
882  */
883 static int
884 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
885 {
886         struct mlx5_priv *priv = dev->data->dev_private;
887         struct mlx5_txq_ctrl *txq_ctrl;
888         uint32_t i;
889         int ret;
890         uint16_t cur_port = priv->dev_data->port_id;
891
892         if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
893                 rte_errno = ENODEV;
894                 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
895                 return -rte_errno;
896         }
897         for (i = 0; i != priv->txqs_n; i++) {
898                 uint16_t rx_queue;
899
900                 txq_ctrl = mlx5_txq_get(dev, i);
901                 if (txq_ctrl == NULL)
902                         continue;
903                 if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
904                         mlx5_txq_release(dev, i);
905                         continue;
906                 }
907                 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
908                         mlx5_txq_release(dev, i);
909                         continue;
910                 }
911                 /* Indeed, only the first used queue needs to be checked. */
912                 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
913                         if (cur_port != rx_port) {
914                                 rte_errno = EINVAL;
915                                 DRV_LOG(ERR, "port %u and port %u are in"
916                                         " auto-bind mode", cur_port, rx_port);
917                                 mlx5_txq_release(dev, i);
918                                 return -rte_errno;
919                         } else {
920                                 return 0;
921                         }
922                 }
923                 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
924                 mlx5_txq_release(dev, i);
925                 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
926                 if (ret) {
927                         DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
928                                 rx_port, rx_queue);
929                         return ret;
930                 }
931                 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
932                 if (ret) {
933                         DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
934                                 cur_port, i);
935                         return ret;
936                 }
937         }
938         return 0;
939 }
940
941 /*
942  * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
943  * @see mlx5_hairpin_bind_single_port()
944  */
945 int
946 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
947 {
948         int ret = 0;
949         uint16_t p, pp;
950
951         /*
952          * If the Rx port has no hairpin configuration with the current port,
953          * the binding will be skipped in the called function of single port.
954          * Device started status will be checked only before the queue
955          * information updating.
956          */
957         if (rx_port == RTE_MAX_ETHPORTS) {
958                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
959                         ret = mlx5_hairpin_bind_single_port(dev, p);
960                         if (ret != 0)
961                                 goto unbind;
962                 }
963                 return ret;
964         } else {
965                 return mlx5_hairpin_bind_single_port(dev, rx_port);
966         }
967 unbind:
968         MLX5_ETH_FOREACH_DEV(pp, dev->device)
969                 if (pp < p)
970                         mlx5_hairpin_unbind_single_port(dev, pp);
971         return ret;
972 }
973
974 /*
975  * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
976  * @see mlx5_hairpin_unbind_single_port()
977  */
978 int
979 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
980 {
981         int ret = 0;
982         uint16_t p;
983
984         if (rx_port == RTE_MAX_ETHPORTS)
985                 MLX5_ETH_FOREACH_DEV(p, dev->device) {
986                         ret = mlx5_hairpin_unbind_single_port(dev, p);
987                         if (ret != 0)
988                                 return ret;
989                 }
990         else
991                 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
992         return ret;
993 }
994
995 /*
996  * DPDK callback to get the hairpin peer ports list.
997  * This will return the actual number of peer ports and save the identifiers
998  * into the array (sorted, may be different from that when setting up the
999  * hairpin peer queues).
1000  * The peer port ID could be the same as the port ID of the current device.
1001  *
1002  * @param dev
1003  *   Pointer to Ethernet device structure.
1004  * @param peer_ports
1005  *   Pointer to array to save the port identifiers.
1006  * @param len
1007  *   The length of the array.
1008  * @param direction
1009  *   Current port to peer port direction.
1010  *   positive - current used as Tx to get all peer Rx ports.
1011  *   zero - current used as Rx to get all peer Tx ports.
1012  *
1013  * @return
1014  *   0 or a positive value on success (the actual number of peer ports);
1015  *   a negative errno value otherwise and rte_errno is set.
1016  */
1017 int
1018 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1019                             size_t len, uint32_t direction)
1020 {
1021         struct mlx5_priv *priv = dev->data->dev_private;
1022         struct mlx5_txq_ctrl *txq_ctrl;
1023         uint32_t i;
1024         uint16_t pp;
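        /* Bitmap of peer port IDs: de-duplicates ports and yields a sorted list. */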
1025         uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1026         int ret = 0;
1027
1028         if (direction) {
1029                 for (i = 0; i < priv->txqs_n; i++) {
1030                         txq_ctrl = mlx5_txq_get(dev, i);
1031                         if (!txq_ctrl)
1032                                 continue;
1033                         if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1034                                 mlx5_txq_release(dev, i);
1035                                 continue;
1036                         }
1037                         pp = txq_ctrl->hairpin_conf.peers[0].port;
1038                         if (pp >= RTE_MAX_ETHPORTS) {
1039                                 rte_errno = ERANGE;
1040                                 mlx5_txq_release(dev, i);
1041                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1042                                         "out of range %hu",
1043                                         priv->dev_data->port_id, i, pp);
1044                                 return -rte_errno;
1045                         }
1046                         bits[pp / 32] |= 1u << (pp % 32);
1047                         mlx5_txq_release(dev, i);
1048                 }
1049         } else {
1050                 for (i = 0; i < priv->rxqs_n; i++) {
1051                         struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1052                         struct mlx5_rxq_ctrl *rxq_ctrl;
1053
1054                         if (rxq == NULL)
1055                                 continue;
1056                         rxq_ctrl = rxq->ctrl;
1057                         if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
1058                                 continue;
1059                         pp = rxq->hairpin_conf.peers[0].port;
1060                         if (pp >= RTE_MAX_ETHPORTS) {
1061                                 rte_errno = ERANGE;
1062                                 DRV_LOG(ERR, "port %hu queue %u peer port "
1063                                         "out of range %hu",
1064                                         priv->dev_data->port_id, i, pp);
1065                                 return -rte_errno;
1066                         }
1067                         bits[pp / 32] |= 1u << (pp % 32);
1068                 }
1069         }
1070         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1071                 if (bits[i / 32] & (1u << (i % 32))) {
1072                         if ((size_t)ret >= len) {
1073                                 rte_errno = E2BIG;
1074                                 return -rte_errno;
1075                         }
1076                         peer_ports[ret++] = i;
1077                 }
1078         }
1079         return ret;
1080 }
1081
1082 /**
1083  * DPDK callback to start the device.
1084  *
1085  * Simulate device start by attaching all configured flows.
1086  *
1087  * @param dev
1088  *   Pointer to Ethernet device structure.
1089  *
1090  * @return
1091  *   0 on success, a negative errno value otherwise and rte_errno is set.
1092  */
1093 int
1094 mlx5_dev_start(struct rte_eth_dev *dev)
1095 {
1096         struct mlx5_priv *priv = dev->data->dev_private;
1097         int ret;
1098         int fine_inline;
1099
1100         DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
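        /* Look up the fine-granularity inline dynamic flag and cache its mask. */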
1101         fine_inline = rte_mbuf_dynflag_lookup
1102                 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1103         if (fine_inline >= 0)
1104                 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1105         else
1106                 rte_net_mlx5_dynf_inline_mask = 0;
1107         if (dev->data->nb_rx_queues > 0) {
1108                 ret = mlx5_dev_configure_rss_reta(dev);
1109                 if (ret) {
1110                         DRV_LOG(ERR, "port %u reta config failed: %s",
1111                                 dev->data->port_id, strerror(rte_errno));
1112                         return -rte_errno;
1113                 }
1114         }
1115         ret = mlx5_txpp_start(dev);
1116         if (ret) {
1117                 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1118                         dev->data->port_id, strerror(rte_errno));
1119                 goto error;
1120         }
1121         if ((priv->sh->devx && priv->config.dv_flow_en &&
1122             priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1123                 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1124                 if (ret)
1125                         goto error;
1126         }
1127         ret = mlx5_txq_start(dev);
1128         if (ret) {
1129                 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1130                         dev->data->port_id, strerror(rte_errno));
1131                 goto error;
1132         }
1133         if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
1134                 if (!priv->config.vf && !priv->config.sf &&
1135                     !priv->representor) {
1136                         ret = mlx5_get_flag_dropless_rq(dev);
1137                         if (ret < 0)
1138                                 DRV_LOG(WARNING,
1139                                         "port %u cannot query dropless flag",
1140                                         dev->data->port_id);
1141                         else if (!ret)
1142                                 DRV_LOG(WARNING,
1143                                         "port %u dropless_rq OFF, no rearming",
1144                                         dev->data->port_id);
1145                 } else {
1146                         DRV_LOG(DEBUG,
1147                                 "port %u doesn't support dropless_rq flag",
1148                                 dev->data->port_id);
1149                 }
1150         }
1151         ret = mlx5_rxq_start(dev);
1152         if (ret) {
1153                 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1154                         dev->data->port_id, strerror(rte_errno));
1155                 goto error;
1156         }
1157         /*
1158          * This step is skipped if no hairpin Tx queue is configured with an
1159          * Rx peer queue on the same device.
1160          */
1161         ret = mlx5_hairpin_auto_bind(dev);
1162         if (ret) {
1163                 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1164                         dev->data->port_id, strerror(rte_errno));
1165                 goto error;
1166         }
1167         /* Set started flag here for the following steps like control flow. */
1168         dev->data->dev_started = 1;
1169         ret = mlx5_rx_intr_vec_enable(dev);
1170         if (ret) {
1171                 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1172                         dev->data->port_id);
1173                 goto error;
1174         }
1175         mlx5_os_stats_init(dev);
1176         /*
1177          * Attach indirection table objects detached on port stop.
1178          * They may be needed to create RSS in non-isolated mode.
1179          */
1180         ret = mlx5_action_handle_attach(dev);
1181         if (ret) {
1182                 DRV_LOG(ERR,
1183                         "port %u failed to attach indirect actions: %s",
1184                         dev->data->port_id, rte_strerror(rte_errno));
1185                 goto error;
1186         }
1187         ret = mlx5_traffic_enable(dev);
1188         if (ret) {
1189                 DRV_LOG(ERR, "port %u failed to set defaults flows",
1190                         dev->data->port_id);
1191                 goto error;
1192         }
1193         /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1194         mlx5_flow_rxq_dynf_metadata_set(dev);
1195         /* Set flags and context to convert Rx timestamps. */
1196         mlx5_rxq_timestamp_set(dev);
1197         /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1198         mlx5_txq_dynf_timestamp_set(dev);
1199         /*
1200          * In non-cached mode, only the default mreg copy action needs to be
1201          * started, since no application-created flow exists anymore.
1202          * It is still worth wrapping this in an interface for further usage.
1203          */
1204         ret = mlx5_flow_start_default(dev);
1205         if (ret) {
1206                 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1207                         dev->data->port_id, strerror(rte_errno));
1208                 goto error;
1209         }
1210         if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1211                 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1212                         dev->data->port_id, rte_strerror(rte_errno));
1213                 goto error;
1214         }
1215         rte_wmb();
1216         dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1217         dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1218         /* Enable datapath on secondary process. */
1219         mlx5_mp_os_req_start_rxtx(dev);
1220         if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1221                 priv->sh->port[priv->dev_port - 1].ih_port_id =
1222                                         (uint32_t)dev->data->port_id;
1223         } else {
1224                 DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1225                         dev->data->port_id);
1226                 dev->data->dev_conf.intr_conf.lsc = 0;
1227                 dev->data->dev_conf.intr_conf.rmv = 0;
1228         }
1229         if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1230                 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1231                                         (uint32_t)dev->data->port_id;
1232         return 0;
1233 error:
1234         ret = rte_errno; /* Save rte_errno before cleanup. */
1235         /* Rollback. */
1236         dev->data->dev_started = 0;
1237         mlx5_flow_stop_default(dev);
1238         mlx5_traffic_disable(dev);
1239         mlx5_txq_stop(dev);
1240         mlx5_rxq_stop(dev);
1241         if (priv->obj_ops.lb_dummy_queue_release)
1242                 priv->obj_ops.lb_dummy_queue_release(dev);
1243         mlx5_txpp_stop(dev); /* Stop last. */
1244         rte_errno = ret; /* Restore rte_errno. */
1245         return -rte_errno;
1246 }
1247
1248 /**
1249  * DPDK callback to stop the device.
1250  *
1251  * Simulate device stop by detaching all configured flows.
1252  *
1253  * @param dev
1254  *   Pointer to Ethernet device structure.
1255  */
1256 int
1257 mlx5_dev_stop(struct rte_eth_dev *dev)
1258 {
1259         struct mlx5_priv *priv = dev->data->dev_private;
1260
1261         dev->data->dev_started = 0;
1262         /* Prevent crashes when queues are still in use. */
1263         dev->rx_pkt_burst = removed_rx_burst;
1264         dev->tx_pkt_burst = removed_tx_burst;
1265         rte_wmb();
1266         /* Disable datapath on secondary process. */
1267         mlx5_mp_os_req_stop_rxtx(dev);
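        /* Give in-flight datapath calls time to complete, ~1 ms per Rx queue. */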
1268         rte_delay_us_sleep(1000 * priv->rxqs_n);
1269         DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1270         mlx5_flow_stop_default(dev);
1271         /* Control flows for default traffic can be removed first. */
1272         mlx5_traffic_disable(dev);
1273         /* All RX queue flags will be cleared in the flush interface. */
1274         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1275         mlx5_flow_meter_rxq_flush(dev);
1276         mlx5_action_handle_detach(dev);
1277         mlx5_rx_intr_vec_disable(dev);
1278         priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1279         priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1280         mlx5_txq_stop(dev);
1281         mlx5_rxq_stop(dev);
1282         if (priv->obj_ops.lb_dummy_queue_release)
1283                 priv->obj_ops.lb_dummy_queue_release(dev);
1284         mlx5_txpp_stop(dev);
1285
1286         return 0;
1287 }
1288
1289 /**
1290  * Enable traffic flows configured by the control plane.
1291  *
1292  * @param dev
1293  *   Pointer to Ethernet device structure.
1296  *
1297  * @return
1298  *   0 on success, a negative errno value otherwise and rte_errno is set.
1299  */
1300 int
1301 mlx5_traffic_enable(struct rte_eth_dev *dev)
1302 {
1303         struct mlx5_priv *priv = dev->data->dev_private;
1304         struct rte_flow_item_eth bcast = {
1305                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1306         };
1307         struct rte_flow_item_eth ipv6_multi_spec = {
1308                 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1309         };
1310         struct rte_flow_item_eth ipv6_multi_mask = {
1311                 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1312         };
1313         struct rte_flow_item_eth unicast = {
1314                 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1315         };
1316         struct rte_flow_item_eth unicast_mask = {
1317                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1318         };
1319         const unsigned int vlan_filter_n = priv->vlan_filter_n;
1320         const struct rte_ether_addr cmp = {
1321                 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1322         };
1323         unsigned int i;
1324         unsigned int j;
1325         int ret;
1326
1327         /*
1328          * The hairpin Tx queue default flow must be created regardless of
1329          * isolated mode. Otherwise all packets to be sent would go out
1330          * directly without the Tx flow actions, e.g. encapsulation.
1331          */
1332         for (i = 0; i != priv->txqs_n; ++i) {
1333                 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1334                 if (!txq_ctrl)
1335                         continue;
1336                 /* Only Tx implicit mode requires the default Tx flow. */
1337                 if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1338                     txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1339                     txq_ctrl->hairpin_conf.peers[0].port ==
1340                     priv->dev_data->port_id) {
1341                         ret = mlx5_ctrl_flow_source_queue(dev, i);
1342                         if (ret) {
1343                                 mlx5_txq_release(dev, i);
1344                                 goto error;
1345                         }
1346                 }
1347                 if ((priv->representor || priv->master) &&
1348                     priv->config.dv_esw_en) {
1349                         if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1350                                 DRV_LOG(ERR,
1351                                         "Port %u Tx queue %u SQ create representor devx default miss rule failed.",
1352                                         dev->data->port_id, i);
                                mlx5_txq_release(dev, i);
1353                                 goto error;
1354                         }
1355                 }
1356                 mlx5_txq_release(dev, i);
1357         }
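        /*
         * On E-Switch (master/representor) ports try to install the FDB
         * default jump rule; without it only E-Switch group 0 flows can be
         * used.
         */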
1358         if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1359                 if (mlx5_flow_create_esw_table_zero_flow(dev))
1360                         priv->fdb_def_rule = 1;
1361                 else
1362                         DRV_LOG(INFO, "port %u FDB default rule cannot be"
1363                                 " configured - only Eswitch group 0 flows are"
1364                                 " supported.", dev->data->port_id);
1365         }
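        /*
         * Handle default LACP steering on a bonding PF when the application
         * has not taken ownership of LACP handling (lacp_by_user).
         */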
1366         if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1367                 ret = mlx5_flow_lacp_miss(dev);
1368                 if (ret)
1369                         DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1370                                 "forward LACP to kernel.", dev->data->port_id);
1371                 else
1372                         DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1373                                 dev->data->port_id);
1374         }
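        /*
         * In isolated mode the application manages all Rx flows itself,
         * so skip the default Rx rules below.
         */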
1375         if (priv->isolated)
1376                 return 0;
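        /* Promiscuous mode: an all-zero spec and mask matches every packet. */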
1377         if (dev->data->promiscuous) {
1378                 struct rte_flow_item_eth promisc = {
1379                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1380                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1381                         .type = 0,
1382                 };
1383
1384                 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1385                 if (ret)
1386                         goto error;
1387         }
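        /* All-multicast: match only the multicast bit of the destination MAC. */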
1388         if (dev->data->all_multicast) {
1389                 struct rte_flow_item_eth multicast = {
1390                         .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1391                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1392                         .type = 0,
1393                 };
1394
1395                 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1396                 if (ret)
1397                         goto error;
1398         } else {
1399                 /* Add broadcast/multicast flows. */
1400                 for (i = 0; i != vlan_filter_n; ++i) {
1401                         uint16_t vlan = priv->vlan_filter[i];
1402
1403                         struct rte_flow_item_vlan vlan_spec = {
1404                                 .tci = rte_cpu_to_be_16(vlan),
1405                         };
1406                         struct rte_flow_item_vlan vlan_mask =
1407                                 rte_flow_item_vlan_mask;
1408
1409                         ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1410                                                   &vlan_spec, &vlan_mask);
1411                         if (ret)
1412                                 goto error;
1413                         ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1414                                                   &ipv6_multi_mask,
1415                                                   &vlan_spec, &vlan_mask);
1416                         if (ret)
1417                                 goto error;
1418                 }
1419                 if (!vlan_filter_n) {
1420                         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1421                         if (ret)
1422                                 goto error;
1423                         ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1424                                              &ipv6_multi_mask);
1425                         if (ret) {
1426                                 /* Do not fail on IPv6 multicast flow creation failure. */
1427                                 DRV_LOG(WARNING,
1428                                         "IPv6 multicast flow is not supported");
1429                                 ret = 0;
1430                         }
1431                 }
1432         }
1433         /* Add MAC address flows. */
1434         for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1435                 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1436
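                /* Skip table entries holding the all-zero (unset) MAC address. */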
1437                 if (!memcmp(mac, &cmp, sizeof(*mac)))
1438                         continue;
1439                 memcpy(&unicast.dst.addr_bytes,
1440                        mac->addr_bytes,
1441                        RTE_ETHER_ADDR_LEN);
1442                 for (j = 0; j != vlan_filter_n; ++j) {
1443                         uint16_t vlan = priv->vlan_filter[j];
1444
1445                         struct rte_flow_item_vlan vlan_spec = {
1446                                 .tci = rte_cpu_to_be_16(vlan),
1447                         };
1448                         struct rte_flow_item_vlan vlan_mask =
1449                                 rte_flow_item_vlan_mask;
1450
1451                         ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1452                                                   &unicast_mask,
1453                                                   &vlan_spec,
1454                                                   &vlan_mask);
1455                         if (ret)
1456                                 goto error;
1457                 }
1458                 if (!vlan_filter_n) {
1459                         ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1460                         if (ret)
1461                                 goto error;
1462                 }
1463         }
1464         return 0;
1465 error:
1466         ret = rte_errno; /* Save rte_errno before cleanup. */
1467         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1468         rte_errno = ret; /* Restore rte_errno. */
1469         return -rte_errno;
1470 }
1471
1473 /**
1474  * Disable the traffic flows configured by the control plane.
1475  *
1476  * @param dev
1477  *   Pointer to Ethernet device structure.
1478  */
1479 void
1480 mlx5_traffic_disable(struct rte_eth_dev *dev)
1481 {
1482         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1483 }
1484
1485 /**
1486  * Restart the traffic flows configured by the control plane.
1487  *
1488  * @param dev
1489  *   Pointer to Ethernet device structure.
1490  *
1491  * @return
1492  *   0 on success, a negative errno value otherwise and rte_errno is set.
1493  */
1494 int
1495 mlx5_traffic_restart(struct rte_eth_dev *dev)
1496 {
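        /* Control flows are re-created only while the port is started. */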
1497         if (dev->data->dev_started) {
1498                 mlx5_traffic_disable(dev);
1499                 return mlx5_traffic_enable(dev);
1500         }
1501         return 0;
1502 }