net/mlx5: support Rx hairpin queues
[dpdk.git] / drivers / net / mlx5 / mlx5_rxq.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <stddef.h>
7 #include <assert.h>
8 #include <errno.h>
9 #include <string.h>
10 #include <stdint.h>
11 #include <fcntl.h>
12 #include <sys/queue.h>
13
14 /* Verbs header. */
15 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic ignored "-Wpedantic"
18 #endif
19 #include <infiniband/verbs.h>
20 #include <infiniband/mlx5dv.h>
21 #ifdef PEDANTIC
22 #pragma GCC diagnostic error "-Wpedantic"
23 #endif
24
25 #include <rte_mbuf.h>
26 #include <rte_malloc.h>
27 #include <rte_ethdev_driver.h>
28 #include <rte_common.h>
29 #include <rte_interrupts.h>
30 #include <rte_debug.h>
31 #include <rte_io.h>
32
33 #include "mlx5.h"
34 #include "mlx5_rxtx.h"
35 #include "mlx5_utils.h"
36 #include "mlx5_autoconf.h"
37 #include "mlx5_defs.h"
38 #include "mlx5_glue.h"
39
40 /* Default RSS hash key also used for ConnectX-3. */
41 uint8_t rss_hash_default_key[] = {
42         0x2c, 0xc6, 0x81, 0xd1,
43         0x5b, 0xdb, 0xf4, 0xf7,
44         0xfc, 0xa2, 0x83, 0x19,
45         0xdb, 0x1a, 0x3e, 0x94,
46         0x6b, 0x9e, 0x38, 0xd9,
47         0x2c, 0x9c, 0x03, 0xd1,
48         0xad, 0x99, 0x44, 0xa7,
49         0xd9, 0x56, 0x3d, 0x59,
50         0x06, 0x3c, 0x25, 0xf3,
51         0xfc, 0x1f, 0xdc, 0x2a,
52 };
53
54 /* Length of the default RSS hash key. */
55 static_assert(MLX5_RSS_HASH_KEY_LEN ==
56               (unsigned int)sizeof(rss_hash_default_key),
57               "wrong RSS default key size.");
58
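/*
 * Example (illustrative sketch, not part of the driver): an application can
 * replace the key above at runtime through the generic ethdev API.  Here
 * "port_id" is assumed to be an already configured mlx5 port and "my_key"
 * an application-chosen 40-byte key:
 *
 *     uint8_t my_key[MLX5_RSS_HASH_KEY_LEN];
 *     struct rte_eth_rss_conf rss_conf = {
 *             .rss_key = my_key,
 *             .rss_key_len = MLX5_RSS_HASH_KEY_LEN,
 *             .rss_hf = ETH_RSS_IP,
 *     };
 *     int ret;
 *
 *     memcpy(my_key, rss_hash_default_key, sizeof(my_key));
 *     ret = rte_eth_dev_rss_hash_update(port_id, &rss_conf);
 */
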
59 /**
60  * Check whether Multi-Packet RQ can be enabled for the device.
61  *
62  * @param dev
63  *   Pointer to Ethernet device.
64  *
65  * @return
66  *   1 if supported, negative errno value if not.
67  */
68 inline int
69 mlx5_check_mprq_support(struct rte_eth_dev *dev)
70 {
71         struct mlx5_priv *priv = dev->data->dev_private;
72
73         if (priv->config.mprq.enabled &&
74             priv->rxqs_n >= priv->config.mprq.min_rxqs_num)
75                 return 1;
76         return -ENOTSUP;
77 }
78
79 /**
80  * Check whether Multi-Packet RQ is enabled for the Rx queue.
81  *
82  * @param rxq
83  *   Pointer to receive queue structure.
84  *
85  * @return
86  *   0 if disabled, otherwise enabled.
87  */
88 inline int
89 mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq)
90 {
91         return rxq->strd_num_n > 0;
92 }
93
94 /**
95  * Check whether Multi-Packet RQ is enabled for the device.
96  *
97  * @param dev
98  *   Pointer to Ethernet device.
99  *
100  * @return
101  *   0 if disabled, otherwise enabled.
102  */
103 inline int
104 mlx5_mprq_enabled(struct rte_eth_dev *dev)
105 {
106         struct mlx5_priv *priv = dev->data->dev_private;
107         uint16_t i;
108         uint16_t n = 0;
109         uint16_t n_ibv = 0;
110
111         if (mlx5_check_mprq_support(dev) < 0)
112                 return 0;
113         /* All the configured queues should be enabled. */
114         for (i = 0; i < priv->rxqs_n; ++i) {
115                 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
116                 struct mlx5_rxq_ctrl *rxq_ctrl = container_of
117                         (rxq, struct mlx5_rxq_ctrl, rxq);
118
119                 if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
120                         continue;
121                 n_ibv++;
122                 if (mlx5_rxq_mprq_enabled(rxq))
123                         ++n;
124         }
125         /* Multi-Packet RQ can't be partially configured. */
126         assert(n == 0 || n == n_ibv);
127         return n == n_ibv;
128 }
129
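/*
 * Example (illustrative, not part of the driver): Multi-Packet RQ is turned
 * on per device through devargs (names as documented in
 * doc/guides/nics/mlx5.rst, PCI address is a placeholder), e.g. for testpmd:
 *
 *     testpmd -w 0000:03:00.0,mprq_en=1,rxqs_min_mprq=2 -- --rxq=4 --txq=4
 *
 * mlx5_mprq_enabled() then reports 1 only when every standard Rx queue of
 * the port was actually created with a striding RQ.
 */
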
130 /**
131  * Allocate RX queue elements for Multi-Packet RQ.
132  *
133  * @param rxq_ctrl
134  *   Pointer to RX queue structure.
135  *
136  * @return
137  *   0 on success, a negative errno value otherwise and rte_errno is set.
138  */
139 static int
140 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
141 {
142         struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
143         unsigned int wqe_n = 1 << rxq->elts_n;
144         unsigned int i;
145         int err;
146
147         /* Iterate on segments. */
148         for (i = 0; i <= wqe_n; ++i) {
149                 struct mlx5_mprq_buf *buf;
150
151                 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) {
152                         DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id);
153                         rte_errno = ENOMEM;
154                         goto error;
155                 }
156                 if (i < wqe_n)
157                         (*rxq->mprq_bufs)[i] = buf;
158                 else
159                         rxq->mprq_repl = buf;
160         }
161         DRV_LOG(DEBUG,
162                 "port %u Rx queue %u allocated and configured %u segments",
163                 rxq->port_id, rxq->idx, wqe_n);
164         return 0;
165 error:
166         err = rte_errno; /* Save rte_errno before cleanup. */
167         wqe_n = i;
168         for (i = 0; (i != wqe_n); ++i) {
169                 if ((*rxq->mprq_bufs)[i] != NULL)
170                         rte_mempool_put(rxq->mprq_mp,
171                                         (*rxq->mprq_bufs)[i]);
172                 (*rxq->mprq_bufs)[i] = NULL;
173         }
174         DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
175                 rxq->port_id, rxq->idx);
176         rte_errno = err; /* Restore rte_errno. */
177         return -rte_errno;
178 }
179
180 /**
181  * Allocate RX queue elements for Single-Packet RQ.
182  *
183  * @param rxq_ctrl
184  *   Pointer to RX queue structure.
185  *
186  * @return
187  *   0 on success, errno value on failure.
188  */
189 static int
190 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
191 {
192         const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
193         unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
194         unsigned int i;
195         int err;
196
197         /* Iterate on segments. */
198         for (i = 0; (i != elts_n); ++i) {
199                 struct rte_mbuf *buf;
200
201                 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
202                 if (buf == NULL) {
203                         DRV_LOG(ERR, "port %u empty mbuf pool",
204                                 PORT_ID(rxq_ctrl->priv));
205                         rte_errno = ENOMEM;
206                         goto error;
207                 }
208                 /* Headroom is reserved by rte_pktmbuf_alloc(). */
209                 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
210                 /* Buffer is supposed to be empty. */
211                 assert(rte_pktmbuf_data_len(buf) == 0);
212                 assert(rte_pktmbuf_pkt_len(buf) == 0);
213                 assert(!buf->next);
214                 /* Only the first segment keeps headroom. */
215                 if (i % sges_n)
216                         SET_DATA_OFF(buf, 0);
217                 PORT(buf) = rxq_ctrl->rxq.port_id;
218                 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
219                 PKT_LEN(buf) = DATA_LEN(buf);
220                 NB_SEGS(buf) = 1;
221                 (*rxq_ctrl->rxq.elts)[i] = buf;
222         }
223         /* If Rx vector is activated. */
224         if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
225                 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
226                 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
227                 int j;
228
229                 /* Initialize default rearm_data for vPMD. */
230                 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
231                 rte_mbuf_refcnt_set(mbuf_init, 1);
232                 mbuf_init->nb_segs = 1;
233                 mbuf_init->port = rxq->port_id;
234                 /*
235                  * prevent compiler reordering:
236                  * rearm_data covers previous fields.
237                  */
238                 rte_compiler_barrier();
239                 rxq->mbuf_initializer =
240                         *(uint64_t *)&mbuf_init->rearm_data;
241                 /* Padding with a fake mbuf for vectorized Rx. */
242                 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
243                         (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
244         }
245         DRV_LOG(DEBUG,
246                 "port %u Rx queue %u allocated and configured %u segments"
247                 " (max %u packets)",
248                 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
249                 elts_n / (1 << rxq_ctrl->rxq.sges_n));
250         return 0;
251 error:
252         err = rte_errno; /* Save rte_errno before cleanup. */
253         elts_n = i;
254         for (i = 0; (i != elts_n); ++i) {
255                 if ((*rxq_ctrl->rxq.elts)[i] != NULL)
256                         rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
257                 (*rxq_ctrl->rxq.elts)[i] = NULL;
258         }
259         DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
260                 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
261         rte_errno = err; /* Restore rte_errno. */
262         return -rte_errno;
263 }
264
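/*
 * The "mbuf_initializer" built above lets the vectorized Rx path reset the
 * data_off/refcnt/nb_segs/port fields of a received mbuf with one 64-bit
 * store instead of four separate writes.  A minimal scalar sketch
 * (hypothetical helper name) of what the vector code in mlx5_rxtx_vec_*.c
 * effectively does per packet:
 *
 *     static inline void
 *     rxq_rearm_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt)
 *     {
 *             *(uint64_t *)&pkt->rearm_data = rxq->mbuf_initializer;
 *     }
 */
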
265 /**
266  * Allocate RX queue elements.
267  *
268  * @param rxq_ctrl
269  *   Pointer to RX queue structure.
270  *
271  * @return
272  *   0 on success, errno value on failure.
273  */
274 int
275 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
276 {
277         return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
278                rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
279 }
280
281 /**
282  * Free RX queue elements for Multi-Packet RQ.
283  *
284  * @param rxq_ctrl
285  *   Pointer to RX queue structure.
286  */
287 static void
288 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
289 {
290         struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
291         uint16_t i;
292
293         DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
294                 rxq->port_id, rxq->idx);
295         if (rxq->mprq_bufs == NULL)
296                 return;
297         assert(mlx5_rxq_check_vec_support(rxq) < 0);
298         for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
299                 if ((*rxq->mprq_bufs)[i] != NULL)
300                         mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
301                 (*rxq->mprq_bufs)[i] = NULL;
302         }
303         if (rxq->mprq_repl != NULL) {
304                 mlx5_mprq_buf_free(rxq->mprq_repl);
305                 rxq->mprq_repl = NULL;
306         }
307 }
308
309 /**
310  * Free RX queue elements for Single-Packet RQ.
311  *
312  * @param rxq_ctrl
313  *   Pointer to RX queue structure.
314  */
315 static void
316 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
317 {
318         struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
319         const uint16_t q_n = (1 << rxq->elts_n);
320         const uint16_t q_mask = q_n - 1;
321         uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
322         uint16_t i;
323
324         DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
325                 PORT_ID(rxq_ctrl->priv), rxq->idx);
326         if (rxq->elts == NULL)
327                 return;
328         /*
329          * Some mbufs in the ring belong to the application; they cannot be
330          * freed.
331          */
332         if (mlx5_rxq_check_vec_support(rxq) > 0) {
333                 for (i = 0; i < used; ++i)
334                         (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
335                 rxq->rq_pi = rxq->rq_ci;
336         }
337         for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
338                 if ((*rxq->elts)[i] != NULL)
339                         rte_pktmbuf_free_seg((*rxq->elts)[i]);
340                 (*rxq->elts)[i] = NULL;
341         }
342 }
343
344 /**
345  * Free RX queue elements.
346  *
347  * @param rxq_ctrl
348  *   Pointer to RX queue structure.
349  */
350 static void
351 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
352 {
353         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
354                 rxq_free_elts_mprq(rxq_ctrl);
355         else
356                 rxq_free_elts_sprq(rxq_ctrl);
357 }
358
359 /**
360  * Returns the per-queue supported offloads.
361  *
362  * @param dev
363  *   Pointer to Ethernet device.
364  *
365  * @return
366  *   Supported Rx offloads.
367  */
368 uint64_t
369 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev)
370 {
371         struct mlx5_priv *priv = dev->data->dev_private;
372         struct mlx5_dev_config *config = &priv->config;
373         uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER |
374                              DEV_RX_OFFLOAD_TIMESTAMP |
375                              DEV_RX_OFFLOAD_JUMBO_FRAME);
376
377         if (config->hw_fcs_strip)
378                 offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
379
380         if (config->hw_csum)
381                 offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM |
382                              DEV_RX_OFFLOAD_UDP_CKSUM |
383                              DEV_RX_OFFLOAD_TCP_CKSUM);
384         if (config->hw_vlan_strip)
385                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
386         if (MLX5_LRO_SUPPORTED(dev))
387                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
388         return offloads;
389 }
390
391
392 /**
393  * Returns the per-port supported offloads.
394  *
395  * @return
396  *   Supported Rx offloads.
397  */
398 uint64_t
399 mlx5_get_rx_port_offloads(void)
400 {
401         uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER;
402
403         return offloads;
404 }
405
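/*
 * Example (illustrative, not part of the driver): the two helpers above end
 * up in rte_eth_dev_info.rx_queue_offload_capa and .rx_offload_capa.  A
 * typical application sequence, with "port_id", "nb_rxq" and "nb_txq" as
 * placeholders:
 *
 *     struct rte_eth_dev_info dev_info;
 *     struct rte_eth_conf conf = { 0 };
 *     int ret;
 *
 *     rte_eth_dev_info_get(port_id, &dev_info);
 *     if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CHECKSUM)
 *             conf.rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM;
 *     if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
 *             conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
 *     ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */
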
406 /**
407  * Verify if the queue can be released.
408  *
409  * @param dev
410  *   Pointer to Ethernet device.
411  * @param idx
412  *   RX queue index.
413  *
414  * @return
415  *   1 if the queue can be released,
416  *   0 if the queue cannot be released because references to it remain,
417  *   a negative errno value and rte_errno is set if the queue doesn't exist.
418  */
419 static int
420 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
421 {
422         struct mlx5_priv *priv = dev->data->dev_private;
423         struct mlx5_rxq_ctrl *rxq_ctrl;
424
425         if (!(*priv->rxqs)[idx]) {
426                 rte_errno = EINVAL;
427                 return -rte_errno;
428         }
429         rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
430         return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
431 }
432
433 /**
434  * Rx queue presetup checks.
435  *
436  * @param dev
437  *   Pointer to Ethernet device structure.
438  * @param idx
439  *   RX queue index.
440  * @param desc
441  *   Number of descriptors to configure in queue.
442  *
443  * @return
444  *   0 on success, a negative errno value otherwise and rte_errno is set.
445  */
446 static int
447 mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc)
448 {
449         struct mlx5_priv *priv = dev->data->dev_private;
450
451         if (!rte_is_power_of_2(desc)) {
452                 desc = 1 << log2above(desc);
453                 DRV_LOG(WARNING,
454                         "port %u increased number of descriptors in Rx queue %u"
455                         " to the next power of two (%d)",
456                         dev->data->port_id, idx, desc);
457         }
458         DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
459                 dev->data->port_id, idx, desc);
460         if (idx >= priv->rxqs_n) {
461                 DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
462                         dev->data->port_id, idx, priv->rxqs_n);
463                 rte_errno = EOVERFLOW;
464                 return -rte_errno;
465         }
466         if (!mlx5_rxq_releasable(dev, idx)) {
467                 DRV_LOG(ERR, "port %u unable to release queue index %u",
468                         dev->data->port_id, idx);
469                 rte_errno = EBUSY;
470                 return -rte_errno;
471         }
472         mlx5_rxq_release(dev, idx);
473         return 0;
474 }
475
476 /**
477  * DPDK callback to configure a Rx queue.
 *
478  * @param dev
479  *   Pointer to Ethernet device structure.
480  * @param idx
481  *   RX queue index.
482  * @param desc
483  *   Number of descriptors to configure in queue.
484  * @param socket
485  *   NUMA socket on which memory must be allocated.
486  * @param[in] conf
487  *   Thresholds parameters.
488  * @param mp
489  *   Memory pool for buffer allocations.
490  *
491  * @return
492  *   0 on success, a negative errno value otherwise and rte_errno is set.
493  */
494 int
495 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
496                     unsigned int socket, const struct rte_eth_rxconf *conf,
497                     struct rte_mempool *mp)
498 {
499         struct mlx5_priv *priv = dev->data->dev_private;
500         struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
501         struct mlx5_rxq_ctrl *rxq_ctrl =
502                 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
503         int res;
504
505         res = mlx5_rx_queue_pre_setup(dev, idx, desc);
506         if (res)
507                 return res;
508         rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
509         if (!rxq_ctrl) {
510                 DRV_LOG(ERR, "port %u unable to allocate queue index %u",
511                         dev->data->port_id, idx);
512                 rte_errno = ENOMEM;
513                 return -rte_errno;
514         }
515         DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
516                 dev->data->port_id, idx);
517         (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
518         return 0;
519 }
520
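/*
 * Example (illustrative, not part of the driver): mlx5_rx_queue_setup() is
 * the rx_queue_setup dev_ops callback and is reached through the generic
 * API, e.g. with a previously created mempool "mbuf_pool" and placeholder
 * port/queue/descriptor values:
 *
 *     ret = rte_eth_rx_queue_setup(port_id, 0, 512,
 *                                  rte_eth_dev_socket_id(port_id),
 *                                  NULL, mbuf_pool);
 */
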
521 /**
522  * DPDK callback to configure a Rx hairpin queue.
 *
523  * @param dev
524  *   Pointer to Ethernet device structure.
525  * @param idx
526  *   RX queue index.
527  * @param desc
528  *   Number of descriptors to configure in queue.
529  * @param hairpin_conf
530  *   Hairpin configuration parameters.
531  *
532  * @return
533  *   0 on success, a negative errno value otherwise and rte_errno is set.
534  */
535 int
536 mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
537                             uint16_t desc,
538                             const struct rte_eth_hairpin_conf *hairpin_conf)
539 {
540         struct mlx5_priv *priv = dev->data->dev_private;
541         struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
542         struct mlx5_rxq_ctrl *rxq_ctrl =
543                 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
544         int res;
545
546         res = mlx5_rx_queue_pre_setup(dev, idx, desc);
547         if (res)
548                 return res;
549         if (hairpin_conf->peer_count != 1 ||
550             hairpin_conf->peers[0].port != dev->data->port_id ||
551             hairpin_conf->peers[0].queue >= priv->txqs_n) {
552                 DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u:"
553                         " invalid hairpin configuration", dev->data->port_id,
554                         idx);
555                 rte_errno = EINVAL;
556                 return -rte_errno;
557         }
558         rxq_ctrl = mlx5_rxq_hairpin_new(dev, idx, desc, hairpin_conf);
559         if (!rxq_ctrl) {
560                 DRV_LOG(ERR, "port %u unable to allocate queue index %u",
561                         dev->data->port_id, idx);
562                 rte_errno = ENOMEM;
563                 return -rte_errno;
564         }
565         DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
566                 dev->data->port_id, idx);
567         (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
568         return 0;
569 }
570
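/*
 * Example (illustrative, not part of the driver): a hairpin Rx queue is
 * bound to a Tx queue of the same port (as enforced above) through the
 * generic hairpin API.  Port, queue and descriptor numbers below are
 * placeholders:
 *
 *     struct rte_eth_hairpin_conf hairpin_conf = { .peer_count = 1 };
 *     int ret;
 *
 *     hairpin_conf.peers[0].port = port_id;
 *     hairpin_conf.peers[0].queue = 0;
 *     ret = rte_eth_rx_hairpin_queue_setup(port_id, rx_queue_id, 512,
 *                                          &hairpin_conf);
 *
 * The peer Tx queue is configured with rte_eth_tx_hairpin_queue_setup().
 */
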
571 /**
572  * DPDK callback to release a RX queue.
573  *
574  * @param dpdk_rxq
575  *   Generic RX queue pointer.
576  */
577 void
578 mlx5_rx_queue_release(void *dpdk_rxq)
579 {
580         struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
581         struct mlx5_rxq_ctrl *rxq_ctrl;
582         struct mlx5_priv *priv;
583
584         if (rxq == NULL)
585                 return;
586         rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
587         priv = rxq_ctrl->priv;
588         if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
589                 rte_panic("port %u Rx queue %u is still used by a flow and"
590                           " cannot be removed\n",
591                           PORT_ID(priv), rxq->idx);
592         mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
593 }
594
595 /**
596  * Get an Rx queue Verbs/DevX object.
597  *
598  * @param dev
599  *   Pointer to Ethernet device.
600  * @param idx
601  *   Queue index in DPDK Rx queue array
602  *
603  * @return
604  *   The Verbs/DevX object if it exists.
605  */
606 static struct mlx5_rxq_obj *
607 mlx5_rxq_obj_get(struct rte_eth_dev *dev, uint16_t idx)
608 {
609         struct mlx5_priv *priv = dev->data->dev_private;
610         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
611         struct mlx5_rxq_ctrl *rxq_ctrl;
612
613         if (idx >= priv->rxqs_n)
614                 return NULL;
615         if (!rxq_data)
616                 return NULL;
617         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
618         if (rxq_ctrl->obj)
619                 rte_atomic32_inc(&rxq_ctrl->obj->refcnt);
620         return rxq_ctrl->obj;
621 }
622
623 /**
624  * Release the resources allocated for an RQ DevX object.
625  *
626  * @param rxq_ctrl
627  *   DevX Rx queue object.
628  */
629 static void
630 rxq_release_rq_resources(struct mlx5_rxq_ctrl *rxq_ctrl)
631 {
632         if (rxq_ctrl->rxq.wqes) {
633                 rte_free((void *)(uintptr_t)rxq_ctrl->rxq.wqes);
634                 rxq_ctrl->rxq.wqes = NULL;
635         }
636         if (rxq_ctrl->wq_umem) {
637                 mlx5_glue->devx_umem_dereg(rxq_ctrl->wq_umem);
638                 rxq_ctrl->wq_umem = NULL;
639         }
640 }
641
642 /**
643  * Release an Rx hairpin related resources.
644  *
645  * @param rxq_obj
646  *   Hairpin Rx queue object.
647  */
648 static void
649 rxq_obj_hairpin_release(struct mlx5_rxq_obj *rxq_obj)
650 {
651         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
652
653         assert(rxq_obj);
654         rq_attr.state = MLX5_RQC_STATE_RST;
655         rq_attr.rq_state = MLX5_RQC_STATE_RDY;
656         mlx5_devx_cmd_modify_rq(rxq_obj->rq, &rq_attr);
657         claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq));
658 }
659
660 /**
661  * Release an Rx verbs/DevX queue object.
662  *
663  * @param rxq_obj
664  *   Verbs/DevX Rx queue object.
665  *
666  * @return
667  *   1 while a reference on it exists, 0 when freed.
668  */
669 static int
670 mlx5_rxq_obj_release(struct mlx5_rxq_obj *rxq_obj)
671 {
672         assert(rxq_obj);
673         if (rxq_obj->type == MLX5_RXQ_OBJ_TYPE_IBV)
674                 assert(rxq_obj->wq);
675         assert(rxq_obj->cq);
676         if (rte_atomic32_dec_and_test(&rxq_obj->refcnt)) {
677                 switch (rxq_obj->type) {
678                 case MLX5_RXQ_OBJ_TYPE_IBV:
679                         rxq_free_elts(rxq_obj->rxq_ctrl);
680                         claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
681                         claim_zero(mlx5_glue->destroy_cq(rxq_obj->cq));
682                         break;
683                 case MLX5_RXQ_OBJ_TYPE_DEVX_RQ:
684                         rxq_free_elts(rxq_obj->rxq_ctrl);
685                         claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq));
686                         rxq_release_rq_resources(rxq_obj->rxq_ctrl);
687                         claim_zero(mlx5_glue->destroy_cq(rxq_obj->cq));
688                         break;
689                 case MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN:
690                         rxq_obj_hairpin_release(rxq_obj);
691                         break;
692                 }
693                 if (rxq_obj->channel)
694                         claim_zero(mlx5_glue->destroy_comp_channel
695                                    (rxq_obj->channel));
696                 LIST_REMOVE(rxq_obj, next);
697                 rte_free(rxq_obj);
698                 return 0;
699         }
700         return 1;
701 }
702
703 /**
704  * Allocate queue vector and fill epoll fd list for Rx interrupts.
705  *
706  * @param dev
707  *   Pointer to Ethernet device.
708  *
709  * @return
710  *   0 on success, a negative errno value otherwise and rte_errno is set.
711  */
712 int
713 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
714 {
715         struct mlx5_priv *priv = dev->data->dev_private;
716         unsigned int i;
717         unsigned int rxqs_n = priv->rxqs_n;
718         unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
719         unsigned int count = 0;
720         struct rte_intr_handle *intr_handle = dev->intr_handle;
721
722         if (!dev->data->dev_conf.intr_conf.rxq)
723                 return 0;
724         mlx5_rx_intr_vec_disable(dev);
725         intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
726         if (intr_handle->intr_vec == NULL) {
727                 DRV_LOG(ERR,
728                         "port %u failed to allocate memory for interrupt"
729                         " vector, Rx interrupts will not be supported",
730                         dev->data->port_id);
731                 rte_errno = ENOMEM;
732                 return -rte_errno;
733         }
734         intr_handle->type = RTE_INTR_HANDLE_EXT;
735         for (i = 0; i != n; ++i) {
736                 /* This rxq obj must not be released in this function. */
737                 struct mlx5_rxq_obj *rxq_obj = mlx5_rxq_obj_get(dev, i);
738                 int fd;
739                 int flags;
740                 int rc;
741
742                 /* Skip queues that cannot request interrupts. */
743                 if (!rxq_obj || !rxq_obj->channel) {
744                         /* Use invalid intr_vec[] index to disable entry. */
745                         intr_handle->intr_vec[i] =
746                                 RTE_INTR_VEC_RXTX_OFFSET +
747                                 RTE_MAX_RXTX_INTR_VEC_ID;
748                         continue;
749                 }
750                 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
751                         DRV_LOG(ERR,
752                                 "port %u too many Rx queues for interrupt"
753                                 " vector size (%d), Rx interrupts cannot be"
754                                 " enabled",
755                                 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID);
756                         mlx5_rx_intr_vec_disable(dev);
757                         rte_errno = ENOMEM;
758                         return -rte_errno;
759                 }
760                 fd = rxq_obj->channel->fd;
761                 flags = fcntl(fd, F_GETFL);
762                 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
763                 if (rc < 0) {
764                         rte_errno = errno;
765                         DRV_LOG(ERR,
766                                 "port %u failed to make Rx interrupt file"
767                                 " descriptor %d non-blocking for queue index"
768                                 " %d",
769                                 dev->data->port_id, fd, i);
770                         mlx5_rx_intr_vec_disable(dev);
771                         return -rte_errno;
772                 }
773                 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
774                 intr_handle->efds[count] = fd;
775                 count++;
776         }
777         if (!count)
778                 mlx5_rx_intr_vec_disable(dev);
779         else
780                 intr_handle->nb_efd = count;
781         return 0;
782 }
783
784 /**
785  * Clean up Rx interrupts handler.
786  *
787  * @param dev
788  *   Pointer to Ethernet device.
789  */
790 void
791 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
792 {
793         struct mlx5_priv *priv = dev->data->dev_private;
794         struct rte_intr_handle *intr_handle = dev->intr_handle;
795         unsigned int i;
796         unsigned int rxqs_n = priv->rxqs_n;
797         unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
798
799         if (!dev->data->dev_conf.intr_conf.rxq)
800                 return;
801         if (!intr_handle->intr_vec)
802                 goto free;
803         for (i = 0; i != n; ++i) {
804                 struct mlx5_rxq_ctrl *rxq_ctrl;
805                 struct mlx5_rxq_data *rxq_data;
806
807                 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
808                     RTE_MAX_RXTX_INTR_VEC_ID)
809                         continue;
810                 /*
811                  * Need to access the queue directly to release the reference
812                  * kept in mlx5_rx_intr_vec_enable().
813                  */
814                 rxq_data = (*priv->rxqs)[i];
815                 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
816                 if (rxq_ctrl->obj)
817                         mlx5_rxq_obj_release(rxq_ctrl->obj);
818         }
819 free:
820         rte_intr_free_epoll_fd(intr_handle);
821         if (intr_handle->intr_vec)
822                 free(intr_handle->intr_vec);
823         intr_handle->nb_efd = 0;
824         intr_handle->intr_vec = NULL;
825 }
826
827 /**
828  * MLX5 CQ notification.
829  *
830  * @param rxq
831  *   Pointer to receive queue structure.
832  * @param sq_n_rxq
833  *   Sequence number per receive queue.
834  */
835 static inline void
836 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
837 {
838         int sq_n = 0;
839         uint32_t doorbell_hi;
840         uint64_t doorbell;
841         void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
842
843         sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
844         doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
845         doorbell = (uint64_t)doorbell_hi << 32;
846         doorbell |= rxq->cqn;
847         rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
848         mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
849                          cq_db_reg, rxq->uar_lock_cq);
850 }
851
852 /**
853  * DPDK callback for Rx queue interrupt enable.
854  *
855  * @param dev
856  *   Pointer to Ethernet device structure.
857  * @param rx_queue_id
858  *   Rx queue number.
859  *
860  * @return
861  *   0 on success, a negative errno value otherwise and rte_errno is set.
862  */
863 int
864 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
865 {
866         struct mlx5_priv *priv = dev->data->dev_private;
867         struct mlx5_rxq_data *rxq_data;
868         struct mlx5_rxq_ctrl *rxq_ctrl;
869
870         rxq_data = (*priv->rxqs)[rx_queue_id];
871         if (!rxq_data) {
872                 rte_errno = EINVAL;
873                 return -rte_errno;
874         }
875         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
876         if (rxq_ctrl->irq) {
877                 struct mlx5_rxq_obj *rxq_obj;
878
879                 rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id);
880                 if (!rxq_obj) {
881                         rte_errno = EINVAL;
882                         return -rte_errno;
883                 }
884                 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
885                 mlx5_rxq_obj_release(rxq_obj);
886         }
887         return 0;
888 }
889
890 /**
891  * DPDK callback for Rx queue interrupt disable.
892  *
893  * @param dev
894  *   Pointer to Ethernet device structure.
895  * @param rx_queue_id
896  *   Rx queue number.
897  *
898  * @return
899  *   0 on success, a negative errno value otherwise and rte_errno is set.
900  */
901 int
902 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
903 {
904         struct mlx5_priv *priv = dev->data->dev_private;
905         struct mlx5_rxq_data *rxq_data;
906         struct mlx5_rxq_ctrl *rxq_ctrl;
907         struct mlx5_rxq_obj *rxq_obj = NULL;
908         struct ibv_cq *ev_cq;
909         void *ev_ctx;
910         int ret;
911
912         rxq_data = (*priv->rxqs)[rx_queue_id];
913         if (!rxq_data) {
914                 rte_errno = EINVAL;
915                 return -rte_errno;
916         }
917         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
918         if (!rxq_ctrl->irq)
919                 return 0;
920         rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id);
921         if (!rxq_obj) {
922                 rte_errno = EINVAL;
923                 return -rte_errno;
924         }
925         ret = mlx5_glue->get_cq_event(rxq_obj->channel, &ev_cq, &ev_ctx);
926         if (ret || ev_cq != rxq_obj->cq) {
927                 rte_errno = EINVAL;
928                 goto exit;
929         }
930         rxq_data->cq_arm_sn++;
931         mlx5_glue->ack_cq_events(rxq_obj->cq, 1);
932         mlx5_rxq_obj_release(rxq_obj);
933         return 0;
934 exit:
935         ret = rte_errno; /* Save rte_errno before cleanup. */
936         if (rxq_obj)
937                 mlx5_rxq_obj_release(rxq_obj);
938         DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d",
939                 dev->data->port_id, rx_queue_id);
940         rte_errno = ret; /* Restore rte_errno. */
941         return -rte_errno;
942 }
943
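/*
 * Example (illustrative, not part of the driver): the interrupt callbacks
 * above are driven from the application side roughly as follows, provided
 * the port was configured with intr_conf.rxq = 1 ("port_id"/"queue_id" are
 * placeholders, error handling omitted):
 *
 *     struct rte_epoll_event event;
 *
 *     rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
 *                               RTE_INTR_EVENT_ADD, NULL);
 *     rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *     rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, -1);
 *     rte_eth_dev_rx_intr_disable(port_id, queue_id);
 *
 * The enable step arms the CQ through mlx5_arm_cq(), the wait blocks on the
 * completion channel fd registered in mlx5_rx_intr_vec_enable(), and the
 * disable step acknowledges the event before polling resumes.
 */
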
944 /**
945  * Create a CQ Verbs object.
946  *
947  * @param dev
948  *   Pointer to Ethernet device.
949  * @param priv
950  *   Pointer to device private data.
951  * @param rxq_data
952  *   Pointer to Rx queue data.
953  * @param cqe_n
954  *   Number of CQEs in CQ.
955  * @param rxq_obj
956  *   Pointer to Rx queue object data.
957  *
958  * @return
959  *   The Verbs object initialised, NULL otherwise and rte_errno is set.
960  */
961 static struct ibv_cq *
962 mlx5_ibv_cq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv,
963                 struct mlx5_rxq_data *rxq_data,
964                 unsigned int cqe_n, struct mlx5_rxq_obj *rxq_obj)
965 {
966         struct {
967                 struct ibv_cq_init_attr_ex ibv;
968                 struct mlx5dv_cq_init_attr mlx5;
969         } cq_attr;
970
971         cq_attr.ibv = (struct ibv_cq_init_attr_ex){
972                 .cqe = cqe_n,
973                 .channel = rxq_obj->channel,
974                 .comp_mask = 0,
975         };
976         cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
977                 .comp_mask = 0,
978         };
979         if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
980             !rxq_data->lro) {
981                 cq_attr.mlx5.comp_mask |=
982                                 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
983 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
984                 cq_attr.mlx5.cqe_comp_res_format =
985                                 mlx5_rxq_mprq_enabled(rxq_data) ?
986                                 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
987                                 MLX5DV_CQE_RES_FORMAT_HASH;
988 #else
989                 cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
990 #endif
991                 /*
992                  * For vectorized Rx, the CQE count must not be doubled, so
993                  * that cq_ci and rq_ci stay aligned.
994                  */
995                 if (mlx5_rxq_check_vec_support(rxq_data) < 0)
996                         cq_attr.ibv.cqe *= 2;
997         } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
998                 DRV_LOG(DEBUG,
999                         "port %u Rx CQE compression is disabled for HW"
1000                         " timestamp",
1001                         dev->data->port_id);
1002         } else if (priv->config.cqe_comp && rxq_data->lro) {
1003                 DRV_LOG(DEBUG,
1004                         "port %u Rx CQE compression is disabled for LRO",
1005                         dev->data->port_id);
1006         }
1007 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
1008         if (priv->config.cqe_pad) {
1009                 cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
1010                 cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
1011         }
1012 #endif
1013         return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx,
1014                                                               &cq_attr.ibv,
1015                                                               &cq_attr.mlx5));
1016 }
1017
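/*
 * Note (illustrative, not part of the driver): the CQE compression checked
 * above is controlled per device with the "rxq_cqe_comp_en" devarg (see
 * doc/guides/nics/mlx5.rst), e.g.:
 *
 *     testpmd -w 0000:03:00.0,rxq_cqe_comp_en=1 -- -i
 *
 * As the branches above show, compression is silently disabled when HW
 * timestamping or LRO is requested on the queue.
 */
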
1018 /**
1019  * Create a WQ Verbs object.
1020  *
1021  * @param dev
1022  *   Pointer to Ethernet device.
1023  * @param priv
1024  *   Pointer to device private data.
1025  * @param rxq_data
1026  *   Pointer to Rx queue data.
1027  * @param idx
1028  *   Queue index in DPDK Rx queue array
1029  * @param wqe_n
1030  *   Number of WQEs in WQ.
1031  * @param rxq_obj
1032  *   Pointer to Rx queue object data.
1033  *
1034  * @return
1035  *   The Verbs object initialised, NULL otherwise and rte_errno is set.
1036  */
1037 static struct ibv_wq *
1038 mlx5_ibv_wq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv,
1039                 struct mlx5_rxq_data *rxq_data, uint16_t idx,
1040                 unsigned int wqe_n, struct mlx5_rxq_obj *rxq_obj)
1041 {
1042         struct {
1043                 struct ibv_wq_init_attr ibv;
1044 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
1045                 struct mlx5dv_wq_init_attr mlx5;
1046 #endif
1047         } wq_attr;
1048
1049         wq_attr.ibv = (struct ibv_wq_init_attr){
1050                 .wq_context = NULL, /* Could be useful in the future. */
1051                 .wq_type = IBV_WQT_RQ,
1052                 /* Max number of outstanding WRs. */
1053                 .max_wr = wqe_n >> rxq_data->sges_n,
1054                 /* Max number of scatter/gather elements in a WR. */
1055                 .max_sge = 1 << rxq_data->sges_n,
1056                 .pd = priv->sh->pd,
1057                 .cq = rxq_obj->cq,
1058                 .comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
1059                 .create_flags = (rxq_data->vlan_strip ?
1060                                  IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
1061         };
1062         /* By default, FCS (CRC) is stripped by hardware. */
1063         if (rxq_data->crc_present) {
1064                 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
1065                 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1066         }
1067         if (priv->config.hw_padding) {
1068 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
1069                 wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
1070                 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1071 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
1072                 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
1073                 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1074 #endif
1075         }
1076 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
1077         wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
1078                 .comp_mask = 0,
1079         };
1080         if (mlx5_rxq_mprq_enabled(rxq_data)) {
1081                 struct mlx5dv_striding_rq_init_attr *mprq_attr =
1082                                                 &wq_attr.mlx5.striding_rq_attrs;
1083
1084                 wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
1085                 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){
1086                         .single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
1087                         .single_wqe_log_num_of_strides = rxq_data->strd_num_n,
1088                         .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
1089                 };
1090         }
1091         rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv,
1092                                               &wq_attr.mlx5);
1093 #else
1094         rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv);
1095 #endif
1096         if (rxq_obj->wq) {
1097                 /*
1098                  * Make sure number of WRs*SGEs match expectations since a queue
1099                  * cannot allocate more than "desc" buffers.
1100                  */
1101                 if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
1102                     wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
1103                         DRV_LOG(ERR,
1104                                 "port %u Rx queue %u requested %u*%u but got"
1105                                 " %u*%u WRs*SGEs",
1106                                 dev->data->port_id, idx,
1107                                 wqe_n >> rxq_data->sges_n,
1108                                 (1 << rxq_data->sges_n),
1109                                 wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
1110                         claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
1111                         rxq_obj->wq = NULL;
1112                         rte_errno = EINVAL;
1113                 }
1114         }
1115         return rxq_obj->wq;
1116 }
1117
1118 /**
1119  * Fill common fields of create RQ attributes structure.
1120  *
1121  * @param rxq_data
1122  *   Pointer to Rx queue data.
1123  * @param cqn
1124  *   CQ number to use with this RQ.
1125  * @param rq_attr
1126  *   RQ attributes structure to fill.
1127  */
1128 static void
1129 mlx5_devx_create_rq_attr_fill(struct mlx5_rxq_data *rxq_data, uint32_t cqn,
1130                               struct mlx5_devx_create_rq_attr *rq_attr)
1131 {
1132         rq_attr->state = MLX5_RQC_STATE_RST;
1133         rq_attr->vsd = (rxq_data->vlan_strip) ? 0 : 1;
1134         rq_attr->cqn = cqn;
1135         rq_attr->scatter_fcs = (rxq_data->crc_present) ? 1 : 0;
1136 }
1137
1138 /**
1139  * Fill common fields of DevX WQ attributes structure.
1140  *
1141  * @param priv
1142  *   Pointer to device private data.
1143  * @param rxq_ctrl
1144  *   Pointer to Rx queue control structure.
1145  * @param wq_attr
1146  *   WQ attributes structure to fill.
1147  */
1148 static void
1149 mlx5_devx_wq_attr_fill(struct mlx5_priv *priv, struct mlx5_rxq_ctrl *rxq_ctrl,
1150                        struct mlx5_devx_wq_attr *wq_attr)
1151 {
1152         wq_attr->end_padding_mode = priv->config.cqe_pad ?
1153                                         MLX5_WQ_END_PAD_MODE_ALIGN :
1154                                         MLX5_WQ_END_PAD_MODE_NONE;
1155         wq_attr->pd = priv->sh->pdn;
1156         wq_attr->dbr_addr = rxq_ctrl->dbr_offset;
1157         wq_attr->dbr_umem_id = rxq_ctrl->dbr_umem_id;
1158         wq_attr->dbr_umem_valid = 1;
1159         wq_attr->wq_umem_id = rxq_ctrl->wq_umem->umem_id;
1160         wq_attr->wq_umem_valid = 1;
1161 }
1162
1163 /**
1164  * Create a RQ object using DevX.
1165  *
1166  * @param dev
1167  *   Pointer to Ethernet device.
1168  * @param idx
1169  *   Queue index in DPDK Rx queue array
1170  * @param cqn
1171  *   CQ number to use with this RQ.
1172  *
1173  * @return
1174  *   The DevX object initialised, NULL otherwise and rte_errno is set.
1175  */
1176 static struct mlx5_devx_obj *
1177 mlx5_devx_rq_new(struct rte_eth_dev *dev, uint16_t idx, uint32_t cqn)
1178 {
1179         struct mlx5_priv *priv = dev->data->dev_private;
1180         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1181         struct mlx5_rxq_ctrl *rxq_ctrl =
1182                 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1183         struct mlx5_devx_create_rq_attr rq_attr;
1184         uint32_t wqe_n = 1 << (rxq_data->elts_n - rxq_data->sges_n);
1185         uint32_t wq_size = 0;
1186         uint32_t wqe_size = 0;
1187         uint32_t log_wqe_size = 0;
1188         void *buf = NULL;
1189         struct mlx5_devx_obj *rq;
1190
1191         memset(&rq_attr, 0, sizeof(rq_attr));
1192         /* Fill RQ attributes. */
1193         rq_attr.mem_rq_type = MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE;
1194         rq_attr.flush_in_error_en = 1;
1195         mlx5_devx_create_rq_attr_fill(rxq_data, cqn, &rq_attr);
1196         /* Fill WQ attributes for this RQ. */
1197         if (mlx5_rxq_mprq_enabled(rxq_data)) {
1198                 rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ;
1199                 /*
1200                  * Number of strides in each WQE:
1201                  * 512*2^single_wqe_log_num_of_strides.
1202                  */
1203                 rq_attr.wq_attr.single_wqe_log_num_of_strides =
1204                                 rxq_data->strd_num_n -
1205                                 MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
1206                 /* Stride size = (2^single_stride_log_num_of_bytes)*64B. */
1207                 rq_attr.wq_attr.single_stride_log_num_of_bytes =
1208                                 rxq_data->strd_sz_n -
1209                                 MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
1210                 wqe_size = sizeof(struct mlx5_wqe_mprq);
1211         } else {
1212                 rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
1213                 wqe_size = sizeof(struct mlx5_wqe_data_seg);
1214         }
1215         log_wqe_size = log2above(wqe_size) + rxq_data->sges_n;
1216         rq_attr.wq_attr.log_wq_stride = log_wqe_size;
1217         rq_attr.wq_attr.log_wq_sz = rxq_data->elts_n - rxq_data->sges_n;
1218         /* Calculate and allocate WQ memory space. */
1219         wqe_size = 1 << log_wqe_size; /* Round up to a power of two. */
1220         wq_size = wqe_n * wqe_size;
1221         buf = rte_calloc_socket(__func__, 1, wq_size, MLX5_WQE_BUF_ALIGNMENT,
1222                                 rxq_ctrl->socket);
1223         if (!buf)
1224                 return NULL;
1225         rxq_data->wqes = buf;
1226         rxq_ctrl->wq_umem = mlx5_glue->devx_umem_reg(priv->sh->ctx,
1227                                                      buf, wq_size, 0);
1228         if (!rxq_ctrl->wq_umem) {
1229                 rte_free(buf);
1230                 return NULL;
1231         }
1232         mlx5_devx_wq_attr_fill(priv, rxq_ctrl, &rq_attr.wq_attr);
1233         rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &rq_attr, rxq_ctrl->socket);
1234         if (!rq)
1235                 rxq_release_rq_resources(rxq_ctrl);
1236         return rq;
1237 }
1238
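/*
 * Worked example (illustrative): for a plain 512-descriptor queue without
 * scatter (elts_n = 9, sges_n = 0), each ring entry is a
 * struct mlx5_wqe_data_seg of 16 bytes, so log_wqe_size = 4, wqe_n = 512
 * and the umem registered above covers wq_size = 512 * 16 = 8 KiB.  The
 * door-bell record is allocated separately in mlx5_rxq_obj_new().
 */
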
1239 /**
1240  * Create the Rx hairpin queue object.
1241  *
1242  * @param dev
1243  *   Pointer to Ethernet device.
1244  * @param idx
1245  *   Queue index in DPDK Rx queue array
1246  *
1247  * @return
1248  *   The hairpin DevX object initialised, NULL otherwise and rte_errno is set.
1249  */
1250 static struct mlx5_rxq_obj *
1251 mlx5_rxq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
1252 {
1253         struct mlx5_priv *priv = dev->data->dev_private;
1254         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1255         struct mlx5_rxq_ctrl *rxq_ctrl =
1256                 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1257         struct mlx5_devx_create_rq_attr attr = { 0 };
1258         struct mlx5_rxq_obj *tmpl = NULL;
1259         int ret = 0;
1260
1261         assert(rxq_data);
1262         assert(!rxq_ctrl->obj);
1263         tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1264                                  rxq_ctrl->socket);
1265         if (!tmpl) {
1266                 DRV_LOG(ERR,
1267                         "port %u Rx queue %u cannot allocate verbs resources",
1268                         dev->data->port_id, rxq_data->idx);
1269                 rte_errno = ENOMEM;
1270                 goto error;
1271         }
1272         tmpl->type = MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN;
1273         tmpl->rxq_ctrl = rxq_ctrl;
1274         attr.hairpin = 1;
1275         /* Workaround for hairpin startup: use at least 32 packets. */
1276         attr.wq_attr.log_hairpin_num_packets = log2above(32);
1277         /* Workaround for packets larger than 1KB: use the maximum WQ data size. */
1278         attr.wq_attr.log_hairpin_data_sz =
1279                         priv->config.hca_attr.log_max_hairpin_wq_data_sz;
1280         tmpl->rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &attr,
1281                                            rxq_ctrl->socket);
1282         if (!tmpl->rq) {
1283                 DRV_LOG(ERR,
1284                         "port %u Rx hairpin queue %u can't create rq object",
1285                         dev->data->port_id, idx);
1286                 rte_errno = errno;
1287                 goto error;
1288         }
1289         DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
1290                 idx, (void *)tmpl);
1291         rte_atomic32_inc(&tmpl->refcnt);
1292         LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next);
1293         priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1294         return tmpl;
1295 error:
1296         ret = rte_errno; /* Save rte_errno before cleanup. */
1297         if (tmpl && tmpl->rq)
1298                 mlx5_devx_cmd_destroy(tmpl->rq);
1299         rte_errno = ret; /* Restore rte_errno. */
1300         return NULL;
1301 }
1302
1303 /**
1304  * Create the Rx queue Verbs/DevX object.
1305  *
1306  * @param dev
1307  *   Pointer to Ethernet device.
1308  * @param idx
1309  *   Queue index in DPDK Rx queue array
1310  * @param type
1311  *   Type of Rx queue object to create.
1312  *
1313  * @return
1314  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
1315  */
1316 struct mlx5_rxq_obj *
1317 mlx5_rxq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
1318                  enum mlx5_rxq_obj_type type)
1319 {
1320         struct mlx5_priv *priv = dev->data->dev_private;
1321         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1322         struct mlx5_rxq_ctrl *rxq_ctrl =
1323                 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1324         struct ibv_wq_attr mod;
1325         unsigned int cqe_n;
1326         unsigned int wqe_n = 1 << rxq_data->elts_n;
1327         struct mlx5_rxq_obj *tmpl = NULL;
1328         struct mlx5dv_cq cq_info;
1329         struct mlx5dv_rwq rwq;
1330         int ret = 0;
1331         struct mlx5dv_obj obj;
1332
1333         assert(rxq_data);
1334         assert(!rxq_ctrl->obj);
1335         if (type == MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN)
1336                 return mlx5_rxq_obj_hairpin_new(dev, idx);
1337         priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
1338         priv->verbs_alloc_ctx.obj = rxq_ctrl;
1339         tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1340                                  rxq_ctrl->socket);
1341         if (!tmpl) {
1342                 DRV_LOG(ERR,
1343                         "port %u Rx queue %u cannot allocate verbs resources",
1344                         dev->data->port_id, rxq_data->idx);
1345                 rte_errno = ENOMEM;
1346                 goto error;
1347         }
1348         tmpl->type = type;
1349         tmpl->rxq_ctrl = rxq_ctrl;
1350         if (rxq_ctrl->irq) {
1351                 tmpl->channel = mlx5_glue->create_comp_channel(priv->sh->ctx);
1352                 if (!tmpl->channel) {
1353                         DRV_LOG(ERR, "port %u: comp channel creation failure",
1354                                 dev->data->port_id);
1355                         rte_errno = ENOMEM;
1356                         goto error;
1357                 }
1358         }
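        /*
         * CQ sizing: with Multi-Packet RQ every WQE can complete up to
         * 2^strd_num_n packets, so the CQ must cover wqe_n * strd_num
         * entries (e.g. 16 WQEs of 64 strides each need 1024 CQEs);
         * otherwise a single CQE per WQE is enough.
         */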
1359         if (mlx5_rxq_mprq_enabled(rxq_data))
1360                 cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1;
1361         else
1362                 cqe_n = wqe_n - 1;
1363         tmpl->cq = mlx5_ibv_cq_new(dev, priv, rxq_data, cqe_n, tmpl);
1364         if (!tmpl->cq) {
1365                 DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure",
1366                         dev->data->port_id, idx);
1367                 rte_errno = ENOMEM;
1368                 goto error;
1369         }
1370         obj.cq.in = tmpl->cq;
1371         obj.cq.out = &cq_info;
1372         ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
1373         if (ret) {
1374                 rte_errno = ret;
1375                 goto error;
1376         }
1377         if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1378                 DRV_LOG(ERR,
1379                         "port %u wrong MLX5_CQE_SIZE environment variable"
1380                         " value: it should be set to %u",
1381                         dev->data->port_id, RTE_CACHE_LINE_SIZE);
1382                 rte_errno = EINVAL;
1383                 goto error;
1384         }
1385         DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
1386                 dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr);
1387         DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d",
1388                 dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge);
1389         /* Allocate door-bell for types created with DevX. */
1390         if (tmpl->type != MLX5_RXQ_OBJ_TYPE_IBV) {
1391                 struct mlx5_devx_dbr_page *dbr_page;
1392                 int64_t dbr_offset;
1393
1394                 dbr_offset = mlx5_get_dbr(dev, &dbr_page);
1395                 if (dbr_offset < 0)
1396                         goto error;
1397                 rxq_ctrl->dbr_offset = dbr_offset;
1398                 rxq_ctrl->dbr_umem_id = dbr_page->umem->umem_id;
1399                 rxq_ctrl->dbr_umem_id_valid = 1;
1400                 rxq_data->rq_db = (uint32_t *)((uintptr_t)dbr_page->dbrs +
1401                                                (uintptr_t)rxq_ctrl->dbr_offset);
1402         }
1403         if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV) {
1404                 tmpl->wq = mlx5_ibv_wq_new(dev, priv, rxq_data, idx, wqe_n,
1405                                            tmpl);
1406                 if (!tmpl->wq) {
1407                         DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure",
1408                                 dev->data->port_id, idx);
1409                         rte_errno = ENOMEM;
1410                         goto error;
1411                 }
1412                 /* Change queue state to ready. */
1413                 mod = (struct ibv_wq_attr){
1414                         .attr_mask = IBV_WQ_ATTR_STATE,
1415                         .wq_state = IBV_WQS_RDY,
1416                 };
1417                 ret = mlx5_glue->modify_wq(tmpl->wq, &mod);
1418                 if (ret) {
1419                         DRV_LOG(ERR,
1420                                 "port %u Rx queue %u WQ state to IBV_WQS_RDY"
1421                                 " failed", dev->data->port_id, idx);
1422                         rte_errno = ret;
1423                         goto error;
1424                 }
1425                 obj.rwq.in = tmpl->wq;
1426                 obj.rwq.out = &rwq;
1427                 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
1428                 if (ret) {
1429                         rte_errno = ret;
1430                         goto error;
1431                 }
1432                 rxq_data->wqes = rwq.buf;
1433                 rxq_data->rq_db = rwq.dbrec;
1434         } else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) {
1435                 struct mlx5_devx_modify_rq_attr rq_attr;
1436
1437                 memset(&rq_attr, 0, sizeof(rq_attr));
1438                 tmpl->rq = mlx5_devx_rq_new(dev, idx, cq_info.cqn);
1439                 if (!tmpl->rq) {
1440                         DRV_LOG(ERR, "port %u Rx queue %u RQ creation failure",
1441                                 dev->data->port_id, idx);
1442                         rte_errno = ENOMEM;
1443                         goto error;
1444                 }
1445                 /* Change queue state to ready. */
1446                 rq_attr.rq_state = MLX5_RQC_STATE_RST;
1447                 rq_attr.state = MLX5_RQC_STATE_RDY;
1448                 ret = mlx5_devx_cmd_modify_rq(tmpl->rq, &rq_attr);
1449                 if (ret)
1450                         goto error;
1451         }
1452         /* Fill the rings. */
1453         rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
1454         rxq_data->cq_db = cq_info.dbrec;
1455         rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
1456         rxq_data->cq_uar = cq_info.cq_uar;
1457         rxq_data->cqn = cq_info.cqn;
1458         rxq_data->cq_arm_sn = 0;
1459         mlx5_rxq_initialize(rxq_data);
1460         rxq_data->cq_ci = 0;
1461         DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
1462                 idx, (void *)tmpl);
1463         rte_atomic32_inc(&tmpl->refcnt);
1464         LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next);
1465         priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1466         return tmpl;
1467 error:
1468         if (tmpl) {
1469                 ret = rte_errno; /* Save rte_errno before cleanup. */
1470                 if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV && tmpl->wq)
1471                         claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
1472                 else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ && tmpl->rq)
1473                         claim_zero(mlx5_devx_cmd_destroy(tmpl->rq));
1474                 if (tmpl->cq)
1475                         claim_zero(mlx5_glue->destroy_cq(tmpl->cq));
1476                 if (tmpl->channel)
1477                         claim_zero(mlx5_glue->destroy_comp_channel
1478                                                         (tmpl->channel));
1479                 rte_free(tmpl);
1480                 rte_errno = ret; /* Restore rte_errno. */
1481         }
1482         if (type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ)
1483                 rxq_release_rq_resources(rxq_ctrl);
1484         priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1485         return NULL;
1486 }
1487
1488 /**
1489  * Verify that the Rx queue object list is empty.
1490  *
1491  * @param dev
1492  *   Pointer to Ethernet device.
1493  *
1494  * @return
1495  *   The number of objects not released.
1496  */
1497 int
1498 mlx5_rxq_obj_verify(struct rte_eth_dev *dev)
1499 {
1500         struct mlx5_priv *priv = dev->data->dev_private;
1501         int ret = 0;
1502         struct mlx5_rxq_obj *rxq_obj;
1503
1504         LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) {
1505                 DRV_LOG(DEBUG, "port %u Rx queue %u still referenced",
1506                         dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx);
1507                 ++ret;
1508         }
1509         return ret;
1510 }
1511
1512 /**
1513  * Callback function to initialize mbufs for Multi-Packet RQ.
1514  */
1515 static inline void
1516 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg,
1517                     void *_m, unsigned int i __rte_unused)
1518 {
1519         struct mlx5_mprq_buf *buf = _m;
1520         struct rte_mbuf_ext_shared_info *shinfo;
1521         unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg;
1522         unsigned int j;
1523
1524         memset(_m, 0, sizeof(*buf));
1525         buf->mp = mp;
1526         rte_atomic16_set(&buf->refcnt, 1);
1527         for (j = 0; j != strd_n; ++j) {
1528                 shinfo = &buf->shinfos[j];
1529                 shinfo->free_cb = mlx5_mprq_buf_free_cb;
1530                 shinfo->fcb_opaque = buf;
1531         }
1532 }
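/*
 * Note: this callback is registered as the mempool obj_init handler by
 * mlx5_mprq_alloc_mp() below, which passes the stride count
 * (1 << strd_num_n) through the opaque argument cast to a pointer.
 */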
1533
1534 /**
1535  * Free mempool of Multi-Packet RQ.
1536  *
1537  * @param dev
1538  *   Pointer to Ethernet device.
1539  *
1540  * @return
1541  *   0 on success, negative errno value on failure.
1542  */
1543 int
1544 mlx5_mprq_free_mp(struct rte_eth_dev *dev)
1545 {
1546         struct mlx5_priv *priv = dev->data->dev_private;
1547         struct rte_mempool *mp = priv->mprq_mp;
1548         unsigned int i;
1549
1550         if (mp == NULL)
1551                 return 0;
1552         DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
1553                 dev->data->port_id, mp->name);
1554         /*
1555          * If a buffer in the pool has been externally attached to an mbuf and
1556          * is still in use by the application, destroying the Rx queue can
1557          * spoil the packet. This is unlikely, but it can happen if the
1558          * application dynamically creates and destroys queues while holding
1559          * Rx packets.
1560          *
1561          * TODO: It is unavoidable for now because the mempool for Multi-Packet
1562          * RQ isn't provided by the application but managed by the PMD.
1562          */
1563         if (!rte_mempool_full(mp)) {
1564                 DRV_LOG(ERR,
1565                         "port %u mempool for Multi-Packet RQ is still in use",
1566                         dev->data->port_id);
1567                 rte_errno = EBUSY;
1568                 return -rte_errno;
1569         }
1570         rte_mempool_free(mp);
1571         /* Unset mempool for each Rx queue. */
1572         for (i = 0; i != priv->rxqs_n; ++i) {
1573                 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
1574
1575                 if (rxq == NULL)
1576                         continue;
1577                 rxq->mprq_mp = NULL;
1578         }
1579         priv->mprq_mp = NULL;
1580         return 0;
1581 }
1582
1583 /**
1584  * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
1585  * mempool. If already allocated, reuse it if there are enough elements.
1586  * Otherwise, resize it.
1587  *
1588  * @param dev
1589  *   Pointer to Ethernet device.
1590  *
1591  * @return
1592  *   0 on success, negative errno value on failure.
1593  */
1594 int
1595 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
1596 {
1597         struct mlx5_priv *priv = dev->data->dev_private;
1598         struct rte_mempool *mp = priv->mprq_mp;
1599         char name[RTE_MEMPOOL_NAMESIZE];
1600         unsigned int desc = 0;
1601         unsigned int buf_len;
1602         unsigned int obj_num;
1603         unsigned int obj_size;
1604         unsigned int strd_num_n = 0;
1605         unsigned int strd_sz_n = 0;
1606         unsigned int i;
1607         unsigned int n_ibv = 0;
1608
1609         if (!mlx5_mprq_enabled(dev))
1610                 return 0;
1611         /* Count the total number of descriptors configured. */
1612         for (i = 0; i != priv->rxqs_n; ++i) {
1613                 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
1614                 struct mlx5_rxq_ctrl *rxq_ctrl = container_of
1615                         (rxq, struct mlx5_rxq_ctrl, rxq);
1616
1617                 if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
1618                         continue;
1619                 n_ibv++;
1620                 desc += 1 << rxq->elts_n;
1621                 /* Get the max number of strides. */
1622                 if (strd_num_n < rxq->strd_num_n)
1623                         strd_num_n = rxq->strd_num_n;
1624                 /* Get the max size of a stride. */
1625                 if (strd_sz_n < rxq->strd_sz_n)
1626                         strd_sz_n = rxq->strd_sz_n;
1627         }
1628         assert(strd_num_n && strd_sz_n);
1629         buf_len = (1 << strd_num_n) * (1 << strd_sz_n);
1630         obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + (1 << strd_num_n) *
1631                 sizeof(struct rte_mbuf_ext_shared_info) + RTE_PKTMBUF_HEADROOM;
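	/*
	 * Sizing example (hypothetical values): with strd_num_n = 6 and
	 * strd_sz_n = 11, each buffer holds 64 strides of 2048 bytes, so
	 * buf_len = 128 KiB and obj_size adds the mlx5_mprq_buf header,
	 * 64 shared-info descriptors and the mbuf head-room on top.
	 */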
1632         /*
1633          * Received packets can be either memcpy'd or externally referenced. When
1634          * a packet is attached to an mbuf as an external buffer, it isn't
1635          * possible to predict how the application will queue the buffers, so
1636          * there is no way to pre-allocate exactly the needed number of buffers;
1637          * enough buffers have to be prepared speculatively instead.
1638          *
1639          * In the data path, if this mempool is depleted, the PMD will try to
1640          * memcpy received packets into buffers provided by the application
1641          * (rxq->mp) until this mempool has free entries again.
1642          */
1643         desc *= 4;
1644         obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv;
1645         /*
1646          * rte_mempool_create_empty() has a sanity check that refuses a cache
1647          * size that is too large compared to the number of elements.
1648          * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so a constant
1649          * factor of 2 is used here instead.
1650          */
1651         obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
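	/*
	 * Example (hypothetical values): two MPRQ Rx queues contributing
	 * 1024 descriptors each give desc = 2048, quadrupled to 8192
	 * buffers, plus one mempool cache worth of buffers per queue,
	 * and never fewer than two caches worth in total.
	 */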
1652         /* Check if a mempool is already allocated and can be reused. */
1653         if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
1654                 DRV_LOG(DEBUG, "port %u mempool %s is being reused",
1655                         dev->data->port_id, mp->name);
1656                 /* Reuse. */
1657                 goto exit;
1658         } else if (mp != NULL) {
1659                 DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
1660                         dev->data->port_id, mp->name);
1661                 /*
1662                  * If freeing fails, the mempool may still be in use and there
1663                  * is no choice but to keep using the existing one. On buffer
1664                  * underrun, packets will then be memcpy'd instead of being
1665                  * attached as external buffers.
1666                  */
1667                 if (mlx5_mprq_free_mp(dev)) {
1668                         if (mp->elt_size >= obj_size)
1669                                 goto exit;
1670                         else
1671                                 return -rte_errno;
1672                 }
1673         }
1674         snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id);
1675         mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
1676                                 0, NULL, NULL, mlx5_mprq_buf_init,
1677                                 (void *)(uintptr_t)(1 << strd_num_n),
1678                                 dev->device->numa_node, 0);
1679         if (mp == NULL) {
1680                 DRV_LOG(ERR,
1681                         "port %u failed to allocate a mempool for"
1682                         " Multi-Packet RQ, count=%u, size=%u",
1683                         dev->data->port_id, obj_num, obj_size);
1684                 rte_errno = ENOMEM;
1685                 return -rte_errno;
1686         }
1687         priv->mprq_mp = mp;
1688 exit:
1689         /* Set mempool for each Rx queue. */
1690         for (i = 0; i != priv->rxqs_n; ++i) {
1691                 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
1692                 struct mlx5_rxq_ctrl *rxq_ctrl = container_of
1693                         (rxq, struct mlx5_rxq_ctrl, rxq);
1694
1695                 if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
1696                         continue;
1697                 rxq->mprq_mp = mp;
1698         }
1699         DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
1700                 dev->data->port_id);
1701         return 0;
1702 }
1703
1704 #define MLX5_MAX_LRO_SIZE (UINT8_MAX * 256u)
1705 #define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \
1706                                         sizeof(struct rte_vlan_hdr) * 2 + \
1707                                         sizeof(struct rte_ipv6_hdr)))
1708 #define MAX_TCP_OPTION_SIZE 40u
1709 #define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \
1710                                  sizeof(struct rte_tcp_hdr) + \
1711                                  MAX_TCP_OPTION_SIZE))
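/*
 * With the standard header sizes (14 B Ethernet, 4 B per VLAN tag, 40 B
 * IPv6, 20 B TCP and up to 40 B of TCP options) these limits work out to
 * MLX5_MAX_TCP_HDR_OFFSET = 14 + 2 * 4 + 40 = 62 bytes and
 * MLX5_MAX_LRO_HEADER_FIX = 62 + 20 + 40 = 122 bytes.
 */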
1712
1713 /**
1714  * Adjust the maximum LRO message size.
1715  *
1716  * @param dev
1717  *   Pointer to Ethernet device.
1718  * @param max_lro_size
1719  *   The maximum size for LRO packet.
1720  */
1721 static void
1722 mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint32_t max_lro_size)
1723 {
1724         struct mlx5_priv *priv = dev->data->dev_private;
1725
1726         if (priv->config.hca_attr.lro_max_msg_sz_mode ==
1727             MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size >
1728             MLX5_MAX_TCP_HDR_OFFSET)
1729                 max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET;
1730         max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE);
1731         assert(max_lro_size >= 256u);
1732         max_lro_size /= 256u;
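	/*
	 * Example (hypothetical values): max_lro_size = 65280 with the
	 * start-from-L4 mode gives 65280 - 62 = 65218, stored in 256-byte
	 * granularity as 65218 / 256 = 254.
	 */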
1733         if (priv->max_lro_msg_size)
1734                 priv->max_lro_msg_size =
1735                         RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size);
1736         else
1737                 priv->max_lro_msg_size = max_lro_size;
1738 }
1739
1740 /**
1741  * Create a DPDK Rx queue.
1742  *
1743  * @param dev
1744  *   Pointer to Ethernet device.
1745  * @param idx
1746  *   RX queue index.
1747  * @param desc
1748  *   Number of descriptors to configure in queue.
1749  * @param socket
1750  *   NUMA socket on which memory must be allocated.
 * @param conf
 *   Rx queue configuration parameters.
 * @param mp
 *   Memory pool for buffer allocations.
1751  *
1752  * @return
1753  *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
1754  */
1755 struct mlx5_rxq_ctrl *
1756 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
1757              unsigned int socket, const struct rte_eth_rxconf *conf,
1758              struct rte_mempool *mp)
1759 {
1760         struct mlx5_priv *priv = dev->data->dev_private;
1761         struct mlx5_rxq_ctrl *tmpl;
1762         unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
1763         unsigned int mprq_stride_size;
1764         struct mlx5_dev_config *config = &priv->config;
1765         unsigned int strd_headroom_en;
1766         /*
1767          * Always allocate extra slots, even if eventually
1768          * the vector Rx will not be used.
1769          */
1770         uint16_t desc_n =
1771                 desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
1772         uint64_t offloads = conf->offloads |
1773                            dev->data->dev_conf.rxmode.offloads;
1774         unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO);
1775         const int mprq_en = mlx5_check_mprq_support(dev) > 0;
1776         unsigned int max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
1777         unsigned int non_scatter_min_mbuf_size = max_rx_pkt_len +
1778                                                         RTE_PKTMBUF_HEADROOM;
1779         unsigned int max_lro_size = 0;
1780         unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
1781
1782         if (non_scatter_min_mbuf_size > mb_len && !(offloads &
1783                                                     DEV_RX_OFFLOAD_SCATTER)) {
1784                 DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not"
1785                         " configured and not enough mbuf space (%u) to contain "
1786                         "the maximum Rx packet length (%u) with head-room (%u)",
1787                         dev->data->port_id, idx, mb_len, max_rx_pkt_len,
1788                         RTE_PKTMBUF_HEADROOM);
1789                 rte_errno = ENOSPC;
1790                 return NULL;
1791         }
1792         tmpl = rte_calloc_socket("RXQ", 1,
1793                                  sizeof(*tmpl) +
1794                                  desc_n * sizeof(struct rte_mbuf *),
1795                                  0, socket);
1796         if (!tmpl) {
1797                 rte_errno = ENOMEM;
1798                 return NULL;
1799         }
1800         tmpl->type = MLX5_RXQ_TYPE_STANDARD;
1801         if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh,
1802                                MLX5_MR_BTREE_CACHE_N, socket)) {
1803                 /* rte_errno is already set. */
1804                 goto error;
1805         }
1806         tmpl->socket = socket;
1807         if (dev->data->dev_conf.intr_conf.rxq)
1808                 tmpl->irq = 1;
1809         /*
1810          * An LRO packet may consume all the stride memory, hence we cannot
1811          * guarantee head-room near the packet memory in the stride.
1812          * In this case scatter is surely enabled and an empty mbuf may be
1813          * added at the start for the head-room.
1814          */
1815         if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
1816             non_scatter_min_mbuf_size > mb_len) {
1817                 strd_headroom_en = 0;
1818                 mprq_stride_size = RTE_MIN(max_rx_pkt_len,
1819                                         1u << config->mprq.max_stride_size_n);
1820         } else {
1821                 strd_headroom_en = 1;
1822                 mprq_stride_size = non_scatter_min_mbuf_size;
1823         }
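	/*
	 * Example (hypothetical values): with LRO enabled, 128 bytes of
	 * head-room, 2048-byte mbufs and max_rx_pkt_len = 16384, the
	 * packet does not fit in one mbuf, so the stride head-room is
	 * dropped and the stride size is capped at the device's maximum
	 * stride size.
	 */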
1824         /*
1825          * This Rx queue can be configured as a Multi-Packet RQ if all of the
1826          * following conditions are met:
1827          *  - MPRQ is enabled.
1828          *  - The number of descs is more than the number of strides.
1829          *  - max_rx_pkt_len plus overhead is less than the max size of a
1830          *    stride.
1831          *  Otherwise, enable Rx scatter if necessary.
1832          */
1833         if (mprq_en &&
1834             desc > (1U << config->mprq.stride_num_n) &&
1835             mprq_stride_size <= (1U << config->mprq.max_stride_size_n)) {
1836                 /* TODO: Rx scatter isn't supported yet. */
1837                 tmpl->rxq.sges_n = 0;
1838                 /* Trim the number of descs needed. */
1839                 desc >>= config->mprq.stride_num_n;
1840                 tmpl->rxq.strd_num_n = config->mprq.stride_num_n;
1841                 tmpl->rxq.strd_sz_n = RTE_MAX(log2above(mprq_stride_size),
1842                                               config->mprq.min_stride_size_n);
1843                 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
1844                 tmpl->rxq.strd_headroom_en = strd_headroom_en;
1845                 tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
1846                                 config->mprq.max_memcpy_len);
1847                 max_lro_size = RTE_MIN(max_rx_pkt_len,
1848                                        (1u << tmpl->rxq.strd_num_n) *
1849                                        (1u << tmpl->rxq.strd_sz_n));
1850                 DRV_LOG(DEBUG,
1851                         "port %u Rx queue %u: Multi-Packet RQ is enabled"
1852                         " strd_num_n = %u, strd_sz_n = %u",
1853                         dev->data->port_id, idx,
1854                         tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n);
1855         } else if (max_rx_pkt_len <= first_mb_free_size) {
1856                 tmpl->rxq.sges_n = 0;
1857                 max_lro_size = max_rx_pkt_len;
1858         } else if (offloads & DEV_RX_OFFLOAD_SCATTER) {
1859                 unsigned int size = non_scatter_min_mbuf_size;
1860                 unsigned int sges_n;
1861
1862                 if (lro_on_queue && first_mb_free_size <
1863                     MLX5_MAX_LRO_HEADER_FIX) {
1864                         DRV_LOG(ERR, "Not enough space in the first segment (%u)"
1865                                 " to include the max header size (%u) for LRO",
1866                                 first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX);
1867                         rte_errno = ENOTSUP;
1868                         goto error;
1869                 }
1870                 /*
1871                  * Determine the number of SGEs needed for a full packet
1872                  * and round it to the next power of two.
1873                  */
1874                 sges_n = log2above((size / mb_len) + !!(size % mb_len));
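		/*
		 * Example (hypothetical values): max_rx_pkt_len = 9000 with
		 * 128 bytes of head-room and 2048-byte mbufs gives
		 * size = 9128, i.e. 5 data segments, rounded up to 8 SGEs
		 * (sges_n = 3).
		 */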
1875                 if (sges_n > MLX5_MAX_LOG_RQ_SEGS) {
1876                         DRV_LOG(ERR,
1877                                 "port %u too many SGEs (%u) needed to handle"
1878                                 " requested maximum packet size %u, the maximum"
1879                                 " supported are %u", dev->data->port_id,
1880                                 1 << sges_n, max_rx_pkt_len,
1881                                 1u << MLX5_MAX_LOG_RQ_SEGS);
1882                         rte_errno = ENOTSUP;
1883                         goto error;
1884                 }
1885                 tmpl->rxq.sges_n = sges_n;
1886                 max_lro_size = max_rx_pkt_len;
1887         }
1888         if (mprq_en && !mlx5_rxq_mprq_enabled(&tmpl->rxq))
1889                 DRV_LOG(WARNING,
1890                         "port %u MPRQ is requested but cannot be enabled"
1891                         " (requested: desc = %u, stride_sz = %u,"
1892                         " supported: min_stride_num = %u, max_stride_sz = %u).",
1893                         dev->data->port_id, desc, mprq_stride_size,
1894                         (1 << config->mprq.stride_num_n),
1895                         (1 << config->mprq.max_stride_size_n));
1896         DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u",
1897                 dev->data->port_id, 1 << tmpl->rxq.sges_n);
1898         if (desc % (1 << tmpl->rxq.sges_n)) {
1899                 DRV_LOG(ERR,
1900                         "port %u number of Rx queue descriptors (%u) is not a"
1901                         " multiple of SGEs per packet (%u)",
1902                         dev->data->port_id,
1903                         desc,
1904                         1 << tmpl->rxq.sges_n);
1905                 rte_errno = EINVAL;
1906                 goto error;
1907         }
1908         mlx5_max_lro_msg_size_adjust(dev, max_lro_size);
1909         /* Toggle RX checksum offload if hardware supports it. */
1910         tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM);
1911         tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP);
1912         /* Configure VLAN stripping. */
1913         tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
1914         /* By default, FCS (CRC) is stripped by hardware. */
1915         tmpl->rxq.crc_present = 0;
1916         tmpl->rxq.lro = lro_on_queue;
1917         if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
1918                 if (config->hw_fcs_strip) {
1919                         /*
1920                          * RQs used for LRO-enabled TIRs should not be
1921                          * configured to scatter the FCS.
1922                          */
1923                         if (lro_on_queue)
1924                                 DRV_LOG(WARNING,
1925                                         "port %u CRC stripping has been "
1926                                         "disabled but will still be performed "
1927                                         "by hardware, because LRO is enabled",
1928                                         dev->data->port_id);
1929                         else
1930                                 tmpl->rxq.crc_present = 1;
1931                 } else {
1932                         DRV_LOG(WARNING,
1933                                 "port %u CRC stripping has been disabled but will"
1934                                 " still be performed by hardware, make sure MLNX_OFED"
1935                                 " and firmware are up to date",
1936                                 dev->data->port_id);
1937                 }
1938         }
1939         DRV_LOG(DEBUG,
1940                 "port %u CRC stripping is %s, %u bytes will be subtracted from"
1941                 " incoming frames to hide it",
1942                 dev->data->port_id,
1943                 tmpl->rxq.crc_present ? "disabled" : "enabled",
1944                 tmpl->rxq.crc_present << 2);
1945         tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf &&
1946                 (!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
1947         /* Save port ID. */
1948         tmpl->rxq.port_id = dev->data->port_id;
1949         tmpl->priv = priv;
1950         tmpl->rxq.mp = mp;
1951         tmpl->rxq.elts_n = log2above(desc);
1952         tmpl->rxq.rq_repl_thresh =
1953                 MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
1954         tmpl->rxq.elts =
1955                 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
1956 #ifndef RTE_ARCH_64
1957         tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
1958 #endif
1959         tmpl->rxq.idx = idx;
1960         rte_atomic32_inc(&tmpl->refcnt);
1961         LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
1962         return tmpl;
1963 error:
1964         rte_free(tmpl);
1965         return NULL;
1966 }
1967
1968 /**
1969  * Create a DPDK Rx hairpin queue.
1970  *
1971  * @param dev
1972  *   Pointer to Ethernet device.
1973  * @param idx
1974  *   RX queue index.
1975  * @param desc
1976  *   Number of descriptors to configure in queue.
1977  * @param hairpin_conf
1978  *   The hairpin binding configuration.
1979  *
1980  * @return
1981  *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
1982  */
1983 struct mlx5_rxq_ctrl *
1984 mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
1985                      const struct rte_eth_hairpin_conf *hairpin_conf)
1986 {
1987         struct mlx5_priv *priv = dev->data->dev_private;
1988         struct mlx5_rxq_ctrl *tmpl;
1989
1990         tmpl = rte_calloc_socket("RXQ", 1, sizeof(*tmpl), 0, SOCKET_ID_ANY);
1991         if (!tmpl) {
1992                 rte_errno = ENOMEM;
1993                 return NULL;
1994         }
1995         tmpl->type = MLX5_RXQ_TYPE_HAIRPIN;
1996         tmpl->socket = SOCKET_ID_ANY;
1997         tmpl->rxq.rss_hash = 0;
1998         tmpl->rxq.port_id = dev->data->port_id;
1999         tmpl->priv = priv;
2000         tmpl->rxq.mp = NULL;
2001         tmpl->rxq.elts_n = log2above(desc);
2002         tmpl->rxq.elts = NULL;
2003         tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
2004         tmpl->hairpin_conf = *hairpin_conf;
2005         tmpl->rxq.idx = idx;
2006         rte_atomic32_inc(&tmpl->refcnt);
2007         LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
2008         return tmpl;
2009 }
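/*
 * Usage sketch (hypothetical caller, not part of this driver): the ethdev
 * layer reaches this function through the hairpin Rx queue setup callback,
 * e.g.:
 *
 *	struct rte_eth_hairpin_conf conf = {
 *		.peer_count = 1,
 *		.peers[0] = { .port = port_id, .queue = peer_tx_queue },
 *	};
 *	ret = rte_eth_rx_hairpin_queue_setup(port_id, rx_queue_id,
 *					     nb_desc, &conf);
 */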
2010
2011 /**
2012  * Get a Rx queue.
2013  *
2014  * @param dev
2015  *   Pointer to Ethernet device.
2016  * @param idx
2017  *   RX queue index.
2018  *
2019  * @return
2020  *   A pointer to the queue if it exists, NULL otherwise.
2021  */
2022 struct mlx5_rxq_ctrl *
2023 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx)
2024 {
2025         struct mlx5_priv *priv = dev->data->dev_private;
2026         struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
2027
2028         if ((*priv->rxqs)[idx]) {
2029                 rxq_ctrl = container_of((*priv->rxqs)[idx],
2030                                         struct mlx5_rxq_ctrl,
2031                                         rxq);
2032                 mlx5_rxq_obj_get(dev, idx);
2033                 rte_atomic32_inc(&rxq_ctrl->refcnt);
2034         }
2035         return rxq_ctrl;
2036 }
2037
2038 /**
2039  * Release a Rx queue.
2040  *
2041  * @param dev
2042  *   Pointer to Ethernet device.
2043  * @param idx
2044  *   RX queue index.
2045  *
2046  * @return
2047  *   1 while a reference on it exists, 0 when freed.
2048  */
2049 int
2050 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
2051 {
2052         struct mlx5_priv *priv = dev->data->dev_private;
2053         struct mlx5_rxq_ctrl *rxq_ctrl;
2054
2055         if (!(*priv->rxqs)[idx])
2056                 return 0;
2057         rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
2058         assert(rxq_ctrl->priv);
2059         if (rxq_ctrl->obj && !mlx5_rxq_obj_release(rxq_ctrl->obj))
2060                 rxq_ctrl->obj = NULL;
2061         if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
2062                 if (rxq_ctrl->dbr_umem_id_valid)
2063                         claim_zero(mlx5_release_dbr(dev, rxq_ctrl->dbr_umem_id,
2064                                                     rxq_ctrl->dbr_offset));
2065                 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
2066                         mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
2067                 LIST_REMOVE(rxq_ctrl, next);
2068                 rte_free(rxq_ctrl);
2069                 (*priv->rxqs)[idx] = NULL;
2070                 return 0;
2071         }
2072         return 1;
2073 }
2074
2075 /**
2076  * Verify that the Rx queue list is empty.
2077  *
2078  * @param dev
2079  *   Pointer to Ethernet device.
2080  *
2081  * @return
2082  *   The number of objects not released.
2083  */
2084 int
2085 mlx5_rxq_verify(struct rte_eth_dev *dev)
2086 {
2087         struct mlx5_priv *priv = dev->data->dev_private;
2088         struct mlx5_rxq_ctrl *rxq_ctrl;
2089         int ret = 0;
2090
2091         LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
2092                 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
2093                         dev->data->port_id, rxq_ctrl->rxq.idx);
2094                 ++ret;
2095         }
2096         return ret;
2097 }
2098
2099 /**
2100  * Create an indirection table.
2101  *
2102  * @param dev
2103  *   Pointer to Ethernet device.
2104  * @param queues
2105  *   Queues entering the indirection table.
2106  * @param queues_n
2107  *   Number of queues in the array.
 * @param type
 *   Indirection table type, Verbs or DevX.
2108  *
2109  * @return
2110  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2111  */
2112 static struct mlx5_ind_table_obj *
2113 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues,
2114                        uint32_t queues_n, enum mlx5_ind_tbl_type type)
2115 {
2116         struct mlx5_priv *priv = dev->data->dev_private;
2117         struct mlx5_ind_table_obj *ind_tbl;
2118         unsigned int i = 0, j = 0, k = 0;
2119
2120         ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
2121                              queues_n * sizeof(uint16_t), 0);
2122         if (!ind_tbl) {
2123                 rte_errno = ENOMEM;
2124                 return NULL;
2125         }
2126         ind_tbl->type = type;
2127         if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
2128                 const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
2129                         log2above(queues_n) :
2130                         log2above(priv->config.ind_table_max_size);
2131                 struct ibv_wq *wq[1 << wq_n];
2132
2133                 for (i = 0; i != queues_n; ++i) {
2134                         struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev,
2135                                                                  queues[i]);
2136                         if (!rxq)
2137                                 goto error;
2138                         wq[i] = rxq->obj->wq;
2139                         ind_tbl->queues[i] = queues[i];
2140                 }
2141                 ind_tbl->queues_n = queues_n;
2142                 /* Finalise indirection table. */
2143                 k = i; /* Retain value of i for use in error case. */
2144                 for (j = 0; k != (unsigned int)(1 << wq_n); ++k, ++j)
2145                         wq[k] = wq[j];
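		/*
		 * Example (hypothetical values): with 6 queues and a table of
		 * 1 << wq_n = 512 entries, entries 6..511 keep cycling through
		 * the 6 WQs (entry 6 maps to WQ 0, entry 7 to WQ 1, ...).
		 */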
2146                 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table
2147                         (priv->sh->ctx,
2148                          &(struct ibv_rwq_ind_table_init_attr){
2149                                 .log_ind_tbl_size = wq_n,
2150                                 .ind_tbl = wq,
2151                                 .comp_mask = 0,
2152                         });
2153                 if (!ind_tbl->ind_table) {
2154                         rte_errno = errno;
2155                         goto error;
2156                 }
2157         } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */
2158                 struct mlx5_devx_rqt_attr *rqt_attr = NULL;
2159
2160                 rqt_attr = rte_calloc(__func__, 1, sizeof(*rqt_attr) +
2161                                       queues_n * sizeof(uint32_t), 0);
2162                 if (!rqt_attr) {
2163                         DRV_LOG(ERR, "port %u cannot allocate RQT resources",
2164                                 dev->data->port_id);
2165                         rte_errno = ENOMEM;
2166                         goto error;
2167                 }
2168                 rqt_attr->rqt_max_size = priv->config.ind_table_max_size;
2169                 rqt_attr->rqt_actual_size = queues_n;
2170                 for (i = 0; i != queues_n; ++i) {
2171                         struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev,
2172                                                                  queues[i]);
2173                         if (!rxq)
2174                                 goto error;
2175                         rqt_attr->rq_list[i] = rxq->obj->rq->id;
2176                         ind_tbl->queues[i] = queues[i];
2177                 }
2178                 ind_tbl->rqt = mlx5_devx_cmd_create_rqt(priv->sh->ctx,
2179                                                         rqt_attr);
2180                 rte_free(rqt_attr);
2181                 if (!ind_tbl->rqt) {
2182                         DRV_LOG(ERR, "port %u cannot create DevX RQT",
2183                                 dev->data->port_id);
2184                         rte_errno = errno;
2185                         goto error;
2186                 }
2187                 ind_tbl->queues_n = queues_n;
2188         }
2189         rte_atomic32_inc(&ind_tbl->refcnt);
2190         LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
2191         return ind_tbl;
2192 error:
2193         for (j = 0; j < i; j++)
2194                 mlx5_rxq_release(dev, ind_tbl->queues[j]);
2195         rte_free(ind_tbl);
2196         DEBUG("port %u cannot create indirection table", dev->data->port_id);
2197         return NULL;
2198 }
2199
2200 /**
2201  * Get an indirection table.
2202  *
2203  * @param dev
2204  *   Pointer to Ethernet device.
2205  * @param queues
2206  *   Queues entering the indirection table.
2207  * @param queues_n
2208  *   Number of queues in the array.
2209  *
2210  * @return
2211  *   An indirection table if found.
2212  */
2213 static struct mlx5_ind_table_obj *
2214 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues,
2215                        uint32_t queues_n)
2216 {
2217         struct mlx5_priv *priv = dev->data->dev_private;
2218         struct mlx5_ind_table_obj *ind_tbl;
2219
2220         LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
2221                 if ((ind_tbl->queues_n == queues_n) &&
2222                     (memcmp(ind_tbl->queues, queues,
2223                             ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
2224                      == 0))
2225                         break;
2226         }
2227         if (ind_tbl) {
2228                 unsigned int i;
2229
2230                 rte_atomic32_inc(&ind_tbl->refcnt);
2231                 for (i = 0; i != ind_tbl->queues_n; ++i)
2232                         mlx5_rxq_get(dev, ind_tbl->queues[i]);
2233         }
2234         return ind_tbl;
2235 }
2236
2237 /**
2238  * Release an indirection table.
2239  *
2240  * @param dev
2241  *   Pointer to Ethernet device.
2242  * @param ind_table
2243  *   Indirection table to release.
2244  *
2245  * @return
2246  *   1 while a reference on it exists, 0 when freed.
2247  */
2248 static int
2249 mlx5_ind_table_obj_release(struct rte_eth_dev *dev,
2250                            struct mlx5_ind_table_obj *ind_tbl)
2251 {
2252         unsigned int i;
2253
2254         if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) {
2255                 if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV)
2256                         claim_zero(mlx5_glue->destroy_rwq_ind_table
2257                                                         (ind_tbl->ind_table));
2258                 else if (ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX)
2259                         claim_zero(mlx5_devx_cmd_destroy(ind_tbl->rqt));
2260         }
2261         for (i = 0; i != ind_tbl->queues_n; ++i)
2262                 claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i]));
2263         if (!rte_atomic32_read(&ind_tbl->refcnt)) {
2264                 LIST_REMOVE(ind_tbl, next);
2265                 rte_free(ind_tbl);
2266                 return 0;
2267         }
2268         return 1;
2269 }
2270
2271 /**
2272  * Verify that the indirection table object list is empty.
2273  *
2274  * @param dev
2275  *   Pointer to Ethernet device.
2276  *
2277  * @return
2278  *   The number of objects not released.
2279  */
2280 int
2281 mlx5_ind_table_obj_verify(struct rte_eth_dev *dev)
2282 {
2283         struct mlx5_priv *priv = dev->data->dev_private;
2284         struct mlx5_ind_table_obj *ind_tbl;
2285         int ret = 0;
2286
2287         LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
2288                 DRV_LOG(DEBUG,
2289                         "port %u indirection table obj %p still referenced",
2290                         dev->data->port_id, (void *)ind_tbl);
2291                 ++ret;
2292         }
2293         return ret;
2294 }
2295
2296 /**
2297  * Create an Rx Hash queue.
2298  *
2299  * @param dev
2300  *   Pointer to Ethernet device.
2301  * @param rss_key
2302  *   RSS key for the Rx hash queue.
2303  * @param rss_key_len
2304  *   RSS key length.
2305  * @param hash_fields
2306  *   Verbs protocol hash field to make the RSS on.
2307  * @param queues
2308  *   Queues entering the hash queue. In case of empty hash_fields only the
2309  *   first queue index will be taken for the indirection table.
2310  * @param queues_n
2311  *   Number of queues.
2312  * @param tunnel
2313  *   Tunnel type.
2314  *
2315  * @return
2316  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2317  */
2318 struct mlx5_hrxq *
2319 mlx5_hrxq_new(struct rte_eth_dev *dev,
2320               const uint8_t *rss_key, uint32_t rss_key_len,
2321               uint64_t hash_fields,
2322               const uint16_t *queues, uint32_t queues_n,
2323               int tunnel __rte_unused)
2324 {
2325         struct mlx5_priv *priv = dev->data->dev_private;
2326         struct mlx5_hrxq *hrxq;
2327         struct ibv_qp *qp = NULL;
2328         struct mlx5_ind_table_obj *ind_tbl;
2329         int err;
2330         struct mlx5_devx_obj *tir = NULL;
2331
2332         queues_n = hash_fields ? queues_n : 1;
2333         ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
2334         if (!ind_tbl) {
2335                 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[0]];
2336                 struct mlx5_rxq_ctrl *rxq_ctrl =
2337                         container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2338                 enum mlx5_ind_tbl_type type;
2339
2340                 type = rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV ?
2341                                 MLX5_IND_TBL_TYPE_IBV : MLX5_IND_TBL_TYPE_DEVX;
2342                 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, type);
2343         }
2344         if (!ind_tbl) {
2345                 rte_errno = ENOMEM;
2346                 return NULL;
2347         }
2348         if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
2349 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2350                 struct mlx5dv_qp_init_attr qp_init_attr;
2351
2352                 memset(&qp_init_attr, 0, sizeof(qp_init_attr));
2353                 if (tunnel) {
2354                         qp_init_attr.comp_mask =
2355                                 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
2356                         qp_init_attr.create_flags =
2357                                 MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
2358                 }
2359 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2360                 if (dev->data->dev_conf.lpbk_mode) {
2361                         /*
2362                          * Allow packets sent from the NIC to loop back
2363                          * without source MAC check.
2364                          */
2365                         qp_init_attr.comp_mask |=
2366                                 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
2367                         qp_init_attr.create_flags |=
2368                                 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
2369                 }
2370 #endif
2371                 qp = mlx5_glue->dv_create_qp
2372                         (priv->sh->ctx,
2373                          &(struct ibv_qp_init_attr_ex){
2374                                 .qp_type = IBV_QPT_RAW_PACKET,
2375                                 .comp_mask =
2376                                         IBV_QP_INIT_ATTR_PD |
2377                                         IBV_QP_INIT_ATTR_IND_TABLE |
2378                                         IBV_QP_INIT_ATTR_RX_HASH,
2379                                 .rx_hash_conf = (struct ibv_rx_hash_conf){
2380                                         .rx_hash_function =
2381                                                 IBV_RX_HASH_FUNC_TOEPLITZ,
2382                                         .rx_hash_key_len = rss_key_len,
2383                                         .rx_hash_key =
2384                                                 (void *)(uintptr_t)rss_key,
2385                                         .rx_hash_fields_mask = hash_fields,
2386                                 },
2387                                 .rwq_ind_tbl = ind_tbl->ind_table,
2388                                 .pd = priv->sh->pd,
2389                           },
2390                           &qp_init_attr);
2391 #else
2392                 qp = mlx5_glue->create_qp_ex
2393                         (priv->sh->ctx,
2394                          &(struct ibv_qp_init_attr_ex){
2395                                 .qp_type = IBV_QPT_RAW_PACKET,
2396                                 .comp_mask =
2397                                         IBV_QP_INIT_ATTR_PD |
2398                                         IBV_QP_INIT_ATTR_IND_TABLE |
2399                                         IBV_QP_INIT_ATTR_RX_HASH,
2400                                 .rx_hash_conf = (struct ibv_rx_hash_conf){
2401                                         .rx_hash_function =
2402                                                 IBV_RX_HASH_FUNC_TOEPLITZ,
2403                                         .rx_hash_key_len = rss_key_len,
2404                                         .rx_hash_key =
2405                                                 (void *)(uintptr_t)rss_key,
2406                                         .rx_hash_fields_mask = hash_fields,
2407                                 },
2408                                 .rwq_ind_tbl = ind_tbl->ind_table,
2409                                 .pd = priv->sh->pd,
2410                          });
2411 #endif
2412                 if (!qp) {
2413                         rte_errno = errno;
2414                         goto error;
2415                 }
2416         } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */
2417                 struct mlx5_devx_tir_attr tir_attr;
2418                 uint32_t i;
2419                 uint32_t lro = 1;
2420
2421                 /* Enable TIR LRO only if all the queues were configured for it. */
2422                 for (i = 0; i < queues_n; ++i) {
2423                         if (!(*priv->rxqs)[queues[i]]->lro) {
2424                                 lro = 0;
2425                                 break;
2426                         }
2427                 }
2428                 memset(&tir_attr, 0, sizeof(tir_attr));
2429                 tir_attr.disp_type = MLX5_TIRC_DISP_TYPE_INDIRECT;
2430                 tir_attr.rx_hash_fn = MLX5_RX_HASH_FN_TOEPLITZ;
2431                 memcpy(&tir_attr.rx_hash_field_selector_outer, &hash_fields,
2432                        sizeof(uint64_t));
2433                 tir_attr.transport_domain = priv->sh->tdn;
2434                 memcpy(tir_attr.rx_hash_toeplitz_key, rss_key, rss_key_len);
2435                 tir_attr.indirect_table = ind_tbl->rqt->id;
2436                 if (dev->data->dev_conf.lpbk_mode)
2437                         tir_attr.self_lb_block =
2438                                         MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
2439                 if (lro) {
2440                         tir_attr.lro_timeout_period_usecs =
2441                                         priv->config.lro.timeout;
2442                         tir_attr.lro_max_msg_sz = priv->max_lro_msg_size;
2443                         tir_attr.lro_enable_mask =
2444                                         MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2445                                         MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO;
2446                 }
2447                 tir = mlx5_devx_cmd_create_tir(priv->sh->ctx, &tir_attr);
2448                 if (!tir) {
2449                         DRV_LOG(ERR, "port %u cannot create DevX TIR",
2450                                 dev->data->port_id);
2451                         rte_errno = errno;
2452                         goto error;
2453                 }
2454         }
2455         hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
2456         if (!hrxq)
2457                 goto error;
2458         hrxq->ind_table = ind_tbl;
2459         if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
2460                 hrxq->qp = qp;
2461 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2462                 hrxq->action =
2463                         mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
2464                 if (!hrxq->action) {
2465                         rte_errno = errno;
2466                         goto error;
2467                 }
2468 #endif
2469         } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */
2470                 hrxq->tir = tir;
2471 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2472                 hrxq->action = mlx5_glue->dv_create_flow_action_dest_devx_tir
2473                                                         (hrxq->tir->obj);
2474                 if (!hrxq->action) {
2475                         rte_errno = errno;
2476                         goto error;
2477                 }
2478 #endif
2479         }
2480         hrxq->rss_key_len = rss_key_len;
2481         hrxq->hash_fields = hash_fields;
2482         memcpy(hrxq->rss_key, rss_key, rss_key_len);
2483         rte_atomic32_inc(&hrxq->refcnt);
2484         LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
2485         return hrxq;
2486 error:
2487         err = rte_errno; /* Save rte_errno before cleanup. */
2488         mlx5_ind_table_obj_release(dev, ind_tbl);
2489         if (qp)
2490                 claim_zero(mlx5_glue->destroy_qp(qp));
2491         else if (tir)
2492                 claim_zero(mlx5_devx_cmd_destroy(tir));
2493         rte_errno = err; /* Restore rte_errno. */
2494         return NULL;
2495 }
2496
2497 /**
2498  * Get an Rx Hash queue.
2499  *
2500  * @param dev
2501  *   Pointer to Ethernet device.
2502  * @param rss_key
2503  *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash field to make the RSS on.
2504  * @param queues
2505  *   Queues entering the hash queue. In case of empty hash_fields only the
2506  *   first queue index will be taken for the indirection table.
2507  * @param queues_n
2508  *   Number of queues.
2509  *
2510  * @return
2511  *   A matching hash Rx queue if found, NULL otherwise.
2512  */
2513 struct mlx5_hrxq *
2514 mlx5_hrxq_get(struct rte_eth_dev *dev,
2515               const uint8_t *rss_key, uint32_t rss_key_len,
2516               uint64_t hash_fields,
2517               const uint16_t *queues, uint32_t queues_n)
2518 {
2519         struct mlx5_priv *priv = dev->data->dev_private;
2520         struct mlx5_hrxq *hrxq;
2521
2522         queues_n = hash_fields ? queues_n : 1;
2523         LIST_FOREACH(hrxq, &priv->hrxqs, next) {
2524                 struct mlx5_ind_table_obj *ind_tbl;
2525
2526                 if (hrxq->rss_key_len != rss_key_len)
2527                         continue;
2528                 if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
2529                         continue;
2530                 if (hrxq->hash_fields != hash_fields)
2531                         continue;
2532                 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
2533                 if (!ind_tbl)
2534                         continue;
2535                 if (ind_tbl != hrxq->ind_table) {
2536                         mlx5_ind_table_obj_release(dev, ind_tbl);
2537                         continue;
2538                 }
2539                 rte_atomic32_inc(&hrxq->refcnt);
2540                 return hrxq;
2541         }
2542         return NULL;
2543 }
2544
2545 /**
2546  * Release the hash Rx queue.
2547  *
2548  * @param dev
2549  *   Pointer to Ethernet device.
2550  * @param hrxq
2551  *   Pointer to Hash Rx queue to release.
2552  *
2553  * @return
2554  *   1 while a reference on it exists, 0 when freed.
2555  */
2556 int
2557 mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq)
2558 {
2559         if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
2560 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2561                 mlx5_glue->destroy_flow_action(hrxq->action);
2562 #endif
2563                 if (hrxq->ind_table->type == MLX5_IND_TBL_TYPE_IBV)
2564                         claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
2565                 else /* hrxq->ind_table->type == MLX5_IND_TBL_TYPE_DEVX */
2566                         claim_zero(mlx5_devx_cmd_destroy(hrxq->tir));
2567                 mlx5_ind_table_obj_release(dev, hrxq->ind_table);
2568                 LIST_REMOVE(hrxq, next);
2569                 rte_free(hrxq);
2570                 return 0;
2571         }
2572         claim_nonzero(mlx5_ind_table_obj_release(dev, hrxq->ind_table));
2573         return 1;
2574 }
2575
2576 /**
2577  * Verify that the hash Rx queue list is empty.
2578  *
2579  * @param dev
2580  *   Pointer to Ethernet device.
2581  *
2582  * @return
2583  *   The number of objects not released.
2584  */
2585 int
2586 mlx5_hrxq_verify(struct rte_eth_dev *dev)
2587 {
2588         struct mlx5_priv *priv = dev->data->dev_private;
2589         struct mlx5_hrxq *hrxq;
2590         int ret = 0;
2591
2592         LIST_FOREACH(hrxq, &priv->hrxqs, next) {
2593                 DRV_LOG(DEBUG,
2594                         "port %u hash Rx queue %p still referenced",
2595                         dev->data->port_id, (void *)hrxq);
2596                 ++ret;
2597         }
2598         return ret;
2599 }
2600
2601 /**
2602  * Create a drop Rx queue Verbs/DevX object.
2603  *
2604  * @param dev
2605  *   Pointer to Ethernet device.
2606  *
2607  * @return
2608  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2609  */
2610 static struct mlx5_rxq_obj *
2611 mlx5_rxq_obj_drop_new(struct rte_eth_dev *dev)
2612 {
2613         struct mlx5_priv *priv = dev->data->dev_private;
2614         struct ibv_context *ctx = priv->sh->ctx;
2615         struct ibv_cq *cq;
2616         struct ibv_wq *wq = NULL;
2617         struct mlx5_rxq_obj *rxq;
2618
2619         if (priv->drop_queue.rxq)
2620                 return priv->drop_queue.rxq;
2621         cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
2622         if (!cq) {
2623                 DEBUG("port %u cannot allocate CQ for drop queue",
2624                       dev->data->port_id);
2625                 rte_errno = errno;
2626                 goto error;
2627         }
2628         wq = mlx5_glue->create_wq(ctx,
2629                  &(struct ibv_wq_init_attr){
2630                         .wq_type = IBV_WQT_RQ,
2631                         .max_wr = 1,
2632                         .max_sge = 1,
2633                         .pd = priv->sh->pd,
2634                         .cq = cq,
2635                  });
2636         if (!wq) {
2637                 DEBUG("port %u cannot allocate WQ for drop queue",
2638                       dev->data->port_id);
2639                 rte_errno = errno;
2640                 goto error;
2641         }
2642         rxq = rte_calloc(__func__, 1, sizeof(*rxq), 0);
2643         if (!rxq) {
2644                 DEBUG("port %u cannot allocate drop Rx queue memory",
2645                       dev->data->port_id);
2646                 rte_errno = ENOMEM;
2647                 goto error;
2648         }
2649         rxq->cq = cq;
2650         rxq->wq = wq;
2651         priv->drop_queue.rxq = rxq;
2652         return rxq;
2653 error:
2654         if (wq)
2655                 claim_zero(mlx5_glue->destroy_wq(wq));
2656         if (cq)
2657                 claim_zero(mlx5_glue->destroy_cq(cq));
2658         return NULL;
2659 }
2660
2661 /**
2662  * Release a drop Rx queue Verbs/DevX object.
2663  *
2664  * @param dev
2665  *   Pointer to Ethernet device.
2669  */
2670 static void
2671 mlx5_rxq_obj_drop_release(struct rte_eth_dev *dev)
2672 {
2673         struct mlx5_priv *priv = dev->data->dev_private;
2674         struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;
2675
2676         if (rxq->wq)
2677                 claim_zero(mlx5_glue->destroy_wq(rxq->wq));
2678         if (rxq->cq)
2679                 claim_zero(mlx5_glue->destroy_cq(rxq->cq));
2680         rte_free(rxq);
2681         priv->drop_queue.rxq = NULL;
2682 }
2683
2684 /**
2685  * Create a drop indirection table.
2686  *
2687  * @param dev
2688  *   Pointer to Ethernet device.
2689  *
2690  * @return
2691  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2692  */
2693 static struct mlx5_ind_table_obj *
2694 mlx5_ind_table_obj_drop_new(struct rte_eth_dev *dev)
2695 {
2696         struct mlx5_priv *priv = dev->data->dev_private;
2697         struct mlx5_ind_table_obj *ind_tbl;
2698         struct mlx5_rxq_obj *rxq;
2699         struct mlx5_ind_table_obj tmpl;
2700
2701         rxq = mlx5_rxq_obj_drop_new(dev);
2702         if (!rxq)
2703                 return NULL;
2704         tmpl.ind_table = mlx5_glue->create_rwq_ind_table
2705                 (priv->sh->ctx,
2706                  &(struct ibv_rwq_ind_table_init_attr){
2707                         .log_ind_tbl_size = 0,
2708                         .ind_tbl = &rxq->wq,
2709                         .comp_mask = 0,
2710                  });
2711         if (!tmpl.ind_table) {
2712                 DEBUG("port %u cannot allocate indirection table for drop"
2713                       " queue",
2714                       dev->data->port_id);
2715                 rte_errno = errno;
2716                 goto error;
2717         }
2718         ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl), 0);
2719         if (!ind_tbl) {
2720                 rte_errno = ENOMEM;
2721                 goto error;
2722         }
2723         ind_tbl->ind_table = tmpl.ind_table;
2724         return ind_tbl;
2725 error:
2726         mlx5_rxq_obj_drop_release(dev);
2727         return NULL;
2728 }
2729
2730 /**
2731  * Release a drop indirection table.
2732  *
2733  * @param dev
2734  *   Pointer to Ethernet device.
2735  */
2736 static void
2737 mlx5_ind_table_obj_drop_release(struct rte_eth_dev *dev)
2738 {
2739         struct mlx5_priv *priv = dev->data->dev_private;
2740         struct mlx5_ind_table_obj *ind_tbl = priv->drop_queue.hrxq->ind_table;
2741
2742         claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
2743         mlx5_rxq_obj_drop_release(dev);
2744         rte_free(ind_tbl);
2745         priv->drop_queue.hrxq->ind_table = NULL;
2746 }
2747
2748 /**
2749  * Create a drop Rx Hash queue.
2750  *
2751  * @param dev
2752  *   Pointer to Ethernet device.
2753  *
2754  * @return
2755  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2756  */
2757 struct mlx5_hrxq *
2758 mlx5_hrxq_drop_new(struct rte_eth_dev *dev)
2759 {
2760         struct mlx5_priv *priv = dev->data->dev_private;
2761         struct mlx5_ind_table_obj *ind_tbl = NULL;
2762         struct ibv_qp *qp = NULL;
2763         struct mlx5_hrxq *hrxq = NULL;
2764
2765         if (priv->drop_queue.hrxq) {
2766                 rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt);
2767                 return priv->drop_queue.hrxq;
2768         }
2769         hrxq = rte_calloc(__func__, 1, sizeof(*hrxq), 0);
2770         if (!hrxq) {
2771                 DRV_LOG(WARNING,
2772                         "port %u cannot allocate memory for drop queue",
2773                         dev->data->port_id);
2774                 rte_errno = ENOMEM;
2775                 goto error;
2776         }
2777         priv->drop_queue.hrxq = hrxq;
2778         ind_tbl = mlx5_ind_table_obj_drop_new(dev);
2779         if (!ind_tbl)
2780                 goto error;
2781         hrxq->ind_table = ind_tbl;
2782         qp = mlx5_glue->create_qp_ex(priv->sh->ctx,
2783                  &(struct ibv_qp_init_attr_ex){
2784                         .qp_type = IBV_QPT_RAW_PACKET,
2785                         .comp_mask =
2786                                 IBV_QP_INIT_ATTR_PD |
2787                                 IBV_QP_INIT_ATTR_IND_TABLE |
2788                                 IBV_QP_INIT_ATTR_RX_HASH,
2789                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2790                                 .rx_hash_function =
2791                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2792                                 .rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
2793                                 .rx_hash_key = rss_hash_default_key,
2794                                 .rx_hash_fields_mask = 0,
2795                                 },
2796                         .rwq_ind_tbl = ind_tbl->ind_table,
2797                         .pd = priv->sh->pd
2798                  });
2799         if (!qp) {
2800                 DEBUG("port %u cannot allocate QP for drop queue",
2801                       dev->data->port_id);
2802                 rte_errno = errno;
2803                 goto error;
2804         }
2805         hrxq->qp = qp;
2806 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2807         hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
2808         if (!hrxq->action) {
2809                 rte_errno = errno;
2810                 goto error;
2811         }
2812 #endif
2813         rte_atomic32_set(&hrxq->refcnt, 1);
2814         return hrxq;
2815 error:
2816 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2817         if (hrxq && hrxq->action)
2818                 mlx5_glue->destroy_flow_action(hrxq->action);
2819 #endif
2820         if (qp)
2821                 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
2822         if (ind_tbl)
2823                 mlx5_ind_table_obj_drop_release(dev);
2824         if (hrxq) {
2825                 priv->drop_queue.hrxq = NULL;
2826                 rte_free(hrxq);
2827         }
2828         return NULL;
2829 }
2830
2831 /**
2832  * Release a drop hash Rx queue.
2833  *
2834  * @param dev
2835  *   Pointer to Ethernet device.
2836  */
2837 void
2838 mlx5_hrxq_drop_release(struct rte_eth_dev *dev)
2839 {
2840         struct mlx5_priv *priv = dev->data->dev_private;
2841         struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
2842
2843         if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
2844 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2845                 mlx5_glue->destroy_flow_action(hrxq->action);
2846 #endif
2847                 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
2848                 mlx5_ind_table_obj_drop_release(dev);
2849                 rte_free(hrxq);
2850                 priv->drop_queue.hrxq = NULL;
2851         }
2852 }