1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5
6 #include <stddef.h>
7 #include <assert.h>
8 #include <errno.h>
9 #include <string.h>
10 #include <stdint.h>
11 #include <fcntl.h>
12 #include <sys/queue.h>
13
14 /* Verbs header. */
15 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic ignored "-Wpedantic"
18 #endif
19 #include <infiniband/verbs.h>
20 #include <infiniband/mlx5dv.h>
21 #ifdef PEDANTIC
22 #pragma GCC diagnostic error "-Wpedantic"
23 #endif
24
25 #include <rte_mbuf.h>
26 #include <rte_malloc.h>
27 #include <rte_ethdev_driver.h>
28 #include <rte_common.h>
29 #include <rte_interrupts.h>
30 #include <rte_debug.h>
31 #include <rte_io.h>
32
33 #include "mlx5.h"
34 #include "mlx5_rxtx.h"
35 #include "mlx5_utils.h"
36 #include "mlx5_autoconf.h"
37 #include "mlx5_defs.h"
38 #include "mlx5_glue.h"
39 #include "mlx5_flow.h"
40 #include "mlx5_devx_cmds.h"
41
42 /* Default RSS hash key also used for ConnectX-3. */
43 uint8_t rss_hash_default_key[] = {
44         0x2c, 0xc6, 0x81, 0xd1,
45         0x5b, 0xdb, 0xf4, 0xf7,
46         0xfc, 0xa2, 0x83, 0x19,
47         0xdb, 0x1a, 0x3e, 0x94,
48         0x6b, 0x9e, 0x38, 0xd9,
49         0x2c, 0x9c, 0x03, 0xd1,
50         0xad, 0x99, 0x44, 0xa7,
51         0xd9, 0x56, 0x3d, 0x59,
52         0x06, 0x3c, 0x25, 0xf3,
53         0xfc, 0x1f, 0xdc, 0x2a,
54 };
55
56 /* Length of the default RSS hash key. */
57 static_assert(MLX5_RSS_HASH_KEY_LEN ==
58               (unsigned int)sizeof(rss_hash_default_key),
59               "wrong RSS default key size.");
60
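/*
 * Illustrative sketch, not part of the driver: an application that wants to
 * control the Toeplitz key supplies its own 40-byte key through the regular
 * ethdev RSS configuration; the PMD falls back to rss_hash_default_key only
 * when no key is given. Everything below is public rte_ethdev API, only the
 * function name is hypothetical.
 */
static __rte_unused int
example_configure_rss(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
        struct rte_eth_conf conf = {
                .rxmode = {
                        .mq_mode = ETH_MQ_RX_RSS,
                },
                .rx_adv_conf.rss_conf = {
                        .rss_key = rss_hash_default_key,
                        .rss_key_len = MLX5_RSS_HASH_KEY_LEN,
                        .rss_hf = ETH_RSS_IP,
                },
        };

        return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
}
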
61 /**
62  * Check whether Multi-Packet RQ can be enabled for the device.
63  *
64  * @param dev
65  *   Pointer to Ethernet device.
66  *
67  * @return
68  *   1 if supported, negative errno value if not.
69  */
70 inline int
71 mlx5_check_mprq_support(struct rte_eth_dev *dev)
72 {
73         struct mlx5_priv *priv = dev->data->dev_private;
74
75         if (priv->config.mprq.enabled &&
76             priv->rxqs_n >= priv->config.mprq.min_rxqs_num)
77                 return 1;
78         return -ENOTSUP;
79 }
80
81 /**
82  * Check whether Multi-Packet RQ is enabled for the Rx queue.
83  *
84  * @param rxq
85  *   Pointer to receive queue structure.
86  *
87  * @return
88  *   0 if disabled, otherwise enabled.
89  */
90 inline int
91 mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq)
92 {
93         return rxq->strd_num_n > 0;
94 }
95
96 /**
97  * Check whether Multi-Packet RQ is enabled for the device.
98  *
99  * @param dev
100  *   Pointer to Ethernet device.
101  *
102  * @return
103  *   0 if disabled, otherwise enabled.
104  */
105 inline int
106 mlx5_mprq_enabled(struct rte_eth_dev *dev)
107 {
108         struct mlx5_priv *priv = dev->data->dev_private;
109         uint16_t i;
110         uint16_t n = 0;
111         uint16_t n_ibv = 0;
112
113         if (mlx5_check_mprq_support(dev) < 0)
114                 return 0;
115         /* All the configured queues should be enabled. */
116         for (i = 0; i < priv->rxqs_n; ++i) {
117                 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
118                 struct mlx5_rxq_ctrl *rxq_ctrl = container_of
119                         (rxq, struct mlx5_rxq_ctrl, rxq);
120
121                 if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
122                         continue;
123                 n_ibv++;
124                 if (mlx5_rxq_mprq_enabled(rxq))
125                         ++n;
126         }
127         /* Multi-Packet RQ can't be partially configured. */
128         assert(n == 0 || n == n_ibv);
129         return n == n_ibv;
130 }
131
132 /**
133  * Allocate RX queue elements for Multi-Packet RQ.
134  *
135  * @param rxq_ctrl
136  *   Pointer to RX queue structure.
137  *
138  * @return
139  *   0 on success, a negative errno value otherwise and rte_errno is set.
140  */
141 static int
142 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
143 {
144         struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
145         unsigned int wqe_n = 1 << rxq->elts_n;
146         unsigned int i;
147         int err;
148
149         /* Iterate on segments. */
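        /*
         * Note: the loop goes one entry past the ring size on purpose; the
         * extra buffer (index wqe_n) is parked in rxq->mprq_repl as an
         * immediate replacement when a ring buffer is still referenced by the
         * application.
         */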
150         for (i = 0; i <= wqe_n; ++i) {
151                 struct mlx5_mprq_buf *buf;
152
153                 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) {
154                         DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id);
155                         rte_errno = ENOMEM;
156                         goto error;
157                 }
158                 if (i < wqe_n)
159                         (*rxq->mprq_bufs)[i] = buf;
160                 else
161                         rxq->mprq_repl = buf;
162         }
163         DRV_LOG(DEBUG,
164                 "port %u Rx queue %u allocated and configured %u segments",
165                 rxq->port_id, rxq->idx, wqe_n);
166         return 0;
167 error:
168         err = rte_errno; /* Save rte_errno before cleanup. */
169         wqe_n = i;
170         for (i = 0; (i != wqe_n); ++i) {
171                 if ((*rxq->mprq_bufs)[i] != NULL)
172                         rte_mempool_put(rxq->mprq_mp,
173                                         (*rxq->mprq_bufs)[i]);
174                 (*rxq->mprq_bufs)[i] = NULL;
175         }
176         DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
177                 rxq->port_id, rxq->idx);
178         rte_errno = err; /* Restore rte_errno. */
179         return -rte_errno;
180 }
181
182 /**
183  * Allocate RX queue elements for Single-Packet RQ.
184  *
185  * @param rxq_ctrl
186  *   Pointer to RX queue structure.
187  *
188  * @return
189  *   0 on success, errno value on failure.
190  */
191 static int
192 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
193 {
194         const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
195         unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
196         unsigned int i;
197         int err;
198
199         /* Iterate on segments. */
200         for (i = 0; (i != elts_n); ++i) {
201                 struct rte_mbuf *buf;
202
203                 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
204                 if (buf == NULL) {
205                         DRV_LOG(ERR, "port %u empty mbuf pool",
206                                 PORT_ID(rxq_ctrl->priv));
207                         rte_errno = ENOMEM;
208                         goto error;
209                 }
210                 /* Headroom is reserved by rte_pktmbuf_alloc(). */
211                 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
212                 /* Buffer is supposed to be empty. */
213                 assert(rte_pktmbuf_data_len(buf) == 0);
214                 assert(rte_pktmbuf_pkt_len(buf) == 0);
215                 assert(!buf->next);
216                 /* Only the first segment keeps headroom. */
217                 if (i % sges_n)
218                         SET_DATA_OFF(buf, 0);
219                 PORT(buf) = rxq_ctrl->rxq.port_id;
220                 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
221                 PKT_LEN(buf) = DATA_LEN(buf);
222                 NB_SEGS(buf) = 1;
223                 (*rxq_ctrl->rxq.elts)[i] = buf;
224         }
225         /* If Rx vector is activated. */
226         if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
227                 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
228                 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
229                 struct rte_pktmbuf_pool_private *priv =
230                         (struct rte_pktmbuf_pool_private *)
231                                 rte_mempool_get_priv(rxq_ctrl->rxq.mp);
232                 int j;
233
234                 /* Initialize default rearm_data for vPMD. */
235                 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
236                 rte_mbuf_refcnt_set(mbuf_init, 1);
237                 mbuf_init->nb_segs = 1;
238                 mbuf_init->port = rxq->port_id;
239                 if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF)
240                         mbuf_init->ol_flags = EXT_ATTACHED_MBUF;
241                 /*
242                  * prevent compiler reordering:
243                  * rearm_data covers previous fields.
244                  */
245                 rte_compiler_barrier();
246                 rxq->mbuf_initializer =
247                         *(rte_xmm_t *)&mbuf_init->rearm_data;
248                 /* Padding with a fake mbuf for vectorized Rx. */
249                 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
250                         (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
251         }
252         DRV_LOG(DEBUG,
253                 "port %u Rx queue %u allocated and configured %u segments"
254                 " (max %u packets)",
255                 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
256                 elts_n / (1 << rxq_ctrl->rxq.sges_n));
257         return 0;
258 error:
259         err = rte_errno; /* Save rte_errno before cleanup. */
260         elts_n = i;
261         for (i = 0; (i != elts_n); ++i) {
262                 if ((*rxq_ctrl->rxq.elts)[i] != NULL)
263                         rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
264                 (*rxq_ctrl->rxq.elts)[i] = NULL;
265         }
266         DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
267                 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
268         rte_errno = err; /* Restore rte_errno. */
269         return -rte_errno;
270 }
271
272 /**
273  * Allocate RX queue elements.
274  *
275  * @param rxq_ctrl
276  *   Pointer to RX queue structure.
277  *
278  * @return
279  *   0 on success, errno value on failure.
280  */
281 int
282 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
283 {
284         return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
285                rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
286 }
287
288 /**
289  * Free RX queue elements for Multi-Packet RQ.
290  *
291  * @param rxq_ctrl
292  *   Pointer to RX queue structure.
293  */
294 static void
295 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
296 {
297         struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
298         uint16_t i;
299
300         DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
301                 rxq->port_id, rxq->idx);
302         if (rxq->mprq_bufs == NULL)
303                 return;
304         assert(mlx5_rxq_check_vec_support(rxq) < 0);
305         for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
306                 if ((*rxq->mprq_bufs)[i] != NULL)
307                         mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
308                 (*rxq->mprq_bufs)[i] = NULL;
309         }
310         if (rxq->mprq_repl != NULL) {
311                 mlx5_mprq_buf_free(rxq->mprq_repl);
312                 rxq->mprq_repl = NULL;
313         }
314 }
315
316 /**
317  * Free RX queue elements for Single-Packet RQ.
318  *
319  * @param rxq_ctrl
320  *   Pointer to RX queue structure.
321  */
322 static void
323 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
324 {
325         struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
326         const uint16_t q_n = (1 << rxq->elts_n);
327         const uint16_t q_mask = q_n - 1;
328         uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
329         uint16_t i;
330
331         DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
332                 PORT_ID(rxq_ctrl->priv), rxq->idx);
333         if (rxq->elts == NULL)
334                 return;
335         /*
336          * Some mbufs in the ring belong to the application; they cannot be
337          * freed.
338          */
339         if (mlx5_rxq_check_vec_support(rxq) > 0) {
340                 for (i = 0; i < used; ++i)
341                         (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
342                 rxq->rq_pi = rxq->rq_ci;
343         }
344         for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
345                 if ((*rxq->elts)[i] != NULL)
346                         rte_pktmbuf_free_seg((*rxq->elts)[i]);
347                 (*rxq->elts)[i] = NULL;
348         }
349 }
350
351 /**
352  * Free RX queue elements.
353  *
354  * @param rxq_ctrl
355  *   Pointer to RX queue structure.
356  */
357 static void
358 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
359 {
360         if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
361                 rxq_free_elts_mprq(rxq_ctrl);
362         else
363                 rxq_free_elts_sprq(rxq_ctrl);
364 }
365
366 /**
367  * Returns the per-queue supported offloads.
368  *
369  * @param dev
370  *   Pointer to Ethernet device.
371  *
372  * @return
373  *   Supported Rx offloads.
374  */
375 uint64_t
376 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev)
377 {
378         struct mlx5_priv *priv = dev->data->dev_private;
379         struct mlx5_dev_config *config = &priv->config;
380         uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER |
381                              DEV_RX_OFFLOAD_TIMESTAMP |
382                              DEV_RX_OFFLOAD_JUMBO_FRAME |
383                              DEV_RX_OFFLOAD_RSS_HASH);
384
385         if (config->hw_fcs_strip)
386                 offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
387
388         if (config->hw_csum)
389                 offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM |
390                              DEV_RX_OFFLOAD_UDP_CKSUM |
391                              DEV_RX_OFFLOAD_TCP_CKSUM);
392         if (config->hw_vlan_strip)
393                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
394         if (MLX5_LRO_SUPPORTED(dev))
395                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
396         return offloads;
397 }
398
399
400 /**
401  * Returns the per-port supported offloads.
402  *
403  * @return
404  *   Supported Rx offloads.
405  */
406 uint64_t
407 mlx5_get_rx_port_offloads(void)
408 {
409         uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER;
410
411         return offloads;
412 }
413
414 /**
415  * Verify if the queue can be released.
416  *
417  * @param dev
418  *   Pointer to Ethernet device.
419  * @param idx
420  *   RX queue index.
421  *
422  * @return
423  *   1 if the queue can be released,
424  *   0 if the queue cannot be released (there are references to it),
425  *   a negative errno value and rte_errno set if the queue does not exist.
426  */
427 static int
428 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
429 {
430         struct mlx5_priv *priv = dev->data->dev_private;
431         struct mlx5_rxq_ctrl *rxq_ctrl;
432
433         if (!(*priv->rxqs)[idx]) {
434                 rte_errno = EINVAL;
435                 return -rte_errno;
436         }
437         rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
438         return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
439 }
440
441 /**
442  * Rx queue presetup checks.
443  *
444  * @param dev
445  *   Pointer to Ethernet device structure.
446  * @param idx
447  *   RX queue index.
448  * @param desc
449  *   Number of descriptors to configure in queue.
450  *
451  * @return
452  *   0 on success, a negative errno value otherwise and rte_errno is set.
453  */
454 static int
455 mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc)
456 {
457         struct mlx5_priv *priv = dev->data->dev_private;
458
459         if (!rte_is_power_of_2(desc)) {
460                 desc = 1 << log2above(desc);
461                 DRV_LOG(WARNING,
462                         "port %u increased number of descriptors in Rx queue %u"
463                         " to the next power of two (%d)",
464                         dev->data->port_id, idx, desc);
465         }
466         DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
467                 dev->data->port_id, idx, desc);
468         if (idx >= priv->rxqs_n) {
469                 DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
470                         dev->data->port_id, idx, priv->rxqs_n);
471                 rte_errno = EOVERFLOW;
472                 return -rte_errno;
473         }
474         if (!mlx5_rxq_releasable(dev, idx)) {
475                 DRV_LOG(ERR, "port %u unable to release queue index %u",
476                         dev->data->port_id, idx);
477                 rte_errno = EBUSY;
478                 return -rte_errno;
479         }
480         mlx5_rxq_release(dev, idx);
481         return 0;
482 }
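
/*
 * Example of the descriptor-count adjustment above: a request for 1000
 * descriptors is not a power of two, so log2above(1000) == 10 and the check
 * bumps the count to 1 << 10 == 1024, logging a warning about it.
 */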
483
484 /**
485  * DPDK callback to configure a Rx queue.
486  * @param dev
487  *   Pointer to Ethernet device structure.
488  * @param idx
489  *   RX queue index.
490  * @param desc
491  *   Number of descriptors to configure in queue.
492  * @param socket
493  *   NUMA socket on which memory must be allocated.
494  * @param[in] conf
495  *   Thresholds parameters.
496  * @param mp
497  *   Memory pool for buffer allocations.
498  *
499  * @return
500  *   0 on success, a negative errno value otherwise and rte_errno is set.
501  */
502 int
503 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
504                     unsigned int socket, const struct rte_eth_rxconf *conf,
505                     struct rte_mempool *mp)
506 {
507         struct mlx5_priv *priv = dev->data->dev_private;
508         struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
509         struct mlx5_rxq_ctrl *rxq_ctrl =
510                 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
511         int res;
512
513         res = mlx5_rx_queue_pre_setup(dev, idx, desc);
514         if (res)
515                 return res;
516         rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
517         if (!rxq_ctrl) {
518                 DRV_LOG(ERR, "port %u unable to allocate queue index %u",
519                         dev->data->port_id, idx);
520                 rte_errno = ENOMEM;
521                 return -rte_errno;
522         }
523         DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
524                 dev->data->port_id, idx);
525         (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
526         return 0;
527 }
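
/*
 * Illustrative sketch, not part of the driver: the callback above is reached
 * through the generic rte_eth_rx_queue_setup() wrapper, e.g. as below. Only
 * the public ethdev API is used; the function name is hypothetical.
 */
static __rte_unused int
example_setup_one_rxq(uint16_t port_id, uint16_t queue_id, unsigned int socket,
                      struct rte_mempool *mp)
{
        /*
         * 512 descriptors is already a power of two, so the pre-setup check
         * keeps it unchanged; a NULL rxconf selects the driver defaults.
         */
        return rte_eth_rx_queue_setup(port_id, queue_id, 512, socket, NULL, mp);
}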
528
529 /**
530  * DPDK callback to configure a Rx hairpin queue.
531  * @param dev
532  *   Pointer to Ethernet device structure.
533  * @param idx
534  *   RX queue index.
535  * @param desc
536  *   Number of descriptors to configure in queue.
537  * @param hairpin_conf
538  *   Hairpin configuration parameters.
539  *
540  * @return
541  *   0 on success, a negative errno value otherwise and rte_errno is set.
542  */
543 int
544 mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
545                             uint16_t desc,
546                             const struct rte_eth_hairpin_conf *hairpin_conf)
547 {
548         struct mlx5_priv *priv = dev->data->dev_private;
549         struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
550         struct mlx5_rxq_ctrl *rxq_ctrl =
551                 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
552         int res;
553
554         res = mlx5_rx_queue_pre_setup(dev, idx, desc);
555         if (res)
556                 return res;
557         if (hairpin_conf->peer_count != 1 ||
558             hairpin_conf->peers[0].port != dev->data->port_id ||
559             hairpin_conf->peers[0].queue >= priv->txqs_n) {
560                 DRV_LOG(ERR, "port %u unable to set up hairpin queue index %u:"
561                         " invalid hairpin configuration", dev->data->port_id,
562                         idx);
563                 rte_errno = EINVAL;
564                 return -rte_errno;
565         }
566         rxq_ctrl = mlx5_rxq_hairpin_new(dev, idx, desc, hairpin_conf);
567         if (!rxq_ctrl) {
568                 DRV_LOG(ERR, "port %u unable to allocate queue index %u",
569                         dev->data->port_id, idx);
570                 rte_errno = ENOMEM;
571                 return -rte_errno;
572         }
573         DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
574                 dev->data->port_id, idx);
575         (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
576         return 0;
577 }
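
/*
 * Illustrative sketch, not part of the driver: a hairpin configuration that
 * passes the validation above binds exactly one peer, on the same port, to an
 * existing Tx queue index. Only the public ethdev API is used; the function
 * name is hypothetical.
 */
static __rte_unused int
example_setup_hairpin_rxq(uint16_t port_id, uint16_t rxq_id, uint16_t txq_id)
{
        struct rte_eth_hairpin_conf conf = {
                .peer_count = 1,
                .peers[0] = {
                        .port = port_id,
                        .queue = txq_id,
                },
        };

        return rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, 512, &conf);
}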
578
579 /**
580  * DPDK callback to release a RX queue.
581  *
582  * @param dpdk_rxq
583  *   Generic RX queue pointer.
584  */
585 void
586 mlx5_rx_queue_release(void *dpdk_rxq)
587 {
588         struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
589         struct mlx5_rxq_ctrl *rxq_ctrl;
590         struct mlx5_priv *priv;
591
592         if (rxq == NULL)
593                 return;
594         rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
595         priv = rxq_ctrl->priv;
596         if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
597                 rte_panic("port %u Rx queue %u is still used by a flow and"
598                           " cannot be removed\n",
599                           PORT_ID(priv), rxq->idx);
600         mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
601 }
602
603 /**
604  * Get an Rx queue Verbs/DevX object.
605  *
606  * @param dev
607  *   Pointer to Ethernet device.
608  * @param idx
609  *   Queue index in DPDK Rx queue array
610  *
611  * @return
612  *   The Verbs/DevX object if it exists.
613  */
614 static struct mlx5_rxq_obj *
615 mlx5_rxq_obj_get(struct rte_eth_dev *dev, uint16_t idx)
616 {
617         struct mlx5_priv *priv = dev->data->dev_private;
618         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
619         struct mlx5_rxq_ctrl *rxq_ctrl;
620
621         if (idx >= priv->rxqs_n)
622                 return NULL;
623         if (!rxq_data)
624                 return NULL;
625         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
626         if (rxq_ctrl->obj)
627                 rte_atomic32_inc(&rxq_ctrl->obj->refcnt);
628         return rxq_ctrl->obj;
629 }
630
631 /**
632  * Release the resources allocated for an RQ DevX object.
633  *
634  * @param rxq_ctrl
635  *   DevX Rx queue object.
636  */
637 static void
638 rxq_release_rq_resources(struct mlx5_rxq_ctrl *rxq_ctrl)
639 {
640         if (rxq_ctrl->rxq.wqes) {
641                 rte_free((void *)(uintptr_t)rxq_ctrl->rxq.wqes);
642                 rxq_ctrl->rxq.wqes = NULL;
643         }
644         if (rxq_ctrl->wq_umem) {
645                 mlx5_glue->devx_umem_dereg(rxq_ctrl->wq_umem);
646                 rxq_ctrl->wq_umem = NULL;
647         }
648 }
649
650 /**
651  * Release Rx hairpin-related resources.
652  *
653  * @param rxq_obj
654  *   Hairpin Rx queue object.
655  */
656 static void
657 rxq_obj_hairpin_release(struct mlx5_rxq_obj *rxq_obj)
658 {
659         struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
660
661         assert(rxq_obj);
662         rq_attr.state = MLX5_RQC_STATE_RST;
663         rq_attr.rq_state = MLX5_RQC_STATE_RDY;
664         mlx5_devx_cmd_modify_rq(rxq_obj->rq, &rq_attr);
665         claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq));
666 }
667
668 /**
669  * Release an Rx verbs/DevX queue object.
670  *
671  * @param rxq_obj
672  *   Verbs/DevX Rx queue object.
673  *
674  * @return
675  *   1 while a reference on it exists, 0 when freed.
676  */
677 static int
678 mlx5_rxq_obj_release(struct mlx5_rxq_obj *rxq_obj)
679 {
680         assert(rxq_obj);
681         if (rte_atomic32_dec_and_test(&rxq_obj->refcnt)) {
682                 switch (rxq_obj->type) {
683                 case MLX5_RXQ_OBJ_TYPE_IBV:
684                         assert(rxq_obj->wq);
685                         assert(rxq_obj->cq);
686                         rxq_free_elts(rxq_obj->rxq_ctrl);
687                         claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
688                         claim_zero(mlx5_glue->destroy_cq(rxq_obj->cq));
689                         break;
690                 case MLX5_RXQ_OBJ_TYPE_DEVX_RQ:
691                         assert(rxq_obj->cq);
692                         assert(rxq_obj->rq);
693                         rxq_free_elts(rxq_obj->rxq_ctrl);
694                         claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq));
695                         rxq_release_rq_resources(rxq_obj->rxq_ctrl);
696                         claim_zero(mlx5_glue->destroy_cq(rxq_obj->cq));
697                         break;
698                 case MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN:
699                         assert(rxq_obj->rq);
700                         rxq_obj_hairpin_release(rxq_obj);
701                         break;
702                 }
703                 if (rxq_obj->channel)
704                         claim_zero(mlx5_glue->destroy_comp_channel
705                                    (rxq_obj->channel));
706                 LIST_REMOVE(rxq_obj, next);
707                 rte_free(rxq_obj);
708                 return 0;
709         }
710         return 1;
711 }
712
713 /**
714  * Allocate queue vector and fill epoll fd list for Rx interrupts.
715  *
716  * @param dev
717  *   Pointer to Ethernet device.
718  *
719  * @return
720  *   0 on success, a negative errno value otherwise and rte_errno is set.
721  */
722 int
723 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
724 {
725         struct mlx5_priv *priv = dev->data->dev_private;
726         unsigned int i;
727         unsigned int rxqs_n = priv->rxqs_n;
728         unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
729         unsigned int count = 0;
730         struct rte_intr_handle *intr_handle = dev->intr_handle;
731
732         if (!dev->data->dev_conf.intr_conf.rxq)
733                 return 0;
734         mlx5_rx_intr_vec_disable(dev);
735         intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
736         if (intr_handle->intr_vec == NULL) {
737                 DRV_LOG(ERR,
738                         "port %u failed to allocate memory for interrupt"
739                         " vector, Rx interrupts will not be supported",
740                         dev->data->port_id);
741                 rte_errno = ENOMEM;
742                 return -rte_errno;
743         }
744         intr_handle->type = RTE_INTR_HANDLE_EXT;
745         for (i = 0; i != n; ++i) {
746                 /* This rxq obj must not be released in this function. */
747                 struct mlx5_rxq_obj *rxq_obj = mlx5_rxq_obj_get(dev, i);
748                 int fd;
749                 int flags;
750                 int rc;
751
752                 /* Skip queues that cannot request interrupts. */
753                 if (!rxq_obj || !rxq_obj->channel) {
754                         /* Use invalid intr_vec[] index to disable entry. */
755                         intr_handle->intr_vec[i] =
756                                 RTE_INTR_VEC_RXTX_OFFSET +
757                                 RTE_MAX_RXTX_INTR_VEC_ID;
758                         continue;
759                 }
760                 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
761                         DRV_LOG(ERR,
762                                 "port %u too many Rx queues for interrupt"
763                                 " vector size (%d), Rx interrupts cannot be"
764                                 " enabled",
765                                 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID);
766                         mlx5_rx_intr_vec_disable(dev);
767                         rte_errno = ENOMEM;
768                         return -rte_errno;
769                 }
770                 fd = rxq_obj->channel->fd;
771                 flags = fcntl(fd, F_GETFL);
772                 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
773                 if (rc < 0) {
774                         rte_errno = errno;
775                         DRV_LOG(ERR,
776                                 "port %u failed to make Rx interrupt file"
777                                 " descriptor %d non-blocking for queue index"
778                                 " %d",
779                                 dev->data->port_id, fd, i);
780                         mlx5_rx_intr_vec_disable(dev);
781                         return -rte_errno;
782                 }
783                 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
784                 intr_handle->efds[count] = fd;
785                 count++;
786         }
787         if (!count)
788                 mlx5_rx_intr_vec_disable(dev);
789         else
790                 intr_handle->nb_efd = count;
791         return 0;
792 }
793
794 /**
795  * Clean up Rx interrupts handler.
796  *
797  * @param dev
798  *   Pointer to Ethernet device.
799  */
800 void
801 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
802 {
803         struct mlx5_priv *priv = dev->data->dev_private;
804         struct rte_intr_handle *intr_handle = dev->intr_handle;
805         unsigned int i;
806         unsigned int rxqs_n = priv->rxqs_n;
807         unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
808
809         if (!dev->data->dev_conf.intr_conf.rxq)
810                 return;
811         if (!intr_handle->intr_vec)
812                 goto free;
813         for (i = 0; i != n; ++i) {
814                 struct mlx5_rxq_ctrl *rxq_ctrl;
815                 struct mlx5_rxq_data *rxq_data;
816
817                 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
818                     RTE_MAX_RXTX_INTR_VEC_ID)
819                         continue;
820                 /*
821                  * Need to access the queue directly to release the
822                  * reference kept in mlx5_rx_intr_vec_enable().
823                  */
824                 rxq_data = (*priv->rxqs)[i];
825                 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
826                 if (rxq_ctrl->obj)
827                         mlx5_rxq_obj_release(rxq_ctrl->obj);
828         }
829 free:
830         rte_intr_free_epoll_fd(intr_handle);
831         if (intr_handle->intr_vec)
832                 free(intr_handle->intr_vec);
833         intr_handle->nb_efd = 0;
834         intr_handle->intr_vec = NULL;
835 }
836
837 /**
838  * MLX5 CQ notification.
839  *
840  * @param rxq
841  *   Pointer to receive queue structure.
842  * @param sq_n_rxq
843  *   Sequence number per receive queue.
844  */
845 static inline void
846 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
847 {
848         int sq_n = 0;
849         uint32_t doorbell_hi;
850         uint64_t doorbell;
851         void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
852
853         sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
854         doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
855         doorbell = (uint64_t)doorbell_hi << 32;
856         doorbell |=  rxq->cqn;
857         rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
858         mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
859                          cq_db_reg, rxq->uar_lock_cq);
860 }
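
/*
 * Layout of the 64-bit arm doorbell composed above:
 *   bits [63:32] = (sq_n << MLX5_CQ_SQN_OFFSET) | (cq_ci & MLX5_CI_MASK)
 *   bits [31:0]  = CQ number
 * The same high word is also stored in the CQ doorbell record
 * (cq_db[MLX5_CQ_ARM_DB]) before the UAR write.
 */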
861
862 /**
863  * DPDK callback for Rx queue interrupt enable.
864  *
865  * @param dev
866  *   Pointer to Ethernet device structure.
867  * @param rx_queue_id
868  *   Rx queue number.
869  *
870  * @return
871  *   0 on success, a negative errno value otherwise and rte_errno is set.
872  */
873 int
874 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
875 {
876         struct mlx5_priv *priv = dev->data->dev_private;
877         struct mlx5_rxq_data *rxq_data;
878         struct mlx5_rxq_ctrl *rxq_ctrl;
879
880         rxq_data = (*priv->rxqs)[rx_queue_id];
881         if (!rxq_data) {
882                 rte_errno = EINVAL;
883                 return -rte_errno;
884         }
885         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
886         if (rxq_ctrl->irq) {
887                 struct mlx5_rxq_obj *rxq_obj;
888
889                 rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id);
890                 if (!rxq_obj) {
891                         rte_errno = EINVAL;
892                         return -rte_errno;
893                 }
894                 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
895                 mlx5_rxq_obj_release(rxq_obj);
896         }
897         return 0;
898 }
899
900 /**
901  * DPDK callback for Rx queue interrupt disable.
902  *
903  * @param dev
904  *   Pointer to Ethernet device structure.
905  * @param rx_queue_id
906  *   Rx queue number.
907  *
908  * @return
909  *   0 on success, a negative errno value otherwise and rte_errno is set.
910  */
911 int
912 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
913 {
914         struct mlx5_priv *priv = dev->data->dev_private;
915         struct mlx5_rxq_data *rxq_data;
916         struct mlx5_rxq_ctrl *rxq_ctrl;
917         struct mlx5_rxq_obj *rxq_obj = NULL;
918         struct ibv_cq *ev_cq;
919         void *ev_ctx;
920         int ret;
921
922         rxq_data = (*priv->rxqs)[rx_queue_id];
923         if (!rxq_data) {
924                 rte_errno = EINVAL;
925                 return -rte_errno;
926         }
927         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
928         if (!rxq_ctrl->irq)
929                 return 0;
930         rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id);
931         if (!rxq_obj) {
932                 rte_errno = EINVAL;
933                 return -rte_errno;
934         }
935         ret = mlx5_glue->get_cq_event(rxq_obj->channel, &ev_cq, &ev_ctx);
936         if (ret || ev_cq != rxq_obj->cq) {
937                 rte_errno = EINVAL;
938                 goto exit;
939         }
940         rxq_data->cq_arm_sn++;
941         mlx5_glue->ack_cq_events(rxq_obj->cq, 1);
942         mlx5_rxq_obj_release(rxq_obj);
943         return 0;
944 exit:
945         ret = rte_errno; /* Save rte_errno before cleanup. */
946         if (rxq_obj)
947                 mlx5_rxq_obj_release(rxq_obj);
948         DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d",
949                 dev->data->port_id, rx_queue_id);
950         rte_errno = ret; /* Restore rte_errno. */
951         return -rte_errno;
952 }
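
/*
 * Illustrative sketch, not part of the driver: the usual application pattern
 * around the two callbacks above is to arm the CQ before sleeping on the
 * queue's epoll fd and to consume the event after waking up. Only the public
 * ethdev API is used; the function name is hypothetical.
 */
static __rte_unused void
example_wait_for_rx(uint16_t port_id, uint16_t queue_id)
{
        /* Arm the CQ; dispatches to mlx5_rx_intr_enable(). */
        rte_eth_dev_rx_intr_enable(port_id, queue_id);
        /* ... block on the epoll fd registered for this Rx queue ... */
        /* Consume/ack the event; dispatches to mlx5_rx_intr_disable(). */
        rte_eth_dev_rx_intr_disable(port_id, queue_id);
}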
953
954 /**
955  * Create a CQ Verbs object.
956  *
957  * @param dev
958  *   Pointer to Ethernet device.
959  * @param priv
960  *   Pointer to device private data.
961  * @param rxq_data
962  *   Pointer to Rx queue data.
963  * @param cqe_n
964  *   Number of CQEs in CQ.
965  * @param rxq_obj
966  *   Pointer to Rx queue object data.
967  *
968  * @return
969  *   The Verbs object initialised, NULL otherwise and rte_errno is set.
970  */
971 static struct ibv_cq *
972 mlx5_ibv_cq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv,
973                 struct mlx5_rxq_data *rxq_data,
974                 unsigned int cqe_n, struct mlx5_rxq_obj *rxq_obj)
975 {
976         struct {
977                 struct ibv_cq_init_attr_ex ibv;
978                 struct mlx5dv_cq_init_attr mlx5;
979         } cq_attr;
980
981         cq_attr.ibv = (struct ibv_cq_init_attr_ex){
982                 .cqe = cqe_n,
983                 .channel = rxq_obj->channel,
984                 .comp_mask = 0,
985         };
986         cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
987                 .comp_mask = 0,
988         };
989         if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
990             !rxq_data->lro) {
991                 cq_attr.mlx5.comp_mask |=
992                                 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
993 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
994                 cq_attr.mlx5.cqe_comp_res_format =
995                                 mlx5_rxq_mprq_enabled(rxq_data) ?
996                                 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
997                                 MLX5DV_CQE_RES_FORMAT_HASH;
998 #else
999                 cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
1000 #endif
1001                 /*
1002                  * For vectorized Rx, the CQE count must not be doubled, so
1003                  * that cq_ci and rq_ci stay aligned.
1004                  */
1005                 if (mlx5_rxq_check_vec_support(rxq_data) < 0)
1006                         cq_attr.ibv.cqe *= 2;
1007         } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
1008                 DRV_LOG(DEBUG,
1009                         "port %u Rx CQE compression is disabled for HW"
1010                         " timestamp",
1011                         dev->data->port_id);
1012         } else if (priv->config.cqe_comp && rxq_data->lro) {
1013                 DRV_LOG(DEBUG,
1014                         "port %u Rx CQE compression is disabled for LRO",
1015                         dev->data->port_id);
1016         }
1017 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
1018         if (priv->config.cqe_pad) {
1019                 cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
1020                 cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
1021         }
1022 #endif
1023         return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx,
1024                                                               &cq_attr.ibv,
1025                                                               &cq_attr.mlx5));
1026 }
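
/*
 * Note: when CQE compression is usable (enabled, no HW timestamp, no LRO) the
 * non-vectorized datapath doubles the requested CQE count above, while the
 * vectorized datapath keeps it equal to the WQE count so that cq_ci and rq_ci
 * advance in lockstep.
 */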
1027
1028 /**
1029  * Create a WQ Verbs object.
1030  *
1031  * @param dev
1032  *   Pointer to Ethernet device.
1033  * @param priv
1034  *   Pointer to device private data.
1035  * @param rxq_data
1036  *   Pointer to Rx queue data.
1037  * @param idx
1038  *   Queue index in DPDK Rx queue array
1039  * @param wqe_n
1040  *   Number of WQEs in WQ.
1041  * @param rxq_obj
1042  *   Pointer to Rx queue object data.
1043  *
1044  * @return
1045  *   The Verbs object initialised, NULL otherwise and rte_errno is set.
1046  */
1047 static struct ibv_wq *
1048 mlx5_ibv_wq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv,
1049                 struct mlx5_rxq_data *rxq_data, uint16_t idx,
1050                 unsigned int wqe_n, struct mlx5_rxq_obj *rxq_obj)
1051 {
1052         struct {
1053                 struct ibv_wq_init_attr ibv;
1054 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
1055                 struct mlx5dv_wq_init_attr mlx5;
1056 #endif
1057         } wq_attr;
1058
1059         wq_attr.ibv = (struct ibv_wq_init_attr){
1060                 .wq_context = NULL, /* Could be useful in the future. */
1061                 .wq_type = IBV_WQT_RQ,
1062                 /* Max number of outstanding WRs. */
1063                 .max_wr = wqe_n >> rxq_data->sges_n,
1064                 /* Max number of scatter/gather elements in a WR. */
1065                 .max_sge = 1 << rxq_data->sges_n,
1066                 .pd = priv->sh->pd,
1067                 .cq = rxq_obj->cq,
1068                 .comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
1069                 .create_flags = (rxq_data->vlan_strip ?
1070                                  IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
1071         };
1072         /* By default, FCS (CRC) is stripped by hardware. */
1073         if (rxq_data->crc_present) {
1074                 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
1075                 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1076         }
1077         if (priv->config.hw_padding) {
1078 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
1079                 wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
1080                 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1081 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
1082                 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
1083                 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1084 #endif
1085         }
1086 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
1087         wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
1088                 .comp_mask = 0,
1089         };
1090         if (mlx5_rxq_mprq_enabled(rxq_data)) {
1091                 struct mlx5dv_striding_rq_init_attr *mprq_attr =
1092                                                 &wq_attr.mlx5.striding_rq_attrs;
1093
1094                 wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
1095                 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){
1096                         .single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
1097                         .single_wqe_log_num_of_strides = rxq_data->strd_num_n,
1098                         .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
1099                 };
1100         }
1101         rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv,
1102                                               &wq_attr.mlx5);
1103 #else
1104         rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv);
1105 #endif
1106         if (rxq_obj->wq) {
1107                 /*
1108                  * Make sure the number of WRs*SGEs matches expectations,
1109                  * since a queue cannot allocate more than "desc" buffers.
1110                  */
1111                 if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
1112                     wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
1113                         DRV_LOG(ERR,
1114                                 "port %u Rx queue %u requested %u*%u but got"
1115                                 " %u*%u WRs*SGEs",
1116                                 dev->data->port_id, idx,
1117                                 wqe_n >> rxq_data->sges_n,
1118                                 (1 << rxq_data->sges_n),
1119                                 wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
1120                         claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
1121                         rxq_obj->wq = NULL;
1122                         rte_errno = EINVAL;
1123                 }
1124         }
1125         return rxq_obj->wq;
1126 }
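
/*
 * Sizing note for the WQ created above: with elts_n = 10 (1024 elements) and
 * sges_n = 2 (4 segments per packet), the WQ is requested with
 * max_wr = 1024 >> 2 = 256 and max_sge = 1 << 2 = 4; the post-create check
 * only verifies that Verbs did not change these values.
 */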
1127
1128 /**
1129  * Fill common fields of create RQ attributes structure.
1130  *
1131  * @param rxq_data
1132  *   Pointer to Rx queue data.
1133  * @param cqn
1134  *   CQ number to use with this RQ.
1135  * @param rq_attr
1136  *   RQ attributes structure to fill.
1137  */
1138 static void
1139 mlx5_devx_create_rq_attr_fill(struct mlx5_rxq_data *rxq_data, uint32_t cqn,
1140                               struct mlx5_devx_create_rq_attr *rq_attr)
1141 {
1142         rq_attr->state = MLX5_RQC_STATE_RST;
1143         rq_attr->vsd = (rxq_data->vlan_strip) ? 0 : 1;
1144         rq_attr->cqn = cqn;
1145         rq_attr->scatter_fcs = (rxq_data->crc_present) ? 1 : 0;
1146 }
1147
1148 /**
1149  * Fill common fields of DevX WQ attributes structure.
1150  *
1151  * @param priv
1152  *   Pointer to device private data.
1153  * @param rxq_ctrl
1154  *   Pointer to Rx queue control structure.
1155  * @param wq_attr
1156  *   WQ attributes structure to fill.
1157  */
1158 static void
1159 mlx5_devx_wq_attr_fill(struct mlx5_priv *priv, struct mlx5_rxq_ctrl *rxq_ctrl,
1160                        struct mlx5_devx_wq_attr *wq_attr)
1161 {
1162         wq_attr->end_padding_mode = priv->config.cqe_pad ?
1163                                         MLX5_WQ_END_PAD_MODE_ALIGN :
1164                                         MLX5_WQ_END_PAD_MODE_NONE;
1165         wq_attr->pd = priv->sh->pdn;
1166         wq_attr->dbr_addr = rxq_ctrl->dbr_offset;
1167         wq_attr->dbr_umem_id = rxq_ctrl->dbr_umem_id;
1168         wq_attr->dbr_umem_valid = 1;
1169         wq_attr->wq_umem_id = rxq_ctrl->wq_umem->umem_id;
1170         wq_attr->wq_umem_valid = 1;
1171 }
1172
1173 /**
1174  * Create a RQ object using DevX.
1175  *
1176  * @param dev
1177  *   Pointer to Ethernet device.
1178  * @param idx
1179  *   Queue index in DPDK Rx queue array
1180  * @param cqn
1181  *   CQ number to use with this RQ.
1182  *
1183  * @return
1184  *   The DevX object initialised, NULL otherwise and rte_errno is set.
1185  */
1186 static struct mlx5_devx_obj *
1187 mlx5_devx_rq_new(struct rte_eth_dev *dev, uint16_t idx, uint32_t cqn)
1188 {
1189         struct mlx5_priv *priv = dev->data->dev_private;
1190         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1191         struct mlx5_rxq_ctrl *rxq_ctrl =
1192                 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1193         struct mlx5_devx_create_rq_attr rq_attr;
1194         uint32_t wqe_n = 1 << (rxq_data->elts_n - rxq_data->sges_n);
1195         uint32_t wq_size = 0;
1196         uint32_t wqe_size = 0;
1197         uint32_t log_wqe_size = 0;
1198         void *buf = NULL;
1199         struct mlx5_devx_obj *rq;
1200
1201         memset(&rq_attr, 0, sizeof(rq_attr));
1202         /* Fill RQ attributes. */
1203         rq_attr.mem_rq_type = MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE;
1204         rq_attr.flush_in_error_en = 1;
1205         mlx5_devx_create_rq_attr_fill(rxq_data, cqn, &rq_attr);
1206         /* Fill WQ attributes for this RQ. */
1207         if (mlx5_rxq_mprq_enabled(rxq_data)) {
1208                 rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ;
1209                 /*
1210                  * Number of strides in each WQE:
1211                  * 512*2^single_wqe_log_num_of_strides.
1212                  */
1213                 rq_attr.wq_attr.single_wqe_log_num_of_strides =
1214                                 rxq_data->strd_num_n -
1215                                 MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
1216                 /* Stride size = (2^single_stride_log_num_of_bytes)*64B. */
1217                 rq_attr.wq_attr.single_stride_log_num_of_bytes =
1218                                 rxq_data->strd_sz_n -
1219                                 MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
1220                 wqe_size = sizeof(struct mlx5_wqe_mprq);
1221         } else {
1222                 rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
1223                 wqe_size = sizeof(struct mlx5_wqe_data_seg);
1224         }
1225         log_wqe_size = log2above(wqe_size) + rxq_data->sges_n;
1226         rq_attr.wq_attr.log_wq_stride = log_wqe_size;
1227         rq_attr.wq_attr.log_wq_sz = rxq_data->elts_n - rxq_data->sges_n;
1228         /* Calculate and allocate WQ memory space. */
1229         wqe_size = 1 << log_wqe_size; /* Round up to a power of two. */
1230         wq_size = wqe_n * wqe_size;
1231         buf = rte_calloc_socket(__func__, 1, wq_size, MLX5_WQE_BUF_ALIGNMENT,
1232                                 rxq_ctrl->socket);
1233         if (!buf)
1234                 return NULL;
1235         rxq_data->wqes = buf;
1236         rxq_ctrl->wq_umem = mlx5_glue->devx_umem_reg(priv->sh->ctx,
1237                                                      buf, wq_size, 0);
1238         if (!rxq_ctrl->wq_umem) {
1239                 rte_free(buf);
1240                 return NULL;
1241         }
1242         mlx5_devx_wq_attr_fill(priv, rxq_ctrl, &rq_attr.wq_attr);
1243         rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &rq_attr, rxq_ctrl->socket);
1244         if (!rq)
1245                 rxq_release_rq_resources(rxq_ctrl);
1246         return rq;
1247 }
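
/*
 * Sizing example for the DevX RQ above (single-packet RQ): with elts_n = 10
 * and sges_n = 0, wqe_n = 1 << 10 = 1024 and, assuming
 * sizeof(struct mlx5_wqe_data_seg) == 16, log_wqe_size = 4, so the umem
 * registered for the ring is 1024 * 16 bytes = 16 KiB.
 */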
1248
1249 /**
1250  * Create the Rx hairpin queue object.
1251  *
1252  * @param dev
1253  *   Pointer to Ethernet device.
1254  * @param idx
1255  *   Queue index in DPDK Rx queue array
1256  *
1257  * @return
1258  *   The hairpin DevX object initialised, NULL otherwise and rte_errno is set.
1259  */
1260 static struct mlx5_rxq_obj *
1261 mlx5_rxq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
1262 {
1263         struct mlx5_priv *priv = dev->data->dev_private;
1264         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1265         struct mlx5_rxq_ctrl *rxq_ctrl =
1266                 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1267         struct mlx5_devx_create_rq_attr attr = { 0 };
1268         struct mlx5_rxq_obj *tmpl = NULL;
1269         int ret = 0;
1270
1271         assert(rxq_data);
1272         assert(!rxq_ctrl->obj);
1273         tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1274                                  rxq_ctrl->socket);
1275         if (!tmpl) {
1276                 DRV_LOG(ERR,
1277                         "port %u Rx queue %u cannot allocate verbs resources",
1278                         dev->data->port_id, rxq_data->idx);
1279                 rte_errno = ENOMEM;
1280                 goto error;
1281         }
1282         tmpl->type = MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN;
1283         tmpl->rxq_ctrl = rxq_ctrl;
1284         attr.hairpin = 1;
1285         /* Workaround for hairpin startup */
1286         attr.wq_attr.log_hairpin_num_packets = log2above(32);
1287         /* Workaround for packets larger than 1KB */
1288         attr.wq_attr.log_hairpin_data_sz =
1289                         priv->config.hca_attr.log_max_hairpin_wq_data_sz;
1290         tmpl->rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &attr,
1291                                            rxq_ctrl->socket);
1292         if (!tmpl->rq) {
1293                 DRV_LOG(ERR,
1294                         "port %u Rx hairpin queue %u can't create rq object",
1295                         dev->data->port_id, idx);
1296                 rte_errno = errno;
1297                 goto error;
1298         }
1299         DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
1300                 idx, (void *)&tmpl);
1301         rte_atomic32_inc(&tmpl->refcnt);
1302         LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next);
1303         priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1304         return tmpl;
1305 error:
1306         ret = rte_errno; /* Save rte_errno before cleanup. */
1307         if (tmpl->rq)
1308                 mlx5_devx_cmd_destroy(tmpl->rq);
1309         rte_errno = ret; /* Restore rte_errno. */
1310         return NULL;
1311 }
1312
1313 /**
1314  * Create the Rx queue Verbs/DevX object.
1315  *
1316  * @param dev
1317  *   Pointer to Ethernet device.
1318  * @param idx
1319  *   Queue index in DPDK Rx queue array
1320  * @param type
1321  *   Type of Rx queue object to create.
1322  *
1323  * @return
1324  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
1325  */
1326 struct mlx5_rxq_obj *
1327 mlx5_rxq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
1328                  enum mlx5_rxq_obj_type type)
1329 {
1330         struct mlx5_priv *priv = dev->data->dev_private;
1331         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1332         struct mlx5_rxq_ctrl *rxq_ctrl =
1333                 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1334         struct ibv_wq_attr mod;
1335         unsigned int cqe_n;
1336         unsigned int wqe_n = 1 << rxq_data->elts_n;
1337         struct mlx5_rxq_obj *tmpl = NULL;
1338         struct mlx5dv_cq cq_info;
1339         struct mlx5dv_rwq rwq;
1340         int ret = 0;
1341         struct mlx5dv_obj obj;
1342
1343         assert(rxq_data);
1344         assert(!rxq_ctrl->obj);
1345         if (type == MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN)
1346                 return mlx5_rxq_obj_hairpin_new(dev, idx);
1347         priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
1348         priv->verbs_alloc_ctx.obj = rxq_ctrl;
1349         tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1350                                  rxq_ctrl->socket);
1351         if (!tmpl) {
1352                 DRV_LOG(ERR,
1353                         "port %u Rx queue %u cannot allocate verbs resources",
1354                         dev->data->port_id, rxq_data->idx);
1355                 rte_errno = ENOMEM;
1356                 goto error;
1357         }
1358         tmpl->type = type;
1359         tmpl->rxq_ctrl = rxq_ctrl;
1360         if (rxq_ctrl->irq) {
1361                 tmpl->channel = mlx5_glue->create_comp_channel(priv->sh->ctx);
1362                 if (!tmpl->channel) {
1363                         DRV_LOG(ERR, "port %u: comp channel creation failure",
1364                                 dev->data->port_id);
1365                         rte_errno = ENOMEM;
1366                         goto error;
1367                 }
1368         }
1369         if (mlx5_rxq_mprq_enabled(rxq_data))
1370                 cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1;
1371         else
1372                 cqe_n = wqe_n  - 1;
1373         tmpl->cq = mlx5_ibv_cq_new(dev, priv, rxq_data, cqe_n, tmpl);
1374         if (!tmpl->cq) {
1375                 DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure",
1376                         dev->data->port_id, idx);
1377                 rte_errno = ENOMEM;
1378                 goto error;
1379         }
1380         obj.cq.in = tmpl->cq;
1381         obj.cq.out = &cq_info;
1382         ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
1383         if (ret) {
1384                 rte_errno = ret;
1385                 goto error;
1386         }
1387         if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1388                 DRV_LOG(ERR,
1389                         "port %u wrong MLX5_CQE_SIZE environment variable"
1390                         " value: it should be set to %u",
1391                         dev->data->port_id, RTE_CACHE_LINE_SIZE);
1392                 rte_errno = EINVAL;
1393                 goto error;
1394         }
1395         DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d",
1396                 dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr);
1397         DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d",
1398                 dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge);
1399         /* Allocate door-bell for types created with DevX. */
1400         if (tmpl->type != MLX5_RXQ_OBJ_TYPE_IBV) {
1401                 struct mlx5_devx_dbr_page *dbr_page;
1402                 int64_t dbr_offset;
1403
1404                 dbr_offset = mlx5_get_dbr(dev, &dbr_page);
1405                 if (dbr_offset < 0)
1406                         goto error;
1407                 rxq_ctrl->dbr_offset = dbr_offset;
1408                 rxq_ctrl->dbr_umem_id = dbr_page->umem->umem_id;
1409                 rxq_ctrl->dbr_umem_id_valid = 1;
1410                 rxq_data->rq_db = (uint32_t *)((uintptr_t)dbr_page->dbrs +
1411                                                (uintptr_t)rxq_ctrl->dbr_offset);
1412         }
1413         if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV) {
1414                 tmpl->wq = mlx5_ibv_wq_new(dev, priv, rxq_data, idx, wqe_n,
1415                                            tmpl);
1416                 if (!tmpl->wq) {
1417                         DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure",
1418                                 dev->data->port_id, idx);
1419                         rte_errno = ENOMEM;
1420                         goto error;
1421                 }
1422                 /* Change queue state to ready. */
1423                 mod = (struct ibv_wq_attr){
1424                         .attr_mask = IBV_WQ_ATTR_STATE,
1425                         .wq_state = IBV_WQS_RDY,
1426                 };
1427                 ret = mlx5_glue->modify_wq(tmpl->wq, &mod);
1428                 if (ret) {
1429                         DRV_LOG(ERR,
1430                                 "port %u Rx queue %u WQ state to IBV_WQS_RDY"
1431                                 " failed", dev->data->port_id, idx);
1432                         rte_errno = ret;
1433                         goto error;
1434                 }
1435                 obj.rwq.in = tmpl->wq;
1436                 obj.rwq.out = &rwq;
1437                 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
1438                 if (ret) {
1439                         rte_errno = ret;
1440                         goto error;
1441                 }
1442                 rxq_data->wqes = rwq.buf;
1443                 rxq_data->rq_db = rwq.dbrec;
1444         } else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) {
1445                 struct mlx5_devx_modify_rq_attr rq_attr;
1446
1447                 memset(&rq_attr, 0, sizeof(rq_attr));
1448                 tmpl->rq = mlx5_devx_rq_new(dev, idx, cq_info.cqn);
1449                 if (!tmpl->rq) {
1450                         DRV_LOG(ERR, "port %u Rx queue %u RQ creation failure",
1451                                 dev->data->port_id, idx);
1452                         rte_errno = ENOMEM;
1453                         goto error;
1454                 }
1455                 /* Change queue state to ready. */
1456                 rq_attr.rq_state = MLX5_RQC_STATE_RST;
1457                 rq_attr.state = MLX5_RQC_STATE_RDY;
1458                 ret = mlx5_devx_cmd_modify_rq(tmpl->rq, &rq_attr);
1459                 if (ret)
1460                         goto error;
1461         }
1462         /* Fill the rings. */
1463         rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
1464         rxq_data->cq_db = cq_info.dbrec;
1465         rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
1466         rxq_data->cq_uar = cq_info.cq_uar;
1467         rxq_data->cqn = cq_info.cqn;
1468         rxq_data->cq_arm_sn = 0;
1469         mlx5_rxq_initialize(rxq_data);
1470         rxq_data->cq_ci = 0;
1471         DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
1472                 idx, (void *)tmpl);
1473         rte_atomic32_inc(&tmpl->refcnt);
1474         LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next);
1475         priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1476         return tmpl;
1477 error:
1478         if (tmpl) {
1479                 ret = rte_errno; /* Save rte_errno before cleanup. */
1480                 if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV && tmpl->wq)
1481                         claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
1482                 else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ && tmpl->rq)
1483                         claim_zero(mlx5_devx_cmd_destroy(tmpl->rq));
1484                 if (tmpl->cq)
1485                         claim_zero(mlx5_glue->destroy_cq(tmpl->cq));
1486                 if (tmpl->channel)
1487                         claim_zero(mlx5_glue->destroy_comp_channel
1488                                                         (tmpl->channel));
1489                 rte_free(tmpl);
1490                 rte_errno = ret; /* Restore rte_errno. */
1491         }
1492         if (type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ)
1493                 rxq_release_rq_resources(rxq_ctrl);
1494         priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1495         return NULL;
1496 }
1497
1498 /**
1499  * Verify the Rx queue objects list is empty
1500  *
1501  * @param dev
1502  *   Pointer to Ethernet device.
1503  *
1504  * @return
1505  *   The number of objects not released.
1506  */
1507 int
1508 mlx5_rxq_obj_verify(struct rte_eth_dev *dev)
1509 {
1510         struct mlx5_priv *priv = dev->data->dev_private;
1511         int ret = 0;
1512         struct mlx5_rxq_obj *rxq_obj;
1513
1514         LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) {
1515                 DRV_LOG(DEBUG, "port %u Rx queue %u still referenced",
1516                         dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx);
1517                 ++ret;
1518         }
1519         return ret;
1520 }
1521
1522 /**
1523  * Callback function to initialize mbufs for Multi-Packet RQ.
1524  */
1525 static inline void
1526 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg,
1527                     void *_m, unsigned int i __rte_unused)
1528 {
1529         struct mlx5_mprq_buf *buf = _m;
1530         struct rte_mbuf_ext_shared_info *shinfo;
1531         unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg;
1532         unsigned int j;
1533
1534         memset(_m, 0, sizeof(*buf));
1535         buf->mp = mp;
1536         rte_atomic16_set(&buf->refcnt, 1);
1537         for (j = 0; j != strd_n; ++j) {
1538                 shinfo = &buf->shinfos[j];
1539                 shinfo->free_cb = mlx5_mprq_buf_free_cb;
1540                 shinfo->fcb_opaque = buf;
1541         }
1542 }
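
/*
 * Illustrative note (not part of the driver logic): mlx5_mprq_buf_init() is
 * meant to be passed as the obj_init callback of rte_mempool_create(), with
 * the stride count forwarded through the opaque argument, e.g.:
 *
 *   mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
 *                           0, NULL, NULL, mlx5_mprq_buf_init,
 *                           (void *)(uintptr_t)(1 << strd_num_n),
 *                           dev->device->numa_node, 0);
 *
 * The actual call with these arguments is in mlx5_mprq_alloc_mp() below.
 */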
1543
1544 /**
1545  * Free mempool of Multi-Packet RQ.
1546  *
1547  * @param dev
1548  *   Pointer to Ethernet device.
1549  *
1550  * @return
1551  *   0 on success, negative errno value on failure.
1552  */
1553 int
1554 mlx5_mprq_free_mp(struct rte_eth_dev *dev)
1555 {
1556         struct mlx5_priv *priv = dev->data->dev_private;
1557         struct rte_mempool *mp = priv->mprq_mp;
1558         unsigned int i;
1559
1560         if (mp == NULL)
1561                 return 0;
1562         DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
1563                 dev->data->port_id, mp->name);
1564         /*
1565          * If a buffer in the pool has been externally attached to an mbuf
1566          * and is still in use by the application, destroying the Rx queue
1567          * can corrupt the packet. This is unlikely, but it can happen if the
1568          * application dynamically creates and destroys queues while holding
1569          * Rx packets.
1570          * TODO: It is unavoidable for now because the mempool for Multi-Packet
1571          * RQ isn't provided by the application but managed by the PMD.
1572          */
1573         if (!rte_mempool_full(mp)) {
1574                 DRV_LOG(ERR,
1575                         "port %u mempool for Multi-Packet RQ is still in use",
1576                         dev->data->port_id);
1577                 rte_errno = EBUSY;
1578                 return -rte_errno;
1579         }
1580         rte_mempool_free(mp);
1581         /* Unset mempool for each Rx queue. */
1582         for (i = 0; i != priv->rxqs_n; ++i) {
1583                 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
1584
1585                 if (rxq == NULL)
1586                         continue;
1587                 rxq->mprq_mp = NULL;
1588         }
1589         priv->mprq_mp = NULL;
1590         return 0;
1591 }
1592
1593 /**
1594  * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
1595  * mempool. If already allocated, reuse it if there are enough elements.
1596  * Otherwise, resize it.
1597  *
1598  * @param dev
1599  *   Pointer to Ethernet device.
1600  *
1601  * @return
1602  *   0 on success, negative errno value on failure.
1603  */
1604 int
1605 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
1606 {
1607         struct mlx5_priv *priv = dev->data->dev_private;
1608         struct rte_mempool *mp = priv->mprq_mp;
1609         char name[RTE_MEMPOOL_NAMESIZE];
1610         unsigned int desc = 0;
1611         unsigned int buf_len;
1612         unsigned int obj_num;
1613         unsigned int obj_size;
1614         unsigned int strd_num_n = 0;
1615         unsigned int strd_sz_n = 0;
1616         unsigned int i;
1617         unsigned int n_ibv = 0;
1618
1619         if (!mlx5_mprq_enabled(dev))
1620                 return 0;
1621         /* Count the total number of descriptors configured. */
1622         for (i = 0; i != priv->rxqs_n; ++i) {
1623                 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
1624                 struct mlx5_rxq_ctrl *rxq_ctrl = container_of
1625                         (rxq, struct mlx5_rxq_ctrl, rxq);
1626
1627                 if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
1628                         continue;
1629                 n_ibv++;
1630                 desc += 1 << rxq->elts_n;
1631                 /* Get the max number of strides. */
1632                 if (strd_num_n < rxq->strd_num_n)
1633                         strd_num_n = rxq->strd_num_n;
1634                 /* Get the max size of a stride. */
1635                 if (strd_sz_n < rxq->strd_sz_n)
1636                         strd_sz_n = rxq->strd_sz_n;
1637         }
1638         assert(strd_num_n && strd_sz_n);
1639         buf_len = (1 << strd_num_n) * (1 << strd_sz_n);
1640         obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + (1 << strd_num_n) *
1641                 sizeof(struct rte_mbuf_ext_shared_info) + RTE_PKTMBUF_HEADROOM;
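        /*
         * Worked example with hypothetical parameters: strd_num_n = 6 and
         * strd_sz_n = 11 give buf_len = 64 * 2048 = 128 KiB per object, and
         * obj_size additionally accounts for the mlx5_mprq_buf header, 64
         * rte_mbuf_ext_shared_info entries and RTE_PKTMBUF_HEADROOM.
         */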
1642         /*
1643          * Received packets are either memcpy'd or externally referenced. When
1644          * a packet is attached to an mbuf as an external buffer, it is not
1645          * possible to predict how the buffers will be queued by the
1646          * application, so the exact number of needed buffers cannot be
1647          * pre-allocated; enough buffers are prepared speculatively instead.
1648          *
1649          * In the data path, if this mempool is depleted, the PMD will memcpy
1650          * received packets to buffers provided by the application (rxq->mp)
1651          * until this mempool becomes available again.
1652          */
1653         desc *= 4;
1654         obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv;
1655         /*
1656          * rte_mempool_create_empty() has a sanity check that refuses a cache
1657          * size that is too large compared to the number of elements.
1658          * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so a constant
1659          * factor of 2 is used instead.
1660          */
1661         obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
1662         /* Check whether a mempool is already allocated and can be reused. */
1663         if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
1664                 DRV_LOG(DEBUG, "port %u mempool %s is being reused",
1665                         dev->data->port_id, mp->name);
1666                 /* Reuse. */
1667                 goto exit;
1668         } else if (mp != NULL) {
1669                 DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
1670                         dev->data->port_id, mp->name);
1671                 /*
1672                  * If freeing fails, the mempool may still be in use; there is
1673                  * no choice but to keep using the existing one. On buffer
1674                  * underrun, packets will be memcpy'd instead of attached as
1675                  * external buffers.
1676                  */
1677                 if (mlx5_mprq_free_mp(dev)) {
1678                         if (mp->elt_size >= obj_size)
1679                                 goto exit;
1680                         else
1681                                 return -rte_errno;
1682                 }
1683         }
1684         snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id);
1685         mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
1686                                 0, NULL, NULL, mlx5_mprq_buf_init,
1687                                 (void *)(uintptr_t)(1 << strd_num_n),
1688                                 dev->device->numa_node, 0);
1689         if (mp == NULL) {
1690                 DRV_LOG(ERR,
1691                         "port %u failed to allocate a mempool for"
1692                         " Multi-Packet RQ, count=%u, size=%u",
1693                         dev->data->port_id, obj_num, obj_size);
1694                 rte_errno = ENOMEM;
1695                 return -rte_errno;
1696         }
1697         priv->mprq_mp = mp;
1698 exit:
1699         /* Set mempool for each Rx queue. */
1700         for (i = 0; i != priv->rxqs_n; ++i) {
1701                 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
1702                 struct mlx5_rxq_ctrl *rxq_ctrl = container_of
1703                         (rxq, struct mlx5_rxq_ctrl, rxq);
1704
1705                 if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
1706                         continue;
1707                 rxq->mprq_mp = mp;
1708         }
1709         DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
1710                 dev->data->port_id);
1711         return 0;
1712 }
1713
1714 #define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \
1715                                         sizeof(struct rte_vlan_hdr) * 2 + \
1716                                         sizeof(struct rte_ipv6_hdr)))
1717 #define MAX_TCP_OPTION_SIZE 40u
1718 #define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \
1719                                  sizeof(struct rte_tcp_hdr) + \
1720                                  MAX_TCP_OPTION_SIZE))
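
/*
 * For reference, with the standard header sizes (14 B Ethernet, 2 x 4 B VLAN,
 * 40 B IPv6 and 20 B TCP) these evaluate to MLX5_MAX_TCP_HDR_OFFSET = 62 bytes
 * and MLX5_MAX_LRO_HEADER_FIX = 62 + 20 + 40 = 122 bytes.
 */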
1721
1722 /**
1723  * Adjust the maximum LRO message size.
1724  *
1725  * @param dev
1726  *   Pointer to Ethernet device.
1727  * @param idx
1728  *   RX queue index.
1729  * @param max_lro_size
1730  *   The maximum size for LRO packet.
1731  */
1732 static void
1733 mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx,
1734                              uint32_t max_lro_size)
1735 {
1736         struct mlx5_priv *priv = dev->data->dev_private;
1737
1738         if (priv->config.hca_attr.lro_max_msg_sz_mode ==
1739             MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size >
1740             MLX5_MAX_TCP_HDR_OFFSET)
1741                 max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET;
1742         max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE);
1743         assert(max_lro_size >= MLX5_LRO_SEG_CHUNK_SIZE);
1744         max_lro_size /= MLX5_LRO_SEG_CHUNK_SIZE;
1745         if (priv->max_lro_msg_size)
1746                 priv->max_lro_msg_size =
1747                         RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size);
1748         else
1749                 priv->max_lro_msg_size = max_lro_size;
1750         DRV_LOG(DEBUG,
1751                 "port %u Rx Queue %u max LRO message size adjusted to %u bytes",
1752                 dev->data->port_id, idx,
1753                 priv->max_lro_msg_size * MLX5_LRO_SEG_CHUNK_SIZE);
1754 }
1755
1756 /**
1757  * Create a DPDK Rx queue.
1758  *
1759  * @param dev
1760  *   Pointer to Ethernet device.
1761  * @param idx
1762  *   RX queue index.
1763  * @param desc
1764  *   Number of descriptors to configure in queue.
1765  * @param socket
1766  *   NUMA socket on which memory must be allocated.
1767  * @param conf
 *   Rx queue configuration parameters.
 * @param mp
 *   Memory pool for mbuf allocations.
 *
1768  * @return
1769  *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
1770  */
1771 struct mlx5_rxq_ctrl *
1772 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
1773              unsigned int socket, const struct rte_eth_rxconf *conf,
1774              struct rte_mempool *mp)
1775 {
1776         struct mlx5_priv *priv = dev->data->dev_private;
1777         struct mlx5_rxq_ctrl *tmpl;
1778         unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
1779         unsigned int mprq_stride_size;
1780         struct mlx5_dev_config *config = &priv->config;
1781         unsigned int strd_headroom_en;
1782         /*
1783          * Always allocate extra slots, even if eventually
1784          * the vector Rx will not be used.
1785          */
1786         uint16_t desc_n =
1787                 desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
1788         uint64_t offloads = conf->offloads |
1789                            dev->data->dev_conf.rxmode.offloads;
1790         unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO);
1791         const int mprq_en = mlx5_check_mprq_support(dev) > 0;
1792         unsigned int max_rx_pkt_len = lro_on_queue ?
1793                         dev->data->dev_conf.rxmode.max_lro_pkt_size :
1794                         dev->data->dev_conf.rxmode.max_rx_pkt_len;
1795         unsigned int non_scatter_min_mbuf_size = max_rx_pkt_len +
1796                                                         RTE_PKTMBUF_HEADROOM;
1797         unsigned int max_lro_size = 0;
1798         unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
1799
1800         if (non_scatter_min_mbuf_size > mb_len && !(offloads &
1801                                                     DEV_RX_OFFLOAD_SCATTER)) {
1802                 DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not"
1803                         " configured and not enough mbuf space(%u) to contain "
1804                         "the maximum RX packet length(%u) with head-room(%u)",
1805                         dev->data->port_id, idx, mb_len, max_rx_pkt_len,
1806                         RTE_PKTMBUF_HEADROOM);
1807                 rte_errno = ENOSPC;
1808                 return NULL;
1809         }
1810         tmpl = rte_calloc_socket("RXQ", 1,
1811                                  sizeof(*tmpl) +
1812                                  desc_n * sizeof(struct rte_mbuf *),
1813                                  0, socket);
1814         if (!tmpl) {
1815                 rte_errno = ENOMEM;
1816                 return NULL;
1817         }
1818         tmpl->type = MLX5_RXQ_TYPE_STANDARD;
1819         if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh,
1820                                MLX5_MR_BTREE_CACHE_N, socket)) {
1821                 /* rte_errno is already set. */
1822                 goto error;
1823         }
1824         tmpl->socket = socket;
1825         if (dev->data->dev_conf.intr_conf.rxq)
1826                 tmpl->irq = 1;
1827         /*
1828          * An LRO packet may consume all the stride memory, hence we cannot
1829          * guarantee head-room near the packet memory in the stride.
1830          * In this case scatter is certainly enabled and an empty mbuf may be
1831          * added at the start for the head-room.
1832          */
1833         if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
1834             non_scatter_min_mbuf_size > mb_len) {
1835                 strd_headroom_en = 0;
1836                 mprq_stride_size = RTE_MIN(max_rx_pkt_len,
1837                                         1u << config->mprq.max_stride_size_n);
1838         } else {
1839                 strd_headroom_en = 1;
1840                 mprq_stride_size = non_scatter_min_mbuf_size;
1841         }
1842         /*
1843          * This Rx queue can be configured as a Multi-Packet RQ if all of the
1844          * following conditions are met:
1845          *  - MPRQ is enabled.
1846          *  - The number of descs is more than the number of strides.
1847          *  - max_rx_pkt_len plus overhead is less than the max size of a
1848          *    stride.
1849          *  Otherwise, enable Rx scatter if necessary.
1850          */
1851         if (mprq_en &&
1852             desc > (1U << config->mprq.stride_num_n) &&
1853             mprq_stride_size <= (1U << config->mprq.max_stride_size_n)) {
1854                 /* TODO: Rx scatter isn't supported yet. */
1855                 tmpl->rxq.sges_n = 0;
1856                 /* Trim the number of descs needed. */
1857                 desc >>= config->mprq.stride_num_n;
1858                 tmpl->rxq.strd_num_n = config->mprq.stride_num_n;
1859                 tmpl->rxq.strd_sz_n = RTE_MAX(log2above(mprq_stride_size),
1860                                               config->mprq.min_stride_size_n);
1861                 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
1862                 tmpl->rxq.strd_headroom_en = strd_headroom_en;
1863                 tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
1864                                 config->mprq.max_memcpy_len);
1865                 max_lro_size = RTE_MIN(max_rx_pkt_len,
1866                                        (1u << tmpl->rxq.strd_num_n) *
1867                                        (1u << tmpl->rxq.strd_sz_n));
1868                 DRV_LOG(DEBUG,
1869                         "port %u Rx queue %u: Multi-Packet RQ is enabled"
1870                         " strd_num_n = %u, strd_sz_n = %u",
1871                         dev->data->port_id, idx,
1872                         tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n);
1873         } else if (max_rx_pkt_len <= first_mb_free_size) {
1874                 tmpl->rxq.sges_n = 0;
1875                 max_lro_size = max_rx_pkt_len;
1876         } else if (offloads & DEV_RX_OFFLOAD_SCATTER) {
1877                 unsigned int size = non_scatter_min_mbuf_size;
1878                 unsigned int sges_n;
1879
1880                 if (lro_on_queue && first_mb_free_size <
1881                     MLX5_MAX_LRO_HEADER_FIX) {
1882                         DRV_LOG(ERR, "Not enough space in the first segment(%u)"
1883                                 " to include the max header size(%u) for LRO",
1884                                 first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX);
1885                         rte_errno = ENOTSUP;
1886                         goto error;
1887                 }
1888                 /*
1889                  * Determine the number of SGEs needed for a full packet
1890                  * and round it to the next power of two.
1891                  */
1892                 sges_n = log2above((size / mb_len) + !!(size % mb_len));
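                /*
                 * For example (hypothetical values), size = 9128 bytes with
                 * mb_len = 2048 bytes needs ceil(9128 / 2048) = 5 segments,
                 * rounded up to sges_n = log2above(5) = 3, i.e. 8 SGEs.
                 */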
1893                 if (sges_n > MLX5_MAX_LOG_RQ_SEGS) {
1894                         DRV_LOG(ERR,
1895                                 "port %u too many SGEs (%u) needed to handle"
1896                                 " requested maximum packet size %u, the maximum"
1897                                 " supported are %u", dev->data->port_id,
1898                                 " supported is %u", dev->data->port_id,
1899                                 1u << MLX5_MAX_LOG_RQ_SEGS);
1900                         rte_errno = ENOTSUP;
1901                         goto error;
1902                 }
1903                 tmpl->rxq.sges_n = sges_n;
1904                 max_lro_size = max_rx_pkt_len;
1905         }
1906         if (mprq_en && !mlx5_rxq_mprq_enabled(&tmpl->rxq))
1907                 DRV_LOG(WARNING,
1908                         "port %u MPRQ is requested but cannot be enabled"
1909                         " (requested: desc = %u, stride_sz = %u,"
1910                         " supported: min_stride_num = %u, max_stride_sz = %u).",
1911                         dev->data->port_id, desc, mprq_stride_size,
1912                         (1 << config->mprq.stride_num_n),
1913                         (1 << config->mprq.max_stride_size_n));
1914         DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u",
1915                 dev->data->port_id, 1 << tmpl->rxq.sges_n);
1916         if (desc % (1 << tmpl->rxq.sges_n)) {
1917                 DRV_LOG(ERR,
1918                         "port %u number of Rx queue descriptors (%u) is not a"
1919                         " multiple of SGEs per packet (%u)",
1920                         dev->data->port_id,
1921                         desc,
1922                         1 << tmpl->rxq.sges_n);
1923                 rte_errno = EINVAL;
1924                 goto error;
1925         }
1926         mlx5_max_lro_msg_size_adjust(dev, idx, max_lro_size);
1927         /* Toggle RX checksum offload if hardware supports it. */
1928         tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM);
1929         tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP);
1930         /* Configure VLAN stripping. */
1931         tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
1932         /* By default, FCS (CRC) is stripped by hardware. */
1933         tmpl->rxq.crc_present = 0;
1934         tmpl->rxq.lro = lro_on_queue;
1935         if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
1936                 if (config->hw_fcs_strip) {
1937                         /*
1938                          * RQs used for LRO-enabled TIRs should not be
1939                          * configured to scatter the FCS.
1940                          */
1941                         if (lro_on_queue)
1942                                 DRV_LOG(WARNING,
1943                                         "port %u CRC stripping has been "
1944                                         "disabled but will still be performed "
1945                                         "by hardware, because LRO is enabled",
1946                                         dev->data->port_id);
1947                         else
1948                                 tmpl->rxq.crc_present = 1;
1949                 } else {
1950                         DRV_LOG(WARNING,
1951                                 "port %u CRC stripping has been disabled but will"
1952                                 " still be performed by hardware, make sure MLNX_OFED"
1953                                 " and firmware are up to date",
1954                                 dev->data->port_id);
1955                 }
1956         }
1957         DRV_LOG(DEBUG,
1958                 "port %u CRC stripping is %s, %u bytes will be subtracted from"
1959                 " incoming frames to hide it",
1960                 dev->data->port_id,
1961                 tmpl->rxq.crc_present ? "disabled" : "enabled",
1962                 tmpl->rxq.crc_present << 2);
1963         /* Save port ID. */
1964         tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf &&
1965                 (!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
1966         tmpl->rxq.port_id = dev->data->port_id;
1967         tmpl->priv = priv;
1968         tmpl->rxq.mp = mp;
1969         tmpl->rxq.elts_n = log2above(desc);
1970         tmpl->rxq.rq_repl_thresh =
1971                 MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
1972         tmpl->rxq.elts =
1973                 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
1974 #ifndef RTE_ARCH_64
1975         tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
1976 #endif
1977         tmpl->rxq.idx = idx;
1978         rte_atomic32_inc(&tmpl->refcnt);
1979         LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
1980         return tmpl;
1981 error:
1982         rte_free(tmpl);
1983         return NULL;
1984 }
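
/*
 * Usage note (illustrative sketch, not part of the driver): this constructor
 * is not called by applications directly; it is reached through the ethdev Rx
 * queue setup path, e.g.:
 *
 *   struct rte_eth_rxconf rxconf = { .offloads = DEV_RX_OFFLOAD_SCATTER };
 *
 *   ret = rte_eth_rx_queue_setup(port_id, queue_id, 1024, rte_socket_id(),
 *                                &rxconf, mbuf_pool);
 *
 * port_id, queue_id and mbuf_pool are application-provided placeholders.
 */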
1985
1986 /**
1987  * Create a DPDK Rx hairpin queue.
1988  *
1989  * @param dev
1990  *   Pointer to Ethernet device.
1991  * @param idx
1992  *   RX queue index.
1993  * @param desc
1994  *   Number of descriptors to configure in queue.
1995  * @param hairpin_conf
1996  *   The hairpin binding configuration.
1997  *
1998  * @return
1999  *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
2000  */
2001 struct mlx5_rxq_ctrl *
2002 mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
2003                      const struct rte_eth_hairpin_conf *hairpin_conf)
2004 {
2005         struct mlx5_priv *priv = dev->data->dev_private;
2006         struct mlx5_rxq_ctrl *tmpl;
2007
2008         tmpl = rte_calloc_socket("RXQ", 1, sizeof(*tmpl), 0, SOCKET_ID_ANY);
2009         if (!tmpl) {
2010                 rte_errno = ENOMEM;
2011                 return NULL;
2012         }
2013         tmpl->type = MLX5_RXQ_TYPE_HAIRPIN;
2014         tmpl->socket = SOCKET_ID_ANY;
2015         tmpl->rxq.rss_hash = 0;
2016         tmpl->rxq.port_id = dev->data->port_id;
2017         tmpl->priv = priv;
2018         tmpl->rxq.mp = NULL;
2019         tmpl->rxq.elts_n = log2above(desc);
2020         tmpl->rxq.elts = NULL;
2021         tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
2022         tmpl->hairpin_conf = *hairpin_conf;
2023         tmpl->rxq.idx = idx;
2024         rte_atomic32_inc(&tmpl->refcnt);
2025         LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
2026         return tmpl;
2027 }
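
/*
 * Usage note (illustrative sketch, assuming the rte_eth_hairpin_conf layout
 * with peer_count/peers[]): the hairpin variant is reached through
 * rte_eth_rx_hairpin_queue_setup(), e.g.:
 *
 *   struct rte_eth_hairpin_conf conf = {
 *           .peer_count = 1,
 *           .peers[0] = { .port = port_id, .queue = peer_txq },
 *   };
 *
 *   ret = rte_eth_rx_hairpin_queue_setup(port_id, queue_id, 1024, &conf);
 *
 * port_id, queue_id and peer_txq are application-provided placeholders.
 */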
2028
2029 /**
2030  * Get a Rx queue.
2031  *
2032  * @param dev
2033  *   Pointer to Ethernet device.
2034  * @param idx
2035  *   RX queue index.
2036  *
2037  * @return
2038  *   A pointer to the queue if it exists, NULL otherwise.
2039  */
2040 struct mlx5_rxq_ctrl *
2041 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx)
2042 {
2043         struct mlx5_priv *priv = dev->data->dev_private;
2044         struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
2045
2046         if ((*priv->rxqs)[idx]) {
2047                 rxq_ctrl = container_of((*priv->rxqs)[idx],
2048                                         struct mlx5_rxq_ctrl,
2049                                         rxq);
2050                 mlx5_rxq_obj_get(dev, idx);
2051                 rte_atomic32_inc(&rxq_ctrl->refcnt);
2052         }
2053         return rxq_ctrl;
2054 }
2055
2056 /**
2057  * Release a Rx queue.
2058  *
2059  * @param dev
2060  *   Pointer to Ethernet device.
2061  * @param idx
2062  *   RX queue index.
2063  *
2064  * @return
2065  *   1 while a reference on it exists, 0 when freed.
2066  */
2067 int
2068 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
2069 {
2070         struct mlx5_priv *priv = dev->data->dev_private;
2071         struct mlx5_rxq_ctrl *rxq_ctrl;
2072
2073         if (!(*priv->rxqs)[idx])
2074                 return 0;
2075         rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
2076         assert(rxq_ctrl->priv);
2077         if (rxq_ctrl->obj && !mlx5_rxq_obj_release(rxq_ctrl->obj))
2078                 rxq_ctrl->obj = NULL;
2079         if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
2080                 if (rxq_ctrl->dbr_umem_id_valid)
2081                         claim_zero(mlx5_release_dbr(dev, rxq_ctrl->dbr_umem_id,
2082                                                     rxq_ctrl->dbr_offset));
2083                 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
2084                         mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
2085                 LIST_REMOVE(rxq_ctrl, next);
2086                 rte_free(rxq_ctrl);
2087                 (*priv->rxqs)[idx] = NULL;
2088                 return 0;
2089         }
2090         return 1;
2091 }
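
/*
 * Illustrative pairing of the two functions above: every successful
 * mlx5_rxq_get() is expected to be balanced by an mlx5_rxq_release(), e.g.:
 *
 *   struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, idx);
 *
 *   if (rxq_ctrl) {
 *           ... use rxq_ctrl ...
 *           mlx5_rxq_release(dev, idx);
 *   }
 */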
2092
2093 /**
2094  * Verify the Rx Queue list is empty
2095  *
2096  * @param dev
2097  *   Pointer to Ethernet device.
2098  *
2099  * @return
2100  *   The number of objects not released.
2101  */
2102 int
2103 mlx5_rxq_verify(struct rte_eth_dev *dev)
2104 {
2105         struct mlx5_priv *priv = dev->data->dev_private;
2106         struct mlx5_rxq_ctrl *rxq_ctrl;
2107         int ret = 0;
2108
2109         LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
2110                 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
2111                         dev->data->port_id, rxq_ctrl->rxq.idx);
2112                 ++ret;
2113         }
2114         return ret;
2115 }
2116
2117 /**
2118  * Get a Rx queue type.
2119  *
2120  * @param dev
2121  *   Pointer to Ethernet device.
2122  * @param idx
2123  *   Rx queue index.
2124  *
2125  * @return
2126  *   The Rx queue type.
2127  */
2128 enum mlx5_rxq_type
2129 mlx5_rxq_get_type(struct rte_eth_dev *dev, uint16_t idx)
2130 {
2131         struct mlx5_priv *priv = dev->data->dev_private;
2132         struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
2133
2134         if (idx < priv->rxqs_n && (*priv->rxqs)[idx]) {
2135                 rxq_ctrl = container_of((*priv->rxqs)[idx],
2136                                         struct mlx5_rxq_ctrl,
2137                                         rxq);
2138                 return rxq_ctrl->type;
2139         }
2140         return MLX5_RXQ_TYPE_UNDEFINED;
2141 }
2142
2143 /**
2144  * Create an indirection table.
2145  *
2146  * @param dev
2147  *   Pointer to Ethernet device.
2148  * @param queues
2149  *   Queues entering the indirection table.
2150  * @param queues_n
2151  *   Number of queues in the array.
2152  * @param type
 *   Indirection table type, Verbs or DevX.
 *
2153  * @return
2154  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2155  */
2156 static struct mlx5_ind_table_obj *
2157 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues,
2158                        uint32_t queues_n, enum mlx5_ind_tbl_type type)
2159 {
2160         struct mlx5_priv *priv = dev->data->dev_private;
2161         struct mlx5_ind_table_obj *ind_tbl;
2162         unsigned int i = 0, j = 0, k = 0;
2163
2164         ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
2165                              queues_n * sizeof(uint16_t), 0);
2166         if (!ind_tbl) {
2167                 rte_errno = ENOMEM;
2168                 return NULL;
2169         }
2170         ind_tbl->type = type;
2171         if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
2172                 const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
2173                         log2above(queues_n) :
2174                         log2above(priv->config.ind_table_max_size);
2175                 struct ibv_wq *wq[1 << wq_n];
2176
2177                 for (i = 0; i != queues_n; ++i) {
2178                         struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev,
2179                                                                  queues[i]);
2180                         if (!rxq)
2181                                 goto error;
2182                         wq[i] = rxq->obj->wq;
2183                         ind_tbl->queues[i] = queues[i];
2184                 }
2185                 ind_tbl->queues_n = queues_n;
2186                 /* Finalise indirection table. */
2187                 k = i; /* Retain value of i for use in error case. */
2188                 for (j = 0; k != (unsigned int)(1 << wq_n); ++k, ++j)
2189                         wq[k] = wq[j];
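                /*
                 * Example of the wrap-around fill above, with hypothetical
                 * queues_n = 6: wq_n = log2above(6) = 3, so the 8-entry table
                 * becomes { q0, q1, q2, q3, q4, q5, q0, q1 }.
                 */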
2190                 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table
2191                         (priv->sh->ctx,
2192                          &(struct ibv_rwq_ind_table_init_attr){
2193                                 .log_ind_tbl_size = wq_n,
2194                                 .ind_tbl = wq,
2195                                 .comp_mask = 0,
2196                         });
2197                 if (!ind_tbl->ind_table) {
2198                         rte_errno = errno;
2199                         goto error;
2200                 }
2201         } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */
2202                 struct mlx5_devx_rqt_attr *rqt_attr = NULL;
2203                 const unsigned int rqt_n =
2204                         1 << (rte_is_power_of_2(queues_n) ?
2205                               log2above(queues_n) :
2206                               log2above(priv->config.ind_table_max_size));
2207
2208                 rqt_attr = rte_calloc(__func__, 1, sizeof(*rqt_attr) +
2209                                       rqt_n * sizeof(uint32_t), 0);
2210                 if (!rqt_attr) {
2211                         DRV_LOG(ERR, "port %u cannot allocate RQT resources",
2212                                 dev->data->port_id);
2213                         rte_errno = ENOMEM;
2214                         goto error;
2215                 }
2216                 rqt_attr->rqt_max_size = priv->config.ind_table_max_size;
2217                 rqt_attr->rqt_actual_size = rqt_n;
2218                 for (i = 0; i != queues_n; ++i) {
2219                         struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev,
2220                                                                  queues[i]);
2221                         if (!rxq)
2222                                 goto error;
2223                         rqt_attr->rq_list[i] = rxq->obj->rq->id;
2224                         ind_tbl->queues[i] = queues[i];
2225                 }
2226                 k = i; /* Retain value of i for use in error case. */
2227                 for (j = 0; k != rqt_n; ++k, ++j)
2228                         rqt_attr->rq_list[k] = rqt_attr->rq_list[j];
2229                 ind_tbl->rqt = mlx5_devx_cmd_create_rqt(priv->sh->ctx,
2230                                                         rqt_attr);
2231                 rte_free(rqt_attr);
2232                 if (!ind_tbl->rqt) {
2233                         DRV_LOG(ERR, "port %u cannot create DevX RQT",
2234                                 dev->data->port_id);
2235                         rte_errno = errno;
2236                         goto error;
2237                 }
2238                 ind_tbl->queues_n = queues_n;
2239         }
2240         rte_atomic32_inc(&ind_tbl->refcnt);
2241         LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
2242         return ind_tbl;
2243 error:
2244         for (j = 0; j < i; j++)
2245                 mlx5_rxq_release(dev, ind_tbl->queues[j]);
2246         rte_free(ind_tbl);
2247         DEBUG("port %u cannot create indirection table", dev->data->port_id);
2248         return NULL;
2249 }
2250
2251 /**
2252  * Get an indirection table.
2253  *
2254  * @param dev
2255  *   Pointer to Ethernet device.
2256  * @param queues
2257  *   Queues entering the indirection table.
2258  * @param queues_n
2259  *   Number of queues in the array.
2260  *
2261  * @return
2262  *   An indirection table if found.
2263  */
2264 static struct mlx5_ind_table_obj *
2265 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues,
2266                        uint32_t queues_n)
2267 {
2268         struct mlx5_priv *priv = dev->data->dev_private;
2269         struct mlx5_ind_table_obj *ind_tbl;
2270
2271         LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
2272                 if ((ind_tbl->queues_n == queues_n) &&
2273                     (memcmp(ind_tbl->queues, queues,
2274                             ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
2275                      == 0))
2276                         break;
2277         }
2278         if (ind_tbl) {
2279                 unsigned int i;
2280
2281                 rte_atomic32_inc(&ind_tbl->refcnt);
2282                 for (i = 0; i != ind_tbl->queues_n; ++i)
2283                         mlx5_rxq_get(dev, ind_tbl->queues[i]);
2284         }
2285         return ind_tbl;
2286 }
2287
2288 /**
2289  * Release an indirection table.
2290  *
2291  * @param dev
2292  *   Pointer to Ethernet device.
2293  * @param ind_table
2294  *   Indirection table to release.
2295  *
2296  * @return
2297  *   1 while a reference on it exists, 0 when freed.
2298  */
2299 static int
2300 mlx5_ind_table_obj_release(struct rte_eth_dev *dev,
2301                            struct mlx5_ind_table_obj *ind_tbl)
2302 {
2303         unsigned int i;
2304
2305         if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) {
2306                 if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV)
2307                         claim_zero(mlx5_glue->destroy_rwq_ind_table
2308                                                         (ind_tbl->ind_table));
2309                 else if (ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX)
2310                         claim_zero(mlx5_devx_cmd_destroy(ind_tbl->rqt));
2311         }
2312         for (i = 0; i != ind_tbl->queues_n; ++i)
2313                 claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i]));
2314         if (!rte_atomic32_read(&ind_tbl->refcnt)) {
2315                 LIST_REMOVE(ind_tbl, next);
2316                 rte_free(ind_tbl);
2317                 return 0;
2318         }
2319         return 1;
2320 }
2321
2322 /**
2323  * Verify the indirection table object list is empty
2324  *
2325  * @param dev
2326  *   Pointer to Ethernet device.
2327  *
2328  * @return
2329  *   The number of objects not released.
2330  */
2331 int
2332 mlx5_ind_table_obj_verify(struct rte_eth_dev *dev)
2333 {
2334         struct mlx5_priv *priv = dev->data->dev_private;
2335         struct mlx5_ind_table_obj *ind_tbl;
2336         int ret = 0;
2337
2338         LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
2339                 DRV_LOG(DEBUG,
2340                         "port %u indirection table obj %p still referenced",
2341                         dev->data->port_id, (void *)ind_tbl);
2342                 ++ret;
2343         }
2344         return ret;
2345 }
2346
2347 /**
2348  * Create an Rx Hash queue.
2349  *
2350  * @param dev
2351  *   Pointer to Ethernet device.
2352  * @param rss_key
2353  *   RSS key for the Rx hash queue.
2354  * @param rss_key_len
2355  *   RSS key length.
2356  * @param hash_fields
2357  *   Verbs protocol hash fields to apply RSS on.
2358  * @param queues
2359  *   Queues entering the hash queue. In case of empty hash_fields only the
2360  *   first queue index will be taken for the indirection table.
2361  * @param queues_n
2362  *   Number of queues.
2363  * @param tunnel
2364  *   Tunnel type.
2365  *
2366  * @return
2367  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2368  */
2369 struct mlx5_hrxq *
2370 mlx5_hrxq_new(struct rte_eth_dev *dev,
2371               const uint8_t *rss_key, uint32_t rss_key_len,
2372               uint64_t hash_fields,
2373               const uint16_t *queues, uint32_t queues_n,
2374               int tunnel __rte_unused)
2375 {
2376         struct mlx5_priv *priv = dev->data->dev_private;
2377         struct mlx5_hrxq *hrxq;
2378         struct ibv_qp *qp = NULL;
2379         struct mlx5_ind_table_obj *ind_tbl;
2380         int err;
2381         struct mlx5_devx_obj *tir = NULL;
2382         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[0]];
2383         struct mlx5_rxq_ctrl *rxq_ctrl =
2384                 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2385
2386         queues_n = hash_fields ? queues_n : 1;
2387         ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
2388         if (!ind_tbl) {
2389                 enum mlx5_ind_tbl_type type;
2390
2391                 type = rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV ?
2392                                 MLX5_IND_TBL_TYPE_IBV : MLX5_IND_TBL_TYPE_DEVX;
2393                 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, type);
2394         }
2395         if (!ind_tbl) {
2396                 rte_errno = ENOMEM;
2397                 return NULL;
2398         }
2399         if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
2400 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2401                 struct mlx5dv_qp_init_attr qp_init_attr;
2402
2403                 memset(&qp_init_attr, 0, sizeof(qp_init_attr));
2404                 if (tunnel) {
2405                         qp_init_attr.comp_mask =
2406                                 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
2407                         qp_init_attr.create_flags =
2408                                 MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
2409                 }
2410 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2411                 if (dev->data->dev_conf.lpbk_mode) {
2412                         /*
2413                          * Allow packet sent from NIC loop back
2414                          * Allow packets sent from NIC loop back
2415                          */
2416                         qp_init_attr.comp_mask |=
2417                                 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
2418                         qp_init_attr.create_flags |=
2419                                 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
2420                 }
2421 #endif
2422                 qp = mlx5_glue->dv_create_qp
2423                         (priv->sh->ctx,
2424                          &(struct ibv_qp_init_attr_ex){
2425                                 .qp_type = IBV_QPT_RAW_PACKET,
2426                                 .comp_mask =
2427                                         IBV_QP_INIT_ATTR_PD |
2428                                         IBV_QP_INIT_ATTR_IND_TABLE |
2429                                         IBV_QP_INIT_ATTR_RX_HASH,
2430                                 .rx_hash_conf = (struct ibv_rx_hash_conf){
2431                                         .rx_hash_function =
2432                                                 IBV_RX_HASH_FUNC_TOEPLITZ,
2433                                         .rx_hash_key_len = rss_key_len,
2434                                         .rx_hash_key =
2435                                                 (void *)(uintptr_t)rss_key,
2436                                         .rx_hash_fields_mask = hash_fields,
2437                                 },
2438                                 .rwq_ind_tbl = ind_tbl->ind_table,
2439                                 .pd = priv->sh->pd,
2440                           },
2441                           &qp_init_attr);
2442 #else
2443                 qp = mlx5_glue->create_qp_ex
2444                         (priv->sh->ctx,
2445                          &(struct ibv_qp_init_attr_ex){
2446                                 .qp_type = IBV_QPT_RAW_PACKET,
2447                                 .comp_mask =
2448                                         IBV_QP_INIT_ATTR_PD |
2449                                         IBV_QP_INIT_ATTR_IND_TABLE |
2450                                         IBV_QP_INIT_ATTR_RX_HASH,
2451                                 .rx_hash_conf = (struct ibv_rx_hash_conf){
2452                                         .rx_hash_function =
2453                                                 IBV_RX_HASH_FUNC_TOEPLITZ,
2454                                         .rx_hash_key_len = rss_key_len,
2455                                         .rx_hash_key =
2456                                                 (void *)(uintptr_t)rss_key,
2457                                         .rx_hash_fields_mask = hash_fields,
2458                                 },
2459                                 .rwq_ind_tbl = ind_tbl->ind_table,
2460                                 .pd = priv->sh->pd,
2461                          });
2462 #endif
2463                 if (!qp) {
2464                         rte_errno = errno;
2465                         goto error;
2466                 }
2467         } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */
2468                 struct mlx5_devx_tir_attr tir_attr;
2469                 uint32_t i;
2470                 uint32_t lro = 1;
2471
2472                 /* Enable TIR LRO only if all the queues were configured for it. */
2473                 for (i = 0; i < queues_n; ++i) {
2474                         if (!(*priv->rxqs)[queues[i]]->lro) {
2475                                 lro = 0;
2476                                 break;
2477                         }
2478                 }
2479                 memset(&tir_attr, 0, sizeof(tir_attr));
2480                 tir_attr.disp_type = MLX5_TIRC_DISP_TYPE_INDIRECT;
2481                 tir_attr.rx_hash_fn = MLX5_RX_HASH_FN_TOEPLITZ;
2482                 tir_attr.tunneled_offload_en = !!tunnel;
2483                 /* If needed, translate hash_fields bitmap to PRM format. */
2484                 if (hash_fields) {
2485 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2486                         struct mlx5_rx_hash_field_select *rx_hash_field_select =
2487                                         hash_fields & IBV_RX_HASH_INNER ?
2488                                         &tir_attr.rx_hash_field_selector_inner :
2489                                         &tir_attr.rx_hash_field_selector_outer;
2490 #else
2491                         struct mlx5_rx_hash_field_select *rx_hash_field_select =
2492                                         &tir_attr.rx_hash_field_selector_outer;
2493 #endif
2494
2495                         /* 1 bit: 0: IPv4, 1: IPv6. */
2496                         rx_hash_field_select->l3_prot_type =
2497                                 !!(hash_fields & MLX5_IPV6_IBV_RX_HASH);
2498                         /* 1 bit: 0: TCP, 1: UDP. */
2499                         rx_hash_field_select->l4_prot_type =
2500                                 !!(hash_fields & MLX5_UDP_IBV_RX_HASH);
2501                         /* Bitmask which sets which fields to use in RX Hash. */
2502                         rx_hash_field_select->selected_fields =
2503                         ((!!(hash_fields & MLX5_L3_SRC_IBV_RX_HASH)) <<
2504                          MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_SRC_IP) |
2505                         (!!(hash_fields & MLX5_L3_DST_IBV_RX_HASH)) <<
2506                          MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_DST_IP |
2507                         (!!(hash_fields & MLX5_L4_SRC_IBV_RX_HASH)) <<
2508                          MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_SPORT |
2509                         (!!(hash_fields & MLX5_L4_DST_IBV_RX_HASH)) <<
2510                          MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_DPORT;
2511                 }
2512                 if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN)
2513                         tir_attr.transport_domain = priv->sh->td->id;
2514                 else
2515                         tir_attr.transport_domain = priv->sh->tdn;
2516                 memcpy(tir_attr.rx_hash_toeplitz_key, rss_key, rss_key_len);
2517                 tir_attr.indirect_table = ind_tbl->rqt->id;
2518                 if (dev->data->dev_conf.lpbk_mode)
2519                         tir_attr.self_lb_block =
2520                                         MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
2521                 if (lro) {
2522                         tir_attr.lro_timeout_period_usecs =
2523                                         priv->config.lro.timeout;
2524                         tir_attr.lro_max_msg_sz = priv->max_lro_msg_size;
2525                         tir_attr.lro_enable_mask =
2526                                         MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2527                                         MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO;
2528                 }
2529                 tir = mlx5_devx_cmd_create_tir(priv->sh->ctx, &tir_attr);
2530                 if (!tir) {
2531                         DRV_LOG(ERR, "port %u cannot create DevX TIR",
2532                                 dev->data->port_id);
2533                         rte_errno = errno;
2534                         goto error;
2535                 }
2536         }
2537         hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
2538         if (!hrxq)
2539                 goto error;
2540         hrxq->ind_table = ind_tbl;
2541         if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
2542                 hrxq->qp = qp;
2543 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2544                 hrxq->action =
2545                         mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
2546                 if (!hrxq->action) {
2547                         rte_errno = errno;
2548                         goto error;
2549                 }
2550 #endif
2551         } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */
2552                 hrxq->tir = tir;
2553 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2554                 hrxq->action = mlx5_glue->dv_create_flow_action_dest_devx_tir
2555                                                         (hrxq->tir->obj);
2556                 if (!hrxq->action) {
2557                         rte_errno = errno;
2558                         goto error;
2559                 }
2560 #endif
2561         }
2562         hrxq->rss_key_len = rss_key_len;
2563         hrxq->hash_fields = hash_fields;
2564         memcpy(hrxq->rss_key, rss_key, rss_key_len);
2565         rte_atomic32_inc(&hrxq->refcnt);
2566         LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
2567         return hrxq;
2568 error:
2569         err = rte_errno; /* Save rte_errno before cleanup. */
2570         mlx5_ind_table_obj_release(dev, ind_tbl);
2571         if (qp)
2572                 claim_zero(mlx5_glue->destroy_qp(qp));
2573         else if (tir)
2574                 claim_zero(mlx5_devx_cmd_destroy(tir));
2575         rte_errno = err; /* Restore rte_errno. */
2576         return NULL;
2577 }
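
/*
 * Illustrative call (a sketch, not taken from the flow engine): creating a
 * TCP/IPv4 RSS hash Rx queue over queues[] with the driver's default Toeplitz
 * key:
 *
 *   hrxq = mlx5_hrxq_new(dev, rss_hash_default_key, MLX5_RSS_HASH_KEY_LEN,
 *                        IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
 *                        IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
 *                        queues, queues_n, 0);
 */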
2578
2579 /**
2580  * Get an Rx Hash queue.
2581  *
2582  * @param dev
2583  *   Pointer to Ethernet device.
2584  * @param rss_key
2585  *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash fields to apply RSS on.
2586  * @param queues
2587  *   Queues entering the hash queue. In case of empty hash_fields only the
2588  *   first queue index will be taken for the indirection table.
2589  * @param queues_n
2590  *   Number of queues.
2591  *
2592  * @return
2593  *   A hash Rx queue if found, NULL otherwise.
2594  */
2595 struct mlx5_hrxq *
2596 mlx5_hrxq_get(struct rte_eth_dev *dev,
2597               const uint8_t *rss_key, uint32_t rss_key_len,
2598               uint64_t hash_fields,
2599               const uint16_t *queues, uint32_t queues_n)
2600 {
2601         struct mlx5_priv *priv = dev->data->dev_private;
2602         struct mlx5_hrxq *hrxq;
2603
2604         queues_n = hash_fields ? queues_n : 1;
2605         LIST_FOREACH(hrxq, &priv->hrxqs, next) {
2606                 struct mlx5_ind_table_obj *ind_tbl;
2607
2608                 if (hrxq->rss_key_len != rss_key_len)
2609                         continue;
2610                 if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
2611                         continue;
2612                 if (hrxq->hash_fields != hash_fields)
2613                         continue;
2614                 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
2615                 if (!ind_tbl)
2616                         continue;
2617                 if (ind_tbl != hrxq->ind_table) {
2618                         mlx5_ind_table_obj_release(dev, ind_tbl);
2619                         continue;
2620                 }
2621                 rte_atomic32_inc(&hrxq->refcnt);
2622                 return hrxq;
2623         }
2624         return NULL;
2625 }
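
/*
 * Illustrative get-or-create pattern for hash Rx queues (a sketch, not the
 * actual flow engine code):
 *
 *   hrxq = mlx5_hrxq_get(dev, rss_key, rss_key_len, hash_fields,
 *                        queues, queues_n);
 *   if (!hrxq)
 *           hrxq = mlx5_hrxq_new(dev, rss_key, rss_key_len, hash_fields,
 *                                queues, queues_n, tunnel);
 *   ...
 *   mlx5_hrxq_release(dev, hrxq);
 */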
2626
2627 /**
2628  * Release the hash Rx queue.
2629  *
2630  * @param dev
2631  *   Pointer to Ethernet device.
2632  * @param hrxq
2633  *   Pointer to Hash Rx queue to release.
2634  *
2635  * @return
2636  *   1 while a reference on it exists, 0 when freed.
2637  */
2638 int
2639 mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq)
2640 {
2641         if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
2642 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2643                 mlx5_glue->destroy_flow_action(hrxq->action);
2644 #endif
2645                 if (hrxq->ind_table->type == MLX5_IND_TBL_TYPE_IBV)
2646                         claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
2647                 else /* hrxq->ind_table->type == MLX5_IND_TBL_TYPE_DEVX */
2648                         claim_zero(mlx5_devx_cmd_destroy(hrxq->tir));
2649                 mlx5_ind_table_obj_release(dev, hrxq->ind_table);
2650                 LIST_REMOVE(hrxq, next);
2651                 rte_free(hrxq);
2652                 return 0;
2653         }
2654         claim_nonzero(mlx5_ind_table_obj_release(dev, hrxq->ind_table));
2655         return 1;
2656 }
2657
2658 /**
2659  * Verify the hash Rx queue list is empty
2660  *
2661  * @param dev
2662  *   Pointer to Ethernet device.
2663  *
2664  * @return
2665  *   The number of objects not released.
2666  */
2667 int
2668 mlx5_hrxq_verify(struct rte_eth_dev *dev)
2669 {
2670         struct mlx5_priv *priv = dev->data->dev_private;
2671         struct mlx5_hrxq *hrxq;
2672         int ret = 0;
2673
2674         LIST_FOREACH(hrxq, &priv->hrxqs, next) {
2675                 DRV_LOG(DEBUG,
2676                         "port %u hash Rx queue %p still referenced",
2677                         dev->data->port_id, (void *)hrxq);
2678                 ++ret;
2679         }
2680         return ret;
2681 }
2682
2683 /**
2684  * Create a drop Rx queue Verbs/DevX object.
2685  *
2686  * @param dev
2687  *   Pointer to Ethernet device.
2688  *
2689  * @return
2690  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2691  */
2692 static struct mlx5_rxq_obj *
2693 mlx5_rxq_obj_drop_new(struct rte_eth_dev *dev)
2694 {
2695         struct mlx5_priv *priv = dev->data->dev_private;
2696         struct ibv_context *ctx = priv->sh->ctx;
2697         struct ibv_cq *cq;
2698         struct ibv_wq *wq = NULL;
2699         struct mlx5_rxq_obj *rxq;
2700
2701         if (priv->drop_queue.rxq)
2702                 return priv->drop_queue.rxq;
2703         cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
2704         if (!cq) {
2705                 DEBUG("port %u cannot allocate CQ for drop queue",
2706                       dev->data->port_id);
2707                 rte_errno = errno;
2708                 goto error;
2709         }
2710         wq = mlx5_glue->create_wq(ctx,
2711                  &(struct ibv_wq_init_attr){
2712                         .wq_type = IBV_WQT_RQ,
2713                         .max_wr = 1,
2714                         .max_sge = 1,
2715                         .pd = priv->sh->pd,
2716                         .cq = cq,
2717                  });
2718         if (!wq) {
2719                 DEBUG("port %u cannot allocate WQ for drop queue",
2720                       dev->data->port_id);
2721                 rte_errno = errno;
2722                 goto error;
2723         }
2724         rxq = rte_calloc(__func__, 1, sizeof(*rxq), 0);
2725         if (!rxq) {
2726                 DEBUG("port %u cannot allocate drop Rx queue memory",
2727                       dev->data->port_id);
2728                 rte_errno = ENOMEM;
2729                 goto error;
2730         }
2731         rxq->cq = cq;
2732         rxq->wq = wq;
2733         priv->drop_queue.rxq = rxq;
2734         return rxq;
2735 error:
2736         if (wq)
2737                 claim_zero(mlx5_glue->destroy_wq(wq));
2738         if (cq)
2739                 claim_zero(mlx5_glue->destroy_cq(cq));
2740         return NULL;
2741 }
2742
2743 /**
2744  * Release a drop Rx queue Verbs/DevX object.
2745  *
2746  * @param dev
2747  *   Pointer to Ethernet device.
2751  */
2752 static void
2753 mlx5_rxq_obj_drop_release(struct rte_eth_dev *dev)
2754 {
2755         struct mlx5_priv *priv = dev->data->dev_private;
2756         struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;
2757
2758         if (rxq->wq)
2759                 claim_zero(mlx5_glue->destroy_wq(rxq->wq));
2760         if (rxq->cq)
2761                 claim_zero(mlx5_glue->destroy_cq(rxq->cq));
2762         rte_free(rxq);
2763         priv->drop_queue.rxq = NULL;
2764 }
2765
2766 /**
2767  * Create a drop indirection table.
2768  *
2769  * @param dev
2770  *   Pointer to Ethernet device.
2771  *
2772  * @return
2773  *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
2774  */
2775 static struct mlx5_ind_table_obj *
2776 mlx5_ind_table_obj_drop_new(struct rte_eth_dev *dev)
2777 {
2778         struct mlx5_priv *priv = dev->data->dev_private;
2779         struct mlx5_ind_table_obj *ind_tbl;
2780         struct mlx5_rxq_obj *rxq;
2781         struct mlx5_ind_table_obj tmpl;
2782
2783         rxq = mlx5_rxq_obj_drop_new(dev);
2784         if (!rxq)
2785                 return NULL;
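        /*
         * log_ind_tbl_size = 0 creates a single-entry indirection table
         * whose only member is the drop WQ.
         */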
2786         tmpl.ind_table = mlx5_glue->create_rwq_ind_table
2787                 (priv->sh->ctx,
2788                  &(struct ibv_rwq_ind_table_init_attr){
2789                         .log_ind_tbl_size = 0,
2790                         .ind_tbl = &rxq->wq,
2791                         .comp_mask = 0,
2792                  });
2793         if (!tmpl.ind_table) {
2794                 DEBUG("port %u cannot allocate indirection table for drop"
2795                       " queue",
2796                       dev->data->port_id);
2797                 rte_errno = errno;
2798                 goto error;
2799         }
2800         ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl), 0);
2801         if (!ind_tbl) {
2802                 rte_errno = ENOMEM;
2803                 goto error;
2804         }
2805         ind_tbl->ind_table = tmpl.ind_table;
2806         return ind_tbl;
2807 error:
        /* The indirection table may already exist if only the calloc failed. */
        if (tmpl.ind_table)
                claim_zero(mlx5_glue->destroy_rwq_ind_table(tmpl.ind_table));
2808         mlx5_rxq_obj_drop_release(dev);
2809         return NULL;
2810 }
2811
2812 /**
2813  * Release a drop indirection table.
2814  *
2815  * @param dev
2816  *   Pointer to Ethernet device.
2817  */
2818 static void
2819 mlx5_ind_table_obj_drop_release(struct rte_eth_dev *dev)
2820 {
2821         struct mlx5_priv *priv = dev->data->dev_private;
2822         struct mlx5_ind_table_obj *ind_tbl = priv->drop_queue.hrxq->ind_table;
2823
2824         claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
2825         mlx5_rxq_obj_drop_release(dev);
2826         rte_free(ind_tbl);
2827         priv->drop_queue.hrxq->ind_table = NULL;
2828 }
2829
2830 /**
2831  * Create a drop hash Rx queue.
2832  *
2833  * @param dev
2834  *   Pointer to Ethernet device.
2835  *
2836  * @return
2837  *   The drop hash Rx queue object initialised, NULL otherwise and rte_errno is set.
2838  */
2839 struct mlx5_hrxq *
2840 mlx5_hrxq_drop_new(struct rte_eth_dev *dev)
2841 {
2842         struct mlx5_priv *priv = dev->data->dev_private;
2843         struct mlx5_ind_table_obj *ind_tbl = NULL;
2844         struct ibv_qp *qp = NULL;
2845         struct mlx5_hrxq *hrxq = NULL;
2846
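        /*
         * A single drop hash Rx queue is shared port-wide: it is created on
         * first request and reference counted by subsequent users.
         */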
2847         if (priv->drop_queue.hrxq) {
2848                 rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt);
2849                 return priv->drop_queue.hrxq;
2850         }
2851         hrxq = rte_calloc(__func__, 1, sizeof(*hrxq), 0);
2852         if (!hrxq) {
2853                 DRV_LOG(WARNING,
2854                         "port %u cannot allocate memory for drop queue",
2855                         dev->data->port_id);
2856                 rte_errno = ENOMEM;
2857                 goto error;
2858         }
2859         priv->drop_queue.hrxq = hrxq;
2860         ind_tbl = mlx5_ind_table_obj_drop_new(dev);
2861         if (!ind_tbl)
2862                 goto error;
2863         hrxq->ind_table = ind_tbl;
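        /*
         * The hash QP is created with an empty rx_hash_fields_mask, so no
         * RSS spreading takes place and every packet lands in the single
         * drop WQ referenced by the indirection table.
         */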
2864         qp = mlx5_glue->create_qp_ex(priv->sh->ctx,
2865                  &(struct ibv_qp_init_attr_ex){
2866                         .qp_type = IBV_QPT_RAW_PACKET,
2867                         .comp_mask =
2868                                 IBV_QP_INIT_ATTR_PD |
2869                                 IBV_QP_INIT_ATTR_IND_TABLE |
2870                                 IBV_QP_INIT_ATTR_RX_HASH,
2871                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2872                                 .rx_hash_function =
2873                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2874                                 .rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
2875                                 .rx_hash_key = rss_hash_default_key,
2876                                 .rx_hash_fields_mask = 0,
2877                                 },
2878                         .rwq_ind_tbl = ind_tbl->ind_table,
2879                         .pd = priv->sh->pd
2880                  });
2881         if (!qp) {
2882                 DEBUG("port %u cannot allocate QP for drop queue",
2883                       dev->data->port_id);
2884                 rte_errno = errno;
2885                 goto error;
2886         }
2887         hrxq->qp = qp;
2888 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
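        /*
         * Under the DV flow engine the QP must also be wrapped into a flow
         * action object to be usable as a flow destination.
         */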
2889         hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
2890         if (!hrxq->action) {
2891                 rte_errno = errno;
2892                 goto error;
2893         }
2894 #endif
2895         rte_atomic32_set(&hrxq->refcnt, 1);
2896         return hrxq;
2897 error:
2898 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2899         if (hrxq && hrxq->action)
2900                 mlx5_glue->destroy_flow_action(hrxq->action);
2901 #endif
2902         if (qp)
2903                 claim_zero(mlx5_glue->destroy_qp(qp));
2904         if (ind_tbl)
2905                 mlx5_ind_table_obj_drop_release(dev);
2906         if (hrxq) {
2907                 priv->drop_queue.hrxq = NULL;
2908                 rte_free(hrxq);
2909         }
2910         return NULL;
2911 }
2912
2913 /**
2914  * Release a drop hash Rx queue.
2915  *
2916  * @param dev
2917  *   Pointer to Ethernet device.
2918  */
2919 void
2920 mlx5_hrxq_drop_release(struct rte_eth_dev *dev)
2921 {
2922         struct mlx5_priv *priv = dev->data->dev_private;
2923         struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
2924
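        /*
         * The underlying Verbs/DevX resources are destroyed only when the
         * last flow holding the drop queue releases its reference.
         */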
2925         if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
2926 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
2927                 mlx5_glue->destroy_flow_action(hrxq->action);
2928 #endif
2929                 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
2930                 mlx5_ind_table_obj_drop_release(dev);
2931                 rte_free(hrxq);
2932                 priv->drop_queue.hrxq = NULL;
2933         }
2934 }
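
/*
 * Illustrative sketch only, not part of the driver: how a caller (e.g. the
 * flow layer) is expected to pair mlx5_hrxq_drop_new() with
 * mlx5_hrxq_drop_release(). The function name and the guarding macro below
 * are hypothetical and the block is compiled out.
 */
#ifdef MLX5_RXQ_DROP_USAGE_EXAMPLE
static int
mlx5_drop_queue_usage_example(struct rte_eth_dev *dev)
{
        struct mlx5_hrxq *hrxq = mlx5_hrxq_drop_new(dev);

        if (hrxq == NULL)
                return -rte_errno; /* rte_errno is set by the constructor. */
        /* hrxq->qp (or hrxq->action under DV) can now terminate a flow. */
        mlx5_hrxq_drop_release(dev);
        return 0;
}
#endif /* MLX5_RXQ_DROP_USAGE_EXAMPLE */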