net/mlx5: fix clang compilation error
[dpdk.git] / drivers / net / mlx5 / mlx5_rxq.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2015 6WIND S.A.
5  *   Copyright 2015 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stddef.h>
35 #include <assert.h>
36 #include <errno.h>
37 #include <string.h>
38 #include <stdint.h>
39 #include <fcntl.h>
40 #include <sys/queue.h>
41
42 /* Verbs header. */
43 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
44 #ifdef PEDANTIC
45 #pragma GCC diagnostic ignored "-Wpedantic"
46 #endif
47 #include <infiniband/verbs.h>
48 #include <infiniband/mlx5dv.h>
49 #ifdef PEDANTIC
50 #pragma GCC diagnostic error "-Wpedantic"
51 #endif
52
53 #include <rte_mbuf.h>
54 #include <rte_malloc.h>
55 #include <rte_ethdev.h>
56 #include <rte_common.h>
57 #include <rte_interrupts.h>
58 #include <rte_debug.h>
59 #include <rte_io.h>
60
61 #include "mlx5.h"
62 #include "mlx5_rxtx.h"
63 #include "mlx5_utils.h"
64 #include "mlx5_autoconf.h"
65 #include "mlx5_defs.h"
66
67 /* Initialization data for hash RX queues. */
68 const struct hash_rxq_init hash_rxq_init[] = {
69         [HASH_RXQ_TCPV4] = {
70                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
71                                 IBV_RX_HASH_DST_IPV4 |
72                                 IBV_RX_HASH_SRC_PORT_TCP |
73                                 IBV_RX_HASH_DST_PORT_TCP),
74                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
75                 .flow_priority = 0,
76                 .flow_spec.tcp_udp = {
77                         .type = IBV_FLOW_SPEC_TCP,
78                         .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
79                 },
80                 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
81         },
82         [HASH_RXQ_UDPV4] = {
83                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
84                                 IBV_RX_HASH_DST_IPV4 |
85                                 IBV_RX_HASH_SRC_PORT_UDP |
86                                 IBV_RX_HASH_DST_PORT_UDP),
87                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
88                 .flow_priority = 0,
89                 .flow_spec.tcp_udp = {
90                         .type = IBV_FLOW_SPEC_UDP,
91                         .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
92                 },
93                 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
94         },
95         [HASH_RXQ_IPV4] = {
96                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
97                                 IBV_RX_HASH_DST_IPV4),
98                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
99                                 ETH_RSS_FRAG_IPV4),
100                 .flow_priority = 1,
101                 .flow_spec.ipv4 = {
102                         .type = IBV_FLOW_SPEC_IPV4,
103                         .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
104                 },
105                 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
106         },
107         [HASH_RXQ_TCPV6] = {
108                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
109                                 IBV_RX_HASH_DST_IPV6 |
110                                 IBV_RX_HASH_SRC_PORT_TCP |
111                                 IBV_RX_HASH_DST_PORT_TCP),
112                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
113                 .flow_priority = 0,
114                 .flow_spec.tcp_udp = {
115                         .type = IBV_FLOW_SPEC_TCP,
116                         .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
117                 },
118                 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
119         },
120         [HASH_RXQ_UDPV6] = {
121                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
122                                 IBV_RX_HASH_DST_IPV6 |
123                                 IBV_RX_HASH_SRC_PORT_UDP |
124                                 IBV_RX_HASH_DST_PORT_UDP),
125                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
126                 .flow_priority = 0,
127                 .flow_spec.tcp_udp = {
128                         .type = IBV_FLOW_SPEC_UDP,
129                         .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
130                 },
131                 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
132         },
133         [HASH_RXQ_IPV6] = {
134                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
135                                 IBV_RX_HASH_DST_IPV6),
136                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
137                                 ETH_RSS_FRAG_IPV6),
138                 .flow_priority = 1,
139                 .flow_spec.ipv6 = {
140                         .type = IBV_FLOW_SPEC_IPV6,
141                         .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
142                 },
143                 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
144         },
145         [HASH_RXQ_ETH] = {
146                 .hash_fields = 0,
147                 .dpdk_rss_hf = 0,
148                 .flow_priority = 2,
149                 .flow_spec.eth = {
150                         .type = IBV_FLOW_SPEC_ETH,
151                         .size = sizeof(hash_rxq_init[0].flow_spec.eth),
152                 },
153                 .underlayer = NULL,
154         },
155 };
156
157 /* Number of entries in hash_rxq_init[]. */
158 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
159
160 /* Initialization data for hash RX queue indirection tables. */
161 static const struct ind_table_init ind_table_init[] = {
162         {
163                 .max_size = -1u, /* Superseded by HW limitations. */
164                 .hash_types =
165                         1 << HASH_RXQ_TCPV4 |
166                         1 << HASH_RXQ_UDPV4 |
167                         1 << HASH_RXQ_IPV4 |
168                         1 << HASH_RXQ_TCPV6 |
169                         1 << HASH_RXQ_UDPV6 |
170                         1 << HASH_RXQ_IPV6 |
171                         0,
172                 .hash_types_n = 6,
173         },
174         {
175                 .max_size = 1,
176                 .hash_types = 1 << HASH_RXQ_ETH,
177                 .hash_types_n = 1,
178         },
179 };
180
181 #define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
182
183 /* Default RSS hash key also used for ConnectX-3. */
184 uint8_t rss_hash_default_key[] = {
185         0x2c, 0xc6, 0x81, 0xd1,
186         0x5b, 0xdb, 0xf4, 0xf7,
187         0xfc, 0xa2, 0x83, 0x19,
188         0xdb, 0x1a, 0x3e, 0x94,
189         0x6b, 0x9e, 0x38, 0xd9,
190         0x2c, 0x9c, 0x03, 0xd1,
191         0xad, 0x99, 0x44, 0xa7,
192         0xd9, 0x56, 0x3d, 0x59,
193         0x06, 0x3c, 0x25, 0xf3,
194         0xfc, 0x1f, 0xdc, 0x2a,
195 };
196
197 /* Length of the default RSS hash key. */
198 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
199
200 /**
201  * Populate flow steering rule for a given hash RX queue type using
202  * information from hash_rxq_init[]. Nothing is written to flow_attr when
203  * flow_attr_size is not large enough, but the required size is still returned.
204  *
205  * @param priv
206  *   Pointer to private structure.
207  * @param[out] flow_attr
208  *   Pointer to flow attribute structure to fill. Note that the allocated
209  *   area must be large enough to hold all flow specifications.
210  * @param flow_attr_size
211  *   Entire size of flow_attr and trailing room for flow specifications.
212  * @param type
213  *   Hash RX queue type to use for flow steering rule.
214  *
215  * @return
216  *   Total size of the flow attribute buffer. No errors are defined.
217  */
218 size_t
219 priv_flow_attr(struct priv *priv, struct ibv_flow_attr *flow_attr,
220                size_t flow_attr_size, enum hash_rxq_type type)
221 {
222         size_t offset = sizeof(*flow_attr);
223         const struct hash_rxq_init *init = &hash_rxq_init[type];
224
225         assert(priv != NULL);
226         assert((size_t)type < RTE_DIM(hash_rxq_init));
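        /*
         * Walk the underlayer chain to compute the total size needed for
         * all flow specifications.
         */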
227         do {
228                 offset += init->flow_spec.hdr.size;
229                 init = init->underlayer;
230         } while (init != NULL);
231         if (offset > flow_attr_size)
232                 return offset;
233         flow_attr_size = offset;
234         init = &hash_rxq_init[type];
235         *flow_attr = (struct ibv_flow_attr){
236                 .type = IBV_FLOW_ATTR_NORMAL,
237                 /* Priorities < 3 are reserved for flow director. */
238                 .priority = init->flow_priority + 3,
239                 .num_of_specs = 0,
240                 .port = priv->port,
241                 .flags = 0,
242         };
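        /*
         * Copy specifications backwards so that the outermost layer
         * (Ethernet) ends up right after the flow attribute header.
         */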
243         do {
244                 offset -= init->flow_spec.hdr.size;
245                 memcpy((void *)((uintptr_t)flow_attr + offset),
246                        &init->flow_spec,
247                        init->flow_spec.hdr.size);
248                 ++flow_attr->num_of_specs;
249                 init = init->underlayer;
250         } while (init != NULL);
251         return flow_attr_size;
252 }
253
254 /**
255  * Convert hash type position in indirection table initializer to
256  * hash RX queue type.
257  *
258  * @param table
259  *   Indirection table initializer.
260  * @param pos
261  *   Hash type position.
262  *
263  * @return
264  *   Hash RX queue type.
265  */
266 static enum hash_rxq_type
267 hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
268 {
269         enum hash_rxq_type type = HASH_RXQ_TCPV4;
270
271         assert(pos < table->hash_types_n);
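        /* Scan enabled hash types until the requested position is reached. */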
272         do {
273                 if ((table->hash_types & (1 << type)) && (pos-- == 0))
274                         break;
275                 ++type;
276         } while (1);
277         return type;
278 }
279
280 /**
281  * Filter out disabled hash RX queue types from ind_table_init[].
282  *
283  * @param priv
284  *   Pointer to private structure.
285  * @param[out] table
286  *   Output table.
287  *
288  * @return
289  *   Number of table entries.
290  */
291 static unsigned int
292 priv_make_ind_table_init(struct priv *priv,
293                          struct ind_table_init (*table)[IND_TABLE_INIT_N])
294 {
295         uint64_t rss_hf;
296         unsigned int i;
297         unsigned int j;
298         unsigned int table_n = 0;
299         /* Mandatory to receive frames not handled by normal hash RX queues. */
300         unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
301
302         rss_hf = priv->rss_hf;
303         /* Process other protocols only if more than one queue. */
304         if (priv->rxqs_n > 1)
305                 for (i = 0; (i != hash_rxq_init_n); ++i)
306                         if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
307                                 hash_types_sup |= (1 << i);
308
309         /* Filter out entries whose protocols are not in the set. */
310         for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
311                 unsigned int nb;
312                 unsigned int h;
313
314                 /* j is increased only if the table has valid protocols. */
315                 assert(j <= i);
316                 (*table)[j] = ind_table_init[i];
317                 (*table)[j].hash_types &= hash_types_sup;
318                 for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
319                         if (((*table)[j].hash_types >> h) & 0x1)
320                                 ++nb;
321                 (*table)[j].hash_types_n = nb;
322                 if (nb) {
323                         ++table_n;
324                         ++j;
325                 }
326         }
327         return table_n;
328 }
329
330 /**
331  * Initialize hash RX queues and indirection table.
332  *
333  * @param priv
334  *   Pointer to private structure.
335  *
336  * @return
337  *   0 on success, errno value on failure.
338  */
339 int
340 priv_create_hash_rxqs(struct priv *priv)
341 {
342         struct ibv_wq *wqs[priv->reta_idx_n];
343         struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
344         unsigned int ind_tables_n =
345                 priv_make_ind_table_init(priv, &ind_table_init);
346         unsigned int hash_rxqs_n = 0;
347         struct hash_rxq (*hash_rxqs)[] = NULL;
348         struct ibv_rwq_ind_table *(*ind_tables)[] = NULL;
349         unsigned int i;
350         unsigned int j;
351         unsigned int k;
352         int err = 0;
353
354         assert(priv->ind_tables == NULL);
355         assert(priv->ind_tables_n == 0);
356         assert(priv->hash_rxqs == NULL);
357         assert(priv->hash_rxqs_n == 0);
358         assert(priv->pd != NULL);
359         assert(priv->ctx != NULL);
360         if (priv->isolated)
361                 return 0;
362         if (priv->rxqs_n == 0)
363                 return EINVAL;
364         assert(priv->rxqs != NULL);
365         if (ind_tables_n == 0) {
366                 ERROR("all hash RX queue types have been filtered out,"
367                       " indirection table cannot be created");
368                 return EINVAL;
369         }
370         if (priv->rxqs_n & (priv->rxqs_n - 1)) {
371                 INFO("%u RX queues are configured, consider rounding this"
372                      " number to the next power of two for better balancing",
373                      priv->rxqs_n);
374                 DEBUG("indirection table extended to assume %u WQs",
375                       priv->reta_idx_n);
376         }
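        /* Collect the Verbs work queue (WQ) of each Rx queue referenced
         * by the RSS indirection table. */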
377         for (i = 0; (i != priv->reta_idx_n); ++i) {
378                 struct mlx5_rxq_ctrl *rxq_ctrl;
379
380                 rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
381                                         struct mlx5_rxq_ctrl, rxq);
382                 wqs[i] = rxq_ctrl->ibv->wq;
383         }
384         /* Get number of hash RX queues to configure. */
385         for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
386                 hash_rxqs_n += ind_table_init[i].hash_types_n;
387         DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
388               hash_rxqs_n, priv->rxqs_n, ind_tables_n);
389         /* Create indirection tables. */
390         ind_tables = rte_calloc(__func__, ind_tables_n,
391                                 sizeof((*ind_tables)[0]), 0);
392         if (ind_tables == NULL) {
393                 err = ENOMEM;
394                 ERROR("cannot allocate indirection tables container: %s",
395                       strerror(err));
396                 goto error;
397         }
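        /* Create one indirection table per entry kept by
         * priv_make_ind_table_init(). */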
398         for (i = 0; (i != ind_tables_n); ++i) {
399                 struct ibv_rwq_ind_table_init_attr ind_init_attr = {
400                         .log_ind_tbl_size = 0, /* Set below. */
401                         .ind_tbl = wqs,
402                         .comp_mask = 0,
403                 };
404                 unsigned int ind_tbl_size = ind_table_init[i].max_size;
405                 struct ibv_rwq_ind_table *ind_table;
406
407                 if (priv->reta_idx_n < ind_tbl_size)
408                         ind_tbl_size = priv->reta_idx_n;
409                 ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
410                 errno = 0;
411                 ind_table = ibv_create_rwq_ind_table(priv->ctx,
412                                                      &ind_init_attr);
413                 if (ind_table != NULL) {
414                         (*ind_tables)[i] = ind_table;
415                         continue;
416                 }
417                 /* Not clear whether errno is set. */
418                 err = (errno ? errno : EINVAL);
419                 ERROR("RX indirection table creation failed with error %d: %s",
420                       err, strerror(err));
421                 goto error;
422         }
423         /* Allocate array that holds hash RX queues and related data. */
424         hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
425                                sizeof((*hash_rxqs)[0]), 0);
426         if (hash_rxqs == NULL) {
427                 err = ENOMEM;
428                 ERROR("cannot allocate hash RX queues container: %s",
429                       strerror(err));
430                 goto error;
431         }
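        /* Create one hash RX queue (QP) per enabled hash type; j selects
         * the indirection table, k the hash type position within it. */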
432         for (i = 0, j = 0, k = 0;
433              ((i != hash_rxqs_n) && (j != ind_tables_n));
434              ++i) {
435                 struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
436                 enum hash_rxq_type type =
437                         hash_rxq_type_from_pos(&ind_table_init[j], k);
438                 struct rte_eth_rss_conf *priv_rss_conf =
439                         (*priv->rss_conf)[type];
440                 struct ibv_rx_hash_conf hash_conf = {
441                         .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
442                         .rx_hash_key_len = (priv_rss_conf ?
443                                             priv_rss_conf->rss_key_len :
444                                             rss_hash_default_key_len),
445                         .rx_hash_key = (priv_rss_conf ?
446                                         priv_rss_conf->rss_key :
447                                         rss_hash_default_key),
448                         .rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
449                 };
450                 struct ibv_qp_init_attr_ex qp_init_attr = {
451                         .qp_type = IBV_QPT_RAW_PACKET,
452                         .comp_mask = (IBV_QP_INIT_ATTR_PD |
453                                       IBV_QP_INIT_ATTR_IND_TABLE |
454                                       IBV_QP_INIT_ATTR_RX_HASH),
455                         .rx_hash_conf = hash_conf,
456                         .rwq_ind_tbl = (*ind_tables)[j],
457                         .pd = priv->pd,
458                 };
459
460                 DEBUG("using indirection table %u for hash RX queue %u type %d",
461                       j, i, type);
462                 *hash_rxq = (struct hash_rxq){
463                         .priv = priv,
464                         .qp = ibv_create_qp_ex(priv->ctx, &qp_init_attr),
465                         .type = type,
466                 };
467                 if (hash_rxq->qp == NULL) {
468                         err = (errno ? errno : EINVAL);
469                         ERROR("Hash RX QP creation failure: %s",
470                               strerror(err));
471                         goto error;
472                 }
473                 if (++k < ind_table_init[j].hash_types_n)
474                         continue;
475                 /* Switch to the next indirection table and reset hash RX
476                  * queue type array index. */
477                 ++j;
478                 k = 0;
479         }
480         priv->ind_tables = ind_tables;
481         priv->ind_tables_n = ind_tables_n;
482         priv->hash_rxqs = hash_rxqs;
483         priv->hash_rxqs_n = hash_rxqs_n;
484         assert(err == 0);
485         return 0;
486 error:
487         if (hash_rxqs != NULL) {
488                 for (i = 0; (i != hash_rxqs_n); ++i) {
489                         struct ibv_qp *qp = (*hash_rxqs)[i].qp;
490
491                         if (qp == NULL)
492                                 continue;
493                         claim_zero(ibv_destroy_qp(qp));
494                 }
495                 rte_free(hash_rxqs);
496         }
497         if (ind_tables != NULL) {
498                 for (j = 0; (j != ind_tables_n); ++j) {
499                         struct ibv_rwq_ind_table *ind_table =
500                                 (*ind_tables)[j];
501
502                         if (ind_table == NULL)
503                                 continue;
504                         claim_zero(ibv_destroy_rwq_ind_table(ind_table));
505                 }
506                 rte_free(ind_tables);
507         }
508         return err;
509 }
510
511 /**
512  * Clean up hash RX queues and indirection table.
513  *
514  * @param priv
515  *   Pointer to private structure.
516  */
517 void
518 priv_destroy_hash_rxqs(struct priv *priv)
519 {
520         unsigned int i;
521
522         DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
523         if (priv->hash_rxqs_n == 0) {
524                 assert(priv->hash_rxqs == NULL);
525                 assert(priv->ind_tables == NULL);
526                 return;
527         }
528         for (i = 0; (i != priv->hash_rxqs_n); ++i) {
529                 struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
530                 unsigned int j, k;
531
532                 assert(hash_rxq->priv == priv);
533                 assert(hash_rxq->qp != NULL);
534                 /* Also check that there are no remaining flows. */
535                 for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
536                         for (k = 0;
537                              (k != RTE_DIM(hash_rxq->special_flow[j]));
538                              ++k)
539                                 assert(hash_rxq->special_flow[j][k] == NULL);
540                 for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
541                         for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
542                                 assert(hash_rxq->mac_flow[j][k] == NULL);
543                 claim_zero(ibv_destroy_qp(hash_rxq->qp));
544         }
545         priv->hash_rxqs_n = 0;
546         rte_free(priv->hash_rxqs);
547         priv->hash_rxqs = NULL;
548         for (i = 0; (i != priv->ind_tables_n); ++i) {
549                 struct ibv_rwq_ind_table *ind_table =
550                         (*priv->ind_tables)[i];
551
552                 assert(ind_table != NULL);
553                 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
554         }
555         priv->ind_tables_n = 0;
556         rte_free(priv->ind_tables);
557         priv->ind_tables = NULL;
558 }
559
560 /**
561  * Check whether a given flow type is allowed.
562  *
563  * @param priv
564  *   Pointer to private structure.
565  * @param type
566  *   Flow type to check.
567  *
568  * @return
569  *   Nonzero if the given flow type is allowed.
570  */
571 int
572 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
573 {
574         /* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode
575          * has been requested. */
576         if (priv->promisc_req)
577                 return type == HASH_RXQ_FLOW_TYPE_PROMISC;
578         switch (type) {
579         case HASH_RXQ_FLOW_TYPE_PROMISC:
580                 return !!priv->promisc_req;
581         case HASH_RXQ_FLOW_TYPE_ALLMULTI:
582                 return !!priv->allmulti_req;
583         case HASH_RXQ_FLOW_TYPE_BROADCAST:
584         case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
585                 /* If allmulti is enabled, broadcast and ipv6multi
586                  * are unnecessary. */
587                 return !priv->allmulti_req;
588         case HASH_RXQ_FLOW_TYPE_MAC:
589                 return 1;
590         default:
591                 /* Unsupported flow type is not allowed. */
592                 return 0;
593         }
594         return 0;
595 }
596
597 /**
598  * Automatically enable/disable flows according to configuration.
599  *
600  * @param priv
601  *   Private structure.
602  *
603  * @return
604  *   0 on success, errno value on failure.
605  */
606 int
607 priv_rehash_flows(struct priv *priv)
608 {
609         size_t i;
610
611         for (i = 0; i != RTE_DIM((*priv->hash_rxqs)[0].special_flow); ++i)
612                 if (!priv_allow_flow_type(priv, i)) {
613                         priv_special_flow_disable(priv, i);
614                 } else {
615                         int ret = priv_special_flow_enable(priv, i);
616
617                         if (ret)
618                                 return ret;
619                 }
620         if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
621                 return priv_mac_addrs_enable(priv);
622         priv_mac_addrs_disable(priv);
623         return 0;
624 }
625
626 /**
627  * Allocate RX queue elements.
628  *
629  * @param rxq_ctrl
630  *   Pointer to RX queue structure.
631  *
632  * @return
633  *   0 on success, errno value on failure.
634  */
635 int
636 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
637 {
638         const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
639         unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
640         unsigned int i;
641         int ret = 0;
642
643         /* Iterate on segments. */
644         for (i = 0; (i != elts_n); ++i) {
645                 struct rte_mbuf *buf;
646
647                 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
648                 if (buf == NULL) {
649                         ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
650                         ret = ENOMEM;
651                         goto error;
652                 }
653                 /* Headroom is reserved by rte_pktmbuf_alloc(). */
654                 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
655                 /* Buffer is supposed to be empty. */
656                 assert(rte_pktmbuf_data_len(buf) == 0);
657                 assert(rte_pktmbuf_pkt_len(buf) == 0);
658                 assert(!buf->next);
659                 /* Only the first segment keeps headroom. */
660                 if (i % sges_n)
661                         SET_DATA_OFF(buf, 0);
662                 PORT(buf) = rxq_ctrl->rxq.port_id;
663                 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
664                 PKT_LEN(buf) = DATA_LEN(buf);
665                 NB_SEGS(buf) = 1;
666                 (*rxq_ctrl->rxq.elts)[i] = buf;
667         }
668         /* If Rx vector is activated. */
669         if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
670                 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
671                 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
672                 int j;
673
674                 /* Initialize default rearm_data for vPMD. */
675                 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
676                 rte_mbuf_refcnt_set(mbuf_init, 1);
677                 mbuf_init->nb_segs = 1;
678                 mbuf_init->port = rxq->port_id;
679                 /*
680                  * prevent compiler reordering:
681                  * rearm_data covers previous fields.
682                  */
683                 rte_compiler_barrier();
684                 rxq->mbuf_initializer =
685                         *(uint64_t *)&mbuf_init->rearm_data;
686                 /* Padding with a fake mbuf for vectorized Rx. */
687                 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
688                         (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
689         }
690         DEBUG("%p: allocated and configured %u segments (max %u packets)",
691               (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
692         assert(ret == 0);
693         return 0;
694 error:
695         elts_n = i;
696         for (i = 0; (i != elts_n); ++i) {
697                 if ((*rxq_ctrl->rxq.elts)[i] != NULL)
698                         rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
699                 (*rxq_ctrl->rxq.elts)[i] = NULL;
700         }
701         DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
702         assert(ret > 0);
703         return ret;
704 }
705
706 /**
707  * Free RX queue elements.
708  *
709  * @param rxq_ctrl
710  *   Pointer to RX queue structure.
711  */
712 static void
713 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
714 {
715         struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
716         const uint16_t q_n = (1 << rxq->elts_n);
717         const uint16_t q_mask = q_n - 1;
718         uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
719         uint16_t i;
720
721         DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
722         if (rxq->elts == NULL)
723                 return;
724         /*
725          * Some mbufs in the ring belong to the application and cannot be
726          * freed.
727          */
728         if (rxq_check_vec_support(rxq) > 0) {
729                 for (i = 0; i < used; ++i)
730                         (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
731                 rxq->rq_pi = rxq->rq_ci;
732         }
733         for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
734                 if ((*rxq->elts)[i] != NULL)
735                         rte_pktmbuf_free_seg((*rxq->elts)[i]);
736                 (*rxq->elts)[i] = NULL;
737         }
738 }
739
740 /**
741  * Clean up a RX queue.
742  *
743  * Destroy objects, free allocated memory and reset the structure for reuse.
744  *
745  * @param rxq_ctrl
746  *   Pointer to RX queue structure.
747  */
748 void
749 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
750 {
751         DEBUG("cleaning up %p", (void *)rxq_ctrl);
752         if (rxq_ctrl->ibv)
753                 mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
754         memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
755 }
756
757 /**
758  * DPDK callback to configure a RX queue.
 *
759  * @param dev
760  *   Pointer to Ethernet device structure.
761  * @param idx
762  *   RX queue index.
763  * @param desc
764  *   Number of descriptors to configure in queue.
765  * @param socket
766  *   NUMA socket on which memory must be allocated.
767  * @param[in] conf
768  *   Thresholds parameters.
769  * @param mp
770  *   Memory pool for buffer allocations.
771  *
772  * @return
773  *   0 on success, negative errno value on failure.
774  */
775 int
776 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
777                     unsigned int socket, const struct rte_eth_rxconf *conf,
778                     struct rte_mempool *mp)
779 {
780         struct priv *priv = dev->data->dev_private;
781         struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
782         struct mlx5_rxq_ctrl *rxq_ctrl =
783                 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
784         int ret = 0;
785
786         (void)conf;
787         if (mlx5_is_secondary())
788                 return -E_RTE_SECONDARY;
789         priv_lock(priv);
790         if (!rte_is_power_of_2(desc)) {
791                 desc = 1 << log2above(desc);
792                 WARN("%p: increased number of descriptors in RX queue %u"
793                      " to the next power of two (%d)",
794                      (void *)dev, idx, desc);
795         }
796         DEBUG("%p: configuring queue %u for %u descriptors",
797               (void *)dev, idx, desc);
798         if (idx >= priv->rxqs_n) {
799                 ERROR("%p: queue index out of range (%u >= %u)",
800                       (void *)dev, idx, priv->rxqs_n);
801                 priv_unlock(priv);
802                 return -EOVERFLOW;
803         }
804         if (!mlx5_priv_rxq_releasable(priv, idx)) {
805                 ret = EBUSY;
806                 ERROR("%p: unable to release queue index %u",
807                       (void *)dev, idx);
808                 goto out;
809         }
810         mlx5_priv_rxq_release(priv, idx);
811         rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
812         if (!rxq_ctrl) {
813                 ERROR("%p: unable to allocate queue index %u",
814                       (void *)dev, idx);
815                 ret = ENOMEM;
816                 goto out;
817         }
818         DEBUG("%p: adding RX queue %p to list",
819               (void *)dev, (void *)rxq_ctrl);
820         (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
821 out:
822         priv_unlock(priv);
823         return -ret;
824 }
825
826 /**
827  * DPDK callback to release a RX queue.
828  *
829  * @param dpdk_rxq
830  *   Generic RX queue pointer.
831  */
832 void
833 mlx5_rx_queue_release(void *dpdk_rxq)
834 {
835         struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
836         struct mlx5_rxq_ctrl *rxq_ctrl;
837         struct priv *priv;
838
839         if (mlx5_is_secondary())
840                 return;
841
842         if (rxq == NULL)
843                 return;
844         rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
845         priv = rxq_ctrl->priv;
846         priv_lock(priv);
847         if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
848                 rte_panic("Rx queue %p is still used by a flow and cannot be"
849                           " removed\n", (void *)rxq_ctrl);
850         mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
851         priv_unlock(priv);
852 }
853
854 /**
855  * Allocate queue vector and fill epoll fd list for Rx interrupts.
856  *
857  * @param priv
858  *   Pointer to private structure.
859  *
860  * @return
861  *   0 on success, negative on failure.
862  */
863 int
864 priv_rx_intr_vec_enable(struct priv *priv)
865 {
866         unsigned int i;
867         unsigned int rxqs_n = priv->rxqs_n;
868         unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
869         unsigned int count = 0;
870         struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
871
872         assert(!mlx5_is_secondary());
873         if (!priv->dev->data->dev_conf.intr_conf.rxq)
874                 return 0;
875         priv_rx_intr_vec_disable(priv);
876         intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
877         if (intr_handle->intr_vec == NULL) {
878                 ERROR("failed to allocate memory for interrupt vector,"
879                       " Rx interrupts will not be supported");
880                 return -ENOMEM;
881         }
882         intr_handle->type = RTE_INTR_HANDLE_EXT;
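        /* Fill the interrupt vector: map each Rx queue that supports
         * interrupts to an event file descriptor. */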
883         for (i = 0; i != n; ++i) {
884                 /* This rxq ibv must not be released in this function. */
885                 struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
886                 int fd;
887                 int flags;
888                 int rc;
889
890                 /* Skip queues that cannot request interrupts. */
891                 if (!rxq_ibv || !rxq_ibv->channel) {
892                         /* Use invalid intr_vec[] index to disable entry. */
893                         intr_handle->intr_vec[i] =
894                                 RTE_INTR_VEC_RXTX_OFFSET +
895                                 RTE_MAX_RXTX_INTR_VEC_ID;
896                         continue;
897                 }
898                 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
899                         ERROR("too many Rx queues for interrupt vector size"
900                               " (%d), Rx interrupts cannot be enabled",
901                               RTE_MAX_RXTX_INTR_VEC_ID);
902                         priv_rx_intr_vec_disable(priv);
903                         return -1;
904                 }
905                 fd = rxq_ibv->channel->fd;
906                 flags = fcntl(fd, F_GETFL);
907                 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
908                 if (rc < 0) {
909                         ERROR("failed to make Rx interrupt file descriptor"
910                               " %d non-blocking for queue index %d", fd, i);
911                         priv_rx_intr_vec_disable(priv);
912                         return -1;
913                 }
914                 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
915                 intr_handle->efds[count] = fd;
916                 count++;
917         }
918         if (!count)
919                 priv_rx_intr_vec_disable(priv);
920         else
921                 intr_handle->nb_efd = count;
922         return 0;
923 }
924
925 /**
926  * Clean up Rx interrupts handler.
927  *
928  * @param priv
929  *   Pointer to private structure.
930  */
931 void
932 priv_rx_intr_vec_disable(struct priv *priv)
933 {
934         struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
935         unsigned int i;
936         unsigned int rxqs_n = priv->rxqs_n;
937         unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
938
939         if (!priv->dev->data->dev_conf.intr_conf.rxq)
940                 return;
941         for (i = 0; i != n; ++i) {
942                 struct mlx5_rxq_ctrl *rxq_ctrl;
943                 struct mlx5_rxq_data *rxq_data;
944
945                 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
946                     RTE_MAX_RXTX_INTR_VEC_ID)
947                         continue;
948                 /**
949                  * Need to access directly the queue to release the reference
950                  * kept in priv_rx_intr_vec_enable().
951                  */
952                 rxq_data = (*priv->rxqs)[i];
953                 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
954                 mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
955         }
956         rte_intr_free_epoll_fd(intr_handle);
957         free(intr_handle->intr_vec);
958         intr_handle->nb_efd = 0;
959         intr_handle->intr_vec = NULL;
960 }
961
962 /**
963  * MLX5 CQ notification.
964  *
965  * @param rxq
966  *   Pointer to receive queue structure.
967  * @param sq_n_rxq
968  *   Sequence number per receive queue.
969  */
970 static inline void
971 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
972 {
973         int sq_n = 0;
974         uint32_t doorbell_hi;
975         uint64_t doorbell;
976         void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
977
978         sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
979         doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
980         doorbell = (uint64_t)doorbell_hi << 32;
981         doorbell |=  rxq->cqn;
982         rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
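        /* Make sure the arm record is visible before ringing the doorbell
         * register. */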
983         rte_wmb();
984         rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
985 }
986
987 /**
988  * DPDK callback for Rx queue interrupt enable.
989  *
990  * @param dev
991  *   Pointer to Ethernet device structure.
992  * @param rx_queue_id
993  *   Rx queue number.
994  *
995  * @return
996  *   0 on success, negative on failure.
997  */
998 int
999 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1000 {
1001         struct priv *priv = mlx5_get_priv(dev);
1002         struct mlx5_rxq_data *rxq_data;
1003         struct mlx5_rxq_ctrl *rxq_ctrl;
1004         int ret = 0;
1005
1006         priv_lock(priv);
1007         rxq_data = (*priv->rxqs)[rx_queue_id];
1008         if (!rxq_data) {
1009                 ret = EINVAL;
1010                 goto exit;
1011         }
1012         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1013         if (rxq_ctrl->irq) {
1014                 struct mlx5_rxq_ibv *rxq_ibv;
1015
1016                 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
1017                 if (!rxq_ibv) {
1018                         ret = EINVAL;
1019                         goto exit;
1020                 }
1021                 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
1022                 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1023         }
1024 exit:
1025         priv_unlock(priv);
1026         if (ret)
1027                 WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
1028         return -ret;
1029 }
1030
1031 /**
1032  * DPDK callback for Rx queue interrupt disable.
1033  *
1034  * @param dev
1035  *   Pointer to Ethernet device structure.
1036  * @param rx_queue_id
1037  *   Rx queue number.
1038  *
1039  * @return
1040  *   0 on success, negative on failure.
1041  */
1042 int
1043 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1044 {
1045         struct priv *priv = mlx5_get_priv(dev);
1046         struct mlx5_rxq_data *rxq_data;
1047         struct mlx5_rxq_ctrl *rxq_ctrl;
1048         struct mlx5_rxq_ibv *rxq_ibv = NULL;
1049         struct ibv_cq *ev_cq;
1050         void *ev_ctx;
1051         int ret = 0;
1052
1053         priv_lock(priv);
1054         rxq_data = (*priv->rxqs)[rx_queue_id];
1055         if (!rxq_data) {
1056                 ret = EINVAL;
1057                 goto exit;
1058         }
1059         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1060         if (!rxq_ctrl->irq)
1061                 goto exit;
1062         rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
1063         if (!rxq_ibv) {
1064                 ret = EINVAL;
1065                 goto exit;
1066         }
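        /* Retrieve and acknowledge the pending completion event so the
         * channel file descriptor stops signaling. */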
1067         ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
1068         if (ret || ev_cq != rxq_ibv->cq) {
1069                 ret = EINVAL;
1070                 goto exit;
1071         }
1072         rxq_data->cq_arm_sn++;
1073         ibv_ack_cq_events(rxq_ibv->cq, 1);
1074 exit:
1075         if (rxq_ibv)
1076                 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1077         priv_unlock(priv);
1078         if (ret)
1079                 WARN("unable to disable interrupt on rx queue %d",
1080                      rx_queue_id);
1081         return -ret;
1082 }
1083
1084 /**
1085  * Create the Rx queue Verbs object.
1086  *
1087  * @param priv
1088  *   Pointer to private structure.
1089  * @param idx
1090  *   Queue index in DPDK Rx queue array
1091  *
1092  * @return
1093  *   The Verbs object initialised if it can be created.
1094  */
1095 struct mlx5_rxq_ibv*
1096 mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
1097 {
1098         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1099         struct mlx5_rxq_ctrl *rxq_ctrl =
1100                 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1101         struct ibv_wq_attr mod;
1102         union {
1103                 struct ibv_cq_init_attr_ex cq;
1104                 struct ibv_wq_init_attr wq;
1105                 struct ibv_cq_ex cq_attr;
1106         } attr;
1107         unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
1108         struct mlx5_rxq_ibv *tmpl;
1109         struct mlx5dv_cq cq_info;
1110         struct mlx5dv_rwq rwq;
1111         unsigned int i;
1112         int ret = 0;
1113         struct mlx5dv_obj obj;
1114
1115         assert(rxq_data);
1116         assert(!rxq_ctrl->ibv);
1117         tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1118                                  rxq_ctrl->socket);
1119         if (!tmpl) {
1120                 ERROR("%p: cannot allocate verbs resources",
1121                        (void *)rxq_ctrl);
1122                 goto error;
1123         }
1124         tmpl->rxq_ctrl = rxq_ctrl;
1125         /* Use the entire RX mempool as the memory region. */
1126         tmpl->mr = priv_mr_get(priv, rxq_data->mp);
1127         if (!tmpl->mr) {
1128                 tmpl->mr = priv_mr_new(priv, rxq_data->mp);
1129                 if (!tmpl->mr) {
1130                         ERROR("%p: MR creation failure", (void *)rxq_ctrl);
1131                         goto error;
1132                 }
1133         }
1134         if (rxq_ctrl->irq) {
1135                 tmpl->channel = ibv_create_comp_channel(priv->ctx);
1136                 if (!tmpl->channel) {
1137                         ERROR("%p: Comp Channel creation failure",
1138                               (void *)rxq_ctrl);
1139                         goto error;
1140                 }
1141         }
1142         attr.cq = (struct ibv_cq_init_attr_ex){
1143                 .comp_mask = 0,
1144         };
1145         if (priv->cqe_comp) {
1146                 attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
1147                 attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
1148                 /*
1149                  * For vectorized Rx, it must not be doubled in order to
1150                  * make cq_ci and rq_ci aligned.
1151                  */
1152                 if (rxq_check_vec_support(rxq_data) < 0)
1153                         cqe_n *= 2;
1154         }
1155         tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
1156         if (tmpl->cq == NULL) {
1157                 ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
1158                 goto error;
1159         }
1160         DEBUG("priv->device_attr.max_qp_wr is %d",
1161               priv->device_attr.orig_attr.max_qp_wr);
1162         DEBUG("priv->device_attr.max_sge is %d",
1163               priv->device_attr.orig_attr.max_sge);
1164         attr.wq = (struct ibv_wq_init_attr){
1165                 .wq_context = NULL, /* Could be useful in the future. */
1166                 .wq_type = IBV_WQT_RQ,
1167                 /* Max number of outstanding WRs. */
1168                 .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
1169                 /* Max number of scatter/gather elements in a WR. */
1170                 .max_sge = 1 << rxq_data->sges_n,
1171                 .pd = priv->pd,
1172                 .cq = tmpl->cq,
1173                 .comp_mask =
1174                         IBV_WQ_FLAGS_CVLAN_STRIPPING |
1175                         0,
1176                 .create_flags = (rxq_data->vlan_strip ?
1177                                  IBV_WQ_FLAGS_CVLAN_STRIPPING :
1178                                  0),
1179         };
1180         /* By default, FCS (CRC) is stripped by hardware. */
1181         if (rxq_data->crc_present) {
1182                 attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
1183                 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1184         }
1185 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
1186         if (priv->hw_padding) {
1187                 attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
1188                 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1189         }
1190 #endif
1191         tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
1192         if (tmpl->wq == NULL) {
1193                 ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
1194                 goto error;
1195         }
1196         /*
1197          * Make sure number of WRs*SGEs match expectations since a queue
1198          * cannot allocate more than "desc" buffers.
1199          */
1200         if (((int)attr.wq.max_wr !=
1201              ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
1202             ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
1203                 ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
1204                       (void *)rxq_ctrl,
1205                       ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
1206                       (1 << rxq_data->sges_n),
1207                       attr.wq.max_wr, attr.wq.max_sge);
1208                 goto error;
1209         }
1210         /* Change queue state to ready. */
1211         mod = (struct ibv_wq_attr){
1212                 .attr_mask = IBV_WQ_ATTR_STATE,
1213                 .wq_state = IBV_WQS_RDY,
1214         };
1215         ret = ibv_modify_wq(tmpl->wq, &mod);
1216         if (ret) {
1217                 ERROR("%p: WQ state to IBV_WQS_RDY failed",
1218                       (void *)rxq_ctrl);
1219                 goto error;
1220         }
1221         obj.cq.in = tmpl->cq;
1222         obj.cq.out = &cq_info;
1223         obj.rwq.in = tmpl->wq;
1224         obj.rwq.out = &rwq;
1225         ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
1226         if (ret != 0)
1227                 goto error;
1228         if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1229                 ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
1230                       "it should be set to %u", RTE_CACHE_LINE_SIZE);
1231                 goto error;
1232         }
1233         /* Fill the rings. */
1234         rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
1235                 (uintptr_t)rwq.buf;
1236         for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
1237                 struct rte_mbuf *buf = (*rxq_data->elts)[i];
1238                 volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
1239
1240                 /* scat->addr must be able to store a pointer. */
1241                 assert(sizeof(scat->addr) >= sizeof(uintptr_t));
1242                 *scat = (struct mlx5_wqe_data_seg){
1243                         .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
1244                                                                   uintptr_t)),
1245                         .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
1246                         .lkey = tmpl->mr->lkey,
1247                 };
1248         }
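        /* Expose doorbell records, ring memory and ring sizes to the
         * datapath structure. */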
1249         rxq_data->rq_db = rwq.dbrec;
1250         rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
1251         rxq_data->cq_ci = 0;
1252         rxq_data->rq_ci = 0;
1253         rxq_data->rq_pi = 0;
1254         rxq_data->zip = (struct rxq_zip){
1255                 .ai = 0,
1256         };
1257         rxq_data->cq_db = cq_info.dbrec;
1258         rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
1259         /* Update doorbell counter. */
1260         rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
1261         rte_wmb();
1262         *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
1263         DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)tmpl);
1264         rte_atomic32_inc(&tmpl->refcnt);
1265         DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1266               (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1267         LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
1268         return tmpl;
1269 error:
1270         if (tmpl->wq)
1271                 claim_zero(ibv_destroy_wq(tmpl->wq));
1272         if (tmpl->cq)
1273                 claim_zero(ibv_destroy_cq(tmpl->cq));
1274         if (tmpl->channel)
1275                 claim_zero(ibv_destroy_comp_channel(tmpl->channel));
1276         if (tmpl->mr)
1277                 priv_mr_release(priv, tmpl->mr);
1278         return NULL;
1279 }
1280
1281 /**
1282  * Get an Rx queue Verbs object.
1283  *
1284  * @param priv
1285  *   Pointer to private structure.
1286  * @param idx
1287  *   Queue index in DPDK Rx queue array
1288  *
1289  * @return
1290  *   The Verbs object if it exists.
1291  */
1292 struct mlx5_rxq_ibv*
1293 mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
1294 {
1295         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1296         struct mlx5_rxq_ctrl *rxq_ctrl;
1297
1298         if (idx >= priv->rxqs_n)
1299                 return NULL;
1300         if (!rxq_data)
1301                 return NULL;
1302         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1303         if (rxq_ctrl->ibv) {
1304                 priv_mr_get(priv, rxq_data->mp);
1305                 rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
1306                 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1307                       (void *)rxq_ctrl->ibv,
1308                       rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
1309         }
1310         return rxq_ctrl->ibv;
1311 }
1312
1313 /**
1314  * Release an Rx verbs queue object.
1315  *
1316  * @param priv
1317  *   Pointer to private structure.
1318  * @param rxq_ibv
1319  *   Verbs Rx queue object.
1320  *
1321  * @return
1322  *   0 on success, errno value on failure.
1323  */
1324 int
1325 mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1326 {
1327         int ret;
1328
1329         assert(rxq_ibv);
1330         assert(rxq_ibv->wq);
1331         assert(rxq_ibv->cq);
1332         assert(rxq_ibv->mr);
1333         ret = priv_mr_release(priv, rxq_ibv->mr);
1334         if (!ret)
1335                 rxq_ibv->mr = NULL;
1336         DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1337               (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
1338         if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
1339                 rxq_free_elts(rxq_ibv->rxq_ctrl);
1340                 claim_zero(ibv_destroy_wq(rxq_ibv->wq));
1341                 claim_zero(ibv_destroy_cq(rxq_ibv->cq));
1342                 if (rxq_ibv->channel)
1343                         claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
1344                 LIST_REMOVE(rxq_ibv, next);
1345                 rte_free(rxq_ibv);
1346                 return 0;
1347         }
1348         return EBUSY;
1349 }
1350
1351 /**
1352  * Verify the Verbs Rx queue list is empty.
1353  *
1354  * @param priv
1355  *   Pointer to private structure.
1356  *
1357  * @return The number of objects not released.
1358  */
1359 int
1360 mlx5_priv_rxq_ibv_verify(struct priv *priv)
1361 {
1362         int ret = 0;
1363         struct mlx5_rxq_ibv *rxq_ibv;
1364
1365         LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
1366                 DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
1367                       (void *)rxq_ibv);
1368                 ++ret;
1369         }
1370         return ret;
1371 }
1372
1373 /**
1374  * Return true if a single reference exists on the object.
1375  *
1376  * @param priv
1377  *   Pointer to private structure.
1378  * @param rxq_ibv
1379  *   Verbs Rx queue object.
1380  */
1381 int
1382 mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1383 {
1384         (void)priv;
1385         assert(rxq_ibv);
1386         return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
1387 }
1388
1389 /**
1390  * Create a DPDK Rx queue.
1391  *
1392  * @param priv
1393  *   Pointer to private structure.
1394  * @param idx
1395  *   RX queue index.
1396  * @param desc
1397  *   Number of descriptors to configure in queue.
1398  * @param socket
1399  *   NUMA socket on which memory must be allocated.
1400  *
1401  * @return
1402  *   A DPDK queue object on success.
1403  */
1404 struct mlx5_rxq_ctrl*
1405 mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
1406                   unsigned int socket, struct rte_mempool *mp)
1407 {
1408         struct rte_eth_dev *dev = priv->dev;
1409         struct mlx5_rxq_ctrl *tmpl;
1410         const uint16_t desc_n =
1411                 desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
1412         unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
1413
1414         tmpl = rte_calloc_socket("RXQ", 1,
1415                                  sizeof(*tmpl) +
1416                                  desc_n * sizeof(struct rte_mbuf *),
1417                                  0, socket);
1418         if (!tmpl)
1419                 return NULL;
1420         if (priv->dev->data->dev_conf.intr_conf.rxq)
1421                 tmpl->irq = 1;
1422         /* Enable scattered packets support for this queue if necessary. */
1423         assert(mb_len >= RTE_PKTMBUF_HEADROOM);
1424         if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
1425             (mb_len - RTE_PKTMBUF_HEADROOM)) {
1426                 tmpl->rxq.sges_n = 0;
1427         } else if (dev->data->dev_conf.rxmode.enable_scatter) {
1428                 unsigned int size =
1429                         RTE_PKTMBUF_HEADROOM +
1430                         dev->data->dev_conf.rxmode.max_rx_pkt_len;
1431                 unsigned int sges_n;
1432
1433                 /*
1434                  * Determine the number of SGEs needed for a full packet
1435                  * and round it to the next power of two.
1436                  */
1437                 sges_n = log2above((size / mb_len) + !!(size % mb_len));
1438                 tmpl->rxq.sges_n = sges_n;
1439                 /* Make sure rxq.sges_n did not overflow. */
1440                 size = mb_len * (1 << tmpl->rxq.sges_n);
1441                 size -= RTE_PKTMBUF_HEADROOM;
1442                 if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
1443                         ERROR("%p: too many SGEs (%u) needed to handle"
1444                               " requested maximum packet size %u",
1445                               (void *)dev,
1446                               1 << sges_n,
1447                               dev->data->dev_conf.rxmode.max_rx_pkt_len);
1448                         goto error;
1449                 }
1450         } else {
1451                 WARN("%p: the requested maximum Rx packet size (%u) is"
1452                      " larger than a single mbuf (%u) and scattered"
1453                      " mode has not been requested",
1454                      (void *)dev,
1455                      dev->data->dev_conf.rxmode.max_rx_pkt_len,
1456                      mb_len - RTE_PKTMBUF_HEADROOM);
1457         }
1458         DEBUG("%p: maximum number of segments per packet: %u",
1459               (void *)dev, 1 << tmpl->rxq.sges_n);
1460         if (desc % (1 << tmpl->rxq.sges_n)) {
1461                 ERROR("%p: number of RX queue descriptors (%u) is not a"
1462                       " multiple of SGEs per packet (%u)",
1463                       (void *)dev,
1464                       desc,
1465                       1 << tmpl->rxq.sges_n);
1466                 goto error;
1467         }
1468         /* Toggle RX checksum offload if hardware supports it. */
1469         if (priv->hw_csum)
1470                 tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1471         if (priv->hw_csum_l2tun)
1472                 tmpl->rxq.csum_l2tun =
1473                         !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1474         /* Configure VLAN stripping. */
1475         tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
1476                                !!dev->data->dev_conf.rxmode.hw_vlan_strip);
1477         /* By default, FCS (CRC) is stripped by hardware. */
1478         if (dev->data->dev_conf.rxmode.hw_strip_crc) {
1479                 tmpl->rxq.crc_present = 0;
1480         } else if (priv->hw_fcs_strip) {
1481                 tmpl->rxq.crc_present = 1;
1482         } else {
1483                 WARN("%p: CRC stripping has been disabled but will still"
1484                      " be performed by hardware, make sure MLNX_OFED and"
1485                      " firmware are up to date",
1486                      (void *)dev);
1487                 tmpl->rxq.crc_present = 0;
1488         }
1489         DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
1490               " incoming frames to hide it",
1491               (void *)dev,
1492               tmpl->rxq.crc_present ? "disabled" : "enabled",
1493               tmpl->rxq.crc_present << 2);
1494         tmpl->rxq.rss_hash = priv->rxqs_n > 1;
1495         /* Save port ID. */
1496         tmpl->rxq.port_id = dev->data->port_id;
1497         tmpl->priv = priv;
1498         tmpl->rxq.mp = mp;
1499         tmpl->rxq.stats.idx = idx;
1500         tmpl->rxq.elts_n = log2above(desc);
1501         tmpl->rxq.elts =
1502                 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
1503         rte_atomic32_inc(&tmpl->refcnt);
1504         DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1505               (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1506         LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
1507         return tmpl;
1508 error:
1509         rte_free(tmpl);
1510         return NULL;
1511 }
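
/*
 * Worked example (illustrative, not part of the driver) of the scatter
 * computation above: with a 2048-byte mbuf data room (mb_len), a 128-byte
 * RTE_PKTMBUF_HEADROOM and max_rx_pkt_len of 9000:
 *
 *      size = 128 + 9000 = 9128
 *      sges_n = log2above(9128 / 2048 + !!(9128 % 2048)) = log2above(5) = 3
 *
 * i.e. 8 SGEs per packet, so "desc" must be a multiple of 8.  A hypothetical
 * caller creating one queue on the current NUMA socket could use:
 *
 *      struct mlx5_rxq_ctrl *ctrl =
 *              mlx5_priv_rxq_new(priv, idx, 512, rte_socket_id(), mp);
 *
 * where "mp" is the mempool supplying the Rx mbufs for this queue.
 */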
1512
1513 /**
1514  * Get a Rx queue.
1515  *
1516  * @param priv
1517  *   Pointer to private structure.
1518  * @param idx
1519  *   RX queue index.
1520  *
1521  * @return
1522  *   A pointer to the queue if it exists.
1523  */
1524 struct mlx5_rxq_ctrl*
1525 mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
1526 {
1527         struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
1528
1529         if ((*priv->rxqs)[idx]) {
1530                 rxq_ctrl = container_of((*priv->rxqs)[idx],
1531                                         struct mlx5_rxq_ctrl,
1532                                         rxq);
1533
1534                 mlx5_priv_rxq_ibv_get(priv, idx);
1535                 rte_atomic32_inc(&rxq_ctrl->refcnt);
1536                 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1537                       (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1538         }
1539         return rxq_ctrl;
1540 }
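
/*
 * Usage sketch (illustrative, assumed caller pattern): queue setup paths are
 * expected to reuse an existing control structure and only allocate a new
 * one when the lookup misses:
 *
 *      struct mlx5_rxq_ctrl *ctrl = mlx5_priv_rxq_get(priv, idx);
 *
 *      if (!ctrl)
 *              ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
 *      if (!ctrl)
 *              return -ENOMEM;
 *
 * A successful get also re-references the Verbs object through
 * mlx5_priv_rxq_ibv_get(), so it is balanced by one mlx5_priv_rxq_release().
 */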
1541
1542 /**
1543  * Release a Rx queue.
1544  *
1545  * @param priv
1546  *   Pointer to private structure.
1547  * @param idx
1548  *   RX queue index.
1549  *
1550  * @return
1551  *   0 on success, errno value on failure.
1552  */
1553 int
1554 mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
1555 {
1556         struct mlx5_rxq_ctrl *rxq_ctrl;
1557
1558         if (!(*priv->rxqs)[idx])
1559                 return 0;
1560         rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1561         assert(rxq_ctrl->priv);
1562         if (rxq_ctrl->ibv) {
1563                 int ret;
1564
1565                 ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
1566                 if (!ret)
1567                         rxq_ctrl->ibv = NULL;
1568         }
1569         DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1570               (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1571         if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
1572                 LIST_REMOVE(rxq_ctrl, next);
1573                 rte_free(rxq_ctrl);
1574                 (*priv->rxqs)[idx] = NULL;
1575                 return 0;
1576         }
1577         return EBUSY;
1578 }
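
/*
 * Note (illustrative, not part of the driver): releasing does not
 * necessarily free the queue; (*priv->rxqs)[idx] is cleared only when the
 * last reference goes away, e.g.:
 *
 *      if (mlx5_priv_rxq_release(priv, idx) == EBUSY)
 *              DEBUG("%p: Rx queue %u still in use", (void *)priv, idx);
 */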
1579
1580 /**
1581  * Verify if the queue can be released.
1582  *
1583  * @param priv
1584  *   Pointer to private structure.
1585  * @param idx
1586  *   RX queue index.
1587  *
1588  * @return
1589  *   1 if the queue can be released.
1590  */
1591 int
1592 mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
1593 {
1594         struct mlx5_rxq_ctrl *rxq_ctrl;
1595
1596         if (!(*priv->rxqs)[idx])
1597                 return -1;
1598         rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1599         return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
1600 }
1601
1602 /**
1603  * Verify the Rx queue list is empty.
1604  *
1605  * @param priv
1606  *   Pointer to private structure.
1607  *
1608  * @return the number of objects not released.
1609  */
1610 int
1611 mlx5_priv_rxq_verify(struct priv *priv)
1612 {
1613         struct mlx5_rxq_ctrl *rxq_ctrl;
1614         int ret = 0;
1615
1616         LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1617                 DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
1618                       (void *)rxq_ctrl);
1619                 ++ret;
1620         }
1621         return ret;
1622 }
1623
1624 /**
1625  * Create an indirection table.
1626  *
1627  * @param priv
1628  *   Pointer to private structure.
1629  * @param queues
1630  *   Queues to include in the indirection table.
1631  * @param queues_n
1632  *   Number of queues in the array.
1633  *
1634  * @return
1635  *   A new indirection table.
1636  */
1637 struct mlx5_ind_table_ibv*
1638 mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
1639                             uint16_t queues_n)
1640 {
1641         struct mlx5_ind_table_ibv *ind_tbl;
1642         const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
1643                 log2above(queues_n) :
1644                 log2above(priv->ind_table_max_size);
1645         struct ibv_wq *wq[1 << wq_n];
1646         unsigned int i;
1647         unsigned int j;
1648
1649         ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
1650                              queues_n * sizeof(uint16_t), 0);
1651         if (!ind_tbl)
1652                 return NULL;
1653         for (i = 0; i != queues_n; ++i) {
1654                 struct mlx5_rxq_ctrl *rxq =
1655                         mlx5_priv_rxq_get(priv, queues[i]);
1656
1657                 if (!rxq)
1658                         goto error;
1659                 wq[i] = rxq->ibv->wq;
1660                 ind_tbl->queues[i] = queues[i];
1661         }
1662         ind_tbl->queues_n = queues_n;
1663         /* Finalise indirection table. */
1664         for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
1665                 wq[i] = wq[j];
1666         ind_tbl->ind_table = ibv_create_rwq_ind_table(
1667                 priv->ctx,
1668                 &(struct ibv_rwq_ind_table_init_attr){
1669                         .log_ind_tbl_size = wq_n,
1670                         .ind_tbl = wq,
1671                         .comp_mask = 0,
1672                 });
1673         if (!ind_tbl->ind_table)
1674                 goto error;
1675         rte_atomic32_inc(&ind_tbl->refcnt);
1676         LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
1677         DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1678               (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1679         return ind_tbl;
1680 error:
1681         rte_free(ind_tbl);
1682         DEBUG("%p: cannot create indirection table", (void *)priv);
1683         return NULL;
1684 }
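
/*
 * Worked example (illustrative, not part of the driver) of the "finalise"
 * loop above: with queues_n = 3 and, for the sake of illustration, an
 * indirection table of 8 entries, the WQ list is replicated as
 *
 *      wq[] = { q0, q1, q2, q0, q1, q2, q0, q1 }
 *
 * so that every entry of the power-of-two table handed to
 * ibv_create_rwq_ind_table() points to a valid WQ and traffic is spread
 * across all three queues.
 */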
1685
1686 /**
1687  * Get an indirection table.
1688  *
1689  * @param priv
1690  *   Pointer to private structure.
1691  * @param queues
1692  *   Queues to include in the indirection table.
1693  * @param queues_n
1694  *   Number of queues in the array.
1695  *
1696  * @return
1697  *   An indirection table if found.
1698  */
1699 struct mlx5_ind_table_ibv*
1700 mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
1701                             uint16_t queues_n)
1702 {
1703         struct mlx5_ind_table_ibv *ind_tbl;
1704
1705         LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1706                 if ((ind_tbl->queues_n == queues_n) &&
1707                     (memcmp(ind_tbl->queues, queues,
1708                             ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
1709                      == 0))
1710                         break;
1711         }
1712         if (ind_tbl) {
1713                 unsigned int i;
1714
1715                 rte_atomic32_inc(&ind_tbl->refcnt);
1716                 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1717                       (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1718                 for (i = 0; i != ind_tbl->queues_n; ++i)
1719                         mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
1720         }
1721         return ind_tbl;
1722 }
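
/*
 * Usage sketch (illustrative, not part of the driver): the lookup matches on
 * the exact queue list (same length, same order) and, on success, takes a
 * reference on every underlying Rx queue, so it must be balanced by
 * mlx5_priv_ind_table_ibv_release().  The get-or-create pattern used by
 * mlx5_priv_hrxq_new() below is the expected way to consume it:
 *
 *      ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
 *      if (!ind_tbl)
 *              ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
 */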
1723
1724 /**
1725  * Release an indirection table.
1726  *
1727  * @param priv
1728  *   Pointer to private structure.
1729  * @param ind_table
1730  *   Indirection table to release.
1731  *
1732  * @return
1733  *   0 on success, errno value on failure.
1734  */
1735 int
1736 mlx5_priv_ind_table_ibv_release(struct priv *priv,
1737                                 struct mlx5_ind_table_ibv *ind_tbl)
1738 {
1739         unsigned int i;
1740
1741         DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1742               (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1743         if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
1744                 claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
1745         for (i = 0; i != ind_tbl->queues_n; ++i)
1746                 claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
1747         if (!rte_atomic32_read(&ind_tbl->refcnt)) {
1748                 LIST_REMOVE(ind_tbl, next);
1749                 rte_free(ind_tbl);
1750                 return 0;
1751         }
1752         return EBUSY;
1753 }
1754
1755 /**
1756  * Verify the Verbs indirection table list is empty.
1757  *
1758  * @param priv
1759  *   Pointer to private structure.
1760  *
1761  * @return the number of objects not released.
1762  */
1763 int
1764 mlx5_priv_ind_table_ibv_verify(struct priv *priv)
1765 {
1766         struct mlx5_ind_table_ibv *ind_tbl;
1767         int ret = 0;
1768
1769         LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1770                 DEBUG("%p: Verbs indirection table %p still referenced",
1771                       (void *)priv, (void *)ind_tbl);
1772                 ++ret;
1773         }
1774         return ret;
1775 }
1776
1777 /**
1778  * Create an Rx Hash queue.
1779  *
1780  * @param priv
1781  *   Pointer to private structure.
1782  * @param rss_key
1783  *   RSS key for the Rx hash queue.
1784  * @param rss_key_len
1785  *   RSS key length.
1786  * @param hash_fields
1787  *   Verbs protocol hash fields to apply RSS on.
1788  * @param queues
1789  *   Queues to include in the hash Rx queue.
1790  * @param queues_n
1791  *   Number of queues.
1792  *
1793  * @return
1794  *   A hash Rx queue on success, NULL otherwise.
1795  */
1796 struct mlx5_hrxq*
1797 mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1798                    uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1799 {
1800         struct mlx5_hrxq *hrxq;
1801         struct mlx5_ind_table_ibv *ind_tbl;
1802         struct ibv_qp *qp;
1803
1804         ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1805         if (!ind_tbl)
1806                 ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
1807         if (!ind_tbl)
1808                 return NULL;
1809         qp = ibv_create_qp_ex(
1810                 priv->ctx,
1811                 &(struct ibv_qp_init_attr_ex){
1812                         .qp_type = IBV_QPT_RAW_PACKET,
1813                         .comp_mask =
1814                                 IBV_QP_INIT_ATTR_PD |
1815                                 IBV_QP_INIT_ATTR_IND_TABLE |
1816                                 IBV_QP_INIT_ATTR_RX_HASH,
1817                         .rx_hash_conf = (struct ibv_rx_hash_conf){
1818                                 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
1819                                 .rx_hash_key_len = rss_key_len,
1820                                 .rx_hash_key = rss_key,
1821                                 .rx_hash_fields_mask = hash_fields,
1822                         },
1823                         .rwq_ind_tbl = ind_tbl->ind_table,
1824                         .pd = priv->pd,
1825                 });
1826         if (!qp)
1827                 goto error;
1828         hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
1829         if (!hrxq)
1830                 goto error;
1831         hrxq->ind_table = ind_tbl;
1832         hrxq->qp = qp;
1833         hrxq->rss_key_len = rss_key_len;
1834         hrxq->hash_fields = hash_fields;
1835         memcpy(hrxq->rss_key, rss_key, rss_key_len);
1836         rte_atomic32_inc(&hrxq->refcnt);
1837         LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
1838         DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1839               (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1840         return hrxq;
1841 error:
1842         mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1843         if (qp)
1844                 claim_zero(ibv_destroy_qp(qp));
1845         return NULL;
1846 }
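
/*
 * Usage sketch (illustrative, not part of the driver): a hash Rx queue binds
 * an RSS key and a set of Verbs hash fields to an indirection table built
 * from "queues".  Assuming a caller-provided 40-byte Toeplitz key "rss_key",
 * an IPv4/TCP hash queue could be requested with:
 *
 *      struct mlx5_hrxq *hrxq =
 *              mlx5_priv_hrxq_new(priv, rss_key, 40,
 *                                 IBV_RX_HASH_SRC_IPV4 |
 *                                 IBV_RX_HASH_DST_IPV4 |
 *                                 IBV_RX_HASH_SRC_PORT_TCP |
 *                                 IBV_RX_HASH_DST_PORT_TCP,
 *                                 queues, queues_n);
 *
 * On failure the indirection table reference taken above is dropped and
 * NULL is returned.
 */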
1847
1848 /**
1849  * Get an Rx Hash queue.
1850  *
1851  * @param priv
1852  *   Pointer to private structure.
1853  * @param rss_key
1854  *   RSS key (rss_key_len bytes) the queue must use.
1855  * @param hash_fields
1856  *   Verbs protocol hash fields the queue must match.
1857  * @param queues
1858  *   Queues to include in the hash Rx queue (queues_n entries).
1859  *
1860  * @return
1861  *   The matching hash Rx queue if it exists, NULL otherwise.
1862  */
1863 struct mlx5_hrxq*
1864 mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1865                    uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1866 {
1867         struct mlx5_hrxq *hrxq;
1868
1869         LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1870                 struct mlx5_ind_table_ibv *ind_tbl;
1871
1872                 if (hrxq->rss_key_len != rss_key_len)
1873                         continue;
1874                 if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
1875                         continue;
1876                 if (hrxq->hash_fields != hash_fields)
1877                         continue;
1878                 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1879                 if (!ind_tbl)
1880                         continue;
1881                 if (ind_tbl != hrxq->ind_table) {
1882                         mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1883                         continue;
1884                 }
1885                 rte_atomic32_inc(&hrxq->refcnt);
1886                 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1887                       (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1888                 return hrxq;
1889         }
1890         return NULL;
1891 }
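
/*
 * Usage sketch (illustrative, assumed caller pattern): consumers such as the
 * flow engine look the hash Rx queue up first and create it on a miss:
 *
 *      struct mlx5_hrxq *hrxq =
 *              mlx5_priv_hrxq_get(priv, rss_key, rss_key_len,
 *                                 hash_fields, queues, queues_n);
 *
 *      if (!hrxq)
 *              hrxq = mlx5_priv_hrxq_new(priv, rss_key, rss_key_len,
 *                                        hash_fields, queues, queues_n);
 */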
1892
1893 /**
1894  * Release the hash Rx queue.
1895  *
1896  * @param priv
1897  *   Pointer to private structure.
1898  * @param hrxq
1899  *   Pointer to Hash Rx queue to release.
1900  *
1901  * @return
1902  *   0 on success, errno value on failure.
1903  */
1904 int
1905 mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
1906 {
1907         DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1908               (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1909         if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
1910                 claim_zero(ibv_destroy_qp(hrxq->qp));
1911                 mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
1912                 LIST_REMOVE(hrxq, next);
1913                 rte_free(hrxq);
1914                 return 0;
1915         }
1916         claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
1917         return EBUSY;
1918 }
1919
1920 /**
1921  * Verify the Verbs Hash Rx queue list is empty.
1922  *
1923  * @param priv
1924  *   Pointer to private structure.
1925  *
1926  * @return the number of objects not released.
1927  */
1928 int
1929 mlx5_priv_hrxq_ibv_verify(struct priv *priv)
1930 {
1931         struct mlx5_hrxq *hrxq;
1932         int ret = 0;
1933
1934         LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1935                 DEBUG("%p: Verbs Hash Rx queue %p still referenced",
1936                       (void *)priv, (void *)hrxq);
1937                 ++ret;
1938         }
1939         return ret;
1940 }
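
/*
 * Usage sketch (illustrative, not part of the driver): the *_verify()
 * helpers in this file return the number of objects still referenced and
 * are intended for leak detection when the device is closed, e.g.:
 *
 *      int leaks = 0;
 *
 *      leaks += mlx5_priv_hrxq_ibv_verify(priv);
 *      leaks += mlx5_priv_ind_table_ibv_verify(priv);
 *      leaks += mlx5_priv_rxq_ibv_verify(priv);
 *      leaks += mlx5_priv_rxq_verify(priv);
 *      if (leaks)
 *              WARN("%p: %d Rx object(s) still referenced", (void *)priv,
 *                   leaks);
 */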