net/mlx5: use flow to enable promiscuous mode
[dpdk.git] drivers/net/mlx5/mlx5_rxq.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2015 6WIND S.A.
5  *   Copyright 2015 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stddef.h>
35 #include <assert.h>
36 #include <errno.h>
37 #include <string.h>
38 #include <stdint.h>
39 #include <fcntl.h>
40 #include <sys/queue.h>
41
42 /* Verbs header. */
43 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
44 #ifdef PEDANTIC
45 #pragma GCC diagnostic ignored "-Wpedantic"
46 #endif
47 #include <infiniband/verbs.h>
48 #include <infiniband/mlx5dv.h>
49 #ifdef PEDANTIC
50 #pragma GCC diagnostic error "-Wpedantic"
51 #endif
52
53 #include <rte_mbuf.h>
54 #include <rte_malloc.h>
55 #include <rte_ethdev.h>
56 #include <rte_common.h>
57 #include <rte_interrupts.h>
58 #include <rte_debug.h>
59 #include <rte_io.h>
60
61 #include "mlx5.h"
62 #include "mlx5_rxtx.h"
63 #include "mlx5_utils.h"
64 #include "mlx5_autoconf.h"
65 #include "mlx5_defs.h"
66
67 /* Initialization data for hash RX queues. */
68 const struct hash_rxq_init hash_rxq_init[] = {
69         [HASH_RXQ_TCPV4] = {
70                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
71                                 IBV_RX_HASH_DST_IPV4 |
72                                 IBV_RX_HASH_SRC_PORT_TCP |
73                                 IBV_RX_HASH_DST_PORT_TCP),
74                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
75                 .flow_priority = 0,
76                 .flow_spec.tcp_udp = {
77                         .type = IBV_FLOW_SPEC_TCP,
78                         .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
79                 },
80                 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
81         },
82         [HASH_RXQ_UDPV4] = {
83                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
84                                 IBV_RX_HASH_DST_IPV4 |
85                                 IBV_RX_HASH_SRC_PORT_UDP |
86                                 IBV_RX_HASH_DST_PORT_UDP),
87                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
88                 .flow_priority = 0,
89                 .flow_spec.tcp_udp = {
90                         .type = IBV_FLOW_SPEC_UDP,
91                         .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
92                 },
93                 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
94         },
95         [HASH_RXQ_IPV4] = {
96                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
97                                 IBV_RX_HASH_DST_IPV4),
98                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
99                                 ETH_RSS_FRAG_IPV4),
100                 .flow_priority = 1,
101                 .flow_spec.ipv4 = {
102                         .type = IBV_FLOW_SPEC_IPV4,
103                         .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
104                 },
105                 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
106         },
107         [HASH_RXQ_TCPV6] = {
108                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
109                                 IBV_RX_HASH_DST_IPV6 |
110                                 IBV_RX_HASH_SRC_PORT_TCP |
111                                 IBV_RX_HASH_DST_PORT_TCP),
112                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
113                 .flow_priority = 0,
114                 .flow_spec.tcp_udp = {
115                         .type = IBV_FLOW_SPEC_TCP,
116                         .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
117                 },
118                 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
119         },
120         [HASH_RXQ_UDPV6] = {
121                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
122                                 IBV_RX_HASH_DST_IPV6 |
123                                 IBV_RX_HASH_SRC_PORT_UDP |
124                                 IBV_RX_HASH_DST_PORT_UDP),
125                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
126                 .flow_priority = 0,
127                 .flow_spec.tcp_udp = {
128                         .type = IBV_FLOW_SPEC_UDP,
129                         .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
130                 },
131                 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
132         },
133         [HASH_RXQ_IPV6] = {
134                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
135                                 IBV_RX_HASH_DST_IPV6),
136                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
137                                 ETH_RSS_FRAG_IPV6),
138                 .flow_priority = 1,
139                 .flow_spec.ipv6 = {
140                         .type = IBV_FLOW_SPEC_IPV6,
141                         .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
142                 },
143                 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
144         },
145         [HASH_RXQ_ETH] = {
146                 .hash_fields = 0,
147                 .dpdk_rss_hf = 0,
148                 .flow_priority = 2,
149                 .flow_spec.eth = {
150                         .type = IBV_FLOW_SPEC_ETH,
151                         .size = sizeof(hash_rxq_init[0].flow_spec.eth),
152                 },
153                 .underlayer = NULL,
154         },
155 };
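/*
 * Note: each entry chains to its underlayer (e.g. HASH_RXQ_TCPV4 ->
 * HASH_RXQ_IPV4 -> HASH_RXQ_ETH), which is how priv_flow_attr() below
 * stacks the complete set of flow specifications for a given queue type.
 */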
156
157 /* Number of entries in hash_rxq_init[]. */
158 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
159
160 /* Initialization data for hash RX queue indirection tables. */
161 static const struct ind_table_init ind_table_init[] = {
162         {
163                 .max_size = -1u, /* Superseded by HW limitations. */
164                 .hash_types =
165                         1 << HASH_RXQ_TCPV4 |
166                         1 << HASH_RXQ_UDPV4 |
167                         1 << HASH_RXQ_IPV4 |
168                         1 << HASH_RXQ_TCPV6 |
169                         1 << HASH_RXQ_UDPV6 |
170                         1 << HASH_RXQ_IPV6 |
171                         0,
172                 .hash_types_n = 6,
173         },
174         {
175                 .max_size = 1,
176                 .hash_types = 1 << HASH_RXQ_ETH,
177                 .hash_types_n = 1,
178         },
179 };
180
181 #define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
182
183 /* Default RSS hash key also used for ConnectX-3. */
184 uint8_t rss_hash_default_key[] = {
185         0x2c, 0xc6, 0x81, 0xd1,
186         0x5b, 0xdb, 0xf4, 0xf7,
187         0xfc, 0xa2, 0x83, 0x19,
188         0xdb, 0x1a, 0x3e, 0x94,
189         0x6b, 0x9e, 0x38, 0xd9,
190         0x2c, 0x9c, 0x03, 0xd1,
191         0xad, 0x99, 0x44, 0xa7,
192         0xd9, 0x56, 0x3d, 0x59,
193         0x06, 0x3c, 0x25, 0xf3,
194         0xfc, 0x1f, 0xdc, 0x2a,
195 };
196
197 /* Length of the default RSS hash key. */
198 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
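/*
 * The key is 40 bytes long, the usual Toeplitz key size; when the
 * application does not provide its own key, rss_hash_default_key_len is
 * passed as rx_hash_key_len in ibv_rx_hash_conf below.
 */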
199
200 /**
201  * Populate flow steering rule for a given hash RX queue type using
202  * information from hash_rxq_init[]. Nothing is written to flow_attr when
203  * flow_attr_size is not large enough, but the required size is still returned.
204  *
205  * @param priv
206  *   Pointer to private structure.
207  * @param[out] flow_attr
208  *   Pointer to flow attribute structure to fill. Note that the allocated
209  *   area must be large enough to hold all flow specifications.
210  * @param flow_attr_size
211  *   Entire size of flow_attr and trailing room for flow specifications.
212  * @param type
213  *   Hash RX queue type to use for flow steering rule.
214  *
215  * @return
216  *   Total size of the flow attribute buffer. No errors are defined.
217  */
218 size_t
219 priv_flow_attr(struct priv *priv, struct ibv_flow_attr *flow_attr,
220                size_t flow_attr_size, enum hash_rxq_type type)
221 {
222         size_t offset = sizeof(*flow_attr);
223         const struct hash_rxq_init *init = &hash_rxq_init[type];
224
225         assert(priv != NULL);
226         assert((size_t)type < RTE_DIM(hash_rxq_init));
227         do {
228                 offset += init->flow_spec.hdr.size;
229                 init = init->underlayer;
230         } while (init != NULL);
231         if (offset > flow_attr_size)
232                 return offset;
233         flow_attr_size = offset;
234         init = &hash_rxq_init[type];
235         *flow_attr = (struct ibv_flow_attr){
236                 .type = IBV_FLOW_ATTR_NORMAL,
237                 /* Priorities < 3 are reserved for flow director. */
238                 .priority = init->flow_priority + 3,
239                 .num_of_specs = 0,
240                 .port = priv->port,
241                 .flags = 0,
242         };
243         do {
244                 offset -= init->flow_spec.hdr.size;
245                 memcpy((void *)((uintptr_t)flow_attr + offset),
246                        &init->flow_spec,
247                        init->flow_spec.hdr.size);
248                 ++flow_attr->num_of_specs;
249                 init = init->underlayer;
250         } while (init != NULL);
251         return flow_attr_size;
252 }
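/*
 * Usage sketch: a first call can be made with a zero-sized buffer to learn
 * the required size, then a second call fills the specifications, e.g.:
 *
 *   size_t size = priv_flow_attr(priv, NULL, 0, HASH_RXQ_TCPV4);
 *   struct ibv_flow_attr *attr = rte_malloc(__func__, size, 0);
 *
 *   if (attr != NULL)
 *           priv_flow_attr(priv, attr, size, HASH_RXQ_TCPV4);
 */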
253
254 /**
255  * Convert hash type position in indirection table initializer to
256  * hash RX queue type.
257  *
258  * @param table
259  *   Indirection table initializer.
260  * @param pos
261  *   Hash type position.
262  *
263  * @return
264  *   Hash RX queue type.
265  */
266 static enum hash_rxq_type
267 hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
268 {
269         enum hash_rxq_type type = HASH_RXQ_TCPV4;
270
271         assert(pos < table->hash_types_n);
272         do {
273                 if ((table->hash_types & (1 << type)) && (pos-- == 0))
274                         break;
275                 ++type;
276         } while (1);
277         return type;
278 }
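/*
 * Example: assuming the enum follows the declaration order of
 * hash_rxq_init[], a table with TCPv4, UDPv4 and IPv4 enabled and pos == 2
 * yields HASH_RXQ_IPV4 (the third set bit starting from HASH_RXQ_TCPV4).
 */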
279
280 /**
281  * Filter out disabled hash RX queue types from ind_table_init[].
282  *
283  * @param priv
284  *   Pointer to private structure.
285  * @param[out] table
286  *   Output table.
287  *
288  * @return
289  *   Number of table entries.
290  */
291 static unsigned int
292 priv_make_ind_table_init(struct priv *priv,
293                          struct ind_table_init (*table)[IND_TABLE_INIT_N])
294 {
295         uint64_t rss_hf;
296         unsigned int i;
297         unsigned int j;
298         unsigned int table_n = 0;
299         /* Mandatory to receive frames not handled by normal hash RX queues. */
300         unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
301
302         rss_hf = priv->rss_hf;
303         /* Process other protocols only if more than one queue. */
304         if (priv->rxqs_n > 1)
305                 for (i = 0; (i != hash_rxq_init_n); ++i)
306                         if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
307                                 hash_types_sup |= (1 << i);
308
309         /* Filter out entries whose protocols are not in the set. */
310         for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
311                 unsigned int nb;
312                 unsigned int h;
313
314                 /* j is increased only if the table has valid protocols. */
315                 assert(j <= i);
316                 (*table)[j] = ind_table_init[i];
317                 (*table)[j].hash_types &= hash_types_sup;
318                 for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
319                         if (((*table)[j].hash_types >> h) & 0x1)
320                                 ++nb;
321                 (*table)[j].hash_types_n = nb;
322                 if (nb) {
323                         ++table_n;
324                         ++j;
325                 }
326         }
327         return table_n;
328 }
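/*
 * Example: with a single Rx queue or rss_hf == 0, only HASH_RXQ_ETH
 * remains in hash_types_sup, so the first ind_table_init[] entry is
 * filtered out and a single one-entry table is returned.
 */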
329
330 /**
331  * Initialize hash RX queues and indirection table.
332  *
333  * @param priv
334  *   Pointer to private structure.
335  *
336  * @return
337  *   0 on success, errno value on failure.
338  */
339 int
340 priv_create_hash_rxqs(struct priv *priv)
341 {
342         struct ibv_wq *wqs[priv->reta_idx_n];
343         struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
344         unsigned int ind_tables_n =
345                 priv_make_ind_table_init(priv, &ind_table_init);
346         unsigned int hash_rxqs_n = 0;
347         struct hash_rxq (*hash_rxqs)[] = NULL;
348         struct ibv_rwq_ind_table *(*ind_tables)[] = NULL;
349         unsigned int i;
350         unsigned int j;
351         unsigned int k;
352         int err = 0;
353
354         assert(priv->ind_tables == NULL);
355         assert(priv->ind_tables_n == 0);
356         assert(priv->hash_rxqs == NULL);
357         assert(priv->hash_rxqs_n == 0);
358         assert(priv->pd != NULL);
359         assert(priv->ctx != NULL);
360         if (priv->isolated)
361                 return 0;
362         if (priv->rxqs_n == 0)
363                 return EINVAL;
364         assert(priv->rxqs != NULL);
365         if (ind_tables_n == 0) {
366                 ERROR("all hash RX queue types have been filtered out,"
367                       " indirection table cannot be created");
368                 return EINVAL;
369         }
370         if (priv->rxqs_n & (priv->rxqs_n - 1)) {
371                 INFO("%u RX queues are configured, consider rounding this"
372                      " number to the next power of two for better balancing",
373                      priv->rxqs_n);
374                 DEBUG("indirection table extended to assume %u WQs",
375                       priv->reta_idx_n);
376         }
377         for (i = 0; (i != priv->reta_idx_n); ++i) {
378                 struct mlx5_rxq_ctrl *rxq_ctrl;
379
380                 rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
381                                         struct mlx5_rxq_ctrl, rxq);
382                 wqs[i] = rxq_ctrl->ibv->wq;
383         }
384         /* Get number of hash RX queues to configure. */
385         for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
386                 hash_rxqs_n += ind_table_init[i].hash_types_n;
387         DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
388               hash_rxqs_n, priv->rxqs_n, ind_tables_n);
389         /* Create indirection tables. */
390         ind_tables = rte_calloc(__func__, ind_tables_n,
391                                 sizeof((*ind_tables)[0]), 0);
392         if (ind_tables == NULL) {
393                 err = ENOMEM;
394                 ERROR("cannot allocate indirection tables container: %s",
395                       strerror(err));
396                 goto error;
397         }
398         for (i = 0; (i != ind_tables_n); ++i) {
399                 struct ibv_rwq_ind_table_init_attr ind_init_attr = {
400                         .log_ind_tbl_size = 0, /* Set below. */
401                         .ind_tbl = wqs,
402                         .comp_mask = 0,
403                 };
404                 unsigned int ind_tbl_size = ind_table_init[i].max_size;
405                 struct ibv_rwq_ind_table *ind_table;
406
407                 if (priv->reta_idx_n < ind_tbl_size)
408                         ind_tbl_size = priv->reta_idx_n;
409                 ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
410                 errno = 0;
411                 ind_table = ibv_create_rwq_ind_table(priv->ctx,
412                                                      &ind_init_attr);
413                 if (ind_table != NULL) {
414                         (*ind_tables)[i] = ind_table;
415                         continue;
416                 }
417                 /* Not clear whether errno is set. */
418                 err = (errno ? errno : EINVAL);
419                 ERROR("RX indirection table creation failed with error %d: %s",
420                       err, strerror(err));
421                 goto error;
422         }
423         /* Allocate array that holds hash RX queues and related data. */
424         hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
425                                sizeof((*hash_rxqs)[0]), 0);
426         if (hash_rxqs == NULL) {
427                 err = ENOMEM;
428                 ERROR("cannot allocate hash RX queues container: %s",
429                       strerror(err));
430                 goto error;
431         }
432         for (i = 0, j = 0, k = 0;
433              ((i != hash_rxqs_n) && (j != ind_tables_n));
434              ++i) {
435                 struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
436                 enum hash_rxq_type type =
437                         hash_rxq_type_from_pos(&ind_table_init[j], k);
438                 struct rte_eth_rss_conf *priv_rss_conf =
439                         (*priv->rss_conf)[type];
440                 struct ibv_rx_hash_conf hash_conf = {
441                         .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
442                         .rx_hash_key_len = (priv_rss_conf ?
443                                             priv_rss_conf->rss_key_len :
444                                             rss_hash_default_key_len),
445                         .rx_hash_key = (priv_rss_conf ?
446                                         priv_rss_conf->rss_key :
447                                         rss_hash_default_key),
448                         .rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
449                 };
450                 struct ibv_qp_init_attr_ex qp_init_attr = {
451                         .qp_type = IBV_QPT_RAW_PACKET,
452                         .comp_mask = (IBV_QP_INIT_ATTR_PD |
453                                       IBV_QP_INIT_ATTR_IND_TABLE |
454                                       IBV_QP_INIT_ATTR_RX_HASH),
455                         .rx_hash_conf = hash_conf,
456                         .rwq_ind_tbl = (*ind_tables)[j],
457                         .pd = priv->pd,
458                 };
459
460                 DEBUG("using indirection table %u for hash RX queue %u type %d",
461                       j, i, type);
462                 *hash_rxq = (struct hash_rxq){
463                         .priv = priv,
464                         .qp = ibv_create_qp_ex(priv->ctx, &qp_init_attr),
465                         .type = type,
466                 };
467                 if (hash_rxq->qp == NULL) {
468                         err = (errno ? errno : EINVAL);
469                         ERROR("Hash RX QP creation failure: %s",
470                               strerror(err));
471                         goto error;
472                 }
473                 if (++k < ind_table_init[j].hash_types_n)
474                         continue;
475                 /* Switch to the next indirection table and reset hash RX
476                  * queue type array index. */
477                 ++j;
478                 k = 0;
479         }
480         priv->ind_tables = ind_tables;
481         priv->ind_tables_n = ind_tables_n;
482         priv->hash_rxqs = hash_rxqs;
483         priv->hash_rxqs_n = hash_rxqs_n;
484         assert(err == 0);
485         return 0;
486 error:
487         if (hash_rxqs != NULL) {
488                 for (i = 0; (i != hash_rxqs_n); ++i) {
489                         struct ibv_qp *qp = (*hash_rxqs)[i].qp;
490
491                         if (qp == NULL)
492                                 continue;
493                         claim_zero(ibv_destroy_qp(qp));
494                 }
495                 rte_free(hash_rxqs);
496         }
497         if (ind_tables != NULL) {
498                 for (j = 0; (j != ind_tables_n); ++j) {
499                         struct ibv_rwq_ind_table *ind_table =
500                                 (*ind_tables)[j];
501
502                         if (ind_table == NULL)
503                                 continue;
504                         claim_zero(ibv_destroy_rwq_ind_table(ind_table));
505                 }
506                 rte_free(ind_tables);
507         }
508         return err;
509 }
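/*
 * Illustration: with RSS enabled for the six TCP/UDP/IP hash types, the
 * first indirection table spreads traffic over the RETA entries and feeds
 * six hash Rx queues, while the second table holds a single WQ for the
 * HASH_RXQ_ETH catch-all queue.
 */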
510
511 /**
512  * Clean up hash RX queues and indirection table.
513  *
514  * @param priv
515  *   Pointer to private structure.
516  */
517 void
518 priv_destroy_hash_rxqs(struct priv *priv)
519 {
520         unsigned int i;
521
522         DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
523         if (priv->hash_rxqs_n == 0) {
524                 assert(priv->hash_rxqs == NULL);
525                 assert(priv->ind_tables == NULL);
526                 return;
527         }
528         for (i = 0; (i != priv->hash_rxqs_n); ++i) {
529                 struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
530                 unsigned int j, k;
531
532                 assert(hash_rxq->priv == priv);
533                 assert(hash_rxq->qp != NULL);
534                 /* Also check that there are no remaining flows. */
535                 for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
536                         for (k = 0;
537                              (k != RTE_DIM(hash_rxq->special_flow[j]));
538                              ++k)
539                                 assert(hash_rxq->special_flow[j][k] == NULL);
540                 for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
541                         for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
542                                 assert(hash_rxq->mac_flow[j][k] == NULL);
543                 claim_zero(ibv_destroy_qp(hash_rxq->qp));
544         }
545         priv->hash_rxqs_n = 0;
546         rte_free(priv->hash_rxqs);
547         priv->hash_rxqs = NULL;
548         for (i = 0; (i != priv->ind_tables_n); ++i) {
549                 struct ibv_rwq_ind_table *ind_table =
550                         (*priv->ind_tables)[i];
551
552                 assert(ind_table != NULL);
553                 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
554         }
555         priv->ind_tables_n = 0;
556         rte_free(priv->ind_tables);
557         priv->ind_tables = NULL;
558 }
559
560 /**
561  * Check whether a given flow type is allowed.
562  *
563  * @param priv
564  *   Pointer to private structure.
565  * @param type
566  *   Flow type to check.
567  *
568  * @return
569  *   Nonzero if the given flow type is allowed.
570  */
571 int
572 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
573 {
574         switch (type) {
575         case HASH_RXQ_FLOW_TYPE_ALLMULTI:
576                 return !!priv->allmulti_req;
577         case HASH_RXQ_FLOW_TYPE_BROADCAST:
578         case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
579                 /* If allmulti is enabled, broadcast and ipv6multi
580                  * are unnecessary. */
581                 return !priv->allmulti_req;
582         case HASH_RXQ_FLOW_TYPE_MAC:
583                 return 1;
584         default:
585                 /* Unsupported flow type is not allowed. */
586                 return 0;
587         }
588         return 0;
589 }
590
591 /**
592  * Automatically enable/disable flows according to configuration.
593  *
594  * @param priv
595  *   Private structure.
596  *
597  * @return
598  *   0 on success, errno value on failure.
599  */
600 int
601 priv_rehash_flows(struct priv *priv)
602 {
603         size_t i;
604
605         for (i = 0; i != RTE_DIM((*priv->hash_rxqs)[0].special_flow); ++i)
606                 if (!priv_allow_flow_type(priv, i)) {
607                         priv_special_flow_disable(priv, i);
608                 } else {
609                         int ret = priv_special_flow_enable(priv, i);
610
611                         if (ret)
612                                 return ret;
613                 }
614         if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
615                 return priv_mac_addrs_enable(priv);
616         priv_mac_addrs_disable(priv);
617         return 0;
618 }
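/*
 * Example: when allmulti_req is set, priv_allow_flow_type() rejects the
 * broadcast and IPv6 multicast special flows, so they are disabled here
 * and only the allmulti flow (plus MAC flows) remains installed.
 */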
619
620 /**
621  * Allocate RX queue elements.
622  *
623  * @param rxq_ctrl
624  *   Pointer to RX queue structure.
625  *
626  * @return
627  *   0 on success, errno value on failure.
628  */
629 int
630 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
631 {
632         const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
633         unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
634         unsigned int i;
635         int ret = 0;
636
637         /* Iterate on segments. */
638         for (i = 0; (i != elts_n); ++i) {
639                 struct rte_mbuf *buf;
640
641                 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
642                 if (buf == NULL) {
643                         ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
644                         ret = ENOMEM;
645                         goto error;
646                 }
647                 /* Headroom is reserved by rte_pktmbuf_alloc(). */
648                 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
649                 /* Buffer is supposed to be empty. */
650                 assert(rte_pktmbuf_data_len(buf) == 0);
651                 assert(rte_pktmbuf_pkt_len(buf) == 0);
652                 assert(!buf->next);
653                 /* Only the first segment keeps headroom. */
654                 if (i % sges_n)
655                         SET_DATA_OFF(buf, 0);
656                 PORT(buf) = rxq_ctrl->rxq.port_id;
657                 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
658                 PKT_LEN(buf) = DATA_LEN(buf);
659                 NB_SEGS(buf) = 1;
660                 (*rxq_ctrl->rxq.elts)[i] = buf;
661         }
662         /* If Rx vector is activated. */
663         if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
664                 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
665                 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
666                 int j;
667
668                 /* Initialize default rearm_data for vPMD. */
669                 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
670                 rte_mbuf_refcnt_set(mbuf_init, 1);
671                 mbuf_init->nb_segs = 1;
672                 mbuf_init->port = rxq->port_id;
673                 /*
674                  * prevent compiler reordering:
675                  * rearm_data covers previous fields.
676                  */
677                 rte_compiler_barrier();
678                 rxq->mbuf_initializer =
679                         *(uint64_t *)&mbuf_init->rearm_data;
680                 /* Padding with a fake mbuf for vectorized Rx. */
681                 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
682                         (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
683         }
684         DEBUG("%p: allocated and configured %u segments (max %u packets)",
685               (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
686         assert(ret == 0);
687         return 0;
688 error:
689         elts_n = i;
690         for (i = 0; (i != elts_n); ++i) {
691                 if ((*rxq_ctrl->rxq.elts)[i] != NULL)
692                         rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
693                 (*rxq_ctrl->rxq.elts)[i] = NULL;
694         }
695         DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
696         assert(ret > 0);
697         return ret;
698 }
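/*
 * Example: with rxq.elts_n == 8 and rxq.sges_n == 2, 256 mbufs are
 * allocated and grouped into 64 chains of 4 segments; only the first
 * segment of each chain keeps its headroom (the "i % sges_n" test above).
 */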
699
700 /**
701  * Free RX queue elements.
702  *
703  * @param rxq_ctrl
704  *   Pointer to RX queue structure.
705  */
706 static void
707 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
708 {
709         struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
710         const uint16_t q_n = (1 << rxq->elts_n);
711         const uint16_t q_mask = q_n - 1;
712         uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
713         uint16_t i;
714
715         DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
716         if (rxq->elts == NULL)
717                 return;
718         /*
719          * Some mbufs in the ring still belong to the application; they
720          * cannot be freed.
721          */
722         if (rxq_check_vec_support(rxq) > 0) {
723                 for (i = 0; i < used; ++i)
724                         (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
725                 rxq->rq_pi = rxq->rq_ci;
726         }
727         for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
728                 if ((*rxq->elts)[i] != NULL)
729                         rte_pktmbuf_free_seg((*rxq->elts)[i]);
730                 (*rxq->elts)[i] = NULL;
731         }
732 }
733
734 /**
735  * Clean up a RX queue.
736  *
737  * Destroy objects, free allocated memory and reset the structure for reuse.
738  *
739  * @param rxq_ctrl
740  *   Pointer to RX queue structure.
741  */
742 void
743 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
744 {
745         DEBUG("cleaning up %p", (void *)rxq_ctrl);
746         if (rxq_ctrl->ibv)
747                 mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
748         memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
749 }
750
751 /**
752  * DPDK callback to configure a RX queue.
753  * @param dev
754  *   Pointer to Ethernet device structure.
755  * @param idx
756  *   RX queue index.
757  * @param desc
758  *   Number of descriptors to configure in queue.
759  * @param socket
760  *   NUMA socket on which memory must be allocated.
761  * @param[in] conf
762  *   Thresholds parameters.
763  * @param mp
764  *   Memory pool for buffer allocations.
765  *
766  * @return
767  *   0 on success, negative errno value on failure.
768  */
769 int
770 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
771                     unsigned int socket, const struct rte_eth_rxconf *conf,
772                     struct rte_mempool *mp)
773 {
774         struct priv *priv = dev->data->dev_private;
775         struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
776         struct mlx5_rxq_ctrl *rxq_ctrl =
777                 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
778         int ret = 0;
779
780         (void)conf;
781         if (mlx5_is_secondary())
782                 return -E_RTE_SECONDARY;
783         priv_lock(priv);
784         if (!rte_is_power_of_2(desc)) {
785                 desc = 1 << log2above(desc);
786                 WARN("%p: increased number of descriptors in RX queue %u"
787                      " to the next power of two (%d)",
788                      (void *)dev, idx, desc);
789         }
790         DEBUG("%p: configuring queue %u for %u descriptors",
791               (void *)dev, idx, desc);
792         if (idx >= priv->rxqs_n) {
793                 ERROR("%p: queue index out of range (%u >= %u)",
794                       (void *)dev, idx, priv->rxqs_n);
795                 priv_unlock(priv);
796                 return -EOVERFLOW;
797         }
798         if (!mlx5_priv_rxq_releasable(priv, idx)) {
799                 ret = EBUSY;
800                 ERROR("%p: unable to release queue index %u",
801                       (void *)dev, idx);
802                 goto out;
803         }
804         mlx5_priv_rxq_release(priv, idx);
805         rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
806         if (!rxq_ctrl) {
807                 ERROR("%p: unable to allocate queue index %u",
808                       (void *)dev, idx);
809                 ret = ENOMEM;
810                 goto out;
811         }
812         DEBUG("%p: adding RX queue %p to list",
813               (void *)dev, (void *)rxq_ctrl);
814         (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
815 out:
816         priv_unlock(priv);
817         return -ret;
818 }
819
820 /**
821  * DPDK callback to release a RX queue.
822  *
823  * @param dpdk_rxq
824  *   Generic RX queue pointer.
825  */
826 void
827 mlx5_rx_queue_release(void *dpdk_rxq)
828 {
829         struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
830         struct mlx5_rxq_ctrl *rxq_ctrl;
831         struct priv *priv;
832
833         if (mlx5_is_secondary())
834                 return;
835
836         if (rxq == NULL)
837                 return;
838         rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
839         priv = rxq_ctrl->priv;
840         priv_lock(priv);
841         if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
842                 rte_panic("Rx queue %p is still used by a flow and cannot be"
843                           " removed\n", (void *)rxq_ctrl);
844         mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
845         priv_unlock(priv);
846 }
847
848 /**
849  * Allocate queue vector and fill epoll fd list for Rx interrupts.
850  *
851  * @param priv
852  *   Pointer to private structure.
853  *
854  * @return
855  *   0 on success, negative on failure.
856  */
857 int
858 priv_rx_intr_vec_enable(struct priv *priv)
859 {
860         unsigned int i;
861         unsigned int rxqs_n = priv->rxqs_n;
862         unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
863         unsigned int count = 0;
864         struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
865
866         assert(!mlx5_is_secondary());
867         if (!priv->dev->data->dev_conf.intr_conf.rxq)
868                 return 0;
869         priv_rx_intr_vec_disable(priv);
870         intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
871         if (intr_handle->intr_vec == NULL) {
872                 ERROR("failed to allocate memory for interrupt vector,"
873                       " Rx interrupts will not be supported");
874                 return -ENOMEM;
875         }
876         intr_handle->type = RTE_INTR_HANDLE_EXT;
877         for (i = 0; i != n; ++i) {
878                 /* This rxq ibv must not be released in this function. */
879                 struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
880                 int fd;
881                 int flags;
882                 int rc;
883
884                 /* Skip queues that cannot request interrupts. */
885                 if (!rxq_ibv || !rxq_ibv->channel) {
886                         /* Use invalid intr_vec[] index to disable entry. */
887                         intr_handle->intr_vec[i] =
888                                 RTE_INTR_VEC_RXTX_OFFSET +
889                                 RTE_MAX_RXTX_INTR_VEC_ID;
890                         continue;
891                 }
892                 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
893                         ERROR("too many Rx queues for interrupt vector size"
894                               " (%d), Rx interrupts cannot be enabled",
895                               RTE_MAX_RXTX_INTR_VEC_ID);
896                         priv_rx_intr_vec_disable(priv);
897                         return -1;
898                 }
899                 fd = rxq_ibv->channel->fd;
900                 flags = fcntl(fd, F_GETFL);
901                 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
902                 if (rc < 0) {
903                         ERROR("failed to make Rx interrupt file descriptor"
904                               " %d non-blocking for queue index %d", fd, i);
905                         priv_rx_intr_vec_disable(priv);
906                         return -1;
907                 }
908                 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
909                 intr_handle->efds[count] = fd;
910                 count++;
911         }
912         if (!count)
913                 priv_rx_intr_vec_disable(priv);
914         else
915                 intr_handle->nb_efd = count;
916         return 0;
917 }
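/*
 * Note: intr_vec[i] maps Rx queue i to efds[count] through
 * RTE_INTR_VEC_RXTX_OFFSET, while RTE_INTR_VEC_RXTX_OFFSET +
 * RTE_MAX_RXTX_INTR_VEC_ID marks queues without an interrupt channel;
 * priv_rx_intr_vec_disable() below relies on that sentinel value.
 */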
918
919 /**
920  * Clean up Rx interrupts handler.
921  *
922  * @param priv
923  *   Pointer to private structure.
924  */
925 void
926 priv_rx_intr_vec_disable(struct priv *priv)
927 {
928         struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
929         unsigned int i;
930         unsigned int rxqs_n = priv->rxqs_n;
931         unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
932
933         if (!priv->dev->data->dev_conf.intr_conf.rxq)
934                 return;
935         for (i = 0; i != n; ++i) {
936                 struct mlx5_rxq_ctrl *rxq_ctrl;
937                 struct mlx5_rxq_data *rxq_data;
938
939                 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
940                     RTE_MAX_RXTX_INTR_VEC_ID)
941                         continue;
942                 /*
943                  * Need to access the queue directly to release the
944                  * reference kept in priv_rx_intr_vec_enable().
945                  */
946                 rxq_data = (*priv->rxqs)[i];
947                 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
948                 mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
949         }
950         rte_intr_free_epoll_fd(intr_handle);
951         free(intr_handle->intr_vec);
952         intr_handle->nb_efd = 0;
953         intr_handle->intr_vec = NULL;
954 }
955
956 /**
957  * MLX5 CQ notification.
958  *
959  * @param rxq
960  *   Pointer to receive queue structure.
961  * @param sq_n_rxq
962  *   Sequence number per receive queue.
963  */
964 static inline void
965 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
966 {
967         int sq_n = 0;
968         uint32_t doorbell_hi;
969         uint64_t doorbell;
970         void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
971
972         sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
973         doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
974         doorbell = (uint64_t)doorbell_hi << 32;
975         doorbell |=  rxq->cqn;
976         rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
977         rte_wmb();
978         rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
979 }
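/*
 * Note: the arm sequence number and CQ consumer index go into the high
 * word of the doorbell and the CQ number into the low word; the doorbell
 * record is updated first and rte_wmb() orders it before the UAR write so
 * the device observes a consistent arm request.
 */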
980
981 /**
982  * DPDK callback for Rx queue interrupt enable.
983  *
984  * @param dev
985  *   Pointer to Ethernet device structure.
986  * @param rx_queue_id
987  *   Rx queue number.
988  *
989  * @return
990  *   0 on success, negative on failure.
991  */
992 int
993 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
994 {
995         struct priv *priv = mlx5_get_priv(dev);
996         struct mlx5_rxq_data *rxq_data;
997         struct mlx5_rxq_ctrl *rxq_ctrl;
998         int ret = 0;
999
1000         priv_lock(priv);
1001         rxq_data = (*priv->rxqs)[rx_queue_id];
1002         if (!rxq_data) {
1003                 ret = EINVAL;
1004                 goto exit;
1005         }
1006         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1007         if (rxq_ctrl->irq) {
1008                 struct mlx5_rxq_ibv *rxq_ibv;
1009
1010                 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
1011                 if (!rxq_ibv) {
1012                         ret = EINVAL;
1013                         goto exit;
1014                 }
1015                 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
1016                 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1017         }
1018 exit:
1019         priv_unlock(priv);
1020         if (ret)
1021                 WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
1022         return -ret;
1023 }
1024
1025 /**
1026  * DPDK callback for Rx queue interrupt disable.
1027  *
1028  * @param dev
1029  *   Pointer to Ethernet device structure.
1030  * @param rx_queue_id
1031  *   Rx queue number.
1032  *
1033  * @return
1034  *   0 on success, negative on failure.
1035  */
1036 int
1037 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1038 {
1039         struct priv *priv = mlx5_get_priv(dev);
1040         struct mlx5_rxq_data *rxq_data;
1041         struct mlx5_rxq_ctrl *rxq_ctrl;
1042         struct mlx5_rxq_ibv *rxq_ibv = NULL;
1043         struct ibv_cq *ev_cq;
1044         void *ev_ctx;
1045         int ret = 0;
1046
1047         priv_lock(priv);
1048         rxq_data = (*priv->rxqs)[rx_queue_id];
1049         if (!rxq_data) {
1050                 ret = EINVAL;
1051                 goto exit;
1052         }
1053         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1054         if (!rxq_ctrl->irq)
1055                 goto exit;
1056         rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
1057         if (!rxq_ibv) {
1058                 ret = EINVAL;
1059                 goto exit;
1060         }
1061         ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
1062         if (ret || ev_cq != rxq_ibv->cq) {
1063                 ret = EINVAL;
1064                 goto exit;
1065         }
1066         rxq_data->cq_arm_sn++;
1067         ibv_ack_cq_events(rxq_ibv->cq, 1);
1068 exit:
1069         if (rxq_ibv)
1070                 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1071         priv_unlock(priv);
1072         if (ret)
1073                 WARN("unable to disable interrupt on rx queue %d",
1074                      rx_queue_id);
1075         return -ret;
1076 }
1077
1078 /**
1079  * Create the Rx queue Verbs object.
1080  *
1081  * @param priv
1082  *   Pointer to private structure.
1083  * @param idx
1084  *   Queue index in DPDK Rx queue array
1085  *
1086  * @return
1087  *   The Verbs object initialized on success, NULL on failure.
1088  */
1089 struct mlx5_rxq_ibv*
1090 mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
1091 {
1092         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1093         struct mlx5_rxq_ctrl *rxq_ctrl =
1094                 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1095         struct ibv_wq_attr mod;
1096         union {
1097                 struct ibv_cq_init_attr_ex cq;
1098                 struct ibv_wq_init_attr wq;
1099                 struct ibv_cq_ex cq_attr;
1100         } attr;
1101         unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
1102         struct mlx5_rxq_ibv *tmpl;
1103         struct mlx5dv_cq cq_info;
1104         struct mlx5dv_rwq rwq;
1105         unsigned int i;
1106         int ret = 0;
1107         struct mlx5dv_obj obj;
1108
1109         assert(rxq_data);
1110         assert(!rxq_ctrl->ibv);
1111         tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1112                                  rxq_ctrl->socket);
1113         if (!tmpl) {
1114                 ERROR("%p: cannot allocate verbs resources",
1115                        (void *)rxq_ctrl);
1116                 goto error;
1117         }
1118         tmpl->rxq_ctrl = rxq_ctrl;
1119         /* Use the entire RX mempool as the memory region. */
1120         tmpl->mr = priv_mr_get(priv, rxq_data->mp);
1121         if (!tmpl->mr) {
1122                 tmpl->mr = priv_mr_new(priv, rxq_data->mp);
1123                 if (!tmpl->mr) {
1124                         ERROR("%p: MR creation failure", (void *)rxq_ctrl);
1125                         goto error;
1126                 }
1127         }
1128         if (rxq_ctrl->irq) {
1129                 tmpl->channel = ibv_create_comp_channel(priv->ctx);
1130                 if (!tmpl->channel) {
1131                         ERROR("%p: Comp Channel creation failure",
1132                               (void *)rxq_ctrl);
1133                         goto error;
1134                 }
1135         }
1136         attr.cq = (struct ibv_cq_init_attr_ex){
1137                 .comp_mask = 0,
1138         };
1139         if (priv->cqe_comp) {
1140                 attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
1141                 attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
1142                 /*
1143                  * For vectorized Rx, cqe_n must not be doubled so that
1144                  * cq_ci and rq_ci stay aligned.
1145                  */
1146                 if (rxq_check_vec_support(rxq_data) < 0)
1147                         cqe_n *= 2;
1148         }
1149         tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
1150         if (tmpl->cq == NULL) {
1151                 ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
1152                 goto error;
1153         }
1154         DEBUG("priv->device_attr.max_qp_wr is %d",
1155               priv->device_attr.orig_attr.max_qp_wr);
1156         DEBUG("priv->device_attr.max_sge is %d",
1157               priv->device_attr.orig_attr.max_sge);
1158         attr.wq = (struct ibv_wq_init_attr){
1159                 .wq_context = NULL, /* Could be useful in the future. */
1160                 .wq_type = IBV_WQT_RQ,
1161                 /* Max number of outstanding WRs. */
1162                 .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
1163                 /* Max number of scatter/gather elements in a WR. */
1164                 .max_sge = 1 << rxq_data->sges_n,
1165                 .pd = priv->pd,
1166                 .cq = tmpl->cq,
1167                 .comp_mask =
1168                         IBV_WQ_FLAGS_CVLAN_STRIPPING |
1169                         0,
1170                 .create_flags = (rxq_data->vlan_strip ?
1171                                  IBV_WQ_FLAGS_CVLAN_STRIPPING :
1172                                  0),
1173         };
1174         /* By default, FCS (CRC) is stripped by hardware. */
1175         if (rxq_data->crc_present) {
1176                 attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
1177                 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1178         }
1179 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
1180         if (priv->hw_padding) {
1181                 attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
1182                 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1183         }
1184 #endif
1185         tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
1186         if (tmpl->wq == NULL) {
1187                 ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
1188                 goto error;
1189         }
1190         /*
1191          * Make sure number of WRs*SGEs match expectations since a queue
1192          * cannot allocate more than "desc" buffers.
1193          */
1194         if (((int)attr.wq.max_wr !=
1195              ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
1196             ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
1197                 ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
1198                       (void *)rxq_ctrl,
1199                       ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
1200                       (1 << rxq_data->sges_n),
1201                       attr.wq.max_wr, attr.wq.max_sge);
1202                 goto error;
1203         }
1204         /* Change queue state to ready. */
1205         mod = (struct ibv_wq_attr){
1206                 .attr_mask = IBV_WQ_ATTR_STATE,
1207                 .wq_state = IBV_WQS_RDY,
1208         };
1209         ret = ibv_modify_wq(tmpl->wq, &mod);
1210         if (ret) {
1211                 ERROR("%p: WQ state to IBV_WQS_RDY failed",
1212                       (void *)rxq_ctrl);
1213                 goto error;
1214         }
1215         obj.cq.in = tmpl->cq;
1216         obj.cq.out = &cq_info;
1217         obj.rwq.in = tmpl->wq;
1218         obj.rwq.out = &rwq;
1219         ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
1220         if (ret != 0)
1221                 goto error;
1222         if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1223                 ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
1224                       "it should be set to %u", RTE_CACHE_LINE_SIZE);
1225                 goto error;
1226         }
1227         /* Fill the rings. */
1228         rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
1229                 (uintptr_t)rwq.buf;
1230         for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
1231                 struct rte_mbuf *buf = (*rxq_data->elts)[i];
1232                 volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
1233
1234                 /* scat->addr must be able to store a pointer. */
1235                 assert(sizeof(scat->addr) >= sizeof(uintptr_t));
1236                 *scat = (struct mlx5_wqe_data_seg){
1237                         .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
1238                                                                   uintptr_t)),
1239                         .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
1240                         .lkey = tmpl->mr->lkey,
1241                 };
1242         }
1243         rxq_data->rq_db = rwq.dbrec;
1244         rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
1245         rxq_data->cq_ci = 0;
1246         rxq_data->rq_ci = 0;
1247         rxq_data->rq_pi = 0;
1248         rxq_data->zip = (struct rxq_zip){
1249                 .ai = 0,
1250         };
1251         rxq_data->cq_db = cq_info.dbrec;
1252         rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
1253         /* Update doorbell counter. */
1254         rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
1255         rte_wmb();
1256         *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
1257         DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)tmpl);
1258         rte_atomic32_inc(&tmpl->refcnt);
1259         DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1260               (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1261         LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
1262         return tmpl;
1263 error:
1264         if (tmpl->wq)
1265                 claim_zero(ibv_destroy_wq(tmpl->wq));
1266         if (tmpl->cq)
1267                 claim_zero(ibv_destroy_cq(tmpl->cq));
1268         if (tmpl->channel)
1269                 claim_zero(ibv_destroy_comp_channel(tmpl->channel));
1270         if (tmpl->mr)
1271                 priv_mr_release(priv, tmpl->mr);
1272         return NULL;
1273 }
1274
1275 /**
1276  * Get an Rx queue Verbs object.
1277  *
1278  * @param priv
1279  *   Pointer to private structure.
1280  * @param idx
1281  *   Queue index in DPDK Rx queue array
1282  *
1283  * @return
1284  *   The Verbs object if it exists.
1285  */
1286 struct mlx5_rxq_ibv*
1287 mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
1288 {
1289         struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1290         struct mlx5_rxq_ctrl *rxq_ctrl;
1291
1292         if (idx >= priv->rxqs_n)
1293                 return NULL;
1294         if (!rxq_data)
1295                 return NULL;
1296         rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1297         if (rxq_ctrl->ibv) {
1298                 priv_mr_get(priv, rxq_data->mp);
1299                 rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
1300                 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1301                       (void *)rxq_ctrl->ibv,
1302                       rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
1303         }
1304         return rxq_ctrl->ibv;
1305 }
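/*
 * Usage sketch: mlx5_priv_rxq_ibv_get() and mlx5_priv_rxq_ibv_release()
 * are meant to be paired; the getter also takes a reference on the mempool
 * memory region, which the release path drops before checking the Verbs
 * refcount, e.g.:
 *
 *   struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, idx);
 *
 *   if (rxq_ibv != NULL) {
 *           ... use rxq_ibv->cq / rxq_ibv->wq ...
 *           mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
 *   }
 */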
1306
1307 /**
1308  * Release an Rx verbs queue object.
1309  *
1310  * @param priv
1311  *   Pointer to private structure.
1312  * @param rxq_ibv
1313  *   Verbs Rx queue object.
1314  *
1315  * @return
1316  *   0 on success, errno value on failure.
1317  */
1318 int
1319 mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1320 {
1321         int ret;
1322
1323         assert(rxq_ibv);
1324         assert(rxq_ibv->wq);
1325         assert(rxq_ibv->cq);
1326         assert(rxq_ibv->mr);
1327         ret = priv_mr_release(priv, rxq_ibv->mr);
1328         if (!ret)
1329                 rxq_ibv->mr = NULL;
1330         DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1331               (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
1332         if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
1333                 rxq_free_elts(rxq_ibv->rxq_ctrl);
1334                 claim_zero(ibv_destroy_wq(rxq_ibv->wq));
1335                 claim_zero(ibv_destroy_cq(rxq_ibv->cq));
1336                 if (rxq_ibv->channel)
1337                         claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
1338                 LIST_REMOVE(rxq_ibv, next);
1339                 rte_free(rxq_ibv);
1340                 return 0;
1341         }
1342         return EBUSY;
1343 }
1344
1345 /**
1346  * Verify the Verbs Rx queue list is empty.
1347  *
1348  * @param priv
1349  *  Pointer to private structure.
1350  *
1351  * @return the number of objects not released.
1352  */
1353 int
1354 mlx5_priv_rxq_ibv_verify(struct priv *priv)
1355 {
1356         int ret = 0;
1357         struct mlx5_rxq_ibv *rxq_ibv;
1358
1359         LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
1360                 DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
1361                       (void *)rxq_ibv);
1362                 ++ret;
1363         }
1364         return ret;
1365 }
1366
1367 /**
1368  * Return true if a single reference exists on the object.
1369  *
1370  * @param priv
1371  *   Pointer to private structure.
1372  * @param rxq_ibv
1373  *   Verbs Rx queue object.
1374  */
1375 int
1376 mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1377 {
1378         (void)priv;
1379         assert(rxq_ibv);
1380         return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
1381 }
1382
1383 /**
1384  * Create a DPDK Rx queue.
1385  *
1386  * @param priv
1387  *   Pointer to private structure.
1388  * @param idx
1389  *   RX queue index.
1390  * @param desc
1391  *   Number of descriptors to configure in queue.
1392  * @param socket
1393  *   NUMA socket on which memory must be allocated.
1394  *
1395  * @return
1396  *   A DPDK queue object on success.
1397  *   A DPDK queue object on success, NULL on failure.
1398 struct mlx5_rxq_ctrl*
1399 mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
1400                   unsigned int socket, struct rte_mempool *mp)
1401 {
1402         struct rte_eth_dev *dev = priv->dev;
1403         struct mlx5_rxq_ctrl *tmpl;
1404         const uint16_t desc_n =
1405                 desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
1406         unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
1407
1408         tmpl = rte_calloc_socket("RXQ", 1,
1409                                  sizeof(*tmpl) +
1410                                  desc_n * sizeof(struct rte_mbuf *),
1411                                  0, socket);
1412         if (!tmpl)
1413                 return NULL;
1414         if (priv->dev->data->dev_conf.intr_conf.rxq)
1415                 tmpl->irq = 1;
1416         /* Enable scattered packet support for this queue if necessary. */
1417         assert(mb_len >= RTE_PKTMBUF_HEADROOM);
1418         if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
1419             (mb_len - RTE_PKTMBUF_HEADROOM)) {
1420                 tmpl->rxq.sges_n = 0;
1421         } else if (dev->data->dev_conf.rxmode.enable_scatter) {
1422                 unsigned int size =
1423                         RTE_PKTMBUF_HEADROOM +
1424                         dev->data->dev_conf.rxmode.max_rx_pkt_len;
1425                 unsigned int sges_n;
1426
1427                 /*
1428                  * Determine the number of SGEs needed for a full packet
1429                  * and round it to the next power of two.
1430                  */
1431                 sges_n = log2above((size / mb_len) + !!(size % mb_len));
1432                 tmpl->rxq.sges_n = sges_n;
1433                 /* Make sure rxq.sges_n did not overflow. */
1434                 size = mb_len * (1 << tmpl->rxq.sges_n);
1435                 size -= RTE_PKTMBUF_HEADROOM;
1436                 if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
1437                         ERROR("%p: too many SGEs (%u) needed to handle"
1438                               " requested maximum packet size %u",
1439                               (void *)dev,
1440                               1 << sges_n,
1441                               dev->data->dev_conf.rxmode.max_rx_pkt_len);
1442                         goto error;
1443                 }
1444         } else {
1445                 WARN("%p: the requested maximum Rx packet size (%u) is"
1446                      " larger than a single mbuf (%u) and scattered"
1447                      " mode has not been requested",
1448                      (void *)dev,
1449                      dev->data->dev_conf.rxmode.max_rx_pkt_len,
1450                      mb_len - RTE_PKTMBUF_HEADROOM);
1451         }
1452         DEBUG("%p: maximum number of segments per packet: %u",
1453               (void *)dev, 1 << tmpl->rxq.sges_n);
1454         if (desc % (1 << tmpl->rxq.sges_n)) {
1455                 ERROR("%p: number of RX queue descriptors (%u) is not a"
1456                       " multiple of SGEs per packet (%u)",
1457                       (void *)dev,
1458                       desc,
1459                       1 << tmpl->rxq.sges_n);
1460                 goto error;
1461         }
1462         /* Toggle RX checksum offload if hardware supports it. */
1463         if (priv->hw_csum)
1464                 tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1465         if (priv->hw_csum_l2tun)
1466                 tmpl->rxq.csum_l2tun =
1467                         !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1468         /* Configure VLAN stripping. */
1469         tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
1470                                !!dev->data->dev_conf.rxmode.hw_vlan_strip);
1471         /* By default, FCS (CRC) is stripped by hardware. */
1472         if (dev->data->dev_conf.rxmode.hw_strip_crc) {
1473                 tmpl->rxq.crc_present = 0;
1474         } else if (priv->hw_fcs_strip) {
1475                 tmpl->rxq.crc_present = 1;
1476         } else {
1477                 WARN("%p: CRC stripping has been disabled but will still"
1478                      " be performed by hardware, make sure MLNX_OFED and"
1479                      " firmware are up to date",
1480                      (void *)dev);
1481                 tmpl->rxq.crc_present = 0;
1482         }
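             /*
              * When the FCS is kept, the data path subtracts its 4 bytes
              * (crc_present << 2) from the length reported to the application.
              */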
1483         DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
1484               " incoming frames to hide it",
1485               (void *)dev,
1486               tmpl->rxq.crc_present ? "disabled" : "enabled",
1487               tmpl->rxq.crc_present << 2);
1488         /* Save port ID. */
1489         tmpl->rxq.rss_hash = priv->rxqs_n > 1;
1490         tmpl->rxq.port_id = dev->data->port_id;
1491         tmpl->priv = priv;
1492         tmpl->rxq.mp = mp;
1493         tmpl->rxq.stats.idx = idx;
1494         tmpl->rxq.elts_n = log2above(desc);
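             /*
              * The mbuf ring was allocated together with the control structure
              * by rte_calloc_socket() above and starts right after it.
              */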
1495         tmpl->rxq.elts =
1496                 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
1497         rte_atomic32_inc(&tmpl->refcnt);
1498         DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1499               (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1500         LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
1501         return tmpl;
1502 error:
1503         rte_free(tmpl);
1504         return NULL;
1505 }
1506
1507 /**
1508  * Get a Rx queue.
1509  *
1510  * @param priv
1511  *   Pointer to private structure.
1512  * @param idx
1513  *   RX queue index.
1514  *
1515  * @return
1516  *   A pointer to the queue if it exists.
1517  */
1518 struct mlx5_rxq_ctrl*
1519 mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
1520 {
1521         struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
1522
1523         if ((*priv->rxqs)[idx]) {
1524                 rxq_ctrl = container_of((*priv->rxqs)[idx],
1525                                         struct mlx5_rxq_ctrl,
1526                                         rxq);
1527
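                     /* Hold a reference on the Verbs object as well. */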
1528                 mlx5_priv_rxq_ibv_get(priv, idx);
1529                 rte_atomic32_inc(&rxq_ctrl->refcnt);
1530                 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1531                       (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1532         }
1533         return rxq_ctrl;
1534 }
1535
1536 /**
1537  * Release a Rx queue.
1538  *
1539  * @param priv
1540  *   Pointer to private structure.
1541  * @param idx
1542  *   RX queue index.
1543  *
1544  * @return
1545  *   0 on success, errno value on failure.
1546  */
1547 int
1548 mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
1549 {
1550         struct mlx5_rxq_ctrl *rxq_ctrl;
1551
1552         if (!(*priv->rxqs)[idx])
1553                 return 0;
1554         rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1555         assert(rxq_ctrl->priv);
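             /* Drop the Verbs object reference; forget it once destroyed. */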
1556         if (rxq_ctrl->ibv) {
1557                 int ret;
1558
1559                 ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
1560                 if (!ret)
1561                         rxq_ctrl->ibv = NULL;
1562         }
1563         DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1564               (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1565         if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
1566                 LIST_REMOVE(rxq_ctrl, next);
1567                 rte_free(rxq_ctrl);
1568                 (*priv->rxqs)[idx] = NULL;
1569                 return 0;
1570         }
1571         return EBUSY;
1572 }
1573
1574 /**
1575  * Verify if the queue can be released.
1576  *
1577  * @param priv
1578  *   Pointer to private structure.
1579  * @param idx
1580  *   RX queue index.
1581  *
1582  * @return
1583  *   1 if the queue can be released, 0 if it is still in use, -1 if it does not exist.
1584  */
1585 int
1586 mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
1587 {
1588         struct mlx5_rxq_ctrl *rxq_ctrl;
1589
1590         if (!(*priv->rxqs)[idx])
1591                 return -1;
1592         rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1593         return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
1594 }
1595
1596 /**
1597  * Verify the Rx queue list is empty.
1598  *
1599  * @param priv
1600  *   Pointer to private structure.
1601  *
1602  * @return the number of objects not released.
1603  */
1604 int
1605 mlx5_priv_rxq_verify(struct priv *priv)
1606 {
1607         struct mlx5_rxq_ctrl *rxq_ctrl;
1608         int ret = 0;
1609
1610         LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1611                 DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
1612                       (void *)rxq_ctrl);
1613                 ++ret;
1614         }
1615         return ret;
1616 }
1617
1618 /**
1619  * Create an indirection table.
1620  *
1621  * @param priv
1622  *   Pointer to private structure.
1623  * @param queues
1624  *   Queues entering the indirection table.
1625  * @param queues_n
1626  *   Number of queues in the array.
1627  *
1628  * @return
1629  *   A new indirection table.
1630  */
1631 struct mlx5_ind_table_ibv*
1632 mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
1633                             uint16_t queues_n)
1634 {
1635         struct mlx5_ind_table_ibv *ind_tbl;
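             /*
              * Log2 size of the indirection table: the exact queue count when
              * it is a power of two, otherwise padded up to the maximum
              * supported size.
              */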
1636         const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
1637                 log2above(queues_n) :
1638                 log2above(priv->ind_table_max_size);
1639         struct ibv_wq *wq[1 << wq_n];
1640         unsigned int i;
1641         unsigned int j;
1642
1643         ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
1644                              queues_n * sizeof(uint16_t), 0);
1645         if (!ind_tbl)
1646                 return NULL;
1647         for (i = 0; i != queues_n; ++i) {
1648                 struct mlx5_rxq_ctrl *rxq =
1649                         mlx5_priv_rxq_get(priv, queues[i]);
1650
1651                 if (!rxq)
1652                         goto error;
1653                 wq[i] = rxq->ibv->wq;
1654                 ind_tbl->queues[i] = queues[i];
1655         }
1656         ind_tbl->queues_n = queues_n;
1657         /* Finalise indirection table. */
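             /* Remaining entries are filled by cycling over the configured queues. */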
1658         for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
1659                 wq[i] = wq[j];
1660         ind_tbl->ind_table = ibv_create_rwq_ind_table(
1661                 priv->ctx,
1662                 &(struct ibv_rwq_ind_table_init_attr){
1663                         .log_ind_tbl_size = wq_n,
1664                         .ind_tbl = wq,
1665                         .comp_mask = 0,
1666                 });
1667         if (!ind_tbl->ind_table)
1668                 goto error;
1669         rte_atomic32_inc(&ind_tbl->refcnt);
1670         LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
1671         DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1672               (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1673         return ind_tbl;
1674 error:
1675         rte_free(ind_tbl);
1676         DEBUG("%p cannot create indirection table", (void *)priv);
1677         return NULL;
1678 }
1679
1680 /**
1681  * Get an indirection table.
1682  *
1683  * @param priv
1684  *   Pointer to private structure.
1685  * @param queues
1686  *   Queues entering the indirection table.
1687  * @param queues_n
1688  *   Number of queues in the array.
1689  *
1690  * @return
1691  *   An indirection table if found.
1692  */
1693 struct mlx5_ind_table_ibv*
1694 mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
1695                             uint16_t queues_n)
1696 {
1697         struct mlx5_ind_table_ibv *ind_tbl;
1698
1699         LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1700                 if ((ind_tbl->queues_n == queues_n) &&
1701                     (memcmp(ind_tbl->queues, queues,
1702                             ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
1703                      == 0))
1704                         break;
1705         }
1706         if (ind_tbl) {
1707                 unsigned int i;
1708
1709                 rte_atomic32_inc(&ind_tbl->refcnt);
1710                 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1711                       (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1712                 for (i = 0; i != ind_tbl->queues_n; ++i)
1713                         mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
1714         }
1715         return ind_tbl;
1716 }
1717
1718 /**
1719  * Release an indirection table.
1720  *
1721  * @param priv
1722  *   Pointer to private structure.
1723  * @param ind_table
1724  *   Indirection table to release.
1725  *
1726  * @return
1727  *   0 on success, errno value on failure.
1728  */
1729 int
1730 mlx5_priv_ind_table_ibv_release(struct priv *priv,
1731                                 struct mlx5_ind_table_ibv *ind_tbl)
1732 {
1733         unsigned int i;
1734
1735         DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1736               (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1737         if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
1738                 claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
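             /*
              * Drop the references taken on the Rx queues; they remain in use
              * as long as the port owns them, hence claim_nonzero() below.
              */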
1739         for (i = 0; i != ind_tbl->queues_n; ++i)
1740                 claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
1741         if (!rte_atomic32_read(&ind_tbl->refcnt)) {
1742                 LIST_REMOVE(ind_tbl, next);
1743                 rte_free(ind_tbl);
1744                 return 0;
1745         }
1746         return EBUSY;
1747 }
1748
1749 /**
1750  * Verify the Verbs Rx queue indirection table list is empty.
1751  *
1752  * @param priv
1753  *   Pointer to private structure.
1754  *
1755  * @return the number of objects not released.
1756  */
1757 int
1758 mlx5_priv_ind_table_ibv_verify(struct priv *priv)
1759 {
1760         struct mlx5_ind_table_ibv *ind_tbl;
1761         int ret = 0;
1762
1763         LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1764                 DEBUG("%p: Verbs indirection table %p still referenced",
1765                       (void *)priv, (void *)ind_tbl);
1766                 ++ret;
1767         }
1768         return ret;
1769 }
1770
1771 /**
1772  * Create an Rx Hash queue.
1773  *
1774  * @param priv
1775  *   Pointer to private structure.
1776  * @param rss_key
1777  *   RSS key for the Rx hash queue.
1778  * @param rss_key_len
1779  *   RSS key length.
1780  * @param hash_fields
1781  *   Verbs protocol hash field to make the RSS on.
1782  * @param queues
1783  *   Queues entering the hash queue.
1784  * @param queues_n
1785  *   Number of queues.
1786  *
1787  * @return
1788  *   A hash Rx queue on success.
1789  */
1790 struct mlx5_hrxq*
1791 mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1792                    uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1793 {
1794         struct mlx5_hrxq *hrxq;
1795         struct mlx5_ind_table_ibv *ind_tbl;
1796         struct ibv_qp *qp;
1797
1798         ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1799         if (!ind_tbl)
1800                 ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
1801         if (!ind_tbl)
1802                 return NULL;
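             /*
              * Create a RAW_PACKET QP with RSS: received packets are spread
              * over the indirection table work queues using a Toeplitz hash
              * of the selected fields.
              */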
1803         qp = ibv_create_qp_ex(
1804                 priv->ctx,
1805                 &(struct ibv_qp_init_attr_ex){
1806                         .qp_type = IBV_QPT_RAW_PACKET,
1807                         .comp_mask =
1808                                 IBV_QP_INIT_ATTR_PD |
1809                                 IBV_QP_INIT_ATTR_IND_TABLE |
1810                                 IBV_QP_INIT_ATTR_RX_HASH,
1811                         .rx_hash_conf = (struct ibv_rx_hash_conf){
1812                                 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
1813                                 .rx_hash_key_len = rss_key_len,
1814                                 .rx_hash_key = rss_key,
1815                                 .rx_hash_fields_mask = hash_fields,
1816                         },
1817                         .rwq_ind_tbl = ind_tbl->ind_table,
1818                         .pd = priv->pd,
1819                 });
1820         if (!qp)
1821                 goto error;
1822         hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
1823         if (!hrxq)
1824                 goto error;
1825         hrxq->ind_table = ind_tbl;
1826         hrxq->qp = qp;
1827         hrxq->rss_key_len = rss_key_len;
1828         hrxq->hash_fields = hash_fields;
1829         memcpy(hrxq->rss_key, rss_key, rss_key_len);
1830         rte_atomic32_inc(&hrxq->refcnt);
1831         LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
1832         DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1833               (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1834         return hrxq;
1835 error:
1836         mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1837         if (qp)
1838                 claim_zero(ibv_destroy_qp(qp));
1839         return NULL;
1840 }
1841
1842 /**
1843  * Get an Rx Hash queue.
1844  *
1845  * @param priv
1846  *   Pointer to private structure.
1847  * @param rss_key
1848  *   RSS key for the Rx hash queue.
      * @param rss_key_len
      *   RSS key length.
      * @param hash_fields
      *   Verbs protocol hash field to make the RSS on.
1849  * @param queues
1850  *   Queues entering the hash queue.
1851  * @param queues_n
1852  *   Number of queues.
1853  *
1854  * @return
1855  *   A hash Rx queue if a matching one exists, NULL otherwise.
1856  */
1857 struct mlx5_hrxq*
1858 mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1859                    uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1860 {
1861         struct mlx5_hrxq *hrxq;
1862
1863         LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1864                 struct mlx5_ind_table_ibv *ind_tbl;
1865
1866                 if (hrxq->rss_key_len != rss_key_len)
1867                         continue;
1868                 if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
1869                         continue;
1870                 if (hrxq->hash_fields != hash_fields)
1871                         continue;
1872                 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1873                 if (!ind_tbl)
1874                         continue;
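                     /*
                      * The get above took references on the table and its
                      * queues; give them back if it is not the table used by
                      * this hash Rx queue.
                      */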
1875                 if (ind_tbl != hrxq->ind_table) {
1876                         mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1877                         continue;
1878                 }
1879                 rte_atomic32_inc(&hrxq->refcnt);
1880                 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1881                       (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1882                 return hrxq;
1883         }
1884         return NULL;
1885 }
1886
1887 /**
1888  * Release the hash Rx queue.
1889  *
1890  * @param priv
1891  *   Pointer to private structure.
1892  * @param hrxq
1893  *   Pointer to Hash Rx queue to release.
1894  *
1895  * @return
1896  *   0 on success, errno value on failure.
1897  */
1898 int
1899 mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
1900 {
1901         DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1902               (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1903         if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
1904                 claim_zero(ibv_destroy_qp(hrxq->qp));
1905                 mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
1906                 LIST_REMOVE(hrxq, next);
1907                 rte_free(hrxq);
1908                 return 0;
1909         }
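             /*
              * Still referenced: only give back the indirection table
              * reference taken when this hash Rx queue was obtained.
              */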
1910         claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
1911         return EBUSY;
1912 }
1913
1914 /**
1915  * Verify the Verbs hash Rx queue list is empty.
1916  *
1917  * @param priv
1918  *   Pointer to private structure.
1919  *
1920  * @return the number of objects not released.
1921  */
1922 int
1923 mlx5_priv_hrxq_ibv_verify(struct priv *priv)
1924 {
1925         struct mlx5_hrxq *hrxq;
1926         int ret = 0;
1927
1928         LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1929                 DEBUG("%p: Verbs Hash Rx queue %p still referenced",
1930                       (void *)priv, (void *)hrxq);
1931                 ++ret;
1932         }
1933         return ret;
1934 }